diff --git a/.github/scripts/check_doc_source_of_truth.py b/.github/scripts/check_doc_source_of_truth.py old mode 100755 new mode 100644 diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000000..1d0533e24b --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,21 @@ +# AGENTS.md + +This file provides guidance to Codex (Codex.ai/code) when working with code in this repository. + +## Detected stack +- Languages: Rust. +- Frameworks: none detected from the supported starter markers. + +## Verification +- Run Rust verification from `rust/`: `cargo fmt`, `cargo clippy --workspace --all-targets -- -D warnings`, `cargo test --workspace` +- `src/` and `tests/` are both present; update both surfaces together when behavior changes. + +## Repository shape +- `rust/` contains the Rust workspace and active CLI/runtime implementation. +- `src/` contains source files that should stay consistent with generated guidance and tests. +- `tests/` contains validation surfaces that should be reviewed alongside code changes. + +## Working agreement +- Prefer small, reviewable changes and keep generated bootstrap files aligned with actual repo workflows. +- Keep shared defaults in `.Codex.json`; reserve `.Codex/settings.local.json` for machine-local overrides. +- Do not overwrite existing `AGENTS.md` content automatically; update it intentionally when repo workflows change. 
diff --git a/install.sh b/install.sh old mode 100755 new mode 100644 diff --git a/rust/Cargo.lock b/rust/Cargo.lock index e37ae7a5a8..a97bde5a99 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -17,10 +17,23 @@ dependencies = [ "memchr", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + [[package]] name = "api" version = "0.1.0" dependencies = [ + "criterion", "reqwest", "runtime", "serde", @@ -35,6 +48,12 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "base64" version = "0.22.1" @@ -77,6 +96,12 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.2.58" @@ -99,6 +124,58 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + [[package]] name = "clipboard-win" version = "5.4.1" @@ -144,6 +221,67 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "crossterm" version = "0.28.1" @@ -169,6 +307,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" version = "0.1.7" @@ -209,6 +353,12 @@ dependencies = [ "syn", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "endian-type" version = "0.1.2" @@ -245,7 +395,7 @@ checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" dependencies = [ "cfg-if", "rustix 1.1.4", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -380,12 +530,29 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] 
+name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "home" version = "0.5.12" @@ -622,6 +789,26 @@ dependencies = [ "serde", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.18" @@ -755,6 +942,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.21.4" @@ -783,6 +979,12 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "parking_lot" version = "0.12.5" @@ -837,6 +1039,34 @@ dependencies = [ "time", ] +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "plugins" version = "0.1.0" @@ -1015,6 +1245,26 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rayon" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -1111,6 +1361,7 @@ dependencies = [ name = "runtime" version = "0.1.0" dependencies = [ + "getrandom 0.3.4", "glob", "plugins", "regex", @@ -1119,6 +1370,7 @@ dependencies = [ "sha2", "telemetry", "tokio", + "toml", "walkdir", ] @@ -1138,7 +1390,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.4.15", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] 
[[package]] @@ -1301,6 +1553,15 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -1522,6 +1783,16 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.11.0" @@ -1574,6 +1845,47 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "toml_write", + "winnow", +] + +[[package]] +name = "toml_write" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + [[package]] name = "tools" version = "0.1.0" @@ -2045,6 +2357,15 @@ version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "winnow" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen" version = "0.51.0" diff --git a/rust/crates/api/src/providers/anthropic.rs b/rust/crates/api/src/providers/anthropic.rs index 7c9f02945e..51e10b44dd 100644 --- a/rust/crates/api/src/providers/anthropic.rs +++ b/rust/crates/api/src/providers/anthropic.rs @@ -600,8 +600,9 @@ fn jitter_for_base(base: Duration) -> Duration { } let raw_nanos = SystemTime::now() .duration_since(UNIX_EPOCH) - .map(|elapsed| u64::try_from(elapsed.as_nanos()).unwrap_or(u64::MAX)) - .unwrap_or(0); + .map_or(0, |elapsed| { + u64::try_from(elapsed.as_nanos()).unwrap_or(u64::MAX) + }); let tick = JITTER_COUNTER.fetch_add(1, Ordering::Relaxed); // splitmix64 finalizer — mixes the low bits so large bases still see // jitter across their full range instead of being clamped to subsec nanos. @@ -844,19 +845,17 @@ impl MessageStream { StreamEvent::MessageDelta(MessageDeltaEvent { usage, .. 
}) => { self.latest_usage = Some(usage.clone()); } - StreamEvent::MessageStop(_) => { - if !self.usage_recorded { - if let (Some(prompt_cache), Some(usage)) = - (&self.prompt_cache, self.latest_usage.as_ref()) - { - let record = prompt_cache.record_usage(&self.request, usage); - *self - .last_prompt_cache_record - .lock() - .unwrap_or_else(std::sync::PoisonError::into_inner) = Some(record); - } - self.usage_recorded = true; + StreamEvent::MessageStop(_) if !self.usage_recorded => { + if let (Some(prompt_cache), Some(usage)) = + (&self.prompt_cache, self.latest_usage.as_ref()) + { + let record = prompt_cache.record_usage(&self.request, usage); + *self + .last_prompt_cache_record + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner) = Some(record); } + self.usage_recorded = true; } _ => {} } diff --git a/rust/crates/api/src/providers/mod.rs b/rust/crates/api/src/providers/mod.rs index fb97900359..86871a82a1 100644 --- a/rust/crates/api/src/providers/mod.rs +++ b/rust/crates/api/src/providers/mod.rs @@ -753,14 +753,14 @@ mod tests { #[test] fn returns_context_window_metadata_for_kimi_models() { // kimi-k2.5 - let k25_limit = model_token_limit("kimi-k2.5") - .expect("kimi-k2.5 should have token limit metadata"); + let k25_limit = + model_token_limit("kimi-k2.5").expect("kimi-k2.5 should have token limit metadata"); assert_eq!(k25_limit.max_output_tokens, 16_384); assert_eq!(k25_limit.context_window_tokens, 256_000); // kimi-k1.5 - let k15_limit = model_token_limit("kimi-k1.5") - .expect("kimi-k1.5 should have token limit metadata"); + let k15_limit = + model_token_limit("kimi-k1.5").expect("kimi-k1.5 should have token limit metadata"); assert_eq!(k15_limit.max_output_tokens, 16_384); assert_eq!(k15_limit.context_window_tokens, 256_000); } @@ -768,11 +768,13 @@ mod tests { #[test] fn kimi_alias_resolves_to_kimi_k25_token_limits() { // The "kimi" alias resolves to "kimi-k2.5" via resolve_model_alias() - let alias_limit = model_token_limit("kimi") - 
.expect("kimi alias should resolve to kimi-k2.5 limits"); - let direct_limit = model_token_limit("kimi-k2.5") - .expect("kimi-k2.5 should have limits"); - assert_eq!(alias_limit.max_output_tokens, direct_limit.max_output_tokens); + let alias_limit = + model_token_limit("kimi").expect("kimi alias should resolve to kimi-k2.5 limits"); + let direct_limit = model_token_limit("kimi-k2.5").expect("kimi-k2.5 should have limits"); + assert_eq!( + alias_limit.max_output_tokens, + direct_limit.max_output_tokens + ); assert_eq!( alias_limit.context_window_tokens, direct_limit.context_window_tokens diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs index 4e4183bd96..319a8896ee 100644 --- a/rust/crates/api/src/providers/openai_compat.rs +++ b/rust/crates/api/src/providers/openai_compat.rs @@ -327,8 +327,9 @@ fn jitter_for_base(base: Duration) -> Duration { } let raw_nanos = SystemTime::now() .duration_since(UNIX_EPOCH) - .map(|elapsed| u64::try_from(elapsed.as_nanos()).unwrap_or(u64::MAX)) - .unwrap_or(0); + .map_or(0, |elapsed| { + u64::try_from(elapsed.as_nanos()).unwrap_or(u64::MAX) + }); let tick = JITTER_COUNTER.fetch_add(1, Ordering::Relaxed); let mut mixed = raw_nanos .wrapping_add(tick) @@ -2195,9 +2196,16 @@ mod tests { #[test] fn provider_specific_size_limits_are_correct() { - assert_eq!(OpenAiCompatConfig::dashscope().max_request_body_bytes, 6_291_456); // 6MB - assert_eq!(OpenAiCompatConfig::openai().max_request_body_bytes, 104_857_600); // 100MB - assert_eq!(OpenAiCompatConfig::xai().max_request_body_bytes, 52_428_800); // 50MB + assert_eq!( + OpenAiCompatConfig::dashscope().max_request_body_bytes, + 6_291_456 + ); // 6MB + assert_eq!( + OpenAiCompatConfig::openai().max_request_body_bytes, + 104_857_600 + ); // 100MB + assert_eq!(OpenAiCompatConfig::xai().max_request_body_bytes, 52_428_800); + // 50MB } #[test] diff --git a/rust/crates/api/tests/proxy_integration.rs 
b/rust/crates/api/tests/proxy_integration.rs index 7e3906983f..56078ca739 100644 --- a/rust/crates/api/tests/proxy_integration.rs +++ b/rust/crates/api/tests/proxy_integration.rs @@ -39,12 +39,15 @@ impl Drop for EnvVarGuard { fn proxy_config_from_env_reads_uppercase_proxy_vars() { // given let _lock = env_lock(); + let _clear_http = EnvVarGuard::set("HTTP_PROXY", None); + let _clear_https = EnvVarGuard::set("HTTPS_PROXY", None); + let _clear_no = EnvVarGuard::set("NO_PROXY", None); + let _clear_http_lower = EnvVarGuard::set("http_proxy", None); + let _clear_https_lower = EnvVarGuard::set("https_proxy", None); + let _clear_no_lower = EnvVarGuard::set("no_proxy", None); let _http = EnvVarGuard::set("HTTP_PROXY", Some("http://proxy.corp:3128")); let _https = EnvVarGuard::set("HTTPS_PROXY", Some("http://secure.corp:3129")); let _no = EnvVarGuard::set("NO_PROXY", Some("localhost,127.0.0.1")); - let _http_lower = EnvVarGuard::set("http_proxy", None); - let _https_lower = EnvVarGuard::set("https_proxy", None); - let _no_lower = EnvVarGuard::set("no_proxy", None); // when let config = ProxyConfig::from_env(); @@ -64,9 +67,12 @@ fn proxy_config_from_env_reads_uppercase_proxy_vars() { fn proxy_config_from_env_reads_lowercase_proxy_vars() { // given let _lock = env_lock(); - let _http = EnvVarGuard::set("HTTP_PROXY", None); - let _https = EnvVarGuard::set("HTTPS_PROXY", None); - let _no = EnvVarGuard::set("NO_PROXY", None); + let _clear_http = EnvVarGuard::set("HTTP_PROXY", None); + let _clear_https = EnvVarGuard::set("HTTPS_PROXY", None); + let _clear_no = EnvVarGuard::set("NO_PROXY", None); + let _clear_http_lower = EnvVarGuard::set("http_proxy", None); + let _clear_https_lower = EnvVarGuard::set("https_proxy", None); + let _clear_no_lower = EnvVarGuard::set("no_proxy", None); let _http_lower = EnvVarGuard::set("http_proxy", Some("http://lower.corp:3128")); let _https_lower = EnvVarGuard::set("https_proxy", Some("http://lower-secure.corp:3129")); let _no_lower = 
EnvVarGuard::set("no_proxy", Some(".internal")); @@ -127,12 +133,15 @@ fn proxy_config_from_env_treats_empty_values_as_unset() { fn build_client_with_env_proxy_config_succeeds() { // given let _lock = env_lock(); + let _clear_http = EnvVarGuard::set("HTTP_PROXY", None); + let _clear_https = EnvVarGuard::set("HTTPS_PROXY", None); + let _clear_no = EnvVarGuard::set("NO_PROXY", None); + let _clear_http_lower = EnvVarGuard::set("http_proxy", None); + let _clear_https_lower = EnvVarGuard::set("https_proxy", None); + let _clear_no_lower = EnvVarGuard::set("no_proxy", None); let _http = EnvVarGuard::set("HTTP_PROXY", Some("http://proxy.corp:3128")); let _https = EnvVarGuard::set("HTTPS_PROXY", Some("http://secure.corp:3129")); let _no = EnvVarGuard::set("NO_PROXY", Some("localhost")); - let _http_lower = EnvVarGuard::set("http_proxy", None); - let _https_lower = EnvVarGuard::set("https_proxy", None); - let _no_lower = EnvVarGuard::set("no_proxy", None); let config = ProxyConfig::from_env(); // when @@ -158,8 +167,20 @@ fn build_client_with_proxy_url_config_succeeds() { fn proxy_config_from_env_prefers_uppercase_over_lowercase() { // given let _lock = env_lock(); + let _clear_http = EnvVarGuard::set("HTTP_PROXY", None); + let _clear_https = EnvVarGuard::set("HTTPS_PROXY", None); + let _clear_no = EnvVarGuard::set("NO_PROXY", None); + let _clear_http_lower = EnvVarGuard::set("http_proxy", None); + let _clear_https_lower = EnvVarGuard::set("https_proxy", None); + let _clear_no_lower = EnvVarGuard::set("no_proxy", None); + #[cfg(not(windows))] let _http_upper = EnvVarGuard::set("HTTP_PROXY", Some("http://upper.corp:3128")); + #[cfg(not(windows))] let _http_lower = EnvVarGuard::set("http_proxy", Some("http://lower.corp:3128")); + #[cfg(windows)] + let _http_lower = EnvVarGuard::set("http_proxy", Some("http://lower.corp:3128")); + #[cfg(windows)] + let _http_upper = EnvVarGuard::set("HTTP_PROXY", Some("http://upper.corp:3128")); let _https = EnvVarGuard::set("HTTPS_PROXY", 
None); let _https_lower = EnvVarGuard::set("https_proxy", None); let _no = EnvVarGuard::set("NO_PROXY", None); diff --git a/rust/crates/mock-anthropic-service/src/lib.rs b/rust/crates/mock-anthropic-service/src/lib.rs index 68968eed2e..bb1a45d896 100644 --- a/rust/crates/mock-anthropic-service/src/lib.rs +++ b/rust/crates/mock-anthropic-service/src/lib.rs @@ -100,6 +100,13 @@ enum Scenario { PluginToolRoundtrip, AutoCompactTriggered, TokenCostReporting, + RustRedGreen, + NodeRedGreen, + PythonRedGreen, + RustConfigFailure, + NodeToolUnavailable, + PythonTimeout, + RustFinalGateRetry, } impl Scenario { @@ -117,6 +124,13 @@ impl Scenario { "plugin_tool_roundtrip" => Some(Self::PluginToolRoundtrip), "auto_compact_triggered" => Some(Self::AutoCompactTriggered), "token_cost_reporting" => Some(Self::TokenCostReporting), + "rust_red_green" => Some(Self::RustRedGreen), + "node_red_green" => Some(Self::NodeRedGreen), + "python_red_green" => Some(Self::PythonRedGreen), + "rust_config_failure" => Some(Self::RustConfigFailure), + "node_tool_unavailable" => Some(Self::NodeToolUnavailable), + "python_timeout" => Some(Self::PythonTimeout), + "rust_final_gate_retry" => Some(Self::RustFinalGateRetry), _ => None, } } @@ -135,6 +149,13 @@ impl Scenario { Self::PluginToolRoundtrip => "plugin_tool_roundtrip", Self::AutoCompactTriggered => "auto_compact_triggered", Self::TokenCostReporting => "token_cost_reporting", + Self::RustRedGreen => "rust_red_green", + Self::NodeRedGreen => "node_red_green", + Self::PythonRedGreen => "python_red_green", + Self::RustConfigFailure => "rust_config_failure", + Self::NodeToolUnavailable => "node_tool_unavailable", + Self::PythonTimeout => "python_timeout", + Self::RustFinalGateRetry => "rust_final_gate_retry", } } } @@ -296,6 +317,200 @@ fn tool_results_by_name(request: &MessageRequest) -> HashMap usize { + request + .messages + .iter() + .map(|message| { + message + .content + .iter() + .filter(|block| matches!(block, InputContentBlock::ToolResult { 
.. })) + .count() + }) + .sum() +} + +fn all_text_content(request: &MessageRequest) -> String { + request + .messages + .iter() + .flat_map(|message| message.content.iter()) + .filter_map(|block| match block { + InputContentBlock::Text { text } => Some(text.as_str()), + _ => None, + }) + .collect::>() + .join("\n") +} + +fn count_text_occurrences(request: &MessageRequest, needle: &str) -> usize { + all_text_content(request).matches(needle).count() +} + +enum QualityAction { + FinalText(String), + ToolUse { + message_id: &'static str, + tool_id: &'static str, + tool_name: &'static str, + input: Value, + }, +} + +impl QualityAction { + fn stream_body(self) -> String { + match self { + Self::FinalText(text) => final_text_sse(&text), + Self::ToolUse { + tool_id, + tool_name, + input, + .. + } => tool_use_sse_json(tool_id, tool_name, &input), + } + } + + fn message_response(self) -> MessageResponse { + match self { + Self::FinalText(text) => text_message_response("msg_quality_final", &text), + Self::ToolUse { + message_id, + tool_id, + tool_name, + input, + } => tool_message_response(message_id, tool_id, tool_name, input), + } + } +} + +fn write_file_input(path: &str, content: &str) -> Value { + json!({ + "path": path, + "content": content, + }) +} + +#[allow(clippy::too_many_lines)] +fn quality_action(request: &MessageRequest, scenario: Scenario) -> Option { + let writes = tool_result_count(request); + let reminder_count = count_text_occurrences(request, "still failing"); + match scenario { + Scenario::RustRedGreen => Some(match writes { + 0 => QualityAction::ToolUse { + message_id: "msg_rust_red_green_broken", + tool_id: "toolu_rust_red_green_broken", + tool_name: "write_file", + input: write_file_input( + "crates/app/src/lib.rs", + "pub fn answer() -> usize {\n \"wrong\"\n}\n", + ), + }, + 1 => QualityAction::ToolUse { + message_id: "msg_rust_red_green_fixed", + tool_id: "toolu_rust_red_green_fixed", + tool_name: "write_file", + input: write_file_input( + 
"crates/app/src/lib.rs", + "pub fn answer() -> usize {\n 42\n}\n", + ), + }, + _ => QualityAction::FinalText("rust quality red-green complete".to_string()), + }), + Scenario::NodeRedGreen => Some(match writes { + 0 => QualityAction::ToolUse { + message_id: "msg_node_red_green_broken", + tool_id: "toolu_node_red_green_broken", + tool_name: "write_file", + input: write_file_input( + "packages/web/src/index.ts", + "export const message: string = 42; // BROKEN_TYPECHECK\n", + ), + }, + 1 => QualityAction::ToolUse { + message_id: "msg_node_red_green_fixed", + tool_id: "toolu_node_red_green_fixed", + tool_name: "write_file", + input: write_file_input( + "packages/web/src/index.ts", + "export const message: string = \"ok\";\n", + ), + }, + _ => QualityAction::FinalText("node quality red-green complete".to_string()), + }), + Scenario::PythonRedGreen => Some(match writes { + 0 => QualityAction::ToolUse { + message_id: "msg_python_red_green_broken", + tool_id: "toolu_python_red_green_broken", + tool_name: "write_file", + input: write_file_input("services/api/app/main.py", "# BROKEN_PY_COMPILE\n"), + }, + 1 => QualityAction::ToolUse { + message_id: "msg_python_red_green_fixed", + tool_id: "toolu_python_red_green_fixed", + tool_name: "write_file", + input: write_file_input( + "services/api/app/main.py", + "def meaning() -> int:\n return 42\n", + ), + }, + _ => QualityAction::FinalText("python quality red-green complete".to_string()), + }), + Scenario::RustConfigFailure => Some(match writes { + 0 => QualityAction::ToolUse { + message_id: "msg_rust_config_failure", + tool_id: "toolu_rust_config_failure", + tool_name: "write_file", + input: write_file_input("Cargo.toml", "[package\nname = \"broken\"\n"), + }, + _ => QualityAction::FinalText("rust config failure captured".to_string()), + }), + Scenario::NodeToolUnavailable => Some(match writes { + 0 => QualityAction::ToolUse { + message_id: "msg_node_tool_unavailable", + tool_id: "toolu_node_tool_unavailable", + tool_name: 
"write_file", + input: write_file_input( + "packages/web/src/index.ts", + "export const message = \"TOOL_UNAVAILABLE_SENTINEL\";\n", + ), + }, + _ => QualityAction::FinalText("node tool unavailable captured".to_string()), + }), + Scenario::PythonTimeout => Some(match writes { + 0 => QualityAction::ToolUse { + message_id: "msg_python_timeout", + tool_id: "toolu_python_timeout", + tool_name: "write_file", + input: write_file_input("services/api/app/main.py", "# TIMEOUT_SENTINEL\n"), + }, + _ => QualityAction::FinalText("python timeout captured".to_string()), + }), + Scenario::RustFinalGateRetry => Some(match writes { + 0 => QualityAction::ToolUse { + message_id: "msg_rust_final_gate_retry_broken", + tool_id: "toolu_rust_final_gate_retry_broken", + tool_name: "write_file", + input: write_file_input("crates/app/src/lib.rs", "pub fn answer()->usize{2}\n"), + }, + 1 if reminder_count > 0 => QualityAction::ToolUse { + message_id: "msg_rust_final_gate_retry_fixed", + tool_id: "toolu_rust_final_gate_retry_fixed", + tool_name: "write_file", + input: write_file_input( + "crates/app/src/lib.rs", + "pub fn answer() -> usize {\n 2\n}\n", + ), + }, + 1 => { + QualityAction::FinalText("attempting to conclude the current rust fix".to_string()) + } + _ => QualityAction::FinalText("rust final gate retry complete".to_string()), + }), + _ => None, + } +} + fn flatten_tool_result_content(content: &[api::ToolResultContentBlock]) -> String { content .iter() @@ -331,6 +546,9 @@ fn build_http_response(request: &MessageRequest, scenario: Scenario) -> String { #[allow(clippy::too_many_lines)] fn build_stream_body(request: &MessageRequest, scenario: Scenario) -> String { + if let Some(action) = quality_action(request, scenario) { + return action.stream_body(); + } match scenario { Scenario::StreamingText => streaming_text_sse(), Scenario::ReadFileRoundtrip => match latest_tool_result(request) { @@ -464,11 +682,21 @@ fn build_stream_body(request: &MessageRequest, scenario: Scenario) -> String { 
Scenario::TokenCostReporting => { final_text_sse_with_usage("token cost reporting parity complete.", 1_000, 500) } + Scenario::RustRedGreen + | Scenario::NodeRedGreen + | Scenario::PythonRedGreen + | Scenario::RustConfigFailure + | Scenario::NodeToolUnavailable + | Scenario::PythonTimeout + | Scenario::RustFinalGateRetry => unreachable!("quality scenarios are handled above"), } } #[allow(clippy::too_many_lines)] fn build_message_response(request: &MessageRequest, scenario: Scenario) -> MessageResponse { + if let Some(action) = quality_action(request, scenario) { + return action.message_response(); + } match scenario { Scenario::StreamingText => text_message_response( "msg_streaming_text", @@ -634,6 +862,13 @@ fn build_message_response(request: &MessageRequest, scenario: Scenario) -> Messa 1_000, 500, ), + Scenario::RustRedGreen + | Scenario::NodeRedGreen + | Scenario::PythonRedGreen + | Scenario::RustConfigFailure + | Scenario::NodeToolUnavailable + | Scenario::PythonTimeout + | Scenario::RustFinalGateRetry => unreachable!("quality scenarios are handled above"), } } @@ -651,6 +886,13 @@ fn request_id_for(scenario: Scenario) -> &'static str { Scenario::PluginToolRoundtrip => "req_plugin_tool_roundtrip", Scenario::AutoCompactTriggered => "req_auto_compact_triggered", Scenario::TokenCostReporting => "req_token_cost_reporting", + Scenario::RustRedGreen => "req_rust_red_green", + Scenario::NodeRedGreen => "req_node_red_green", + Scenario::PythonRedGreen => "req_python_red_green", + Scenario::RustConfigFailure => "req_rust_config_failure", + Scenario::NodeToolUnavailable => "req_node_tool_unavailable", + Scenario::PythonTimeout => "req_python_timeout", + Scenario::RustFinalGateRetry => "req_rust_final_gate_retry", } } @@ -836,6 +1078,72 @@ fn tool_use_sse(tool_id: &str, tool_name: &str, partial_json_chunks: &[&str]) -> }]) } +fn tool_use_sse_json(tool_id: &str, tool_name: &str, input: &Value) -> String { + let mut body = String::new(); + append_sse( + &mut body, + 
"message_start", + json!({ + "type": "message_start", + "message": { + "id": format!("msg_{tool_id}"), + "type": "message", + "role": "assistant", + "content": [], + "model": DEFAULT_MODEL, + "stop_reason": null, + "stop_sequence": null, + "usage": usage_json(12, 0) + } + }), + ); + append_sse( + &mut body, + "content_block_start", + json!({ + "type": "content_block_start", + "index": 0, + "content_block": { + "type": "tool_use", + "id": tool_id, + "name": tool_name, + "input": {} + } + }), + ); + append_sse( + &mut body, + "content_block_delta", + json!({ + "type": "content_block_delta", + "index": 0, + "delta": { + "type": "input_json_delta", + "partial_json": input.to_string() + } + }), + ); + append_sse( + &mut body, + "content_block_stop", + json!({ + "type": "content_block_stop", + "index": 0 + }), + ); + append_sse( + &mut body, + "message_delta", + json!({ + "type": "message_delta", + "delta": {"stop_reason": "tool_use", "stop_sequence": null}, + "usage": usage_json(12, 4) + }), + ); + append_sse(&mut body, "message_stop", json!({"type": "message_stop"})); + body +} + struct ToolUseSse<'a> { tool_id: &'a str, tool_name: &'a str, diff --git a/rust/crates/plugins/bundled/example-bundled/hooks/post.sh b/rust/crates/plugins/bundled/example-bundled/hooks/post.sh old mode 100755 new mode 100644 diff --git a/rust/crates/plugins/bundled/example-bundled/hooks/pre.sh b/rust/crates/plugins/bundled/example-bundled/hooks/pre.sh old mode 100755 new mode 100644 diff --git a/rust/crates/plugins/bundled/sample-hooks/hooks/post.sh b/rust/crates/plugins/bundled/sample-hooks/hooks/post.sh old mode 100755 new mode 100644 diff --git a/rust/crates/plugins/bundled/sample-hooks/hooks/pre.sh b/rust/crates/plugins/bundled/sample-hooks/hooks/pre.sh old mode 100755 new mode 100644 diff --git a/rust/crates/plugins/src/hooks.rs b/rust/crates/plugins/src/hooks.rs index ff02c2ac27..a6e6dfaa18 100644 --- a/rust/crates/plugins/src/hooks.rs +++ b/rust/crates/plugins/src/hooks.rs @@ -1,7 +1,9 
@@ use std::ffi::OsStr; -use std::path::Path; use std::process::Command; +#[cfg(not(windows))] +use std::path::Path; + use serde_json::json; use crate::{PluginError, PluginHooks, PluginRegistry}; @@ -392,6 +394,24 @@ mod tests { let _ = path; } + fn hook_script_name(stem: &str) -> String { + if cfg!(windows) { + format!("{stem}.cmd") + } else { + format!("{stem}.sh") + } + } + + fn write_hook_script(path: &Path, message: &str) { + let contents = if cfg!(windows) { + format!("@echo off\r\necho {message}\r\n") + } else { + format!("#!/bin/sh\nprintf '%s\\n' '{message}'\n") + }; + fs::write(path, contents).expect("write hook"); + make_executable(path); + } + fn write_hook_plugin( root: &Path, name: &str, @@ -402,33 +422,19 @@ mod tests { fs::create_dir_all(root.join(".claude-plugin")).expect("manifest dir"); fs::create_dir_all(root.join("hooks")).expect("hooks dir"); - let pre_path = root.join("hooks").join("pre.sh"); - fs::write( - &pre_path, - format!("#!/bin/sh\nprintf '%s\\n' '{pre_message}'\n"), - ) - .expect("write pre hook"); - make_executable(&pre_path); - - let post_path = root.join("hooks").join("post.sh"); - fs::write( - &post_path, - format!("#!/bin/sh\nprintf '%s\\n' '{post_message}'\n"), - ) - .expect("write post hook"); - make_executable(&post_path); - - let failure_path = root.join("hooks").join("failure.sh"); - fs::write( - &failure_path, - format!("#!/bin/sh\nprintf '%s\\n' '{failure_message}'\n"), - ) - .expect("write failure hook"); - make_executable(&failure_path); + let pre_script = hook_script_name("pre"); + let post_script = hook_script_name("post"); + let failure_script = hook_script_name("failure"); + let pre_path = root.join("hooks").join(&pre_script); + let post_path = root.join("hooks").join(&post_script); + let failure_path = root.join("hooks").join(&failure_script); + write_hook_script(&pre_path, pre_message); + write_hook_script(&post_path, post_message); + write_hook_script(&failure_path, failure_message); fs::write( 
root.join(".claude-plugin").join("plugin.json"), format!( - "{{\n \"name\": \"{name}\",\n \"version\": \"1.0.0\",\n \"description\": \"hook plugin\",\n \"hooks\": {{\n \"PreToolUse\": [\"./hooks/pre.sh\"],\n \"PostToolUse\": [\"./hooks/post.sh\"],\n \"PostToolUseFailure\": [\"./hooks/failure.sh\"]\n }}\n}}" + "{{\n \"name\": \"{name}\",\n \"version\": \"1.0.0\",\n \"description\": \"hook plugin\",\n \"hooks\": {{\n \"PreToolUse\": [\"./hooks/{pre_script}\"],\n \"PostToolUse\": [\"./hooks/{post_script}\"],\n \"PostToolUseFailure\": [\"./hooks/{failure_script}\"]\n }}\n}}" ), ) .expect("write plugin manifest"); @@ -499,7 +505,11 @@ mod tests { fn pre_tool_use_denies_when_plugin_hook_exits_two() { // given let runner = HookRunner::new(crate::PluginHooks { - pre_tool_use: vec!["printf 'blocked by plugin'; exit 2".to_string()], + pre_tool_use: vec![if cfg!(windows) { + "echo blocked by plugin && exit /b 2".to_string() + } else { + "printf 'blocked by plugin'; exit 2".to_string() + }], post_tool_use: Vec::new(), post_tool_use_failure: Vec::new(), }); @@ -517,8 +527,16 @@ mod tests { // given let runner = HookRunner::new(crate::PluginHooks { pre_tool_use: vec![ - "printf 'broken plugin hook'; exit 1".to_string(), - "printf 'later plugin hook'".to_string(), + if cfg!(windows) { + "echo broken plugin hook && exit /b 1".to_string() + } else { + "printf 'broken plugin hook'; exit 1".to_string() + }, + if cfg!(windows) { + "echo later plugin hook".to_string() + } else { + "printf 'later plugin hook'".to_string() + }, ], post_tool_use: Vec::new(), post_tool_use_failure: Vec::new(), @@ -561,4 +579,43 @@ mod tests { ); } } + + #[test] + fn output_with_stdin_tolerates_broken_pipe_when_child_closes_stdin_early() { + // given: a hook that immediately closes stdin without consuming the + // JSON payload. Use an oversized payload so the parent keeps writing + // long enough for Linux to surface EPIPE on the old implementation. 
+ let root = temp_dir("stdin-close"); + let script = root.join("close-stdin.sh"); + fs::create_dir_all(&root).expect("temp hook dir"); + fs::write( + &script, + "#!/bin/sh\nexec 0<&-\nprintf 'stdin closed early\\n'\nsleep 0.05\n", + ) + .expect("write stdin-closing hook"); + make_executable(&script); + + let mut child = super::shell_command(script.to_str().expect("utf8 path")); + child.stdin(std::process::Stdio::piped()); + child.stdout(std::process::Stdio::piped()); + child.stderr(std::process::Stdio::piped()); + let large_input = vec![b'x'; 2 * 1024 * 1024]; + + // when + let output = child + .output_with_stdin(&large_input) + .expect("broken pipe should be tolerated"); + + // then + assert!( + output.status.success(), + "child should still exit cleanly: {output:?}" + ); + assert_eq!( + String::from_utf8_lossy(&output.stdout).trim(), + "stdin closed early" + ); + + let _ = fs::remove_dir_all(root); + } } diff --git a/rust/crates/plugins/src/lib.rs b/rust/crates/plugins/src/lib.rs index 765c0ac242..6671c38af7 100644 --- a/rust/crates/plugins/src/lib.rs +++ b/rust/crates/plugins/src/lib.rs @@ -2327,6 +2327,37 @@ mod tests { fs::write(path, contents).expect("write file"); } + fn make_executable(path: &Path) { + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + + let mut permissions = fs::metadata(path).expect("metadata").permissions(); + permissions.set_mode(0o755); + fs::set_permissions(path, permissions).expect("chmod"); + } + #[cfg(not(unix))] + let _ = path; + } + + fn script_name(stem: &str) -> String { + if cfg!(windows) { + format!("{stem}.cmd") + } else { + format!("{stem}.sh") + } + } + + fn write_script(path: &Path, unix_contents: &str, windows_contents: &str) { + let contents = if cfg!(windows) { + windows_contents + } else { + unix_contents + }; + write_file(path, contents); + make_executable(path); + } + fn write_loader_plugin(root: &Path) { write_file( root.join("hooks").join("pre.sh").as_path(), @@ -2426,18 +2457,22 @@ mod tests { fn 
write_lifecycle_plugin(root: &Path, name: &str, version: &str) -> PathBuf { let log_path = root.join("lifecycle.log"); - write_file( - root.join("lifecycle").join("init.sh").as_path(), + let init_script = script_name("init"); + let shutdown_script = script_name("shutdown"); + write_script( + root.join("lifecycle").join(&init_script).as_path(), "#!/bin/sh\nprintf 'init\\n' >> lifecycle.log\n", + "@echo off\r\n>> lifecycle.log echo init\r\n", ); - write_file( - root.join("lifecycle").join("shutdown.sh").as_path(), + write_script( + root.join("lifecycle").join(&shutdown_script).as_path(), "#!/bin/sh\nprintf 'shutdown\\n' >> lifecycle.log\n", + "@echo off\r\n>> lifecycle.log echo shutdown\r\n", ); write_file( root.join(MANIFEST_RELATIVE_PATH).as_path(), format!( - "{{\n \"name\": \"{name}\",\n \"version\": \"{version}\",\n \"description\": \"lifecycle plugin\",\n \"lifecycle\": {{\n \"Init\": [\"./lifecycle/init.sh\"],\n \"Shutdown\": [\"./lifecycle/shutdown.sh\"]\n }}\n}}" + "{{\n \"name\": \"{name}\",\n \"version\": \"{version}\",\n \"description\": \"lifecycle plugin\",\n \"lifecycle\": {{\n \"Init\": [\"./lifecycle/{init_script}\"],\n \"Shutdown\": [\"./lifecycle/{shutdown_script}\"]\n }}\n}}" ) .as_str(), ); @@ -2449,23 +2484,17 @@ mod tests { } fn write_tool_plugin_with_name(root: &Path, name: &str, version: &str, tool_name: &str) { - let script_path = root.join("tools").join("echo-json.sh"); - write_file( + let script_file = script_name("echo-json"); + let script_path = root.join("tools").join(&script_file); + write_script( &script_path, "#!/bin/sh\nINPUT=$(cat)\nprintf '{\"plugin\":\"%s\",\"tool\":\"%s\",\"input\":%s}\\n' \"$CLAWD_PLUGIN_ID\" \"$CLAWD_TOOL_NAME\" \"$INPUT\"\n", + "@echo off\r\nsetlocal EnableDelayedExpansion\r\nset /p INPUT=\r\necho {\"plugin\":\"%CLAWD_PLUGIN_ID%\",\"tool\":\"%CLAWD_TOOL_NAME%\",\"input\":%INPUT%}\r\n", ); - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - - let mut permissions = 
fs::metadata(&script_path).expect("metadata").permissions(); - permissions.set_mode(0o755); - fs::set_permissions(&script_path, permissions).expect("chmod"); - } write_file( root.join(MANIFEST_RELATIVE_PATH).as_path(), format!( - "{{\n \"name\": \"{name}\",\n \"version\": \"{version}\",\n \"description\": \"tool plugin\",\n \"tools\": [\n {{\n \"name\": \"{tool_name}\",\n \"description\": \"Echo JSON input\",\n \"inputSchema\": {{\"type\": \"object\", \"properties\": {{\"message\": {{\"type\": \"string\"}}}}, \"required\": [\"message\"], \"additionalProperties\": false}},\n \"command\": \"./tools/echo-json.sh\",\n \"requiredPermission\": \"workspace-write\"\n }}\n ]\n}}" + "{{\n \"name\": \"{name}\",\n \"version\": \"{version}\",\n \"description\": \"tool plugin\",\n \"tools\": [\n {{\n \"name\": \"{tool_name}\",\n \"description\": \"Echo JSON input\",\n \"inputSchema\": {{\"type\": \"object\", \"properties\": {{\"message\": {{\"type\": \"string\"}}}}, \"required\": [\"message\"], \"additionalProperties\": false}},\n \"command\": \"./tools/{script_file}\",\n \"requiredPermission\": \"workspace-write\"\n }}\n ]\n}}" ) .as_str(), ); @@ -3417,7 +3446,7 @@ mod tests { registry.shutdown().expect("shutdown should succeed"); let log = fs::read_to_string(&log_path).expect("lifecycle log should exist"); - assert_eq!(log, "init\nshutdown\n"); + assert_eq!(log.replace("\r\n", "\n"), "init\nshutdown\n"); let _ = fs::remove_dir_all(config_home); let _ = fs::remove_dir_all(source_root); @@ -3614,7 +3643,7 @@ mod tests { if registry.initialize().is_ok() && registry.shutdown().is_ok() { // Verify lifecycle.log exists and has expected content if let Ok(log) = fs::read_to_string(&log_path) { - if log == "init\nshutdown\n" { + if log.replace("\r\n", "\n") == "init\nshutdown\n" { success_count.fetch_add(1, AtomicOrdering::Relaxed); } } diff --git a/rust/crates/runtime/Cargo.toml b/rust/crates/runtime/Cargo.toml index b1bd04f374..a97d2e90e3 100644 --- a/rust/crates/runtime/Cargo.toml 
+++ b/rust/crates/runtime/Cargo.toml @@ -8,12 +8,14 @@ publish.workspace = true [dependencies] sha2 = "0.10" glob = "0.3" +getrandom = "0.3" plugins = { path = "../plugins" } regex = "1" serde = { version = "1", features = ["derive"] } serde_json.workspace = true telemetry = { path = "../telemetry" } tokio = { version = "1", features = ["io-std", "io-util", "macros", "process", "rt", "rt-multi-thread", "time"] } +toml = "0.8" walkdir = "2" [lints] diff --git a/rust/crates/runtime/src/bash.rs b/rust/crates/runtime/src/bash.rs index aad27f6662..6b2c0c9247 100644 --- a/rust/crates/runtime/src/bash.rs +++ b/rust/crates/runtime/src/bash.rs @@ -206,6 +206,18 @@ fn prepare_command( prepared.env("HOME", cwd.join(".sandbox-home")); prepared.env("TMPDIR", cwd.join(".sandbox-tmp")); } + #[cfg(windows)] + { + if !posix_shell_available() { + let mut prepared = Command::new("powershell"); + prepared.args(windows_shell_args(command)).current_dir(cwd); + if sandbox_status.filesystem_active { + prepared.env("HOME", cwd.join(".sandbox-home")); + prepared.env("TMPDIR", cwd.join(".sandbox-tmp")); + } + return prepared; + } + } prepared } @@ -233,6 +245,18 @@ fn prepare_tokio_command( prepared.env("HOME", cwd.join(".sandbox-home")); prepared.env("TMPDIR", cwd.join(".sandbox-tmp")); } + #[cfg(windows)] + { + if !posix_shell_available() { + let mut prepared = TokioCommand::new("powershell"); + prepared.args(windows_shell_args(command)).current_dir(cwd); + if sandbox_status.filesystem_active { + prepared.env("HOME", cwd.join(".sandbox-home")); + prepared.env("TMPDIR", cwd.join(".sandbox-tmp")); + } + return prepared; + } + } prepared } @@ -241,6 +265,114 @@ fn prepare_sandbox_dirs(cwd: &std::path::Path) { let _ = std::fs::create_dir_all(cwd.join(".sandbox-tmp")); } +#[cfg(windows)] +fn posix_shell_available() -> bool { + use std::sync::OnceLock; + + static HAS_SH: OnceLock = OnceLock::new(); + *HAS_SH.get_or_init(|| { + Command::new("sh") + .arg("-lc") + .arg("printf ok") + 
.stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .is_ok_and(|status| status.success()) + }) +} + +#[cfg(windows)] +fn windows_shell_args(command: &str) -> Vec { + if let Some(script) = translate_posix_snippet_to_powershell(command) { + vec![ + "-NoProfile".to_string(), + "-EncodedCommand".to_string(), + encode_powershell(&script), + ] + } else { + vec![ + "-NoProfile".to_string(), + "-Command".to_string(), + command.to_string(), + ] + } +} + +#[cfg(windows)] +fn translate_posix_snippet_to_powershell(command: &str) -> Option { + if let Some((text, exit_code)) = command + .strip_prefix("printf '") + .and_then(|rest| rest.split_once("'; exit ")) + { + return Some(format!( + "[Console]::Out.Write({}); exit {exit_code}", + powershell_literal(text) + )); + } + + if let Some((text, exit_code)) = command + .strip_prefix("printf '") + .and_then(|rest| rest.split_once("' >&2; exit ")) + { + return Some(format!( + "[Console]::Error.Write({}); exit {exit_code}", + powershell_literal(text) + )); + } + + if let Some(text) = command + .strip_prefix("printf '") + .and_then(|rest| rest.strip_suffix('\'')) + { + return Some(format!( + "[Console]::Out.Write({})", + powershell_literal(text) + )); + } + + None +} + +#[cfg(windows)] +fn powershell_literal(value: &str) -> String { + format!("'{}'", value.replace('\'', "''")) +} + +#[cfg(windows)] +fn encode_powershell(script: &str) -> String { + let bytes: Vec = script.encode_utf16().flat_map(u16::to_le_bytes).collect(); + encode_base64(&bytes) +} + +#[cfg(windows)] +fn encode_base64(bytes: &[u8]) -> String { + const TABLE: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4); + + for chunk in bytes.chunks(3) { + let b0 = chunk[0]; + let b1 = *chunk.get(1).unwrap_or(&0); + let b2 = *chunk.get(2).unwrap_or(&0); + let n = (u32::from(b0) << 16) | (u32::from(b1) << 8) | u32::from(b2); + + 
encoded.push(TABLE[((n >> 18) & 0x3F) as usize] as char); + encoded.push(TABLE[((n >> 12) & 0x3F) as usize] as char); + encoded.push(if chunk.len() > 1 { + TABLE[((n >> 6) & 0x3F) as usize] as char + } else { + '=' + }); + encoded.push(if chunk.len() > 2 { + TABLE[(n & 0x3F) as usize] as char + } else { + '=' + }); + } + + encoded +} + #[cfg(test)] mod tests { use super::{execute_bash, BashCommandInput}; diff --git a/rust/crates/runtime/src/compact.rs b/rust/crates/runtime/src/compact.rs index 3e805dda96..6daf7297f6 100644 --- a/rust/crates/runtime/src/compact.rs +++ b/rust/crates/runtime/src/compact.rs @@ -212,7 +212,7 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String { .filter_map(|block| match block { ContentBlock::ToolUse { name, .. } => Some(name.as_str()), ContentBlock::ToolResult { tool_name, .. } => Some(tool_name.as_str()), - ContentBlock::Text { .. } => None, + ContentBlock::Text { .. } | ContentBlock::VerificationReport { .. } => None, }) .collect::>(); tool_names.sort_unstable(); @@ -266,6 +266,7 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String { MessageRole::User => "user", MessageRole::Assistant => "assistant", MessageRole::Tool => "tool", + MessageRole::Verification => "verification", }; let content = message .blocks @@ -327,6 +328,16 @@ fn summarize_block(block: &ContentBlock) -> String { "tool_result {tool_name}: {}{output}", if *is_error { "error " } else { "" } ), + ContentBlock::VerificationReport { + phase, + status, + summary_text, + .. + } => format!( + "verification {} {}: {summary_text}", + phase.as_str(), + status.as_str() + ), }; truncate_summary(&raw, 160) } @@ -378,6 +389,7 @@ fn collect_key_files(messages: &[ConversationMessage]) -> Vec { ContentBlock::Text { text } => text.as_str(), ContentBlock::ToolUse { input, .. } => input.as_str(), ContentBlock::ToolResult { output, .. } => output.as_str(), + ContentBlock::VerificationReport { summary_text, .. 
} => summary_text.as_str(), }) .flat_map(extract_file_candidates) .collect::>(); @@ -400,6 +412,7 @@ fn first_text_block(message: &ConversationMessage) -> Option<&str> { ContentBlock::Text { text } if !text.trim().is_empty() => Some(text.as_str()), ContentBlock::ToolUse { .. } | ContentBlock::ToolResult { .. } + | ContentBlock::VerificationReport { .. } | ContentBlock::Text { .. } => None, }) } @@ -450,6 +463,7 @@ fn estimate_message_tokens(message: &ConversationMessage) -> usize { ContentBlock::ToolResult { tool_name, output, .. } => (tool_name.len() + output.len()) / 4 + 1, + ContentBlock::VerificationReport { summary_text, .. } => summary_text.len() / 4 + 1, }) .sum() } diff --git a/rust/crates/runtime/src/config.rs b/rust/crates/runtime/src/config.rs index c1fe4967a9..e5ce013b84 100644 --- a/rust/crates/runtime/src/config.rs +++ b/rust/crates/runtime/src/config.rs @@ -8,6 +8,7 @@ use crate::sandbox::{FilesystemIsolationMode, SandboxConfig}; /// Schema name advertised by generated settings files. pub const CLAW_SETTINGS_SCHEMA_NAME: &str = "SettingsSchema"; +const VERIFIER_AUTO_ENV_VAR: &str = "CLAUDE_CODE_VERIFIER_AUTO"; /// Origin of a loaded settings file in the configuration precedence chain. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] @@ -65,6 +66,154 @@ pub struct RuntimeFeatureConfig { sandbox: SandboxConfig, provider_fallbacks: ProviderFallbackConfig, trusted_roots: Vec, + verifier: RuntimeVerifierConfig, +} + +/// Settings for the post-edit self-verification loop. +/// +/// When enabled, the runtime runs staged multi-language verification for +/// successful edits and can block turn completion on a final validation gate. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RuntimeVerifierMode { + Legacy, + Staged, + Auto, +} + +impl RuntimeVerifierMode { + #[must_use] + pub fn as_str(self) -> &'static str { + match self { + Self::Legacy => "legacy", + Self::Staged => "staged", + Self::Auto => "auto", + } + } +} + +#[allow(clippy::struct_excessive_bools)] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RuntimeVerifierConfig { + enabled: bool, + mode: RuntimeVerifierMode, + quick_on_write: bool, + final_gate: bool, + max_output_bytes: usize, + run_check: bool, + run_clippy: bool, + run_fmt: bool, + run_test: bool, + timeout_secs: u64, + node_enabled: bool, + node_timeout_secs: u64, + python_enabled: bool, + python_timeout_secs: u64, +} + +impl Default for RuntimeVerifierConfig { + fn default() -> Self { + Self { + enabled: false, + mode: RuntimeVerifierMode::Legacy, + quick_on_write: true, + final_gate: false, + max_output_bytes: 2_048, + run_check: true, + run_clippy: true, + run_fmt: true, + run_test: true, + timeout_secs: 120, + node_enabled: true, + node_timeout_secs: 120, + python_enabled: true, + python_timeout_secs: 120, + } + } +} + +impl RuntimeVerifierConfig { + #[must_use] + pub fn enabled(&self) -> bool { + self.enabled + } + + #[must_use] + pub fn mode(&self) -> RuntimeVerifierMode { + self.mode + } + + #[must_use] + pub fn staged(&self) -> bool { + matches!( + self.mode, + RuntimeVerifierMode::Staged | RuntimeVerifierMode::Auto + ) + } + + #[must_use] + pub fn auto(&self) -> bool { + self.mode == RuntimeVerifierMode::Auto + } + + #[must_use] + pub fn quick_on_write(&self) -> bool { + self.quick_on_write + } + + #[must_use] + pub fn final_gate(&self) -> bool { + self.final_gate + } + + #[must_use] + pub fn max_output_bytes(&self) -> usize { + self.max_output_bytes + } + + #[must_use] + pub fn run_check(&self) -> bool { + self.run_check + } + + #[must_use] + pub fn run_clippy(&self) -> bool { + self.run_clippy + } + + #[must_use] + pub fn run_fmt(&self) -> bool { + 
self.run_fmt + } + + #[must_use] + pub fn run_test(&self) -> bool { + self.run_test + } + + #[must_use] + pub fn timeout_secs(&self) -> u64 { + self.timeout_secs + } + + #[must_use] + pub fn node_enabled(&self) -> bool { + self.node_enabled + } + + #[must_use] + pub fn node_timeout_secs(&self) -> u64 { + self.node_timeout_secs + } + + #[must_use] + pub fn python_enabled(&self) -> bool { + self.python_enabled + } + + #[must_use] + pub fn python_timeout_secs(&self) -> u64 { + self.python_timeout_secs + } } /// Ordered chain of fallback model identifiers used when the primary @@ -315,6 +464,7 @@ impl ConfigLoader { sandbox: parse_optional_sandbox_config(&merged_value)?, provider_fallbacks: parse_optional_provider_fallbacks(&merged_value)?, trusted_roots: parse_optional_trusted_roots(&merged_value)?, + verifier: parse_optional_verifier_config(&merged_value)?, }; Ok(RuntimeConfig { @@ -414,6 +564,11 @@ impl RuntimeConfig { pub fn trusted_roots(&self) -> &[String] { &self.feature_config.trusted_roots } + + #[must_use] + pub fn verifier(&self) -> &RuntimeVerifierConfig { + &self.feature_config.verifier + } } impl RuntimeFeatureConfig { @@ -483,6 +638,17 @@ impl RuntimeFeatureConfig { pub fn trusted_roots(&self) -> &[String] { &self.trusted_roots } + + #[must_use] + pub fn verifier(&self) -> &RuntimeVerifierConfig { + &self.verifier + } + + #[must_use] + pub fn with_verifier(mut self, verifier: RuntimeVerifierConfig) -> Self { + self.verifier = verifier; + self + } } impl ProviderFallbackConfig { @@ -777,6 +943,119 @@ fn validate_optional_hooks_config( parse_optional_hooks_config_object(root, &format!("{}: hooks", path.display())).map(|_| ()) } +fn parse_optional_verifier_config(root: &JsonValue) -> Result { + let Some(object) = root.as_object() else { + return Ok(default_verifier_config_with_env()); + }; + let Some(verifier_value) = object.get("verifier") else { + return Ok(default_verifier_config_with_env()); + }; + let verifier = expect_object(verifier_value, "merged 
settings.verifier")?; + + let mut config = RuntimeVerifierConfig::default(); + let mut enabled_explicit = false; + let mut mode_explicit = false; + let mut final_gate_explicit = false; + if let Some(enabled) = optional_bool(verifier, "enabled", "merged settings.verifier")? { + enabled_explicit = true; + config.enabled = enabled; + } + if let Some(mode) = optional_string(verifier, "mode", "merged settings.verifier")? { + mode_explicit = true; + config.mode = match mode { + "legacy" => RuntimeVerifierMode::Legacy, + "staged" => RuntimeVerifierMode::Staged, + "auto" => RuntimeVerifierMode::Auto, + other => { + return Err(ConfigError::Parse(format!( + "merged settings.verifier.mode must be legacy, staged, or auto, got `{other}`" + ))) + } + }; + } + if let Some(quick_on_write) = + optional_bool(verifier, "quickOnWrite", "merged settings.verifier")? + { + config.quick_on_write = quick_on_write; + } + if let Some(final_gate) = optional_bool(verifier, "finalGate", "merged settings.verifier")? { + final_gate_explicit = true; + config.final_gate = final_gate; + if final_gate && config.mode == RuntimeVerifierMode::Legacy { + config.mode = RuntimeVerifierMode::Staged; + } + } + if let Some(max_output_bytes) = + optional_u64(verifier, "maxOutputBytes", "merged settings.verifier")? + { + config.max_output_bytes = usize::try_from(max_output_bytes).map_err(|_| { + ConfigError::Parse( + "merged settings.verifier.maxOutputBytes is out of range".to_string(), + ) + })?; + } + if let Some(cargo_value) = verifier.get("cargo") { + let cargo = expect_object(cargo_value, "merged settings.verifier.cargo")?; + if let Some(v) = optional_bool(cargo, "check", "merged settings.verifier.cargo")? { + config.run_check = v; + } + if let Some(v) = optional_bool(cargo, "clippy", "merged settings.verifier.cargo")? { + config.run_clippy = v; + } + if let Some(v) = optional_bool(cargo, "fmt", "merged settings.verifier.cargo")? 
{ + config.run_fmt = v; + } + if let Some(v) = optional_bool(cargo, "test", "merged settings.verifier.cargo")? { + config.run_test = v; + } + if let Some(v) = optional_u64(cargo, "timeoutSecs", "merged settings.verifier.cargo")? { + config.timeout_secs = v; + } + } + if let Some(node_value) = verifier.get("node") { + let node = expect_object(node_value, "merged settings.verifier.node")?; + if let Some(v) = optional_bool(node, "enabled", "merged settings.verifier.node")? { + config.node_enabled = v; + } + if let Some(v) = optional_u64(node, "timeoutSecs", "merged settings.verifier.node")? { + config.node_timeout_secs = v; + } + } + if let Some(python_value) = verifier.get("python") { + let python = expect_object(python_value, "merged settings.verifier.python")?; + if let Some(v) = optional_bool(python, "enabled", "merged settings.verifier.python")? { + config.python_enabled = v; + } + if let Some(v) = optional_u64(python, "timeoutSecs", "merged settings.verifier.python")? { + config.python_timeout_secs = v; + } + } + if verifier_auto_env_enabled() && !enabled_explicit && !mode_explicit { + config.enabled = true; + config.mode = RuntimeVerifierMode::Auto; + if !final_gate_explicit { + config.final_gate = true; + } + } + Ok(config) +} + +fn default_verifier_config_with_env() -> RuntimeVerifierConfig { + let mut config = RuntimeVerifierConfig::default(); + if verifier_auto_env_enabled() { + config.enabled = true; + config.mode = RuntimeVerifierMode::Auto; + config.final_gate = true; + } + config +} + +fn verifier_auto_env_enabled() -> bool { + std::env::var(VERIFIER_AUTO_ENV_VAR) + .ok() + .is_some_and(|value| matches!(value.trim(), "1" | "true" | "TRUE" | "True")) +} + fn parse_optional_permission_rules( root: &JsonValue, ) -> Result { @@ -1246,11 +1525,12 @@ mod tests { use super::{ deep_merge_objects, parse_permission_mode_label, ConfigLoader, ConfigSource, McpServerConfig, McpTransport, ResolvedPermissionMode, RuntimeHookConfig, - RuntimePluginConfig, 
CLAW_SETTINGS_SCHEMA_NAME, + RuntimePluginConfig, RuntimeVerifierMode, CLAW_SETTINGS_SCHEMA_NAME, VERIFIER_AUTO_ENV_VAR, }; use crate::json::JsonValue; use crate::sandbox::FilesystemIsolationMode; use std::fs; + use std::sync::{Mutex, OnceLock}; use std::time::{SystemTime, UNIX_EPOCH}; fn temp_dir() -> std::path::PathBuf { @@ -1261,6 +1541,11 @@ mod tests { std::env::temp_dir().join(format!("runtime-config-{nanos}")) } + fn env_lock() -> &'static Mutex<()> { + static LOCK: OnceLock> = OnceLock::new(); + LOCK.get_or_init(|| Mutex::new(())) + } + #[test] fn rejects_non_object_settings_files() { let root = temp_dir(); @@ -2108,4 +2393,220 @@ mod tests { fs::remove_dir_all(root).expect("cleanup temp dir"); } + + #[test] + fn loads_staged_multi_language_verifier_settings() { + let root = temp_dir(); + let cwd = root.join("project"); + let home = root.join("home").join(".claw"); + let user_settings = home.join("settings.json"); + fs::create_dir_all(&home).expect("home config dir"); + fs::create_dir_all(&cwd).expect("project dir"); + fs::write( + &user_settings, + r#"{ + "verifier": { + "enabled": true, + "mode": "staged", + "quickOnWrite": false, + "finalGate": true, + "maxOutputBytes": 4096, + "cargo": { + "check": true, + "clippy": false, + "fmt": true, + "test": false, + "timeoutSecs": 33 + }, + "node": { + "enabled": false, + "timeoutSecs": 44 + }, + "python": { + "enabled": true, + "timeoutSecs": 55 + } + } +}"#, + ) + .expect("write user settings"); + + let config = ConfigLoader::new(&cwd, &home) + .load() + .expect("config should parse"); + let verifier = config.verifier(); + + assert!(verifier.enabled()); + assert_eq!(verifier.mode(), RuntimeVerifierMode::Staged); + assert!(!verifier.quick_on_write()); + assert!(verifier.final_gate()); + assert_eq!(verifier.max_output_bytes(), 4096); + assert!(verifier.run_check()); + assert!(!verifier.run_clippy()); + assert!(verifier.run_fmt()); + assert!(!verifier.run_test()); + assert_eq!(verifier.timeout_secs(), 33); + 
assert!(!verifier.node_enabled()); + assert_eq!(verifier.node_timeout_secs(), 44); + assert!(verifier.python_enabled()); + assert_eq!(verifier.python_timeout_secs(), 55); + + fs::remove_dir_all(root).expect("cleanup temp dir"); + } + + #[test] + fn final_gate_promotes_legacy_default_to_staged() { + let root = temp_dir(); + let cwd = root.join("project"); + let home = root.join("home").join(".claw"); + let user_settings = home.join("settings.json"); + fs::create_dir_all(&home).expect("home config dir"); + fs::create_dir_all(&cwd).expect("project dir"); + fs::write( + &user_settings, + r#"{ + "verifier": { + "enabled": true, + "finalGate": true + } +}"#, + ) + .expect("write user settings"); + + let config = ConfigLoader::new(&cwd, &home) + .load() + .expect("config should parse"); + + assert_eq!(config.verifier().mode(), RuntimeVerifierMode::Staged); + assert!(config.verifier().staged()); + assert!(config.verifier().final_gate()); + + fs::remove_dir_all(root).expect("cleanup temp dir"); + } + + #[test] + fn parses_auto_verifier_mode_from_config() { + let root = temp_dir(); + let cwd = root.join("project"); + let home = root.join("home").join(".claw"); + let user_settings = home.join("settings.json"); + fs::create_dir_all(&home).expect("home config dir"); + fs::create_dir_all(&cwd).expect("project dir"); + fs::write( + &user_settings, + r#"{ + "verifier": { + "enabled": true, + "mode": "auto", + "finalGate": true + } +}"#, + ) + .expect("write user settings"); + + let config = ConfigLoader::new(&cwd, &home) + .load() + .expect("config should parse"); + + assert_eq!(config.verifier().mode(), RuntimeVerifierMode::Auto); + assert!(config.verifier().staged()); + assert!(config.verifier().auto()); + assert!(config.verifier().final_gate()); + + fs::remove_dir_all(root).expect("cleanup temp dir"); + } + + #[test] + fn verifier_auto_env_enables_auto_mode_when_config_is_implicit() { + let _guard = env_lock() + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + 
std::env::set_var(VERIFIER_AUTO_ENV_VAR, "1"); + let root = temp_dir(); + let cwd = root.join("project"); + let home = root.join("home").join(".claw"); + fs::create_dir_all(&home).expect("home config dir"); + fs::create_dir_all(&cwd).expect("project dir"); + fs::write(home.join("settings.json"), "{}").expect("write empty settings"); + + let config = ConfigLoader::new(&cwd, &home) + .load() + .expect("config should parse"); + + assert!(config.verifier().enabled()); + assert_eq!(config.verifier().mode(), RuntimeVerifierMode::Auto); + assert!(config.verifier().final_gate()); + + std::env::remove_var(VERIFIER_AUTO_ENV_VAR); + fs::remove_dir_all(root).expect("cleanup temp dir"); + } + + #[test] + fn verifier_auto_env_does_not_override_explicit_disable() { + let _guard = env_lock() + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + std::env::set_var(VERIFIER_AUTO_ENV_VAR, "1"); + let root = temp_dir(); + let cwd = root.join("project"); + let home = root.join("home").join(".claw"); + let user_settings = home.join("settings.json"); + fs::create_dir_all(&home).expect("home config dir"); + fs::create_dir_all(&cwd).expect("project dir"); + fs::write( + &user_settings, + r#"{ + "verifier": { + "enabled": false + } +}"#, + ) + .expect("write settings"); + + let config = ConfigLoader::new(&cwd, &home) + .load() + .expect("config should parse"); + + assert!(!config.verifier().enabled()); + assert_eq!(config.verifier().mode(), RuntimeVerifierMode::Legacy); + + std::env::remove_var(VERIFIER_AUTO_ENV_VAR); + fs::remove_dir_all(root).expect("cleanup temp dir"); + } + + #[test] + fn verifier_auto_env_does_not_override_explicit_mode() { + let _guard = env_lock() + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + std::env::set_var(VERIFIER_AUTO_ENV_VAR, "1"); + let root = temp_dir(); + let cwd = root.join("project"); + let home = root.join("home").join(".claw"); + let user_settings = home.join("settings.json"); + fs::create_dir_all(&home).expect("home 
config dir"); + fs::create_dir_all(&cwd).expect("project dir"); + fs::write( + &user_settings, + r#"{ + "verifier": { + "enabled": true, + "mode": "staged", + "finalGate": false + } +}"#, + ) + .expect("write settings"); + + let config = ConfigLoader::new(&cwd, &home) + .load() + .expect("config should parse"); + + assert_eq!(config.verifier().mode(), RuntimeVerifierMode::Staged); + assert!(!config.verifier().auto()); + assert!(!config.verifier().final_gate()); + + std::env::remove_var(VERIFIER_AUTO_ENV_VAR); + fs::remove_dir_all(root).expect("cleanup temp dir"); + } } diff --git a/rust/crates/runtime/src/config_validate.rs b/rust/crates/runtime/src/config_validate.rs index 7a9c1c4adc..bea3a46027 100644 --- a/rust/crates/runtime/src/config_validate.rs +++ b/rust/crates/runtime/src/config_validate.rs @@ -197,6 +197,90 @@ const TOP_LEVEL_FIELDS: &[FieldSpec] = &[ name: "trustedRoots", expected: FieldType::StringArray, }, + FieldSpec { + name: "verifier", + expected: FieldType::Object, + }, +]; + +const VERIFIER_FIELDS: &[FieldSpec] = &[ + FieldSpec { + name: "enabled", + expected: FieldType::Bool, + }, + FieldSpec { + name: "mode", + expected: FieldType::String, + }, + FieldSpec { + name: "quickOnWrite", + expected: FieldType::Bool, + }, + FieldSpec { + name: "finalGate", + expected: FieldType::Bool, + }, + FieldSpec { + name: "maxOutputBytes", + expected: FieldType::Number, + }, + FieldSpec { + name: "cargo", + expected: FieldType::Object, + }, + FieldSpec { + name: "node", + expected: FieldType::Object, + }, + FieldSpec { + name: "python", + expected: FieldType::Object, + }, +]; + +const VERIFIER_CARGO_FIELDS: &[FieldSpec] = &[ + FieldSpec { + name: "check", + expected: FieldType::Bool, + }, + FieldSpec { + name: "clippy", + expected: FieldType::Bool, + }, + FieldSpec { + name: "fmt", + expected: FieldType::Bool, + }, + FieldSpec { + name: "test", + expected: FieldType::Bool, + }, + FieldSpec { + name: "timeoutSecs", + expected: FieldType::Number, + }, +]; + +const 
VERIFIER_NODE_FIELDS: &[FieldSpec] = &[ + FieldSpec { + name: "enabled", + expected: FieldType::Bool, + }, + FieldSpec { + name: "timeoutSecs", + expected: FieldType::Number, + }, +]; + +const VERIFIER_PYTHON_FIELDS: &[FieldSpec] = &[ + FieldSpec { + name: "enabled", + expected: FieldType::Bool, + }, + FieldSpec { + name: "timeoutSecs", + expected: FieldType::Number, + }, ]; const HOOKS_FIELDS: &[FieldSpec] = &[ @@ -501,6 +585,42 @@ pub fn validate_config_file( &path_display, )); } + if let Some(verifier) = object.get("verifier").and_then(JsonValue::as_object) { + result.merge(validate_object_keys( + verifier, + VERIFIER_FIELDS, + "verifier", + source, + &path_display, + )); + if let Some(cargo) = verifier.get("cargo").and_then(JsonValue::as_object) { + result.merge(validate_object_keys( + cargo, + VERIFIER_CARGO_FIELDS, + "verifier.cargo", + source, + &path_display, + )); + } + if let Some(node) = verifier.get("node").and_then(JsonValue::as_object) { + result.merge(validate_object_keys( + node, + VERIFIER_NODE_FIELDS, + "verifier.node", + source, + &path_display, + )); + } + if let Some(python) = verifier.get("python").and_then(JsonValue::as_object) { + result.merge(validate_object_keys( + python, + VERIFIER_PYTHON_FIELDS, + "verifier.python", + source, + &path_display, + )); + } + } result } diff --git a/rust/crates/runtime/src/conversation.rs b/rust/crates/runtime/src/conversation.rs index 610ba1a879..acdeb9da72 100644 --- a/rust/crates/runtime/src/conversation.rs +++ b/rust/crates/runtime/src/conversation.rs @@ -1,5 +1,7 @@ -use std::collections::BTreeMap; +use std::collections::{BTreeMap, BTreeSet}; use std::fmt::{Display, Formatter}; +use std::hash::{Hash, Hasher}; +use std::path::PathBuf; use serde_json::{Map, Value}; use telemetry::SessionTracer; @@ -14,9 +16,16 @@ use crate::permissions::{ }; use crate::session::{ContentBlock, ConversationMessage, Session}; use crate::usage::{TokenUsage, UsageTracker}; +use crate::verifier::{ + prepend_verifier_summary, 
VerificationContext, VerificationFailureKind, VerificationGateStatus, + VerificationPhase, VerificationReport, VerificationStatus, VerificationStepReport, Verifier, +}; + +const MAX_FINAL_GATE_ATTEMPTS: u32 = 5; const DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD: u32 = 100_000; const AUTO_COMPACTION_THRESHOLD_ENV_VAR: &str = "CLAUDE_CODE_AUTO_COMPACT_INPUT_TOKENS"; +const VERIFIER_REPORT_STAGE_ENV_VAR: &str = "CLAUDE_CODE_VERIFIER_REPORT_STAGE"; /// Fully assembled request payload sent to the upstream model client. #[derive(Debug, Clone, PartialEq, Eq)] @@ -110,6 +119,8 @@ impl std::error::Error for RuntimeError {} pub struct TurnSummary { pub assistant_messages: Vec, pub tool_results: Vec, + pub verification_reports: Vec, + pub verification_gate: VerificationGateStatus, pub prompt_cache_events: Vec, pub iterations: usize, pub usage: TokenUsage, @@ -122,6 +133,35 @@ pub struct AutoCompactionEvent { pub removed_message_count: usize, } +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +struct VerificationLedgerKey { + adapter_id: String, + project_root: PathBuf, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct VerificationLedgerEntry { + adapter_id: String, + project_root: PathBuf, + touched_paths: BTreeSet, + last_mutation_sequence: u64, + last_quick_status: Option, + last_final_status: Option, + last_final_verified_sequence: Option, +} + +impl VerificationLedgerEntry { + fn update_from_report(&mut self, report: &VerificationReport, mutation_sequence: u64) { + self.last_mutation_sequence = mutation_sequence; + self.last_quick_status = Some(report.status); + self.last_final_status = None; + self.last_final_verified_sequence = None; + for path in &report.touched_paths { + self.touched_paths.insert(path.clone()); + } + } +} + /// Coordinates the model loop, tool execution, hooks, and session updates. 
pub struct ConversationRuntime { session: Session, @@ -136,6 +176,7 @@ pub struct ConversationRuntime { hook_abort_signal: HookAbortSignal, hook_progress_reporter: Option>, session_tracer: Option, + verifier: Option>, } impl ConversationRuntime @@ -185,6 +226,7 @@ where hook_abort_signal: HookAbortSignal::default(), hook_progress_reporter: None, session_tracer: None, + verifier: None, } } @@ -221,6 +263,12 @@ where self } + #[must_use] + pub fn with_verifier(mut self, verifier: Box) -> Self { + self.verifier = Some(verifier); + self + } + fn run_pre_tool_use_hook(&mut self, tool_name: &str, input: &str) -> HookRunResult { if let Some(reporter) = self.hook_progress_reporter.as_mut() { self.hook_runner.run_pre_tool_use_with_context( @@ -310,6 +358,137 @@ where } } + fn workspace_root(&self) -> Option { + self.session.workspace_root().map(PathBuf::from) + } + + fn run_quick_verification( + &self, + tool_name: &str, + tool_input: &str, + mutation_sequence: u64, + ) -> Vec { + let Some(verifier) = self.verifier.as_ref() else { + return Vec::new(); + }; + let Some(context) = VerificationContext::from_tool_invocation( + VerificationPhase::Quick, + self.workspace_root(), + tool_name.to_string(), + tool_input.to_string(), + mutation_sequence, + ) else { + return Vec::new(); + }; + verifier.quick_verify(&context) + } + + fn persist_verification_report( + &mut self, + report: &VerificationReport, + report_mode: &str, + ) -> Result<(), RuntimeError> { + self.session + .push_message(ConversationMessage::verification_report( + report, + Some(report_mode), + )) + .map_err(|error| RuntimeError::new(error.to_string())) + } + + fn run_final_verification( + &self, + entry: &VerificationLedgerEntry, + ) -> Option { + self.verifier.as_ref().and_then(|verifier| { + verifier.final_verify(&crate::verifier::VerificationTarget { + adapter_id: entry.adapter_id.clone(), + project_root: entry.project_root.clone(), + touched_paths: entry.touched_paths.iter().cloned().collect(), + 
mutation_sequence: entry.last_mutation_sequence, + }) + }) + } + + fn sanitize_root_fragment(root: &std::path::Path) -> String { + root.display() + .to_string() + .chars() + .map(|ch| match ch { + '\\' | '/' | ':' | ' ' => '-', + other => other, + }) + .collect::() + } + + fn make_final_gate_reminder(entry: &VerificationLedgerEntry) -> VerificationReport { + let status = entry + .last_final_status + .unwrap_or(VerificationStatus::Failed); + VerificationReport { + report_id: format!( + "vr-reminder-{}-{}-{}", + entry.adapter_id, + Self::sanitize_root_fragment(&entry.project_root), + entry.last_mutation_sequence + ), + phase: VerificationPhase::Final, + adapter_id: entry.adapter_id.clone(), + project_root: entry.project_root.clone(), + touched_paths: entry.touched_paths.iter().cloned().collect(), + status, + summary_text: format!( + "[verifier:final:{}] {} ({})\n[verifier] final verification is still failing for the current workspace state; make another edit before concluding", + entry.adapter_id, + status.as_str(), + entry.project_root.display() + ), + steps: Vec::new(), + } + } + + fn make_final_gate_unavailable_report( + entry: &VerificationLedgerEntry, + reason: &str, + ) -> VerificationReport { + let summary_text = format!( + "[verifier:final:{}] unavailable ({})\n[verifier] {}", + entry.adapter_id, + entry.project_root.display(), + reason + ); + VerificationReport { + report_id: format!( + "vr-unavailable-{}-{}-{}", + entry.adapter_id, + Self::sanitize_root_fragment(&entry.project_root), + entry.last_mutation_sequence + ), + phase: VerificationPhase::Final, + adapter_id: entry.adapter_id.clone(), + project_root: entry.project_root.clone(), + touched_paths: entry.touched_paths.iter().cloned().collect(), + status: VerificationStatus::Unavailable, + summary_text, + steps: vec![VerificationStepReport { + adapter: entry.adapter_id.clone(), + project_root: entry.project_root.clone(), + label: "final-gate setup".to_string(), + command: String::new(), + phase: 
VerificationPhase::Final, + status: VerificationStatus::Unavailable, + failure_kind: Some(VerificationFailureKind::ToolUnavailable), + duration_ms: 0, + truncated_output: reason.to_string(), + step_kind: None, + target_scope: None, + package_name: None, + package_manager: None, + launcher_kind: None, + }], + } + } + #[allow(clippy::too_many_lines)] pub fn run_turn( &mut self, @@ -330,14 +509,21 @@ where } self.record_turn_started(&user_input); + let turn_started_at = std::time::Instant::now(); self.session .push_user_text(user_input) .map_err(|error| RuntimeError::new(error.to_string()))?; let mut assistant_messages = Vec::new(); let mut tool_results = Vec::new(); + let mut verification_reports = Vec::new(); + let mut verification_gate = VerificationGateStatus::not_required(); let mut prompt_cache_events = Vec::new(); let mut iterations = 0; + let mut mutation_sequence = 0_u64; + let mut verification_ledger = + BTreeMap::::new(); + let mut final_gate_attempts = BTreeMap::<(VerificationLedgerKey, u64), u32>::new(); loop { iterations += 1; @@ -394,7 +580,115 @@ where assistant_messages.push(assistant_message); if pending_tool_uses.is_empty() { - break; + let Some(verifier) = self.verifier.as_ref() else { + break; + }; + if !verifier.final_gate_enabled() { + break; + } + + let mut gate_reports = Vec::new(); + let pending_final_gate_keys = verification_ledger + .iter() + .filter_map(|(key, entry)| { + if entry.last_final_verified_sequence == Some(entry.last_mutation_sequence) + { + if entry + .last_final_status + .is_some_and(VerificationStatus::is_success) + { + None + } else { + Some((key.clone(), false)) + } + } else { + Some((key.clone(), true)) + } + }) + .collect::>(); + + if pending_final_gate_keys.is_empty() { + break; + } + + verification_gate.attempted = true; + verification_gate.passed = false; + + for (key, should_run) in pending_final_gate_keys { + let Some(entry) = verification_ledger.get(&key).cloned() else { + continue; + }; + let attempts = 
final_gate_attempts + .entry((key.clone(), entry.last_mutation_sequence)) + .or_insert(0); + *attempts += 1; + let attempts_now = *attempts; + if attempts_now > MAX_FINAL_GATE_ATTEMPTS { + let report = Self::make_final_gate_unavailable_report( + &entry, + &format!( + "final gate aborted after {MAX_FINAL_GATE_ATTEMPTS} attempts without convergence" + ), + ); + if let Some(ledger_entry) = verification_ledger.get_mut(&key) { + ledger_entry.last_final_status = Some(report.status); + ledger_entry.last_final_verified_sequence = + Some(ledger_entry.last_mutation_sequence); + } + let report_mode = verification_report_mode(&report); + self.record_verifier_ran( + iterations, + &format!("final_gate:{}", entry.adapter_id), + &report, + entry.last_mutation_sequence, + &report_mode, + ); + verification_gate.report_ids.push(report.report_id.clone()); + self.persist_verification_report(&report, &report_mode)?; + gate_reports.push(report); + continue; + } + let report = if should_run { + let report = + self.run_final_verification(&entry).unwrap_or_else(|| { + Self::make_final_gate_unavailable_report( + &entry, + "final verification is unavailable (no verifier configured for this target)", + ) + }); + if let Some(ledger_entry) = verification_ledger.get_mut(&key) { + ledger_entry.last_final_status = Some(report.status); + ledger_entry.last_final_verified_sequence = + Some(ledger_entry.last_mutation_sequence); + } + report + } else { + Self::make_final_gate_reminder(&entry) + }; + let report_mode = verification_report_mode(&report); + self.record_verifier_ran( + iterations, + &format!("final_gate:{}", entry.adapter_id), + &report, + entry.last_mutation_sequence, + &report_mode, + ); + verification_gate.report_ids.push(report.report_id.clone()); + self.persist_verification_report(&report, &report_mode)?; + gate_reports.push(report); + } + + if gate_reports.is_empty() { + break; + } + + let gate_passed = gate_reports.iter().all(VerificationReport::is_success); + verification_gate.passed 
= gate_passed; + verification_reports.extend(gate_reports); + if gate_passed { + break; + } + continue; } for (tool_use_id, tool_name, input) in pending_tool_uses { @@ -444,6 +738,7 @@ where ) }; + let mut pending_verification_reports = Vec::new(); let result_message = match permission_outcome { PermissionOutcome::Allow => { self.record_tool_started(iterations, &tool_name); @@ -482,6 +777,52 @@ where || post_hook_result.is_cancelled(), ); + if !is_error && is_write_tool(&tool_name) { + mutation_sequence += 1; + let reports = self.run_quick_verification( + &tool_name, + &effective_input, + mutation_sequence, + ); + for report in reports { + let report_mode = verification_report_mode(&report); + self.record_verifier_ran( + iterations, + &tool_name, + &report, + mutation_sequence, + &report_mode, + ); + output = prepend_verifier_summary(&report.short_summary(), output); + if !report.is_success() { + is_error = true; + } + let key = VerificationLedgerKey { + adapter_id: report.adapter_id.clone(), + project_root: report.project_root.clone(), + }; + verification_ledger + .entry(key) + .and_modify(|entry| { + entry.update_from_report(&report, mutation_sequence); + }) + .or_insert_with(|| VerificationLedgerEntry { + adapter_id: report.adapter_id.clone(), + project_root: report.project_root.clone(), + touched_paths: report + .touched_paths + .iter() + .cloned() + .collect(), + last_mutation_sequence: mutation_sequence, + last_quick_status: Some(report.status), + last_final_status: None, + last_final_verified_sequence: None, + }); + pending_verification_reports.push(report); + } + } + ConversationMessage::tool_result(tool_use_id, tool_name, output, is_error) } PermissionOutcome::Deny { reason } => ConversationMessage::tool_result( @@ -496,6 +837,11 @@ where .map_err(|error| RuntimeError::new(error.to_string()))?; self.record_tool_finished(iterations, &result_message); tool_results.push(result_message); + for report in pending_verification_reports { + let report_mode = 
verification_report_mode(&report); + self.persist_verification_report(&report, &report_mode)?; + verification_reports.push(report); + } } } @@ -504,12 +850,17 @@ where let summary = TurnSummary { assistant_messages, tool_results, + verification_reports, + verification_gate, prompt_cache_events, iterations, usage: self.usage_tracker.cumulative_usage(), auto_compaction, }; - self.record_turn_completed(&summary); + self.record_turn_completed( + &summary, + u64::try_from(turn_started_at.elapsed().as_millis()).unwrap_or(u64::MAX), + ); Ok(summary) } @@ -648,7 +999,7 @@ where session_tracer.record("tool_execution_finished", attributes); } - fn record_turn_completed(&self, summary: &TurnSummary) { + fn record_turn_completed(&self, summary: &TurnSummary, turn_latency_ms: u64) { let Some(session_tracer) = &self.session_tracer else { return; }; @@ -666,13 +1017,86 @@ where "tool_results".to_string(), Value::from(summary.tool_results.len() as u64), ); + attributes.insert( + "verification_reports".to_string(), + Value::from(summary.verification_reports.len() as u64), + ); + attributes.insert( + "verification_gate_attempted".to_string(), + Value::Bool(summary.verification_gate.attempted), + ); + attributes.insert( + "verification_gate_passed".to_string(), + Value::Bool(summary.verification_gate.passed), + ); attributes.insert( "prompt_cache_events".to_string(), Value::from(summary.prompt_cache_events.len() as u64), ); + attributes.insert("turn_latency_ms".to_string(), Value::from(turn_latency_ms)); + attributes.insert( + "tokens_total".to_string(), + Value::from(u64::from(summary.usage.total_tokens())), + ); session_tracer.record("turn_completed", attributes); } + fn record_verifier_ran( + &self, + iteration: usize, + tool_name: &str, + report: &VerificationReport, + mutation_sequence: u64, + report_mode: &str, + ) { + let Some(session_tracer) = &self.session_tracer else { + return; + }; + + let mut attributes = Map::new(); + attributes.insert("iteration".to_string(), 
Value::from(iteration as u64)); + attributes.insert( + "tool_name".to_string(), + Value::String(tool_name.to_string()), + ); + attributes.insert("passed".to_string(), Value::Bool(report.is_success())); + attributes.insert( + "adapter_id".to_string(), + Value::String(report.adapter_id.clone()), + ); + attributes.insert( + "phase".to_string(), + Value::String(report.phase.as_str().to_string()), + ); + attributes.insert( + "mutation_sequence".to_string(), + Value::from(mutation_sequence), + ); + attributes.insert( + "report_mode".to_string(), + Value::String(report_mode.to_string()), + ); + if let Some(primary_step) = report.primary_step() { + attributes.insert( + "duration_ms".to_string(), + Value::from(primary_step.duration_ms), + ); + if let Some(target_scope) = &primary_step.target_scope { + attributes.insert( + "target_scope".to_string(), + Value::String(target_scope.clone()), + ); + } + if let Some(failure_kind) = primary_step.failure_kind { + attributes.insert( + "failure_kind".to_string(), + Value::String(failure_kind.as_str().to_string()), + ); + } + } + session_tracer.record("verifier_ran", attributes); + } + fn record_turn_failed(&self, iteration: usize, error: &RuntimeError) { let Some(session_tracer) = &self.session_tracer else { return; @@ -703,6 +1127,26 @@ fn parse_auto_compaction_threshold(value: Option<&str>) -> u32 { .unwrap_or(DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD) } +fn verification_report_mode(report: &VerificationReport) -> String { + let stage = std::env::var(VERIFIER_REPORT_STAGE_ENV_VAR).map_or_else( + |_| "shadow".to_string(), + |value| value.trim().to_ascii_lowercase(), + ); + let bucket = stable_percent_bucket(&report.report_id); + match stage.as_str() { + "typed" => "typed-primary".to_string(), + "ab" if bucket < 10 => "typed-primary".to_string(), + "shadow" if bucket < 10 => "shadow".to_string(), + _ => "text-primary".to_string(), + } +} + +fn stable_percent_bucket(value: &str) -> u64 { + let mut hasher = 
std::collections::hash_map::DefaultHasher::new(); + value.hash(&mut hasher); + hasher.finish() % 100 +} + fn build_assistant_message( events: Vec, ) -> Result< @@ -786,6 +1230,10 @@ fn merge_hook_feedback(messages: &[String], output: String, is_error: bool) -> S sections.join("\n\n") } +fn is_write_tool(tool_name: &str) -> bool { + matches!(tool_name, "edit_file" | "write_file" | "Edit" | "Write") +} + type ToolHandler = Box Result>; /// Simple in-memory tool executor for tests and lightweight integrations. @@ -822,9 +1270,10 @@ impl ToolExecutor for StaticToolExecutor { #[cfg(test)] mod tests { use super::{ - build_assistant_message, parse_auto_compaction_threshold, ApiClient, ApiRequest, - AssistantEvent, AutoCompactionEvent, ConversationRuntime, PromptCacheEvent, RuntimeError, - StaticToolExecutor, ToolExecutor, DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD, + build_assistant_message, parse_auto_compaction_threshold, stable_percent_bucket, + verification_report_mode, ApiClient, ApiRequest, AssistantEvent, AutoCompactionEvent, + ConversationRuntime, PromptCacheEvent, RuntimeError, StaticToolExecutor, ToolExecutor, + DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD, VERIFIER_REPORT_STAGE_ENV_VAR, }; use crate::compact::CompactionConfig; use crate::config::{RuntimeFeatureConfig, RuntimeHookConfig}; @@ -835,10 +1284,15 @@ mod tests { use crate::prompt::{ProjectContext, SystemPromptBuilder}; use crate::session::{ContentBlock, MessageRole, Session}; use crate::usage::TokenUsage; + use crate::verifier::{ + VerificationContext, VerificationFailureKind, VerificationPhase, VerificationReport, + VerificationStatus, VerificationStepReport, VerificationTarget, Verifier, + }; use crate::ToolError; use std::fs; use std::path::PathBuf; use std::sync::Arc; + use std::sync::{Mutex, OnceLock}; use std::time::{SystemTime, UNIX_EPOCH}; use telemetry::{MemoryTelemetrySink, SessionTracer, TelemetryEvent}; @@ -902,6 +1356,11 @@ mod tests { } } + fn env_lock() -> &'static Mutex<()> { + 
static LOCK: OnceLock> = OnceLock::new(); + LOCK.get_or_init(|| Mutex::new(())) + } + struct PromptAllowOnce; impl PermissionPrompter for PromptAllowOnce { @@ -929,6 +1388,7 @@ mod tests { git_status: None, git_diff: None, git_context: None, + context_pack: None, instruction_files: Vec::new(), }) .with_os("linux", "6.8") @@ -1494,7 +1954,71 @@ mod tests { #[cfg(windows)] fn shell_snippet(script: &str) -> String { - script.replace('\'', "\"") + fn powershell_literal(value: &str) -> String { + format!("'{}'", value.replace('\'', "''")) + } + + fn powershell_snippet(script: &str) -> String { + format!( + "powershell -NoProfile -EncodedCommand {}", + encode_powershell(script) + ) + } + + fn encode_powershell(script: &str) -> String { + let bytes: Vec = script.encode_utf16().flat_map(u16::to_le_bytes).collect(); + encode_base64(&bytes) + } + + fn encode_base64(bytes: &[u8]) -> String { + const TABLE: &[u8; 64] = + b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4); + + for chunk in bytes.chunks(3) { + let b0 = chunk[0]; + let b1 = *chunk.get(1).unwrap_or(&0); + let b2 = *chunk.get(2).unwrap_or(&0); + let n = (u32::from(b0) << 16) | (u32::from(b1) << 8) | u32::from(b2); + + encoded.push(TABLE[((n >> 18) & 0x3F) as usize] as char); + encoded.push(TABLE[((n >> 12) & 0x3F) as usize] as char); + encoded.push(if chunk.len() > 1 { + TABLE[((n >> 6) & 0x3F) as usize] as char + } else { + '=' + }); + encoded.push(if chunk.len() > 2 { + TABLE[(n & 0x3F) as usize] as char + } else { + '=' + }); + } + + encoded + } + + if let Some((text, exit_code)) = script + .strip_prefix("printf '") + .and_then(|rest| rest.split_once("'; exit ")) + { + return powershell_snippet(&format!( + "[Console]::Out.Write({}); exit {exit_code}", + powershell_literal(text) + )); + } + + if let Some(text) = script + .strip_prefix("printf '") + .and_then(|rest| rest.strip_suffix('\'')) + { + return 
powershell_snippet(&format!( + "[Console]::Out.Write({})", + powershell_literal(text) + )); + } + + panic!("unsupported windows conversation test snippet: {script}"); } #[cfg(not(windows))] @@ -1808,4 +2332,656 @@ mod tests { // then assert_eq!(error.to_string(), "upstream failed"); } + + #[test] + fn verifier_feedback_is_injected_and_marks_tool_result_as_error_on_failure() { + struct EditOnceApi { + calls: usize, + } + impl ApiClient for EditOnceApi { + fn stream( + &mut self, + _request: ApiRequest, + ) -> Result, RuntimeError> { + self.calls += 1; + if self.calls == 1 { + Ok(vec![ + AssistantEvent::ToolUse { + id: "tool-1".to_string(), + name: "edit_file".to_string(), + input: r#"{"file_path":"src/lib.rs"}"#.to_string(), + }, + AssistantEvent::MessageStop, + ]) + } else { + Ok(vec![ + AssistantEvent::TextDelta("ok".to_string()), + AssistantEvent::MessageStop, + ]) + } + } + } + + struct FailingVerifier; + impl Verifier for FailingVerifier { + fn quick_verify(&self, context: &VerificationContext) -> Vec { + assert_eq!(context.tool_name, "edit_file"); + vec![test_verification_report( + VerificationPhase::Quick, + VerificationStatus::Failed, + &context.touched_paths, + "[verifier:quick:rust-cargo] failed (/workspace)\n[verifier] cargo check: FAIL\nerror[E0308]: mismatched types", + )] + } + + fn final_verify(&self, _target: &VerificationTarget) -> Option { + None + } + } + + let mut runtime = ConversationRuntime::new( + Session::new(), + EditOnceApi { calls: 0 }, + StaticToolExecutor::new().register("edit_file", |_| Ok("edited".to_string())), + PermissionPolicy::new(PermissionMode::DangerFullAccess), + vec!["system".to_string()], + ) + .with_verifier(Box::new(FailingVerifier)); + + let summary = runtime + .run_turn("fix it", None) + .expect("turn should complete"); + + assert_eq!(summary.tool_results.len(), 1); + let ContentBlock::ToolResult { + is_error, output, .. 
+ } = &summary.tool_results[0].blocks[0] + else { + panic!("expected tool result block"); + }; + assert!(*is_error, "verifier failure should flip is_error to true"); + assert!( + output.contains("[verifier:quick:rust-cargo] failed"), + "verifier short summary must be surfaced to the model: {output:?}" + ); + assert!( + output.contains("edited"), + "original tool output must be preserved: {output:?}" + ); + assert_eq!(summary.verification_reports.len(), 1); + assert!( + summary.verification_reports[0] + .summary_text + .contains("mismatched types"), + "full verifier report should retain diagnostics" + ); + assert!(runtime + .session() + .messages + .iter() + .any(|message| message.role == MessageRole::Verification)); + } + + #[test] + fn verifier_passing_leaves_tool_result_successful() { + struct EditApi { + done: bool, + } + impl ApiClient for EditApi { + fn stream( + &mut self, + _request: ApiRequest, + ) -> Result, RuntimeError> { + if self.done { + Ok(vec![ + AssistantEvent::TextDelta("done".to_string()), + AssistantEvent::MessageStop, + ]) + } else { + self.done = true; + Ok(vec![ + AssistantEvent::ToolUse { + id: "tool-1".to_string(), + name: "edit_file".to_string(), + input: r#"{"file_path":"src/lib.rs"}"#.to_string(), + }, + AssistantEvent::MessageStop, + ]) + } + } + } + + struct PassingVerifier; + impl Verifier for PassingVerifier { + fn quick_verify(&self, context: &VerificationContext) -> Vec { + vec![test_verification_report( + VerificationPhase::Quick, + VerificationStatus::Passed, + &context.touched_paths, + "[verifier:quick:rust-cargo] passed (/workspace)\n[verifier] cargo check: ok", + )] + } + + fn final_verify(&self, _target: &VerificationTarget) -> Option { + None + } + } + + let mut runtime = ConversationRuntime::new( + Session::new(), + EditApi { done: false }, + StaticToolExecutor::new().register("edit_file", |_| Ok("edited".to_string())), + PermissionPolicy::new(PermissionMode::DangerFullAccess), + vec!["system".to_string()], + ) + 
.with_verifier(Box::new(PassingVerifier)); + + let summary = runtime + .run_turn("edit", None) + .expect("turn should complete"); + + let ContentBlock::ToolResult { + is_error, output, .. + } = &summary.tool_results[0].blocks[0] + else { + panic!("expected tool result"); + }; + assert!(!*is_error, "passing verifier must not flip is_error"); + assert!(output.contains("[verifier:quick:rust-cargo] passed")); + assert_eq!(summary.verification_reports.len(), 1); + assert!(summary.verification_reports[0].is_success()); + } + + #[test] + fn verifier_is_not_called_when_tool_result_is_already_an_error() { + struct EditApi { + done: bool, + } + impl ApiClient for EditApi { + fn stream( + &mut self, + _request: ApiRequest, + ) -> Result, RuntimeError> { + if self.done { + Ok(vec![ + AssistantEvent::TextDelta("ack".to_string()), + AssistantEvent::MessageStop, + ]) + } else { + self.done = true; + Ok(vec![ + AssistantEvent::ToolUse { + id: "tool-1".to_string(), + name: "edit_file".to_string(), + input: r#"{"file_path":"src/lib.rs"}"#.to_string(), + }, + AssistantEvent::MessageStop, + ]) + } + } + } + + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; + struct CountingVerifier(Arc); + impl Verifier for CountingVerifier { + fn quick_verify(&self, _context: &VerificationContext) -> Vec { + self.0.fetch_add(1, Ordering::SeqCst); + vec![test_verification_report( + VerificationPhase::Quick, + VerificationStatus::Passed, + &[PathBuf::from("src/lib.rs")], + "[verifier:quick:rust-cargo] passed (/workspace)", + )] + } + + fn final_verify(&self, _target: &VerificationTarget) -> Option { + None + } + } + + let counter = Arc::new(AtomicUsize::new(0)); + let mut runtime = ConversationRuntime::new( + Session::new(), + EditApi { done: false }, + StaticToolExecutor::new() + .register("edit_file", |_| Err(ToolError::new("tool exploded"))), + PermissionPolicy::new(PermissionMode::DangerFullAccess), + vec!["system".to_string()], + ) + 
.with_verifier(Box::new(CountingVerifier(counter.clone()))); + + runtime + .run_turn("edit", None) + .expect("turn should complete"); + + assert_eq!( + counter.load(Ordering::SeqCst), + 0, + "verifier must be skipped when tool itself errored" + ); + } + + #[allow(clippy::too_many_lines)] + #[test] + fn staged_final_gate_blocks_completion_until_validation_passes() { + struct StagedApi { + calls: usize, + } + + impl ApiClient for StagedApi { + fn stream( + &mut self, + _request: ApiRequest, + ) -> Result, RuntimeError> { + self.calls += 1; + match self.calls { + 1 => Ok(vec![ + AssistantEvent::ToolUse { + id: "tool-1".to_string(), + name: "edit_file".to_string(), + input: r#"{"file_path":"src/lib.rs"}"#.to_string(), + }, + AssistantEvent::MessageStop, + ]), + 2 => Ok(vec![ + AssistantEvent::TextDelta("done".to_string()), + AssistantEvent::MessageStop, + ]), + 3 => Ok(vec![ + AssistantEvent::ToolUse { + id: "tool-2".to_string(), + name: "edit_file".to_string(), + input: r#"{"file_path":"src/lib.rs"}"#.to_string(), + }, + AssistantEvent::MessageStop, + ]), + 4 => Ok(vec![ + AssistantEvent::TextDelta("done for real".to_string()), + AssistantEvent::MessageStop, + ]), + _ => unreachable!("extra API call"), + } + } + } + + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; + + struct StagedVerifier { + final_calls: Arc, + } + + impl Verifier for StagedVerifier { + fn quick_verify(&self, context: &VerificationContext) -> Vec { + vec![test_verification_report( + VerificationPhase::Quick, + VerificationStatus::Passed, + &context.touched_paths, + "[verifier:quick:rust-cargo] passed (/workspace)\n[verifier] cargo check: ok", + )] + } + + fn final_verify(&self, target: &VerificationTarget) -> Option { + let call = self.final_calls.fetch_add(1, Ordering::SeqCst); + let status = if call == 0 { + VerificationStatus::Failed + } else { + VerificationStatus::Passed + }; + Some(test_verification_report( + VerificationPhase::Final, + status, + &target.touched_paths, + 
if status == VerificationStatus::Failed { + "[verifier:final:rust-cargo] failed (/workspace)\n[verifier] cargo test: FAIL" + } else { + "[verifier:final:rust-cargo] passed (/workspace)\n[verifier] cargo test: ok" + }, + )) + } + + fn final_gate_enabled(&self) -> bool { + true + } + } + + let final_calls = Arc::new(AtomicUsize::new(0)); + let mut runtime = ConversationRuntime::new( + Session::new(), + StagedApi { calls: 0 }, + StaticToolExecutor::new().register("edit_file", |_| Ok("edited".to_string())), + PermissionPolicy::new(PermissionMode::DangerFullAccess), + vec!["system".to_string()], + ) + .with_verifier(Box::new(StagedVerifier { + final_calls: final_calls.clone(), + })); + + let summary = runtime + .run_turn("fix and verify", None) + .expect("turn should complete"); + + assert_eq!(summary.iterations, 4); + assert_eq!(summary.tool_results.len(), 2); + assert!(summary.verification_gate.attempted); + assert!(summary.verification_gate.passed); + assert_eq!(final_calls.load(Ordering::SeqCst), 2); + assert!(summary + .verification_reports + .iter() + .any(|report| report.phase == VerificationPhase::Final + && report.status == VerificationStatus::Failed)); + assert!(summary + .assistant_messages + .last() + .is_some_and(|message| message.blocks.iter().any( + |block| matches!(block, ContentBlock::Text { text } if text == "done for real") + ))); + } + + #[test] + fn staged_final_gate_dedupes_retries_without_new_mutation() { + struct ReminderApi { + calls: usize, + } + + impl ApiClient for ReminderApi { + fn stream( + &mut self, + _request: ApiRequest, + ) -> Result, RuntimeError> { + self.calls += 1; + match self.calls { + 1 => Ok(vec![ + AssistantEvent::ToolUse { + id: "tool-1".to_string(), + name: "edit_file".to_string(), + input: r#"{"file_path":"src/lib.rs"}"#.to_string(), + }, + AssistantEvent::MessageStop, + ]), + 2 | 3 => Ok(vec![ + AssistantEvent::TextDelta("done".to_string()), + AssistantEvent::MessageStop, + ]), + 4 => Ok(vec![ + AssistantEvent::ToolUse { 
+ id: "tool-2".to_string(), + name: "edit_file".to_string(), + input: r#"{"file_path":"src/lib.rs"}"#.to_string(), + }, + AssistantEvent::MessageStop, + ]), + 5 => Ok(vec![ + AssistantEvent::TextDelta("done now".to_string()), + AssistantEvent::MessageStop, + ]), + _ => unreachable!("extra API call"), + } + } + } + + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; + + struct ReminderVerifier { + final_calls: Arc, + } + + impl Verifier for ReminderVerifier { + fn quick_verify(&self, context: &VerificationContext) -> Vec { + vec![test_verification_report( + VerificationPhase::Quick, + VerificationStatus::Passed, + &context.touched_paths, + "[verifier:quick:rust-cargo] passed (/workspace)\n[verifier] cargo check: ok", + )] + } + + fn final_verify(&self, target: &VerificationTarget) -> Option { + let call = self.final_calls.fetch_add(1, Ordering::SeqCst); + let status = if call == 0 { + VerificationStatus::Failed + } else { + VerificationStatus::Passed + }; + Some(test_verification_report( + VerificationPhase::Final, + status, + &target.touched_paths, + if status == VerificationStatus::Failed { + "[verifier:final:rust-cargo] failed (/workspace)\n[verifier] cargo clippy: FAIL" + } else { + "[verifier:final:rust-cargo] passed (/workspace)\n[verifier] cargo clippy: ok" + }, + )) + } + + fn final_gate_enabled(&self) -> bool { + true + } + } + + let final_calls = Arc::new(AtomicUsize::new(0)); + let mut runtime = ConversationRuntime::new( + Session::new(), + ReminderApi { calls: 0 }, + StaticToolExecutor::new().register("edit_file", |_| Ok("edited".to_string())), + PermissionPolicy::new(PermissionMode::DangerFullAccess), + vec!["system".to_string()], + ) + .with_verifier(Box::new(ReminderVerifier { + final_calls: final_calls.clone(), + })); + + let summary = runtime + .run_turn("fix and verify", None) + .expect("turn should complete"); + + assert_eq!(summary.iterations, 5); + assert_eq!(final_calls.load(Ordering::SeqCst), 2); + assert!(summary + 
.verification_reports + .iter() + .any(|report| report.phase == VerificationPhase::Final + && report.steps.is_empty() + && report.summary_text.contains("still failing"))); + assert!(summary.verification_gate.attempted); + assert!(summary.verification_gate.passed); + } + + #[test] + fn verification_report_mode_supports_shadow_ab_and_typed_stages() { + let _guard = env_lock() + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + std::env::remove_var(VERIFIER_REPORT_STAGE_ENV_VAR); + let shadow_report = test_verification_report_with_id("shadow-bucket"); + let shadow_bucket = stable_percent_bucket(&shadow_report.report_id); + let shadow_mode = verification_report_mode(&shadow_report); + if shadow_bucket < 10 { + assert_eq!(shadow_mode, "shadow"); + } else { + assert_eq!(shadow_mode, "text-primary"); + } + + let typed_id = report_id_for_bucket(|bucket| bucket < 10); + std::env::set_var(VERIFIER_REPORT_STAGE_ENV_VAR, "ab"); + let typed_report = test_verification_report_with_id(&typed_id); + assert_eq!(verification_report_mode(&typed_report), "typed-primary"); + + let control_id = report_id_for_bucket(|bucket| bucket >= 10); + let control_report = test_verification_report_with_id(&control_id); + assert_eq!(verification_report_mode(&control_report), "text-primary"); + + std::env::set_var(VERIFIER_REPORT_STAGE_ENV_VAR, "typed"); + assert_eq!(verification_report_mode(&control_report), "typed-primary"); + std::env::remove_var(VERIFIER_REPORT_STAGE_ENV_VAR); + } + + #[test] + fn verifier_telemetry_records_report_mode_failure_kind_and_target_scope() { + struct EditApi { + done: bool, + } + + impl ApiClient for EditApi { + fn stream( + &mut self, + _request: ApiRequest, + ) -> Result, RuntimeError> { + if self.done { + Ok(vec![ + AssistantEvent::TextDelta("done".to_string()), + AssistantEvent::MessageStop, + ]) + } else { + self.done = true; + Ok(vec![ + AssistantEvent::ToolUse { + id: "tool-1".to_string(), + name: "edit_file".to_string(), + input: 
r#"{"file_path":"src/lib.rs"}"#.to_string(), + }, + AssistantEvent::MessageStop, + ]) + } + } + } + + struct TelemetryVerifier; + + impl Verifier for TelemetryVerifier { + fn quick_verify(&self, context: &VerificationContext) -> Vec { + vec![VerificationReport { + report_id: "report-typed".to_string(), + phase: VerificationPhase::Quick, + adapter_id: "rust-cargo".to_string(), + project_root: PathBuf::from("/workspace"), + touched_paths: context.touched_paths.clone(), + status: VerificationStatus::Failed, + summary_text: "[verifier:quick:rust-cargo] failed (/workspace)".to_string(), + steps: vec![VerificationStepReport { + adapter: "rust-cargo".to_string(), + project_root: PathBuf::from("/workspace"), + label: "cargo check".to_string(), + command: "cargo check -p demo".to_string(), + phase: VerificationPhase::Quick, + status: VerificationStatus::Failed, + failure_kind: Some(VerificationFailureKind::Code), + duration_ms: 42, + truncated_output: "error[E0308]".to_string(), + step_kind: Some("cargo_check".to_string()), + target_scope: Some("package".to_string()), + package_name: Some("demo".to_string()), + package_manager: None, + launcher_kind: None, + }], + }] + } + + fn final_verify(&self, _target: &VerificationTarget) -> Option { + None + } + } + + let _guard = env_lock() + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + std::env::set_var(VERIFIER_REPORT_STAGE_ENV_VAR, "typed"); + let sink = Arc::new(MemoryTelemetrySink::default()); + let tracer = SessionTracer::new("session-runtime", sink.clone()); + let mut runtime = ConversationRuntime::new( + Session::new(), + EditApi { done: false }, + StaticToolExecutor::new().register("edit_file", |_| Ok("edited".to_string())), + PermissionPolicy::new(PermissionMode::DangerFullAccess), + vec!["system".to_string()], + ) + .with_verifier(Box::new(TelemetryVerifier)) + .with_session_tracer(tracer); + + let _summary = runtime.run_turn("fix", None).expect("turn should complete"); + 
std::env::remove_var(VERIFIER_REPORT_STAGE_ENV_VAR); + + let events = sink.events(); + let verifier_trace = events + .into_iter() + .find_map(|event| match event { + TelemetryEvent::SessionTrace(trace) if trace.name == "verifier_ran" => Some(trace), + _ => None, + }) + .expect("verifier trace should exist"); + assert_eq!( + verifier_trace.attributes.get("report_mode"), + Some(&serde_json::Value::String("typed-primary".to_string())) + ); + assert_eq!( + verifier_trace.attributes.get("failure_kind"), + Some(&serde_json::Value::String("code".to_string())) + ); + assert_eq!( + verifier_trace.attributes.get("target_scope"), + Some(&serde_json::Value::String("package".to_string())) + ); + assert_eq!( + verifier_trace.attributes.get("mutation_sequence"), + Some(&serde_json::Value::Number(1_u64.into())) + ); + } + + fn test_verification_report( + phase: VerificationPhase, + status: VerificationStatus, + touched_paths: &[PathBuf], + summary_text: &str, + ) -> VerificationReport { + test_verification_report_with_fields( + format!("test-{}-{}", phase.as_str(), status.as_str()), + phase, + status, + touched_paths, + summary_text, + ) + } + + fn test_verification_report_with_id(report_id: &str) -> VerificationReport { + test_verification_report_with_fields( + report_id.to_string(), + VerificationPhase::Quick, + VerificationStatus::Failed, + &[PathBuf::from("src/lib.rs")], + "[verifier:quick:rust-cargo] failed (/workspace)\n[verifier] cargo check: FAIL", + ) + } + + fn test_verification_report_with_fields( + report_id: String, + phase: VerificationPhase, + status: VerificationStatus, + touched_paths: &[PathBuf], + summary_text: &str, + ) -> VerificationReport { + VerificationReport { + report_id, + phase, + adapter_id: "rust-cargo".to_string(), + project_root: PathBuf::from("/workspace"), + touched_paths: touched_paths.to_vec(), + status, + summary_text: summary_text.to_string(), + steps: Vec::new(), + } + } + + fn report_id_for_bucket(predicate: impl Fn(u64) -> bool) -> String 
{ + for index in 0..10_000 { + let candidate = format!("bucket-report-{index}"); + if predicate(stable_percent_bucket(&candidate)) { + return candidate; + } + } + panic!("failed to find report id for bucket predicate"); + } } diff --git a/rust/crates/runtime/src/critic.rs b/rust/crates/runtime/src/critic.rs new file mode 100644 index 0000000000..5fba290aa0 --- /dev/null +++ b/rust/crates/runtime/src/critic.rs @@ -0,0 +1,260 @@ +//! Critic subagent gate. +//! +//! A thin planner that decides whether to invoke a second-opinion "critic" +//! subagent on a mutation. The critic itself is expected to be a cheap-model +//! pass that re-reads a diff and reinjects at most P0/P1 findings back into the +//! main turn. This module only encodes: +//! * Diff-size thresholds (≥4 files OR ≥200 added/removed lines OR >1 root). +//! * A `subagent_depth` guard so critic calls never nest. +//! * A dedup set so each `mutation_sequence` triggers at most one critic run. +//! * The preferred cheap-model hint. +//! +//! Keeping the policy isolated lets the runtime wire it into `conversation.rs` +//! without that file having to know the threshold numerics. + +use std::collections::HashSet; + +/// Diff-size snapshot fed to the critic planner. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DiffStats { + pub files_changed: usize, + pub lines_changed: usize, + pub distinct_roots: usize, +} + +/// Why the critic was (or was not) invoked. Emitted into telemetry. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum CriticDecision { + /// Critic should run for `mutation_sequence`. Carries the reason that + /// tipped the threshold. + Run { reason: String }, + /// Critic should not run. + Skip { reason: String }, +} + +/// Thresholds per spec: ≥4 files OR ≥200 lines OR >1 root. 
+pub const CRITIC_FILE_THRESHOLD: usize = 4; +pub const CRITIC_LINE_THRESHOLD: usize = 200; +pub const CRITIC_ROOT_THRESHOLD: usize = 1; // strictly more than 1 + +/// Model hint the runtime should use when spawning the critic subagent. +/// Kept as a free-form string to avoid a compile-time coupling to model IDs. +pub const CRITIC_MODEL_HINT: &str = "claude-haiku"; + +/// Planner that tracks which mutation sequences have already been audited. +/// +/// Callers construct one per conversation and call [`CriticPlanner::plan`] each +/// time a new mutation finalizes. The planner is intentionally infallible — +/// actually spawning the subagent and reinjecting findings stays in the caller. +#[derive(Debug, Default)] +pub struct CriticPlanner { + audited: HashSet, +} + +impl CriticPlanner { + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Decide whether to invoke the critic for this mutation. Records the + /// sequence as audited only when the decision is [`CriticDecision::Run`] — + /// skips from depth or thresholds do NOT consume the slot, so a later + /// mutation with the same sequence number (shouldn't happen, but still) + /// would be evaluated fresh. 
+ pub fn plan( + &mut self, + mutation_sequence: u64, + subagent_depth: u32, + stats: DiffStats, + ) -> CriticDecision { + if subagent_depth > 0 { + return CriticDecision::Skip { + reason: format!("nested subagent depth={subagent_depth}"), + }; + } + if self.audited.contains(&mutation_sequence) { + return CriticDecision::Skip { + reason: format!("already audited mutation_sequence={mutation_sequence}"), + }; + } + let tripped = trip_reason(stats); + match tripped { + Some(reason) => { + self.audited.insert(mutation_sequence); + CriticDecision::Run { reason } + } + None => CriticDecision::Skip { + reason: format!( + "below thresholds (files={}, lines={}, roots={})", + stats.files_changed, stats.lines_changed, stats.distinct_roots + ), + }, + } + } + + #[must_use] + pub fn has_audited(&self, mutation_sequence: u64) -> bool { + self.audited.contains(&mutation_sequence) + } +} + +fn trip_reason(stats: DiffStats) -> Option { + if stats.files_changed >= CRITIC_FILE_THRESHOLD { + return Some(format!( + "files_changed={} >= {}", + stats.files_changed, CRITIC_FILE_THRESHOLD + )); + } + if stats.lines_changed >= CRITIC_LINE_THRESHOLD { + return Some(format!( + "lines_changed={} >= {}", + stats.lines_changed, CRITIC_LINE_THRESHOLD + )); + } + if stats.distinct_roots > CRITIC_ROOT_THRESHOLD { + return Some(format!( + "distinct_roots={} > {}", + stats.distinct_roots, CRITIC_ROOT_THRESHOLD + )); + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + + fn small() -> DiffStats { + DiffStats { + files_changed: 1, + lines_changed: 5, + distinct_roots: 1, + } + } + + #[test] + fn below_thresholds_skips() { + let mut planner = CriticPlanner::new(); + match planner.plan(1, 0, small()) { + CriticDecision::Skip { reason } => assert!(reason.starts_with("below thresholds")), + CriticDecision::Run { .. 
} => panic!("expected skip, got run"), + } + } + + #[test] + fn file_threshold_runs() { + let mut planner = CriticPlanner::new(); + let stats = DiffStats { + files_changed: 4, + lines_changed: 20, + distinct_roots: 1, + }; + assert!(matches!( + planner.plan(1, 0, stats), + CriticDecision::Run { .. } + )); + } + + #[test] + fn line_threshold_runs() { + let mut planner = CriticPlanner::new(); + let stats = DiffStats { + files_changed: 1, + lines_changed: 200, + distinct_roots: 1, + }; + assert!(matches!( + planner.plan(1, 0, stats), + CriticDecision::Run { .. } + )); + } + + #[test] + fn root_threshold_runs_when_strictly_more_than_one() { + let mut planner = CriticPlanner::new(); + let stats = DiffStats { + files_changed: 1, + lines_changed: 5, + distinct_roots: 2, + }; + assert!(matches!( + planner.plan(1, 0, stats), + CriticDecision::Run { .. } + )); + } + + #[test] + fn single_root_does_not_trip_root_threshold() { + let mut planner = CriticPlanner::new(); + let stats = DiffStats { + files_changed: 1, + lines_changed: 5, + distinct_roots: 1, + }; + assert!(matches!( + planner.plan(1, 0, stats), + CriticDecision::Skip { .. } + )); + } + + #[test] + fn nested_subagent_depth_blocks_run() { + let mut planner = CriticPlanner::new(); + let stats = DiffStats { + files_changed: 10, + lines_changed: 500, + distinct_roots: 5, + }; + let decision = planner.plan(1, 1, stats); + match decision { + CriticDecision::Skip { reason } => assert!(reason.contains("nested subagent depth")), + CriticDecision::Run { .. } => panic!("expected skip, got run"), + } + assert!( + !planner.has_audited(1), + "depth-blocked run must not consume the mutation slot" + ); + } + + #[test] + fn one_run_per_mutation_sequence() { + let mut planner = CriticPlanner::new(); + let stats = DiffStats { + files_changed: 4, + lines_changed: 20, + distinct_roots: 1, + }; + assert!(matches!( + planner.plan(42, 0, stats), + CriticDecision::Run { .. 
} + )); + match planner.plan(42, 0, stats) { + CriticDecision::Skip { reason } => assert!(reason.starts_with("already audited")), + CriticDecision::Run { .. } => panic!("expected skip on dup, got run"), + } + } + + #[test] + fn distinct_mutation_sequences_are_independent() { + let mut planner = CriticPlanner::new(); + let stats = DiffStats { + files_changed: 4, + lines_changed: 20, + distinct_roots: 1, + }; + assert!(matches!( + planner.plan(1, 0, stats), + CriticDecision::Run { .. } + )); + assert!(matches!( + planner.plan(2, 0, stats), + CriticDecision::Run { .. } + )); + } + + #[test] + fn model_hint_is_cheap() { + assert_eq!(CRITIC_MODEL_HINT, "claude-haiku"); + } +} diff --git a/rust/crates/runtime/src/file_ops.rs b/rust/crates/runtime/src/file_ops.rs index db51215ee3..f97a04c498 100644 --- a/rust/crates/runtime/src/file_ops.rs +++ b/rust/crates/runtime/src/file_ops.rs @@ -742,9 +742,9 @@ mod tests { let outside = temp_path("symlink-target.txt"); std::fs::write(&outside, "target content").expect("target should write"); - let link_path = workspace.join("escape-link.txt"); #[cfg(unix)] { + let link_path = workspace.join("escape-link.txt"); std::os::unix::fs::symlink(&outside, &link_path).expect("symlink should create"); assert!(is_symlink_escape(&link_path, &workspace).expect("check should succeed")); } diff --git a/rust/crates/runtime/src/hooks.rs b/rust/crates/runtime/src/hooks.rs index 6abd69fbbd..70a7970623 100644 --- a/rust/crates/runtime/src/hooks.rs +++ b/rust/crates/runtime/src/hooks.rs @@ -737,7 +737,7 @@ fn format_hook_failure(command: &str, code: i32, stdout: Option<&str>, stderr: & fn shell_command(command: &str) -> CommandWithStdin { #[cfg(windows)] - let mut command_builder = { + let command_builder = { let mut command_builder = Command::new("cmd"); command_builder.arg("/C").arg(command); CommandWithStdin::new(command_builder) @@ -957,11 +957,10 @@ mod tests { #[test] fn executes_hooks_in_configured_order() { // given + let first_command = 
shell_snippet("printf 'first'"); + let second_command = shell_snippet("printf 'second'"); let runner = HookRunner::new(RuntimeHookConfig::new( - vec![ - shell_snippet("printf 'first'"), - shell_snippet("printf 'second'"), - ], + vec![first_command.clone(), second_command.clone()], Vec::new(), Vec::new(), )); @@ -987,7 +986,7 @@ mod tests { event: HookEvent::PreToolUse, command, .. - } if command == "printf 'first'" + } if command == &first_command )); assert!(matches!( &reporter.events[1], @@ -995,7 +994,7 @@ mod tests { event: HookEvent::PreToolUse, command, .. - } if command == "printf 'first'" + } if command == &first_command )); assert!(matches!( &reporter.events[2], @@ -1003,7 +1002,7 @@ mod tests { event: HookEvent::PreToolUse, command, .. - } if command == "printf 'second'" + } if command == &second_command )); assert!(matches!( &reporter.events[3], @@ -1011,7 +1010,7 @@ mod tests { event: HookEvent::PreToolUse, command, .. - } if command == "printf 'second'" + } if command == &second_command )); } @@ -1041,10 +1040,10 @@ mod tests { #[test] fn malformed_nonempty_hook_output_reports_explicit_diagnostic_with_previews() { + let command = + shell_snippet("printf '{not-json\nsecond line'; printf 'stderr warning' >&2; exit 1"); let runner = HookRunner::new(RuntimeHookConfig::new( - vec![shell_snippet( - "printf '{not-json\nsecond line'; printf 'stderr warning' >&2; exit 1", - )], + vec![command.clone()], Vec::new(), Vec::new(), )); @@ -1056,8 +1055,11 @@ mod tests { assert!(rendered.contains("hook_invalid_json:")); assert!(rendered.contains("phase=PreToolUse")); assert!(rendered.contains("tool=Edit")); - assert!(rendered.contains("command=printf '{not-json")); - assert!(rendered.contains("printf 'stderr warning' >&2; exit 1")); + assert!(rendered.contains("command=")); + assert!(rendered.contains("stderr warning")); + assert!(rendered.contains( + &super::bounded_hook_preview(&command).unwrap_or_else(|| "".to_string()) + )); assert!(rendered.contains("detail=key 
must be a string")); assert!(rendered.contains("stdout_preview={not-json")); assert!(rendered.contains("second line stderr_preview=stderr warning")); @@ -1106,7 +1108,87 @@ mod tests { #[cfg(windows)] fn shell_snippet(script: &str) -> String { - script.replace('\'', "\"") + fn powershell_literal(value: &str) -> String { + format!("'{}'", value.replace('\'', "''")) + } + + fn powershell_snippet(script: &str) -> String { + format!( + "powershell -NoProfile -EncodedCommand {}", + encode_powershell(script) + ) + } + + fn encode_powershell(script: &str) -> String { + let bytes: Vec = script.encode_utf16().flat_map(u16::to_le_bytes).collect(); + encode_base64(&bytes) + } + + fn encode_base64(bytes: &[u8]) -> String { + const TABLE: &[u8; 64] = + b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4); + + for chunk in bytes.chunks(3) { + let b0 = chunk[0]; + let b1 = *chunk.get(1).unwrap_or(&0); + let b2 = *chunk.get(2).unwrap_or(&0); + let n = (u32::from(b0) << 16) | (u32::from(b1) << 8) | u32::from(b2); + + encoded.push(TABLE[((n >> 18) & 0x3F) as usize] as char); + encoded.push(TABLE[((n >> 12) & 0x3F) as usize] as char); + encoded.push(if chunk.len() > 1 { + TABLE[((n >> 6) & 0x3F) as usize] as char + } else { + '=' + }); + encoded.push(if chunk.len() > 2 { + TABLE[(n & 0x3F) as usize] as char + } else { + '=' + }); + } + + encoded + } + + if let Some(seconds) = script.strip_prefix("sleep ") { + return powershell_snippet(&format!("Start-Sleep -Seconds {seconds}")); + } + + if script == "printf '{not-json\nsecond line'; printf 'stderr warning' >&2; exit 1" { + return powershell_snippet( + "[Console]::Out.Write('{not-json' + [Environment]::NewLine + 'second line'); \ +[Console]::Error.Write('stderr warning'); exit 1", + ); + } + + if let Some(rest) = script.strip_prefix("printf '%s' '") { + if let Some(text) = rest.strip_suffix('\'') { + return powershell_snippet(&format!( + 
"[Console]::Out.Write({})", + powershell_literal(text) + )); + } + } + + if let Some(rest) = script.strip_prefix("printf '") { + if let Some((text, exit_code)) = rest.split_once("'; exit ") { + return powershell_snippet(&format!( + "[Console]::Out.Write({}); exit {exit_code}", + powershell_literal(text) + )); + } + + if let Some(text) = rest.strip_suffix('\'') { + return powershell_snippet(&format!( + "[Console]::Out.Write({})", + powershell_literal(text) + )); + } + } + + panic!("unsupported windows hook test snippet: {script}"); } #[cfg(not(windows))] diff --git a/rust/crates/runtime/src/lib.rs b/rust/crates/runtime/src/lib.rs index 432e1c1e02..3f0843717e 100644 --- a/rust/crates/runtime/src/lib.rs +++ b/rust/crates/runtime/src/lib.rs @@ -12,6 +12,7 @@ mod compact; mod config; pub mod config_validate; mod conversation; +pub mod critic; mod file_ops; mod git_context; pub mod green_contract; @@ -33,6 +34,7 @@ mod policy_engine; mod prompt; pub mod recovery_recipes; mod remote; +pub mod rollout_metrics; pub mod sandbox; mod session; pub mod session_control; @@ -47,6 +49,7 @@ pub mod team_cron_registry; #[cfg(test)] mod trust_resolver; mod usage; +pub mod verifier; pub mod worker_boot; pub use bash::{execute_bash, BashCommandInput, BashCommandOutput}; @@ -61,8 +64,8 @@ pub use config::{ McpManagedProxyServerConfig, McpOAuthConfig, McpRemoteServerConfig, McpSdkServerConfig, McpServerConfig, McpStdioServerConfig, McpTransport, McpWebSocketServerConfig, OAuthConfig, ProviderFallbackConfig, ResolvedPermissionMode, RuntimeConfig, RuntimeFeatureConfig, - RuntimeHookConfig, RuntimePermissionRuleConfig, RuntimePluginConfig, ScopedMcpServerConfig, - CLAW_SETTINGS_SCHEMA_NAME, + RuntimeHookConfig, RuntimePermissionRuleConfig, RuntimePluginConfig, RuntimeVerifierConfig, + RuntimeVerifierMode, ScopedMcpServerConfig, CLAW_SETTINGS_SCHEMA_NAME, }; pub use config_validate::{ check_unsupported_format, format_diagnostics, validate_config_file, ConfigDiagnostic, @@ -167,6 +170,11 @@ 
pub use trust_resolver::{TrustConfig, TrustDecision, TrustEvent, TrustPolicy, Tr pub use usage::{ format_usd, pricing_for_model, ModelPricing, TokenUsage, UsageCostEstimate, UsageTracker, }; +pub use verifier::{ + prepend_verifier_summary, CargoVerifier, CargoVerifierConfig, VerificationContext, + VerificationFailureKind, VerificationGateStatus, VerificationPhase, VerificationReport, + VerificationStatus, Verifier, +}; pub use worker_boot::{ Worker, WorkerEvent, WorkerEventKind, WorkerEventPayload, WorkerFailure, WorkerFailureKind, WorkerPromptTarget, WorkerReadySnapshot, WorkerRegistry, WorkerStatus, WorkerTrustResolution, diff --git a/rust/crates/runtime/src/mcp_stdio.rs b/rust/crates/runtime/src/mcp_stdio.rs index 5fbc31ba58..c2096b888f 100644 --- a/rust/crates/runtime/src/mcp_stdio.rs +++ b/rust/crates/runtime/src/mcp_stdio.rs @@ -19,12 +19,12 @@ use crate::mcp_lifecycle_hardened::{ }; #[cfg(test)] -const MCP_INITIALIZE_TIMEOUT_MS: u64 = 200; +const MCP_INITIALIZE_TIMEOUT_MS: u64 = 1_000; #[cfg(not(test))] const MCP_INITIALIZE_TIMEOUT_MS: u64 = 10_000; #[cfg(test)] -const MCP_LIST_TOOLS_TIMEOUT_MS: u64 = 300; +const MCP_LIST_TOOLS_TIMEOUT_MS: u64 = 1_000; #[cfg(not(test))] const MCP_LIST_TOOLS_TIMEOUT_MS: u64 = 30_000; @@ -1410,11 +1410,13 @@ mod tests { use std::collections::BTreeMap; use std::fs; use std::io::ErrorKind; - use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{SystemTime, UNIX_EPOCH}; + #[cfg(unix)] + use std::os::unix::fs::PermissionsExt; + use serde_json::json; use tokio::runtime::Builder; @@ -1443,6 +1445,34 @@ mod tests { std::env::temp_dir().join(format!("runtime-mcp-stdio-{nanos}-{unique_id}")) } + #[cfg(unix)] + fn make_executable(path: &Path) { + let mut permissions = fs::metadata(path).expect("metadata").permissions(); + permissions.set_mode(0o755); + fs::set_permissions(path, permissions).expect("chmod"); + } + + #[cfg(not(unix))] + fn make_executable(path: 
&Path) { + let _ = path; + } + + fn bash_command() -> &'static str { + if cfg!(windows) { + "bash" + } else { + "/bin/sh" + } + } + + fn python_command() -> &'static str { + if cfg!(windows) { + "python" + } else { + "python3" + } + } + fn write_echo_script() -> PathBuf { let root = temp_dir(); fs::create_dir_all(&root).expect("temp dir"); @@ -1452,9 +1482,7 @@ mod tests { "#!/bin/sh\nprintf 'READY:%s\\n' \"$MCP_TEST_TOKEN\"\nIFS= read -r line\nprintf 'ECHO:%s\\n' \"$line\"\n", ) .expect("write script"); - let mut permissions = fs::metadata(&script_path).expect("metadata").permissions(); - permissions.set_mode(0o755); - fs::set_permissions(&script_path, permissions).expect("chmod"); + make_executable(&script_path); script_path } @@ -1498,9 +1526,7 @@ mod tests { ] .join("\n"); fs::write(&script_path, script).expect("write script"); - let mut permissions = fs::metadata(&script_path).expect("metadata").permissions(); - permissions.set_mode(0o755); - fs::set_permissions(&script_path, permissions).expect("chmod"); + make_executable(&script_path); script_path } @@ -1632,9 +1658,7 @@ mod tests { ] .join("\n"); fs::write(&script_path, script).expect("write script"); - let mut permissions = fs::metadata(&script_path).expect("metadata").permissions(); - permissions.set_mode(0o755); - fs::set_permissions(&script_path, permissions).expect("chmod"); + make_executable(&script_path); script_path } @@ -1757,9 +1781,7 @@ mod tests { ] .join("\n"); fs::write(&script_path, script).expect("write script"); - let mut permissions = fs::metadata(&script_path).expect("metadata").permissions(); - permissions.set_mode(0o755); - fs::set_permissions(&script_path, permissions).expect("chmod"); + make_executable(&script_path); script_path } @@ -1767,7 +1789,7 @@ mod tests { let config = ScopedMcpServerConfig { scope: ConfigSource::Local, config: McpServerConfig::Stdio(McpStdioServerConfig { - command: "/bin/sh".to_string(), + command: bash_command().to_string(), args: 
vec![script_path.to_string_lossy().into_owned()], env: BTreeMap::from([("MCP_TEST_TOKEN".to_string(), "secret-value".to_string())]), tool_call_timeout_ms: None, @@ -1785,7 +1807,7 @@ mod tests { env: BTreeMap, ) -> crate::mcp_client::McpStdioTransport { crate::mcp_client::McpStdioTransport { - command: "python3".to_string(), + command: python_command().to_string(), args: vec![script_path.to_string_lossy().into_owned()], env, tool_call_timeout_ms: None, @@ -1834,7 +1856,7 @@ mod tests { ScopedMcpServerConfig { scope: ConfigSource::Local, config: McpServerConfig::Stdio(McpStdioServerConfig { - command: "python3".to_string(), + command: python_command().to_string(), args: vec![script_path.to_string_lossy().into_owned()], env, tool_call_timeout_ms: None, @@ -2053,7 +2075,7 @@ mod tests { runtime.block_on(async { let script_path = write_echo_script(); let transport = crate::mcp_client::McpStdioTransport { - command: "/bin/sh".to_string(), + command: bash_command().to_string(), args: vec![script_path.to_string_lossy().into_owned()], env: BTreeMap::from([("MCP_TEST_TOKEN".to_string(), "direct-secret".to_string())]), tool_call_timeout_ms: None, @@ -2312,7 +2334,7 @@ mod tests { ScopedMcpServerConfig { scope: ConfigSource::Local, config: McpServerConfig::Stdio(McpStdioServerConfig { - command: "python3".to_string(), + command: python_command().to_string(), args: vec![script_path.to_string_lossy().into_owned()], env: BTreeMap::from([( "MCP_TOOL_CALL_DELAY_MS".to_string(), @@ -2365,7 +2387,7 @@ mod tests { ScopedMcpServerConfig { scope: ConfigSource::Local, config: McpServerConfig::Stdio(McpStdioServerConfig { - command: "python3".to_string(), + command: python_command().to_string(), args: vec![script_path.to_string_lossy().into_owned()], env: BTreeMap::from([( "MCP_INVALID_TOOL_CALL_RESPONSE".to_string(), @@ -2676,9 +2698,7 @@ mod tests { ] .join("\n"); fs::write(&script_path, script).expect("write script"); - let mut permissions = 
fs::metadata(&script_path).expect("metadata").permissions(); - permissions.set_mode(0o755); - fs::set_permissions(&script_path, permissions).expect("chmod"); + make_executable(&script_path); script_path } @@ -2703,8 +2723,8 @@ mod tests { ScopedMcpServerConfig { scope: ConfigSource::Local, config: McpServerConfig::Stdio(McpStdioServerConfig { - command: broken_script_path.display().to_string(), - args: Vec::new(), + command: python_command().to_string(), + args: vec![broken_script_path.display().to_string()], env: BTreeMap::new(), tool_call_timeout_ms: None, }), diff --git a/rust/crates/runtime/src/mcp_tool_bridge.rs b/rust/crates/runtime/src/mcp_tool_bridge.rs index af637a98d1..72959b329a 100644 --- a/rust/crates/runtime/src/mcp_tool_bridge.rs +++ b/rust/crates/runtime/src/mcp_tool_bridge.rs @@ -314,11 +314,13 @@ impl McpToolRegistry { mod tests { use std::collections::BTreeMap; use std::fs; - use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{SystemTime, UNIX_EPOCH}; + #[cfg(unix)] + use std::os::unix::fs::PermissionsExt; + use super::*; use crate::config::{ ConfigSource, McpServerConfig, McpStdioServerConfig, ScopedMcpServerConfig, @@ -334,6 +336,26 @@ mod tests { std::env::temp_dir().join(format!("runtime-mcp-tool-bridge-{nanos}-{unique_id}")) } + #[cfg(unix)] + fn make_executable(path: &Path) { + let mut permissions = fs::metadata(path).expect("metadata").permissions(); + permissions.set_mode(0o755); + fs::set_permissions(path, permissions).expect("chmod"); + } + + #[cfg(not(unix))] + fn make_executable(path: &Path) { + let _ = path; + } + + fn python_command() -> &'static str { + if cfg!(windows) { + "python" + } else { + "python3" + } + } + fn cleanup_script(script_path: &Path) { if let Some(root) = script_path.parent() { let _ = fs::remove_dir_all(root); @@ -430,9 +452,7 @@ mod tests { ] .join("\n"); fs::write(&script_path, script).expect("write script"); - let mut permissions 
= fs::metadata(&script_path).expect("metadata").permissions(); - permissions.set_mode(0o755); - fs::set_permissions(&script_path, permissions).expect("chmod"); + make_executable(&script_path); script_path } @@ -444,7 +464,7 @@ mod tests { ScopedMcpServerConfig { scope: ConfigSource::Local, config: McpServerConfig::Stdio(McpStdioServerConfig { - command: "python3".to_string(), + command: python_command().to_string(), args: vec![script_path.to_string_lossy().into_owned()], env: BTreeMap::from([ ("MCP_SERVER_LABEL".to_string(), server_name.to_string()), diff --git a/rust/crates/runtime/src/oauth.rs b/rust/crates/runtime/src/oauth.rs index aa3ca158c7..8ef1a2210c 100644 --- a/rust/crates/runtime/src/oauth.rs +++ b/rust/crates/runtime/src/oauth.rs @@ -1,6 +1,6 @@ use std::collections::BTreeMap; -use std::fs::{self, File}; -use std::io::{self, Read}; +use std::fs; +use std::io; use std::path::PathBuf; use serde::{Deserialize, Serialize}; @@ -326,7 +326,8 @@ pub fn parse_oauth_callback_query(query: &str) -> Result io::Result { let mut buffer = vec![0_u8; bytes]; - File::open("/dev/urandom")?.read_exact(&mut buffer)?; + getrandom::fill(&mut buffer) + .map_err(|error| io::Error::other(format!("failed to gather random bytes: {error}")))?; Ok(base64url_encode(&buffer)) } diff --git a/rust/crates/runtime/src/permission_enforcer.rs b/rust/crates/runtime/src/permission_enforcer.rs index 6ff872bcc8..8cc2c6473c 100644 --- a/rust/crates/runtime/src/permission_enforcer.rs +++ b/rust/crates/runtime/src/permission_enforcer.rs @@ -6,6 +6,7 @@ //! Permission enforcement layer that gates tool execution based on the //! active `PermissionPolicy`. 
+use crate::bash_validation::{validate_read_only, ValidationResult}; use crate::permissions::{PermissionMode, PermissionOutcome, PermissionPolicy}; use serde::{Deserialize, Serialize}; @@ -146,21 +147,21 @@ impl PermissionEnforcer { let mode = self.policy.active_mode(); match mode { - PermissionMode::ReadOnly => { - if is_read_only_command(command) { - EnforcementResult::Allowed - } else { - EnforcementResult::Denied { - tool: "bash".to_owned(), - active_mode: mode.as_str().to_owned(), - required_mode: PermissionMode::WorkspaceWrite.as_str().to_owned(), - reason: format!( - "command may modify state; not allowed in '{}' mode", - mode.as_str() - ), - } - } - } + PermissionMode::ReadOnly => match validate_read_only(command, mode) { + ValidationResult::Allow => EnforcementResult::Allowed, + ValidationResult::Block { reason } => EnforcementResult::Denied { + tool: "bash".to_owned(), + active_mode: mode.as_str().to_owned(), + required_mode: PermissionMode::WorkspaceWrite.as_str().to_owned(), + reason, + }, + ValidationResult::Warn { message } => EnforcementResult::Denied { + tool: "bash".to_owned(), + active_mode: mode.as_str().to_owned(), + required_mode: PermissionMode::WorkspaceWrite.as_str().to_owned(), + reason: message, + }, + }, PermissionMode::Prompt => EnforcementResult::Denied { tool: "bash".to_owned(), active_mode: mode.as_str().to_owned(), @@ -190,85 +191,20 @@ fn is_within_workspace(path: &str, workspace_root: &str) -> bool { normalized.starts_with(&root) || normalized == workspace_root.trim_end_matches('/') } -/// Conservative heuristic: is this bash command read-only? 
+#[cfg(test)] fn is_read_only_command(command: &str) -> bool { - let first_token = command - .split_whitespace() - .next() - .unwrap_or("") - .rsplit('/') - .next() - .unwrap_or(""); - + let trimmed = command.trim(); + if trimmed.is_empty() { + return false; + } + if trimmed.contains(" -i ") || trimmed.contains(" --in-place") || trimmed.starts_with("sed -i") + { + return false; + } matches!( - first_token, - "cat" - | "head" - | "tail" - | "less" - | "more" - | "wc" - | "ls" - | "find" - | "grep" - | "rg" - | "awk" - | "sed" - | "echo" - | "printf" - | "which" - | "where" - | "whoami" - | "pwd" - | "env" - | "printenv" - | "date" - | "cal" - | "df" - | "du" - | "free" - | "uptime" - | "uname" - | "file" - | "stat" - | "diff" - | "sort" - | "uniq" - | "tr" - | "cut" - | "paste" - | "tee" - | "xargs" - | "test" - | "true" - | "false" - | "type" - | "readlink" - | "realpath" - | "basename" - | "dirname" - | "sha256sum" - | "md5sum" - | "b3sum" - | "xxd" - | "hexdump" - | "od" - | "strings" - | "tree" - | "jq" - | "yq" - | "python3" - | "python" - | "node" - | "ruby" - | "cargo" - | "rustc" - | "git" - | "gh" - ) && !command.contains("-i ") - && !command.contains("--in-place") - && !command.contains(" > ") - && !command.contains(" >> ") + validate_read_only(command, PermissionMode::ReadOnly), + ValidationResult::Allow + ) } #[cfg(test)] diff --git a/rust/crates/runtime/src/policy_engine.rs b/rust/crates/runtime/src/policy_engine.rs index 84912a679d..0403853c36 100644 --- a/rust/crates/runtime/src/policy_engine.rs +++ b/rust/crates/runtime/src/policy_engine.rs @@ -2,7 +2,7 @@ use std::time::Duration; pub type GreenLevel = u8; -const STALE_BRANCH_THRESHOLD: Duration = Duration::from_secs(60 * 60); +const STALE_BRANCH_THRESHOLD: Duration = Duration::from_hours(1); #[derive(Debug, Clone, PartialEq, Eq)] pub struct PolicyRule { diff --git a/rust/crates/runtime/src/prompt.rs b/rust/crates/runtime/src/prompt.rs index e46b7ebee5..c270c62f23 100644 --- 
a/rust/crates/runtime/src/prompt.rs +++ b/rust/crates/runtime/src/prompt.rs @@ -42,6 +42,8 @@ pub const SYSTEM_PROMPT_DYNAMIC_BOUNDARY: &str = "__SYSTEM_PROMPT_DYNAMIC_BOUNDA pub const FRONTIER_MODEL_NAME: &str = "Claude Opus 4.6"; const MAX_INSTRUCTION_FILE_CHARS: usize = 4_000; const MAX_TOTAL_INSTRUCTION_CHARS: usize = 12_000; +const MAX_CONTEXT_PACK_CHARS: usize = 6_000; +const MAX_CONTEXT_PACK_FILES: usize = 8; /// Contents of an instruction file included in prompt construction. #[derive(Debug, Clone, PartialEq, Eq)] @@ -50,6 +52,23 @@ pub struct ContextFile { pub content: String, } +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ContextPackFile { + pub status: String, + pub path: PathBuf, + pub project_kind: Option<String>, + pub project_root: Option<PathBuf>, + pub entrypoint: Option<PathBuf>, + pub related_test: Option<PathBuf>, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct ContextPack { + pub repo_root: Option<PathBuf>, + pub branch: Option<String>, + pub changed_files: Vec<ContextPackFile>, +} + /// Project-local context injected into the rendered system prompt. 
#[derive(Debug, Clone, Default, PartialEq, Eq)] pub struct ProjectContext { @@ -58,6 +77,7 @@ pub struct ProjectContext { pub git_status: Option<String>, pub git_diff: Option<String>, pub git_context: Option<GitContext>, + pub context_pack: Option<ContextPack>, pub instruction_files: Vec<ContextFile>, } @@ -74,6 +94,7 @@ impl ProjectContext { git_status: None, git_diff: None, git_context: None, + context_pack: None, instruction_files, }) } @@ -86,6 +107,11 @@ impl ProjectContext { context.git_status = read_git_status(&context.cwd); context.git_diff = read_git_diff(&context.cwd); context.git_context = GitContext::detect(&context.cwd); + context.context_pack = build_context_pack( + &context.cwd, + context.git_status.as_deref(), + context.git_context.as_ref(), + ); Ok(context) } } @@ -285,6 +311,277 @@ fn read_git_output(cwd: &Path, args: &[&str]) -> Option<String> { String::from_utf8(output.stdout).ok() } +#[derive(Debug, Clone, PartialEq, Eq)] +struct GitStatusEntry { + status: String, + path: PathBuf, +} + +fn build_context_pack( + cwd: &Path, + git_status: Option<&str>, + git_context: Option<&GitContext>, +) -> Option<ContextPack> { + let repo_root = read_git_repo_root(cwd).or_else(|| Some(cwd.to_path_buf()))?; + let entries = parse_git_status_snapshot(git_status?); + if entries.is_empty() { + return None; + } + + let changed_files = entries + .into_iter() + .take(MAX_CONTEXT_PACK_FILES) + .map(|entry| { + let absolute_path = repo_root.join(&entry.path); + let (project_root, project_kind) = detect_project_root(&absolute_path, &repo_root) + .unwrap_or((repo_root.clone(), None)); + let entrypoint = find_entrypoint(&project_root, project_kind.as_deref()); + let related_test = + find_related_test(&absolute_path, &project_root, project_kind.as_deref()); + ContextPackFile { + status: entry.status, + path: entry.path, + project_kind, + project_root: Some(project_root), + entrypoint, + related_test, + } + }) + .collect::<Vec<_>>(); + + Some(ContextPack { + repo_root: Some(repo_root), + branch: git_context.and_then(|context| context.branch.clone()), + 
changed_files, + }) +} + +fn read_git_repo_root(cwd: &Path) -> Option<PathBuf> { + let output = Command::new("git") + .args(["rev-parse", "--show-toplevel"]) + .current_dir(cwd) + .output() + .ok()?; + if !output.status.success() { + return None; + } + let stdout = String::from_utf8(output.stdout).ok()?; + let trimmed = stdout.trim(); + if trimmed.is_empty() { + None + } else { + Some(PathBuf::from(trimmed)) + } +} + +fn parse_git_status_snapshot(status: &str) -> Vec<GitStatusEntry> { + status + .lines() + .filter_map(|line| { + let trimmed = line.trim_end(); + if trimmed.is_empty() || trimmed.starts_with("##") || trimmed.len() < 4 { + return None; + } + let status_code = &trimmed[..2]; + let raw_path = trimmed[3..].trim(); + if raw_path.is_empty() { + return None; + } + let path = raw_path + .split(" -> ") + .last() + .map(str::trim) + .filter(|path| !path.is_empty())?; + Some(GitStatusEntry { + status: classify_git_status(status_code), + path: PathBuf::from(path), + }) + }) + .collect() +} + +fn classify_git_status(status_code: &str) -> String { + if status_code == "??" 
{ + return "untracked".to_string(); + } + let significant = status_code + .chars() + .find(|ch| !ch.is_ascii_whitespace()) + .unwrap_or('M'); + match significant { + 'A' => "added", + 'M' => "modified", + 'D' => "deleted", + 'R' => "renamed", + 'C' => "copied", + 'T' => "typechange", + 'U' => "conflict", + _ => "changed", + } + .to_string() +} + +fn detect_project_root(path: &Path, repo_root: &Path) -> Option<(PathBuf, Option<String>)> { + let mut current = if path.is_dir() { + path.to_path_buf() + } else { + path.parent()?.to_path_buf() + }; + loop { + for (marker, kind) in [ + ("Cargo.toml", "rust"), + ("package.json", "node"), + ("pyproject.toml", "python"), + ("go.mod", "go"), + ] { + if current.join(marker).is_file() { + return Some((current, Some(kind.to_string()))); + } + } + if current == repo_root { + break; + } + current = current.parent()?.to_path_buf(); + } + Some((repo_root.to_path_buf(), None)) +} + +fn find_entrypoint(project_root: &Path, project_kind: Option<&str>) -> Option<PathBuf> { + let candidates: &[&str] = match project_kind { + Some("rust") => &["src/main.rs", "src/lib.rs", "src/bin/main.rs"], + Some("node") => &[ + "src/index.ts", + "src/main.ts", + "src/index.tsx", + "index.ts", + "src/index.js", + "src/main.js", + "index.js", + ], + Some("python") => &["app/main.py", "main.py", "src/main.py", "src/__init__.py"], + Some("go") => &["main.go", "cmd/main.go"], + _ => &["README.md"], + }; + candidates + .iter() + .map(|candidate| project_root.join(candidate)) + .find(|candidate| candidate.is_file()) +} + +fn find_related_test( + changed_path: &Path, + project_root: &Path, + project_kind: Option<&str>, +) -> Option<PathBuf> { + let file_name = changed_path.file_name()?.to_string_lossy().to_lowercase(); + if is_probable_test_name(&file_name) { + return changed_path + .strip_prefix(project_root) + .ok() + .map(|relative| project_root.join(relative)); + } + + let stem = changed_path.file_stem()?.to_string_lossy(); + let extension = changed_path + .extension() + 
.and_then(|ext| ext.to_str()) + .unwrap_or(""); + let candidates = match project_kind { + Some("rust") => vec![format!("tests/{stem}.rs"), format!("tests/test_{stem}.rs")], + Some("node") => vec![ + format!("src/{stem}.test.{extension}"), + format!("src/{stem}.spec.{extension}"), + format!("tests/{stem}.test.{extension}"), + format!("tests/{stem}.spec.{extension}"), + ], + Some("python") => vec![format!("tests/test_{stem}.py"), format!("test_{stem}.py")], + Some("go") => vec![format!("{stem}_test.go"), format!("tests/{stem}_test.go")], + _ => Vec::new(), + }; + candidates + .into_iter() + .map(|candidate| project_root.join(candidate)) + .find(|candidate| candidate.is_file()) +} + +fn is_probable_test_name(file_name: &str) -> bool { + file_name.starts_with("test_") + || file_name.ends_with("_test.go") + || file_name.ends_with(".test.ts") + || file_name.ends_with(".test.tsx") + || file_name.ends_with(".test.js") + || file_name.ends_with(".spec.ts") + || file_name.ends_with(".spec.tsx") + || file_name.ends_with(".spec.js") + || file_name.ends_with("_test.rs") +} + +fn render_context_pack(pack: &ContextPack) -> String { + let repo_root = pack.repo_root.as_deref().unwrap_or_else(|| Path::new(".")); + let mut lines = Vec::new(); + lines.extend(prepend_bullets(vec![ + format!("Repo root: {}", repo_root.display()), + format!( + "Git branch: {}", + pack.branch.as_deref().unwrap_or("unknown") + ), + format!("Changed files in scope: {}.", pack.changed_files.len()), + ])); + lines.push("Changed file targets:".to_string()); + for changed in &pack.changed_files { + let mut detail = format!( + " - {} {}", + changed.status, + relative_or_display(repo_root, &changed.path) + ); + if let Some(kind) = &changed.project_kind { + use std::fmt::Write as _; + let _ = write!(detail, " [{kind}]"); + } + if let Some(project_root) = &changed.project_root { + use std::fmt::Write as _; + let _ = write!( + detail, + " (root: {})", + relative_or_display(repo_root, project_root) + ); + } + 
lines.push(detail); + if let Some(entrypoint) = &changed.entrypoint { + lines.push(format!( + " entrypoint: {}", + relative_or_display(repo_root, entrypoint) + )); + } + if let Some(related_test) = &changed.related_test { + lines.push(format!( + " related test: {}", + relative_or_display(repo_root, related_test) + )); + } + } + truncate_rendered_context_pack(&lines.join("\n")) +} + +fn relative_or_display(base: &Path, path: &Path) -> String { + path.strip_prefix(base) + .unwrap_or(path) + .display() + .to_string() +} + +fn truncate_rendered_context_pack(content: &str) -> String { + if content.chars().count() <= MAX_CONTEXT_PACK_CHARS { + return content.to_string(); + } + let mut shortened = content + .chars() + .take(MAX_CONTEXT_PACK_CHARS) + .collect::<String>(); + shortened.push_str("\n... [context pack truncated]"); + shortened +} + fn render_project_context(project_context: &ProjectContext) -> String { let mut lines = vec!["# Project context".to_string()]; let mut bullets = vec![ @@ -298,7 +595,11 @@ fn render_project_context(project_context: &ProjectContext) -> String { )); } lines.extend(prepend_bullets(bullets)); - if let Some(status) = &project_context.git_status { + if let Some(context_pack) = &project_context.context_pack { + lines.push(String::new()); + lines.push("Workspace context pack:".to_string()); + lines.push(render_context_pack(context_pack)); + } else if let Some(status) = &project_context.git_status { lines.push(String::new()); lines.push("Git status snapshot:".to_string()); lines.push(status.clone()); @@ -312,18 +613,6 @@ fn render_project_context(project_context: &ProjectContext) -> String { } } } - if let Some(diff) = &project_context.git_diff { - lines.push(String::new()); - lines.push("Git diff snapshot:".to_string()); - lines.push(diff.clone()); - } - if let Some(git_context) = &project_context.git_context { - let rendered = git_context.render(); - if !rendered.is_empty() { - lines.push(String::new()); - lines.push(rendered); - } - } 
lines.join("\n") } @@ -521,8 +810,9 @@ fn get_actions_section() -> String { mod tests { use super::{ collapse_blank_lines, display_context_path, normalize_instruction_content, - render_instruction_content, render_instruction_files, truncate_instruction_content, - ContextFile, ProjectContext, SystemPromptBuilder, SYSTEM_PROMPT_DYNAMIC_BOUNDARY, + render_instruction_content, render_instruction_files, render_project_context, + truncate_instruction_content, truncate_rendered_context_pack, ContextFile, ProjectContext, + SystemPromptBuilder, MAX_CONTEXT_PACK_CHARS, SYSTEM_PROMPT_DYNAMIC_BOUNDARY, }; use crate::config::ConfigLoader; use std::fs; @@ -732,11 +1022,15 @@ mod tests { let status = context.git_status.as_deref().expect("status snapshot"); assert!(status.contains("## main")); assert!(status.contains("A d.txt")); + let context_pack = context.context_pack.as_ref().expect("context pack"); + assert_eq!(context_pack.changed_files.len(), 1); + assert_eq!(context_pack.changed_files[0].status, "added"); assert!(rendered.contains("Recent commits (last 5):")); assert!(rendered.contains("first commit")); - assert!(rendered.contains("Git status snapshot:")); - assert!(rendered.contains("## main")); + assert!(rendered.contains("Workspace context pack:")); + assert!(rendered.contains("Git branch: main")); + assert!(rendered.contains("added d.txt")); fs::remove_dir_all(root).expect("cleanup temp dir"); } @@ -785,6 +1079,74 @@ mod tests { fs::remove_dir_all(root).expect("cleanup temp dir"); } + #[test] + fn context_pack_detects_project_root_entrypoint_and_related_test() { + let _guard = env_lock(); + ensure_valid_cwd(); + let root = temp_dir(); + fs::create_dir_all(root.join("src")).expect("src dir"); + fs::create_dir_all(root.join("tests")).expect("tests dir"); + std::process::Command::new("git") + .args(["init", "--quiet", "-b", "main"]) + .current_dir(&root) + .status() + .expect("git init should run"); + std::process::Command::new("git") + .args(["config", "user.email", 
"tests@example.com"]) + .current_dir(&root) + .status() + .expect("git config email should run"); + std::process::Command::new("git") + .args(["config", "user.name", "Runtime Prompt Tests"]) + .current_dir(&root) + .status() + .expect("git config name should run"); + fs::write( + root.join("Cargo.toml"), + "[package]\nname = \"demo\"\nversion = \"0.1.0\"\nedition = \"2021\"\n", + ) + .expect("write Cargo.toml"); + fs::write(root.join("src/main.rs"), "fn main() {}\n").expect("write main"); + fs::write(root.join("src/lib.rs"), "pub fn value() -> usize { 1 }\n").expect("write lib"); + fs::write(root.join("tests/lib.rs"), "#[test]\nfn smoke() {}\n").expect("write test"); + std::process::Command::new("git") + .args(["add", "."]) + .current_dir(&root) + .status() + .expect("git add should run"); + std::process::Command::new("git") + .args(["commit", "-m", "init", "--quiet"]) + .current_dir(&root) + .status() + .expect("git commit should run"); + fs::write(root.join("src/lib.rs"), "pub fn value() -> usize { 2 }\n").expect("rewrite lib"); + + let context = + ProjectContext::discover_with_git(&root, "2026-03-31").expect("context should load"); + let rendered = render_project_context(&context); + let context_pack = context.context_pack.as_ref().expect("context pack"); + let changed = context_pack + .changed_files + .iter() + .find(|file| file.path == Path::new("src/lib.rs")) + .expect("changed file should be listed"); + + assert_eq!(changed.status, "modified"); + assert_eq!(changed.project_kind.as_deref(), Some("rust")); + assert_eq!(changed.project_root.as_ref(), Some(&root)); + assert_eq!(changed.entrypoint.as_ref(), Some(&root.join("src/main.rs"))); + assert_eq!( + changed.related_test.as_ref(), + Some(&root.join("tests/lib.rs")) + ); + assert!(rendered.contains("Workspace context pack:")); + assert!(rendered.contains("modified src/lib.rs [rust]")); + assert!(rendered.contains("entrypoint: src/main.rs")); + assert!(rendered.contains("related test: tests/lib.rs")); + + 
fs::remove_dir_all(root).expect("cleanup temp dir"); + } + #[test] fn load_system_prompt_reads_claude_files_and_config() { let root = temp_dir(); @@ -892,6 +1254,87 @@ mod tests { fs::remove_dir_all(root).expect("cleanup temp dir"); } + #[test] + fn rendered_context_pack_respects_6kb_cap() { + // Invariant: MAX_CONTEXT_PACK_CHARS is the spec'd 6KB budget. + assert_eq!(MAX_CONTEXT_PACK_CHARS, 6_000); + + let fat = "x".repeat(20_000); + let rendered = truncate_rendered_context_pack(&fat); + assert!( + rendered.chars().count() + <= MAX_CONTEXT_PACK_CHARS + "\n... [context pack truncated]".chars().count(), + "truncation must keep output within cap" + ); + assert!(rendered.ends_with("[context pack truncated]")); + } + + #[test] + fn context_pack_is_recomputed_each_discover_call() { + // Invariant: no cross-turn cache — each discover_with_git rebuilds the + // pack from the current workspace. The test mutates git between two + // discover calls and expects the second call to observe the new state. + let _guard = env_lock(); + ensure_valid_cwd(); + let root = temp_dir(); + fs::create_dir_all(&root).expect("root dir"); + std::process::Command::new("git") + .args(["init", "--quiet"]) + .current_dir(&root) + .status() + .expect("git init"); + std::process::Command::new("git") + .args(["config", "user.email", "t@example.com"]) + .current_dir(&root) + .status() + .expect("config email"); + std::process::Command::new("git") + .args(["config", "user.name", "T"]) + .current_dir(&root) + .status() + .expect("config name"); + fs::write(root.join("initial.txt"), "seed").expect("seed"); + std::process::Command::new("git") + .args(["add", "."]) + .current_dir(&root) + .status() + .expect("add seed"); + std::process::Command::new("git") + .args(["commit", "-m", "seed", "--quiet"]) + .current_dir(&root) + .status() + .expect("commit seed"); + + // First discover: no changed files. 
+ let first = ProjectContext::discover_with_git(&root, "2026-04-22").expect("first discover"); + let first_count = first + .context_pack + .as_ref() + .map_or(0, |p| p.changed_files.len()); + + // Introduce a change between calls. + fs::write(root.join("a.txt"), "hello").expect("a.txt"); + std::process::Command::new("git") + .args(["add", "a.txt"]) + .current_dir(&root) + .status() + .expect("stage a.txt"); + + let second = + ProjectContext::discover_with_git(&root, "2026-04-22").expect("second discover"); + let second_count = second + .context_pack + .as_ref() + .map_or(0, |p| p.changed_files.len()); + + assert!( + second_count > first_count, + "second discover must observe new changes (first={first_count}, second={second_count}) — no cross-turn caching", + ); + + fs::remove_dir_all(root).expect("cleanup temp dir"); + } + #[test] fn renders_instruction_file_metadata() { let rendered = render_instruction_files(&[ContextFile { diff --git a/rust/crates/runtime/src/rollout_metrics.rs b/rust/crates/runtime/src/rollout_metrics.rs new file mode 100644 index 0000000000..4a63dbe27b --- /dev/null +++ b/rust/crates/runtime/src/rollout_metrics.rs @@ -0,0 +1,562 @@ +//! Rollout metrics aggregator and budget gates for the edit→verify→fix loop. +//! +//! The runtime emits individual `verifier_ran` traces per step, plus per-turn +//! token/latency counts. This module collapses those samples into the handful +//! of rollout metrics named by the plan: +//! +//! * `quick_verify_latency_ms` — p50/p95 latency of quick-phase verification. +//! * `final_gate_pass_rate` — fraction of final-phase reports that succeeded. +//! * `repair_iterations_until_green` — mean repair iterations needed before +//! the first green final-gate report. +//! * `tokens_per_successful_fix` — total tokens spent divided by number of +//! successful fixes (green final reports). +//! * `turn_latency_ms` — mean end-to-end turn latency. +//! +//! It also codifies the rollout **budget gates** from the plan: +//! 
* pass-rate may not regress by more than 1 percentage point. +//! * repair-iteration mean may not regress by more than 5%. +//! * tokens-per-fix may not regress by more than 10%. +//! * turn-latency p50 may not regress by more than 15%. +//! +//! Keeping aggregation + gates in one crate-visible module lets CI wire it +//! without reaching into `conversation.rs` internals. + +#![allow( + clippy::cast_precision_loss, + clippy::cast_possible_truncation, + clippy::cast_sign_loss, + clippy::doc_markdown, + clippy::map_unwrap_or +)] + +use std::cmp::Ordering; + +use telemetry::SessionTraceRecord; + +/// Phase observed for a single verifier step. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum VerifierPhase { + Quick, + Final, +} + +/// A single verifier-step observation. The aggregator takes a slice of these. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct VerifierSample { + pub phase: VerifierPhase, + /// Identifies the repair episode (0-based index of a user turn, typically). + pub turn_index: u64, + /// Monotonic within a turn; the final successful report carries the + /// "final iteration count" for the episode. + pub iteration: u32, + pub duration_ms: u64, + pub succeeded: bool, +} + +/// Per-turn fact used for token / latency rollups. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct TurnSample { + pub turn_index: u64, + pub tokens_total: u64, + pub turn_latency_ms: u64, + /// True iff the turn ended with a green final-gate report. + pub successful_fix: bool, +} + +/// Aggregated rollout metrics. 
+#[derive(Debug, Clone, Copy, PartialEq)] +pub struct RolloutMetrics { + pub quick_verify_latency_p50_ms: f64, + pub quick_verify_latency_p95_ms: f64, + pub final_gate_pass_rate: f64, + pub repair_iterations_until_green_mean: f64, + pub tokens_per_successful_fix: f64, + pub turn_latency_p50_ms: f64, + pub turn_latency_mean_ms: f64, + pub samples: AggregateCounts, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct AggregateCounts { + pub quick_samples: usize, + pub final_samples: usize, + pub turns: usize, + pub successful_fixes: usize, +} + +/// Collapse raw samples into [`RolloutMetrics`]. Missing signals become `0.0` +/// (not NaN) so downstream gates can compare safely. +#[must_use] +pub fn aggregate(verifier: &[VerifierSample], turns: &[TurnSample]) -> RolloutMetrics { + let mut quick_latencies: Vec<u64> = verifier + .iter() + .filter(|s| s.phase == VerifierPhase::Quick) + .map(|s| s.duration_ms) + .collect(); + let final_samples: Vec<&VerifierSample> = verifier + .iter() + .filter(|s| s.phase == VerifierPhase::Final) + .collect(); + let final_success_count = final_samples.iter().filter(|s| s.succeeded).count(); + + // Repair iterations per turn: count of final-phase failures before the + // first green final-phase report (if any). If the turn never went green, + // treat the whole chain as the cost. 
+ let mut repair_iterations: Vec<u32> = Vec::new(); + for turn in turns { + let turn_finals: Vec<&&VerifierSample> = final_samples + .iter() + .filter(|s| s.turn_index == turn.turn_index) + .collect(); + if turn_finals.is_empty() { + continue; + } + let mut failures = 0_u32; + let mut saw_green = false; + for sample in &turn_finals { + if sample.succeeded { + saw_green = true; + break; + } + failures += 1; + } + if saw_green || !turn_finals.is_empty() { + repair_iterations.push(failures); + } + } + + let tokens_total: u64 = turns.iter().map(|t| t.tokens_total).sum(); + let successful_fixes = turns.iter().filter(|t| t.successful_fix).count(); + let tokens_per_successful_fix = if successful_fixes == 0 { + 0.0 + } else { + tokens_total as f64 / successful_fixes as f64 + }; + + let turn_latencies: Vec<u64> = turns.iter().map(|t| t.turn_latency_ms).collect(); + + RolloutMetrics { + quick_verify_latency_p50_ms: percentile(&mut quick_latencies.clone(), 0.50), + quick_verify_latency_p95_ms: percentile(&mut quick_latencies, 0.95), + final_gate_pass_rate: if final_samples.is_empty() { + 0.0 + } else { + final_success_count as f64 / final_samples.len() as f64 + }, + repair_iterations_until_green_mean: mean_u32(&repair_iterations), + tokens_per_successful_fix, + turn_latency_p50_ms: percentile(&mut turn_latencies.clone(), 0.50), + turn_latency_mean_ms: mean_u64(&turn_latencies), + samples: AggregateCounts { + quick_samples: verifier + .iter() + .filter(|s| s.phase == VerifierPhase::Quick) + .count(), + final_samples: final_samples.len(), + turns: turns.len(), + successful_fixes, + }, + } +} + +fn percentile(values: &mut [u64], p: f64) -> f64 { + if values.is_empty() { + return 0.0; + } + values.sort_unstable(); + let rank = (p * (values.len() as f64 - 1.0)).round() as usize; + values[rank.min(values.len() - 1)] as f64 +} + +fn mean_u32(values: &[u32]) -> f64 { + if values.is_empty() { + return 0.0; + } + values.iter().map(|v| f64::from(*v)).sum::<f64>() / values.len() as f64 +} + +fn 
mean_u64(values: &[u64]) -> f64 { + if values.is_empty() { + return 0.0; + } + values.iter().map(|v| *v as f64).sum::<f64>() / values.len() as f64 +} + +/// Extract `(VerifierSample, TurnSample)` vectors from raw session trace records. +/// +/// Recognises: +/// * `verifier_ran` records with attributes `phase` ("quick" | "final"), +/// `mutation_sequence`, `duration_ms`, `passed`. `mutation_sequence` +/// doubles as the `turn_index` so samples from the same repair episode +/// group together even if the caller did not set an explicit turn id. +/// * `turn_completed` records with optional `tokens_total`, `turn_latency_ms`, +/// `verification_gate_passed`. Records missing those fields are still +/// admitted with zeros so count-based aggregates stay honest. +/// +/// Any trace with an unexpected name is ignored. +#[must_use] +pub fn samples_from_traces( + traces: &[SessionTraceRecord], +) -> (Vec<VerifierSample>, Vec<TurnSample>) { + let mut verifier = Vec::new(); + let mut turns = Vec::new(); + let mut turn_counter: u64 = 0; + + for record in traces { + match record.name.as_str() { + "verifier_ran" => { + let phase = record + .attributes + .get("phase") + .and_then(|v| v.as_str()) + .map(|s| match s { + "final" => VerifierPhase::Final, + _ => VerifierPhase::Quick, + }) + .unwrap_or(VerifierPhase::Quick); + let turn_index = record + .attributes + .get("mutation_sequence") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let iteration = record + .attributes + .get("iteration") + .and_then(serde_json::Value::as_u64) + .and_then(|v| u32::try_from(v).ok()) + .unwrap_or(0); + let duration_ms = record + .attributes + .get("duration_ms") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let succeeded = record + .attributes + .get("passed") + .and_then(serde_json::Value::as_bool) + .unwrap_or(false); + verifier.push(VerifierSample { + phase, + turn_index, + iteration, + duration_ms, + succeeded, + }); + } + "turn_completed" => { + let tokens_total = record + .attributes + 
.get("tokens_total") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let turn_latency_ms = record + .attributes + .get("turn_latency_ms") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let successful_fix = record + .attributes + .get("verification_gate_passed") + .and_then(serde_json::Value::as_bool) + .unwrap_or(false); + turns.push(TurnSample { + turn_index: turn_counter, + tokens_total, + turn_latency_ms, + successful_fix, + }); + turn_counter += 1; + } + _ => {} + } + } + + (verifier, turns) +} + +/// Budget gate thresholds codified from the rollout plan. +pub const MAX_PASS_RATE_REGRESSION_PP: f64 = 0.01; // 1 percentage point +pub const MAX_REPAIR_ITERATIONS_REGRESSION: f64 = 0.05; // 5% relative +pub const MAX_TOKENS_PER_FIX_REGRESSION: f64 = 0.10; // 10% relative +pub const MAX_TURN_LATENCY_P50_REGRESSION: f64 = 0.15; // 15% relative + +/// A single budget-gate violation. +#[derive(Debug, Clone, PartialEq)] +pub struct BudgetViolation { + pub metric: &'static str, + pub baseline: f64, + pub current: f64, + pub limit: f64, + pub actual: f64, +} + +/// Compare a rollout candidate against a baseline. Returns every violation so +/// reports can surface them all at once. +#[must_use] +pub fn evaluate_budget_gates( + baseline: &RolloutMetrics, + current: &RolloutMetrics, +) -> Vec<BudgetViolation> { + let mut violations = Vec::new(); + + // Pass-rate: absolute percentage-point drop. 
+ let pass_rate_drop = baseline.final_gate_pass_rate - current.final_gate_pass_rate; + if pass_rate_drop > MAX_PASS_RATE_REGRESSION_PP { + violations.push(BudgetViolation { + metric: "final_gate_pass_rate", + baseline: baseline.final_gate_pass_rate, + current: current.final_gate_pass_rate, + limit: MAX_PASS_RATE_REGRESSION_PP, + actual: pass_rate_drop, + }); + } + + push_relative_regression( + &mut violations, + "repair_iterations_until_green_mean", + baseline.repair_iterations_until_green_mean, + current.repair_iterations_until_green_mean, + MAX_REPAIR_ITERATIONS_REGRESSION, + ); + push_relative_regression( + &mut violations, + "tokens_per_successful_fix", + baseline.tokens_per_successful_fix, + current.tokens_per_successful_fix, + MAX_TOKENS_PER_FIX_REGRESSION, + ); + push_relative_regression( + &mut violations, + "turn_latency_p50_ms", + baseline.turn_latency_p50_ms, + current.turn_latency_p50_ms, + MAX_TURN_LATENCY_P50_REGRESSION, + ); + + violations +} + +fn push_relative_regression( + violations: &mut Vec<BudgetViolation>, + metric: &'static str, + baseline: f64, + current: f64, + limit: f64, +) { + if baseline <= 0.0 { + // No baseline signal — cannot compute a relative delta. 
+ return; + } + let delta = (current - baseline) / baseline; + if delta.partial_cmp(&limit) == Some(Ordering::Greater) { + violations.push(BudgetViolation { + metric, + baseline, + current, + limit, + actual: delta, + }); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn vs(phase: VerifierPhase, turn: u64, iter: u32, duration: u64, ok: bool) -> VerifierSample { + VerifierSample { + phase, + turn_index: turn, + iteration: iter, + duration_ms: duration, + succeeded: ok, + } + } + + fn ts(turn: u64, tokens: u64, latency: u64, ok: bool) -> TurnSample { + TurnSample { + turn_index: turn, + tokens_total: tokens, + turn_latency_ms: latency, + successful_fix: ok, + } + } + + #[test] + fn empty_inputs_produce_zeroed_metrics() { + let m = aggregate(&[], &[]); + assert!(m.quick_verify_latency_p50_ms.abs() < f64::EPSILON); + assert!(m.final_gate_pass_rate.abs() < f64::EPSILON); + assert!(m.tokens_per_successful_fix.abs() < f64::EPSILON); + assert_eq!(m.samples.turns, 0); + } + + #[test] + fn aggregates_basic_signals() { + let verifier = vec![ + vs(VerifierPhase::Quick, 0, 1, 10, true), + vs(VerifierPhase::Quick, 0, 2, 30, true), + vs(VerifierPhase::Final, 0, 1, 100, false), + vs(VerifierPhase::Final, 0, 2, 120, true), + vs(VerifierPhase::Final, 1, 1, 150, true), + ]; + let turns = vec![ts(0, 1_000, 500, true), ts(1, 500, 400, true)]; + let m = aggregate(&verifier, &turns); + assert!((m.final_gate_pass_rate - 2.0 / 3.0).abs() < 1e-9); + assert!((m.repair_iterations_until_green_mean - 0.5).abs() < 1e-9); + assert!((m.tokens_per_successful_fix - 750.0).abs() < 1e-9); + assert_eq!(m.samples.successful_fixes, 2); + } + + #[test] + fn pass_rate_regression_over_1pp_is_a_violation() { + let baseline = baseline(); + let mut current = baseline; + current.final_gate_pass_rate = 0.89; // dropped 2pp from 0.91 + let violations = evaluate_budget_gates(&baseline, &current); + assert!(violations + .iter() + .any(|v| v.metric == "final_gate_pass_rate")); + } + + #[test] + fn 
pass_rate_regression_under_1pp_is_ok() { + let baseline = baseline(); + let mut current = baseline; + current.final_gate_pass_rate = 0.905; // dropped 0.5pp + let violations = evaluate_budget_gates(&baseline, &current); + assert!(violations.is_empty()); + } + + #[test] + fn repair_regression_over_5pct_is_a_violation() { + let baseline = baseline(); + let mut current = baseline; + current.repair_iterations_until_green_mean = 2.2; // 10% up from 2.0 + let violations = evaluate_budget_gates(&baseline, &current); + assert!(violations + .iter() + .any(|v| v.metric == "repair_iterations_until_green_mean")); + } + + #[test] + fn tokens_regression_over_10pct_is_a_violation() { + let baseline = baseline(); + let mut current = baseline; + current.tokens_per_successful_fix = 1_200.0; // 20% up from 1000 + let violations = evaluate_budget_gates(&baseline, &current); + assert!(violations + .iter() + .any(|v| v.metric == "tokens_per_successful_fix")); + } + + #[test] + fn turn_latency_regression_over_15pct_is_a_violation() { + let baseline = baseline(); + let mut current = baseline; + current.turn_latency_p50_ms = 1_200.0; // 20% up from 1000 + let violations = evaluate_budget_gates(&baseline, &current); + assert!(violations.iter().any(|v| v.metric == "turn_latency_p50_ms")); + } + + #[test] + fn flat_metrics_emit_no_violations() { + let baseline = baseline(); + let current = baseline; + assert!(evaluate_budget_gates(&baseline, &current).is_empty()); + } + + fn verifier_trace( + name: &str, + phase: &str, + mutation_sequence: u64, + duration_ms: u64, + passed: bool, + ) -> SessionTraceRecord { + let mut attrs = serde_json::Map::new(); + attrs.insert("phase".into(), serde_json::Value::String(phase.into())); + attrs.insert( + "mutation_sequence".into(), + serde_json::Value::from(mutation_sequence), + ); + attrs.insert("duration_ms".into(), serde_json::Value::from(duration_ms)); + attrs.insert("passed".into(), serde_json::Value::Bool(passed)); + SessionTraceRecord { + session_id: "sess".into(), + sequence: 0, + 
name: name.into(), + timestamp_ms: 0, + attributes: attrs, + } + } + + fn turn_trace(tokens: u64, latency: u64, passed: bool) -> SessionTraceRecord { + let mut attrs = serde_json::Map::new(); + attrs.insert("tokens_total".into(), serde_json::Value::from(tokens)); + attrs.insert("turn_latency_ms".into(), serde_json::Value::from(latency)); + attrs.insert( + "verification_gate_passed".into(), + serde_json::Value::Bool(passed), + ); + SessionTraceRecord { + session_id: "sess".into(), + sequence: 0, + name: "turn_completed".into(), + timestamp_ms: 0, + attributes: attrs, + } + } + + #[test] + fn samples_from_traces_extracts_verifier_and_turn_records() { + let traces = vec![ + verifier_trace("verifier_ran", "quick", 0, 50, true), + verifier_trace("verifier_ran", "final", 0, 300, false), + verifier_trace("verifier_ran", "final", 0, 320, true), + turn_trace(1_000, 500, true), + verifier_trace("verifier_ran", "final", 1, 180, true), + turn_trace(500, 400, true), + verifier_trace("unrelated_event", "quick", 0, 0, true), + ]; + let (verifier, turns) = samples_from_traces(&traces); + assert_eq!(verifier.len(), 4, "unrelated events ignored"); + assert_eq!(turns.len(), 2); + let metrics = aggregate(&verifier, &turns); + assert_eq!(metrics.samples.final_samples, 3); + assert!((metrics.tokens_per_successful_fix - 750.0).abs() < 1e-9); + } + + #[test] + fn samples_from_traces_handles_missing_attributes() { + let record = SessionTraceRecord { + session_id: "sess".into(), + sequence: 0, + name: "verifier_ran".into(), + timestamp_ms: 0, + attributes: serde_json::Map::new(), + }; + let (verifier, turns) = samples_from_traces(&[record]); + assert_eq!(verifier.len(), 1); + assert_eq!(turns.len(), 0); + assert_eq!(verifier[0].duration_ms, 0); + assert!(!verifier[0].succeeded); + } + + fn baseline() -> RolloutMetrics { + RolloutMetrics { + quick_verify_latency_p50_ms: 100.0, + quick_verify_latency_p95_ms: 400.0, + final_gate_pass_rate: 0.91, + repair_iterations_until_green_mean: 2.0, + 
tokens_per_successful_fix: 1_000.0, + turn_latency_p50_ms: 1_000.0, + turn_latency_mean_ms: 1_100.0, + samples: AggregateCounts { + quick_samples: 10, + final_samples: 10, + turns: 10, + successful_fixes: 9, + }, + } + } +} diff --git a/rust/crates/runtime/src/sandbox.rs b/rust/crates/runtime/src/sandbox.rs index 45f118a9f6..b5fd1797b1 100644 --- a/rust/crates/runtime/src/sandbox.rs +++ b/rust/crates/runtime/src/sandbox.rs @@ -298,8 +298,7 @@ fn unshare_user_namespace_works() -> bool { .stdout(std::process::Stdio::null()) .stderr(std::process::Stdio::null()) .status() - .map(|s| s.success()) - .unwrap_or(false) + .is_ok_and(|s| s.success()) }) } diff --git a/rust/crates/runtime/src/session.rs b/rust/crates/runtime/src/session.rs index b97378e582..e12258d3d8 100644 --- a/rust/crates/runtime/src/session.rs +++ b/rust/crates/runtime/src/session.rs @@ -8,6 +8,7 @@ use std::time::{SystemTime, UNIX_EPOCH}; use crate::json::{JsonError, JsonValue}; use crate::usage::TokenUsage; +use crate::verifier::{VerificationPhase, VerificationReport, VerificationStatus}; const SESSION_VERSION: u32 = 1; const ROTATE_AFTER_BYTES: u64 = 256 * 1024; @@ -22,10 +23,39 @@ pub enum MessageRole { User, Assistant, Tool, + Verification, } /// Structured message content stored inside a [`Session`]. 
#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VerificationPrimaryFailureBlock { + pub label: String, + pub status: VerificationStatus, + pub failure_kind: Option, + pub output_excerpt: String, + pub step_kind: Option, + pub target_scope: Option, + pub package_name: Option, + pub package_manager: Option, + pub launcher_kind: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VerificationCompactStepBlock { + pub label: String, + pub status: VerificationStatus, + pub failure_kind: Option, + pub duration_ms: u64, + pub step_kind: Option, + pub target_scope: Option, + pub package_name: Option, + pub package_manager: Option, + pub launcher_kind: Option, +} + +/// Structured message content stored inside a [`Session`]. +#[derive(Debug, Clone, PartialEq, Eq)] +#[allow(clippy::large_enum_variant)] // VerificationReport is intentionally structured; boxing would churn call sites pub enum ContentBlock { Text { text: String, @@ -41,6 +71,18 @@ pub enum ContentBlock { output: String, is_error: bool, }, + VerificationReport { + report_id: String, + phase: VerificationPhase, + status: VerificationStatus, + summary_text: String, + adapter_id: Option, + project_root: Option, + touched_paths: Vec, + primary_failure: Option, + steps: Vec, + report_mode: Option, + }, } /// One conversation message with optional token-usage metadata. 
@@ -668,6 +710,56 @@ impl ConversationMessage { } } + #[must_use] + pub fn verification_report(report: &VerificationReport, report_mode: Option<&str>) -> Self { + Self { + role: MessageRole::Verification, + blocks: vec![ContentBlock::VerificationReport { + report_id: report.report_id.clone(), + phase: report.phase, + status: report.status, + summary_text: report.summary_text.clone(), + adapter_id: Some(report.adapter_id.clone()), + project_root: Some(report.project_root.display().to_string()), + touched_paths: report + .touched_paths + .iter() + .map(|path| path.display().to_string()) + .collect(), + primary_failure: report.primary_step().and_then(|step| { + (!step.status.is_success()).then(|| VerificationPrimaryFailureBlock { + label: step.label.clone(), + status: step.status, + failure_kind: step.failure_kind.map(|kind| kind.as_str().to_string()), + output_excerpt: step.truncated_output.clone(), + step_kind: step.step_kind.clone(), + target_scope: step.target_scope.clone(), + package_name: step.package_name.clone(), + package_manager: step.package_manager.clone(), + launcher_kind: step.launcher_kind.clone(), + }) + }), + steps: report + .steps + .iter() + .map(|step| VerificationCompactStepBlock { + label: step.label.clone(), + status: step.status, + failure_kind: step.failure_kind.map(|kind| kind.as_str().to_string()), + duration_ms: step.duration_ms, + step_kind: step.step_kind.clone(), + target_scope: step.target_scope.clone(), + package_name: step.package_name.clone(), + package_manager: step.package_manager.clone(), + launcher_kind: step.launcher_kind.clone(), + }) + .collect(), + report_mode: report_mode.map(ToOwned::to_owned), + }], + usage: None, + } + } + #[must_use] pub fn to_json(&self) -> JsonValue { let mut object = BTreeMap::new(); @@ -679,6 +771,7 @@ impl ConversationMessage { MessageRole::User => "user", MessageRole::Assistant => "assistant", MessageRole::Tool => "tool", + MessageRole::Verification => "verification", } .to_string(), ), @@ -706,6 
+799,7 @@ impl ConversationMessage { "user" => MessageRole::User, "assistant" => MessageRole::Assistant, "tool" => MessageRole::Tool, + "verification" => MessageRole::Verification, other => { return Err(SessionError::Format(format!( "unsupported message role: {other}" @@ -730,6 +824,7 @@ impl ConversationMessage { impl ContentBlock { #[must_use] + #[allow(clippy::too_many_lines)] pub fn to_json(&self) -> JsonValue { let mut object = BTreeMap::new(); match self { @@ -767,6 +862,82 @@ impl ContentBlock { object.insert("output".to_string(), JsonValue::String(output.clone())); object.insert("is_error".to_string(), JsonValue::Bool(*is_error)); } + Self::VerificationReport { + report_id, + phase, + status, + summary_text, + adapter_id, + project_root, + touched_paths, + primary_failure, + steps, + report_mode, + } => { + object.insert( + "type".to_string(), + JsonValue::String("verification_report".to_string()), + ); + object.insert( + "report_id".to_string(), + JsonValue::String(report_id.clone()), + ); + object.insert( + "phase".to_string(), + JsonValue::String(phase.as_str().to_string()), + ); + object.insert( + "status".to_string(), + JsonValue::String(status.as_str().to_string()), + ); + object.insert( + "summary_text".to_string(), + JsonValue::String(summary_text.clone()), + ); + if let Some(adapter_id) = adapter_id { + object.insert( + "adapter_id".to_string(), + JsonValue::String(adapter_id.clone()), + ); + } + if let Some(project_root) = project_root { + object.insert( + "project_root".to_string(), + JsonValue::String(project_root.clone()), + ); + } + if !touched_paths.is_empty() { + object.insert( + "touched_paths".to_string(), + JsonValue::Array( + touched_paths + .iter() + .map(|path| JsonValue::String(path.clone())) + .collect(), + ), + ); + } + if let Some(primary_failure) = primary_failure { + object.insert("primary_failure".to_string(), primary_failure.to_json()); + } + if !steps.is_empty() { + object.insert( + "steps".to_string(), + JsonValue::Array( + 
steps + .iter() + .map(VerificationCompactStepBlock::to_json) + .collect(), + ), + ); + } + if let Some(report_mode) = report_mode { + object.insert( + "report_mode".to_string(), + JsonValue::String(report_mode.clone()), + ); + } + } } JsonValue::Object(object) } @@ -797,6 +968,31 @@ impl ContentBlock { .and_then(JsonValue::as_bool) .ok_or_else(|| SessionError::Format("missing is_error".to_string()))?, }), + "verification_report" => Ok(Self::VerificationReport { + report_id: required_string(object, "report_id")?, + phase: parse_verification_phase(&required_string(object, "phase")?)?, + status: parse_verification_status(&required_string(object, "status")?)?, + summary_text: required_string(object, "summary_text")?, + adapter_id: optional_string(object, "adapter_id"), + project_root: optional_string(object, "project_root"), + touched_paths: optional_string_array(object, "touched_paths")?, + primary_failure: object + .get("primary_failure") + .map(VerificationPrimaryFailureBlock::from_json) + .transpose()?, + steps: object + .get("steps") + .and_then(JsonValue::as_array) + .map(|steps| { + steps + .iter() + .map(VerificationCompactStepBlock::from_json) + .collect::, _>>() + }) + .transpose()? 
+ .unwrap_or_default(), + report_mode: optional_string(object, "report_mode"), + }), other => Err(SessionError::Format(format!( "unsupported block type: {other}" ))), @@ -804,6 +1000,92 @@ impl ContentBlock { } } +impl VerificationPrimaryFailureBlock { + fn to_json(&self) -> JsonValue { + let mut object = BTreeMap::new(); + object.insert("label".to_string(), JsonValue::String(self.label.clone())); + object.insert( + "status".to_string(), + JsonValue::String(self.status.as_str().to_string()), + ); + object.insert( + "output_excerpt".to_string(), + JsonValue::String(self.output_excerpt.clone()), + ); + insert_optional_string(&mut object, "failure_kind", self.failure_kind.as_ref()); + insert_optional_string(&mut object, "step_kind", self.step_kind.as_ref()); + insert_optional_string(&mut object, "target_scope", self.target_scope.as_ref()); + insert_optional_string(&mut object, "package_name", self.package_name.as_ref()); + insert_optional_string( + &mut object, + "package_manager", + self.package_manager.as_ref(), + ); + insert_optional_string(&mut object, "launcher_kind", self.launcher_kind.as_ref()); + JsonValue::Object(object) + } + + fn from_json(value: &JsonValue) -> Result { + let object = value + .as_object() + .ok_or_else(|| SessionError::Format("primary_failure must be an object".to_string()))?; + Ok(Self { + label: required_string(object, "label")?, + status: parse_verification_status(&required_string(object, "status")?)?, + failure_kind: optional_string(object, "failure_kind"), + output_excerpt: required_string(object, "output_excerpt")?, + step_kind: optional_string(object, "step_kind"), + target_scope: optional_string(object, "target_scope"), + package_name: optional_string(object, "package_name"), + package_manager: optional_string(object, "package_manager"), + launcher_kind: optional_string(object, "launcher_kind"), + }) + } +} + +impl VerificationCompactStepBlock { + fn to_json(&self) -> JsonValue { + let mut object = BTreeMap::new(); + 
object.insert("label".to_string(), JsonValue::String(self.label.clone())); + object.insert( + "status".to_string(), + JsonValue::String(self.status.as_str().to_string()), + ); + object.insert( + "duration_ms".to_string(), + JsonValue::Number(i64::try_from(self.duration_ms).unwrap_or(i64::MAX)), + ); + insert_optional_string(&mut object, "failure_kind", self.failure_kind.as_ref()); + insert_optional_string(&mut object, "step_kind", self.step_kind.as_ref()); + insert_optional_string(&mut object, "target_scope", self.target_scope.as_ref()); + insert_optional_string(&mut object, "package_name", self.package_name.as_ref()); + insert_optional_string( + &mut object, + "package_manager", + self.package_manager.as_ref(), + ); + insert_optional_string(&mut object, "launcher_kind", self.launcher_kind.as_ref()); + JsonValue::Object(object) + } + + fn from_json(value: &JsonValue) -> Result { + let object = value.as_object().ok_or_else(|| { + SessionError::Format("verification step must be an object".to_string()) + })?; + Ok(Self { + label: required_string(object, "label")?, + status: parse_verification_status(&required_string(object, "status")?)?, + failure_kind: optional_string(object, "failure_kind"), + duration_ms: required_u64(object, "duration_ms")?, + step_kind: optional_string(object, "step_kind"), + target_scope: optional_string(object, "target_scope"), + package_name: optional_string(object, "package_name"), + package_manager: optional_string(object, "package_manager"), + launcher_kind: optional_string(object, "launcher_kind"), + }) + } +} + impl SessionCompaction { pub fn to_json(&self) -> Result { let mut object = BTreeMap::new(); @@ -970,6 +1252,65 @@ fn required_string( .ok_or_else(|| SessionError::Format(format!("missing {key}"))) } +fn optional_string(object: &BTreeMap, key: &str) -> Option { + object + .get(key) + .and_then(JsonValue::as_str) + .map(ToOwned::to_owned) +} + +fn optional_string_array( + object: &BTreeMap, + key: &str, +) -> Result, SessionError> { 
+ let Some(value) = object.get(key) else { + return Ok(Vec::new()); + }; + let array = value + .as_array() + .ok_or_else(|| SessionError::Format(format!("{key} must be an array")))?; + array + .iter() + .map(|item| { + item.as_str() + .map(ToOwned::to_owned) + .ok_or_else(|| SessionError::Format(format!("{key} entries must be strings"))) + }) + .collect() +} + +fn insert_optional_string( + object: &mut BTreeMap, + key: &str, + value: Option<&String>, +) { + if let Some(value) = value { + object.insert(key.to_string(), JsonValue::String(value.clone())); + } +} + +fn parse_verification_phase(value: &str) -> Result { + match value { + "quick" => Ok(VerificationPhase::Quick), + "final" => Ok(VerificationPhase::Final), + other => Err(SessionError::Format(format!( + "unsupported verification phase: {other}" + ))), + } +} + +fn parse_verification_status(value: &str) -> Result { + match value { + "passed" => Ok(VerificationStatus::Passed), + "failed" => Ok(VerificationStatus::Failed), + "skipped" => Ok(VerificationStatus::Skipped), + "unavailable" => Ok(VerificationStatus::Unavailable), + other => Err(SessionError::Format(format!( + "unsupported verification status: {other}" + ))), + } +} + fn required_u32(object: &BTreeMap, key: &str) -> Result { let value = object .get(key) @@ -1148,6 +1489,10 @@ mod tests { }; use crate::json::JsonValue; use crate::usage::TokenUsage; + use crate::verifier::{ + VerificationFailureKind, VerificationPhase, VerificationReport, VerificationStatus, + VerificationStepReport, + }; use std::fs; use std::path::{Path, PathBuf}; use std::time::{SystemTime, UNIX_EPOCH}; @@ -1450,6 +1795,140 @@ mod tests { assert_eq!(forked.workspace_root(), Some(workspace_root.as_path())); } + #[test] + fn verification_report_round_trips_structured_fields() { + let report = sample_verification_report(); + let message = ConversationMessage::verification_report(&report, Some("typed-primary")); + + let json = message.to_json(); + let restored = 
ConversationMessage::from_json(&json).expect("message should parse"); + + let ContentBlock::VerificationReport { + adapter_id, + project_root, + touched_paths, + primary_failure, + steps, + report_mode, + .. + } = &restored.blocks[0] + else { + panic!("expected verification report block"); + }; + assert_eq!(adapter_id.as_deref(), Some("rust-cargo")); + assert_eq!(project_root.as_deref(), Some("/tmp/demo-workspace")); + assert_eq!(touched_paths, &vec!["src/lib.rs".to_string()]); + assert_eq!(report_mode.as_deref(), Some("typed-primary")); + let primary_failure = primary_failure + .as_ref() + .expect("primary failure should be persisted"); + assert_eq!(primary_failure.failure_kind.as_deref(), Some("code")); + assert_eq!(primary_failure.step_kind.as_deref(), Some("cargo_clippy")); + assert_eq!(primary_failure.target_scope.as_deref(), Some("package")); + assert_eq!(primary_failure.package_name.as_deref(), Some("demo")); + assert_eq!(steps.len(), 2); + assert_eq!(steps[1].failure_kind.as_deref(), Some("code")); + assert_eq!(steps[1].package_manager, None); + } + + #[test] + fn verification_report_from_json_accepts_legacy_payload_without_structured_fields() { + let block = JsonValue::Object( + [ + ( + "type".to_string(), + JsonValue::String("verification_report".to_string()), + ), + ( + "report_id".to_string(), + JsonValue::String("legacy-report".to_string()), + ), + ("phase".to_string(), JsonValue::String("quick".to_string())), + ( + "status".to_string(), + JsonValue::String("failed".to_string()), + ), + ( + "summary_text".to_string(), + JsonValue::String("legacy verifier summary".to_string()), + ), + ] + .into_iter() + .collect(), + ); + + let parsed = ContentBlock::from_json(&block).expect("legacy payload should parse"); + + let ContentBlock::VerificationReport { + report_id, + summary_text, + adapter_id, + project_root, + touched_paths, + primary_failure, + steps, + report_mode, + .. 
+ } = parsed + else { + panic!("expected verification report block"); + }; + assert_eq!(report_id, "legacy-report"); + assert_eq!(summary_text, "legacy verifier summary"); + assert_eq!(adapter_id, None); + assert_eq!(project_root, None); + assert!(touched_paths.is_empty()); + assert_eq!(primary_failure, None); + assert!(steps.is_empty()); + assert_eq!(report_mode, None); + } + + fn sample_verification_report() -> VerificationReport { + VerificationReport { + report_id: "report-structured".to_string(), + phase: VerificationPhase::Quick, + adapter_id: "rust-cargo".to_string(), + project_root: PathBuf::from("/tmp/demo-workspace"), + touched_paths: vec![PathBuf::from("src/lib.rs")], + status: VerificationStatus::Failed, + summary_text: "[verifier:quick:rust-cargo] failed (/tmp/demo-workspace)".to_string(), + steps: vec![ + VerificationStepReport { + adapter: "rust-cargo".to_string(), + project_root: PathBuf::from("/tmp/demo-workspace"), + label: "cargo check".to_string(), + command: "cargo check -p demo".to_string(), + phase: VerificationPhase::Quick, + status: VerificationStatus::Passed, + failure_kind: None, + duration_ms: 11, + truncated_output: "ok".to_string(), + step_kind: Some("cargo_check".to_string()), + target_scope: Some("package".to_string()), + package_name: Some("demo".to_string()), + package_manager: None, + launcher_kind: None, + }, + VerificationStepReport { + adapter: "rust-cargo".to_string(), + project_root: PathBuf::from("/tmp/demo-workspace"), + label: "cargo clippy".to_string(), + command: "cargo clippy -p demo -- -D warnings".to_string(), + phase: VerificationPhase::Quick, + status: VerificationStatus::Failed, + failure_kind: Some(VerificationFailureKind::Code), + duration_ms: 22, + truncated_output: "error[E0308]: mismatched types".to_string(), + step_kind: Some("cargo_clippy".to_string()), + target_scope: Some("package".to_string()), + package_name: Some("demo".to_string()), + package_manager: None, + launcher_kind: None, + }, + ], + } + } + fn 
temp_session_path(label: &str) -> PathBuf { let nanos = SystemTime::now() .duration_since(UNIX_EPOCH) diff --git a/rust/crates/runtime/src/verifier.rs b/rust/crates/runtime/src/verifier.rs new file mode 100644 index 0000000000..9fcdc53bd4 --- /dev/null +++ b/rust/crates/runtime/src/verifier.rs @@ -0,0 +1,3228 @@ +//! Structured self-verification of code edits. +//! +//! The verifier consumes successful write/edit tool invocations, detects the +//! owning project for the touched file, and runs phase-aware validation. The +//! runtime uses the resulting structured reports both as model-visible +//! feedback and as a final gate before ending a turn in staged mode. + +use std::fmt::Write as _; +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{Duration, Instant}; + +use serde_json::Value; +use toml::Value as TomlValue; + +/// Maximum bytes kept per verification step before the summary is truncated. +const DEFAULT_MAX_OUTPUT_BYTES: usize = 2_048; +const WRITE_TOOLS: &[&str] = &["edit_file", "write_file", "Edit", "Write"]; +const ESLINT_CONFIG_FILES: &[&str] = &[ + "eslint.config.js", + "eslint.config.cjs", + "eslint.config.mjs", + ".eslintrc", + ".eslintrc.js", + ".eslintrc.cjs", + ".eslintrc.json", + ".eslintrc.yml", + ".eslintrc.yaml", +]; +const PYTHON_ROOT_MARKERS: &[&str] = &[ + "pyproject.toml", + "uv.lock", + "poetry.lock", + "requirements.txt", + "requirements-dev.txt", + "requirements-test.txt", + "setup.py", + "setup.cfg", + "tox.ini", +]; +const RUFF_CONFIG_FILES: &[&str] = &["ruff.toml", ".ruff.toml"]; +const MYPY_CONFIG_FILES: &[&str] = &["mypy.ini", ".mypy.ini"]; +const PYTEST_CONFIG_FILES: &[&str] = &["pytest.ini", "tox.ini", "setup.cfg"]; +static REPORT_COUNTER: AtomicU64 = AtomicU64::new(1); + +/// High-level phase for a verification run. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum VerificationPhase { + Quick, + Final, +} + +impl VerificationPhase { + #[must_use] + pub fn as_str(self) -> &'static str { + match self { + Self::Quick => "quick", + Self::Final => "final", + } + } +} + +/// Outcome classification for a verification step or report. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum VerificationStatus { + Passed, + Failed, + Skipped, + Unavailable, +} + +/// Refined failure classification used by structured verification steps. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum VerificationFailureKind { + Code, + Environment, + ToolUnavailable, + Config, + Timeout, +} + +impl VerificationFailureKind { + #[must_use] + pub fn as_str(self) -> &'static str { + match self { + Self::Code => "code", + Self::Environment => "environment", + Self::ToolUnavailable => "tool_unavailable", + Self::Config => "config", + Self::Timeout => "timeout", + } + } +} + +impl VerificationStatus { + #[must_use] + pub fn as_str(self) -> &'static str { + match self { + Self::Passed => "passed", + Self::Failed => "failed", + Self::Skipped => "skipped", + Self::Unavailable => "unavailable", + } + } + + #[must_use] + pub fn is_success(self) -> bool { + matches!(self, Self::Passed | Self::Skipped) + } +} + +/// Mutable-work context supplied by the runtime for a verification decision. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VerificationContext { + pub phase: VerificationPhase, + pub workspace_root: Option, + pub tool_name: String, + pub tool_input: String, + pub touched_paths: Vec, + pub mutation_sequence: u64, +} + +impl VerificationContext { + #[must_use] + pub fn from_tool_invocation( + phase: VerificationPhase, + workspace_root: Option, + tool_name: impl Into, + tool_input: impl Into, + mutation_sequence: u64, + ) -> Option { + let tool_name = tool_name.into(); + let tool_input = tool_input.into(); + let touched_path = extract_file_path(&tool_input)?; + Some(Self { + phase, + workspace_root, + tool_name, + tool_input, + touched_paths: vec![touched_path], + mutation_sequence, + }) + } +} + +/// Deduplicated project target used by staged final-gate verification. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VerificationTarget { + pub adapter_id: String, + pub project_root: PathBuf, + pub touched_paths: Vec, + pub mutation_sequence: u64, +} + +/// Structured output of one verification step. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VerificationStepReport { + pub adapter: String, + pub project_root: PathBuf, + pub label: String, + pub command: String, + pub phase: VerificationPhase, + pub status: VerificationStatus, + pub failure_kind: Option, + pub duration_ms: u64, + pub truncated_output: String, + pub step_kind: Option, + pub target_scope: Option, + pub package_name: Option, + pub package_manager: Option, + pub launcher_kind: Option, +} + +/// Structured output of a full verification pass for one adapter/root pair. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VerificationReport { + pub report_id: String, + pub phase: VerificationPhase, + pub adapter_id: String, + pub project_root: PathBuf, + pub touched_paths: Vec, + pub status: VerificationStatus, + pub summary_text: String, + pub steps: Vec, +} + +impl VerificationReport { + #[must_use] + pub fn is_success(&self) -> bool { + self.status.is_success() + } + + #[must_use] + pub fn short_summary(&self) -> String { + let mut lines = self.summary_text.lines(); + let first = lines.next().unwrap_or_default().trim(); + if first.is_empty() { + format!( + "[verifier:{}:{}] {}", + self.phase.as_str(), + self.adapter_id, + self.status.as_str() + ) + } else { + first.to_string() + } + } + + #[must_use] + pub fn primary_step(&self) -> Option<&VerificationStepReport> { + self.steps + .iter() + .find(|step| !step.status.is_success()) + .or_else(|| self.steps.first()) + } + + #[must_use] + pub fn primary_failure_kind(&self) -> Option { + self.primary_step().and_then(|step| step.failure_kind) + } + + #[must_use] + pub fn compact_payload(&self) -> Value { + let steps = self + .steps + .iter() + .map(|step| { + serde_json::json!({ + "label": step.label, + "status": step.status.as_str(), + "failure_kind": step.failure_kind.map(VerificationFailureKind::as_str), + "duration_ms": step.duration_ms, + "step_kind": step.step_kind, + "target_scope": step.target_scope, + "package_name": step.package_name, + "package_manager": step.package_manager, + "launcher_kind": step.launcher_kind, + }) + }) + .collect::>(); + let primary_failure = self.primary_step().and_then(|step| { + (!step.status.is_success()).then(|| { + serde_json::json!({ + "label": step.label, + "status": step.status.as_str(), + "failure_kind": step.failure_kind.map(VerificationFailureKind::as_str), + "output_excerpt": truncate_output(&step.truncated_output, 512), + "step_kind": step.step_kind, + "target_scope": step.target_scope, + "package_name": step.package_name, + 
"package_manager": step.package_manager, + "launcher_kind": step.launcher_kind, + }) + }) + }); + + serde_json::json!({ + "type": "verification_report", + "report_id": self.report_id, + "adapter_id": self.adapter_id, + "phase": self.phase.as_str(), + "status": self.status.as_str(), + "project_root": self.project_root.display().to_string(), + "touched_paths": self + .touched_paths + .iter() + .map(|path| path.display().to_string()) + .collect::>(), + "primary_failure": primary_failure, + "steps": steps, + "summary_text": self.short_summary(), + }) + } +} + +/// Status of the staged final gate for the completed turn. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VerificationGateStatus { + pub attempted: bool, + pub passed: bool, + pub report_ids: Vec, +} + +impl VerificationGateStatus { + #[must_use] + pub fn not_required() -> Self { + Self { + attempted: false, + passed: true, + report_ids: Vec::new(), + } + } +} + +/// Strategy that inspects completed mutations and produces verification +/// reports for the runtime. +pub trait Verifier: Send { + fn quick_verify(&self, context: &VerificationContext) -> Vec; + fn final_verify(&self, target: &VerificationTarget) -> Option; + fn final_gate_enabled(&self) -> bool { + false + } +} + +/// Declarative runtime config for the built-in multi-language verifier. 
+#[allow(clippy::struct_excessive_bools)] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CargoVerifierConfig { + pub legacy_mode: bool, + pub auto_mode: bool, + pub quick_on_write: bool, + pub final_gate: bool, + pub max_output_bytes: usize, + pub rust_check: bool, + pub rust_clippy: bool, + pub rust_fmt: bool, + pub rust_test: bool, + pub rust_timeout: Duration, + pub node_enabled: bool, + pub node_timeout: Duration, + pub python_enabled: bool, + pub python_timeout: Duration, +} + +impl Default for CargoVerifierConfig { + fn default() -> Self { + Self { + legacy_mode: true, + auto_mode: false, + quick_on_write: true, + final_gate: false, + max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES, + rust_check: true, + rust_clippy: true, + rust_fmt: true, + rust_test: true, + rust_timeout: Duration::from_mins(2), + node_enabled: true, + node_timeout: Duration::from_mins(2), + python_enabled: true, + python_timeout: Duration::from_mins(2), + } + } +} + +/// Built-in verifier registry for Rust, Node/TypeScript, and Python roots. 
+pub struct CargoVerifier { + config: CargoVerifierConfig, +} + +impl CargoVerifier { + #[must_use] + pub fn new(config: CargoVerifierConfig) -> Self { + Self { config } + } + + #[must_use] + pub fn final_gate_enabled(&self) -> bool { + !self.config.legacy_mode && self.config.final_gate + } +} + +impl Verifier for CargoVerifier { + fn quick_verify(&self, context: &VerificationContext) -> Vec { + if !WRITE_TOOLS.contains(&context.tool_name.as_str()) { + return Vec::new(); + } + let Some(path) = context.touched_paths.first() else { + return Vec::new(); + }; + + let adapters: Vec = if self.config.auto_mode { + match detect_adapter_by_marker(path) { + Some(adapter) => vec![adapter], + None => return Vec::new(), + } + } else { + vec![Adapter::Rust, Adapter::NodeTypeScript, Adapter::Python] + }; + + for adapter in adapters { + if let Some(report) = adapter.quick_verify(path, context, &self.config) { + if self.config.auto_mode && report.steps.is_empty() { + continue; + } + return vec![report]; + } + } + + Vec::new() + } + + fn final_verify(&self, target: &VerificationTarget) -> Option { + if self.config.legacy_mode || !self.config.final_gate { + return None; + } + let adapter = Adapter::by_id(&target.adapter_id)?; + adapter.final_verify(target, &self.config) + } + + fn final_gate_enabled(&self) -> bool { + !self.config.legacy_mode && self.config.final_gate + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Adapter { + Rust, + NodeTypeScript, + Python, +} + +impl Adapter { + fn by_id(value: &str) -> Option { + match value { + "rust-cargo" => Some(Self::Rust), + "node-typescript" => Some(Self::NodeTypeScript), + "python" => Some(Self::Python), + _ => None, + } + } + + fn quick_verify( + self, + path: &Path, + context: &VerificationContext, + config: &CargoVerifierConfig, + ) -> Option { + match self { + Self::Rust => verify_rust(path, context, config), + Self::NodeTypeScript => verify_node(path, context, config), + Self::Python => verify_python(path, context, 
config), + } + } + + fn final_verify( + self, + target: &VerificationTarget, + config: &CargoVerifierConfig, + ) -> Option { + match self { + Self::Rust => Some(finalize_rust(target, config)), + Self::NodeTypeScript => finalize_node(target, config), + Self::Python => finalize_python(target, config), + } + } +} + +#[derive(Debug, Clone)] +struct PlannedStep { + label: String, + command: Vec, + diagnostics: StepDiagnostics, +} + +#[derive(Debug)] +enum StepOutcome { + Passed { + body: String, + duration_ms: u64, + }, + Failed { + body: String, + duration_ms: u64, + failure_kind: Option, + }, + Unavailable { + message: String, + duration_ms: u64, + failure_kind: Option, + }, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum PackageManager { + Npm, + Pnpm, + Yarn, + Bun, +} + +impl PackageManager { + #[must_use] + fn as_str(self) -> &'static str { + match self { + Self::Npm => "npm", + Self::Pnpm => "pnpm", + Self::Yarn => "yarn", + Self::Bun => "bun", + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum VerificationTargetScope { + Workspace, + Package, + FileSet, + Project, +} + +impl VerificationTargetScope { + #[must_use] + fn as_str(self) -> &'static str { + match self { + Self::Workspace => "workspace", + Self::Package => "package", + Self::FileSet => "file_set", + Self::Project => "project", + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum RustStepKind { + Check, + Clippy, + FmtCheck, + Test, +} + +impl RustStepKind { + #[must_use] + fn as_str(self) -> &'static str { + match self { + Self::Check => "cargo_check", + Self::Clippy => "cargo_clippy", + Self::FmtCheck => "cargo_fmt_check", + Self::Test => "cargo_test", + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum PythonLauncherKind { + Uv, + Poetry, + Venv, + Global, +} + +impl PythonLauncherKind { + #[must_use] + fn as_str(self) -> &'static str { + match self { + Self::Uv => "uv", + Self::Poetry => "poetry", + Self::Venv => "venv", + Self::Global => "global", + } 
+ } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct PythonRunner { + command_prefix: Vec, +} + +#[allow(clippy::struct_excessive_bools)] +#[derive(Debug, Clone, PartialEq, Eq)] +struct RustProjectProfile { + project_root: PathBuf, + manifest_path: PathBuf, + package_name: Option, + manifest_parsed: bool, + manifest_parse_error: Option, +} + +#[allow(clippy::struct_excessive_bools)] +#[derive(Debug, Clone, PartialEq, Eq)] +struct NodeProjectProfile { + project_root: PathBuf, + package_json_path: PathBuf, + package_value: Value, + package_manager: PackageManager, + package_name: Option, +} + +#[allow(clippy::struct_excessive_bools)] +#[derive(Debug, Clone, PartialEq, Eq)] +struct PythonProjectProfile { + project_root: PathBuf, + runner: PythonRunner, + launcher_kind: PythonLauncherKind, + pyproject_parsed: bool, + has_ruff: bool, + has_mypy: bool, + has_pytest: bool, + typed_targets: Vec, + test_root_present: bool, + pyproject_path: Option, + pyproject_parse_error: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum PythonStepKind { + RuffCheck, + Mypy, + Pytest, + PyCompile, +} + +impl PythonStepKind { + #[must_use] + fn as_str(self) -> &'static str { + match self { + Self::RuffCheck => "ruff_check", + Self::Mypy => "mypy", + Self::Pytest => "pytest", + Self::PyCompile => "py_compile", + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum NodeStepKind { + Typecheck, + TscNoEmit, + Lint, + Eslint, + Test, +} + +impl NodeStepKind { + #[must_use] + fn as_str(self) -> &'static str { + match self { + Self::Typecheck => "typecheck", + Self::TscNoEmit => "tsc_no_emit", + Self::Lint => "lint", + Self::Eslint => "eslint", + Self::Test => "test", + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +enum StepDiagnostics { + Rust { + step_kind: RustStepKind, + target_scope: VerificationTargetScope, + package_name: Option, + }, + NodeTypeScript { + step_kind: NodeStepKind, + target_scope: VerificationTargetScope, + package_manager: PackageManager, + 
package_name: Option, + }, + Python { + launcher_kind: PythonLauncherKind, + step_kind: PythonStepKind, + target_scope: VerificationTargetScope, + }, +} + +impl StepDiagnostics { + #[must_use] + #[allow(clippy::unnecessary_wraps)] // feeds Option field for session compat + fn step_kind(&self) -> Option { + match self { + Self::Rust { step_kind, .. } => Some(step_kind.as_str().to_string()), + Self::NodeTypeScript { step_kind, .. } => Some(step_kind.as_str().to_string()), + Self::Python { step_kind, .. } => Some(step_kind.as_str().to_string()), + } + } + + #[must_use] + #[allow(clippy::unnecessary_wraps)] // feeds Option field for session compat + fn target_scope(&self) -> Option { + match self { + Self::Rust { target_scope, .. } + | Self::NodeTypeScript { target_scope, .. } + | Self::Python { target_scope, .. } => Some(target_scope.as_str().to_string()), + } + } + + #[must_use] + fn package_name(&self) -> Option { + match self { + Self::Rust { package_name, .. } | Self::NodeTypeScript { package_name, .. } => { + package_name.clone() + } + Self::Python { .. } => None, + } + } + + #[must_use] + fn package_manager(&self) -> Option { + match self { + Self::NodeTypeScript { + package_manager, .. + } => Some(package_manager.as_str().to_string()), + Self::Rust { .. } | Self::Python { .. } => None, + } + } + + #[must_use] + fn launcher_kind(&self) -> Option { + match self { + Self::Python { launcher_kind, .. } => Some(launcher_kind.as_str().to_string()), + Self::Rust { .. } | Self::NodeTypeScript { .. 
} => None, + } + } +} + +fn verify_rust( + path: &Path, + context: &VerificationContext, + config: &CargoVerifierConfig, +) -> Option { + let profile = build_rust_profile_for_path(path)?; + if let Some(report) = rust_config_failure_report( + &profile, + context.phase, + context.touched_paths.clone(), + config.max_output_bytes, + ) { + return Some(report); + } + let phase = context.phase; + let steps = if config.legacy_mode { + rust_legacy_steps(config, profile.package_name.clone()) + } else if phase == VerificationPhase::Quick { + rust_quick_steps(config, profile.package_name.clone()) + } else { + rust_final_steps(config, profile.package_name.clone()) + }; + Some(run_rust_steps( + &profile.project_root, + context.touched_paths.clone(), + phase, + steps, + config, + )) +} + +fn finalize_rust(target: &VerificationTarget, config: &CargoVerifierConfig) -> VerificationReport { + let profile = build_rust_profile_for_root(&target.project_root); + let package_name = profile + .as_ref() + .and_then(|profile| profile.package_name.clone()); + let steps = if config.legacy_mode { + rust_legacy_steps(config, package_name.clone()) + } else { + rust_final_steps(config, package_name) + }; + run_rust_steps( + &target.project_root, + target.touched_paths.clone(), + VerificationPhase::Final, + steps, + config, + ) +} + +fn run_rust_steps( + project_root: &Path, + touched_paths: Vec, + phase: VerificationPhase, + steps: Vec, + config: &CargoVerifierConfig, +) -> VerificationReport { + run_planned_steps( + "rust-cargo", + project_root, + touched_paths, + phase, + steps, + config.rust_timeout, + config.max_output_bytes, + ) +} + +fn rust_quick_steps( + config: &CargoVerifierConfig, + package_name: Option, +) -> Vec { + if config.quick_on_write && config.rust_check { + let mut command = vec![ + "cargo".to_string(), + "check".to_string(), + "--quiet".to_string(), + "--message-format=short".to_string(), + ]; + let target_scope = if let Some(package_name) = package_name.clone() { + 
command.extend(["-p".to_string(), package_name.clone()]); + VerificationTargetScope::Package + } else { + VerificationTargetScope::Workspace + }; + vec![PlannedStep { + label: "cargo check".to_string(), + command, + diagnostics: StepDiagnostics::Rust { + step_kind: RustStepKind::Check, + target_scope, + package_name, + }, + }] + } else { + Vec::new() + } +} + +#[allow(clippy::needless_pass_by_value)] // signature matches sibling step builders +fn rust_final_steps( + config: &CargoVerifierConfig, + package_name: Option, +) -> Vec { + let mut steps = Vec::new(); + if config.rust_fmt { + steps.push(PlannedStep { + label: "cargo fmt --check".to_string(), + command: vec![ + "cargo".to_string(), + "fmt".to_string(), + "--".to_string(), + "--check".to_string(), + ], + diagnostics: StepDiagnostics::Rust { + step_kind: RustStepKind::FmtCheck, + target_scope: VerificationTargetScope::Workspace, + package_name: package_name.clone(), + }, + }); + } + if config.rust_clippy { + let mut command = vec![ + "cargo".to_string(), + "clippy".to_string(), + "--quiet".to_string(), + "--message-format=short".to_string(), + ]; + let target_scope = if let Some(package_name) = package_name.clone() { + command.extend(["-p".to_string(), package_name.clone()]); + VerificationTargetScope::Package + } else { + VerificationTargetScope::Workspace + }; + command.extend(["--".to_string(), "-D".to_string(), "warnings".to_string()]); + steps.push(PlannedStep { + label: "cargo clippy".to_string(), + command, + diagnostics: StepDiagnostics::Rust { + step_kind: RustStepKind::Clippy, + target_scope, + package_name: package_name.clone(), + }, + }); + } + if config.rust_test { + let mut command = vec![ + "cargo".to_string(), + "test".to_string(), + "--quiet".to_string(), + "--no-fail-fast".to_string(), + ]; + let target_scope = if let Some(package_name) = package_name.clone() { + command.extend(["-p".to_string(), package_name.clone()]); + VerificationTargetScope::Package + } else { + 
VerificationTargetScope::Workspace + }; + steps.push(PlannedStep { + label: "cargo test".to_string(), + command, + diagnostics: StepDiagnostics::Rust { + step_kind: RustStepKind::Test, + target_scope, + package_name: package_name.clone(), + }, + }); + } + steps +} + +fn rust_legacy_steps( + config: &CargoVerifierConfig, + package_name: Option, +) -> Vec { + let mut steps = rust_quick_steps(config, package_name.clone()); + if config.rust_clippy { + let mut command = vec![ + "cargo".to_string(), + "clippy".to_string(), + "--quiet".to_string(), + "--message-format=short".to_string(), + ]; + let target_scope = if let Some(package_name) = package_name.clone() { + command.extend(["-p".to_string(), package_name.clone()]); + VerificationTargetScope::Package + } else { + VerificationTargetScope::Workspace + }; + command.extend(["--".to_string(), "-D".to_string(), "warnings".to_string()]); + steps.push(PlannedStep { + label: "cargo clippy".to_string(), + command, + diagnostics: StepDiagnostics::Rust { + step_kind: RustStepKind::Clippy, + target_scope, + package_name: package_name.clone(), + }, + }); + } + if config.rust_fmt { + steps.push(PlannedStep { + label: "cargo fmt --check".to_string(), + command: vec![ + "cargo".to_string(), + "fmt".to_string(), + "--".to_string(), + "--check".to_string(), + ], + diagnostics: StepDiagnostics::Rust { + step_kind: RustStepKind::FmtCheck, + target_scope: VerificationTargetScope::Workspace, + package_name: package_name.clone(), + }, + }); + } + if config.rust_test { + let mut command = vec![ + "cargo".to_string(), + "test".to_string(), + "--quiet".to_string(), + "--no-fail-fast".to_string(), + ]; + let target_scope = if let Some(package_name) = package_name.clone() { + command.extend(["-p".to_string(), package_name.clone()]); + VerificationTargetScope::Package + } else { + VerificationTargetScope::Workspace + }; + steps.push(PlannedStep { + label: "cargo test".to_string(), + command, + diagnostics: StepDiagnostics::Rust { + step_kind: 
RustStepKind::Test, + target_scope, + package_name, + }, + }); + } + steps +} + +fn verify_node( + path: &Path, + context: &VerificationContext, + config: &CargoVerifierConfig, +) -> Option { + if !config.node_enabled { + return None; + } + let package_json = nearest_file(path, "package.json")?; + let project_root = package_json.parent()?.to_path_buf(); + let profile = match build_node_profile_for_root(&project_root) { + Ok(profile) => profile?, + Err(report) => { + return Some(node_setup_failure_report( + &project_root, + context.touched_paths.clone(), + context.phase, + &report, + config.max_output_bytes, + )); + } + }; + let phase = context.phase; + let steps = if config.legacy_mode { + node_legacy_steps(&profile) + } else if phase == VerificationPhase::Quick { + if config.quick_on_write { + node_quick_steps(&profile) + } else { + Vec::new() + } + } else { + node_final_steps(&profile) + }; + Some(run_planned_steps( + "node-typescript", + &profile.project_root, + context.touched_paths.clone(), + phase, + steps, + config.node_timeout, + config.max_output_bytes, + )) +} + +fn finalize_node( + target: &VerificationTarget, + config: &CargoVerifierConfig, +) -> Option { + if !config.node_enabled { + return None; + } + let profile = match build_node_profile_for_root(&target.project_root) { + Ok(profile) => profile?, + Err(report) => { + return Some(node_setup_failure_report( + &target.project_root, + target.touched_paths.clone(), + VerificationPhase::Final, + &report, + config.max_output_bytes, + )); + } + }; + Some(run_planned_steps( + "node-typescript", + &target.project_root, + target.touched_paths.clone(), + VerificationPhase::Final, + node_final_steps(&profile), + config.node_timeout, + config.max_output_bytes, + )) +} + +#[derive(Debug)] +struct NodeSetupFailure { + label: String, + kind: VerificationFailureKind, + message: String, +} + +fn load_node_package(package_json: &Path) -> Result { + let contents = fs::read_to_string(package_json).map_err(|error| 
NodeSetupFailure { + label: "package.json read".to_string(), + kind: VerificationFailureKind::Environment, + message: format!("failed to read {}: {error}", package_json.display()), + })?; + serde_json::from_str::(&contents).map_err(|error| NodeSetupFailure { + label: "package.json parse".to_string(), + kind: VerificationFailureKind::Config, + message: format!("failed to parse {}: {error}", package_json.display()), + }) +} + +fn node_setup_failure_report( + project_root: &Path, + touched_paths: Vec, + phase: VerificationPhase, + failure: &NodeSetupFailure, + max_output_bytes: usize, +) -> VerificationReport { + let status = if failure.kind == VerificationFailureKind::Config { + VerificationStatus::Failed + } else { + VerificationStatus::Unavailable + }; + let steps = vec![VerificationStepReport { + adapter: "node-typescript".to_string(), + project_root: project_root.to_path_buf(), + label: failure.label.clone(), + command: project_root.join("package.json").display().to_string(), + phase, + status, + failure_kind: Some(failure.kind), + duration_ms: 0, + truncated_output: truncate_output(&failure.message, max_output_bytes), + step_kind: None, + target_scope: Some(VerificationTargetScope::Package.as_str().to_string()), + package_name: None, + package_manager: None, + launcher_kind: None, + }]; + let summary_text = + render_report_summary("node-typescript", project_root, phase, status, &steps); + VerificationReport { + report_id: next_report_id(), + phase, + adapter_id: "node-typescript".to_string(), + project_root: project_root.to_path_buf(), + touched_paths, + status, + summary_text, + steps, + } +} + +fn node_quick_steps(profile: &NodeProjectProfile) -> Vec { + if has_script(&profile.package_value, "typecheck") { + return vec![PlannedStep { + label: "typecheck".to_string(), + command: package_manager_run_script(profile.package_manager, "typecheck"), + diagnostics: StepDiagnostics::NodeTypeScript { + step_kind: NodeStepKind::Typecheck, + target_scope: 
VerificationTargetScope::Package, + package_manager: profile.package_manager, + package_name: profile.package_name.clone(), + }, + }]; + } + if profile.project_root.join("tsconfig.json").is_file() { + return vec![PlannedStep { + label: "tsc --noEmit".to_string(), + command: package_manager_exec(profile.package_manager, "tsc", &["--noEmit"]), + diagnostics: StepDiagnostics::NodeTypeScript { + step_kind: NodeStepKind::TscNoEmit, + target_scope: VerificationTargetScope::Package, + package_manager: profile.package_manager, + package_name: profile.package_name.clone(), + }, + }]; + } + Vec::new() +} + +fn node_final_steps(profile: &NodeProjectProfile) -> Vec { + let mut steps = Vec::new(); + if has_script(&profile.package_value, "lint") { + steps.push(PlannedStep { + label: "lint".to_string(), + command: package_manager_run_script(profile.package_manager, "lint"), + diagnostics: StepDiagnostics::NodeTypeScript { + step_kind: NodeStepKind::Lint, + target_scope: VerificationTargetScope::Package, + package_manager: profile.package_manager, + package_name: profile.package_name.clone(), + }, + }); + } else if ESLINT_CONFIG_FILES + .iter() + .any(|name| profile.project_root.join(name).is_file()) + { + steps.push(PlannedStep { + label: "eslint .".to_string(), + command: package_manager_exec(profile.package_manager, "eslint", &["."]), + diagnostics: StepDiagnostics::NodeTypeScript { + step_kind: NodeStepKind::Eslint, + target_scope: VerificationTargetScope::Package, + package_manager: profile.package_manager, + package_name: profile.package_name.clone(), + }, + }); + } + if has_script(&profile.package_value, "test") { + steps.push(PlannedStep { + label: "test".to_string(), + command: package_manager_run_script(profile.package_manager, "test"), + diagnostics: StepDiagnostics::NodeTypeScript { + step_kind: NodeStepKind::Test, + target_scope: VerificationTargetScope::Package, + package_manager: profile.package_manager, + package_name: profile.package_name.clone(), + }, + }); + } + 
steps +} + +fn node_legacy_steps(profile: &NodeProjectProfile) -> Vec { + let mut steps = node_quick_steps(profile); + steps.extend(node_final_steps(profile)); + steps +} + +fn verify_python( + path: &Path, + context: &VerificationContext, + config: &CargoVerifierConfig, +) -> Option { + verify_python_for_phase(path, &context.touched_paths, context.phase, config) +} + +fn finalize_python( + target: &VerificationTarget, + config: &CargoVerifierConfig, +) -> Option { + if !config.python_enabled { + return None; + } + let profile = build_python_profile_for_root(&target.project_root)?; + Some(build_python_report( + &profile, + target.touched_paths.clone(), + VerificationPhase::Final, + config, + )) +} + +fn verify_python_for_phase( + path: &Path, + touched_paths: &[PathBuf], + phase: VerificationPhase, + config: &CargoVerifierConfig, +) -> Option { + if !config.python_enabled { + return None; + } + let profile = build_python_profile_for_path(path)?; + Some(build_python_report( + &profile, + touched_paths.to_vec(), + phase, + config, + )) +} + +fn build_python_report( + profile: &PythonProjectProfile, + touched_paths: Vec, + phase: VerificationPhase, + config: &CargoVerifierConfig, +) -> VerificationReport { + if let Some(report) = python_config_failure_report( + profile, + phase, + touched_paths.clone(), + config.max_output_bytes, + ) { + return report; + } + + let steps = if config.legacy_mode { + python_legacy_steps(profile, &touched_paths) + } else if phase == VerificationPhase::Quick { + if config.quick_on_write { + python_quick_steps(profile, &touched_paths) + } else { + Vec::new() + } + } else { + python_final_steps(profile, &touched_paths) + }; + + run_planned_steps( + "python", + &profile.project_root, + touched_paths, + phase, + steps, + config.python_timeout, + config.max_output_bytes, + ) +} + +fn python_config_failure_report( + profile: &PythonProjectProfile, + phase: VerificationPhase, + touched_paths: Vec, + max_output_bytes: usize, +) -> Option { + if 
profile.pyproject_parsed { + return None; + } + let error = profile.pyproject_parse_error.as_ref()?; + let pyproject_path = profile.pyproject_path.as_ref().map_or_else( + || "pyproject.toml".to_string(), + |path| path.display().to_string(), + ); + let steps = vec![VerificationStepReport { + adapter: "python".to_string(), + project_root: profile.project_root.clone(), + label: "pyproject.toml parse".to_string(), + command: pyproject_path, + phase, + status: VerificationStatus::Failed, + failure_kind: Some(VerificationFailureKind::Config), + duration_ms: 0, + truncated_output: truncate_output(error, max_output_bytes), + step_kind: None, + target_scope: Some(VerificationTargetScope::Project.as_str().to_string()), + package_name: None, + package_manager: None, + launcher_kind: Some(profile.launcher_kind.as_str().to_string()), + }]; + let summary_text = render_report_summary( + "python", + &profile.project_root, + phase, + VerificationStatus::Failed, + &steps, + ); + Some(VerificationReport { + report_id: next_report_id(), + phase, + adapter_id: "python".to_string(), + project_root: profile.project_root.clone(), + touched_paths, + status: VerificationStatus::Failed, + summary_text, + steps, + }) +} + +fn python_quick_steps( + profile: &PythonProjectProfile, + touched_paths: &[PathBuf], +) -> Vec { + let python_files = python_source_targets(&profile.project_root, touched_paths); + if profile.has_ruff && !python_files.is_empty() { + return vec![python_step( + profile, + PythonStepKind::RuffCheck, + "ruff check", + python_module_command(&profile.runner, "ruff", &["check"], &python_files), + )]; + } + if profile.has_mypy { + let targets = derive_mypy_targets_from_touched(&profile.project_root, touched_paths); + if !targets.is_empty() { + return vec![python_step( + profile, + PythonStepKind::Mypy, + "mypy", + python_module_command(&profile.runner, "mypy", &[], &targets), + )]; + } + } + if python_files.is_empty() { + return Vec::new(); + } + vec![python_step( + profile, + 
PythonStepKind::PyCompile, + "python -m py_compile", + python_module_command(&profile.runner, "py_compile", &[], &python_files), + )] +} + +fn python_final_steps( + profile: &PythonProjectProfile, + touched_paths: &[PathBuf], +) -> Vec { + let mut steps = Vec::new(); + debug_assert_eq!( + profile.test_root_present, + profile.project_root.join("tests").is_dir() + ); + if profile.has_ruff { + steps.push(python_step( + profile, + PythonStepKind::RuffCheck, + "ruff check", + python_module_command( + &profile.runner, + "ruff", + &["check"], + std::slice::from_ref(&profile.project_root), + ), + )); + } + if profile.has_mypy { + let targets = if profile.typed_targets.is_empty() { + let derived = derive_mypy_targets_from_touched(&profile.project_root, touched_paths); + if derived.is_empty() { + vec![profile.project_root.clone()] + } else { + derived + } + } else { + profile.typed_targets.clone() + }; + steps.push(python_step( + profile, + PythonStepKind::Mypy, + "mypy", + python_module_command(&profile.runner, "mypy", &[], &targets), + )); + } + if profile.has_pytest { + steps.push(python_step( + profile, + PythonStepKind::Pytest, + "pytest", + python_module_command( + &profile.runner, + "pytest", + &[], + std::slice::from_ref(&profile.project_root), + ), + )); + } + steps +} + +fn python_legacy_steps( + profile: &PythonProjectProfile, + touched_paths: &[PathBuf], +) -> Vec { + let mut steps = python_quick_steps(profile, touched_paths); + steps.extend(python_final_steps(profile, touched_paths)); + dedupe_steps(&mut steps); + steps +} + +fn python_step( + profile: &PythonProjectProfile, + step_kind: PythonStepKind, + label: &str, + command: Vec, +) -> PlannedStep { + PlannedStep { + label: label.to_string(), + command, + diagnostics: StepDiagnostics::Python { + launcher_kind: profile.launcher_kind, + step_kind, + target_scope: if matches!( + step_kind, + PythonStepKind::Pytest | PythonStepKind::RuffCheck + ) { + VerificationTargetScope::Project + } else { + 
VerificationTargetScope::FileSet + }, + }, + } +} + +fn dedupe_steps(steps: &mut Vec) { + let mut seen = Vec::::new(); + steps.retain(|step| { + let key = step.label.clone(); + if seen.contains(&key) { + false + } else { + seen.push(key); + true + } + }); +} + +#[allow(clippy::too_many_lines)] +fn run_planned_steps( + adapter_id: &str, + project_root: &Path, + touched_paths: Vec, + phase: VerificationPhase, + steps: Vec, + timeout: Duration, + max_output_bytes: usize, +) -> VerificationReport { + let mut reports = Vec::new(); + let mut report_status = if steps.is_empty() { + VerificationStatus::Skipped + } else { + VerificationStatus::Passed + }; + let mut skip_remaining = false; + + for step in steps { + if skip_remaining { + reports.push(VerificationStepReport { + adapter: adapter_id.to_string(), + project_root: project_root.to_path_buf(), + label: step.label.clone(), + command: step.command.join(" "), + phase, + status: VerificationStatus::Skipped, + failure_kind: None, + duration_ms: 0, + truncated_output: String::new(), + step_kind: step.diagnostics.step_kind(), + target_scope: step.diagnostics.target_scope(), + package_name: step.diagnostics.package_name(), + package_manager: step.diagnostics.package_manager(), + launcher_kind: step.diagnostics.launcher_kind(), + }); + continue; + } + + let outcome = run_step(project_root, &step, timeout, max_output_bytes); + match outcome { + StepOutcome::Passed { body, duration_ms } => reports.push(VerificationStepReport { + adapter: adapter_id.to_string(), + project_root: project_root.to_path_buf(), + label: step.label.clone(), + command: step.command.join(" "), + phase, + status: VerificationStatus::Passed, + failure_kind: None, + duration_ms, + truncated_output: body, + step_kind: step.diagnostics.step_kind(), + target_scope: step.diagnostics.target_scope(), + package_name: step.diagnostics.package_name(), + package_manager: step.diagnostics.package_manager(), + launcher_kind: step.diagnostics.launcher_kind(), + }), + 
StepOutcome::Failed { + body, + duration_ms, + failure_kind, + } => { + report_status = VerificationStatus::Failed; + skip_remaining = true; + reports.push(VerificationStepReport { + adapter: adapter_id.to_string(), + project_root: project_root.to_path_buf(), + label: step.label.clone(), + command: step.command.join(" "), + phase, + status: VerificationStatus::Failed, + failure_kind, + duration_ms, + truncated_output: body, + step_kind: step.diagnostics.step_kind(), + target_scope: step.diagnostics.target_scope(), + package_name: step.diagnostics.package_name(), + package_manager: step.diagnostics.package_manager(), + launcher_kind: step.diagnostics.launcher_kind(), + }); + } + StepOutcome::Unavailable { + message, + duration_ms, + failure_kind, + } => { + report_status = VerificationStatus::Unavailable; + skip_remaining = true; + reports.push(VerificationStepReport { + adapter: adapter_id.to_string(), + project_root: project_root.to_path_buf(), + label: step.label.clone(), + command: step.command.join(" "), + phase, + status: VerificationStatus::Unavailable, + failure_kind, + duration_ms, + truncated_output: message, + step_kind: step.diagnostics.step_kind(), + target_scope: step.diagnostics.target_scope(), + package_name: step.diagnostics.package_name(), + package_manager: step.diagnostics.package_manager(), + launcher_kind: step.diagnostics.launcher_kind(), + }); + } + } + } + + let summary_text = + render_report_summary(adapter_id, project_root, phase, report_status, &reports); + VerificationReport { + report_id: next_report_id(), + phase, + adapter_id: adapter_id.to_string(), + project_root: project_root.to_path_buf(), + touched_paths, + status: report_status, + summary_text, + steps: reports, + } +} + +fn render_report_summary( + adapter_id: &str, + project_root: &Path, + phase: VerificationPhase, + status: VerificationStatus, + steps: &[VerificationStepReport], +) -> String { + let mut summary = format!( + "[verifier:{}:{}] {} ({})", + phase.as_str(), + 
adapter_id, + status.as_str(), + project_root.display() + ); + if steps.is_empty() { + summary.push_str("\n[verifier] no verification steps were planned"); + return summary; + } + let primary_idx = steps + .iter() + .position(|step| !step.status.is_success()) + .unwrap_or(0); + for (idx, step) in steps.iter().enumerate() { + let label = match step.status { + VerificationStatus::Passed => "ok", + VerificationStatus::Failed => "FAIL", + VerificationStatus::Skipped => "skipped", + VerificationStatus::Unavailable => "unavailable", + }; + let failure_suffix = step + .failure_kind + .map(|kind| format!(" ({})", kind.as_str())) + .unwrap_or_default(); + let _ = writeln!( + summary, + "\n[verifier] {}: {label}{failure_suffix}", + step.label + ); + if idx == primary_idx && !step.truncated_output.trim().is_empty() { + summary.push_str(&step.truncated_output); + } + } + summary.trim_end().to_string() +} + +fn package_manager_run_script(manager: PackageManager, script: &str) -> Vec { + match manager { + PackageManager::Npm => vec![ + "npm".to_string(), + "run".to_string(), + "--silent".to_string(), + script.to_string(), + ], + PackageManager::Pnpm => vec!["pnpm".to_string(), "run".to_string(), script.to_string()], + PackageManager::Yarn => vec!["yarn".to_string(), script.to_string()], + PackageManager::Bun => vec!["bun".to_string(), "run".to_string(), script.to_string()], + } +} + +fn package_manager_exec(manager: PackageManager, binary: &str, args: &[&str]) -> Vec { + let mut command = match manager { + PackageManager::Npm => vec![ + "npm".to_string(), + "exec".to_string(), + "--".to_string(), + binary.to_string(), + ], + PackageManager::Pnpm => vec!["pnpm".to_string(), "exec".to_string(), binary.to_string()], + PackageManager::Yarn => vec!["yarn".to_string(), "exec".to_string(), binary.to_string()], + PackageManager::Bun => vec!["bun".to_string(), "x".to_string(), binary.to_string()], + }; + command.extend(args.iter().map(ToString::to_string)); + command +} + +fn 
python_module_command( + runner: &PythonRunner, + module: &str, + extra_args: &[&str], + paths: &[PathBuf], +) -> Vec { + let mut command = runner.command_prefix.clone(); + command.push(module.to_string()); + command.extend(extra_args.iter().map(ToString::to_string)); + command.extend(paths.iter().map(|path| path.display().to_string())); + command +} + +fn has_script(package_value: &Value, script: &str) -> bool { + package_value + .get("scripts") + .and_then(Value::as_object) + .is_some_and(|scripts| scripts.get(script).and_then(Value::as_str).is_some()) +} + +fn detect_package_manager(root: &Path) -> PackageManager { + if root.join("pnpm-lock.yaml").is_file() { + PackageManager::Pnpm + } else if root.join("yarn.lock").is_file() { + PackageManager::Yarn + } else if root.join("bun.lockb").is_file() || root.join("bun.lock").is_file() { + PackageManager::Bun + } else { + PackageManager::Npm + } +} + +fn build_rust_profile_for_path(path: &Path) -> Option { + let manifest = nearest_file(path, "Cargo.toml")?; + build_rust_profile_from_manifest(&manifest) +} + +fn build_rust_profile_for_root(root: &Path) -> Option { + let manifest = normalize_local_path(root)?.join("Cargo.toml"); + build_rust_profile_from_manifest(&manifest) +} + +fn build_rust_profile_from_manifest(manifest_path: &Path) -> Option { + let manifest_path = normalize_local_path(manifest_path)?; + let project_root = manifest_path.parent()?.to_path_buf(); + let (manifest_parsed, manifest_value, manifest_parse_error) = + parse_optional_toml_file(&manifest_path, "Cargo.toml"); + let package_name = manifest_value + .as_ref() + .and_then(|value| toml_value_at(value, &["package", "name"])) + .and_then(TomlValue::as_str) + .map(ToOwned::to_owned); + Some(RustProjectProfile { + project_root, + manifest_path, + package_name, + manifest_parsed, + manifest_parse_error, + }) +} + +fn rust_config_failure_report( + profile: &RustProjectProfile, + phase: VerificationPhase, + touched_paths: Vec, + max_output_bytes: usize, +) 
-> Option { + if profile.manifest_parsed { + return None; + } + let error = profile.manifest_parse_error.as_ref()?; + let steps = vec![VerificationStepReport { + adapter: "rust-cargo".to_string(), + project_root: profile.project_root.clone(), + label: "Cargo.toml parse".to_string(), + command: profile.manifest_path.display().to_string(), + phase, + status: VerificationStatus::Failed, + failure_kind: Some(VerificationFailureKind::Config), + duration_ms: 0, + truncated_output: truncate_output(error, max_output_bytes), + step_kind: None, + target_scope: Some(VerificationTargetScope::Workspace.as_str().to_string()), + package_name: profile.package_name.clone(), + package_manager: None, + launcher_kind: None, + }]; + let summary_text = render_report_summary( + "rust-cargo", + &profile.project_root, + phase, + VerificationStatus::Failed, + &steps, + ); + Some(VerificationReport { + report_id: next_report_id(), + phase, + adapter_id: "rust-cargo".to_string(), + project_root: profile.project_root.clone(), + touched_paths, + status: VerificationStatus::Failed, + summary_text, + steps, + }) +} + +fn build_node_profile_for_root( + root: &Path, +) -> Result, NodeSetupFailure> { + let Some(project_root) = normalize_local_path(root) else { + return Ok(None); + }; + let package_json_path = project_root.join("package.json"); + let package_value = load_node_package(&package_json_path)?; + let package_name = package_value + .get("name") + .and_then(Value::as_str) + .map(ToOwned::to_owned); + Ok(Some(NodeProjectProfile { + package_manager: detect_package_manager(&project_root), + package_json_path, + project_root, + package_value, + package_name, + })) +} + +fn build_python_profile_for_path(path: &Path) -> Option { + let project_root = nearest_python_root(path)?; + build_python_profile_for_root(&project_root) +} + +fn build_python_profile_for_root(root: &Path) -> Option { + let project_root = normalize_local_path(root)?; + let pyproject_path = project_root.join("pyproject.toml"); + 
let (pyproject_parsed, pyproject_value, pyproject_parse_error) = + parse_optional_pyproject(&pyproject_path); + let (runner, launcher_kind) = detect_python_runner(&project_root); + let test_root_present = project_root.join("tests").is_dir(); + let has_ruff = python_has_ruff(&project_root, pyproject_value.as_ref()); + let has_mypy = python_has_mypy(&project_root, pyproject_value.as_ref()); + let has_pytest = python_has_pytest(&project_root, pyproject_value.as_ref(), test_root_present); + let typed_targets = python_typed_targets(&project_root, pyproject_value.as_ref()); + + Some(PythonProjectProfile { + project_root, + runner, + launcher_kind, + pyproject_parsed, + has_ruff, + has_mypy, + has_pytest, + typed_targets, + test_root_present, + pyproject_path: pyproject_path.is_file().then_some(pyproject_path), + pyproject_parse_error, + }) +} + +fn parse_optional_pyproject(path: &Path) -> (bool, Option, Option) { + parse_optional_toml_file(path, "pyproject.toml") +} + +fn parse_optional_toml_file( + path: &Path, + display_name: &str, +) -> (bool, Option, Option) { + if !path.is_file() { + return (true, None, None); + } + match fs::read_to_string(path) { + Ok(contents) => match contents.parse::() { + Ok(value) => (true, Some(value), None), + Err(error) => ( + false, + None, + Some(format!("failed to parse {display_name}: {error}")), + ), + }, + Err(error) => ( + false, + None, + Some(format!("failed to read {display_name}: {error}")), + ), + } +} + +fn detect_python_runner(root: &Path) -> (PythonRunner, PythonLauncherKind) { + if root.join("uv.lock").is_file() { + return ( + PythonRunner { + command_prefix: vec![ + "uv".to_string(), + "run".to_string(), + "python".to_string(), + "-m".to_string(), + ], + }, + PythonLauncherKind::Uv, + ); + } + if root.join("poetry.lock").is_file() { + return ( + PythonRunner { + command_prefix: vec![ + "poetry".to_string(), + "run".to_string(), + "python".to_string(), + "-m".to_string(), + ], + }, + PythonLauncherKind::Poetry, + ); + } + 
if let Some(interpreter) = find_venv_python(root) { + return ( + PythonRunner { + command_prefix: vec![interpreter.display().to_string(), "-m".to_string()], + }, + PythonLauncherKind::Venv, + ); + } + ( + PythonRunner { + command_prefix: vec!["python".to_string(), "-m".to_string()], + }, + PythonLauncherKind::Global, + ) +} + +fn find_venv_python(root: &Path) -> Option { + let env_names = [".venv", "venv", "env"]; + let candidate_suffixes = if cfg!(windows) { + vec![ + PathBuf::from("Scripts/python.exe"), + PathBuf::from("Scripts/python"), + ] + } else { + vec![PathBuf::from("bin/python"), PathBuf::from("bin/python3")] + }; + for env_name in env_names { + for suffix in &candidate_suffixes { + let candidate = root.join(env_name).join(suffix); + if candidate.is_file() { + return Some(candidate); + } + } + } + None +} + +fn python_has_ruff(root: &Path, pyproject: Option<&TomlValue>) -> bool { + RUFF_CONFIG_FILES + .iter() + .any(|name| root.join(name).is_file()) + || pyproject.is_some_and(|value| toml_contains_path(value, &["tool", "ruff"])) +} + +fn python_has_mypy(root: &Path, pyproject: Option<&TomlValue>) -> bool { + MYPY_CONFIG_FILES + .iter() + .any(|name| root.join(name).is_file()) + || pyproject.is_some_and(|value| toml_contains_path(value, &["tool", "mypy"])) + || file_contains(root.join("setup.cfg"), "[mypy]") +} + +fn python_has_pytest(root: &Path, pyproject: Option<&TomlValue>, test_root_present: bool) -> bool { + PYTEST_CONFIG_FILES + .iter() + .any(|name| root.join(name).is_file()) + || root.join("conftest.py").is_file() + || test_root_present + || pyproject + .is_some_and(|value| toml_contains_path(value, &["tool", "pytest", "ini_options"])) +} + +fn python_typed_targets(root: &Path, pyproject: Option<&TomlValue>) -> Vec { + let Some(pyproject) = pyproject else { + return Vec::new(); + }; + let mut targets = toml_string_targets(pyproject, &["tool", "mypy", "files"]) + .into_iter() + .map(|value| { + let path = PathBuf::from(value); + if 
path.is_absolute() { + path + } else { + root.join(path) + } + }) + .collect::>(); + dedupe_paths(&mut targets); + targets +} + +fn derive_mypy_targets_from_touched(root: &Path, touched_paths: &[PathBuf]) -> Vec { + let python_files = python_source_targets(root, touched_paths); + let mut targets = python_files + .iter() + .map(|path| package_root_for_python_file(root, path)) + .collect::>(); + dedupe_paths(&mut targets); + targets +} + +fn python_source_targets(root: &Path, touched_paths: &[PathBuf]) -> Vec { + let mut paths = touched_paths + .iter() + .filter_map(|path| normalize_project_path(root, path)) + .filter(|path| { + path.extension() + .and_then(|ext| ext.to_str()) + .is_some_and(|ext| ext.eq_ignore_ascii_case("py")) + }) + .collect::>(); + dedupe_paths(&mut paths); + paths +} + +fn package_root_for_python_file(root: &Path, path: &Path) -> PathBuf { + let Some(mut cursor) = path.parent().map(Path::to_path_buf) else { + return path.to_path_buf(); + }; + let mut package_root = None; + loop { + if !cursor.starts_with(root) || !cursor.join("__init__.py").is_file() { + break; + } + package_root = Some(cursor.clone()); + let Some(parent) = cursor.parent() else { + break; + }; + cursor = parent.to_path_buf(); + } + package_root.unwrap_or_else(|| path.to_path_buf()) +} + +fn dedupe_paths(paths: &mut Vec) { + let mut seen = Vec::::new(); + paths.retain(|path| { + if seen.iter().any(|existing| existing == path) { + false + } else { + seen.push(path.clone()); + true + } + }); +} + +fn normalize_local_path(path: &Path) -> Option { + if path.is_absolute() { + Some(path.to_path_buf()) + } else { + std::env::current_dir().ok().map(|cwd| cwd.join(path)) + } +} + +fn normalize_project_path(root: &Path, path: &Path) -> Option { + let absolute = normalize_local_path(path)?; + absolute.starts_with(root).then_some(absolute) +} + +fn toml_contains_path(value: &TomlValue, path: &[&str]) -> bool { + toml_value_at(value, path).is_some() +} + +fn toml_string_targets(value: 
&TomlValue, path: &[&str]) -> Vec { + let Some(value) = toml_value_at(value, path) else { + return Vec::new(); + }; + if let Some(raw) = value.as_str() { + return raw + .split([',', '\n']) + .map(str::trim) + .filter(|part| !part.is_empty()) + .map(ToOwned::to_owned) + .collect(); + } + value + .as_array() + .into_iter() + .flatten() + .filter_map(TomlValue::as_str) + .map(str::trim) + .filter(|part| !part.is_empty()) + .map(ToOwned::to_owned) + .collect() +} + +fn toml_value_at<'a>(value: &'a TomlValue, path: &[&str]) -> Option<&'a TomlValue> { + let mut current = value; + for key in path { + current = current.get(*key)?; + } + Some(current) +} + +fn file_contains(path: impl AsRef, needle: &str) -> bool { + fs::read_to_string(path.as_ref()).is_ok_and(|contents| contents.contains(needle)) +} + +/// Walks ancestors of `start` and returns the adapter whose marker file is found first +/// (closest wins). Explicit markers per spec Fase 5: Cargo.toml → Rust, package.json → Node, +/// any `PYTHON_ROOT_MARKERS` entry → Python. 
+fn detect_adapter_by_marker(start: &Path) -> Option { + let start = if start.is_absolute() { + start.to_path_buf() + } else { + std::env::current_dir().ok()?.join(start) + }; + let mut cursor = if start.is_dir() { + start + } else { + start.parent()?.to_path_buf() + }; + loop { + if cursor.join("Cargo.toml").is_file() { + return Some(Adapter::Rust); + } + if cursor.join("package.json").is_file() { + return Some(Adapter::NodeTypeScript); + } + for marker in PYTHON_ROOT_MARKERS { + if cursor.join(marker).is_file() { + return Some(Adapter::Python); + } + } + if !cursor.pop() { + return None; + } + } +} + +fn nearest_file(start: &Path, file_name: &str) -> Option { + let start = if start.is_absolute() { + start.to_path_buf() + } else { + std::env::current_dir().ok()?.join(start) + }; + let mut cursor = if start.is_dir() { + start + } else { + start.parent()?.to_path_buf() + }; + loop { + let candidate = cursor.join(file_name); + if candidate.is_file() { + return Some(candidate); + } + if !cursor.pop() { + return None; + } + } +} + +fn nearest_python_root(start: &Path) -> Option { + let start = normalize_local_path(start)?; + let mut cursor = if start.is_dir() { + start + } else { + start.parent()?.to_path_buf() + }; + loop { + for marker in PYTHON_ROOT_MARKERS { + if cursor.join(marker).is_file() { + return Some(cursor.clone()); + } + } + if !cursor.pop() { + return None; + } + } +} + +#[allow(clippy::unnecessary_wraps)] // feeds Option field +fn classify_step_failure(step: &PlannedStep, body: &str) -> Option { + match step.diagnostics { + StepDiagnostics::Rust { + step_kind, + target_scope: _, + package_name: _, + } => Some(classify_rust_failure_kind(step_kind, body)), + StepDiagnostics::NodeTypeScript { + step_kind, + target_scope: _, + package_manager, + package_name: _, + } => Some(classify_node_failure_kind(step_kind, package_manager, body)), + StepDiagnostics::Python { + launcher_kind, + step_kind, + target_scope: _, + } => 
Some(classify_python_failure_kind(launcher_kind, step_kind, body)), + } +} + +#[allow(clippy::unnecessary_wraps)] // feeds Option field +fn classify_step_timeout(step: &PlannedStep) -> Option { + match step.diagnostics { + StepDiagnostics::Rust { .. } + | StepDiagnostics::NodeTypeScript { .. } + | StepDiagnostics::Python { .. } => Some(VerificationFailureKind::Timeout), + } +} + +#[allow(clippy::unnecessary_wraps)] // feeds Option field +fn classify_step_unavailable(step: &PlannedStep, message: &str) -> Option { + match step.diagnostics { + StepDiagnostics::Rust { step_kind, .. } => { + Some(classify_rust_unavailable_kind(step_kind, message)) + } + StepDiagnostics::NodeTypeScript { + step_kind, + package_manager, + .. + } => Some(classify_node_unavailable_kind( + step_kind, + package_manager, + message, + )), + StepDiagnostics::Python { launcher_kind, .. } => { + Some(classify_python_unavailable_kind(launcher_kind, message)) + } + } +} + +fn classify_rust_failure_kind(step_kind: RustStepKind, body: &str) -> VerificationFailureKind { + let lower = body.to_ascii_lowercase(); + if lower.contains("failed to parse manifest") + || lower.contains("could not parse manifest") + || lower.contains("invalid table header") + || (lower.contains("cargo.toml") && lower.contains("parse")) + { + return VerificationFailureKind::Config; + } + if is_rust_tool_unavailable(step_kind, &lower) { + return VerificationFailureKind::ToolUnavailable; + } + if lower.contains("toolchain") + || lower.contains("rustup") + || lower.contains("linker") + || lower.contains("target may not be installed") + || lower.contains("failed to run custom build command") + { + return VerificationFailureKind::Environment; + } + VerificationFailureKind::Code +} + +fn classify_rust_unavailable_kind( + step_kind: RustStepKind, + message: &str, +) -> VerificationFailureKind { + let lower = message.to_ascii_lowercase(); + if is_rust_tool_unavailable(step_kind, &lower) || lower.contains("cargo") { + 
VerificationFailureKind::ToolUnavailable + } else { + VerificationFailureKind::Environment + } +} + +fn is_rust_tool_unavailable(step_kind: RustStepKind, lower: &str) -> bool { + let tool_name = match step_kind { + RustStepKind::Check | RustStepKind::Clippy | RustStepKind::Test => "cargo", + RustStepKind::FmtCheck => "rustfmt", + }; + lower.contains("no such subcommand") + || lower.contains("command not found") + || lower.contains("not recognized as an internal or external command") + || lower.contains("is not installed") + || lower.contains(tool_name) && lower.contains("not found") +} + +fn classify_node_failure_kind( + step_kind: NodeStepKind, + package_manager: PackageManager, + body: &str, +) -> VerificationFailureKind { + let lower = body.to_ascii_lowercase(); + if lower.contains("tsconfig") + && (lower.contains("parse") || lower.contains("unknown compiler option")) + { + return VerificationFailureKind::Config; + } + if lower.contains("eslint") && lower.contains("configuration") { + return VerificationFailureKind::Config; + } + if is_node_tool_unavailable(step_kind, package_manager, &lower) { + return VerificationFailureKind::ToolUnavailable; + } + if lower.contains("node_modules") + || lower.contains("lockfile") + || lower.contains("package manager") + || lower.contains("missing script") + { + return VerificationFailureKind::Environment; + } + VerificationFailureKind::Code +} + +fn classify_node_unavailable_kind( + step_kind: NodeStepKind, + package_manager: PackageManager, + message: &str, +) -> VerificationFailureKind { + let lower = message.to_ascii_lowercase(); + if is_node_tool_unavailable(step_kind, package_manager, &lower) { + VerificationFailureKind::ToolUnavailable + } else { + VerificationFailureKind::Environment + } +} + +fn is_node_tool_unavailable( + step_kind: NodeStepKind, + package_manager: PackageManager, + lower: &str, +) -> bool { + let tool_name = match step_kind { + NodeStepKind::Typecheck => "typecheck", + NodeStepKind::TscNoEmit => 
"tsc", + NodeStepKind::Lint => "lint", + NodeStepKind::Eslint => "eslint", + NodeStepKind::Test => "test", + }; + lower.contains("command not found") + || lower.contains("not recognized as an internal or external command") + || lower.contains("could not determine executable to run") + || lower.contains(package_manager.as_str()) && lower.contains("not found") + || lower.contains(tool_name) && lower.contains("not found") +} + +fn classify_python_failure_kind( + launcher_kind: PythonLauncherKind, + step_kind: PythonStepKind, + body: &str, +) -> VerificationFailureKind { + let lower = body.to_ascii_lowercase(); + if is_python_config_failure(&lower) { + return VerificationFailureKind::Config; + } + if is_python_tool_unavailable(step_kind, &lower) { + return VerificationFailureKind::ToolUnavailable; + } + if is_python_environment_failure(launcher_kind, &lower) { + return VerificationFailureKind::Environment; + } + VerificationFailureKind::Code +} + +fn classify_python_unavailable_kind( + launcher_kind: PythonLauncherKind, + message: &str, +) -> VerificationFailureKind { + let lower = message.to_ascii_lowercase(); + if matches!( + launcher_kind, + PythonLauncherKind::Uv | PythonLauncherKind::Poetry + ) && (lower.contains("not found") + || lower.contains("cannot find") + || lower.contains("could not find")) + { + VerificationFailureKind::ToolUnavailable + } else { + VerificationFailureKind::Environment + } +} + +fn is_python_config_failure(lower: &str) -> bool { + (lower.contains("pyproject.toml") + && (lower.contains("parse") || lower.contains("invalid") || lower.contains("config"))) + || lower.contains("invalid configuration") + || lower.contains("failed to parse") + || lower.contains("toml parse error") +} + +fn is_python_tool_unavailable(step_kind: PythonStepKind, lower: &str) -> bool { + let module_name = match step_kind { + PythonStepKind::RuffCheck => "ruff", + PythonStepKind::Mypy => "mypy", + PythonStepKind::Pytest => "pytest", + PythonStepKind::PyCompile => 
"py_compile", + }; + lower.contains(&format!("no module named {module_name}")) + || lower.contains(&format!("no module named '{module_name}'")) + || lower.contains(&format!("module named {module_name}")) + || lower.contains(&format!("{module_name} is not installed")) + || (lower.contains("command not found") && lower.contains(module_name)) + || (lower.contains("not recognized as an internal or external command") + && lower.contains(module_name)) +} + +fn is_python_environment_failure(launcher_kind: PythonLauncherKind, lower: &str) -> bool { + if lower.contains("virtualenv") + || lower.contains("venv") + || lower.contains("interpreter") + || lower.contains("dependency resolution") + || lower.contains("environment") + || lower.contains("failed to create") + || lower.contains("no such file or directory") + || lower.contains("cannot find the path specified") + || lower.contains("poetry could not find") + { + return true; + } + matches!( + launcher_kind, + PythonLauncherKind::Venv | PythonLauncherKind::Global + ) && (lower.contains("python executable") || lower.contains("python was not found")) +} + +fn run_step( + cwd: &Path, + step: &PlannedStep, + timeout: Duration, + max_output_bytes: usize, +) -> StepOutcome { + let mut command = spawnable_command(&step.command[0]); + command.current_dir(cwd); + command.stdin(std::process::Stdio::null()); + command.stdout(std::process::Stdio::piped()); + command.stderr(std::process::Stdio::piped()); + if step.command.first().is_some_and(|bin| { + let name = std::path::Path::new(bin) + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or(bin); + name == "cargo" + }) { + command.env("CARGO_TERM_COLOR", "never"); + } + for arg in step.command.iter().skip(1) { + command.arg(arg); + } + + let started = Instant::now(); + match spawn_with_timeout(command, timeout) { + Ok(output) => { + let duration_ms = duration_millis_u64(started.elapsed()); + let mut body = String::new(); + body.push_str(&String::from_utf8_lossy(&output.stdout)); + if 
!output.stderr.is_empty() { + if !body.is_empty() { + body.push('\n'); + } + body.push_str(&String::from_utf8_lossy(&output.stderr)); + } + let body = truncate_output(&body, max_output_bytes); + if output.status.success() { + StepOutcome::Passed { body, duration_ms } + } else { + let failure_kind = classify_step_failure(step, &body); + StepOutcome::Failed { + body, + duration_ms, + failure_kind, + } + } + } + Err(SpawnError::Timeout) => { + let body = truncate_output( + &format!("step timed out after {}s", timeout.as_secs()), + max_output_bytes, + ); + StepOutcome::Failed { + failure_kind: classify_step_timeout(step), + body, + duration_ms: duration_millis_u64(started.elapsed()), + } + } + Err(SpawnError::Io(error)) => { + let message = truncate_output(&error.to_string(), max_output_bytes); + StepOutcome::Unavailable { + failure_kind: classify_step_unavailable(step, &message), + message, + duration_ms: duration_millis_u64(started.elapsed()), + } + } + } +} + +fn spawnable_command(program: &str) -> Command { + #[cfg(windows)] + { + if let Some(resolved) = resolve_windows_program(program) { + if uses_cmd_wrapper(&resolved) { + let mut command = Command::new("cmd"); + command.arg("/C").arg(resolved); + return command; + } + return Command::new(resolved); + } + } + + Command::new(program) +} + +#[cfg(windows)] +fn resolve_windows_program(program: &str) -> Option { + let path = Path::new(program); + if path.components().count() > 1 || path.is_absolute() { + return resolve_windows_path_candidate(path); + } + + let path_entries = std::env::var_os("PATH") + .into_iter() + .flat_map(|paths| std::env::split_paths(&paths).collect::>()) + .collect::>(); + for entry in path_entries { + let candidate = entry.join(program); + if let Some(resolved) = resolve_windows_path_candidate(&candidate) { + return Some(resolved); + } + } + None +} + +#[cfg(windows)] +fn resolve_windows_path_candidate(candidate: &Path) -> Option { + if candidate.is_file() { + return 
Some(candidate.to_path_buf()); + } + if candidate.extension().is_some() { + return None; + } + + for extension in windows_path_extensions() { + let trimmed = extension.trim(); + if trimmed.is_empty() { + continue; + } + let suffix = trimmed.strip_prefix('.').unwrap_or(trimmed); + let path = candidate.with_extension(suffix); + if path.is_file() { + return Some(path); + } + } + None +} + +#[cfg(windows)] +fn windows_path_extensions() -> Vec { + std::env::var("PATHEXT") + .ok() + .map(|value| { + value + .split(';') + .map(str::trim) + .filter(|entry| !entry.is_empty()) + .map(ToOwned::to_owned) + .collect::>() + }) + .filter(|values| !values.is_empty()) + .unwrap_or_else(|| { + vec![ + ".COM".to_string(), + ".EXE".to_string(), + ".BAT".to_string(), + ".CMD".to_string(), + ] + }) +} + +#[cfg(windows)] +fn uses_cmd_wrapper(path: &Path) -> bool { + path.extension() + .and_then(|ext| ext.to_str()) + .is_some_and(|ext| ext.eq_ignore_ascii_case("cmd") || ext.eq_ignore_ascii_case("bat")) +} + +fn duration_millis_u64(duration: Duration) -> u64 { + u64::try_from(duration.as_millis()).unwrap_or(u64::MAX) +} + +#[derive(Debug)] +enum SpawnError { + Timeout, + Io(std::io::Error), +} + +fn spawn_with_timeout( + mut command: Command, + timeout: Duration, +) -> Result { + use std::sync::mpsc; + use std::thread; + + let mut child = command.spawn().map_err(SpawnError::Io)?; + let stdout = child.stdout.take(); + let stderr = child.stderr.take(); + + let (tx, rx) = mpsc::channel(); + let stdout_handle = stdout.map(|mut stream| { + let tx = tx.clone(); + thread::spawn(move || { + let mut bytes = Vec::new(); + let _ = std::io::Read::read_to_end(&mut stream, &mut bytes); + let _ = tx.send(("stdout", bytes)); + }) + }); + let stderr_handle = stderr.map(|mut stream| { + let tx = tx.clone(); + thread::spawn(move || { + let mut bytes = Vec::new(); + let _ = std::io::Read::read_to_end(&mut stream, &mut bytes); + let _ = tx.send(("stderr", bytes)); + }) + }); + drop(tx); + + let deadline = 
Instant::now() + timeout; + loop { + if let Some(status) = child.try_wait().map_err(SpawnError::Io)? { + if let Some(handle) = stdout_handle { + let _ = handle.join(); + } + if let Some(handle) = stderr_handle { + let _ = handle.join(); + } + let mut stdout = Vec::new(); + let mut stderr = Vec::new(); + while let Ok((which, bytes)) = rx.try_recv() { + if which == "stdout" { + stdout = bytes; + } else { + stderr = bytes; + } + } + return Ok(std::process::Output { + status, + stdout, + stderr, + }); + } + if Instant::now() >= deadline { + let _ = child.kill(); + let _ = child.wait(); + return Err(SpawnError::Timeout); + } + thread::sleep(Duration::from_millis(50)); + } +} + +/// Merge a structured report back into the legacy tool-result channel. +#[must_use] +pub fn prepend_verifier_summary(summary: &str, output: String) -> String { + if summary.is_empty() { + return output; + } + if output.trim().is_empty() { + return summary.to_string(); + } + format!("{output}\n\n[verifier output]\n{summary}") +} + +/// Trim output to the configured byte budget, preserving the head, tail, and +/// diagnostically relevant lines. 
+#[must_use] +pub fn truncate_output(body: &str, max_bytes: usize) -> String { + if body.len() <= max_bytes { + return body.to_string(); + } + let head_budget = max_bytes / 2; + let tail_budget = max_bytes / 4; + let signal_budget = max_bytes.saturating_sub(head_budget + tail_budget + 32); + + let mut head = String::new(); + for line in body.lines() { + if head.len() + line.len() + 1 > head_budget { + break; + } + head.push_str(line); + head.push('\n'); + } + + let mut tail = String::new(); + for line in body.lines().rev() { + if tail.len() + line.len() + 1 > tail_budget { + break; + } + tail = format!("{line}\n{tail}"); + } + + let mut signals = String::new(); + for line in body.lines() { + let lower = line.to_ascii_lowercase(); + if lower.contains("error") + || lower.contains("warning") + || lower.contains("failed") + || lower.contains("panic") + || lower.contains("traceback") + { + if signals.len() + line.len() + 1 > signal_budget { + break; + } + signals.push_str(line); + signals.push('\n'); + } + } + + let mut out = String::new(); + out.push_str(&head); + out.push_str("... (truncated) ...\n"); + if !signals.trim().is_empty() { + out.push_str(&signals); + out.push_str("... (tail) ...\n"); + } + out.push_str(&tail); + out.trim_end().to_string() +} + +fn extract_file_path(input: &str) -> Option { + let value: Value = serde_json::from_str(input).ok()?; + let path = value + .get("file_path") + .or_else(|| value.get("filePath")) + .or_else(|| value.get("path"))? 
+ .as_str()?; + Some(PathBuf::from(path)) +} + +fn next_report_id() -> String { + format!("vr-{}", REPORT_COUNTER.fetch_add(1, Ordering::Relaxed)) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn temp_dir(tag: &str) -> PathBuf { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("time should be after epoch") + .as_nanos(); + let root = std::env::temp_dir().join(format!("verifier-unit-{tag}-{nanos}")); + fs::create_dir_all(&root).expect("temp dir should create"); + root + } + + #[test] + fn truncate_output_keeps_signal_lines_and_tail() { + let mut body = String::new(); + for index in 0..400 { + let _ = writeln!(body, "noise line {index}"); + } + body.push_str("warning: some warning\n"); + body.push_str("Traceback (most recent call last):\n"); + body.push_str("panic: boom\n"); + let truncated = truncate_output(&body, 512); + assert!(truncated.contains("warning: some warning")); + assert!(truncated.contains("Traceback")); + assert!(truncated.contains("panic: boom")); + assert!(truncated.contains("... 
(truncated) ...")); + } + + #[test] + fn prepend_verifier_summary_merges_with_output() { + let merged = prepend_verifier_summary("[verifier] ok", "edited".to_string()); + assert!(merged.contains("edited")); + assert!(merged.contains("[verifier output]")); + } + + #[test] + fn extract_file_path_supports_known_keys() { + assert_eq!( + extract_file_path(r#"{"file_path":"src/lib.rs"}"#), + Some(PathBuf::from("src/lib.rs")) + ); + assert_eq!( + extract_file_path(r#"{"filePath":"src/lib.rs"}"#), + Some(PathBuf::from("src/lib.rs")) + ); + assert_eq!( + extract_file_path(r#"{"path":"src/lib.rs"}"#), + Some(PathBuf::from("src/lib.rs")) + ); + } + + #[test] + fn gate_status_defaults_to_not_required() { + let gate = VerificationGateStatus::not_required(); + assert!(!gate.attempted); + assert!(gate.passed); + } + + #[test] + fn python_profile_parses_pyproject_and_detects_tools() { + let root = temp_dir("pyproject"); + fs::write( + root.join("pyproject.toml"), + r#" +[tool.ruff] +line-length = 100 + +[tool.mypy] +files = ["app", "tests"] + +[tool.pytest.ini_options] +addopts = "-q" +"#, + ) + .expect("pyproject should write"); + + let profile = build_python_profile_for_root(&root).expect("profile should build"); + + assert!(profile.pyproject_parsed); + assert!(profile.has_ruff); + assert!(profile.has_mypy); + assert!(profile.has_pytest); + assert_eq!( + profile.typed_targets, + vec![root.join("app"), root.join("tests")] + ); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn python_profile_marks_invalid_pyproject_as_config_failure() { + let root = temp_dir("bad-pyproject"); + fs::write(root.join("pyproject.toml"), "[tool.ruff\n").expect("pyproject should write"); + + let profile = build_python_profile_for_root(&root).expect("profile should build"); + let report = python_config_failure_report( + &profile, + VerificationPhase::Quick, + vec![root.join("pyproject.toml")], + 2_048, + ) + .expect("invalid pyproject should report failure"); + + 
assert!(!profile.pyproject_parsed); + assert_eq!(report.status, VerificationStatus::Failed); + assert_eq!( + report.steps[0].failure_kind, + Some(VerificationFailureKind::Config) + ); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn detect_python_runner_prefers_uv_over_venv() { + let root = temp_dir("uv-runner"); + fs::write(root.join("uv.lock"), "").expect("uv lock should write"); + let venv_python = if cfg!(windows) { + root.join(".venv").join("Scripts").join("python.exe") + } else { + root.join(".venv").join("bin").join("python") + }; + fs::create_dir_all(venv_python.parent().expect("venv parent")) + .expect("venv dir should create"); + fs::write(&venv_python, "").expect("fake interpreter should write"); + + let (runner, launcher_kind) = detect_python_runner(&root); + + assert_eq!(launcher_kind, PythonLauncherKind::Uv); + assert_eq!( + runner.command_prefix, + vec![ + "uv".to_string(), + "run".to_string(), + "python".to_string(), + "-m".to_string() + ] + ); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn detect_python_runner_uses_local_venv_when_present() { + let root = temp_dir("venv-runner"); + let venv_python = if cfg!(windows) { + root.join("venv").join("Scripts").join("python.exe") + } else { + root.join("venv").join("bin").join("python") + }; + fs::create_dir_all(venv_python.parent().expect("venv parent")) + .expect("venv dir should create"); + fs::write(&venv_python, "").expect("fake interpreter should write"); + + let (_runner, launcher_kind) = detect_python_runner(&root); + + assert_eq!(launcher_kind, PythonLauncherKind::Venv); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn detect_python_runner_uses_poetry_when_lock_present() { + let root = temp_dir("poetry-runner"); + fs::write(root.join("poetry.lock"), "").expect("poetry lock should write"); + + let (runner, launcher_kind) = detect_python_runner(&root); + + assert_eq!(launcher_kind, 
PythonLauncherKind::Poetry); + assert_eq!( + runner.command_prefix, + vec![ + "poetry".to_string(), + "run".to_string(), + "python".to_string(), + "-m".to_string() + ] + ); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn nearest_python_root_prefers_closest_matching_directory() { + let root = temp_dir("python-root"); + let nested = root.join("pkg").join("inner"); + fs::create_dir_all(&nested).expect("nested dir should create"); + fs::write(root.join("pyproject.toml"), "[project]\nname = 'root'\n") + .expect("root pyproject should write"); + fs::write(nested.join("requirements.txt"), "pytest\n") + .expect("nested requirements should write"); + + let detected = nearest_python_root(&nested.join("module.py")).expect("root should resolve"); + + assert_eq!(detected, nested); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn python_quick_steps_fall_back_to_py_compile() { + let root = temp_dir("pycompile"); + fs::write(root.join("requirements.txt"), "pytest\n").expect("requirements should write"); + fs::write(root.join("main.py"), "print('ok')\n").expect("python file should write"); + + let profile = build_python_profile_for_root(&root).expect("profile should build"); + let steps = python_quick_steps(&profile, &[root.join("main.py")]); + + assert_eq!(steps.len(), 1); + assert_eq!(steps[0].label, "python -m py_compile"); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn derive_mypy_targets_uses_package_root() { + let root = temp_dir("mypy-targets"); + let pkg = root.join("pkg"); + let sub = pkg.join("sub"); + fs::create_dir_all(&sub).expect("package dir should create"); + fs::write(pkg.join("__init__.py"), "").expect("init should write"); + fs::write(sub.join("__init__.py"), "").expect("sub init should write"); + fs::write(sub.join("mod.py"), "x = 1\n").expect("module should write"); + + let targets = derive_mypy_targets_from_touched(&root, &[sub.join("mod.py")]); + 
+ assert_eq!(targets, vec![pkg]); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn classify_python_failures_distinguishes_tool_environment_and_config() { + assert_eq!( + classify_python_failure_kind( + PythonLauncherKind::Global, + PythonStepKind::RuffCheck, + "No module named ruff", + ), + VerificationFailureKind::ToolUnavailable + ); + assert_eq!( + classify_python_failure_kind( + PythonLauncherKind::Poetry, + PythonStepKind::Pytest, + "Poetry could not find a compatible environment", + ), + VerificationFailureKind::Environment + ); + assert_eq!( + classify_python_failure_kind( + PythonLauncherKind::Global, + PythonStepKind::Mypy, + "failed to parse pyproject.toml", + ), + VerificationFailureKind::Config + ); + assert_eq!( + classify_step_timeout(&PlannedStep { + label: "pytest".to_string(), + command: vec!["python".to_string(), "-m".to_string(), "pytest".to_string()], + diagnostics: StepDiagnostics::Python { + launcher_kind: PythonLauncherKind::Global, + step_kind: PythonStepKind::Pytest, + target_scope: VerificationTargetScope::Project, + }, + }), + Some(VerificationFailureKind::Timeout) + ); + } + + #[test] + fn rust_profile_extracts_package_name_from_nearest_manifest() { + let root = temp_dir("rust-profile"); + let crate_dir = root.join("crates").join("demo"); + fs::create_dir_all(crate_dir.join("src")).expect("crate dir should create"); + fs::write( + crate_dir.join("Cargo.toml"), + "[package]\nname = \"demo\"\nversion = \"0.1.0\"\n", + ) + .expect("manifest should write"); + fs::write(crate_dir.join("src").join("lib.rs"), "pub fn demo() {}\n") + .expect("lib should write"); + + let profile = build_rust_profile_for_path(&crate_dir.join("src").join("lib.rs")) + .expect("profile should resolve"); + + assert_eq!(profile.project_root, crate_dir); + assert_eq!(profile.package_name.as_deref(), Some("demo")); + assert!(profile.manifest_parsed); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn 
rust_quick_steps_scope_to_package_when_package_name_is_known() { + let steps = rust_quick_steps(&CargoVerifierConfig::default(), Some("demo".to_string())); + + assert_eq!(steps.len(), 1); + assert_eq!( + steps[0].command, + vec![ + "cargo".to_string(), + "check".to_string(), + "--quiet".to_string(), + "--message-format=short".to_string(), + "-p".to_string(), + "demo".to_string(), + ] + ); + assert_eq!( + steps[0].diagnostics, + StepDiagnostics::Rust { + step_kind: RustStepKind::Check, + target_scope: VerificationTargetScope::Package, + package_name: Some("demo".to_string()), + } + ); + } + + #[test] + fn node_profile_resolves_nearest_package_and_package_manager() { + let root = temp_dir("node-profile"); + let package_root = root.join("packages").join("web"); + fs::create_dir_all(&package_root).expect("package root should create"); + fs::write( + package_root.join("package.json"), + r#"{ + "name": "@demo/web", + "scripts": { + "typecheck": "tsc --noEmit", + "lint": "eslint ." + } +}"#, + ) + .expect("package should write"); + fs::write( + package_root.join("pnpm-lock.yaml"), + "lockfileVersion: '9.0'\n", + ) + .expect("lockfile should write"); + + let profile = build_node_profile_for_root(&package_root) + .expect("profile should not error") + .expect("profile should exist"); + + assert_eq!(profile.project_root, package_root); + assert_eq!(profile.package_name.as_deref(), Some("@demo/web")); + assert_eq!(profile.package_manager, PackageManager::Pnpm); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn node_quick_steps_prefer_package_scripts_and_carry_metadata() { + let profile = NodeProjectProfile { + project_root: PathBuf::from("/workspace/packages/web"), + package_json_path: PathBuf::from("/workspace/packages/web/package.json"), + package_value: serde_json::json!({ + "name": "@demo/web", + "scripts": { + "typecheck": "tsc --noEmit" + } + }), + package_manager: PackageManager::Pnpm, + package_name: Some("@demo/web".to_string()), + 
}; + + let steps = node_quick_steps(&profile); + + assert_eq!(steps.len(), 1); + assert_eq!( + steps[0].command, + vec![ + "pnpm".to_string(), + "run".to_string(), + "typecheck".to_string(), + ] + ); + assert_eq!( + steps[0].diagnostics, + StepDiagnostics::NodeTypeScript { + step_kind: NodeStepKind::Typecheck, + target_scope: VerificationTargetScope::Package, + package_manager: PackageManager::Pnpm, + package_name: Some("@demo/web".to_string()), + } + ); + } + + #[test] + fn classify_rust_failures_distinguishes_config_environment_and_tools() { + assert_eq!( + classify_rust_failure_kind( + RustStepKind::Check, + "failed to parse manifest at Cargo.toml" + ), + VerificationFailureKind::Config + ); + assert_eq!( + classify_rust_failure_kind( + RustStepKind::Check, + "error: linker `cc` not found while building crate", + ), + VerificationFailureKind::Environment + ); + assert_eq!( + classify_rust_failure_kind(RustStepKind::Clippy, "error: no such subcommand: `clippy`",), + VerificationFailureKind::ToolUnavailable + ); + assert_eq!( + classify_rust_failure_kind(RustStepKind::Check, "error[E0308]: mismatched types"), + VerificationFailureKind::Code + ); + } + + #[test] + fn classify_node_failures_distinguishes_config_environment_and_tools() { + assert_eq!( + classify_node_failure_kind( + NodeStepKind::TscNoEmit, + PackageManager::Npm, + "tsconfig parse error: unknown compiler option", + ), + VerificationFailureKind::Config + ); + assert_eq!( + classify_node_failure_kind( + NodeStepKind::Typecheck, + PackageManager::Pnpm, + "pnpm package manager could not install because lockfile is missing", + ), + VerificationFailureKind::Environment + ); + assert_eq!( + classify_node_failure_kind( + NodeStepKind::Eslint, + PackageManager::Npm, + "eslint not found", + ), + VerificationFailureKind::ToolUnavailable + ); + assert_eq!( + classify_node_failure_kind( + NodeStepKind::TscNoEmit, + PackageManager::Npm, + "src/index.ts(1,7): error TS2322: Type 'string' is not assignable", + ), + 
VerificationFailureKind::Code + ); + } + + #[test] + fn cargo_verifier_auto_mode_skips_empty_step_reports() { + let root = temp_dir("auto-node-empty"); + fs::write( + root.join("package.json"), + r#"{"name":"demo-node","scripts":{}}"#, + ) + .expect("package json should write"); + let source = root.join("index.ts"); + fs::write(&source, "export const value = 1;\n").expect("source should write"); + + let verifier = CargoVerifier::new(CargoVerifierConfig { + legacy_mode: false, + auto_mode: true, + quick_on_write: true, + final_gate: true, + max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES, + rust_check: true, + rust_clippy: true, + rust_fmt: true, + rust_test: true, + rust_timeout: Duration::from_secs(1), + node_enabled: true, + node_timeout: Duration::from_secs(1), + python_enabled: false, + python_timeout: Duration::from_secs(1), + }); + let context = VerificationContext { + phase: VerificationPhase::Quick, + workspace_root: Some(root.clone()), + tool_name: "edit_file".to_string(), + tool_input: format!(r#"{{"file_path":"{}"}}"#, source.display()), + touched_paths: vec![source], + mutation_sequence: 1, + }; + + let reports = verifier.quick_verify(&context); + + assert!( + reports.is_empty(), + "auto mode should skip empty-step reports" + ); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn detect_adapter_by_marker_prefers_closest_ancestor() { + let root = temp_dir("auto-marker-closest"); + // Outer repo marker is package.json; inner package has Cargo.toml closer to file. 
+ fs::write(root.join("package.json"), r#"{"name":"outer"}"#) + .expect("outer package.json should write"); + let inner = root.join("crate-a"); + fs::create_dir_all(&inner).expect("inner dir should create"); + fs::write( + inner.join("Cargo.toml"), + "[package]\nname=\"a\"\nversion=\"0.1.0\"\n", + ) + .expect("inner Cargo.toml should write"); + let source = inner.join("src").join("lib.rs"); + fs::create_dir_all(source.parent().unwrap()).expect("src dir"); + fs::write(&source, "pub fn x() {}\n").expect("src"); + + let detected = detect_adapter_by_marker(&source).expect("adapter should resolve"); + assert_eq!(detected, Adapter::Rust); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn detect_adapter_by_marker_returns_none_without_markers() { + let root = temp_dir("auto-marker-none"); + let source = root.join("note.txt"); + fs::write(&source, "plain\n").expect("note should write"); + + assert!(detect_adapter_by_marker(&source).is_none()); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn auto_mode_without_marker_returns_no_reports() { + let root = temp_dir("auto-no-marker"); + let source = root.join("random.txt"); + fs::write(&source, "nope\n").expect("source should write"); + + let verifier = CargoVerifier::new(CargoVerifierConfig { + legacy_mode: false, + auto_mode: true, + quick_on_write: true, + final_gate: true, + max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES, + rust_check: true, + rust_clippy: true, + rust_fmt: true, + rust_test: true, + rust_timeout: Duration::from_secs(1), + node_enabled: true, + node_timeout: Duration::from_secs(1), + python_enabled: true, + python_timeout: Duration::from_secs(1), + }); + let context = VerificationContext { + phase: VerificationPhase::Quick, + workspace_root: Some(root.clone()), + tool_name: "edit_file".to_string(), + tool_input: format!(r#"{{"file_path":"{}"}}"#, source.display()), + touched_paths: vec![source], + mutation_sequence: 1, + }; + + assert!( + 
verifier.quick_verify(&context).is_empty(), + "auto mode without marker should return no reports" + ); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } +} diff --git a/rust/crates/runtime/tests/integration_tests.rs b/rust/crates/runtime/tests/integration_tests.rs index cc7bd9c54d..0bf1442d6e 100644 --- a/rust/crates/runtime/tests/integration_tests.rs +++ b/rust/crates/runtime/tests/integration_tests.rs @@ -22,7 +22,7 @@ fn stale_branch_detection_flows_into_policy_engine() { let stale_context = LaneContext::new( "stale-lane", 0, - Duration::from_secs(2 * 60 * 60), // 2 hours stale + Duration::from_hours(2), // 2 hours stale LaneBlocker::None, ReviewStatus::Pending, DiffScope::Full, @@ -49,7 +49,7 @@ fn fresh_branch_does_not_trigger_stale_policy() { let fresh_context = LaneContext::new( "fresh-lane", 0, - Duration::from_secs(30 * 60), // 30 min stale — under 1 hour threshold + Duration::from_mins(30), // 30 min stale — under 1 hour threshold LaneBlocker::None, ReviewStatus::Pending, DiffScope::Full, @@ -212,8 +212,8 @@ fn end_to_end_stale_lane_gets_merge_forward_action() { // when: build context and evaluate policy let context = LaneContext::new( "lane-9411", - 3, // Workspace green - Duration::from_secs(5 * 60 * 60), // 5 hours stale, definitely over threshold + 3, // Workspace green + Duration::from_hours(5), // 5 hours stale, definitely over threshold LaneBlocker::None, ReviewStatus::Approved, DiffScope::Scoped, @@ -261,8 +261,8 @@ fn end_to_end_stale_lane_gets_merge_forward_action() { fn fresh_approved_lane_gets_merge_action() { let context = LaneContext::new( "fresh-approved-lane", - 3, // Workspace green - Duration::from_secs(30 * 60), // 30 min — under 1 hour threshold = fresh + 3, // Workspace green + Duration::from_mins(30), // 30 min — under 1 hour threshold = fresh LaneBlocker::None, ReviewStatus::Approved, DiffScope::Scoped, @@ -347,7 +347,7 @@ fn worker_provider_failure_flows_through_recovery_to_policy() { // (Simulating the policy 
check that would happen after successful recovery) let recovery_success = matches!(result, RecoveryResult::Recovered { .. }); let green_level = 3; // Workspace green - let not_stale = Duration::from_secs(30 * 60); // 30 min — fresh + let not_stale = Duration::from_mins(30); // 30 min — fresh let post_recovery_context = LaneContext::new( "recovered-lane", diff --git a/rust/crates/runtime/tests/verifier_e2e.rs b/rust/crates/runtime/tests/verifier_e2e.rs new file mode 100644 index 0000000000..0415d6fc03 --- /dev/null +++ b/rust/crates/runtime/tests/verifier_e2e.rs @@ -0,0 +1,409 @@ +//! End-to-end tests for `CargoVerifier` using real temp projects. + +use std::fs; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::time::Duration; + +use runtime::{ + CargoVerifier, CargoVerifierConfig, VerificationContext, VerificationFailureKind, + VerificationPhase, VerificationReport, VerificationStatus, Verifier, +}; +use serde_json::json; + +static TMP_COUNTER: AtomicUsize = AtomicUsize::new(0); + +fn unique_tmpdir(tag: &str) -> PathBuf { + let pid = std::process::id(); + let n = TMP_COUNTER.fetch_add(1, Ordering::SeqCst); + let dir = std::env::temp_dir().join(format!("verifier_e2e_{tag}_{pid}_{n}")); + if dir.exists() { + let _ = fs::remove_dir_all(&dir); + } + fs::create_dir_all(&dir).expect("tmpdir"); + dir +} + +fn write_minimal_crate(root: &Path, name: &str, lib_rs: &str) { + fs::write( + root.join("Cargo.toml"), + format!( + "[package]\nname = \"{name}\"\nversion = \"0.0.0\"\nedition = \"2021\"\n\n[lib]\npath = \"src/lib.rs\"\n" + ), + ) + .unwrap(); + fs::create_dir_all(root.join("src")).unwrap(); + fs::write(root.join("src/lib.rs"), lib_rs).unwrap(); +} + +fn tool_input(path: &Path) -> String { + json!({ "file_path": path }).to_string() +} + +fn context_for(tool_name: &str, input: &str) -> Option { + VerificationContext::from_tool_invocation(VerificationPhase::Quick, None, tool_name, input, 1) +} + +fn quick_report(v: 
&CargoVerifier, tool_name: &str, input: &str) -> Option { + let context = context_for(tool_name, input)?; + v.quick_verify(&context).into_iter().next() +} + +fn quick_only() -> CargoVerifierConfig { + CargoVerifierConfig { + legacy_mode: false, + quick_on_write: true, + final_gate: false, + max_output_bytes: 2_048, + rust_check: true, + rust_clippy: false, + rust_fmt: false, + rust_test: false, + rust_timeout: Duration::from_mins(2), + node_enabled: false, + node_timeout: Duration::from_mins(2), + python_enabled: false, + python_timeout: Duration::from_mins(2), + auto_mode: false, + } +} + +fn cargo_available() -> bool { + std::process::Command::new("cargo") + .arg("--version") + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .status() + .is_ok_and(|s| s.success()) +} + +fn python_available() -> bool { + std::process::Command::new("python") + .arg("--version") + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .status() + .is_ok_and(|s| s.success()) +} + +fn python_only() -> CargoVerifierConfig { + CargoVerifierConfig { + legacy_mode: false, + quick_on_write: true, + final_gate: true, + max_output_bytes: 2_048, + rust_check: false, + rust_clippy: false, + rust_fmt: false, + rust_test: false, + rust_timeout: Duration::from_mins(2), + node_enabled: false, + node_timeout: Duration::from_mins(2), + python_enabled: true, + python_timeout: Duration::from_mins(2), + auto_mode: false, + } +} + +#[test] +fn passing_crate_reports_passed_status() { + if !cargo_available() { + eprintln!("cargo unavailable - skipping"); + return; + } + let root = unique_tmpdir("pass"); + write_minimal_crate(&root, "vpass", "pub fn two() -> i32 { 2 }\n"); + let file = root.join("src/lib.rs"); + + let v = CargoVerifier::new(quick_only()); + let result = quick_report(&v, "edit_file", &tool_input(&file)) + .expect("verifier should run for .rs edit"); + + assert_eq!(result.status, VerificationStatus::Passed); + 
assert!(result.summary_text.contains("cargo check: ok")); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn type_error_fails_and_surfaces_error_text_in_summary() { + if !cargo_available() { + return; + } + let root = unique_tmpdir("typeerr"); + write_minimal_crate(&root, "vtype", "pub fn oops() -> i32 { \"nope\" }\n"); + let file = root.join("src/lib.rs"); + + let v = CargoVerifier::new(quick_only()); + let result = quick_report(&v, "write_file", &tool_input(&file)).unwrap(); + + assert_eq!(result.status, VerificationStatus::Failed); + assert!(result.summary_text.contains("cargo check: FAIL")); + let lower = result.summary_text.to_lowercase(); + assert!( + lower.contains("mismatched") || lower.contains("error"), + "summary missing diagnostic: {}", + result.summary_text + ); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn non_rust_file_is_out_of_scope() { + let v = CargoVerifier::new(CargoVerifierConfig::default()); + let input = r#"{"file_path":"/tmp/README.md"}"#; + assert!(quick_report(&v, "edit_file", input).is_none()); +} + +#[test] +fn unknown_tool_is_ignored() { + let v = CargoVerifier::new(CargoVerifierConfig::default()); + let input = r#"{"file_path":"/tmp/x.rs"}"#; + assert!(quick_report(&v, "read_file", input).is_none()); + assert!(quick_report(&v, "bash", input).is_none()); +} + +#[test] +fn malformed_json_returns_none_without_panicking() { + let v = CargoVerifier::new(CargoVerifierConfig::default()); + assert!(quick_report(&v, "edit_file", "not-json").is_none()); + assert!(quick_report(&v, "edit_file", "{}").is_none()); + assert!(quick_report(&v, "edit_file", r#"{"file_path": 42}"#).is_none()); +} + +#[test] +fn accepts_alternate_path_keys() { + if !cargo_available() { + return; + } + let root = unique_tmpdir("altkeys"); + write_minimal_crate(&root, "valt", "pub fn k() -> u8 { 1 }\n"); + let file = root.join("src/lib.rs"); + + let v = CargoVerifier::new(quick_only()); + + let a = json!({ "filePath": file }).to_string(); + let b = json!({ 
"path": file }).to_string(); + assert!(quick_report(&v, "edit_file", &a).is_some()); + assert!(quick_report(&v, "edit_file", &b).is_some()); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn file_outside_any_crate_is_skipped_or_ignored() { + let root = unique_tmpdir("nocargo"); + fs::create_dir_all(root.join("x")).unwrap(); + let file = root.join("x/orphan.rs"); + fs::write(&file, "pub fn z() {}\n").unwrap(); + + let v = CargoVerifier::new(quick_only()); + let result = quick_report(&v, "edit_file", &tool_input(&file)); + if let Some(report) = result { + assert!(report.summary_text.contains("cargo check")); + } + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn all_steps_disabled_yields_skipped_report() { + let config = CargoVerifierConfig { + rust_check: false, + rust_clippy: false, + rust_fmt: false, + rust_test: false, + ..quick_only() + }; + let root = unique_tmpdir("nosteps"); + write_minimal_crate(&root, "vnone", "pub fn n() {}\n"); + let file = root.join("src/lib.rs"); + + let v = CargoVerifier::new(config); + let report = quick_report(&v, "edit_file", &tool_input(&file)).unwrap(); + assert_eq!(report.status, VerificationStatus::Skipped); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn timeout_short_circuits_and_reports_failure() { + if !cargo_available() { + return; + } + let config = CargoVerifierConfig { + rust_timeout: Duration::from_millis(1), + ..quick_only() + }; + let root = unique_tmpdir("timeout"); + write_minimal_crate(&root, "vtime", "pub fn t() {}\n"); + let file = root.join("src/lib.rs"); + + let v = CargoVerifier::new(config); + let result = quick_report(&v, "edit_file", &tool_input(&file)).unwrap(); + assert_eq!(result.status, VerificationStatus::Failed); + assert!( + result.summary_text.contains("cargo check: FAIL") + && result.summary_text.to_lowercase().contains("timed out"), + "unexpected summary: {}", + result.summary_text + ); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn 
later_steps_are_skipped_after_first_failure_in_legacy_mode() { + if !cargo_available() { + return; + } + let root = unique_tmpdir("skipchain"); + write_minimal_crate(&root, "vskip", "pub fn bad() -> i32 { return; }\n"); + let file = root.join("src/lib.rs"); + + let config = CargoVerifierConfig { + legacy_mode: true, + quick_on_write: true, + final_gate: false, + max_output_bytes: 2_048, + rust_check: true, + rust_clippy: true, + rust_fmt: true, + rust_test: true, + rust_timeout: Duration::from_mins(2), + node_enabled: false, + node_timeout: Duration::from_mins(2), + python_enabled: false, + python_timeout: Duration::from_mins(2), + auto_mode: false, + }; + let v = CargoVerifier::new(config); + let result = quick_report(&v, "edit_file", &tool_input(&file)).unwrap(); + + assert_eq!(result.status, VerificationStatus::Failed); + assert!(result.summary_text.contains("cargo check: FAIL")); + assert!(result.summary_text.contains("cargo clippy: skipped")); + assert!(result.summary_text.contains("cargo fmt --check: skipped")); + assert!(result.summary_text.contains("cargo test: skipped")); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn fmt_violation_is_detected_when_fmt_enabled() { + if !cargo_available() { + return; + } + let root = unique_tmpdir("fmt"); + let src = "pub fn f( )->i32{1 + 2}\n"; + write_minimal_crate(&root, "vfmt", src); + let file = root.join("src/lib.rs"); + + let config = CargoVerifierConfig { + legacy_mode: true, + quick_on_write: true, + final_gate: false, + max_output_bytes: 2_048, + rust_check: false, + rust_clippy: false, + rust_fmt: true, + rust_test: false, + rust_timeout: Duration::from_mins(1), + node_enabled: false, + node_timeout: Duration::from_mins(2), + python_enabled: false, + python_timeout: Duration::from_mins(2), + auto_mode: false, + }; + let v = CargoVerifier::new(config); + let result = quick_report(&v, "edit_file", &tool_input(&file)).unwrap(); + + assert_ne!(result.status, VerificationStatus::Passed); + 
assert!(result.summary_text.contains("cargo fmt --check")); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn nested_file_resolves_to_parent_crate_manifest() { + if !cargo_available() { + return; + } + let root = unique_tmpdir("nested"); + write_minimal_crate(&root, "vnest", "pub mod sub;\n"); + fs::create_dir_all(root.join("src/sub")).unwrap(); + let nested = root.join("src/sub/mod.rs"); + fs::write(&nested, "pub fn inside() -> u8 { 7 }\n").unwrap(); + fs::write( + root.join("src/lib.rs"), + "#[path = \"sub/mod.rs\"]\npub mod sub;\n", + ) + .unwrap(); + + let v = CargoVerifier::new(quick_only()); + let result = quick_report(&v, "edit_file", &tool_input(&nested)).unwrap(); + assert_eq!(result.status, VerificationStatus::Passed); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn python_quick_fallback_py_compile_catches_syntax_error() { + if !python_available() { + return; + } + let root = unique_tmpdir("python-syntax"); + fs::write(root.join("requirements.txt"), "pytest\n").unwrap(); + let file = root.join("broken.py"); + fs::write(&file, "def broken(:\n pass\n").unwrap(); + + let v = CargoVerifier::new(python_only()); + let result = quick_report(&v, "edit_file", &tool_input(&file)).unwrap(); + + assert_eq!(result.status, VerificationStatus::Failed); + assert_eq!( + result.steps[0].failure_kind, + Some(VerificationFailureKind::Code) + ); + assert!(result.summary_text.contains("py_compile")); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn invalid_pyproject_reports_config_failure() { + let root = unique_tmpdir("python-bad-pyproject"); + let file = root.join("pyproject.toml"); + fs::write(&file, "[tool.ruff\n").unwrap(); + + let v = CargoVerifier::new(python_only()); + let result = quick_report(&v, "edit_file", &tool_input(&file)).unwrap(); + + assert_eq!(result.status, VerificationStatus::Failed); + assert_eq!( + result.steps[0].failure_kind, + Some(VerificationFailureKind::Config) + ); + assert!(result.summary_text.contains("pyproject.toml")); + let 
_ = fs::remove_dir_all(&root); +} + +#[test] +fn broken_local_venv_is_reported_as_environment() { + let root = unique_tmpdir("python-venv"); + fs::write(root.join("setup.py"), "from setuptools import setup\n").unwrap(); + let file = root.join("main.py"); + fs::write(&file, "print('ok')\n").unwrap(); + + let interpreter = if cfg!(windows) { + root.join(".venv").join("Scripts").join("python.exe") + } else { + root.join(".venv").join("bin").join("python") + }; + fs::create_dir_all(interpreter.parent().unwrap()).unwrap(); + fs::write(&interpreter, "").unwrap(); + + let v = CargoVerifier::new(python_only()); + let result = quick_report(&v, "edit_file", &tool_input(&file)).unwrap(); + + assert!( + matches!( + result.steps[0].failure_kind, + Some(VerificationFailureKind::Environment) + ), + "expected environment failure, got {:?}", + result.steps[0].failure_kind + ); + let _ = fs::remove_dir_all(&root); +} diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs index ded17495bd..6c0d9cf712 100644 --- a/rust/crates/rusty-claude-cli/src/main.rs +++ b/rust/crates/rusty-claude-cli/src/main.rs @@ -10,7 +10,7 @@ mod init; mod input; mod render; -use std::collections::BTreeSet; +use std::collections::{BTreeMap, BTreeSet}; use std::env; use std::fs; use std::io::{self, IsTerminal, Read, Write}; @@ -53,7 +53,8 @@ use runtime::{ use serde::Deserialize; use serde_json::{json, Map, Value}; use tools::{ - execute_tool, mvp_tool_specs, GlobalToolRegistry, RuntimeToolDefinition, ToolSearchOutput, + execute_tool, mvp_tool_specs, run_inline_review, GlobalToolRegistry, ReviewFinding, + ReviewOutcome, RuntimeToolDefinition, ToolSearchOutput, }; const DEFAULT_MODEL: &str = "claude-opus-4-6"; @@ -83,6 +84,10 @@ const OFFICIAL_REPO_SLUG: &str = "ultraworkers/claw-code"; const DEPRECATED_INSTALL_COMMAND: &str = "cargo install claw-code"; const LATEST_SESSION_REFERENCE: &str = "latest"; const SESSION_REFERENCE_ALIASES: &[&str] = 
&[LATEST_SESSION_REFERENCE, "last", "recent"]; +const CRITIC_CHANGED_FILES_THRESHOLD: usize = 4; +const CRITIC_CHANGED_LINES_THRESHOLD: usize = 200; +const AUTO_SKILL_MIN_ITERATIONS: usize = 3; +const AUTO_SKILL_MIN_FAILURES: usize = 2; const CLI_OPTION_SUGGESTIONS: &[&str] = &[ "--help", "-h", @@ -940,7 +945,7 @@ fn omc_compatibility_note_for_unknown_slash_command(name: &str) -> Option<&'stat } fn render_suggestion_line(label: &str, suggestions: &[String]) -> Option { - (!suggestions.is_empty()).then(|| format!(" {label:<16} {}", suggestions.join(", "),)) + (!suggestions.is_empty()).then(|| format!(" {label:<16} {}", suggestions.join(", "))) } fn suggest_slash_commands(input: &str) -> Vec { @@ -3160,6 +3165,8 @@ struct LiveCli { runtime: BuiltRuntime, session: SessionHandle, prompt_history: Vec, + critic_planner: runtime::critic::CriticPlanner, + turn_mutation_counter: u64, } #[derive(Debug, Clone)] @@ -3188,6 +3195,7 @@ struct BuiltRuntime { plugins_active: bool, mcp_state: Option>>, mcp_active: bool, + buffer_output: bool, } impl BuiltRuntime { @@ -3195,6 +3203,7 @@ impl BuiltRuntime { runtime: ConversationRuntime, plugin_registry: PluginRegistry, mcp_state: Option>>, + buffer_output: bool, ) -> Self { Self { runtime: Some(runtime), @@ -3202,6 +3211,7 @@ impl BuiltRuntime { plugins_active: true, mcp_state, mcp_active: true, + buffer_output, } } @@ -3668,6 +3678,8 @@ impl LiveCli { runtime, session, prompt_history: Vec::new(), + critic_planner: runtime::critic::CriticPlanner::new(), + turn_mutation_counter: 0, }; cli.persist_session()?; Ok(cli) @@ -3776,6 +3788,9 @@ impl LiveCli { hook_abort_monitor.stop(); match result { Ok(summary) => { + let summary = + self.apply_post_turn_pipeline(&mut runtime, summary, &mut permission_prompter)?; + let should_print_buffered = runtime.buffer_output; self.replace_runtime(runtime)?; spinner.finish( "✨ Done", @@ -3783,6 +3798,9 @@ impl LiveCli { &mut stdout, )?; println!(); + if should_print_buffered { + 
print_buffered_turn_summary(&summary)?; + } if let Some(event) = summary.auto_compaction { println!( "{}", @@ -3822,7 +3840,8 @@ impl LiveCli { let mut permission_prompter = CliPermissionPrompter::new(self.permission_mode); let result = runtime.run_turn(input, Some(&mut permission_prompter)); hook_abort_monitor.stop(); - let summary = result?; + let summary = + self.apply_post_turn_pipeline(&mut runtime, result?, &mut permission_prompter)?; self.replace_runtime(runtime)?; self.persist_session()?; let final_text = final_assistant_text(&summary); @@ -3835,7 +3854,8 @@ impl LiveCli { let mut permission_prompter = CliPermissionPrompter::new(self.permission_mode); let result = runtime.run_turn(input, Some(&mut permission_prompter)); hook_abort_monitor.stop(); - let summary = result?; + let summary = + self.apply_post_turn_pipeline(&mut runtime, result?, &mut permission_prompter)?; self.replace_runtime(runtime)?; self.persist_session()?; println!( @@ -3850,6 +3870,12 @@ impl LiveCli { })), "tool_uses": collect_tool_uses(&summary), "tool_results": collect_tool_results(&summary), + "verification_reports": collect_verification_reports(&summary), + "verification_gate": { + "attempted": summary.verification_gate.attempted, + "passed": summary.verification_gate.passed, + "report_ids": summary.verification_gate.report_ids.clone(), + }, "prompt_cache_events": collect_prompt_cache_events(&summary), "usage": { "input_tokens": summary.usage.input_tokens, @@ -3868,6 +3894,54 @@ impl LiveCli { Ok(()) } + fn apply_post_turn_pipeline( + &mut self, + runtime: &mut BuiltRuntime, + mut summary: runtime::TurnSummary, + permission_prompter: &mut CliPermissionPrompter, + ) -> Result> { + let change_stats = collect_turn_change_stats(&summary); + + self.turn_mutation_counter += 1; + let planner_stats = runtime::critic::DiffStats { + files_changed: change_stats.files.len(), + lines_changed: change_stats.total_changed_lines, + distinct_roots: change_stats.verified_roots.len(), + }; + let 
subagent_depth = std::env::var("CLAUDE_CODE_SUBAGENT_DEPTH") + .ok() + .and_then(|value| value.trim().parse::().ok()) + .unwrap_or(0); + let planner_decision = + self.critic_planner + .plan(self.turn_mutation_counter, subagent_depth, planner_stats); + let planner_allows = matches!( + planner_decision, + runtime::critic::CriticDecision::Run { .. } + ); + + if planner_allows && should_run_critic(&summary, &change_stats) { + let review = run_inline_review( + &build_critic_prompt(&summary, &change_stats), + Some(&critic_model_for(&self.model)), + ) + .map_err(std::io::Error::other)?; + let blocking = blocking_findings(&review.findings); + if blocking.is_empty() { + append_review_note(&mut summary, &review); + } else { + let fix_summary = runtime.run_turn( + build_critic_fix_prompt(&blocking), + Some(permission_prompter), + )?; + summary = merge_turn_summaries(summary, fix_summary); + } + } + + let _ = maybe_write_auto_skill_draft(&summary); + Ok(summary) + } + #[allow(clippy::too_many_lines)] fn handle_repl_command( &mut self, @@ -5392,7 +5466,7 @@ fn render_memory_report() -> Result> { } else { preview }; - lines.push(format!(" {}. {}", index + 1, file.path.display(),)); + lines.push(format!(" {}. {}", index + 1, file.path.display())); lines.push(format!( " lines={} preview={}", file.content.lines().count(), @@ -5473,8 +5547,7 @@ fn render_diff_report_for(cwd: &Path) -> Result Result bool { Command::new("which") .arg(name) .output() - .map(|output| output.status.success()) - .unwrap_or(false) + .is_ok_and(|output| output.status.success()) } fn write_temp_text_file( @@ -5926,6 +5997,7 @@ fn render_export_text(session: &Session) -> String { MessageRole::User => "user", MessageRole::Assistant => "assistant", MessageRole::Tool => "tool", + MessageRole::Verification => "verification", }; lines.push(format!("## {}. 
{role}", index + 1)); for block in &message.blocks { @@ -5944,6 +6016,17 @@ fn render_export_text(session: &Session) -> String { "[tool_result id={tool_use_id} name={tool_name} error={is_error}] {output}" )); } + ContentBlock::VerificationReport { + report_id, + phase, + status, + summary_text, + .. + } => lines.push(format!( + "[verification_report id={report_id} phase={} status={}] {summary_text}", + phase.as_str(), + status.as_str() + )), } } lines.push(String::new()); @@ -6071,6 +6154,7 @@ fn run_export( Ok(()) } +#[allow(clippy::too_many_lines)] fn render_session_markdown(session: &Session, session_id: &str, session_path: &Path) -> String { let mut lines = vec![ "# Conversation Export".to_string(), @@ -6105,6 +6189,7 @@ fn render_session_markdown(session: &Session, session_id: &str, session_path: &P MessageRole::User => "User", MessageRole::Assistant => "Assistant", MessageRole::Tool => "Tool", + MessageRole::Verification => "Verification", }; lines.push(format!("## {}. {role}", index + 1)); lines.push(String::new()); @@ -6145,6 +6230,24 @@ fn render_session_markdown(session: &Session, session_id: &str, session_path: &P } lines.push(String::new()); } + ContentBlock::VerificationReport { + report_id, + phase, + status, + summary_text, + .. 
+ } => { + lines.push(format!( + "**Verification** `{}` _(id `{report_id}`, status `{}`)_", + phase.as_str(), + status.as_str() + )); + let summary = summarize_tool_payload_for_markdown(summary_text); + if !summary.is_empty() { + lines.push(format!("> {summary}")); + } + lines.push(String::new()); + } } } if let Some(usage) = message.usage { @@ -6634,6 +6737,13 @@ fn build_runtime_with_plugin_state( plugin_registry, mcp_state, } = runtime_plugin_state; + let verifier_config = feature_config.verifier().clone(); + let buffer_output = emit_output + && verifier_config.enabled() + && verifier_config.staged() + && verifier_config.final_gate(); + let assistant_emit_output = emit_output && !buffer_output; + let tool_emit_output = emit_output && !buffer_output; plugin_registry.initialize()?; let policy = permission_policy(permission_mode, &feature_config, &tool_registry) .map_err(std::io::Error::other)?; @@ -6643,14 +6753,14 @@ fn build_runtime_with_plugin_state( session_id, model, enable_tools, - emit_output, + assistant_emit_output, allowed_tools.clone(), tool_registry.clone(), progress_reporter, )?, CliToolExecutor::new( allowed_tools.clone(), - emit_output, + tool_emit_output, tool_registry.clone(), mcp_state.clone(), ), @@ -6661,7 +6771,31 @@ fn build_runtime_with_plugin_state( if emit_output { runtime = runtime.with_hook_progress_reporter(Box::new(CliHookProgressReporter)); } - Ok(BuiltRuntime::new(runtime, plugin_registry, mcp_state)) + if verifier_config.enabled() { + let cargo_config = runtime::CargoVerifierConfig { + legacy_mode: !verifier_config.staged(), + auto_mode: verifier_config.auto(), + quick_on_write: verifier_config.quick_on_write(), + final_gate: verifier_config.final_gate(), + max_output_bytes: verifier_config.max_output_bytes(), + rust_check: verifier_config.run_check(), + rust_clippy: verifier_config.run_clippy(), + rust_fmt: verifier_config.run_fmt(), + rust_test: verifier_config.run_test(), + rust_timeout: 
std::time::Duration::from_secs(verifier_config.timeout_secs()), + node_enabled: verifier_config.node_enabled(), + node_timeout: std::time::Duration::from_secs(verifier_config.node_timeout_secs()), + python_enabled: verifier_config.python_enabled(), + python_timeout: std::time::Duration::from_secs(verifier_config.python_timeout_secs()), + }; + runtime = runtime.with_verifier(Box::new(runtime::CargoVerifier::new(cargo_config))); + } + Ok(BuiltRuntime::new( + runtime, + plugin_registry, + mcp_state, + buffer_output, + )) } struct CliHookProgressReporter; @@ -6840,6 +6974,7 @@ fn resolve_cli_auth_source() -> Result> { Ok(resolve_cli_auth_source_for_cwd()?) } +#[allow(clippy::result_large_err)] fn resolve_cli_auth_source_for_cwd() -> Result { resolve_startup_auth_source(|| Ok(None)) } @@ -7070,7 +7205,9 @@ impl AnthropicRuntimeClient { fn request_ends_with_tool_result(request: &ApiRequest) -> bool { request .messages - .last() + .iter() + .rev() + .find(|message| message.role != MessageRole::Verification) .is_some_and(|message| message.role == MessageRole::Tool) } @@ -7183,6 +7320,649 @@ fn final_assistant_text(summary: &runtime::TurnSummary) -> String { .unwrap_or_default() } +#[derive(Debug, Clone, Default, PartialEq, Eq)] +struct FileChangeStats { + path: String, + added_lines: usize, + deleted_lines: usize, + preview_lines: Vec, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +struct TurnChangeStats { + files: Vec, + total_changed_lines: usize, + verified_roots: BTreeSet, +} + +fn collect_turn_change_stats(summary: &runtime::TurnSummary) -> TurnChangeStats { + let mut files = BTreeMap::::new(); + for message in &summary.tool_results { + for block in &message.blocks { + let ContentBlock::ToolResult { + tool_name, + output, + is_error, + .. 
+ } = block + else { + continue; + }; + if *is_error + || !matches!( + tool_name.as_str(), + "write_file" | "edit_file" | "Write" | "Edit" + ) + { + continue; + } + let Ok(parsed) = serde_json::from_str::(output) else { + continue; + }; + let Some(path) = parsed + .get("filePath") + .or_else(|| parsed.get("file_path")) + .and_then(Value::as_str) + else { + continue; + }; + + let entry = files + .entry(path.to_string()) + .or_insert_with(|| FileChangeStats { + path: path.to_string(), + ..FileChangeStats::default() + }); + if let Some(hunks) = parsed.get("structuredPatch").and_then(Value::as_array) { + for hunk in hunks { + if let Some(lines) = hunk.get("lines").and_then(Value::as_array) { + for line in lines.iter().filter_map(Value::as_str) { + if line.starts_with('+') { + entry.added_lines += 1; + } else if line.starts_with('-') { + entry.deleted_lines += 1; + } + if entry.preview_lines.len() < 8 { + entry.preview_lines.push(line.to_string()); + } + } + } + } + } + } + } + + let verified_roots = summary + .verification_reports + .iter() + .map(|report| report.project_root.display().to_string()) + .filter(|root| !root.is_empty()) + .collect::>(); + let files = files.into_values().collect::>(); + let total_changed_lines = files + .iter() + .map(|file| file.added_lines + file.deleted_lines) + .sum(); + + TurnChangeStats { + files, + total_changed_lines, + verified_roots, + } +} + +fn should_run_critic(summary: &runtime::TurnSummary, stats: &TurnChangeStats) -> bool { + let critic_enabled = env::var("CLAUDE_CODE_CRITIC").map_or(true, |value| value.trim() != "0"); + critic_enabled + && summary.verification_gate.passed + && (stats.files.len() >= CRITIC_CHANGED_FILES_THRESHOLD + || stats.total_changed_lines >= CRITIC_CHANGED_LINES_THRESHOLD + || stats.verified_roots.len() > 1) +} + +fn build_critic_prompt(summary: &runtime::TurnSummary, stats: &TurnChangeStats) -> String { + let mut lines = vec![ + "Review the recent code change.".to_string(), + "Return ONLY JSON with 
this exact shape:".to_string(), + r#"{"summary":"short summary","findings":[{"severity":"P0|P1|P2|P3","title":"short title","body":"one paragraph","file":"optional path"}]}"#.to_string(), + "Report only concrete bugs, regressions, security issues, or missing edge cases. If there are no real findings, return an empty findings array.".to_string(), + format!( + "Verification gate passed: {} (attempted: {}).", + summary.verification_gate.passed, summary.verification_gate.attempted + ), + format!( + "Changed files: {}. Total changed lines: {}. Verified roots: {}.", + stats.files.len(), + stats.total_changed_lines, + stats.verified_roots.len() + ), + String::new(), + "Diff summary:".to_string(), + ]; + + for file in &stats.files { + lines.push(format!( + "- {} (+{}, -{})", + file.path, file.added_lines, file.deleted_lines + )); + for preview in file.preview_lines.iter().take(4) { + lines.push(format!(" {preview}")); + } + } + + if !summary.verification_reports.is_empty() { + lines.push(String::new()); + lines.push("Verification summary:".to_string()); + for report in summary.verification_reports.iter().rev().take(6) { + lines.push(format!( + "- {} {} {}", + report.adapter_id, + report.phase.as_str(), + report.summary_text + )); + } + } + + let mut prompt = lines.join("\n"); + if prompt.chars().count() > 6_000 { + prompt = prompt.chars().take(6_000).collect(); + } + prompt +} + +fn critic_model_for(current_model: &str) -> String { + env::var("CLAUDE_CODE_CRITIC_MODEL").unwrap_or_else(|_| current_model.to_string()) +} + +fn blocking_findings(findings: &[ReviewFinding]) -> Vec { + findings + .iter() + .filter(|finding| matches!(finding.severity.as_str(), "P0" | "P1")) + .cloned() + .collect() +} + +fn build_critic_fix_prompt(findings: &[ReviewFinding]) -> String { + let mut lines = vec![ + "A post-change code review found blocking issues. 
Fix only the blocking findings below, preserve existing behavior unless the finding requires change, and rerun the minimal verification needed.".to_string(), + String::new(), + "Blocking findings:".to_string(), + ]; + for finding in findings { + lines.push(format!( + "- {} {}{}", + finding.severity, + finding.title, + finding + .file + .as_deref() + .map(|file| format!(" ({file})")) + .unwrap_or_default() + )); + lines.push(format!(" {}", finding.body)); + } + lines.join("\n") +} + +fn append_review_note(summary: &mut runtime::TurnSummary, review: &ReviewOutcome) { + use std::fmt::Write as _; + if review.findings.is_empty() { + return; + } + let mut note = format!("\n\nReview notes: {}\n", review.summary); + for finding in &review.findings { + let file_suffix = finding + .file + .as_deref() + .map(|file| format!(" ({file})")) + .unwrap_or_default(); + let _ = writeln!( + note, + "- {} {}{}: {}", + finding.severity, finding.title, file_suffix, finding.body + ); + } + + if let Some(message) = summary.assistant_messages.last_mut() { + message.blocks.push(ContentBlock::Text { text: note }); + } else { + summary.assistant_messages.push(ConversationMessage { + role: MessageRole::Assistant, + blocks: vec![ContentBlock::Text { text: note }], + usage: None, + }); + } +} + +fn merge_turn_summaries( + mut initial: runtime::TurnSummary, + follow_up: runtime::TurnSummary, +) -> runtime::TurnSummary { + initial + .assistant_messages + .extend(follow_up.assistant_messages); + initial.tool_results.extend(follow_up.tool_results); + initial + .verification_reports + .extend(follow_up.verification_reports); + initial.verification_gate = follow_up.verification_gate; + initial + .prompt_cache_events + .extend(follow_up.prompt_cache_events); + initial.iterations += follow_up.iterations; + initial.usage = TokenUsage { + input_tokens: initial.usage.input_tokens + follow_up.usage.input_tokens, + output_tokens: initial.usage.output_tokens + follow_up.usage.output_tokens, + 
cache_creation_input_tokens: initial.usage.cache_creation_input_tokens + + follow_up.usage.cache_creation_input_tokens, + cache_read_input_tokens: initial.usage.cache_read_input_tokens + + follow_up.usage.cache_read_input_tokens, + }; + initial.auto_compaction = follow_up.auto_compaction.or(initial.auto_compaction); + initial +} + +#[allow(clippy::too_many_lines)] +fn maybe_write_auto_skill_draft( + summary: &runtime::TurnSummary, +) -> Result, Box> { + let enabled = env::var("CLAUDE_CODE_AUTO_SKILLS").is_ok_and(|value| value.trim() == "1"); + if !enabled { + return Ok(None); + } + + let first_green_index = summary + .verification_reports + .iter() + .position(|report| report.phase == runtime::VerificationPhase::Final && report.is_success()) + .unwrap_or(summary.verification_reports.len()); + let failures_before_green = summary + .verification_reports + .iter() + .take(first_green_index) + .filter(|report| !report.is_success()) + .count(); + if summary.iterations < AUTO_SKILL_MIN_ITERATIONS + || failures_before_green < AUTO_SKILL_MIN_FAILURES + { + return Ok(None); + } + + let cwd = env::current_dir()?; + let mut root_counts = BTreeMap::::new(); + for report in &summary.verification_reports { + let root = report.project_root.display().to_string(); + if !root.is_empty() { + *root_counts.entry(root).or_default() += 1; + } + } + let Some((dominant_root, _)) = root_counts.into_iter().max_by_key(|(_, count)| *count) else { + return Ok(None); + }; + + let adapter_ids = summary + .verification_reports + .iter() + .map(|report| report.adapter_id.clone()) + .collect::>() + .into_iter() + .collect::>(); + let failure_kinds = summary + .verification_reports + .iter() + .filter_map(runtime::VerificationReport::primary_failure_kind) + .map(|kind| kind.as_str().to_string()) + .collect::>() + .into_iter() + .collect::>(); + let touched_files = summary + .verification_reports + .iter() + .flat_map(|report| report.touched_paths.iter()) + .filter_map(|path| 
path.strip_prefix(&cwd).ok().or(Some(path.as_path()))) + .map(|path| sanitize_skill_text(&path.display().to_string())) + .collect::>() + .into_iter() + .take(12) + .collect::>(); + + let root_name = Path::new(&dominant_root) + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or("workspace"); + let adapter_slug = adapter_ids + .first() + .map(|adapter| slugify_token(adapter)) + .filter(|slug| !slug.is_empty()) + .unwrap_or_else(|| String::from("mixed")); + let skill_slug = format!("repair-{}-{}", slugify_token(root_name), adapter_slug); + let skill_dir = cwd + .join(".claude") + .join("skills") + .join("generated") + .join(&skill_slug); + if skill_dir.exists() { + return Ok(None); + } + fs::create_dir_all(&skill_dir)?; + + let skill_name = format!("repair-{}-{}", sanitize_skill_text(root_name), adapter_slug); + let markdown = build_auto_skill_markdown( + &skill_name, + &adapter_ids, + &failure_kinds, + &touched_files, + failures_before_green, + summary.iterations, + ); + fs::write(skill_dir.join("SKILL.md"), markdown)?; + fs::write( + skill_dir.join("meta.json"), + serde_json::to_string_pretty(&json!({ + "status": "quarantined", + "confidence": "draft", + "requires_human_approval": true, + "promotion": { + "required_fixtures": 3, + "max_token_regression_pct": 10 + }, + "source_episode": { + "iterations": summary.iterations, + "failures_before_first_green": failures_before_green, + "adapters": adapter_ids, + "failure_kinds": failure_kinds, + "dominant_root": sanitize_skill_text(root_name), + "touched_files": touched_files + } + }))?, + )?; + + Ok(Some(skill_dir)) +} + +fn build_auto_skill_markdown( + skill_name: &str, + adapter_ids: &[String], + failure_kinds: &[String], + touched_files: &[String], + failures_before_green: usize, + iterations: usize, +) -> String { + let lines = vec![ + "---".to_string(), + format!("name: {skill_name}"), + "status: quarantined".to_string(), + "source: auto-generated".to_string(), + "---".to_string(), + String::new(), + "# 
Auto-generated Skill Draft".to_string(), + String::new(), + "This draft was generated from normalized verification metadata only. Human review is required before promotion.".to_string(), + String::new(), + "## Trigger".to_string(), + format!( + "- Repeated verification failures before green: {failures_before_green} across {iterations} iterations." + ), + format!("- Adapters involved: {}.", adapter_ids.join(", ")), + format!("- Failure kinds observed: {}.", failure_kinds.join(", ")), + String::new(), + "## Workflow".to_string(), + "1. Reproduce the failing verifier step in the smallest change-scoped target available.".to_string(), + "2. Inspect the touched files and adjacent tests before broadening scope.".to_string(), + "3. Fix the highest-confidence code or config fault first.".to_string(), + "4. Re-run quick verification before broader final-gate checks.".to_string(), + String::new(), + "## Validation".to_string(), + format!("- Prefer targeted adapter commands for: {}.", adapter_ids.join(", ")), + format!("- Re-check touched files: {}.", touched_files.join(", ")), + String::new(), + "## Safety".to_string(), + "- Do not promote this draft automatically.".to_string(), + "- Keep generated guidance limited to normalized metadata; do not paste raw tool output.".to_string(), + ]; + lines.join("\n") +} + +/// Outcome of replaying a generated auto-skill against a stored fixture. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct AutoSkillFixtureResult { + pub passed: bool, +} + +/// Errors from [`promote_auto_skill`]. Deliberately explicit so CI can report. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub enum AutoSkillPromotionError { + NotEnoughFixtures { have: usize, need: usize }, + FixtureFailed { index: usize }, + HumanApprovalMissing, + TokenRegressionExceeded { delta_pct: i64, limit_pct: i64 }, + MetaRead(String), + MetaWrite(String), +} + +impl std::fmt::Display for AutoSkillPromotionError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::NotEnoughFixtures { have, need } => { + write!(f, "auto-skill requires {need} fixture replays, got {have}") + } + Self::FixtureFailed { index } => { + write!(f, "auto-skill fixture #{index} failed during replay") + } + Self::HumanApprovalMissing => { + write!( + f, + "auto-skill requires explicit human approval before promotion" + ) + } + Self::TokenRegressionExceeded { + delta_pct, + limit_pct, + } => write!( + f, + "auto-skill token regression {delta_pct}% exceeds {limit_pct}% limit" + ), + Self::MetaRead(msg) => write!(f, "cannot read meta.json: {msg}"), + Self::MetaWrite(msg) => write!(f, "cannot update meta.json: {msg}"), + } + } +} + +impl std::error::Error for AutoSkillPromotionError {} + +pub const AUTO_SKILL_REQUIRED_FIXTURES: usize = 3; +pub const AUTO_SKILL_MAX_TOKEN_REGRESSION_PCT: i64 = 10; + +/// Promote a quarantined auto-skill draft to `status: active` after gating: +/// * every fixture in `fixtures` must have `passed = true`, +/// * there must be at least [`AUTO_SKILL_REQUIRED_FIXTURES`] of them, +/// * `human_approved` must be explicitly true, +/// * tokens spent with the skill must not regress more than +/// [`AUTO_SKILL_MAX_TOKEN_REGRESSION_PCT`]% vs. the baseline. +/// +/// On success, updates `meta.json` to `status: active` and returns the path. 
+pub fn promote_auto_skill( + skill_dir: &Path, + fixtures: &[AutoSkillFixtureResult], + baseline_tokens: u64, + current_tokens: u64, + human_approved: bool, +) -> Result { + if fixtures.len() < AUTO_SKILL_REQUIRED_FIXTURES { + return Err(AutoSkillPromotionError::NotEnoughFixtures { + have: fixtures.len(), + need: AUTO_SKILL_REQUIRED_FIXTURES, + }); + } + for (index, fixture) in fixtures.iter().enumerate() { + if !fixture.passed { + return Err(AutoSkillPromotionError::FixtureFailed { index }); + } + } + if !human_approved { + return Err(AutoSkillPromotionError::HumanApprovalMissing); + } + if baseline_tokens > 0 { + let delta = i128::from(current_tokens) - i128::from(baseline_tokens); + let delta_pct = (delta * 100) / i128::from(baseline_tokens); + if delta_pct > i128::from(AUTO_SKILL_MAX_TOKEN_REGRESSION_PCT) { + return Err(AutoSkillPromotionError::TokenRegressionExceeded { + delta_pct: i64::try_from(delta_pct).unwrap_or(i64::MAX), + limit_pct: AUTO_SKILL_MAX_TOKEN_REGRESSION_PCT, + }); + } + } + + let meta_path = skill_dir.join("meta.json"); + let raw = fs::read_to_string(&meta_path) + .map_err(|error| AutoSkillPromotionError::MetaRead(error.to_string()))?; + let mut meta: serde_json::Value = serde_json::from_str(&raw) + .map_err(|error| AutoSkillPromotionError::MetaRead(error.to_string()))?; + if let Some(obj) = meta.as_object_mut() { + obj.insert("status".to_string(), json!("active")); + obj.insert("requires_human_approval".to_string(), json!(false)); + obj.insert( + "promoted_at_unix_secs".to_string(), + json!(std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or_default()), + ); + obj.insert( + "promotion_evidence".to_string(), + json!({ + "fixtures_passed": fixtures.len(), + "baseline_tokens": baseline_tokens, + "current_tokens": current_tokens, + "human_approved": human_approved, + }), + ); + } + fs::write( + &meta_path, + serde_json::to_string_pretty(&meta) + .map_err(|error| 
AutoSkillPromotionError::MetaWrite(error.to_string()))?, + ) + .map_err(|error| AutoSkillPromotionError::MetaWrite(error.to_string()))?; + Ok(skill_dir.to_path_buf()) +} + +/// Parameters for [`run_promote_auto_skill_cli`]. Kept as a plain struct so +/// both the subcommand dispatcher and integration tests can drive it. +#[derive(Debug, Clone)] +pub struct PromoteAutoSkillArgs { + /// Path to the quarantined skill directory (`.claude/skills/generated//`). + pub skill_dir: PathBuf, + /// JSON file: array of `{ "passed": bool }` objects, one per replay fixture. + pub fixtures_json: PathBuf, + pub baseline_tokens: u64, + pub current_tokens: u64, + pub approved: bool, +} + +pub fn run_promote_auto_skill_cli( + args: &PromoteAutoSkillArgs, +) -> Result> { + let raw = fs::read_to_string(&args.fixtures_json)?; + let entries: Vec = serde_json::from_str(&raw)?; + let fixtures: Vec = entries + .into_iter() + .map(|entry| AutoSkillFixtureResult { + passed: entry + .get("passed") + .and_then(serde_json::Value::as_bool) + .unwrap_or(false), + }) + .collect(); + + let path = promote_auto_skill( + &args.skill_dir, + &fixtures, + args.baseline_tokens, + args.current_tokens, + args.approved, + )?; + Ok(path) +} + +fn sanitize_skill_text(value: &str) -> String { + value + .chars() + .map(|ch| { + if ch.is_ascii_alphanumeric() || matches!(ch, '/' | '\\' | '-' | '_' | '.' 
| ' ') { + ch + } else { + ' ' + } + }) + .collect::() + .split_whitespace() + .collect::>() + .join(" ") +} + +fn slugify_token(value: &str) -> String { + let mut slug = value + .chars() + .map(|ch| { + if ch.is_ascii_alphanumeric() { + ch.to_ascii_lowercase() + } else { + '-' + } + }) + .collect::(); + while slug.contains("--") { + slug = slug.replace("--", "-"); + } + slug.trim_matches('-').to_string() +} + +fn print_buffered_turn_summary( + summary: &runtime::TurnSummary, +) -> Result<(), Box> { + let renderer = TerminalRenderer::new(); + let mut stdout = io::stdout(); + + for message in &summary.tool_results { + for block in &message.blocks { + if let ContentBlock::ToolResult { + tool_name, + output, + is_error, + .. + } = block + { + writeln!( + stdout, + "\n{}", + format_tool_result(tool_name, output, *is_error) + )?; + } + } + } + + for report in &summary.verification_reports { + writeln!( + stdout, + "\n[verification {}:{}] {}", + report.phase.as_str(), + report.adapter_id, + report.status.as_str() + )?; + writeln!(stdout, "{}", report.summary_text)?; + } + + let final_text = final_assistant_text(summary); + if !final_text.trim().is_empty() { + writeln!(stdout)?; + renderer.stream_markdown(&final_text, &mut stdout)?; + writeln!(stdout)?; + } + + Ok(()) +} + fn collect_tool_uses(summary: &runtime::TurnSummary) -> Vec { summary .assistant_messages @@ -7221,6 +8001,45 @@ fn collect_tool_results(summary: &runtime::TurnSummary) -> Vec Vec { + summary + .verification_reports + .iter() + .map(|report| { + json!({ + "report_id": report.report_id.clone(), + "phase": report.phase.as_str(), + "adapter_id": report.adapter_id.clone(), + "project_root": report.project_root.display().to_string(), + "touched_paths": report + .touched_paths + .iter() + .map(|path| path.display().to_string()) + .collect::>(), + "status": report.status.as_str(), + "summary_text": report.summary_text.clone(), + "primary_failure_kind": 
report.primary_failure_kind().map(runtime::VerificationFailureKind::as_str), + "steps": report.steps.iter().map(|step| json!({ + "adapter": step.adapter.clone(), + "project_root": step.project_root.display().to_string(), + "label": step.label.clone(), + "command": step.command.clone(), + "phase": step.phase.as_str(), + "status": step.status.as_str(), + "failure_kind": step.failure_kind.map(runtime::VerificationFailureKind::as_str), + "duration_ms": step.duration_ms, + "truncated_output": step.truncated_output.clone(), + "step_kind": step.step_kind.clone(), + "target_scope": step.target_scope.clone(), + "package_name": step.package_name.clone(), + "package_manager": step.package_manager.clone(), + "launcher_kind": step.launcher_kind.clone(), + })).collect::>(), + }) + }) + .collect() +} + fn collect_prompt_cache_events(summary: &runtime::TurnSummary) -> Vec { summary .prompt_cache_events @@ -8131,7 +8950,10 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec { .iter() .filter_map(|message| { let role = match message.role { - MessageRole::System | MessageRole::User | MessageRole::Tool => "user", + MessageRole::System + | MessageRole::User + | MessageRole::Tool + | MessageRole::Verification => "user", MessageRole::Assistant => "assistant", }; let content = message @@ -8157,6 +8979,11 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec { }], is_error: *is_error, }, + ContentBlock::VerificationReport { summary_text, .. 
} => { + InputContentBlock::Text { + text: summary_text.clone(), + } + } }) .collect::>(); (!content.is_empty()).then(|| InputMessage { @@ -8378,6 +9205,8 @@ mod tests { use std::fs; use std::io::{Read, Write}; use std::net::TcpListener; + #[cfg(unix)] + use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::process::Command; use std::sync::{Mutex, MutexGuard, OnceLock}; @@ -8421,6 +9250,7 @@ mod tests { request_id: Some("req_jobdori_789".to_string()), body: String::new(), retryable: true, + suggested_action: None, }; let rendered = format_user_visible_api_error("session-issue-22", &error); @@ -8443,6 +9273,7 @@ mod tests { request_id: Some("req_jobdori_790".to_string()), body: String::new(), retryable: true, + suggested_action: None, }), }; @@ -8506,6 +9337,7 @@ mod tests { request_id: Some("req_ctx_456".to_string()), body: String::new(), retryable: false, + suggested_action: None, }; let rendered = format_user_visible_api_error("session-issue-32", &error); @@ -8537,6 +9369,7 @@ mod tests { request_id: Some("req_ctx_retry_789".to_string()), body: String::new(), retryable: false, + suggested_action: None, }), }; @@ -8615,45 +9448,89 @@ mod tests { .expect("skill file should write"); } + fn make_executable(path: &Path) { + #[cfg(unix)] + { + let mut permissions = fs::metadata(path) + .expect("script metadata should load") + .permissions(); + permissions.set_mode(0o755); + fs::set_permissions(path, permissions).expect("script permissions should update"); + } + } + + fn script_name(stem: &str) -> String { + if cfg!(windows) { + format!("{stem}.cmd") + } else { + format!("{stem}.sh") + } + } + + fn write_script(path: &Path, unix_body: &str, windows_body: &str) { + let body = if cfg!(windows) { + windows_body + } else { + unix_body + }; + fs::write(path, body).expect("script should write"); + make_executable(path); + } + + fn python_command() -> &'static str { + if cfg!(windows) { + "python" + } else { + "python3" + } + } + fn 
write_plugin_fixture(root: &Path, name: &str, include_hooks: bool, include_lifecycle: bool) { fs::create_dir_all(root.join(".claude-plugin")).expect("manifest dir"); + let pre_hook_name = script_name("pre"); + let init_name = script_name("init"); + let shutdown_name = script_name("shutdown"); if include_hooks { fs::create_dir_all(root.join("hooks")).expect("hooks dir"); - fs::write( - root.join("hooks").join("pre.sh"), + write_script( + &root.join("hooks").join(&pre_hook_name), "#!/bin/sh\nprintf 'plugin pre hook'\n", - ) - .expect("write hook"); + "@echo off\r\necho plugin pre hook\r\n", + ); } if include_lifecycle { fs::create_dir_all(root.join("lifecycle")).expect("lifecycle dir"); - fs::write( - root.join("lifecycle").join("init.sh"), + write_script( + &root.join("lifecycle").join(&init_name), "#!/bin/sh\nprintf 'init\\n' >> lifecycle.log\n", - ) - .expect("write init lifecycle"); - fs::write( - root.join("lifecycle").join("shutdown.sh"), + "@echo off\r\necho init>> lifecycle.log\r\n", + ); + write_script( + &root.join("lifecycle").join(&shutdown_name), "#!/bin/sh\nprintf 'shutdown\\n' >> lifecycle.log\n", - ) - .expect("write shutdown lifecycle"); + "@echo off\r\necho shutdown>> lifecycle.log\r\n", + ); } - let hooks = if include_hooks { - ",\n \"hooks\": {\n \"PreToolUse\": [\"./hooks/pre.sh\"]\n }" - } else { - "" - }; - let lifecycle = if include_lifecycle { - ",\n \"lifecycle\": {\n \"Init\": [\"./lifecycle/init.sh\"],\n \"Shutdown\": [\"./lifecycle/shutdown.sh\"]\n }" - } else { - "" - }; + let mut manifest = json!({ + "name": name, + "version": "1.0.0", + "description": "runtime plugin fixture", + }); + if include_hooks { + manifest["hooks"] = json!({ + "PreToolUse": [format!("./hooks/{pre_hook_name}")], + }); + } + if include_lifecycle { + manifest["lifecycle"] = json!({ + "Init": [format!("./lifecycle/{init_name}")], + "Shutdown": [format!("./lifecycle/{shutdown_name}")], + }); + } fs::write( root.join(".claude-plugin").join("plugin.json"), - format!( 
- "{{\n \"name\": \"{name}\",\n \"version\": \"1.0.0\",\n \"description\": \"runtime plugin fixture\"{hooks}{lifecycle}\n}}" - ), + serde_json::to_string_pretty(&manifest).expect("manifest should serialize"), ) .expect("write plugin manifest"); } @@ -9132,8 +10009,15 @@ mod tests { #[test] fn rejects_unknown_allowed_tools() { - let error = parse_args(&["--allowedTools".to_string(), "teleport".to_string()]) - .expect_err("tool should be rejected"); + let _guard = env_lock(); + let cwd = temp_dir(); + fs::create_dir_all(&cwd).expect("temp cwd should exist"); + std::env::remove_var("RUSTY_CLAUDE_PERMISSION_MODE"); + let error = with_current_dir(&cwd, || { + parse_args(&["--allowedTools".to_string(), "teleport".to_string()]) + .expect_err("tool should be rejected") + }); + let _ = fs::remove_dir_all(cwd); assert!(error.contains("unsupported tool in --allowedTools: teleport")); } @@ -9678,102 +10562,114 @@ mod tests { } #[test] + #[allow(clippy::too_many_lines)] fn parses_direct_agents_mcp_and_skills_slash_commands() { - assert_eq!( - parse_args(&["/agents".to_string()]).expect("/agents should parse"), - CliAction::Agents { - args: None, - output_format: CliOutputFormat::Text - } - ); - assert_eq!( - parse_args(&["/mcp".to_string(), "show".to_string(), "demo".to_string()]) - .expect("/mcp show demo should parse"), - CliAction::Mcp { - args: Some("show demo".to_string()), - output_format: CliOutputFormat::Text, - } - ); - assert_eq!( - parse_args(&["/skills".to_string()]).expect("/skills should parse"), - CliAction::Skills { - args: None, - output_format: CliOutputFormat::Text, - } - ); - assert_eq!( - parse_args(&["/skill".to_string()]).expect("/skill should parse"), - CliAction::Skills { - args: None, - output_format: CliOutputFormat::Text, - } - ); - assert_eq!( - parse_args(&["/skills".to_string(), "help".to_string()]) - .expect("/skills help should parse"), - CliAction::Skills { - args: Some("help".to_string()), - output_format: CliOutputFormat::Text, - } - ); - 
assert_eq!( - parse_args(&["/skill".to_string(), "list".to_string()]) - .expect("/skill list should parse"), - CliAction::Skills { - args: Some("list".to_string()), - output_format: CliOutputFormat::Text, - } - ); - assert_eq!( - parse_args(&[ - "/skills".to_string(), - "help".to_string(), - "overview".to_string() - ]) - .expect("/skills help overview should invoke"), - CliAction::Prompt { - prompt: "$help overview".to_string(), - model: DEFAULT_MODEL.to_string(), - output_format: CliOutputFormat::Text, - allowed_tools: None, - permission_mode: crate::default_permission_mode(), - compact: false, - base_commit: None, - reasoning_effort: None, - allow_broad_cwd: false, - } - ); - assert_eq!( - parse_args(&[ - "/skills".to_string(), - "install".to_string(), - "./fixtures/help-skill".to_string(), - ]) - .expect("/skills install should parse"), - CliAction::Skills { - args: Some("install ./fixtures/help-skill".to_string()), - output_format: CliOutputFormat::Text, - } - ); - assert_eq!( - parse_args(&["/skills".to_string(), "/test".to_string()]) - .expect("/skills /test should normalize to a single skill prompt prefix"), - CliAction::Prompt { - prompt: "$test".to_string(), - model: DEFAULT_MODEL.to_string(), - output_format: CliOutputFormat::Text, - allowed_tools: None, - permission_mode: crate::default_permission_mode(), - compact: false, - base_commit: None, - reasoning_effort: None, - allow_broad_cwd: false, - } - ); - let error = parse_args(&["/status".to_string()]) - .expect_err("/status should remain REPL-only when invoked directly"); - assert!(error.contains("interactive-only")); - assert!(error.contains("claw --resume SESSION.jsonl /status")); + let _guard = env_lock(); + let cwd = temp_dir(); + fs::create_dir_all(&cwd).expect("temp cwd should exist"); + std::env::remove_var("RUSTY_CLAUDE_PERMISSION_MODE"); + + with_current_dir(&cwd, || { + let permission_mode = crate::default_permission_mode(); + + assert_eq!( + 
parse_args(&["/agents".to_string()]).expect("/agents should parse"), + CliAction::Agents { + args: None, + output_format: CliOutputFormat::Text + } + ); + assert_eq!( + parse_args(&["/mcp".to_string(), "show".to_string(), "demo".to_string()]) + .expect("/mcp show demo should parse"), + CliAction::Mcp { + args: Some("show demo".to_string()), + output_format: CliOutputFormat::Text, + } + ); + assert_eq!( + parse_args(&["/skills".to_string()]).expect("/skills should parse"), + CliAction::Skills { + args: None, + output_format: CliOutputFormat::Text, + } + ); + assert_eq!( + parse_args(&["/skill".to_string()]).expect("/skill should parse"), + CliAction::Skills { + args: None, + output_format: CliOutputFormat::Text, + } + ); + assert_eq!( + parse_args(&["/skills".to_string(), "help".to_string()]) + .expect("/skills help should parse"), + CliAction::Skills { + args: Some("help".to_string()), + output_format: CliOutputFormat::Text, + } + ); + assert_eq!( + parse_args(&["/skill".to_string(), "list".to_string()]) + .expect("/skill list should parse"), + CliAction::Skills { + args: Some("list".to_string()), + output_format: CliOutputFormat::Text, + } + ); + assert_eq!( + parse_args(&[ + "/skills".to_string(), + "help".to_string(), + "overview".to_string() + ]) + .expect("/skills help overview should invoke"), + CliAction::Prompt { + prompt: "$help overview".to_string(), + model: DEFAULT_MODEL.to_string(), + output_format: CliOutputFormat::Text, + allowed_tools: None, + permission_mode, + compact: false, + base_commit: None, + reasoning_effort: None, + allow_broad_cwd: false, + } + ); + assert_eq!( + parse_args(&[ + "/skills".to_string(), + "install".to_string(), + "./fixtures/help-skill".to_string(), + ]) + .expect("/skills install should parse"), + CliAction::Skills { + args: Some("install ./fixtures/help-skill".to_string()), + output_format: CliOutputFormat::Text, + } + ); + assert_eq!( + parse_args(&["/skills".to_string(), "/test".to_string()]) + .expect("/skills /test 
should normalize to a single skill prompt prefix"), + CliAction::Prompt { + prompt: "$test".to_string(), + model: DEFAULT_MODEL.to_string(), + output_format: CliOutputFormat::Text, + allowed_tools: None, + permission_mode, + compact: false, + base_commit: None, + reasoning_effort: None, + allow_broad_cwd: false, + } + ); + let error = parse_args(&["/status".to_string()]) + .expect_err("/status should remain REPL-only when invoked directly"); + assert!(error.contains("interactive-only")); + assert!(error.contains("claw --resume SESSION.jsonl /status")); + }); + + let _ = fs::remove_dir_all(cwd); } #[test] @@ -11363,7 +12259,7 @@ UU conflicted.rs", let pre_hooks = state.feature_config.hooks().pre_tool_use(); assert_eq!(pre_hooks.len(), 1); assert!( - pre_hooks[0].ends_with("hooks/pre.sh"), + pre_hooks[0].ends_with(&format!("hooks/{}", script_name("pre"))), "expected installed plugin hook path, got {pre_hooks:?}" ); @@ -11375,6 +12271,7 @@ UU conflicted.rs", #[test] #[allow(clippy::too_many_lines)] fn build_runtime_plugin_state_discovers_mcp_tools_and_surfaces_pending_servers() { + let _guard = env_lock(); let config_home = temp_dir(); let workspace = temp_dir(); fs::create_dir_all(&config_home).expect("config home"); @@ -11383,21 +12280,19 @@ UU conflicted.rs", write_mcp_server_fixture(&script_path); fs::write( config_home.join("settings.json"), - format!( - r#"{{ - "mcpServers": {{ - "alpha": {{ - "command": "python3", - "args": ["{}"] - }}, - "broken": {{ - "command": "python3", - "args": ["-c", "import sys; sys.exit(0)"] - }} - }} - }}"#, - script_path.to_string_lossy() - ), + serde_json::to_string_pretty(&json!({ + "mcpServers": { + "alpha": { + "command": python_command(), + "args": [script_path.to_string_lossy().to_string()], + }, + "broken": { + "command": python_command(), + "args": ["-c", "import sys; sys.exit(0)"], + } + } + })) + .expect("mcp settings should serialize"), ) .expect("write mcp settings"); @@ -11583,7 +12478,9 @@ UU conflicted.rs", 
.expect("runtime should build"); assert_eq!( - fs::read_to_string(&log_path).expect("init log should exist"), + fs::read_to_string(&log_path) + .expect("init log should exist") + .replace("\r\n", "\n"), "init\n" ); @@ -11592,7 +12489,9 @@ UU conflicted.rs", .expect("plugin shutdown should succeed"); assert_eq!( - fs::read_to_string(&log_path).expect("shutdown log should exist"), + fs::read_to_string(&log_path) + .expect("shutdown log should exist") + .replace("\r\n", "\n"), "init\nshutdown\n" ); @@ -11755,8 +12654,15 @@ fn write_mcp_server_fixture(script_path: &Path) { #[cfg(test)] mod sandbox_report_tests { - use super::{format_sandbox_report, HookAbortMonitor}; - use runtime::HookAbortSignal; + use super::{ + collect_turn_change_stats, format_sandbox_report, maybe_write_auto_skill_draft, + promote_auto_skill, run_promote_auto_skill_cli, should_run_critic, AutoSkillFixtureResult, + AutoSkillPromotionError, HookAbortMonitor, PromoteAutoSkillArgs, + AUTO_SKILL_MAX_TOKEN_REGRESSION_PCT, AUTO_SKILL_REQUIRED_FIXTURES, + }; + use runtime::{ContentBlock, ConversationMessage, HookAbortSignal, MessageRole, TokenUsage}; + use serde_json::json; + use std::path::Path; use std::sync::mpsc; use std::time::Duration; @@ -11788,6 +12694,324 @@ mod sandbox_report_tests { assert!(!abort_signal.is_aborted()); } + fn sample_turn_summary() -> runtime::TurnSummary { + runtime::TurnSummary { + assistant_messages: vec![ConversationMessage { + role: MessageRole::Assistant, + blocks: vec![ContentBlock::Text { + text: "done".to_string(), + }], + usage: None, + }], + tool_results: vec![ConversationMessage { + role: MessageRole::User, + blocks: vec![ContentBlock::ToolResult { + tool_use_id: "tool-1".to_string(), + tool_name: "write_file".to_string(), + output: json!({ + "filePath": "src/lib.rs", + "structuredPatch": [{ + "lines": ["-old", "+new", "+extra"] + }] + }) + .to_string(), + is_error: false, + }], + usage: None, + }], + verification_reports: vec![], + verification_gate: 
runtime::VerificationGateStatus { + attempted: true, + passed: true, + report_ids: vec!["report-1".to_string()], + }, + prompt_cache_events: vec![], + iterations: 1, + usage: TokenUsage::default(), + auto_compaction: None, + } + } + + fn sample_verification_report( + report_id: &str, + phase: runtime::VerificationPhase, + status: runtime::VerificationStatus, + project_root: &Path, + ) -> runtime::VerificationReport { + runtime::VerificationReport { + report_id: report_id.to_string(), + phase, + adapter_id: "rust-cargo".to_string(), + project_root: project_root.to_path_buf(), + touched_paths: vec![project_root.join("src/lib.rs")], + status, + summary_text: "report".to_string(), + steps: vec![], + } + } + + #[test] + fn collect_turn_change_stats_counts_changed_files_and_lines() { + let summary = sample_turn_summary(); + let stats = collect_turn_change_stats(&summary); + + assert_eq!(stats.files.len(), 1); + assert_eq!(stats.files[0].path, "src/lib.rs"); + assert_eq!(stats.files[0].added_lines, 2); + assert_eq!(stats.files[0].deleted_lines, 1); + assert_eq!(stats.total_changed_lines, 3); + } + + #[test] + fn should_run_critic_requires_green_nontrivial_turn() { + let mut summary = sample_turn_summary(); + summary.tool_results = (0..4) + .map(|index| ConversationMessage { + role: MessageRole::User, + blocks: vec![ContentBlock::ToolResult { + tool_use_id: format!("tool-{index}"), + tool_name: "write_file".to_string(), + output: json!({ + "filePath": format!("src/file-{index}.rs"), + "structuredPatch": [{"lines": ["-old", "+new"]}] + }) + .to_string(), + is_error: false, + }], + usage: None, + }) + .collect(); + let stats = collect_turn_change_stats(&summary); + assert!(should_run_critic(&summary, &stats)); + + summary.verification_gate.passed = false; + assert!(!should_run_critic(&summary, &stats)); + } + + #[test] + fn auto_skill_draft_writes_quarantined_skill_when_enabled() { + let original_dir = std::env::current_dir().expect("cwd"); + let original_flag = 
std::env::var("CLAUDE_CODE_AUTO_SKILLS").ok(); + let root = std::env::temp_dir().join(format!( + "claw-auto-skill-{}", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("time") + .as_nanos() + )); + std::fs::create_dir_all(root.join("src")).expect("workspace should exist"); + std::env::set_current_dir(&root).expect("set cwd"); + std::env::set_var("CLAUDE_CODE_AUTO_SKILLS", "1"); + + let summary = runtime::TurnSummary { + assistant_messages: vec![], + tool_results: vec![], + verification_reports: vec![ + sample_verification_report( + "report-1", + runtime::VerificationPhase::Quick, + runtime::VerificationStatus::Failed, + &root, + ), + sample_verification_report( + "report-2", + runtime::VerificationPhase::Quick, + runtime::VerificationStatus::Failed, + &root, + ), + sample_verification_report( + "report-3", + runtime::VerificationPhase::Final, + runtime::VerificationStatus::Passed, + &root, + ), + ], + verification_gate: runtime::VerificationGateStatus { + attempted: true, + passed: true, + report_ids: vec!["report-3".to_string()], + }, + prompt_cache_events: vec![], + iterations: 3, + usage: TokenUsage::default(), + auto_compaction: None, + }; + + let generated = maybe_write_auto_skill_draft(&summary).expect("skill draft should write"); + let skill_dir = generated.expect("skill draft path"); + assert!(skill_dir.join("SKILL.md").is_file()); + assert!(skill_dir.join("meta.json").is_file()); + + std::env::set_current_dir(original_dir).expect("restore cwd"); + match original_flag { + Some(value) => std::env::set_var("CLAUDE_CODE_AUTO_SKILLS", value), + None => std::env::remove_var("CLAUDE_CODE_AUTO_SKILLS"), + } + let _ = std::fs::remove_dir_all(root); + } + + fn write_quarantined_skill(dir: &Path) { + std::fs::create_dir_all(dir).expect("skill dir"); + std::fs::write(dir.join("SKILL.md"), "# draft").expect("SKILL.md"); + let meta = json!({ + "status": "quarantined", + "requires_human_approval": true, + "promotion": { + 
"required_fixtures": AUTO_SKILL_REQUIRED_FIXTURES, + "max_token_regression_pct": AUTO_SKILL_MAX_TOKEN_REGRESSION_PCT + } + }); + std::fs::write( + dir.join("meta.json"), + serde_json::to_string_pretty(&meta).unwrap(), + ) + .expect("meta.json"); + } + + fn promo_tmpdir(label: &str) -> std::path::PathBuf { + let path = std::env::temp_dir().join(format!( + "claw-promo-{label}-{}", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("time") + .as_nanos() + )); + std::fs::create_dir_all(&path).expect("promo dir"); + path + } + + #[test] + fn promote_auto_skill_requires_three_fixtures() { + let dir = promo_tmpdir("few-fixtures"); + write_quarantined_skill(&dir); + let fixtures = vec![AutoSkillFixtureResult { passed: true }]; + let err = promote_auto_skill(&dir, &fixtures, 1000, 1000, true).expect_err("should fail"); + assert!(matches!( + err, + AutoSkillPromotionError::NotEnoughFixtures { have: 1, need: 3 } + )); + let _ = std::fs::remove_dir_all(dir); + } + + #[test] + fn promote_auto_skill_fails_when_any_fixture_fails() { + let dir = promo_tmpdir("fixture-fail"); + write_quarantined_skill(&dir); + let fixtures = vec![ + AutoSkillFixtureResult { passed: true }, + AutoSkillFixtureResult { passed: false }, + AutoSkillFixtureResult { passed: true }, + ]; + let err = promote_auto_skill(&dir, &fixtures, 1000, 1000, true).expect_err("should fail"); + assert!(matches!( + err, + AutoSkillPromotionError::FixtureFailed { index: 1 } + )); + let _ = std::fs::remove_dir_all(dir); + } + + #[test] + fn promote_auto_skill_requires_human_approval() { + let dir = promo_tmpdir("no-approval"); + write_quarantined_skill(&dir); + let fixtures = vec![AutoSkillFixtureResult { passed: true }; 3]; + let err = promote_auto_skill(&dir, &fixtures, 1000, 1000, false).expect_err("should fail"); + assert!(matches!(err, AutoSkillPromotionError::HumanApprovalMissing)); + let _ = std::fs::remove_dir_all(dir); + } + + #[test] + fn 
promote_auto_skill_enforces_10pct_token_budget() { + let dir = promo_tmpdir("token-regress"); + write_quarantined_skill(&dir); + let fixtures = vec![AutoSkillFixtureResult { passed: true }; 3]; + let err = promote_auto_skill(&dir, &fixtures, 1000, 1200, true).expect_err("should fail"); + match err { + AutoSkillPromotionError::TokenRegressionExceeded { + delta_pct, + limit_pct, + } => { + assert_eq!(delta_pct, 20); + assert_eq!(limit_pct, AUTO_SKILL_MAX_TOKEN_REGRESSION_PCT); + } + other => panic!("unexpected error: {other:?}"), + } + let _ = std::fs::remove_dir_all(dir); + } + + #[test] + fn promote_auto_skill_activates_meta_on_success() { + let dir = promo_tmpdir("promote-ok"); + write_quarantined_skill(&dir); + let fixtures = vec![AutoSkillFixtureResult { passed: true }; 3]; + let promoted = + promote_auto_skill(&dir, &fixtures, 1000, 1050, true).expect("should promote"); + let meta_raw = std::fs::read_to_string(promoted.join("meta.json")).expect("meta readable"); + let meta: serde_json::Value = serde_json::from_str(&meta_raw).expect("meta json"); + assert_eq!(meta["status"], "active"); + assert_eq!(meta["requires_human_approval"], false); + assert_eq!(meta["promotion_evidence"]["fixtures_passed"], 3); + let _ = std::fs::remove_dir_all(dir); + } + + #[test] + fn run_promote_auto_skill_cli_reads_fixtures_and_promotes() { + let dir = promo_tmpdir("cli-wrapper"); + write_quarantined_skill(&dir); + let fixtures_path = dir.join("fixtures.json"); + std::fs::write( + &fixtures_path, + serde_json::to_string(&json!([ + { "passed": true }, + { "passed": true }, + { "passed": true } + ])) + .unwrap(), + ) + .expect("fixtures.json"); + let args = PromoteAutoSkillArgs { + skill_dir: dir.clone(), + fixtures_json: fixtures_path, + baseline_tokens: 1_000, + current_tokens: 1_050, + approved: true, + }; + let promoted = run_promote_auto_skill_cli(&args).expect("cli wrapper should promote"); + assert_eq!(promoted, dir); + let meta: serde_json::Value = + 
serde_json::from_str(&std::fs::read_to_string(dir.join("meta.json")).expect("meta")) + .expect("meta json"); + assert_eq!(meta["status"], "active"); + let _ = std::fs::remove_dir_all(dir); + } + + #[test] + fn run_promote_auto_skill_cli_surfaces_fixture_failures() { + let dir = promo_tmpdir("cli-wrapper-fail"); + write_quarantined_skill(&dir); + let fixtures_path = dir.join("fixtures.json"); + std::fs::write( + &fixtures_path, + serde_json::to_string(&json!([ + { "passed": true }, + { "passed": false }, + { "passed": true } + ])) + .unwrap(), + ) + .expect("fixtures.json"); + let args = PromoteAutoSkillArgs { + skill_dir: dir.clone(), + fixtures_json: fixtures_path, + baseline_tokens: 1_000, + current_tokens: 1_000, + approved: true, + }; + let err = run_promote_auto_skill_cli(&args).expect_err("should fail"); + assert!(err.to_string().contains("fixture #1")); + let _ = std::fs::remove_dir_all(dir); + } + #[test] fn hook_abort_monitor_propagates_interrupt() { let abort_signal = HookAbortSignal::new(); diff --git a/rust/crates/rusty-claude-cli/tests/compact_output.rs b/rust/crates/rusty-claude-cli/tests/compact_output.rs index 456862fa1f..a9ffd00b38 100644 --- a/rust/crates/rusty-claude-cli/tests/compact_output.rs +++ b/rust/crates/rusty-claude-cli/tests/compact_output.rs @@ -141,11 +141,24 @@ fn run_claw( .env("CLAW_CONFIG_HOME", config_home) .env("HOME", home) .env("NO_COLOR", "1") - .env("PATH", "/usr/bin:/bin") .args(args); + configure_clean_process_env(&mut command, home); command.output().expect("claw should launch") } +fn configure_clean_process_env(command: &mut Command, home: &std::path::Path) { + if cfg!(windows) { + command.env("USERPROFILE", home); + for key in ["PATH", "SystemRoot", "ComSpec", "PATHEXT", "TEMP", "TMP"] { + if let Ok(value) = std::env::var(key) { + command.env(key, value); + } + } + } else { + command.env("PATH", "/usr/bin:/bin"); + } +} + fn unique_temp_dir(label: &str) -> PathBuf { let millis = SystemTime::now() 
.duration_since(UNIX_EPOCH) diff --git a/rust/crates/rusty-claude-cli/tests/mock_parity_harness.rs b/rust/crates/rusty-claude-cli/tests/mock_parity_harness.rs index 066abb686b..502e683d76 100644 --- a/rust/crates/rusty-claude-cli/tests/mock_parity_harness.rs +++ b/rust/crates/rusty-claude-cli/tests/mock_parity_harness.rs @@ -1,17 +1,64 @@ use std::collections::BTreeMap; use std::fs; use std::io::Write; -use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::process::{Command, Output, Stdio}; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{SystemTime, UNIX_EPOCH}; +#[cfg(unix)] +use std::os::unix::fs::PermissionsExt; + use mock_anthropic_service::{MockAnthropicService, SCENARIO_PREFIX}; use serde_json::{json, Value}; static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0); +#[cfg(unix)] +fn make_executable(path: &Path) { + let mut permissions = fs::metadata(path) + .expect("plugin script metadata") + .permissions(); + permissions.set_mode(0o755); + fs::set_permissions(path, permissions).expect("plugin script should be executable"); +} + +#[cfg(not(unix))] +fn make_executable(path: &Path) { + let _ = path; +} + +fn script_name(stem: &str) -> String { + if cfg!(windows) { + format!("{stem}.cmd") + } else { + format!("{stem}.sh") + } +} + +fn write_script(path: &Path, unix_body: &str, windows_body: &str) { + let body = if cfg!(windows) { + windows_body + } else { + unix_body + }; + fs::write(path, body).expect("script should write"); + make_executable(path); +} + +fn configure_clean_process_env(command: &mut Command, home: &Path) { + if cfg!(windows) { + command.env("USERPROFILE", home); + for key in ["PATH", "SystemRoot", "ComSpec", "PATHEXT", "TEMP", "TMP"] { + if let Ok(value) = std::env::var(key) { + command.env(key, value); + } + } + } else { + command.env("PATH", "/usr/bin:/bin"); + } +} + #[test] #[allow(clippy::too_many_lines)] fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios() { @@ -317,7 +364,6 
@@ fn run_case(case: ScenarioCase, workspace: &HarnessWorkspace, base_url: &str) -> .env("CLAW_CONFIG_HOME", &workspace.config_home) .env("HOME", &workspace.home) .env("NO_COLOR", "1") - .env("PATH", "/usr/bin:/bin") .args([ "--model", "sonnet", @@ -325,6 +371,7 @@ fn run_case(case: ScenarioCase, workspace: &HarnessWorkspace, base_url: &str) -> case.permission_mode, "--output-format=json", ]); + configure_clean_process_env(&mut command, &workspace.home); if let Some(allowed_tools) = case.allowed_tools { command.args(["--allowedTools", allowed_tools]); @@ -420,41 +467,36 @@ fn prepare_plugin_fixture(workspace: &HarnessWorkspace) { fs::create_dir_all(&tool_dir).expect("plugin tools dir"); fs::create_dir_all(&manifest_dir).expect("plugin manifest dir"); - let script_path = tool_dir.join("echo-json.sh"); - fs::write( + let script_file = script_name("echo-json"); + let script_path = tool_dir.join(&script_file); + write_script( &script_path, "#!/bin/sh\nINPUT=$(cat)\nprintf '{\"plugin\":\"%s\",\"tool\":\"%s\",\"input\":%s}\\n' \"$CLAWD_PLUGIN_ID\" \"$CLAWD_TOOL_NAME\" \"$INPUT\"\n", - ) - .expect("plugin script should write"); - let mut permissions = fs::metadata(&script_path) - .expect("plugin script metadata") - .permissions(); - permissions.set_mode(0o755); - fs::set_permissions(&script_path, permissions).expect("plugin script should be executable"); + "@echo off\r\npowershell -NoProfile -Command \"$inputData = [Console]::In.ReadToEnd(); $payload = @{ plugin = $env:CLAWD_PLUGIN_ID; tool = $env:CLAWD_TOOL_NAME; input = ($inputData | ConvertFrom-Json) } | ConvertTo-Json -Compress -Depth 10; [Console]::Out.WriteLine($payload)\"\r\n", + ); fs::write( manifest_dir.join("plugin.json"), - r#"{ - "name": "parity-plugin", - "version": "1.0.0", - "description": "mock parity plugin", - "tools": [ - { - "name": "plugin_echo", - "description": "Echo JSON input", - "inputSchema": { - "type": "object", - "properties": { - "message": { "type": "string" } - }, - "required": 
["message"], - "additionalProperties": false - }, - "command": "./tools/echo-json.sh", - "requiredPermission": "workspace-write" - } - ] -}"#, + serde_json::to_string_pretty(&json!({ + "name": "parity-plugin", + "version": "1.0.0", + "description": "mock parity plugin", + "tools": [{ + "name": "plugin_echo", + "description": "Echo JSON input", + "inputSchema": { + "type": "object", + "properties": { + "message": { "type": "string" } + }, + "required": ["message"], + "additionalProperties": false + }, + "command": format!("./tools/{script_file}"), + "requiredPermission": "workspace-write" + }] + })) + .expect("plugin manifest should serialize"), ) .expect("plugin manifest should write"); @@ -483,7 +525,7 @@ fn assert_streaming_text(_: &HarnessWorkspace, run: &ScenarioRun) { assert_eq!(run.response["tool_results"], Value::Array(Vec::new())); } -fn assert_read_file_roundtrip(workspace: &HarnessWorkspace, run: &ScenarioRun) { +fn assert_read_file_roundtrip(_workspace: &HarnessWorkspace, run: &ScenarioRun) { assert_eq!(run.response["iterations"], Value::from(2)); assert_eq!( run.response["tool_uses"][0]["name"], @@ -500,7 +542,10 @@ fn assert_read_file_roundtrip(workspace: &HarnessWorkspace, run: &ScenarioRun) { let output = run.response["tool_results"][0]["output"] .as_str() .expect("tool output"); - assert!(output.contains(&workspace.root.join("fixture.txt").display().to_string())); + assert!( + output.contains("fixture.txt"), + "expected read_file output to mention fixture.txt, got: {output}" + ); assert!(output.contains("alpha parity line")); } @@ -535,7 +580,7 @@ fn assert_write_file_allowed(workspace: &HarnessWorkspace, run: &ScenarioRun) { assert!(run.response["message"] .as_str() .expect("message text") - .contains("generated/output.txt")); + .contains("output.txt")); let generated = workspace.root.join("generated").join("output.txt"); let contents = fs::read_to_string(&generated).expect("generated file should exist"); assert_eq!(contents, "created by mock 
service\n"); diff --git a/rust/crates/rusty-claude-cli/tests/mock_quality_harness.rs b/rust/crates/rusty-claude-cli/tests/mock_quality_harness.rs new file mode 100644 index 0000000000..11d1deb36f --- /dev/null +++ b/rust/crates/rusty-claude-cli/tests/mock_quality_harness.rs @@ -0,0 +1,1025 @@ +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::{Command, Output}; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{SystemTime, UNIX_EPOCH}; + +#[cfg(unix)] +use std::os::unix::fs::PermissionsExt; + +use mock_anthropic_service::{MockAnthropicService, SCENARIO_PREFIX}; +use serde_json::{json, Value}; + +static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0); + +#[cfg(unix)] +fn make_executable(path: &Path) { + let mut permissions = fs::metadata(path).expect("script metadata").permissions(); + permissions.set_mode(0o755); + fs::set_permissions(path, permissions).expect("script should be executable"); +} + +#[cfg(not(unix))] +fn make_executable(path: &Path) { + let _ = path; +} + +fn program_name(stem: &str) -> String { + if cfg!(windows) { + format!("{stem}.cmd") + } else { + stem.to_string() + } +} + +fn write_program_stub(dir: &Path, stem: &str, unix_body: &str, windows_ps1: &str) { + if cfg!(windows) { + let ps1_path = dir.join(format!("{stem}.ps1")); + fs::write(&ps1_path, windows_ps1).expect("powershell stub should write"); + let wrapper = format!( + "@echo off\r\npowershell -NoProfile -ExecutionPolicy Bypass -File \"%~dp0\\{stem}.ps1\" %*\r\n" + ); + let cmd_path = dir.join(program_name(stem)); + fs::write(&cmd_path, wrapper).expect("cmd wrapper should write"); + } else { + let script_path = dir.join(program_name(stem)); + fs::write(&script_path, unix_body).expect("shell stub should write"); + make_executable(&script_path); + } +} + +fn configure_quality_process_env(command: &mut Command, workspace: &HarnessWorkspace) { + if cfg!(windows) { + command.env("USERPROFILE", &workspace.home); + for key in [ + "SystemRoot", + "ComSpec", + "PATHEXT", + 
"TEMP", + "TMP", + "CARGO_HOME", + "RUSTUP_HOME", + "VCINSTALLDIR", + "VCToolsInstallDir", + "VCToolsVersion", + "VSINSTALLDIR", + "VisualStudioVersion", + "WindowsSdkDir", + "WindowsSDKVersion", + "UniversalCRTSdkDir", + "UCRTVersion", + "INCLUDE", + "LIB", + "LIBPATH", + ] { + if let Ok(value) = std::env::var(key) { + command.env(key, value); + } + } + let mut paths = vec![workspace.bin.display().to_string()]; + if let Ok(path) = std::env::var("PATH") { + paths.push(path); + } + command.env("PATH", paths.join(";")); + } else { + let path = std::env::var("PATH").unwrap_or_else(|_| "/usr/bin:/bin".to_string()); + command.env("PATH", format!("{}:{path}", workspace.bin.display())); + for key in ["CARGO_HOME", "RUSTUP_HOME"] { + if let Ok(value) = std::env::var(key) { + command.env(key, value); + } + } + } +} + +fn run_quality_case(case: ScenarioCase) { + let runtime = tokio::runtime::Runtime::new().expect("tokio runtime should build"); + let server = runtime + .block_on(MockAnthropicService::spawn()) + .expect("mock service should start"); + let base_url = server.base_url(); + let workspace = HarnessWorkspace::new(unique_temp_dir(case.name)); + workspace.create().expect("workspace should exist"); + (case.prepare)(&workspace); + + let run = match run_case(case, &workspace, &base_url) { + Ok(run) => run, + Err(output) => { + let captured = runtime.block_on(server.captured_requests()); + let messages_only = captured + .iter() + .filter(|request| request.path == "/v1/messages") + .collect::>(); + let last_request = messages_only + .last() + .map_or("", |request| { + request.raw_body.as_str() + }); + panic!( + "case {} failed\nmessages requests: {}\nstdout:\n{}\n\nstderr:\n{}\n\nlast request body:\n{}", + case.name, + messages_only.len(), + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr), + last_request + ); + } + }; + (case.assert)(&workspace, &run); + + fs::remove_dir_all(&workspace.root).expect("workspace cleanup should succeed"); +} 
+ +#[test] +fn quality_harness_rust_red_green() { + run_quality_case(ScenarioCase { + name: "rust_red_green", + prepare: prepare_rust_red_green_fixture, + assert: assert_rust_red_green, + }); +} + +#[test] +fn quality_harness_node_red_green() { + run_quality_case(ScenarioCase { + name: "node_red_green", + prepare: prepare_node_red_green_fixture, + assert: assert_node_red_green, + }); +} + +#[test] +fn quality_harness_python_red_green() { + run_quality_case(ScenarioCase { + name: "python_red_green", + prepare: prepare_python_red_green_fixture, + assert: assert_python_red_green, + }); +} + +#[test] +fn quality_harness_rust_config_failure() { + run_quality_case(ScenarioCase { + name: "rust_config_failure", + prepare: prepare_rust_config_failure_fixture, + assert: assert_rust_config_failure, + }); +} + +#[test] +fn quality_harness_node_tool_unavailable() { + run_quality_case(ScenarioCase { + name: "node_tool_unavailable", + prepare: prepare_node_tool_unavailable_fixture, + assert: assert_node_tool_unavailable, + }); +} + +#[test] +fn quality_harness_python_timeout() { + run_quality_case(ScenarioCase { + name: "python_timeout", + prepare: prepare_python_timeout_fixture, + assert: assert_python_timeout, + }); +} + +#[test] +fn quality_harness_rust_final_gate_retry() { + run_quality_case(ScenarioCase { + name: "rust_final_gate_retry", + prepare: prepare_rust_final_gate_retry_fixture, + assert: assert_rust_final_gate_retry, + }); +} + +#[derive(Clone, Copy)] +struct ScenarioCase { + name: &'static str, + prepare: fn(&HarnessWorkspace), + assert: fn(&HarnessWorkspace, &ScenarioRun), +} + +struct HarnessWorkspace { + root: PathBuf, + config_home: PathBuf, + home: PathBuf, + bin: PathBuf, +} + +impl HarnessWorkspace { + fn new(root: PathBuf) -> Self { + Self { + config_home: root.join("config-home"), + home: root.join("home"), + bin: root.join("bin"), + root, + } + } + + fn create(&self) -> std::io::Result<()> { + fs::create_dir_all(&self.root)?; + 
fs::create_dir_all(&self.config_home)?; + fs::create_dir_all(&self.home)?; + fs::create_dir_all(&self.bin)?; + Ok(()) + } +} + +struct ScenarioRun { + response: Value, +} + +fn run_case( + case: ScenarioCase, + workspace: &HarnessWorkspace, + base_url: &str, +) -> Result { + let mut command = Command::new(env!("CARGO_BIN_EXE_claw")); + command + .current_dir(&workspace.root) + .env_clear() + .env("ANTHROPIC_API_KEY", "test-quality-key") + .env("ANTHROPIC_BASE_URL", base_url) + .env("CLAW_CONFIG_HOME", &workspace.config_home) + .env("HOME", &workspace.home) + .env("NO_COLOR", "1") + .args([ + "--model", + "sonnet", + "--permission-mode", + "workspace-write", + "--allowedTools", + "write_file", + "--output-format=json", + ]); + configure_quality_process_env(&mut command, workspace); + + let prompt = format!("{SCENARIO_PREFIX}{}", case.name); + command.arg(prompt); + + let output = command.output().expect("claw should launch"); + if !output.status.success() { + return Err(output); + } + Ok(ScenarioRun { + response: parse_json_output(&String::from_utf8_lossy(&output.stdout)), + }) +} + +fn write_verifier_settings(workspace: &HarnessWorkspace, settings: &Value) { + fs::write( + workspace.config_home.join("settings.json"), + settings.to_string(), + ) + .expect("settings should write"); +} + +fn rust_verifier_settings(final_gate: bool) -> Value { + json!({ + "verifier": { + "enabled": true, + "mode": "staged", + "finalGate": final_gate, + "cargo": { + "check": true, + "clippy": false, + "fmt": true, + "test": false, + "timeoutSecs": 60 + }, + "node": { "enabled": false }, + "python": { "enabled": false } + } + }) +} + +fn node_verifier_settings(final_gate: bool) -> Value { + json!({ + "verifier": { + "enabled": true, + "mode": "staged", + "finalGate": final_gate, + "cargo": { + "check": false, + "clippy": false, + "fmt": false, + "test": false, + "timeoutSecs": 5 + }, + "node": { + "enabled": true, + "timeoutSecs": 10 + }, + "python": { "enabled": false } + } + }) +} + 
+fn python_verifier_settings(final_gate: bool, timeout_secs: u64) -> Value { + json!({ + "verifier": { + "enabled": true, + "mode": "staged", + "finalGate": final_gate, + "cargo": { + "check": false, + "clippy": false, + "fmt": false, + "test": false, + "timeoutSecs": 5 + }, + "node": { "enabled": false }, + "python": { + "enabled": true, + "timeoutSecs": timeout_secs + } + } + }) +} + +fn prepare_rust_workspace(workspace: &HarnessWorkspace) { + fs::create_dir_all(workspace.root.join("crates").join("app").join("src")) + .expect("rust src dir should exist"); + fs::write( + workspace.root.join("Cargo.toml"), + "[workspace]\nmembers = [\"crates/app\"]\nresolver = \"2\"\n", + ) + .expect("workspace cargo manifest should write"); + fs::write( + workspace.root.join("crates").join("app").join("Cargo.toml"), + "[package]\nname = \"demo_app\"\nversion = \"0.1.0\"\nedition = \"2021\"\n", + ) + .expect("package cargo manifest should write"); + fs::write( + workspace + .root + .join("crates") + .join("app") + .join("src") + .join("lib.rs"), + "pub fn answer() -> usize {\n 0\n}\n", + ) + .expect("rust source should write"); +} + +fn prepare_rust_red_green_fixture(workspace: &HarnessWorkspace) { + prepare_rust_workspace(workspace); + write_verifier_settings(workspace, &rust_verifier_settings(true)); +} + +fn prepare_rust_final_gate_retry_fixture(workspace: &HarnessWorkspace) { + prepare_rust_workspace(workspace); + write_verifier_settings(workspace, &rust_verifier_settings(true)); +} + +fn prepare_rust_config_failure_fixture(workspace: &HarnessWorkspace) { + fs::create_dir_all(workspace.root.join("src")).expect("rust src dir should exist"); + fs::write( + workspace.root.join("Cargo.toml"), + "[package]\nname = \"config_demo\"\nversion = \"0.1.0\"\nedition = \"2021\"\n", + ) + .expect("cargo manifest should write"); + fs::write( + workspace.root.join("src").join("lib.rs"), + "pub fn answer() -> usize {\n 0\n}\n", + ) + .expect("rust source should write"); + 
write_verifier_settings(workspace, &rust_verifier_settings(false)); +} + +fn node_stub_unix() -> &'static str { + r#"#!/bin/sh +set -eu +if [ "$#" -lt 2 ] || [ "$1" != "run" ]; then + echo "unsupported npm invocation: $*" >&2 + exit 2 +fi +shift +if [ "$1" = "--silent" ]; then + shift +fi +script="$1" +content="" +if [ -f "src/index.ts" ]; then + content="$(cat "src/index.ts")" +fi +case "$content" in + *TOOL_UNAVAILABLE_SENTINEL*) + echo "npm not found" >&2 + exit 1 + ;; +esac +case "$script" in + typecheck) + case "$content" in + *BROKEN_TYPECHECK*) + echo "src/index.ts(1,14): error TS2322: Type 'number' is not assignable to type 'string'." >&2 + exit 1 + ;; + *) + exit 0 + ;; + esac + ;; + lint|test) + exit 0 + ;; + *) + echo "unsupported npm script: $script" >&2 + exit 2 + ;; +esac +"# +} + +fn node_stub_windows() -> &'static str { + r#"$arguments = @($args) +if ($arguments.Length -lt 2 -or $arguments[0] -ne 'run') { + [Console]::Error.WriteLine("unsupported npm invocation: $($arguments -join ' ')") + exit 2 +} +$index = 1 +if ($arguments.Length -gt 2 -and $arguments[1] -eq '--silent') { + $index = 2 +} +$script = $arguments[$index] +$sourcePath = Join-Path (Get-Location) 'src/index.ts' +$content = if (Test-Path $sourcePath) { Get-Content -LiteralPath $sourcePath -Raw } else { '' } +if ($content -match 'TOOL_UNAVAILABLE_SENTINEL') { + [Console]::Error.WriteLine('npm not found') + exit 1 +} +switch ($script) { + 'typecheck' { + if ($content -match 'BROKEN_TYPECHECK') { + [Console]::Error.WriteLine("src/index.ts(1,14): error TS2322: Type 'number' is not assignable to type 'string'.") + exit 1 + } + exit 0 + } + 'lint' { exit 0 } + 'test' { exit 0 } + default { + [Console]::Error.WriteLine("unsupported npm script: $script") + exit 2 + } +} +"# +} + +fn prepare_node_workspace(workspace: &HarnessWorkspace) { + let package_root = workspace.root.join("packages").join("web"); + fs::create_dir_all(package_root.join("src")).expect("node src dir should exist"); + 
fs::write( + package_root.join("package.json"), + json!({ + "name": "web-app", + "version": "1.0.0", + "scripts": { + "typecheck": "tsc --noEmit", + "lint": "eslint .", + "test": "vitest run" + } + }) + .to_string(), + ) + .expect("package.json should write"); + fs::write( + package_root.join("src").join("index.ts"), + "export const message = 'seed';\n", + ) + .expect("node source should write"); + write_program_stub(&workspace.bin, "npm", node_stub_unix(), node_stub_windows()); +} + +fn prepare_node_red_green_fixture(workspace: &HarnessWorkspace) { + prepare_node_workspace(workspace); + write_verifier_settings(workspace, &node_verifier_settings(true)); +} + +fn prepare_node_tool_unavailable_fixture(workspace: &HarnessWorkspace) { + prepare_node_workspace(workspace); + write_verifier_settings(workspace, &node_verifier_settings(false)); +} + +fn python_stub_unix() -> &'static str { + r#"#!/bin/sh +set -eu +if [ "$#" -lt 2 ] || [ "$1" != "-m" ]; then + echo "unsupported python invocation: $*" >&2 + exit 2 +fi +module="$2" +content="" +if [ -f "app/main.py" ]; then + content="$(cat "app/main.py")" +fi +case "$content" in + *TIMEOUT_SENTINEL*) + sleep 3 + exit 0 + ;; +esac +case "$module" in + py_compile) + case "$content" in + *BROKEN_PY_COMPILE*) + echo "SyntaxError: invalid syntax" >&2 + exit 1 + ;; + *) + exit 0 + ;; + esac + ;; + pytest) + exit 0 + ;; + *) + echo "unsupported python module: $module" >&2 + exit 2 + ;; +esac +"# +} + +fn python_stub_windows() -> &'static str { + r#"$arguments = @($args) +if ($arguments.Length -lt 2 -or $arguments[0] -ne '-m') { + [Console]::Error.WriteLine("unsupported python invocation: $($arguments -join ' ')") + exit 2 +} +$module = $arguments[1] +$sourcePath = Join-Path (Get-Location) 'app/main.py' +$content = if (Test-Path $sourcePath) { Get-Content -LiteralPath $sourcePath -Raw } else { '' } +if ($content -match 'TIMEOUT_SENTINEL') { + Start-Sleep -Seconds 3 + exit 0 +} +switch ($module) { + 'py_compile' { + if ($content 
-match 'BROKEN_PY_COMPILE') { + [Console]::Error.WriteLine('SyntaxError: invalid syntax') + exit 1 + } + exit 0 + } + 'pytest' { exit 0 } + default { + [Console]::Error.WriteLine("unsupported python module: $module") + exit 2 + } +} +"# +} + +fn prepare_python_workspace(workspace: &HarnessWorkspace) { + let project_root = workspace.root.join("services").join("api"); + fs::create_dir_all(project_root.join("app")).expect("python app dir should exist"); + fs::create_dir_all(project_root.join("tests")).expect("python tests dir should exist"); + fs::write( + project_root.join("pyproject.toml"), + "[project]\nname = \"quality-api\"\nversion = \"0.1.0\"\n", + ) + .expect("pyproject should write"); + fs::write( + project_root.join("app").join("main.py"), + "def meaning() -> int:\n return 1\n", + ) + .expect("python source should write"); + fs::write( + project_root.join("tests").join("test_smoke.py"), + "def test_smoke():\n assert True\n", + ) + .expect("python test should write"); + write_program_stub( + &workspace.bin, + "python", + python_stub_unix(), + python_stub_windows(), + ); +} + +fn prepare_python_red_green_fixture(workspace: &HarnessWorkspace) { + prepare_python_workspace(workspace); + write_verifier_settings(workspace, &python_verifier_settings(true, 10)); +} + +fn prepare_python_timeout_fixture(workspace: &HarnessWorkspace) { + prepare_python_workspace(workspace); + write_verifier_settings(workspace, &python_verifier_settings(false, 1)); +} + +fn verification_reports(response: &Value) -> &[Value] { + response["verification_reports"] + .as_array() + .expect("verification reports array") +} + +fn report_steps(report: &Value) -> &[Value] { + report["steps"].as_array().expect("report steps array") +} + +fn find_report<'a>(response: &'a Value, phase: &str, status: &str, adapter: &str) -> &'a Value { + verification_reports(response) + .iter() + .find(|report| { + report["phase"] == phase + && report["status"] == status + && report["adapter_id"] == adapter + }) + 
.unwrap_or_else(|| { + panic!( + "missing report phase={phase} status={status} adapter={adapter}: {}", + serde_json::to_string_pretty(response).expect("response should serialize") + ) + }) +} + +fn has_report_matching(response: &Value, predicate: F) -> bool +where + F: Fn(&Value) -> bool, +{ + verification_reports(response).iter().any(predicate) +} + +fn path_value_ends_with(value: &Value, suffix: &Path) -> bool { + value + .as_str() + .map(PathBuf::from) + .is_some_and(|path| path.ends_with(suffix)) +} + +fn assert_rust_red_green(workspace: &HarnessWorkspace, run: &ScenarioRun) { + assert_eq!(run.response["iterations"], Value::from(3)); + assert_eq!(run.response["tool_uses"].as_array().map(Vec::len), Some(2)); + assert_eq!( + run.response["tool_results"].as_array().map(Vec::len), + Some(2) + ); + assert_eq!( + run.response["verification_gate"]["attempted"], + Value::Bool(true) + ); + assert_eq!( + run.response["verification_gate"]["passed"], + Value::Bool(true) + ); + + let quick_failed = find_report(&run.response, "quick", "failed", "rust-cargo"); + assert_eq!( + quick_failed["primary_failure_kind"], + Value::String("code".to_string()) + ); + assert!(path_value_ends_with( + &quick_failed["project_root"], + Path::new("crates").join("app").as_path() + )); + let quick_step = &report_steps(quick_failed)[0]; + assert!(quick_step["command"] + .as_str() + .is_some_and(|command| command.contains("cargo check") && command.contains("-p demo_app"))); + assert_eq!( + quick_step["step_kind"], + Value::String("cargo_check".to_string()) + ); + assert_eq!( + quick_step["target_scope"], + Value::String("package".to_string()) + ); + assert_eq!( + quick_step["package_name"], + Value::String("demo_app".to_string()) + ); + + let final_passed = find_report(&run.response, "final", "passed", "rust-cargo"); + let final_step = report_steps(final_passed) + .iter() + .find(|step| step["step_kind"] == Value::String("cargo_fmt_check".to_string())) + .expect("cargo fmt step should exist"); + 
assert_eq!( + final_step["target_scope"], + Value::String("workspace".to_string()) + ); + assert_eq!( + final_step["package_name"], + Value::String("demo_app".to_string()) + ); + assert!(fs::read_to_string( + workspace + .root + .join("crates") + .join("app") + .join("src") + .join("lib.rs") + ) + .expect("rust source should exist") + .contains("42")); + assert!(run.response["message"] + .as_str() + .expect("message text") + .contains("rust quality red-green complete")); +} + +fn assert_node_red_green(workspace: &HarnessWorkspace, run: &ScenarioRun) { + assert_eq!(run.response["iterations"], Value::from(3)); + assert_eq!(run.response["tool_uses"].as_array().map(Vec::len), Some(2)); + assert_eq!( + run.response["verification_gate"]["attempted"], + Value::Bool(true) + ); + assert_eq!( + run.response["verification_gate"]["passed"], + Value::Bool(true) + ); + + let quick_failed = find_report(&run.response, "quick", "failed", "node-typescript"); + assert_eq!( + quick_failed["primary_failure_kind"], + Value::String("code".to_string()) + ); + assert!(path_value_ends_with( + &quick_failed["project_root"], + Path::new("packages").join("web").as_path() + )); + let quick_step = &report_steps(quick_failed)[0]; + assert!(quick_step["command"] + .as_str() + .is_some_and(|command| command.contains("npm run --silent typecheck"))); + assert_eq!( + quick_step["step_kind"], + Value::String("typecheck".to_string()) + ); + assert_eq!( + quick_step["target_scope"], + Value::String("package".to_string()) + ); + assert_eq!( + quick_step["package_manager"], + Value::String("npm".to_string()) + ); + assert_eq!( + quick_step["package_name"], + Value::String("web-app".to_string()) + ); + + let final_passed = find_report(&run.response, "final", "passed", "node-typescript"); + assert!(report_steps(final_passed) + .iter() + .any(|step| step["step_kind"] == Value::String("lint".to_string()))); + assert!(report_steps(final_passed) + .iter() + .any(|step| step["step_kind"] == 
Value::String("test".to_string()))); + assert!(fs::read_to_string( + workspace + .root + .join("packages") + .join("web") + .join("src") + .join("index.ts") + ) + .expect("node source should exist") + .contains("\"ok\"")); + assert!(run.response["message"] + .as_str() + .expect("message text") + .contains("node quality red-green complete")); +} + +fn assert_python_red_green(workspace: &HarnessWorkspace, run: &ScenarioRun) { + assert_eq!(run.response["iterations"], Value::from(3)); + assert_eq!(run.response["tool_uses"].as_array().map(Vec::len), Some(2)); + assert_eq!( + run.response["verification_gate"]["attempted"], + Value::Bool(true) + ); + assert_eq!( + run.response["verification_gate"]["passed"], + Value::Bool(true) + ); + + let quick_failed = find_report(&run.response, "quick", "failed", "python"); + assert_eq!( + quick_failed["primary_failure_kind"], + Value::String("code".to_string()) + ); + assert!(path_value_ends_with( + &quick_failed["project_root"], + Path::new("services").join("api").as_path() + )); + let quick_step = &report_steps(quick_failed)[0]; + assert_eq!( + quick_step["step_kind"], + Value::String("py_compile".to_string()) + ); + assert_eq!( + quick_step["target_scope"], + Value::String("file_set".to_string()) + ); + assert_eq!( + quick_step["launcher_kind"], + Value::String("global".to_string()) + ); + + let final_passed = find_report(&run.response, "final", "passed", "python"); + assert!(report_steps(final_passed) + .iter() + .any(|step| step["step_kind"] == Value::String("pytest".to_string()))); + assert!(fs::read_to_string( + workspace + .root + .join("services") + .join("api") + .join("app") + .join("main.py") + ) + .expect("python source should exist") + .contains("return 42")); + assert!(run.response["message"] + .as_str() + .expect("message text") + .contains("python quality red-green complete")); +} + +fn assert_rust_config_failure(_: &HarnessWorkspace, run: &ScenarioRun) { + assert_eq!(run.response["iterations"], Value::from(2)); + 
assert_eq!(run.response["tool_uses"].as_array().map(Vec::len), Some(1)); + assert_eq!( + run.response["verification_gate"]["attempted"], + Value::Bool(false) + ); + assert_eq!( + run.response["verification_gate"]["passed"], + Value::Bool(true) + ); + + let quick_failed = find_report(&run.response, "quick", "failed", "rust-cargo"); + assert_eq!( + quick_failed["primary_failure_kind"], + Value::String("config".to_string()) + ); + let quick_step = &report_steps(quick_failed)[0]; + assert_eq!( + quick_step["failure_kind"], + Value::String("config".to_string()) + ); + assert!(run.response["message"] + .as_str() + .expect("message text") + .contains("rust config failure captured")); +} + +fn assert_node_tool_unavailable(_: &HarnessWorkspace, run: &ScenarioRun) { + assert_eq!(run.response["iterations"], Value::from(2)); + assert_eq!( + run.response["verification_gate"]["attempted"], + Value::Bool(false) + ); + let quick_failed = find_report(&run.response, "quick", "failed", "node-typescript"); + assert_eq!( + quick_failed["primary_failure_kind"], + Value::String("tool_unavailable".to_string()) + ); + let quick_step = &report_steps(quick_failed)[0]; + assert_eq!( + quick_step["failure_kind"], + Value::String("tool_unavailable".to_string()) + ); + assert_eq!( + quick_step["step_kind"], + Value::String("typecheck".to_string()) + ); + assert_eq!( + quick_step["package_manager"], + Value::String("npm".to_string()) + ); + assert!(run.response["message"] + .as_str() + .expect("message text") + .contains("node tool unavailable captured")); +} + +fn assert_python_timeout(_: &HarnessWorkspace, run: &ScenarioRun) { + assert_eq!(run.response["iterations"], Value::from(2)); + assert_eq!( + run.response["verification_gate"]["attempted"], + Value::Bool(false) + ); + let quick_failed = find_report(&run.response, "quick", "failed", "python"); + assert_eq!( + quick_failed["primary_failure_kind"], + Value::String("timeout".to_string()) + ); + let quick_step = &report_steps(quick_failed)[0]; 
+ assert_eq!( + quick_step["failure_kind"], + Value::String("timeout".to_string()) + ); + assert_eq!( + quick_step["step_kind"], + Value::String("py_compile".to_string()) + ); + assert_eq!( + quick_step["launcher_kind"], + Value::String("global".to_string()) + ); + assert!(run.response["message"] + .as_str() + .expect("message text") + .contains("python timeout captured")); +} + +fn assert_rust_final_gate_retry(workspace: &HarnessWorkspace, run: &ScenarioRun) { + assert!( + run.response["iterations"] + .as_u64() + .is_some_and(|iterations| iterations >= 5), + "expected retry flow to require at least five iterations: {}", + serde_json::to_string_pretty(&run.response).expect("response should serialize") + ); + assert_eq!(run.response["tool_uses"].as_array().map(Vec::len), Some(2)); + assert_eq!( + run.response["verification_gate"]["attempted"], + Value::Bool(true) + ); + assert_eq!( + run.response["verification_gate"]["passed"], + Value::Bool(true) + ); + + assert!(has_report_matching(&run.response, |report| { + report["phase"] == "final" + && report["status"] == "failed" + && report["adapter_id"] == "rust-cargo" + && !report_steps(report).is_empty() + })); + assert!(has_report_matching(&run.response, |report| { + report["phase"] == "final" + && report["adapter_id"] == "rust-cargo" + && report_steps(report).is_empty() + && report["summary_text"] + .as_str() + .is_some_and(|summary| summary.contains("still failing")) + })); + assert!(has_report_matching(&run.response, |report| { + report["phase"] == "final" + && report["status"] == "passed" + && report["adapter_id"] == "rust-cargo" + })); + + let final_failed = find_report(&run.response, "final", "failed", "rust-cargo"); + let failing_fmt_step = report_steps(final_failed) + .iter() + .find(|step| step["step_kind"] == Value::String("cargo_fmt_check".to_string())) + .expect("failing cargo fmt step should exist"); + assert_eq!( + failing_fmt_step["target_scope"], + Value::String("workspace".to_string()) + ); + 
assert_eq!( + failing_fmt_step["package_name"], + Value::String("demo_app".to_string()) + ); + assert!(fs::read_to_string( + workspace + .root + .join("crates") + .join("app") + .join("src") + .join("lib.rs") + ) + .expect("rust source should exist") + .contains("pub fn answer() -> usize")); + assert!(run.response["message"] + .as_str() + .expect("message text") + .contains("rust final gate retry complete")); +} + +fn parse_json_output(stdout: &str) -> Value { + stdout + .lines() + .rev() + .find_map(|line| { + let trimmed = line.trim(); + if trimmed.starts_with('{') && trimmed.ends_with('}') { + serde_json::from_str(trimmed).ok() + } else { + None + } + }) + .unwrap_or_else(|| panic!("no JSON response line found in stdout:\n{stdout}")) +} + +fn unique_temp_dir(label: &str) -> PathBuf { + let millis = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("clock should be after epoch") + .as_millis(); + let counter = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed); + std::env::temp_dir().join(format!( + "claw-mock-quality-{label}-{}-{millis}-{counter}", + std::process::id() + )) +} diff --git a/rust/crates/tools/src/lane_completion.rs b/rust/crates/tools/src/lane_completion.rs index e4eecce7df..8413e08c73 100644 --- a/rust/crates/tools/src/lane_completion.rs +++ b/rust/crates/tools/src/lane_completion.rs @@ -109,6 +109,7 @@ mod tests { completed_at: Some("2024-01-01T00:00:00Z".to_string()), lane_events: vec![], derived_state: "working".to_string(), + subagent_depth: 0, current_blocker: None, error: None, } diff --git a/rust/crates/tools/src/lib.rs b/rust/crates/tools/src/lib.rs index 5cb2f1e5c5..575a45706a 100644 --- a/rust/crates/tools/src/lib.rs +++ b/rust/crates/tools/src/lib.rs @@ -1,3 +1,4 @@ +use std::cell::Cell; use std::collections::{BTreeMap, BTreeSet}; use std::path::{Path, PathBuf}; use std::process::Command; @@ -1842,66 +1843,56 @@ fn from_value Deserialize<'de>>(input: &Value) -> Result /// ROADMAP #50: Read-only commands targeting CWD paths get 
`WorkspaceWrite`, /// all others remain `DangerFullAccess`. fn classify_bash_permission(command: &str) -> PermissionMode { - // Read-only commands that are safe when targeting workspace paths - const READ_ONLY_COMMANDS: &[&str] = &[ - "cat", "head", "tail", "less", "more", "ls", "ll", "dir", "find", "test", "[", "[[", - "grep", "rg", "awk", "sed", "file", "stat", "readlink", "wc", "sort", "uniq", "cut", "tr", - "pwd", "echo", "printf", - ]; - - // Get the base command (first word before any args or pipes) - let base_cmd = command.split_whitespace().next().unwrap_or(""); - let base_cmd = base_cmd.split('|').next().unwrap_or("").trim(); - let base_cmd = base_cmd.split(';').next().unwrap_or("").trim(); - let base_cmd = base_cmd.split('>').next().unwrap_or("").trim(); - let base_cmd = base_cmd.split('<').next().unwrap_or("").trim(); - - // Check if it's a read-only command - let cmd_name = base_cmd.split('/').next_back().unwrap_or(base_cmd); - let is_read_only = READ_ONLY_COMMANDS.contains(&cmd_name); + let intent = runtime::bash_validation::classify_command(command); + if intent != runtime::bash_validation::CommandIntent::ReadOnly { + return PermissionMode::DangerFullAccess; + } - if !is_read_only { + if !matches!( + runtime::bash_validation::validate_read_only(command, PermissionMode::ReadOnly), + runtime::bash_validation::ValidationResult::Allow + ) { return PermissionMode::DangerFullAccess; } - // Check if any path argument is outside workspace - // Simple heuristic: check for absolute paths not starting with CWD - if has_dangerous_paths(command) { + let workspace = std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")); + if !matches!( + runtime::bash_validation::validate_paths(command, &workspace), + runtime::bash_validation::ValidationResult::Allow + ) { + return PermissionMode::DangerFullAccess; + } + if tool_targets_outside_workspace(command, &workspace) { return PermissionMode::DangerFullAccess; } PermissionMode::WorkspaceWrite } -/// Check if command 
has dangerous paths (outside workspace). -fn has_dangerous_paths(command: &str) -> bool { - // Look for absolute paths - let tokens: Vec<&str> = command.split_whitespace().collect(); - - for token in tokens { - // Skip flags/options +fn tool_targets_outside_workspace(command: &str, workspace: &Path) -> bool { + let home = std::env::var("HOME") + .or_else(|_| std::env::var("USERPROFILE")) + .unwrap_or_default(); + for token in command.split_whitespace() { if token.starts_with('-') { continue; } - - // Check for absolute paths - if token.starts_with('/') || token.starts_with("~/") { - // Check if it's within CWD - let path = - PathBuf::from(token.replace('~', &std::env::var("HOME").unwrap_or_default())); - if let Ok(cwd) = std::env::current_dir() { - if !path.starts_with(&cwd) { - return true; // Path outside workspace - } - } + if token.contains("../..") || (token.starts_with("../") && !token.starts_with("./")) { + return true; } - // Check for parent directory traversal that escapes workspace - if token.contains("../..") || token.starts_with("../") && !token.starts_with("./") { - return true; + let candidate = token.trim_matches(|ch| matches!(ch, '"' | '\'' | ';' | ',' | ')' | '(')); + if candidate.starts_with('/') || candidate.starts_with("~/") { + let resolved = if candidate.starts_with("~/") { + PathBuf::from(candidate.replacen('~', &home, 1)) + } else { + PathBuf::from(candidate) + }; + if !workspace.as_os_str().is_empty() && !resolved.starts_with(workspace) { + return true; + } } } - false } @@ -1988,8 +1979,7 @@ fn git_ref_exists(reference: &str) -> bool { Command::new("git") .args(["rev-parse", "--verify", "--quiet", reference]) .output() - .map(|output| output.status.success()) - .unwrap_or(false) + .is_ok_and(|output| output.status.success()) } fn git_stdout(args: &[&str]) -> Option { @@ -2311,6 +2301,25 @@ struct AgentInput { model: Option, } +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct ReviewFinding { + pub severity: 
String, + pub title: String, + pub body: String, + #[serde(skip_serializing_if = "Option::is_none")] + pub file: Option, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub struct ReviewOutcome { + pub summary: String, + pub findings: Vec, + #[serde(rename = "rawResponse")] + pub raw_response: String, + #[serde(rename = "subagentDepth")] + pub subagent_depth: u32, +} + #[derive(Debug, Deserialize)] struct ToolSearchInput { query: String, @@ -2605,6 +2614,8 @@ struct AgentOutput { current_blocker: Option, #[serde(rename = "derivedState")] derived_state: String, + #[serde(rename = "subagentDepth", default)] + subagent_depth: u32, #[serde(skip_serializing_if = "Option::is_none")] error: Option, } @@ -2615,6 +2626,7 @@ struct AgentJob { prompt: String, system_prompt: Vec, allowed_tools: BTreeSet, + subagent_depth: u32, } #[derive(Debug, Clone, Serialize, PartialEq, Eq)] @@ -3474,10 +3486,64 @@ const DEFAULT_AGENT_MODEL: &str = "claude-opus-4-6"; const DEFAULT_AGENT_SYSTEM_DATE: &str = "2026-03-31"; const DEFAULT_AGENT_MAX_ITERATIONS: usize = 32; +thread_local! 
{ + static SUBAGENT_DEPTH: Cell = const { Cell::new(0) }; +} + +struct SubagentDepthGuard { + previous_depth: u32, +} + +impl Drop for SubagentDepthGuard { + fn drop(&mut self) { + SUBAGENT_DEPTH.with(|depth| depth.set(self.previous_depth)); + } +} + +fn current_subagent_depth() -> u32 { + SUBAGENT_DEPTH.with(Cell::get) +} + +fn enter_subagent_depth(depth: u32) -> SubagentDepthGuard { + let previous_depth = SUBAGENT_DEPTH.with(|current| { + let previous = current.get(); + current.set(depth); + previous + }); + SubagentDepthGuard { previous_depth } +} + fn execute_agent(input: AgentInput) -> Result { execute_agent_with_spawn(input, spawn_agent_job) } +pub fn run_inline_review(prompt: &str, model: Option<&str>) -> Result { + if prompt.trim().is_empty() { + return Err(String::from("review prompt must not be empty")); + } + if current_subagent_depth() > 0 { + return Err(String::from( + "review critic can only run from top-level depth", + )); + } + + let subagent_type = "Review"; + let subagent_depth = 1; + let resolved_model = resolve_agent_model(model); + let system_prompt = build_agent_system_prompt(subagent_type)?; + let allowed_tools = allowed_tools_for_subagent(subagent_type); + let mut runtime = build_subagent_runtime(resolved_model, allowed_tools.clone(), system_prompt)? 
+ .with_max_iterations(DEFAULT_AGENT_MAX_ITERATIONS); + let _depth_guard = enter_subagent_depth(subagent_depth); + let summary = runtime + .run_turn(prompt.to_string(), None) + .map_err(|error| error.to_string())?; + Ok(parse_review_outcome( + &final_assistant_text(&summary), + subagent_depth, + )) +} + fn execute_agent_with_spawn(input: AgentInput, spawn_fn: F) -> Result where F: FnOnce(AgentJob) -> Result<(), String>, @@ -3495,6 +3561,13 @@ where let output_file = output_dir.join(format!("{agent_id}.md")); let manifest_file = output_dir.join(format!("{agent_id}.json")); let normalized_subagent_type = normalize_subagent_type(input.subagent_type.as_deref()); + let parent_depth = current_subagent_depth(); + if normalized_subagent_type == "Review" && parent_depth > 0 { + return Err(String::from( + "review subagents cannot be spawned from another subagent", + )); + } + let subagent_depth = parent_depth.saturating_add(1); let model = resolve_agent_model(input.model.as_deref()); let agent_name = input .name @@ -3538,6 +3611,7 @@ where lane_events: vec![LaneEvent::started(iso8601_now())], current_blocker: None, derived_state: String::from("working"), + subagent_depth, error: None, }; write_agent_manifest(&manifest)?; @@ -3548,6 +3622,7 @@ where prompt: input.prompt, system_prompt, allowed_tools, + subagent_depth, }; if let Err(error) = spawn_fn(job) { let error = format!("failed to spawn sub-agent: {error}"); @@ -3587,6 +3662,7 @@ fn spawn_agent_job(job: AgentJob) -> Result<(), String> { fn run_agent_job(job: &AgentJob) -> Result<(), String> { let mut runtime = build_agent_runtime(job)?.with_max_iterations(DEFAULT_AGENT_MAX_ITERATIONS); + let _depth_guard = enter_subagent_depth(job.subagent_depth); let summary = runtime .run_turn(job.prompt.clone(), None) .map_err(|error| error.to_string())?; @@ -3602,7 +3678,14 @@ fn build_agent_runtime( .model .clone() .unwrap_or_else(|| DEFAULT_AGENT_MODEL.to_string()); - let allowed_tools = job.allowed_tools.clone(); + 
build_subagent_runtime(model, job.allowed_tools.clone(), job.system_prompt.clone()) +} + +fn build_subagent_runtime( + model: String, + allowed_tools: BTreeSet, + system_prompt: Vec, +) -> Result, String> { let api_client = ProviderRuntimeClient::new(model, allowed_tools.clone())?; let permission_policy = agent_permission_policy(); let tool_executor = SubagentToolExecutor::new(allowed_tools) @@ -3612,7 +3695,7 @@ fn build_agent_runtime( api_client, tool_executor, permission_policy, - job.system_prompt.clone(), + system_prompt, )) } @@ -3676,6 +3759,17 @@ fn allowed_tools_for_subagent(subagent_type: &str) -> BTreeSet { "SendUserMessage", "PowerShell", ], + "Review" => vec![ + "bash", + "read_file", + "glob_search", + "grep_search", + "WebFetch", + "WebSearch", + "ToolSearch", + "StructuredOutput", + "PowerShell", + ], "claw-guide" => vec![ "read_file", "glob_search", @@ -4384,6 +4478,130 @@ fn derive_agent_state( "truly_idle" } +fn parse_review_outcome(raw_response: &str, subagent_depth: u32) -> ReviewOutcome { + let parsed = extract_json_value(raw_response) + .and_then(|value| value.as_object().cloned()) + .unwrap_or_default(); + + let mut findings = parsed + .get("findings") + .and_then(serde_json::Value::as_array) + .map(|items| { + items + .iter() + .filter_map(parse_review_finding) + .collect::>() + }) + .unwrap_or_default(); + + findings.retain(|finding| !finding.title.trim().is_empty() && !finding.body.trim().is_empty()); + + let summary = parsed + .get("summary") + .and_then(serde_json::Value::as_str) + .map(str::trim) + .filter(|value| !value.is_empty()) + .map_or_else( + || { + if findings.is_empty() { + String::from("No review findings.") + } else { + format!("Reviewer reported {} finding(s).", findings.len()) + } + }, + ToString::to_string, + ); + + ReviewOutcome { + summary, + findings, + raw_response: raw_response.trim().to_string(), + subagent_depth, + } +} + +fn parse_review_finding(value: &serde_json::Value) -> Option { + let object = 
value.as_object()?; + let severity = object + .get("severity") + .or_else(|| object.get("priority")) + .and_then(serde_json::Value::as_str) + .map_or_else(|| String::from("P2"), normalize_review_severity); + let title = object + .get("title") + .and_then(serde_json::Value::as_str) + .unwrap_or_default() + .trim() + .to_string(); + let body = object + .get("body") + .or_else(|| object.get("message")) + .and_then(serde_json::Value::as_str) + .unwrap_or_default() + .trim() + .to_string(); + let file = object + .get("file") + .and_then(serde_json::Value::as_str) + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToString::to_string); + Some(ReviewFinding { + severity, + title, + body, + file, + }) +} + +fn normalize_review_severity(value: &str) -> String { + match value.trim().to_ascii_uppercase().as_str() { + "P0" => String::from("P0"), + "P1" => String::from("P1"), + "P3" => String::from("P3"), + _ => String::from("P2"), + } +} + +fn extract_json_value(raw: &str) -> Option { + let trimmed = raw.trim(); + if trimmed.is_empty() { + return None; + } + if let Ok(value) = serde_json::from_str(trimmed) { + return Some(value); + } + + for fence in ["```json", "```JSON", "```"] { + if let Some(start) = trimmed.find(fence) { + let after_fence = &trimmed[start + fence.len()..]; + if let Some(end) = after_fence.find("```") { + let candidate = after_fence[..end].trim(); + if let Ok(value) = serde_json::from_str(candidate) { + return Some(value); + } + } + } + } + + extract_braced_json(trimmed) +} + +fn extract_braced_json(raw: &str) -> Option { + let mut starts = raw + .char_indices() + .filter_map(|(index, ch)| matches!(ch, '{' | '[').then_some(index)) + .collect::>(); + starts.reverse(); + for start in starts { + let candidate = raw[start..].trim(); + if let Ok(value) = serde_json::from_str(candidate) { + return Some(value); + } + } + None +} + fn maybe_commit_provenance(result: Option<&str>) -> Option { let commit = extract_commit_sha(result?)?; let branch = 
current_git_branch().unwrap_or_else(|| "unknown".to_string()); @@ -4743,7 +4961,10 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec { .iter() .filter_map(|message| { let role = match message.role { - MessageRole::System | MessageRole::User | MessageRole::Tool => "user", + MessageRole::System + | MessageRole::User + | MessageRole::Tool + | MessageRole::Verification => "user", MessageRole::Assistant => "assistant", }; let content = message @@ -4769,6 +4990,56 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec { }], is_error: *is_error, }, + ContentBlock::VerificationReport { + summary_text, + adapter_id, + project_root, + touched_paths, + primary_failure, + steps, + report_mode, + .. + } => { + if report_mode.as_deref() == Some("typed-primary") { + let payload = json!({ + "type": "verification_report", + "adapter_id": adapter_id, + "project_root": project_root, + "touched_paths": touched_paths, + "primary_failure": primary_failure.as_ref().map(|failure| json!({ + "label": failure.label, + "status": failure.status.as_str(), + "failure_kind": failure.failure_kind, + "output_excerpt": failure.output_excerpt, + "step_kind": failure.step_kind, + "target_scope": failure.target_scope, + "package_name": failure.package_name, + "package_manager": failure.package_manager, + "launcher_kind": failure.launcher_kind, + })), + "steps": steps.iter().map(|step| json!({ + "label": step.label, + "status": step.status.as_str(), + "failure_kind": step.failure_kind, + "duration_ms": step.duration_ms, + "step_kind": step.step_kind, + "target_scope": step.target_scope, + "package_name": step.package_name, + "package_manager": step.package_manager, + "launcher_kind": step.launcher_kind, + })).collect::>(), + }); + let payload = serde_json::to_string(&payload) + .unwrap_or_else(|_| summary_text.clone()); + InputContentBlock::Text { + text: format!("{payload}\n\n[verifier summary]\n{summary_text}"), + } + } else { + InputContentBlock::Text { + text: 
summary_text.clone(), + } + } + } }) .collect::>(); (!content.is_empty()).then(|| InputMessage { @@ -5036,6 +5307,7 @@ fn normalize_subagent_type(subagent_type: Option<&str>) -> String { "verification" | "verificationagent" | "verify" | "verifier" => { String::from("Verification") } + "review" | "reviewagent" | "critic" | "criticagent" => String::from("Review"), "clawguide" | "clawguideagent" | "guide" => String::from("claw-guide"), "statusline" | "statuslinesetup" => String::from("statusline-setup"), _ => trimmed.to_string(), @@ -5916,8 +6188,7 @@ fn command_exists(command: &str) -> bool { .arg("-lc") .arg(format!("command -v {command} >/dev/null 2>&1")) .status() - .map(|status| status.success()) - .unwrap_or(false) + .is_ok_and(|status| status.success()) } #[allow(clippy::too_many_lines)] @@ -6126,21 +6397,43 @@ mod tests { use std::time::Duration; use super::{ - agent_permission_policy, allowed_tools_for_subagent, classify_lane_failure, - derive_agent_state, execute_agent_with_spawn, execute_tool, extract_recovery_outcome, - final_assistant_text, global_cron_registry, maybe_commit_provenance, mvp_tool_specs, - permission_mode_from_plugin, persist_agent_terminal_state, push_output_block, - run_task_packet, AgentInput, AgentJob, GlobalToolRegistry, LaneEventName, LaneFailureClass, - ProviderRuntimeClient, SubagentToolExecutor, + agent_permission_policy, allowed_tools_for_subagent, classify_bash_permission, + classify_lane_failure, convert_messages, derive_agent_state, enter_subagent_depth, + execute_agent_with_spawn, execute_tool, extract_recovery_outcome, final_assistant_text, + global_cron_registry, maybe_commit_provenance, mvp_tool_specs, normalize_subagent_type, + parse_review_outcome, permission_mode_from_plugin, persist_agent_terminal_state, + push_output_block, run_inline_review, run_task_packet, AgentInput, AgentJob, + GlobalToolRegistry, LaneEventName, LaneFailureClass, ProviderRuntimeClient, + SubagentToolExecutor, DEFAULT_AGENT_MODEL, }; - use 
api::OutputContentBlock; + use api::{InputContentBlock, OutputContentBlock}; + use runtime::verifier::VerificationStepReport; use runtime::ProviderFallbackConfig; use runtime::{ - permission_enforcer::PermissionEnforcer, ApiRequest, AssistantEvent, ConversationRuntime, - PermissionMode, PermissionPolicy, RuntimeError, Session, TaskPacket, ToolExecutor, + permission_enforcer::{EnforcementResult, PermissionEnforcer}, + ApiRequest, AssistantEvent, ContentBlock, ConversationMessage, ConversationRuntime, + MessageRole, PermissionMode, PermissionPolicy, RuntimeError, Session, TaskPacket, + ToolExecutor, VerificationFailureKind, VerificationPhase, VerificationReport, + VerificationStatus, }; use serde_json::json; + #[cfg(unix)] + fn make_executable(path: &Path) { + use std::os::unix::fs::PermissionsExt; + + let mut permissions = fs::metadata(path) + .expect("script metadata should load") + .permissions(); + permissions.set_mode(0o755); + fs::set_permissions(path, permissions).expect("script permissions should update"); + } + + #[cfg(not(unix))] + fn make_executable(path: &Path) { + let _ = path; + } + fn env_lock() -> &'static Mutex<()> { static LOCK: OnceLock> = OnceLock::new(); LOCK.get_or_init(|| Mutex::new(())) @@ -6172,6 +6465,18 @@ mod tests { std::env::temp_dir().join(format!("clawd-tools-{unique}-{name}")) } + fn normalized_test_path(path: &str) -> String { + path.replace('\\', "/") + } + + fn assert_path_suffix(value: &serde_json::Value, suffix: &str) { + let path = value.as_str().expect("path"); + assert!( + normalized_test_path(path).ends_with(suffix), + "expected path `{path}` to end with `{suffix}`" + ); + } + fn run_git(cwd: &Path, args: &[&str]) { let status = Command::new("git") .args(args) @@ -6335,10 +6640,12 @@ mod tests { let worktree = temp_path("config-trust-worktree"); let claw_dir = worktree.join(".claw"); fs::create_dir_all(&claw_dir).expect("create .claw dir"); - // Use the actual OS temp dir so the worktree path matches the allowlist let 
tmp_root = std::env::temp_dir().to_str().expect("utf-8").to_string(); - let settings = format!("{{\"trustedRoots\": [\"{tmp_root}\"]}}"); - fs::write(claw_dir.join("settings.json"), settings).expect("write settings"); + fs::write( + claw_dir.join("settings.json"), + json!({ "trustedRoots": [tmp_root] }).to_string(), + ) + .expect("write settings"); // WorkerCreate with no per-call trusted_roots — config should supply them let cwd = worktree.to_str().expect("valid utf-8").to_string(); @@ -6352,7 +6659,6 @@ mod tests { .expect("WorkerCreate should succeed"); let output: serde_json::Value = serde_json::from_str(&created).expect("json"); - // worktree is under /tmp, so config roots auto-resolve trust assert_eq!( output["trust_auto_resolve"], true, "config-level trustedRoots should auto-resolve trust without per-call override" @@ -7307,10 +7613,7 @@ mod tests { let output: serde_json::Value = serde_json::from_str(&result).expect("valid json"); assert_eq!(output["skill"], "help"); - assert!(output["path"] - .as_str() - .expect("path") - .ends_with("/help/SKILL.md")); + assert_path_suffix(&output["path"], "/help/SKILL.md"); assert!(output["prompt"] .as_str() .expect("prompt") @@ -7326,10 +7629,7 @@ mod tests { let dollar_output: serde_json::Value = serde_json::from_str(&dollar_result).expect("valid json"); assert_eq!(dollar_output["skill"], "$help"); - assert!(dollar_output["path"] - .as_str() - .expect("path") - .ends_with("/help/SKILL.md")); + assert_path_suffix(&dollar_output["path"], "/help/SKILL.md"); if let Some(home) = original_home { std::env::set_var("HOME", home); @@ -7365,19 +7665,13 @@ mod tests { .expect("project-local skill should resolve"); let skill_output: serde_json::Value = serde_json::from_str(&skill_result).expect("valid json"); - assert!(skill_output["path"] - .as_str() - .expect("path") - .ends_with(".claw/skills/plan/SKILL.md")); + assert_path_suffix(&skill_output["path"], ".claw/skills/plan/SKILL.md"); let command_result = execute_tool("Skill", 
&json!({ "skill": "/handoff" })) .expect("legacy command should resolve"); let command_output: serde_json::Value = serde_json::from_str(&command_result).expect("valid json"); - assert!(command_output["path"] - .as_str() - .expect("path") - .ends_with(".claw/commands/handoff.md")); + assert_path_suffix(&command_output["path"], ".claw/commands/handoff.md"); std::env::set_current_dir(&original_dir).expect("restore cwd"); fs::remove_dir_all(root).expect("temp project should clean up"); @@ -7412,10 +7706,7 @@ mod tests { .expect("project-local skill should resolve"); let output: serde_json::Value = serde_json::from_str(&result).expect("valid json"); - assert!(output["path"] - .as_str() - .expect("path") - .ends_with(".claude/skills/trace/SKILL.md")); + assert_path_suffix(&output["path"], ".claude/skills/trace/SKILL.md"); assert_eq!(output["description"], "Project-local trace helper"); std::env::set_current_dir(&original_dir).expect("restore cwd"); @@ -7474,15 +7765,9 @@ mod tests { let omc_output: serde_json::Value = serde_json::from_str(&omc_result).expect("valid json"); let agents_output: serde_json::Value = serde_json::from_str(&agents_result).expect("valid json"); - assert!(omc_output["path"] - .as_str() - .expect("path") - .ends_with(".omc/skills/hud/SKILL.md")); + assert_path_suffix(&omc_output["path"], ".omc/skills/hud/SKILL.md"); assert_eq!(omc_output["description"], "Project-local OMC HUD helper"); - assert!(agents_output["path"] - .as_str() - .expect("path") - .ends_with(".agents/skills/trace/SKILL.md")); + assert_path_suffix(&agents_output["path"], ".agents/skills/trace/SKILL.md"); assert_eq!( agents_output["description"], "Project-local agents compatibility helper" @@ -7534,10 +7819,7 @@ mod tests { .expect("learned skill should resolve"); let output: serde_json::Value = serde_json::from_str(&result).expect("valid json"); - assert!(output["path"] - .as_str() - .expect("path") - .ends_with("skills/omc-learned/learned/SKILL.md")); + 
assert_path_suffix(&output["path"], "skills/omc-learned/learned/SKILL.md"); assert_eq!(output["description"], "Learned OMC skill"); match original_home { @@ -7593,20 +7875,14 @@ mod tests { execute_tool("Skill", &json!({ "skill": "statusline" })).expect("direct skill"); let direct_skill_output: serde_json::Value = serde_json::from_str(&direct_skill).expect("valid skill json"); - assert!(direct_skill_output["path"] - .as_str() - .expect("path") - .ends_with("skills/statusline/SKILL.md")); + assert_path_suffix(&direct_skill_output["path"], "skills/statusline/SKILL.md"); assert_eq!(direct_skill_output["description"], "Claude config skill"); let legacy_command = execute_tool("Skill", &json!({ "skill": "doctor-check" })).expect("direct command"); let legacy_command_output: serde_json::Value = serde_json::from_str(&legacy_command).expect("valid command json"); - assert!(legacy_command_output["path"] - .as_str() - .expect("path") - .ends_with("commands/doctor-check.md")); + assert_path_suffix(&legacy_command_output["path"], "commands/doctor-check.md"); assert_eq!( legacy_command_output["description"], "Claude config command" @@ -7660,10 +7936,7 @@ mod tests { .expect("legacy command markdown should resolve"); let output: serde_json::Value = serde_json::from_str(&result).expect("valid json"); - assert!(output["path"] - .as_str() - .expect("path") - .ends_with(".claude/commands/team.md")); + assert_path_suffix(&output["path"], ".claude/commands/team.md"); assert_eq!(output["description"], "Legacy team workflow"); std::env::set_current_dir(&original_dir).expect("restore cwd"); @@ -8384,6 +8657,62 @@ mod tests { assert!(verification.contains("bash")); assert!(verification.contains("PowerShell")); assert!(!verification.contains("write_file")); + + let review = allowed_tools_for_subagent("Review"); + assert!(review.contains("bash")); + assert!(review.contains("read_file")); + assert!(review.contains("StructuredOutput")); + assert!(!review.contains("write_file")); + 
assert!(!review.contains("Agent")); + } + + #[test] + fn review_outcome_parses_json_and_normalizes_severity() { + let outcome = parse_review_outcome( + r#"{ + "summary": "Found one blocking issue.", + "findings": [ + { + "severity": "p1", + "title": "Null dereference", + "body": "The new path dereferences an optional value without checking it.", + "file": "src/lib.rs" + } + ] + }"#, + 1, + ); + + assert_eq!(outcome.summary, "Found one blocking issue."); + assert_eq!(outcome.subagent_depth, 1); + assert_eq!(outcome.findings.len(), 1); + assert_eq!(outcome.findings[0].severity, "P1"); + assert_eq!(outcome.findings[0].file.as_deref(), Some("src/lib.rs")); + } + + #[test] + fn review_outcome_extracts_fenced_json_payloads() { + let outcome = parse_review_outcome( + "Review complete.\n```json\n{\"findings\":[],\"summary\":\"No issues.\"}\n```", + 1, + ); + + assert!(outcome.findings.is_empty()); + assert_eq!(outcome.summary, "No issues."); + } + + #[test] + fn normalize_subagent_type_maps_review_aliases() { + assert_eq!(normalize_subagent_type(Some("review")), "Review"); + assert_eq!(normalize_subagent_type(Some("critic")), "Review"); + } + + #[test] + fn inline_review_rejects_nested_depth() { + let _guard = enter_subagent_depth(1); + let error = run_inline_review("{}", Some(DEFAULT_AGENT_MODEL)) + .expect_err("nested inline review should fail"); + assert!(error.contains("top-level depth")); } #[derive(Debug)] @@ -8860,10 +9189,7 @@ mod tests { .expect("glob should succeed"); let globbed_output: serde_json::Value = serde_json::from_str(&globbed).expect("json"); assert_eq!(globbed_output["numFiles"], 1); - assert!(globbed_output["filenames"][0] - .as_str() - .expect("filename") - .ends_with("nested/lib.rs")); + assert_path_suffix(&globbed_output["filenames"][0], "nested/lib.rs"); let glob_error = execute_tool("glob_search", &json!({ "pattern": "[" })) .expect_err("invalid glob should fail"); @@ -8941,6 +9267,159 @@ mod tests { assert_eq!(output["duration_ms"], 0); } + 
#[test] + fn convert_messages_keeps_text_primary_verification_reports_as_summary_text() { + let report = sample_verification_report(); + let messages = vec![ConversationMessage { + role: MessageRole::Verification, + blocks: vec![ContentBlock::VerificationReport { + report_id: report.report_id.clone(), + phase: report.phase, + status: report.status, + summary_text: report.summary_text.clone(), + adapter_id: Some(report.adapter_id.clone()), + project_root: Some(report.project_root.display().to_string()), + touched_paths: report + .touched_paths + .iter() + .map(|path| path.display().to_string()) + .collect(), + primary_failure: None, + steps: Vec::new(), + report_mode: Some("text-primary".to_string()), + }], + usage: None, + }]; + + let converted = convert_messages(&messages); + + assert_eq!(converted.len(), 1); + match &converted[0].content[0] { + InputContentBlock::Text { text } => assert_eq!(text, report.summary_text.as_str()), + other => panic!("expected text block, got {other:?}"), + } + } + + #[test] + fn convert_messages_emits_compact_json_for_typed_primary_verification_reports() { + let report = sample_verification_report(); + let message = ConversationMessage::verification_report(&report, Some("typed-primary")); + + let converted = convert_messages(&[message]); + + assert_eq!(converted.len(), 1); + match &converted[0].content[0] { + InputContentBlock::Text { text } => { + assert!(text.contains("\"type\":\"verification_report\"")); + assert!(text.contains("\"adapter_id\":\"rust-cargo\"")); + assert!(text.contains("\"failure_kind\":\"code\"")); + assert!(text.contains("[verifier summary]")); + assert!(text.contains(&report.summary_text)); + } + other => panic!("expected text block, got {other:?}"), + } + } + + #[test] + fn bash_validation_parity_read_only_primitive_is_shared() { + // Paridade: tools/src/lib.rs:classify_bash_permission (call site ~1846) e + // runtime::permission_enforcer::check_bash (call site ~151) devem concordar + // sobre a classificação 
de "read-only" porque ambos delegam para + // `runtime::bash_validation::validate_read_only`. + // + // Invariantes: + // (1) validate_read_only(cmd, ReadOnly) == Allow ⇔ + // enforcer(ReadOnly).check_bash(cmd) == Allowed + // (2) validate_read_only(cmd, ReadOnly) != Allow ⇒ + // classify_bash_permission(cmd) == DangerFullAccess + // (3) Quando (1) e validate_paths(cmd, cwd) == Allow, + // classify_bash_permission(cmd) == WorkspaceWrite. + let enforcer = PermissionEnforcer::new(PermissionPolicy::new(PermissionMode::ReadOnly)); + let cwd = std::env::current_dir().unwrap_or_else(|_| std::path::PathBuf::from(".")); + + let commands = [ + "ls", + "cat src/lib.rs", + "grep -n alpha src/lib.rs", + "cat /etc/passwd", + "rm src/lib.rs", + "echo hi > out.txt", + "pwd", + "git status", + "mkdir new", + "head -n 5 Cargo.toml", + ]; + + for command in commands { + let read_only_allow = matches!( + runtime::bash_validation::validate_read_only(command, PermissionMode::ReadOnly), + runtime::bash_validation::ValidationResult::Allow + ); + let enforcer_allow = matches!(enforcer.check_bash(command), EnforcementResult::Allowed); + assert_eq!( + read_only_allow, enforcer_allow, + "invariant (1) violated for `{command}`: read_only_allow={read_only_allow}, enforcer_allow={enforcer_allow}" + ); + + let tool_mode = classify_bash_permission(command); + if !read_only_allow { + assert_eq!( + tool_mode, + PermissionMode::DangerFullAccess, + "invariant (2) violated for `{command}`: tool_mode={tool_mode:?}" + ); + continue; + } + + let paths_allow = matches!( + runtime::bash_validation::validate_paths(command, &cwd), + runtime::bash_validation::ValidationResult::Allow + ); + if paths_allow && !crate::tool_targets_outside_workspace(command, &cwd) { + assert_eq!( + tool_mode, + PermissionMode::WorkspaceWrite, + "invariant (3) violated for `{command}`: tool_mode={tool_mode:?}" + ); + } else { + assert_eq!( + tool_mode, + PermissionMode::DangerFullAccess, + "out-of-workspace path must escalate for 
`{command}`: tool_mode={tool_mode:?}" + ); + } + } + } + + #[test] + fn bash_permission_matrix_documents_current_overlap_with_runtime_enforcer() { + let enforcer = PermissionEnforcer::new(PermissionPolicy::new(PermissionMode::ReadOnly)); + let cases = [ + ("cat src/lib.rs", PermissionMode::WorkspaceWrite, true), + ( + "grep -n alpha src/lib.rs", + PermissionMode::WorkspaceWrite, + true, + ), + ("cat /etc/passwd", PermissionMode::DangerFullAccess, true), + ("rm src/lib.rs", PermissionMode::DangerFullAccess, false), + ("echo hi > out.txt", PermissionMode::DangerFullAccess, false), + ]; + + for (command, expected_mode, enforcer_allows) in cases { + assert_eq!( + classify_bash_permission(command), + expected_mode, + "unexpected tool-side classification for {command}" + ); + let allowed = matches!(enforcer.check_bash(command), EnforcementResult::Allowed); + assert_eq!( + allowed, enforcer_allows, + "unexpected enforcer outcome for {command}" + ); + } + } + #[test] fn brief_returns_sent_message_and_attachment_metadata() { let attachment = std::env::temp_dir().join(format!( @@ -9242,24 +9721,18 @@ mod tests { .expect("time") .as_nanos() )); - std::fs::create_dir_all(&dir).expect("create dir"); - let script = dir.join("pwsh"); - std::fs::write( - &script, - r#"#!/bin/sh -while [ "$1" != "-Command" ] && [ $# -gt 0 ]; do shift; done -shift -printf 'pwsh:%s' "$1" -"#, - ) - .expect("write script"); - std::process::Command::new("/bin/chmod") - .arg("+x") - .arg(&script) - .status() - .expect("chmod"); let original_path = std::env::var("PATH").unwrap_or_default(); - std::env::set_var("PATH", format!("{}:{}", dir.display(), original_path)); + if !cfg!(windows) { + std::fs::create_dir_all(&dir).expect("create dir"); + let script = dir.join("powershell"); + std::fs::write( + &script, + "#!/bin/sh\nwhile [ \"$1\" != \"-Command\" ] && [ $# -gt 0 ]; do shift; done\nshift\nprintf 'pwsh:%s' \"$1\"\n", + ) + .expect("write script"); + make_executable(&script); + 
std::env::set_var("PATH", format!("{}:{}", dir.display(), original_path)); + } let result = execute_tool( "PowerShell", @@ -9273,11 +9746,21 @@ printf 'pwsh:%s' "$1" ) .expect("PowerShell background should succeed"); - std::env::set_var("PATH", original_path); - let _ = std::fs::remove_dir_all(dir); + if !cfg!(windows) { + std::env::set_var("PATH", original_path); + let _ = std::fs::remove_dir_all(dir); + } let output: serde_json::Value = serde_json::from_str(&result).expect("json"); - assert_eq!(output["stdout"], "pwsh:Write-Output hello"); + let stdout = output["stdout"] + .as_str() + .expect("stdout") + .replace("\r\n", "\n"); + if cfg!(windows) { + assert_eq!(stdout, "hello\n"); + } else { + assert_eq!(stdout, "pwsh:Write-Output hello"); + } assert!(output["stderr"].as_str().expect("stderr").is_empty()); let background_output: serde_json::Value = serde_json::from_str(&background).expect("json"); @@ -9682,4 +10165,34 @@ printf 'pwsh:%s' "$1" .into_bytes() } } + + fn sample_verification_report() -> VerificationReport { + VerificationReport { + report_id: "tool-typed-report".to_string(), + phase: VerificationPhase::Quick, + adapter_id: "rust-cargo".to_string(), + project_root: PathBuf::from("/workspace"), + touched_paths: vec![PathBuf::from("src/lib.rs")], + status: VerificationStatus::Failed, + summary_text: + "[verifier:quick:rust-cargo] failed (/workspace)\n[verifier] cargo check: FAIL" + .to_string(), + steps: vec![VerificationStepReport { + adapter: "rust-cargo".to_string(), + project_root: PathBuf::from("/workspace"), + label: "cargo check".to_string(), + command: "cargo check -p demo".to_string(), + phase: VerificationPhase::Quick, + status: VerificationStatus::Failed, + failure_kind: Some(VerificationFailureKind::Code), + duration_ms: 41, + truncated_output: "error[E0308]: mismatched types".to_string(), + step_kind: Some("cargo_check".to_string()), + target_scope: Some("package".to_string()), + package_name: Some("demo".to_string()), + package_manager: 
None, + launcher_kind: None, + }], + } + } } diff --git a/rust/scripts/run_mock_parity_diff.py b/rust/scripts/run_mock_parity_diff.py old mode 100755 new mode 100644 diff --git a/rust/scripts/run_mock_parity_harness.sh b/rust/scripts/run_mock_parity_harness.sh old mode 100755 new mode 100644