From 8b87d0c85e42496037e3cb8c955e1f2b861fc3f7 Mon Sep 17 00:00:00 2001
From: Guajir0-code
Date: Mon, 20 Apr 2026 11:00:54 +0000
Subject: [PATCH 1/6] Add self-verification loop for Rust edits

After the model edits a .rs file via edit_file/write_file, the runtime
now automatically runs cargo check, clippy, fmt --check, and test on the
owning crate and folds the result back into the tool_result. If any step
fails, is_error=true forces the model to correct on the next iteration
instead of waiting for the user to notice.

- New verifier module with Verifier trait and CargoVerifier impl
  (manifest discovery, subprocess timeout, output truncation preserving
  error/warning lines, early-exit after first failure).
- RuntimeVerifierConfig wired through settings.json with nested schema
  validation, precedence User/Project/Local.
- ConversationRuntime integrates the verifier between post-hook and the
  tool_result, with record_verifier_ran telemetry.
- CLI wires CargoVerifier from config.
- 12 e2e tests spawn real cargo against temp crates to cover passing
  code, type errors, fmt violations, timeouts, step skipping after
  failure, nested files, alternate path keys, and malformed input.

Also clears pre-existing clippy/compile errors in unrelated crates
(ApiError missing suggested_action in 4 CLI tests, map_unwrap_or,
duration_suboptimal_units, trailing commas, result_large_err) so the
workspace passes cargo clippy --workspace --all-targets -D warnings and
cargo test --workspace end-to-end.

Co-Authored-By: Claude Opus 4.7 --- rust/Cargo.lock | 264 +++++++++- rust/crates/api/src/providers/anthropic.rs | 27 +- rust/crates/api/src/providers/mod.rs | 20 +- .../crates/api/src/providers/openai_compat.rs | 18 +- rust/crates/runtime/src/config.rs | 113 +++++ rust/crates/runtime/src/config_validate.rs | 56 +++ rust/crates/runtime/src/conversation.rs | 236 +++++++++ rust/crates/runtime/src/lib.rs | 8 +- rust/crates/runtime/src/policy_engine.rs | 2 +- rust/crates/runtime/src/sandbox.rs | 3 +- rust/crates/runtime/src/verifier.rs | 472 ++++++++++++++++++ .../crates/runtime/tests/integration_tests.rs | 14 +- rust/crates/runtime/tests/verifier_e2e.rs | 295 +++++++++++ rust/crates/rusty-claude-cli/src/main.rs | 29 +- rust/crates/tools/src/lib.rs | 6 +- 15 files changed, 1509 insertions(+), 54 deletions(-) create mode 100644 rust/crates/runtime/src/verifier.rs create mode 100644 rust/crates/runtime/tests/verifier_e2e.rs diff --git a/rust/Cargo.lock b/rust/Cargo.lock index e37ae7a5a8..740147e78e 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -17,10 +17,23 @@ dependencies = [ "memchr", ] +[[package]] +name = "anes" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + [[package]] name = "api" version = "0.1.0" dependencies = [ + "criterion", "reqwest", "runtime", "serde", @@ -35,6 +48,12 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "autocfg" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" + [[package]] name = "base64" version = "0.22.1" @@ -77,6 +96,12 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "cast" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" + [[package]] name = "cc" version = "1.2.58" @@ -99,6 +124,58 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "ciborium" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" +dependencies = [ + "ciborium-io", + "ciborium-ll", + "serde", +] + +[[package]] +name = "ciborium-io" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" + +[[package]] +name = "ciborium-ll" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" +dependencies = [ + "ciborium-io", + "half", +] + +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstyle", + "clap_lex", +] + +[[package]] +name = "clap_lex" 
+version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + [[package]] name = "clipboard-win" version = "5.4.1" @@ -144,6 +221,67 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "criterion" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2b12d017a929603d80db1831cd3a24082f8137ce19c69e6447f54f5fc8d692f" +dependencies = [ + "anes", + "cast", + "ciborium", + "clap", + "criterion-plot", + "is-terminal", + "itertools", + "num-traits", + "once_cell", + "oorandom", + "plotters", + "rayon", + "regex", + "serde", + "serde_derive", + "serde_json", + "tinytemplate", + "walkdir", +] + +[[package]] +name = "criterion-plot" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b50826342786a51a89e2da3a28f1c32b06e387201bc2d19791f622c673706b1" +dependencies = [ + "cast", + "itertools", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + [[package]] name = "crossterm" version = "0.28.1" @@ -169,6 +307,12 @@ dependencies = [ "winapi", ] +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" version = "0.1.7" @@ -209,6 +353,12 @@ dependencies = [ "syn", ] +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "endian-type" version = "0.1.2" @@ -245,7 +395,7 @@ checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78" dependencies = [ "cfg-if", "rustix 1.1.4", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -380,12 +530,29 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "zerocopy", +] + [[package]] name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "hermit-abi" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" + [[package]] name = "home" version = "0.5.12" @@ -622,6 +789,26 @@ dependencies = [ "serde", ] +[[package]] +name = "is-terminal" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.61.2", +] + +[[package]] +name = "itertools" +version = "0.10.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b0fd2260e829bddf4cb6ea802289de2f86d6a7a690192fbe91b3f46e0f2c8473" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.18" @@ -755,6 +942,15 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.21.4" @@ -783,6 +979,12 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "oorandom" +version = "11.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e" + [[package]] name = "parking_lot" version = "0.12.5" @@ -837,6 +1039,34 @@ dependencies = [ "time", ] +[[package]] +name = "plotters" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aeb6f403d7a4911efb1e33402027fc44f29b5bf6def3effcc22d7bb75f2b747" +dependencies = [ + "num-traits", + "plotters-backend", + "plotters-svg", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "plotters-backend" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df42e13c12958a16b3f7f4386b9ab1f3e7933914ecea48da7139435263a4172a" + +[[package]] +name = "plotters-svg" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51bae2ac328883f7acdfea3d66a7c35751187f870bc81f94563733a154d7a670" +dependencies = [ + "plotters-backend", +] + [[package]] name = "plugins" version = "0.1.0" @@ -1015,6 +1245,26 @@ dependencies = [ "getrandom 0.3.4", ] +[[package]] +name = "rayon" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -1138,7 +1388,7 @@ dependencies = [ "errno", "libc", "linux-raw-sys 0.4.15", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -1522,6 +1772,16 @@ dependencies = [ "zerovec", ] +[[package]] +name = "tinytemplate" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4d6b5f19ff7664e8c98d03e2139cb510db9b0a60b55f8e8709b689d939b6bc" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "tinyvec" version = "1.11.0" diff --git a/rust/crates/api/src/providers/anthropic.rs b/rust/crates/api/src/providers/anthropic.rs index 7c9f02945e..51e10b44dd 100644 --- a/rust/crates/api/src/providers/anthropic.rs +++ b/rust/crates/api/src/providers/anthropic.rs @@ -600,8 +600,9 @@ fn jitter_for_base(base: Duration) -> Duration { } let raw_nanos = SystemTime::now() .duration_since(UNIX_EPOCH) - .map(|elapsed| u64::try_from(elapsed.as_nanos()).unwrap_or(u64::MAX)) - .unwrap_or(0); + .map_or(0, |elapsed| { + u64::try_from(elapsed.as_nanos()).unwrap_or(u64::MAX) + }); let tick = JITTER_COUNTER.fetch_add(1, Ordering::Relaxed); // splitmix64 finalizer — mixes the low bits so large bases still see // jitter across their full range instead of being clamped to subsec nanos. @@ -844,19 +845,17 @@ impl MessageStream { StreamEvent::MessageDelta(MessageDeltaEvent { usage, .. 
}) => { self.latest_usage = Some(usage.clone()); } - StreamEvent::MessageStop(_) => { - if !self.usage_recorded { - if let (Some(prompt_cache), Some(usage)) = - (&self.prompt_cache, self.latest_usage.as_ref()) - { - let record = prompt_cache.record_usage(&self.request, usage); - *self - .last_prompt_cache_record - .lock() - .unwrap_or_else(std::sync::PoisonError::into_inner) = Some(record); - } - self.usage_recorded = true; + StreamEvent::MessageStop(_) if !self.usage_recorded => { + if let (Some(prompt_cache), Some(usage)) = + (&self.prompt_cache, self.latest_usage.as_ref()) + { + let record = prompt_cache.record_usage(&self.request, usage); + *self + .last_prompt_cache_record + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner) = Some(record); } + self.usage_recorded = true; } _ => {} } diff --git a/rust/crates/api/src/providers/mod.rs b/rust/crates/api/src/providers/mod.rs index fb97900359..86871a82a1 100644 --- a/rust/crates/api/src/providers/mod.rs +++ b/rust/crates/api/src/providers/mod.rs @@ -753,14 +753,14 @@ mod tests { #[test] fn returns_context_window_metadata_for_kimi_models() { // kimi-k2.5 - let k25_limit = model_token_limit("kimi-k2.5") - .expect("kimi-k2.5 should have token limit metadata"); + let k25_limit = + model_token_limit("kimi-k2.5").expect("kimi-k2.5 should have token limit metadata"); assert_eq!(k25_limit.max_output_tokens, 16_384); assert_eq!(k25_limit.context_window_tokens, 256_000); // kimi-k1.5 - let k15_limit = model_token_limit("kimi-k1.5") - .expect("kimi-k1.5 should have token limit metadata"); + let k15_limit = + model_token_limit("kimi-k1.5").expect("kimi-k1.5 should have token limit metadata"); assert_eq!(k15_limit.max_output_tokens, 16_384); assert_eq!(k15_limit.context_window_tokens, 256_000); } @@ -768,11 +768,13 @@ mod tests { #[test] fn kimi_alias_resolves_to_kimi_k25_token_limits() { // The "kimi" alias resolves to "kimi-k2.5" via resolve_model_alias() - let alias_limit = model_token_limit("kimi") - 
.expect("kimi alias should resolve to kimi-k2.5 limits"); - let direct_limit = model_token_limit("kimi-k2.5") - .expect("kimi-k2.5 should have limits"); - assert_eq!(alias_limit.max_output_tokens, direct_limit.max_output_tokens); + let alias_limit = + model_token_limit("kimi").expect("kimi alias should resolve to kimi-k2.5 limits"); + let direct_limit = model_token_limit("kimi-k2.5").expect("kimi-k2.5 should have limits"); + assert_eq!( + alias_limit.max_output_tokens, + direct_limit.max_output_tokens + ); assert_eq!( alias_limit.context_window_tokens, direct_limit.context_window_tokens diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs index 4e4183bd96..319a8896ee 100644 --- a/rust/crates/api/src/providers/openai_compat.rs +++ b/rust/crates/api/src/providers/openai_compat.rs @@ -327,8 +327,9 @@ fn jitter_for_base(base: Duration) -> Duration { } let raw_nanos = SystemTime::now() .duration_since(UNIX_EPOCH) - .map(|elapsed| u64::try_from(elapsed.as_nanos()).unwrap_or(u64::MAX)) - .unwrap_or(0); + .map_or(0, |elapsed| { + u64::try_from(elapsed.as_nanos()).unwrap_or(u64::MAX) + }); let tick = JITTER_COUNTER.fetch_add(1, Ordering::Relaxed); let mut mixed = raw_nanos .wrapping_add(tick) @@ -2195,9 +2196,16 @@ mod tests { #[test] fn provider_specific_size_limits_are_correct() { - assert_eq!(OpenAiCompatConfig::dashscope().max_request_body_bytes, 6_291_456); // 6MB - assert_eq!(OpenAiCompatConfig::openai().max_request_body_bytes, 104_857_600); // 100MB - assert_eq!(OpenAiCompatConfig::xai().max_request_body_bytes, 52_428_800); // 50MB + assert_eq!( + OpenAiCompatConfig::dashscope().max_request_body_bytes, + 6_291_456 + ); // 6MB + assert_eq!( + OpenAiCompatConfig::openai().max_request_body_bytes, + 104_857_600 + ); // 100MB + assert_eq!(OpenAiCompatConfig::xai().max_request_body_bytes, 52_428_800); + // 50MB } #[test] diff --git a/rust/crates/runtime/src/config.rs b/rust/crates/runtime/src/config.rs index 
c1fe4967a9..7192978483 100644 --- a/rust/crates/runtime/src/config.rs +++ b/rust/crates/runtime/src/config.rs @@ -65,6 +65,68 @@ pub struct RuntimeFeatureConfig { sandbox: SandboxConfig, provider_fallbacks: ProviderFallbackConfig, trusted_roots: Vec, + verifier: RuntimeVerifierConfig, +} + +/// Settings for the post-edit self-verification loop. +/// +/// When enabled, the runtime runs cargo-based checks on the crate owning a +/// freshly edited Rust file and injects the result into the tool output so +/// the assistant can react on the next iteration. +#[allow(clippy::struct_excessive_bools)] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct RuntimeVerifierConfig { + enabled: bool, + run_check: bool, + run_clippy: bool, + run_fmt: bool, + run_test: bool, + timeout_secs: u64, +} + +impl Default for RuntimeVerifierConfig { + fn default() -> Self { + Self { + enabled: false, + run_check: true, + run_clippy: true, + run_fmt: true, + run_test: true, + timeout_secs: 120, + } + } +} + +impl RuntimeVerifierConfig { + #[must_use] + pub fn enabled(&self) -> bool { + self.enabled + } + + #[must_use] + pub fn run_check(&self) -> bool { + self.run_check + } + + #[must_use] + pub fn run_clippy(&self) -> bool { + self.run_clippy + } + + #[must_use] + pub fn run_fmt(&self) -> bool { + self.run_fmt + } + + #[must_use] + pub fn run_test(&self) -> bool { + self.run_test + } + + #[must_use] + pub fn timeout_secs(&self) -> u64 { + self.timeout_secs + } } /// Ordered chain of fallback model identifiers used when the primary @@ -315,6 +377,7 @@ impl ConfigLoader { sandbox: parse_optional_sandbox_config(&merged_value)?, provider_fallbacks: parse_optional_provider_fallbacks(&merged_value)?, trusted_roots: parse_optional_trusted_roots(&merged_value)?, + verifier: parse_optional_verifier_config(&merged_value)?, }; Ok(RuntimeConfig { @@ -414,6 +477,11 @@ impl RuntimeConfig { pub fn trusted_roots(&self) -> &[String] { &self.feature_config.trusted_roots } + + #[must_use] + pub fn 
verifier(&self) -> &RuntimeVerifierConfig { + &self.feature_config.verifier + } } impl RuntimeFeatureConfig { @@ -483,6 +551,17 @@ impl RuntimeFeatureConfig { pub fn trusted_roots(&self) -> &[String] { &self.trusted_roots } + + #[must_use] + pub fn verifier(&self) -> &RuntimeVerifierConfig { + &self.verifier + } + + #[must_use] + pub fn with_verifier(mut self, verifier: RuntimeVerifierConfig) -> Self { + self.verifier = verifier; + self + } } impl ProviderFallbackConfig { @@ -777,6 +856,40 @@ fn validate_optional_hooks_config( parse_optional_hooks_config_object(root, &format!("{}: hooks", path.display())).map(|_| ()) } +fn parse_optional_verifier_config(root: &JsonValue) -> Result { + let Some(object) = root.as_object() else { + return Ok(RuntimeVerifierConfig::default()); + }; + let Some(verifier_value) = object.get("verifier") else { + return Ok(RuntimeVerifierConfig::default()); + }; + let verifier = expect_object(verifier_value, "merged settings.verifier")?; + + let mut config = RuntimeVerifierConfig::default(); + if let Some(enabled) = optional_bool(verifier, "enabled", "merged settings.verifier")? { + config.enabled = enabled; + } + if let Some(cargo_value) = verifier.get("cargo") { + let cargo = expect_object(cargo_value, "merged settings.verifier.cargo")?; + if let Some(v) = optional_bool(cargo, "check", "merged settings.verifier.cargo")? { + config.run_check = v; + } + if let Some(v) = optional_bool(cargo, "clippy", "merged settings.verifier.cargo")? { + config.run_clippy = v; + } + if let Some(v) = optional_bool(cargo, "fmt", "merged settings.verifier.cargo")? { + config.run_fmt = v; + } + if let Some(v) = optional_bool(cargo, "test", "merged settings.verifier.cargo")? { + config.run_test = v; + } + if let Some(v) = optional_u64(cargo, "timeoutSecs", "merged settings.verifier.cargo")? 
{ + config.timeout_secs = v; + } + } + Ok(config) +} + fn parse_optional_permission_rules( root: &JsonValue, ) -> Result { diff --git a/rust/crates/runtime/src/config_validate.rs b/rust/crates/runtime/src/config_validate.rs index 7a9c1c4adc..7e45119651 100644 --- a/rust/crates/runtime/src/config_validate.rs +++ b/rust/crates/runtime/src/config_validate.rs @@ -197,6 +197,44 @@ const TOP_LEVEL_FIELDS: &[FieldSpec] = &[ name: "trustedRoots", expected: FieldType::StringArray, }, + FieldSpec { + name: "verifier", + expected: FieldType::Object, + }, +]; + +const VERIFIER_FIELDS: &[FieldSpec] = &[ + FieldSpec { + name: "enabled", + expected: FieldType::Bool, + }, + FieldSpec { + name: "cargo", + expected: FieldType::Object, + }, +]; + +const VERIFIER_CARGO_FIELDS: &[FieldSpec] = &[ + FieldSpec { + name: "check", + expected: FieldType::Bool, + }, + FieldSpec { + name: "clippy", + expected: FieldType::Bool, + }, + FieldSpec { + name: "fmt", + expected: FieldType::Bool, + }, + FieldSpec { + name: "test", + expected: FieldType::Bool, + }, + FieldSpec { + name: "timeoutSecs", + expected: FieldType::Number, + }, ]; const HOOKS_FIELDS: &[FieldSpec] = &[ @@ -501,6 +539,24 @@ pub fn validate_config_file( &path_display, )); } + if let Some(verifier) = object.get("verifier").and_then(JsonValue::as_object) { + result.merge(validate_object_keys( + verifier, + VERIFIER_FIELDS, + "verifier", + source, + &path_display, + )); + if let Some(cargo) = verifier.get("cargo").and_then(JsonValue::as_object) { + result.merge(validate_object_keys( + cargo, + VERIFIER_CARGO_FIELDS, + "verifier.cargo", + source, + &path_display, + )); + } + } result } diff --git a/rust/crates/runtime/src/conversation.rs b/rust/crates/runtime/src/conversation.rs index 610ba1a879..1820060810 100644 --- a/rust/crates/runtime/src/conversation.rs +++ b/rust/crates/runtime/src/conversation.rs @@ -14,6 +14,7 @@ use crate::permissions::{ }; use crate::session::{ContentBlock, ConversationMessage, Session}; use 
crate::usage::{TokenUsage, UsageTracker}; +use crate::verifier::{prepend_verifier_summary, Verifier}; const DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD: u32 = 100_000; const AUTO_COMPACTION_THRESHOLD_ENV_VAR: &str = "CLAUDE_CODE_AUTO_COMPACT_INPUT_TOKENS"; @@ -136,6 +137,7 @@ pub struct ConversationRuntime { hook_abort_signal: HookAbortSignal, hook_progress_reporter: Option>, session_tracer: Option, + verifier: Option>, } impl ConversationRuntime @@ -185,6 +187,7 @@ where hook_abort_signal: HookAbortSignal::default(), hook_progress_reporter: None, session_tracer: None, + verifier: None, } } @@ -221,6 +224,12 @@ where self } + #[must_use] + pub fn with_verifier(mut self, verifier: Box) -> Self { + self.verifier = Some(verifier); + self + } + fn run_pre_tool_use_hook(&mut self, tool_name: &str, input: &str) -> HookRunResult { if let Some(reporter) = self.hook_progress_reporter.as_mut() { self.hook_runner.run_pre_tool_use_with_context( @@ -482,6 +491,19 @@ where || post_hook_result.is_cancelled(), ); + if !is_error { + if let Some(verifier) = self.verifier.as_ref() { + if let Some(result) = verifier.verify(&tool_name, &effective_input) + { + self.record_verifier_ran(iterations, &tool_name, result.passed); + output = prepend_verifier_summary(&result.summary, output); + if !result.passed { + is_error = true; + } + } + } + } + ConversationMessage::tool_result(tool_use_id, tool_name, output, is_error) } PermissionOutcome::Deny { reason } => ConversationMessage::tool_result( @@ -673,6 +695,21 @@ where session_tracer.record("turn_completed", attributes); } + fn record_verifier_ran(&self, iteration: usize, tool_name: &str, passed: bool) { + let Some(session_tracer) = &self.session_tracer else { + return; + }; + + let mut attributes = Map::new(); + attributes.insert("iteration".to_string(), Value::from(iteration as u64)); + attributes.insert( + "tool_name".to_string(), + Value::String(tool_name.to_string()), + ); + attributes.insert("passed".to_string(), 
Value::Bool(passed)); + session_tracer.record("verifier_ran", attributes); + } + fn record_turn_failed(&self, iteration: usize, error: &RuntimeError) { let Some(session_tracer) = &self.session_tracer else { return; @@ -835,6 +872,7 @@ mod tests { use crate::prompt::{ProjectContext, SystemPromptBuilder}; use crate::session::{ContentBlock, MessageRole, Session}; use crate::usage::TokenUsage; + use crate::verifier::{VerificationResult, Verifier}; use crate::ToolError; use std::fs; use std::path::PathBuf; @@ -1808,4 +1846,202 @@ mod tests { // then assert_eq!(error.to_string(), "upstream failed"); } + + #[test] + fn verifier_feedback_is_injected_and_marks_tool_result_as_error_on_failure() { + struct EditOnceApi { + calls: usize, + } + impl ApiClient for EditOnceApi { + fn stream( + &mut self, + _request: ApiRequest, + ) -> Result, RuntimeError> { + self.calls += 1; + if self.calls == 1 { + Ok(vec![ + AssistantEvent::ToolUse { + id: "tool-1".to_string(), + name: "edit_file".to_string(), + input: r#"{"file_path":"src/lib.rs"}"#.to_string(), + }, + AssistantEvent::MessageStop, + ]) + } else { + Ok(vec![ + AssistantEvent::TextDelta("ok".to_string()), + AssistantEvent::MessageStop, + ]) + } + } + } + + struct FailingVerifier; + impl Verifier for FailingVerifier { + fn verify(&self, tool_name: &str, _tool_input: &str) -> Option { + assert_eq!(tool_name, "edit_file"); + Some(VerificationResult { + passed: false, + summary: "[verifier] cargo check: FAIL\nerror[E0308]: mismatched types" + .to_string(), + }) + } + } + + let mut runtime = ConversationRuntime::new( + Session::new(), + EditOnceApi { calls: 0 }, + StaticToolExecutor::new().register("edit_file", |_| Ok("edited".to_string())), + PermissionPolicy::new(PermissionMode::DangerFullAccess), + vec!["system".to_string()], + ) + .with_verifier(Box::new(FailingVerifier)); + + let summary = runtime + .run_turn("fix it", None) + .expect("turn should complete"); + + assert_eq!(summary.tool_results.len(), 1); + let 
ContentBlock::ToolResult { + is_error, output, .. + } = &summary.tool_results[0].blocks[0] + else { + panic!("expected tool result block"); + }; + assert!(*is_error, "verifier failure should flip is_error to true"); + assert!( + output.contains("[verifier]") && output.contains("mismatched types"), + "verifier summary must be surfaced to the model: {output:?}" + ); + assert!( + output.contains("edited"), + "original tool output must be preserved: {output:?}" + ); + } + + #[test] + fn verifier_passing_leaves_tool_result_successful() { + struct EditApi { + done: bool, + } + impl ApiClient for EditApi { + fn stream( + &mut self, + _request: ApiRequest, + ) -> Result, RuntimeError> { + if self.done { + Ok(vec![ + AssistantEvent::TextDelta("done".to_string()), + AssistantEvent::MessageStop, + ]) + } else { + self.done = true; + Ok(vec![ + AssistantEvent::ToolUse { + id: "tool-1".to_string(), + name: "edit_file".to_string(), + input: r#"{"file_path":"src/lib.rs"}"#.to_string(), + }, + AssistantEvent::MessageStop, + ]) + } + } + } + + struct PassingVerifier; + impl Verifier for PassingVerifier { + fn verify(&self, _tool_name: &str, _tool_input: &str) -> Option { + Some(VerificationResult { + passed: true, + summary: "[verifier] cargo check: ok".to_string(), + }) + } + } + + let mut runtime = ConversationRuntime::new( + Session::new(), + EditApi { done: false }, + StaticToolExecutor::new().register("edit_file", |_| Ok("edited".to_string())), + PermissionPolicy::new(PermissionMode::DangerFullAccess), + vec!["system".to_string()], + ) + .with_verifier(Box::new(PassingVerifier)); + + let summary = runtime + .run_turn("edit", None) + .expect("turn should complete"); + + let ContentBlock::ToolResult { + is_error, output, .. 
+ } = &summary.tool_results[0].blocks[0] + else { + panic!("expected tool result"); + }; + assert!(!*is_error, "passing verifier must not flip is_error"); + assert!(output.contains("cargo check: ok")); + } + + #[test] + fn verifier_is_not_called_when_tool_result_is_already_an_error() { + struct EditApi { + done: bool, + } + impl ApiClient for EditApi { + fn stream( + &mut self, + _request: ApiRequest, + ) -> Result, RuntimeError> { + if self.done { + Ok(vec![ + AssistantEvent::TextDelta("ack".to_string()), + AssistantEvent::MessageStop, + ]) + } else { + self.done = true; + Ok(vec![ + AssistantEvent::ToolUse { + id: "tool-1".to_string(), + name: "edit_file".to_string(), + input: r#"{"file_path":"src/lib.rs"}"#.to_string(), + }, + AssistantEvent::MessageStop, + ]) + } + } + } + + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; + struct CountingVerifier(Arc); + impl Verifier for CountingVerifier { + fn verify(&self, _tool_name: &str, _tool_input: &str) -> Option { + self.0.fetch_add(1, Ordering::SeqCst); + Some(VerificationResult { + passed: true, + summary: String::new(), + }) + } + } + + let counter = Arc::new(AtomicUsize::new(0)); + let mut runtime = ConversationRuntime::new( + Session::new(), + EditApi { done: false }, + StaticToolExecutor::new() + .register("edit_file", |_| Err(ToolError::new("tool exploded"))), + PermissionPolicy::new(PermissionMode::DangerFullAccess), + vec!["system".to_string()], + ) + .with_verifier(Box::new(CountingVerifier(counter.clone()))); + + runtime + .run_turn("edit", None) + .expect("turn should complete"); + + assert_eq!( + counter.load(Ordering::SeqCst), + 0, + "verifier must be skipped when tool itself errored" + ); + } } diff --git a/rust/crates/runtime/src/lib.rs b/rust/crates/runtime/src/lib.rs index 432e1c1e02..ec1ac4981a 100644 --- a/rust/crates/runtime/src/lib.rs +++ b/rust/crates/runtime/src/lib.rs @@ -47,6 +47,7 @@ pub mod team_cron_registry; #[cfg(test)] mod trust_resolver; mod usage; +pub mod 
verifier; pub mod worker_boot; pub use bash::{execute_bash, BashCommandInput, BashCommandOutput}; @@ -61,8 +62,8 @@ pub use config::{ McpManagedProxyServerConfig, McpOAuthConfig, McpRemoteServerConfig, McpSdkServerConfig, McpServerConfig, McpStdioServerConfig, McpTransport, McpWebSocketServerConfig, OAuthConfig, ProviderFallbackConfig, ResolvedPermissionMode, RuntimeConfig, RuntimeFeatureConfig, - RuntimeHookConfig, RuntimePermissionRuleConfig, RuntimePluginConfig, ScopedMcpServerConfig, - CLAW_SETTINGS_SCHEMA_NAME, + RuntimeHookConfig, RuntimePermissionRuleConfig, RuntimePluginConfig, RuntimeVerifierConfig, + ScopedMcpServerConfig, CLAW_SETTINGS_SCHEMA_NAME, }; pub use config_validate::{ check_unsupported_format, format_diagnostics, validate_config_file, ConfigDiagnostic, @@ -167,6 +168,9 @@ pub use trust_resolver::{TrustConfig, TrustDecision, TrustEvent, TrustPolicy, Tr pub use usage::{ format_usd, pricing_for_model, ModelPricing, TokenUsage, UsageCostEstimate, UsageTracker, }; +pub use verifier::{ + prepend_verifier_summary, CargoVerifier, CargoVerifierConfig, VerificationResult, Verifier, +}; pub use worker_boot::{ Worker, WorkerEvent, WorkerEventKind, WorkerEventPayload, WorkerFailure, WorkerFailureKind, WorkerPromptTarget, WorkerReadySnapshot, WorkerRegistry, WorkerStatus, WorkerTrustResolution, diff --git a/rust/crates/runtime/src/policy_engine.rs b/rust/crates/runtime/src/policy_engine.rs index 84912a679d..0403853c36 100644 --- a/rust/crates/runtime/src/policy_engine.rs +++ b/rust/crates/runtime/src/policy_engine.rs @@ -2,7 +2,7 @@ use std::time::Duration; pub type GreenLevel = u8; -const STALE_BRANCH_THRESHOLD: Duration = Duration::from_secs(60 * 60); +const STALE_BRANCH_THRESHOLD: Duration = Duration::from_hours(1); #[derive(Debug, Clone, PartialEq, Eq)] pub struct PolicyRule { diff --git a/rust/crates/runtime/src/sandbox.rs b/rust/crates/runtime/src/sandbox.rs index 45f118a9f6..b5fd1797b1 100644 --- a/rust/crates/runtime/src/sandbox.rs +++ 
b/rust/crates/runtime/src/sandbox.rs @@ -298,8 +298,7 @@ fn unshare_user_namespace_works() -> bool { .stdout(std::process::Stdio::null()) .stderr(std::process::Stdio::null()) .status() - .map(|s| s.success()) - .unwrap_or(false) + .is_ok_and(|s| s.success()) }) } diff --git a/rust/crates/runtime/src/verifier.rs b/rust/crates/runtime/src/verifier.rs new file mode 100644 index 0000000000..563f19cba3 --- /dev/null +++ b/rust/crates/runtime/src/verifier.rs @@ -0,0 +1,472 @@ +//! Self-verification of code edits. +//! +//! After the model runs a writing tool (`edit_file` / `write_file`) on a Rust +//! source file, a [`Verifier`] is given the chance to run additional checks +//! (`cargo check`, `cargo clippy`, `cargo fmt --check`, `cargo test`) against +//! the affected crate and feed the result back into the tool output. The +//! assistant then sees any compilation, lint, formatting, or test failures on +//! the very next iteration and can correct them without the user having to +//! intervene. + +use std::fmt::Write as _; +use std::path::{Path, PathBuf}; +use std::process::Command; +use std::time::Duration; + +use serde_json::Value; + +/// Output of a single verification run injected back into the tool result. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VerificationResult { + pub passed: bool, + pub summary: String, +} + +/// Strategy that inspects a completed tool invocation and optionally produces +/// additional diagnostics to inject into the tool result. +pub trait Verifier: Send { + /// Return `Some` when the tool/input pair is in scope for verification, + /// `None` otherwise (e.g. a `read_file` call, or an edit on `README.md`). + fn verify(&self, tool_name: &str, tool_input: &str) -> Option; +} + +/// Declarative configuration for the built-in cargo-based verifier. 
+#[allow(clippy::struct_excessive_bools)] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CargoVerifierConfig { + pub run_check: bool, + pub run_clippy: bool, + pub run_fmt: bool, + pub run_test: bool, + pub timeout: Duration, +} + +impl Default for CargoVerifierConfig { + fn default() -> Self { + Self { + run_check: true, + run_clippy: true, + run_fmt: true, + run_test: true, + timeout: Duration::from_mins(2), + } + } +} + +/// Maximum bytes kept per cargo step before the summary is truncated. +const MAX_STEP_OUTPUT_BYTES: usize = 2_048; + +/// Tool names recognised as writing to a file on disk. +const WRITE_TOOLS: &[&str] = &["edit_file", "write_file", "Edit", "Write"]; + +/// Built-in verifier that drives cargo against the crate that owns the +/// edited file. Runs checks sequentially with early-exit on the first failure. +pub struct CargoVerifier { + config: CargoVerifierConfig, +} + +impl CargoVerifier { + #[must_use] + pub fn new(config: CargoVerifierConfig) -> Self { + Self { config } + } +} + +impl Verifier for CargoVerifier { + fn verify(&self, tool_name: &str, tool_input: &str) -> Option { + if !WRITE_TOOLS.contains(&tool_name) { + return None; + } + let file_path = extract_file_path(tool_input)?; + if file_path.extension().and_then(|ext| ext.to_str()) != Some("rs") { + return None; + } + let manifest = nearest_cargo_manifest(&file_path)?; + + let steps = planned_steps(&self.config); + if steps.is_empty() { + return None; + } + + let mut summary = String::new(); + let mut overall_passed = true; + let mut skip_remaining = false; + + for step in steps { + if skip_remaining { + writeln!(summary, "[verifier] {}: skipped", step.label).ok(); + continue; + } + let outcome = run_step(&step, &manifest, self.config.timeout); + match outcome { + StepOutcome::Passed => { + writeln!(summary, "[verifier] {}: ok", step.label).ok(); + } + StepOutcome::Failed { body } => { + overall_passed = false; + skip_remaining = true; + writeln!(summary, "[verifier] {}: FAIL", 
step.label).ok(); + summary.push_str(&truncate_output(&body)); + summary.push('\n'); + } + StepOutcome::Unavailable { message } => { + overall_passed = false; + skip_remaining = true; + writeln!( + summary, + "[verifier] {}: could not run ({message})", + step.label + ) + .ok(); + } + } + } + + Some(VerificationResult { + passed: overall_passed, + summary: summary.trim_end().to_string(), + }) + } +} + +/// Prepend the verifier summary to the tool output using a visible delimiter. +#[must_use] +pub fn prepend_verifier_summary(summary: &str, output: String) -> String { + if summary.is_empty() { + return output; + } + if output.trim().is_empty() { + return summary.to_string(); + } + format!("{output}\n\n[verifier output]\n{summary}") +} + +fn extract_file_path(input: &str) -> Option { + let value: Value = serde_json::from_str(input).ok()?; + let path_str = value + .get("file_path") + .or_else(|| value.get("filePath")) + .or_else(|| value.get("path"))? + .as_str()?; + Some(PathBuf::from(path_str)) +} + +fn nearest_cargo_manifest(file_path: &Path) -> Option { + let start = if file_path.is_absolute() { + file_path.to_path_buf() + } else { + std::env::current_dir().ok()?.join(file_path) + }; + let mut cursor = start.parent()?.to_path_buf(); + loop { + let candidate = cursor.join("Cargo.toml"); + if candidate.is_file() { + return Some(candidate); + } + if !cursor.pop() { + return None; + } + } +} + +#[derive(Debug, Clone)] +struct PlannedStep { + label: &'static str, + args: Vec<&'static str>, +} + +enum StepOutcome { + Passed, + Failed { body: String }, + Unavailable { message: String }, +} + +fn planned_steps(config: &CargoVerifierConfig) -> Vec { + let mut steps = Vec::new(); + if config.run_check { + steps.push(PlannedStep { + label: "cargo check", + args: vec!["check", "--quiet", "--message-format=short"], + }); + } + if config.run_clippy { + steps.push(PlannedStep { + label: "cargo clippy", + args: vec![ + "clippy", + "--quiet", + "--message-format=short", + "--", + 
"-D", + "warnings", + ], + }); + } + if config.run_fmt { + steps.push(PlannedStep { + label: "cargo fmt --check", + args: vec!["fmt", "--", "--check"], + }); + } + if config.run_test { + steps.push(PlannedStep { + label: "cargo test", + args: vec!["test", "--quiet", "--no-fail-fast"], + }); + } + steps +} + +fn run_step(step: &PlannedStep, manifest: &Path, timeout: Duration) -> StepOutcome { + let mut command = Command::new("cargo"); + command.arg(step.args[0]); + command.arg("--manifest-path").arg(manifest); + for arg in step.args.iter().skip(1) { + command.arg(arg); + } + command.env("CARGO_TERM_COLOR", "never"); + + match spawn_with_timeout(command, timeout) { + Ok(output) => { + if output.status.success() { + StepOutcome::Passed + } else { + let mut body = String::new(); + body.push_str(&String::from_utf8_lossy(&output.stdout)); + if !output.stderr.is_empty() { + if !body.is_empty() { + body.push('\n'); + } + body.push_str(&String::from_utf8_lossy(&output.stderr)); + } + if body.trim().is_empty() { + body = format!("exit status: {}", output.status); + } + StepOutcome::Failed { body } + } + } + Err(SpawnError::Timeout) => StepOutcome::Failed { + body: format!("step timed out after {}s", timeout.as_secs()), + }, + Err(SpawnError::Io(error)) => StepOutcome::Unavailable { + message: error.to_string(), + }, + } +} + +enum SpawnError { + Timeout, + Io(std::io::Error), +} + +fn spawn_with_timeout( + mut command: Command, + timeout: Duration, +) -> Result { + use std::sync::mpsc; + use std::thread; + + command.stdin(std::process::Stdio::null()); + command.stdout(std::process::Stdio::piped()); + command.stderr(std::process::Stdio::piped()); + let mut child = command.spawn().map_err(SpawnError::Io)?; + + let stdout = child.stdout.take(); + let stderr = child.stderr.take(); + + let (tx, rx) = mpsc::channel(); + let stdout_handle = stdout.map(|mut s| { + let tx = tx.clone(); + thread::spawn(move || { + let mut buf = Vec::new(); + let _ = std::io::Read::read_to_end(&mut s, 
&mut buf); + let _ = tx.send(("stdout", buf)); + }) + }); + let stderr_handle = stderr.map(|mut s| { + let tx = tx.clone(); + thread::spawn(move || { + let mut buf = Vec::new(); + let _ = std::io::Read::read_to_end(&mut s, &mut buf); + let _ = tx.send(("stderr", buf)); + }) + }); + drop(tx); + + let deadline = std::time::Instant::now() + timeout; + loop { + if let Some(status) = child.try_wait().map_err(SpawnError::Io)? { + if let Some(h) = stdout_handle { + let _ = h.join(); + } + if let Some(h) = stderr_handle { + let _ = h.join(); + } + let mut stdout_bytes = Vec::new(); + let mut stderr_bytes = Vec::new(); + while let Ok((which, bytes)) = rx.try_recv() { + if which == "stdout" { + stdout_bytes = bytes; + } else { + stderr_bytes = bytes; + } + } + return Ok(std::process::Output { + status, + stdout: stdout_bytes, + stderr: stderr_bytes, + }); + } + if std::time::Instant::now() >= deadline { + let _ = child.kill(); + let _ = child.wait(); + return Err(SpawnError::Timeout); + } + thread::sleep(Duration::from_millis(50)); + } +} + +/// Trim output to `MAX_STEP_OUTPUT_BYTES`, preserving the head plus any lines +/// containing `error` or `warning` so the model keeps the actionable signal. 
+fn truncate_output(body: &str) -> String { + if body.len() <= MAX_STEP_OUTPUT_BYTES { + return body.to_string(); + } + let head_budget = MAX_STEP_OUTPUT_BYTES * 2 / 3; + let mut head = String::new(); + for line in body.lines() { + if head.len() + line.len() + 1 > head_budget { + break; + } + head.push_str(line); + head.push('\n'); + } + let mut signal_lines = Vec::new(); + for line in body.lines() { + let lower = line.to_ascii_lowercase(); + if lower.contains("error") || lower.contains("warning") { + signal_lines.push(line); + } + } + let mut tail = String::new(); + let tail_budget = MAX_STEP_OUTPUT_BYTES - head.len(); + for line in signal_lines.into_iter().rev() { + if tail.len() + line.len() + 1 > tail_budget { + break; + } + tail = format!("{line}\n{tail}"); + } + format!("{head}... (truncated) ...\n{tail}") +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + #[test] + fn verify_returns_none_for_non_write_tools() { + let v = CargoVerifier::new(CargoVerifierConfig::default()); + assert!(v.verify("read_file", r#"{"file_path":"lib.rs"}"#).is_none()); + assert!(v.verify("grep_search", "{}").is_none()); + } + + #[test] + fn verify_returns_none_for_non_rust_extension() { + let v = CargoVerifier::new(CargoVerifierConfig::default()); + assert!(v + .verify("edit_file", r#"{"file_path":"notes.md"}"#) + .is_none()); + assert!(v + .verify("write_file", r#"{"file_path":"config.json"}"#) + .is_none()); + } + + #[test] + fn verify_returns_none_when_no_manifest_found() { + let v = CargoVerifier::new(CargoVerifierConfig { + run_check: true, + run_clippy: false, + run_fmt: false, + run_test: false, + timeout: Duration::from_secs(1), + }); + // /tmp has no Cargo.toml up the tree. 
+ let input = r#"{"file_path":"/tmp/__claw_verifier_test_missing.rs"}"#; + assert!(v.verify("edit_file", input).is_none()); + } + + #[test] + fn extract_file_path_supports_multiple_key_names() { + assert_eq!( + extract_file_path(r#"{"file_path":"a.rs"}"#), + Some(PathBuf::from("a.rs")) + ); + assert_eq!( + extract_file_path(r#"{"filePath":"b.rs"}"#), + Some(PathBuf::from("b.rs")) + ); + assert_eq!( + extract_file_path(r#"{"path":"c.rs"}"#), + Some(PathBuf::from("c.rs")) + ); + assert_eq!(extract_file_path("not json"), None); + assert_eq!(extract_file_path(r#"{"other":1}"#), None); + } + + #[test] + fn nearest_cargo_manifest_walks_up_directories() { + let tmp = tempdir(); + let crate_root = tmp.join("my_crate"); + let nested = crate_root.join("src").join("nested"); + fs::create_dir_all(&nested).unwrap(); + fs::write(crate_root.join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap(); + let file = nested.join("thing.rs"); + fs::write(&file, "").unwrap(); + + let manifest = nearest_cargo_manifest(&file).expect("manifest should be found"); + assert_eq!(manifest, crate_root.join("Cargo.toml")); + } + + #[test] + fn truncate_preserves_error_lines() { + let mut body = String::new(); + for i in 0..2_000 { + writeln!(body, "noise line {i}").unwrap(); + } + body.push_str("error[E0308]: mismatched types\n"); + let truncated = truncate_output(&body); + assert!(truncated.len() <= MAX_STEP_OUTPUT_BYTES + 64); + assert!(truncated.contains("error[E0308]")); + assert!(truncated.contains("... 
(truncated) ...")); + } + + #[test] + fn prepend_verifier_summary_merges_with_existing_output() { + let merged = prepend_verifier_summary("[verifier] cargo check: ok", "edited 1 file".into()); + assert!(merged.contains("edited 1 file")); + assert!(merged.contains("[verifier output]")); + assert!(merged.contains("cargo check: ok")); + } + + #[test] + fn prepend_verifier_summary_passes_through_empty_summary() { + let merged = prepend_verifier_summary("", "edited".into()); + assert_eq!(merged, "edited"); + } + + fn tempdir() -> PathBuf { + let base = std::env::temp_dir(); + let unique = format!( + "claw_verifier_{}_{}", + std::process::id(), + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() + ); + let path = base.join(unique); + fs::create_dir_all(&path).unwrap(); + path + } +} diff --git a/rust/crates/runtime/tests/integration_tests.rs b/rust/crates/runtime/tests/integration_tests.rs index cc7bd9c54d..0bf1442d6e 100644 --- a/rust/crates/runtime/tests/integration_tests.rs +++ b/rust/crates/runtime/tests/integration_tests.rs @@ -22,7 +22,7 @@ fn stale_branch_detection_flows_into_policy_engine() { let stale_context = LaneContext::new( "stale-lane", 0, - Duration::from_secs(2 * 60 * 60), // 2 hours stale + Duration::from_hours(2), // 2 hours stale LaneBlocker::None, ReviewStatus::Pending, DiffScope::Full, @@ -49,7 +49,7 @@ fn fresh_branch_does_not_trigger_stale_policy() { let fresh_context = LaneContext::new( "fresh-lane", 0, - Duration::from_secs(30 * 60), // 30 min stale — under 1 hour threshold + Duration::from_mins(30), // 30 min stale — under 1 hour threshold LaneBlocker::None, ReviewStatus::Pending, DiffScope::Full, @@ -212,8 +212,8 @@ fn end_to_end_stale_lane_gets_merge_forward_action() { // when: build context and evaluate policy let context = LaneContext::new( "lane-9411", - 3, // Workspace green - Duration::from_secs(5 * 60 * 60), // 5 hours stale, definitely over threshold + 3, // Workspace green + 
Duration::from_hours(5), // 5 hours stale, definitely over threshold LaneBlocker::None, ReviewStatus::Approved, DiffScope::Scoped, @@ -261,8 +261,8 @@ fn end_to_end_stale_lane_gets_merge_forward_action() { fn fresh_approved_lane_gets_merge_action() { let context = LaneContext::new( "fresh-approved-lane", - 3, // Workspace green - Duration::from_secs(30 * 60), // 30 min — under 1 hour threshold = fresh + 3, // Workspace green + Duration::from_mins(30), // 30 min — under 1 hour threshold = fresh LaneBlocker::None, ReviewStatus::Approved, DiffScope::Scoped, @@ -347,7 +347,7 @@ fn worker_provider_failure_flows_through_recovery_to_policy() { // (Simulating the policy check that would happen after successful recovery) let recovery_success = matches!(result, RecoveryResult::Recovered { .. }); let green_level = 3; // Workspace green - let not_stale = Duration::from_secs(30 * 60); // 30 min — fresh + let not_stale = Duration::from_mins(30); // 30 min — fresh let post_recovery_context = LaneContext::new( "recovered-lane", diff --git a/rust/crates/runtime/tests/verifier_e2e.rs b/rust/crates/runtime/tests/verifier_e2e.rs new file mode 100644 index 0000000000..d46767c1d6 --- /dev/null +++ b/rust/crates/runtime/tests/verifier_e2e.rs @@ -0,0 +1,295 @@ +//! End-to-end tests for `CargoVerifier` — spawn a real temp crate and drive +//! the verifier against it so we catch regressions in manifest discovery, +//! subprocess handling, output truncation, and scope selection. 
+ +use std::fs; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicUsize, Ordering}; +use std::time::Duration; + +use runtime::{CargoVerifier, CargoVerifierConfig, VerificationResult, Verifier}; + +static TMP_COUNTER: AtomicUsize = AtomicUsize::new(0); + +fn unique_tmpdir(tag: &str) -> PathBuf { + let pid = std::process::id(); + let n = TMP_COUNTER.fetch_add(1, Ordering::SeqCst); + let dir = std::env::temp_dir().join(format!("verifier_e2e_{tag}_{pid}_{n}")); + if dir.exists() { + let _ = fs::remove_dir_all(&dir); + } + fs::create_dir_all(&dir).expect("tmpdir"); + dir +} + +fn write_minimal_crate(root: &Path, name: &str, lib_rs: &str) { + fs::write( + root.join("Cargo.toml"), + format!( + "[package]\nname = \"{name}\"\nversion = \"0.0.0\"\nedition = \"2021\"\n\n[lib]\npath = \"src/lib.rs\"\n" + ), + ) + .unwrap(); + fs::create_dir_all(root.join("src")).unwrap(); + fs::write(root.join("src/lib.rs"), lib_rs).unwrap(); +} + +fn tool_input(path: &Path) -> String { + format!(r#"{{"file_path":"{}"}}"#, path.display()) +} + +fn check_only() -> CargoVerifierConfig { + CargoVerifierConfig { + run_check: true, + run_clippy: false, + run_fmt: false, + run_test: false, + timeout: Duration::from_mins(2), + } +} + +fn cargo_available() -> bool { + std::process::Command::new("cargo") + .arg("--version") + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .status() + .is_ok_and(|s| s.success()) +} + +#[test] +fn passing_crate_reports_ok_and_passed_true() { + if !cargo_available() { + eprintln!("cargo unavailable — skipping"); + return; + } + let root = unique_tmpdir("pass"); + write_minimal_crate(&root, "vpass", "pub fn two() -> i32 { 2 }\n"); + let file = root.join("src/lib.rs"); + + let v = CargoVerifier::new(check_only()); + let result: VerificationResult = v + .verify("edit_file", &tool_input(&file)) + .expect("verifier should run for .rs edit"); + + assert!(result.passed, "summary was: {}", result.summary); + 
assert!(result.summary.contains("cargo check: ok")); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn type_error_fails_and_surfaces_error_text_in_summary() { + if !cargo_available() { + return; + } + let root = unique_tmpdir("typeerr"); + // Type mismatch: declared i32 but returns &str. + write_minimal_crate(&root, "vtype", "pub fn oops() -> i32 { \"nope\" }\n"); + let file = root.join("src/lib.rs"); + + let v = CargoVerifier::new(check_only()); + let result = v.verify("write_file", &tool_input(&file)).unwrap(); + + assert!(!result.passed); + assert!(result.summary.contains("cargo check: FAIL")); + let lower = result.summary.to_lowercase(); + assert!( + lower.contains("mismatched") || lower.contains("error"), + "summary missing diagnostic: {}", + result.summary + ); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn non_rust_file_is_out_of_scope() { + let v = CargoVerifier::new(CargoVerifierConfig::default()); + let input = r#"{"file_path":"/tmp/README.md"}"#; + assert!(v.verify("edit_file", input).is_none()); +} + +#[test] +fn unknown_tool_is_ignored() { + let v = CargoVerifier::new(CargoVerifierConfig::default()); + let input = r#"{"file_path":"/tmp/x.rs"}"#; + assert!(v.verify("read_file", input).is_none()); + assert!(v.verify("bash", input).is_none()); +} + +#[test] +fn malformed_json_returns_none_without_panicking() { + let v = CargoVerifier::new(CargoVerifierConfig::default()); + assert!(v.verify("edit_file", "not-json").is_none()); + assert!(v.verify("edit_file", "{}").is_none()); + assert!(v.verify("edit_file", r#"{"file_path": 42}"#).is_none()); +} + +#[test] +fn accepts_alternate_path_keys() { + if !cargo_available() { + return; + } + let root = unique_tmpdir("altkeys"); + write_minimal_crate(&root, "valt", "pub fn k() -> u8 { 1 }\n"); + let file = root.join("src/lib.rs"); + + let v = CargoVerifier::new(check_only()); + + let a = format!(r#"{{"filePath":"{}"}}"#, file.display()); + let b = format!(r#"{{"path":"{}"}}"#, file.display()); + 
assert!(v.verify("edit_file", &a).is_some()); + assert!(v.verify("edit_file", &b).is_some()); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn file_outside_any_crate_is_skipped() { + let root = unique_tmpdir("nocargo"); + // No Cargo.toml anywhere up the tree we control — but the tmp root's + // ancestors might have one. To guarantee "none found", create a file + // whose parent chain hits filesystem root without Cargo.toml only when + // the OS tmpdir itself isn't under a cargo project. Skip the strong + // assertion in that case. + fs::create_dir_all(root.join("x")).unwrap(); + let file = root.join("x/orphan.rs"); + fs::write(&file, "pub fn z() {}\n").unwrap(); + + let v = CargoVerifier::new(check_only()); + let result = v.verify("edit_file", &tool_input(&file)); + // Either no manifest found (None) — the preferred case — or one was + // discovered in an ancestor; both are acceptable. We just assert it + // doesn't panic and returns a well-formed value. + if let Some(r) = result { + assert!(r.summary.contains("cargo check")); + } + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn all_steps_disabled_yields_none() { + let config = CargoVerifierConfig { + run_check: false, + run_clippy: false, + run_fmt: false, + run_test: false, + timeout: Duration::from_secs(5), + }; + let root = unique_tmpdir("nosteps"); + write_minimal_crate(&root, "vnone", "pub fn n() {}\n"); + let file = root.join("src/lib.rs"); + + let v = CargoVerifier::new(config); + assert!(v.verify("edit_file", &tool_input(&file)).is_none()); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn timeout_short_circuits_and_reports_failure() { + if !cargo_available() { + return; + } + // 1ms timeout — cargo process spawn alone takes longer than this on any + // real machine, so the verifier should report a timeout failure. 
+ let config = CargoVerifierConfig { + run_check: true, + run_clippy: false, + run_fmt: false, + run_test: false, + timeout: Duration::from_millis(1), + }; + let root = unique_tmpdir("timeout"); + write_minimal_crate(&root, "vtime", "pub fn t() {}\n"); + let file = root.join("src/lib.rs"); + + let v = CargoVerifier::new(config); + let result = v.verify("edit_file", &tool_input(&file)).unwrap(); + assert!(!result.passed); + assert!( + result.summary.contains("cargo check: FAIL") + && result.summary.to_lowercase().contains("timed out"), + "unexpected summary: {}", + result.summary + ); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn later_steps_are_skipped_after_first_failure() { + if !cargo_available() { + return; + } + let root = unique_tmpdir("skipchain"); + // Broken code so `cargo check` fails — the later steps (clippy/fmt/test) + // must all be recorded as `skipped` to save time. + write_minimal_crate(&root, "vskip", "pub fn bad() -> i32 { return; }\n"); + let file = root.join("src/lib.rs"); + + let config = CargoVerifierConfig { + run_check: true, + run_clippy: true, + run_fmt: true, + run_test: true, + timeout: Duration::from_mins(2), + }; + let v = CargoVerifier::new(config); + let result = v.verify("edit_file", &tool_input(&file)).unwrap(); + + assert!(!result.passed); + assert!(result.summary.contains("cargo check: FAIL")); + assert!(result.summary.contains("cargo clippy: skipped")); + assert!(result.summary.contains("cargo fmt --check: skipped")); + assert!(result.summary.contains("cargo test: skipped")); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn fmt_violation_is_detected_when_fmt_enabled() { + if !cargo_available() { + return; + } + let root = unique_tmpdir("fmt"); + // Deliberately badly-formatted source that still compiles. 
+ let src = "pub fn f( )->i32{1 + 2}\n"; + write_minimal_crate(&root, "vfmt", src); + let file = root.join("src/lib.rs"); + + let config = CargoVerifierConfig { + run_check: false, + run_clippy: false, + run_fmt: true, + run_test: false, + timeout: Duration::from_mins(1), + }; + let v = CargoVerifier::new(config); + let result = v.verify("edit_file", &tool_input(&file)).unwrap(); + + // rustfmt may be unavailable in some toolchains — accept both "FAIL" + // (violations found) and "could not run" (binary missing), but never ok. + assert!(!result.passed, "summary: {}", result.summary); + assert!(result.summary.contains("cargo fmt --check")); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn nested_file_resolves_to_parent_crate_manifest() { + if !cargo_available() { + return; + } + let root = unique_tmpdir("nested"); + write_minimal_crate(&root, "vnest", "pub mod sub;\n"); + fs::create_dir_all(root.join("src/sub")).unwrap(); + let nested = root.join("src/sub/mod.rs"); + fs::write(&nested, "pub fn inside() -> u8 { 7 }\n").unwrap(); + fs::write(root.join("src/lib.rs"), "pub mod sub;\n").unwrap(); + fs::remove_file(root.join("src/lib.rs")).ok(); + fs::write( + root.join("src/lib.rs"), + "#[path = \"sub/mod.rs\"]\npub mod sub;\n", + ) + .unwrap(); + + let v = CargoVerifier::new(check_only()); + let result = v.verify("edit_file", &tool_input(&nested)).unwrap(); + assert!(result.passed, "summary: {}", result.summary); + let _ = fs::remove_dir_all(&root); +} diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs index ded17495bd..572cc15f92 100644 --- a/rust/crates/rusty-claude-cli/src/main.rs +++ b/rust/crates/rusty-claude-cli/src/main.rs @@ -940,7 +940,7 @@ fn omc_compatibility_note_for_unknown_slash_command(name: &str) -> Option<&'stat } fn render_suggestion_line(label: &str, suggestions: &[String]) -> Option { - (!suggestions.is_empty()).then(|| format!(" {label:<16} {}", suggestions.join(", "),)) + 
(!suggestions.is_empty()).then(|| format!(" {label:<16} {}", suggestions.join(", "))) } fn suggest_slash_commands(input: &str) -> Vec { @@ -5392,7 +5392,7 @@ fn render_memory_report() -> Result> { } else { preview }; - lines.push(format!(" {}. {}", index + 1, file.path.display(),)); + lines.push(format!(" {}. {}", index + 1, file.path.display())); lines.push(format!( " lines={} preview={}", file.content.lines().count(), @@ -5473,8 +5473,7 @@ fn render_diff_report_for(cwd: &Path) -> Result Result bool { Command::new("which") .arg(name) .output() - .map(|output| output.status.success()) - .unwrap_or(false) + .is_ok_and(|output| output.status.success()) } fn write_temp_text_file( @@ -6661,6 +6658,17 @@ fn build_runtime_with_plugin_state( if emit_output { runtime = runtime.with_hook_progress_reporter(Box::new(CliHookProgressReporter)); } + let verifier_config = feature_config.verifier(); + if verifier_config.enabled() { + let cargo_config = runtime::CargoVerifierConfig { + run_check: verifier_config.run_check(), + run_clippy: verifier_config.run_clippy(), + run_fmt: verifier_config.run_fmt(), + run_test: verifier_config.run_test(), + timeout: std::time::Duration::from_secs(verifier_config.timeout_secs()), + }; + runtime = runtime.with_verifier(Box::new(runtime::CargoVerifier::new(cargo_config))); + } Ok(BuiltRuntime::new(runtime, plugin_registry, mcp_state)) } @@ -6840,6 +6848,7 @@ fn resolve_cli_auth_source() -> Result> { Ok(resolve_cli_auth_source_for_cwd()?) 
} +#[allow(clippy::result_large_err)] fn resolve_cli_auth_source_for_cwd() -> Result { resolve_startup_auth_source(|| Ok(None)) } @@ -8421,6 +8430,7 @@ mod tests { request_id: Some("req_jobdori_789".to_string()), body: String::new(), retryable: true, + suggested_action: None, }; let rendered = format_user_visible_api_error("session-issue-22", &error); @@ -8443,6 +8453,7 @@ mod tests { request_id: Some("req_jobdori_790".to_string()), body: String::new(), retryable: true, + suggested_action: None, }), }; @@ -8506,6 +8517,7 @@ mod tests { request_id: Some("req_ctx_456".to_string()), body: String::new(), retryable: false, + suggested_action: None, }; let rendered = format_user_visible_api_error("session-issue-32", &error); @@ -8537,6 +8549,7 @@ mod tests { request_id: Some("req_ctx_retry_789".to_string()), body: String::new(), retryable: false, + suggested_action: None, }), }; diff --git a/rust/crates/tools/src/lib.rs b/rust/crates/tools/src/lib.rs index 5cb2f1e5c5..3ffa7d0188 100644 --- a/rust/crates/tools/src/lib.rs +++ b/rust/crates/tools/src/lib.rs @@ -1988,8 +1988,7 @@ fn git_ref_exists(reference: &str) -> bool { Command::new("git") .args(["rev-parse", "--verify", "--quiet", reference]) .output() - .map(|output| output.status.success()) - .unwrap_or(false) + .is_ok_and(|output| output.status.success()) } fn git_stdout(args: &[&str]) -> Option { @@ -5916,8 +5915,7 @@ fn command_exists(command: &str) -> bool { .arg("-lc") .arg(format!("command -v {command} >/dev/null 2>&1")) .status() - .map(|status| status.success()) - .unwrap_or(false) + .is_ok_and(|status| status.success()) } #[allow(clippy::too_many_lines)] From 7286cb9af9d652f2828f6e978f49a40f1dd10406 Mon Sep 17 00:00:00 2001 From: Guajir0-code Date: Mon, 20 Apr 2026 21:55:41 +0000 Subject: [PATCH 2/6] merge: upstream staged multi-language verifier + windows test compat Bring in the staged verifier rework from upstream (Rust / Node-TS / Python adapters, quick+final phases, structured VerificationReport, 
final-gate loop), the new Verification message role, getrandom-based OAuth PKCE generation, and the Windows-compatible hook/MCP test infrastructure. Co-Authored-By: Claude Opus 4.7 --- .github/scripts/check_doc_source_of_truth.py | 0 AGENTS.md | 21 + install.sh | 0 rust/Cargo.lock | 61 + rust/crates/api/tests/proxy_integration.rs | 39 +- .../bundled/example-bundled/hooks/post.sh | 0 .../bundled/example-bundled/hooks/pre.sh | 0 .../bundled/sample-hooks/hooks/post.sh | 0 .../plugins/bundled/sample-hooks/hooks/pre.sh | 0 rust/crates/plugins/src/hooks.rs | 74 +- rust/crates/plugins/src/lib.rs | 65 +- rust/crates/runtime/Cargo.toml | 2 + rust/crates/runtime/src/compact.rs | 16 +- rust/crates/runtime/src/config.rs | 223 +- rust/crates/runtime/src/config_validate.rs | 64 + rust/crates/runtime/src/conversation.rs | 657 ++++- rust/crates/runtime/src/file_ops.rs | 2 +- rust/crates/runtime/src/hooks.rs | 112 +- rust/crates/runtime/src/lib.rs | 6 +- rust/crates/runtime/src/mcp_stdio.rs | 72 +- rust/crates/runtime/src/mcp_tool_bridge.rs | 30 +- rust/crates/runtime/src/oauth.rs | 7 +- rust/crates/runtime/src/session.rs | 79 + rust/crates/runtime/src/verifier.rs | 2262 ++++++++++++++--- rust/crates/runtime/tests/verifier_e2e.rs | 306 ++- rust/crates/rusty-claude-cli/src/main.rs | 520 ++-- .../rusty-claude-cli/tests/compact_output.rs | 15 +- .../tests/mock_parity_harness.rs | 115 +- rust/crates/tools/src/lib.rs | 151 +- rust/scripts/run_mock_parity_diff.py | 0 rust/scripts/run_mock_parity_harness.sh | 0 31 files changed, 4103 insertions(+), 796 deletions(-) mode change 100755 => 100644 .github/scripts/check_doc_source_of_truth.py create mode 100644 AGENTS.md mode change 100755 => 100644 install.sh mode change 100755 => 100644 rust/crates/plugins/bundled/example-bundled/hooks/post.sh mode change 100755 => 100644 rust/crates/plugins/bundled/example-bundled/hooks/pre.sh mode change 100755 => 100644 rust/crates/plugins/bundled/sample-hooks/hooks/post.sh mode change 100755 => 100644 
rust/crates/plugins/bundled/sample-hooks/hooks/pre.sh mode change 100755 => 100644 rust/scripts/run_mock_parity_diff.py mode change 100755 => 100644 rust/scripts/run_mock_parity_harness.sh diff --git a/.github/scripts/check_doc_source_of_truth.py b/.github/scripts/check_doc_source_of_truth.py old mode 100755 new mode 100644 diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000000..1d0533e24b --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,21 @@ +# AGENTS.md + +This file provides guidance to Codex (Codex.ai/code) when working with code in this repository. + +## Detected stack +- Languages: Rust. +- Frameworks: none detected from the supported starter markers. + +## Verification +- Run Rust verification from `rust/`: `cargo fmt`, `cargo clippy --workspace --all-targets -- -D warnings`, `cargo test --workspace` +- `src/` and `tests/` are both present; update both surfaces together when behavior changes. + +## Repository shape +- `rust/` contains the Rust workspace and active CLI/runtime implementation. +- `src/` contains source files that should stay consistent with generated guidance and tests. +- `tests/` contains validation surfaces that should be reviewed alongside code changes. + +## Working agreement +- Prefer small, reviewable changes and keep generated bootstrap files aligned with actual repo workflows. +- Keep shared defaults in `.Codex.json`; reserve `.Codex/settings.local.json` for machine-local overrides. +- Do not overwrite existing `AGENTS.md` content automatically; update it intentionally when repo workflows change. 
diff --git a/install.sh b/install.sh old mode 100755 new mode 100644 diff --git a/rust/Cargo.lock b/rust/Cargo.lock index 740147e78e..a97bde5a99 100644 --- a/rust/Cargo.lock +++ b/rust/Cargo.lock @@ -1361,6 +1361,7 @@ dependencies = [ name = "runtime" version = "0.1.0" dependencies = [ + "getrandom 0.3.4", "glob", "plugins", "regex", @@ -1369,6 +1370,7 @@ dependencies = [ "sha2", "telemetry", "tokio", + "toml", "walkdir", ] @@ -1551,6 +1553,15 @@ dependencies = [ "zmij", ] +[[package]] +name = "serde_spanned" +version = "0.6.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" +dependencies = [ + "serde", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -1834,6 +1845,47 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + +[[package]] +name = "toml_datetime" +version = "0.6.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_edit" +version = "0.22.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" +dependencies = [ + "indexmap", + "serde", + "serde_spanned", + "toml_datetime", + "toml_write", + "winnow", +] + +[[package]] +name = "toml_write" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + [[package]] name = "tools" version = "0.1.0" @@ -2305,6 +2357,15 @@ version = "0.53.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" +[[package]] +name = "winnow" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" +dependencies = [ + "memchr", +] + [[package]] name = "wit-bindgen" version = "0.51.0" diff --git a/rust/crates/api/tests/proxy_integration.rs b/rust/crates/api/tests/proxy_integration.rs index 7e3906983f..56078ca739 100644 --- a/rust/crates/api/tests/proxy_integration.rs +++ b/rust/crates/api/tests/proxy_integration.rs @@ -39,12 +39,15 @@ impl Drop for EnvVarGuard { fn proxy_config_from_env_reads_uppercase_proxy_vars() { // given let _lock = env_lock(); + let _clear_http = EnvVarGuard::set("HTTP_PROXY", None); + let _clear_https = EnvVarGuard::set("HTTPS_PROXY", None); + let _clear_no = EnvVarGuard::set("NO_PROXY", None); + let _clear_http_lower = EnvVarGuard::set("http_proxy", None); + let _clear_https_lower = EnvVarGuard::set("https_proxy", None); + let _clear_no_lower = EnvVarGuard::set("no_proxy", None); let _http = EnvVarGuard::set("HTTP_PROXY", Some("http://proxy.corp:3128")); let _https = EnvVarGuard::set("HTTPS_PROXY", Some("http://secure.corp:3129")); let _no = EnvVarGuard::set("NO_PROXY", Some("localhost,127.0.0.1")); - let _http_lower = EnvVarGuard::set("http_proxy", None); - let _https_lower = EnvVarGuard::set("https_proxy", None); - let _no_lower = EnvVarGuard::set("no_proxy", None); // when let config = ProxyConfig::from_env(); @@ -64,9 +67,12 @@ fn proxy_config_from_env_reads_uppercase_proxy_vars() { fn proxy_config_from_env_reads_lowercase_proxy_vars() { // given let _lock = env_lock(); - let _http = EnvVarGuard::set("HTTP_PROXY", None); - let _https = EnvVarGuard::set("HTTPS_PROXY", None); - let _no = EnvVarGuard::set("NO_PROXY", None); + let _clear_http = EnvVarGuard::set("HTTP_PROXY", None); + let 
_clear_https = EnvVarGuard::set("HTTPS_PROXY", None); + let _clear_no = EnvVarGuard::set("NO_PROXY", None); + let _clear_http_lower = EnvVarGuard::set("http_proxy", None); + let _clear_https_lower = EnvVarGuard::set("https_proxy", None); + let _clear_no_lower = EnvVarGuard::set("no_proxy", None); let _http_lower = EnvVarGuard::set("http_proxy", Some("http://lower.corp:3128")); let _https_lower = EnvVarGuard::set("https_proxy", Some("http://lower-secure.corp:3129")); let _no_lower = EnvVarGuard::set("no_proxy", Some(".internal")); @@ -127,12 +133,15 @@ fn proxy_config_from_env_treats_empty_values_as_unset() { fn build_client_with_env_proxy_config_succeeds() { // given let _lock = env_lock(); + let _clear_http = EnvVarGuard::set("HTTP_PROXY", None); + let _clear_https = EnvVarGuard::set("HTTPS_PROXY", None); + let _clear_no = EnvVarGuard::set("NO_PROXY", None); + let _clear_http_lower = EnvVarGuard::set("http_proxy", None); + let _clear_https_lower = EnvVarGuard::set("https_proxy", None); + let _clear_no_lower = EnvVarGuard::set("no_proxy", None); let _http = EnvVarGuard::set("HTTP_PROXY", Some("http://proxy.corp:3128")); let _https = EnvVarGuard::set("HTTPS_PROXY", Some("http://secure.corp:3129")); let _no = EnvVarGuard::set("NO_PROXY", Some("localhost")); - let _http_lower = EnvVarGuard::set("http_proxy", None); - let _https_lower = EnvVarGuard::set("https_proxy", None); - let _no_lower = EnvVarGuard::set("no_proxy", None); let config = ProxyConfig::from_env(); // when @@ -158,8 +167,20 @@ fn build_client_with_proxy_url_config_succeeds() { fn proxy_config_from_env_prefers_uppercase_over_lowercase() { // given let _lock = env_lock(); + let _clear_http = EnvVarGuard::set("HTTP_PROXY", None); + let _clear_https = EnvVarGuard::set("HTTPS_PROXY", None); + let _clear_no = EnvVarGuard::set("NO_PROXY", None); + let _clear_http_lower = EnvVarGuard::set("http_proxy", None); + let _clear_https_lower = EnvVarGuard::set("https_proxy", None); + let _clear_no_lower = 
EnvVarGuard::set("no_proxy", None); + #[cfg(not(windows))] let _http_upper = EnvVarGuard::set("HTTP_PROXY", Some("http://upper.corp:3128")); + #[cfg(not(windows))] let _http_lower = EnvVarGuard::set("http_proxy", Some("http://lower.corp:3128")); + #[cfg(windows)] + let _http_lower = EnvVarGuard::set("http_proxy", Some("http://lower.corp:3128")); + #[cfg(windows)] + let _http_upper = EnvVarGuard::set("HTTP_PROXY", Some("http://upper.corp:3128")); let _https = EnvVarGuard::set("HTTPS_PROXY", None); let _https_lower = EnvVarGuard::set("https_proxy", None); let _no = EnvVarGuard::set("NO_PROXY", None); diff --git a/rust/crates/plugins/bundled/example-bundled/hooks/post.sh b/rust/crates/plugins/bundled/example-bundled/hooks/post.sh old mode 100755 new mode 100644 diff --git a/rust/crates/plugins/bundled/example-bundled/hooks/pre.sh b/rust/crates/plugins/bundled/example-bundled/hooks/pre.sh old mode 100755 new mode 100644 diff --git a/rust/crates/plugins/bundled/sample-hooks/hooks/post.sh b/rust/crates/plugins/bundled/sample-hooks/hooks/post.sh old mode 100755 new mode 100644 diff --git a/rust/crates/plugins/bundled/sample-hooks/hooks/pre.sh b/rust/crates/plugins/bundled/sample-hooks/hooks/pre.sh old mode 100755 new mode 100644 diff --git a/rust/crates/plugins/src/hooks.rs b/rust/crates/plugins/src/hooks.rs index ff02c2ac27..5932123ca0 100644 --- a/rust/crates/plugins/src/hooks.rs +++ b/rust/crates/plugins/src/hooks.rs @@ -1,7 +1,9 @@ use std::ffi::OsStr; -use std::path::Path; use std::process::Command; +#[cfg(not(windows))] +use std::path::Path; + use serde_json::json; use crate::{PluginError, PluginHooks, PluginRegistry}; @@ -392,6 +394,24 @@ mod tests { let _ = path; } + fn hook_script_name(stem: &str) -> String { + if cfg!(windows) { + format!("{stem}.cmd") + } else { + format!("{stem}.sh") + } + } + + fn write_hook_script(path: &Path, message: &str) { + let contents = if cfg!(windows) { + format!("@echo off\r\necho {message}\r\n") + } else { + 
format!("#!/bin/sh\nprintf '%s\\n' '{message}'\n") + }; + fs::write(path, contents).expect("write hook"); + make_executable(path); + } + fn write_hook_plugin( root: &Path, name: &str, @@ -402,33 +422,19 @@ mod tests { fs::create_dir_all(root.join(".claude-plugin")).expect("manifest dir"); fs::create_dir_all(root.join("hooks")).expect("hooks dir"); - let pre_path = root.join("hooks").join("pre.sh"); - fs::write( - &pre_path, - format!("#!/bin/sh\nprintf '%s\\n' '{pre_message}'\n"), - ) - .expect("write pre hook"); - make_executable(&pre_path); - - let post_path = root.join("hooks").join("post.sh"); - fs::write( - &post_path, - format!("#!/bin/sh\nprintf '%s\\n' '{post_message}'\n"), - ) - .expect("write post hook"); - make_executable(&post_path); - - let failure_path = root.join("hooks").join("failure.sh"); - fs::write( - &failure_path, - format!("#!/bin/sh\nprintf '%s\\n' '{failure_message}'\n"), - ) - .expect("write failure hook"); - make_executable(&failure_path); + let pre_script = hook_script_name("pre"); + let post_script = hook_script_name("post"); + let failure_script = hook_script_name("failure"); + let pre_path = root.join("hooks").join(&pre_script); + let post_path = root.join("hooks").join(&post_script); + let failure_path = root.join("hooks").join(&failure_script); + write_hook_script(&pre_path, pre_message); + write_hook_script(&post_path, post_message); + write_hook_script(&failure_path, failure_message); fs::write( root.join(".claude-plugin").join("plugin.json"), format!( - "{{\n \"name\": \"{name}\",\n \"version\": \"1.0.0\",\n \"description\": \"hook plugin\",\n \"hooks\": {{\n \"PreToolUse\": [\"./hooks/pre.sh\"],\n \"PostToolUse\": [\"./hooks/post.sh\"],\n \"PostToolUseFailure\": [\"./hooks/failure.sh\"]\n }}\n}}" + "{{\n \"name\": \"{name}\",\n \"version\": \"1.0.0\",\n \"description\": \"hook plugin\",\n \"hooks\": {{\n \"PreToolUse\": [\"./hooks/{pre_script}\"],\n \"PostToolUse\": [\"./hooks/{post_script}\"],\n \"PostToolUseFailure\": 
[\"./hooks/{failure_script}\"]\n }}\n}}" ), ) .expect("write plugin manifest"); @@ -499,7 +505,11 @@ mod tests { fn pre_tool_use_denies_when_plugin_hook_exits_two() { // given let runner = HookRunner::new(crate::PluginHooks { - pre_tool_use: vec!["printf 'blocked by plugin'; exit 2".to_string()], + pre_tool_use: vec![if cfg!(windows) { + "echo blocked by plugin && exit /b 2".to_string() + } else { + "printf 'blocked by plugin'; exit 2".to_string() + }], post_tool_use: Vec::new(), post_tool_use_failure: Vec::new(), }); @@ -517,8 +527,16 @@ mod tests { // given let runner = HookRunner::new(crate::PluginHooks { pre_tool_use: vec![ - "printf 'broken plugin hook'; exit 1".to_string(), - "printf 'later plugin hook'".to_string(), + if cfg!(windows) { + "echo broken plugin hook && exit /b 1".to_string() + } else { + "printf 'broken plugin hook'; exit 1".to_string() + }, + if cfg!(windows) { + "echo later plugin hook".to_string() + } else { + "printf 'later plugin hook'".to_string() + }, ], post_tool_use: Vec::new(), post_tool_use_failure: Vec::new(), diff --git a/rust/crates/plugins/src/lib.rs b/rust/crates/plugins/src/lib.rs index 765c0ac242..6671c38af7 100644 --- a/rust/crates/plugins/src/lib.rs +++ b/rust/crates/plugins/src/lib.rs @@ -2327,6 +2327,37 @@ mod tests { fs::write(path, contents).expect("write file"); } + fn make_executable(path: &Path) { + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + + let mut permissions = fs::metadata(path).expect("metadata").permissions(); + permissions.set_mode(0o755); + fs::set_permissions(path, permissions).expect("chmod"); + } + #[cfg(not(unix))] + let _ = path; + } + + fn script_name(stem: &str) -> String { + if cfg!(windows) { + format!("{stem}.cmd") + } else { + format!("{stem}.sh") + } + } + + fn write_script(path: &Path, unix_contents: &str, windows_contents: &str) { + let contents = if cfg!(windows) { + windows_contents + } else { + unix_contents + }; + write_file(path, contents); + make_executable(path); + } + fn 
write_loader_plugin(root: &Path) { write_file( root.join("hooks").join("pre.sh").as_path(), @@ -2426,18 +2457,22 @@ mod tests { fn write_lifecycle_plugin(root: &Path, name: &str, version: &str) -> PathBuf { let log_path = root.join("lifecycle.log"); - write_file( - root.join("lifecycle").join("init.sh").as_path(), + let init_script = script_name("init"); + let shutdown_script = script_name("shutdown"); + write_script( + root.join("lifecycle").join(&init_script).as_path(), "#!/bin/sh\nprintf 'init\\n' >> lifecycle.log\n", + "@echo off\r\n>> lifecycle.log echo init\r\n", ); - write_file( - root.join("lifecycle").join("shutdown.sh").as_path(), + write_script( + root.join("lifecycle").join(&shutdown_script).as_path(), "#!/bin/sh\nprintf 'shutdown\\n' >> lifecycle.log\n", + "@echo off\r\n>> lifecycle.log echo shutdown\r\n", ); write_file( root.join(MANIFEST_RELATIVE_PATH).as_path(), format!( - "{{\n \"name\": \"{name}\",\n \"version\": \"{version}\",\n \"description\": \"lifecycle plugin\",\n \"lifecycle\": {{\n \"Init\": [\"./lifecycle/init.sh\"],\n \"Shutdown\": [\"./lifecycle/shutdown.sh\"]\n }}\n}}" + "{{\n \"name\": \"{name}\",\n \"version\": \"{version}\",\n \"description\": \"lifecycle plugin\",\n \"lifecycle\": {{\n \"Init\": [\"./lifecycle/{init_script}\"],\n \"Shutdown\": [\"./lifecycle/{shutdown_script}\"]\n }}\n}}" ) .as_str(), ); @@ -2449,23 +2484,17 @@ mod tests { } fn write_tool_plugin_with_name(root: &Path, name: &str, version: &str, tool_name: &str) { - let script_path = root.join("tools").join("echo-json.sh"); - write_file( + let script_file = script_name("echo-json"); + let script_path = root.join("tools").join(&script_file); + write_script( &script_path, "#!/bin/sh\nINPUT=$(cat)\nprintf '{\"plugin\":\"%s\",\"tool\":\"%s\",\"input\":%s}\\n' \"$CLAWD_PLUGIN_ID\" \"$CLAWD_TOOL_NAME\" \"$INPUT\"\n", + "@echo off\r\nsetlocal EnableDelayedExpansion\r\nset /p INPUT=\r\necho 
{\"plugin\":\"%CLAWD_PLUGIN_ID%\",\"tool\":\"%CLAWD_TOOL_NAME%\",\"input\":%INPUT%}\r\n", ); - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - - let mut permissions = fs::metadata(&script_path).expect("metadata").permissions(); - permissions.set_mode(0o755); - fs::set_permissions(&script_path, permissions).expect("chmod"); - } write_file( root.join(MANIFEST_RELATIVE_PATH).as_path(), format!( - "{{\n \"name\": \"{name}\",\n \"version\": \"{version}\",\n \"description\": \"tool plugin\",\n \"tools\": [\n {{\n \"name\": \"{tool_name}\",\n \"description\": \"Echo JSON input\",\n \"inputSchema\": {{\"type\": \"object\", \"properties\": {{\"message\": {{\"type\": \"string\"}}}}, \"required\": [\"message\"], \"additionalProperties\": false}},\n \"command\": \"./tools/echo-json.sh\",\n \"requiredPermission\": \"workspace-write\"\n }}\n ]\n}}" + "{{\n \"name\": \"{name}\",\n \"version\": \"{version}\",\n \"description\": \"tool plugin\",\n \"tools\": [\n {{\n \"name\": \"{tool_name}\",\n \"description\": \"Echo JSON input\",\n \"inputSchema\": {{\"type\": \"object\", \"properties\": {{\"message\": {{\"type\": \"string\"}}}}, \"required\": [\"message\"], \"additionalProperties\": false}},\n \"command\": \"./tools/{script_file}\",\n \"requiredPermission\": \"workspace-write\"\n }}\n ]\n}}" ) .as_str(), ); @@ -3417,7 +3446,7 @@ mod tests { registry.shutdown().expect("shutdown should succeed"); let log = fs::read_to_string(&log_path).expect("lifecycle log should exist"); - assert_eq!(log, "init\nshutdown\n"); + assert_eq!(log.replace("\r\n", "\n"), "init\nshutdown\n"); let _ = fs::remove_dir_all(config_home); let _ = fs::remove_dir_all(source_root); @@ -3614,7 +3643,7 @@ mod tests { if registry.initialize().is_ok() && registry.shutdown().is_ok() { // Verify lifecycle.log exists and has expected content if let Ok(log) = fs::read_to_string(&log_path) { - if log == "init\nshutdown\n" { + if log.replace("\r\n", "\n") == "init\nshutdown\n" { success_count.fetch_add(1, 
AtomicOrdering::Relaxed); } } diff --git a/rust/crates/runtime/Cargo.toml b/rust/crates/runtime/Cargo.toml index b1bd04f374..a97d2e90e3 100644 --- a/rust/crates/runtime/Cargo.toml +++ b/rust/crates/runtime/Cargo.toml @@ -8,12 +8,14 @@ publish.workspace = true [dependencies] sha2 = "0.10" glob = "0.3" +getrandom = "0.3" plugins = { path = "../plugins" } regex = "1" serde = { version = "1", features = ["derive"] } serde_json.workspace = true telemetry = { path = "../telemetry" } tokio = { version = "1", features = ["io-std", "io-util", "macros", "process", "rt", "rt-multi-thread", "time"] } +toml = "0.8" walkdir = "2" [lints] diff --git a/rust/crates/runtime/src/compact.rs b/rust/crates/runtime/src/compact.rs index 3e805dda96..6daf7297f6 100644 --- a/rust/crates/runtime/src/compact.rs +++ b/rust/crates/runtime/src/compact.rs @@ -212,7 +212,7 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String { .filter_map(|block| match block { ContentBlock::ToolUse { name, .. } => Some(name.as_str()), ContentBlock::ToolResult { tool_name, .. } => Some(tool_name.as_str()), - ContentBlock::Text { .. } => None, + ContentBlock::Text { .. } | ContentBlock::VerificationReport { .. } => None, }) .collect::>(); tool_names.sort_unstable(); @@ -266,6 +266,7 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String { MessageRole::User => "user", MessageRole::Assistant => "assistant", MessageRole::Tool => "tool", + MessageRole::Verification => "verification", }; let content = message .blocks @@ -327,6 +328,16 @@ fn summarize_block(block: &ContentBlock) -> String { "tool_result {tool_name}: {}{output}", if *is_error { "error " } else { "" } ), + ContentBlock::VerificationReport { + phase, + status, + summary_text, + .. 
+ } => format!( + "verification {} {}: {summary_text}", + phase.as_str(), + status.as_str() + ), }; truncate_summary(&raw, 160) } @@ -378,6 +389,7 @@ fn collect_key_files(messages: &[ConversationMessage]) -> Vec { ContentBlock::Text { text } => text.as_str(), ContentBlock::ToolUse { input, .. } => input.as_str(), ContentBlock::ToolResult { output, .. } => output.as_str(), + ContentBlock::VerificationReport { summary_text, .. } => summary_text.as_str(), }) .flat_map(extract_file_candidates) .collect::>(); @@ -400,6 +412,7 @@ fn first_text_block(message: &ConversationMessage) -> Option<&str> { ContentBlock::Text { text } if !text.trim().is_empty() => Some(text.as_str()), ContentBlock::ToolUse { .. } | ContentBlock::ToolResult { .. } + | ContentBlock::VerificationReport { .. } | ContentBlock::Text { .. } => None, }) } @@ -450,6 +463,7 @@ fn estimate_message_tokens(message: &ConversationMessage) -> usize { ContentBlock::ToolResult { tool_name, output, .. } => (tool_name.len() + output.len()) / 4 + 1, + ContentBlock::VerificationReport { summary_text, .. } => summary_text.len() / 4 + 1, }) .sum() } diff --git a/rust/crates/runtime/src/config.rs b/rust/crates/runtime/src/config.rs index 7192978483..f356e38f43 100644 --- a/rust/crates/runtime/src/config.rs +++ b/rust/crates/runtime/src/config.rs @@ -70,29 +70,60 @@ pub struct RuntimeFeatureConfig { /// Settings for the post-edit self-verification loop. /// -/// When enabled, the runtime runs cargo-based checks on the crate owning a -/// freshly edited Rust file and injects the result into the tool output so -/// the assistant can react on the next iteration. +/// When enabled, the runtime runs staged multi-language verification for +/// successful edits and can block turn completion on a final validation gate. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RuntimeVerifierMode { + Legacy, + Staged, +} + +impl RuntimeVerifierMode { + #[must_use] + pub fn as_str(self) -> &'static str { + match self { + Self::Legacy => "legacy", + Self::Staged => "staged", + } + } +} + #[allow(clippy::struct_excessive_bools)] #[derive(Debug, Clone, PartialEq, Eq)] pub struct RuntimeVerifierConfig { enabled: bool, + mode: RuntimeVerifierMode, + quick_on_write: bool, + final_gate: bool, + max_output_bytes: usize, run_check: bool, run_clippy: bool, run_fmt: bool, run_test: bool, timeout_secs: u64, + node_enabled: bool, + node_timeout_secs: u64, + python_enabled: bool, + python_timeout_secs: u64, } impl Default for RuntimeVerifierConfig { fn default() -> Self { Self { enabled: false, + mode: RuntimeVerifierMode::Legacy, + quick_on_write: true, + final_gate: false, + max_output_bytes: 2_048, run_check: true, run_clippy: true, run_fmt: true, run_test: true, timeout_secs: 120, + node_enabled: true, + node_timeout_secs: 120, + python_enabled: true, + python_timeout_secs: 120, } } } @@ -103,6 +134,31 @@ impl RuntimeVerifierConfig { self.enabled } + #[must_use] + pub fn mode(&self) -> RuntimeVerifierMode { + self.mode + } + + #[must_use] + pub fn staged(&self) -> bool { + self.mode == RuntimeVerifierMode::Staged + } + + #[must_use] + pub fn quick_on_write(&self) -> bool { + self.quick_on_write + } + + #[must_use] + pub fn final_gate(&self) -> bool { + self.final_gate + } + + #[must_use] + pub fn max_output_bytes(&self) -> usize { + self.max_output_bytes + } + #[must_use] pub fn run_check(&self) -> bool { self.run_check @@ -127,6 +183,26 @@ impl RuntimeVerifierConfig { pub fn timeout_secs(&self) -> u64 { self.timeout_secs } + + #[must_use] + pub fn node_enabled(&self) -> bool { + self.node_enabled + } + + #[must_use] + pub fn node_timeout_secs(&self) -> u64 { + self.node_timeout_secs + } + + #[must_use] + pub fn python_enabled(&self) -> bool { + self.python_enabled + } + + #[must_use] + pub fn 
python_timeout_secs(&self) -> u64 { + self.python_timeout_secs + } } /// Ordered chain of fallback model identifiers used when the primary @@ -869,6 +945,37 @@ fn parse_optional_verifier_config(root: &JsonValue) -> Result RuntimeVerifierMode::Legacy, + "staged" => RuntimeVerifierMode::Staged, + other => { + return Err(ConfigError::Parse(format!( + "merged settings.verifier.mode must be legacy or staged, got `{other}`" + ))) + } + }; + } + if let Some(quick_on_write) = + optional_bool(verifier, "quickOnWrite", "merged settings.verifier")? + { + config.quick_on_write = quick_on_write; + } + if let Some(final_gate) = optional_bool(verifier, "finalGate", "merged settings.verifier")? { + config.final_gate = final_gate; + if final_gate && config.mode == RuntimeVerifierMode::Legacy { + config.mode = RuntimeVerifierMode::Staged; + } + } + if let Some(max_output_bytes) = + optional_u64(verifier, "maxOutputBytes", "merged settings.verifier")? + { + config.max_output_bytes = usize::try_from(max_output_bytes).map_err(|_| { + ConfigError::Parse( + "merged settings.verifier.maxOutputBytes is out of range".to_string(), + ) + })?; + } if let Some(cargo_value) = verifier.get("cargo") { let cargo = expect_object(cargo_value, "merged settings.verifier.cargo")?; if let Some(v) = optional_bool(cargo, "check", "merged settings.verifier.cargo")? 
{ @@ -887,6 +994,24 @@ fn parse_optional_verifier_config(root: &JsonValue) -> Result, pub tool_results: Vec, + pub verification_reports: Vec, + pub verification_gate: VerificationGateStatus, pub prompt_cache_events: Vec, pub iterations: usize, pub usage: TokenUsage, @@ -123,6 +129,35 @@ pub struct AutoCompactionEvent { pub removed_message_count: usize, } +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +struct VerificationLedgerKey { + adapter_id: String, + project_root: PathBuf, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct VerificationLedgerEntry { + adapter_id: String, + project_root: PathBuf, + touched_paths: BTreeSet, + last_mutation_sequence: u64, + last_quick_status: Option, + last_final_status: Option, + last_final_verified_sequence: Option, +} + +impl VerificationLedgerEntry { + fn update_from_report(&mut self, report: &VerificationReport, mutation_sequence: u64) { + self.last_mutation_sequence = mutation_sequence; + self.last_quick_status = Some(report.status); + self.last_final_status = None; + self.last_final_verified_sequence = None; + for path in &report.touched_paths { + self.touched_paths.insert(path.clone()); + } + } +} + /// Coordinates the model loop, tool execution, hooks, and session updates. 
pub struct ConversationRuntime { session: Session, @@ -319,6 +354,86 @@ where } } + fn workspace_root(&self) -> Option { + self.session.workspace_root().map(PathBuf::from) + } + + fn run_quick_verification( + &self, + tool_name: &str, + tool_input: &str, + mutation_sequence: u64, + ) -> Vec { + let Some(verifier) = self.verifier.as_ref() else { + return Vec::new(); + }; + let Some(context) = VerificationContext::from_tool_invocation( + VerificationPhase::Quick, + self.workspace_root(), + tool_name.to_string(), + tool_input.to_string(), + mutation_sequence, + ) else { + return Vec::new(); + }; + verifier.quick_verify(&context) + } + + fn persist_verification_report( + &mut self, + report: &VerificationReport, + ) -> Result<(), RuntimeError> { + self.session + .push_message(ConversationMessage::verification_report(report)) + .map_err(|error| RuntimeError::new(error.to_string())) + } + + fn run_final_verification( + &self, + entry: &VerificationLedgerEntry, + ) -> Option { + self.verifier.as_ref().and_then(|verifier| { + verifier.final_verify(&crate::verifier::VerificationTarget { + adapter_id: entry.adapter_id.clone(), + project_root: entry.project_root.clone(), + touched_paths: entry.touched_paths.iter().cloned().collect(), + mutation_sequence: entry.last_mutation_sequence, + }) + }) + } + + fn make_final_gate_reminder(entry: &VerificationLedgerEntry) -> VerificationReport { + let status = entry + .last_final_status + .unwrap_or(VerificationStatus::Failed); + VerificationReport { + report_id: format!( + "vr-reminder-{}", + entry.project_root + .display() + .to_string() + .chars() + .map(|ch| match ch { + '\\' | '/' | ':' | ' ' => '-', + other => other, + }) + .collect::() + ), + phase: VerificationPhase::Final, + adapter_id: entry.adapter_id.clone(), + project_root: entry.project_root.clone(), + touched_paths: entry.touched_paths.iter().cloned().collect(), + status, + summary_text: format!( + "[verifier:final:{}] {} ({})\n[verifier] final verification is still 
failing for the current workspace state; make another edit before concluding", + entry.adapter_id, + status.as_str(), + entry.project_root.display() + ), + steps: Vec::new(), + } + } + #[allow(clippy::too_many_lines)] pub fn run_turn( &mut self, @@ -345,8 +460,13 @@ where let mut assistant_messages = Vec::new(); let mut tool_results = Vec::new(); + let mut verification_reports = Vec::new(); + let mut verification_gate = VerificationGateStatus::not_required(); let mut prompt_cache_events = Vec::new(); let mut iterations = 0; + let mut mutation_sequence = 0_u64; + let mut verification_ledger = + BTreeMap::::new(); loop { iterations += 1; @@ -403,7 +523,78 @@ where assistant_messages.push(assistant_message); if pending_tool_uses.is_empty() { - break; + let Some(verifier) = self.verifier.as_ref() else { + break; + }; + if !verifier.final_gate_enabled() { + break; + } + + let mut gate_reports = Vec::new(); + let pending_final_gate_keys = verification_ledger + .iter() + .filter_map(|(key, entry)| { + if entry.last_final_verified_sequence == Some(entry.last_mutation_sequence) + { + if entry + .last_final_status + .is_some_and(VerificationStatus::is_success) + { + None + } else { + Some((key.clone(), false)) + } + } else { + Some((key.clone(), true)) + } + }) + .collect::>(); + + if pending_final_gate_keys.is_empty() { + break; + } + + verification_gate.attempted = true; + verification_gate.passed = false; + + for (key, should_run) in pending_final_gate_keys { + let Some(entry) = verification_ledger.get(&key).cloned() else { + continue; + }; + let report = if should_run { + let Some(report) = self.run_final_verification(&entry) else { + continue; + }; + if let Some(ledger_entry) = verification_ledger.get_mut(&key) { + ledger_entry.last_final_status = Some(report.status); + ledger_entry.last_final_verified_sequence = + Some(ledger_entry.last_mutation_sequence); + } + report + } else { + Self::make_final_gate_reminder(&entry) + }; + self.record_verifier_ran( + iterations, + 
&format!("final_gate:{}", entry.adapter_id), + report.is_success(), + ); + verification_gate.report_ids.push(report.report_id.clone()); + self.persist_verification_report(&report)?; + gate_reports.push(report); + } + + if gate_reports.is_empty() { + break; + } + + let gate_passed = gate_reports.iter().all(VerificationReport::is_success); + verification_gate.passed = gate_passed; + verification_reports.extend(gate_reports); + if gate_passed { + break; + } + continue; } for (tool_use_id, tool_name, input) in pending_tool_uses { @@ -453,6 +644,7 @@ where ) }; + let mut pending_verification_reports = Vec::new(); let result_message = match permission_outcome { PermissionOutcome::Allow => { self.record_tool_started(iterations, &tool_name); @@ -491,16 +683,46 @@ where || post_hook_result.is_cancelled(), ); - if !is_error { - if let Some(verifier) = self.verifier.as_ref() { - if let Some(result) = verifier.verify(&tool_name, &effective_input) - { - self.record_verifier_ran(iterations, &tool_name, result.passed); - output = prepend_verifier_summary(&result.summary, output); - if !result.passed { - is_error = true; - } + if !is_error && is_write_tool(&tool_name) { + mutation_sequence += 1; + let reports = self.run_quick_verification( + &tool_name, + &effective_input, + mutation_sequence, + ); + for report in reports { + self.record_verifier_ran( + iterations, + &tool_name, + report.is_success(), + ); + output = prepend_verifier_summary(&report.short_summary(), output); + if !report.is_success() { + is_error = true; } + let key = VerificationLedgerKey { + adapter_id: report.adapter_id.clone(), + project_root: report.project_root.clone(), + }; + verification_ledger + .entry(key) + .and_modify(|entry| { + entry.update_from_report(&report, mutation_sequence); + }) + .or_insert_with(|| VerificationLedgerEntry { + adapter_id: report.adapter_id.clone(), + project_root: report.project_root.clone(), + touched_paths: report + .touched_paths + .iter() + .cloned() + .collect(), + 
last_mutation_sequence: mutation_sequence, + last_quick_status: Some(report.status), + last_final_status: None, + last_final_verified_sequence: None, + }); + pending_verification_reports.push(report); } } @@ -518,6 +740,10 @@ where .map_err(|error| RuntimeError::new(error.to_string()))?; self.record_tool_finished(iterations, &result_message); tool_results.push(result_message); + for report in pending_verification_reports { + self.persist_verification_report(&report)?; + verification_reports.push(report); + } } } @@ -526,6 +752,8 @@ where let summary = TurnSummary { assistant_messages, tool_results, + verification_reports, + verification_gate, prompt_cache_events, iterations, usage: self.usage_tracker.cumulative_usage(), @@ -688,6 +916,18 @@ where "tool_results".to_string(), Value::from(summary.tool_results.len() as u64), ); + attributes.insert( + "verification_reports".to_string(), + Value::from(summary.verification_reports.len() as u64), + ); + attributes.insert( + "verification_gate_attempted".to_string(), + Value::Bool(summary.verification_gate.attempted), + ); + attributes.insert( + "verification_gate_passed".to_string(), + Value::Bool(summary.verification_gate.passed), + ); attributes.insert( "prompt_cache_events".to_string(), Value::from(summary.prompt_cache_events.len() as u64), @@ -823,6 +1063,10 @@ fn merge_hook_feedback(messages: &[String], output: String, is_error: bool) -> S sections.join("\n\n") } +fn is_write_tool(tool_name: &str) -> bool { + matches!(tool_name, "edit_file" | "write_file" | "Edit" | "Write") +} + type ToolHandler = Box Result>; /// Simple in-memory tool executor for tests and lightweight integrations. 
@@ -872,7 +1116,10 @@ mod tests { use crate::prompt::{ProjectContext, SystemPromptBuilder}; use crate::session::{ContentBlock, MessageRole, Session}; use crate::usage::TokenUsage; - use crate::verifier::{VerificationResult, Verifier}; + use crate::verifier::{ + VerificationContext, VerificationPhase, VerificationReport, VerificationStatus, + VerificationTarget, Verifier, + }; use crate::ToolError; use std::fs; use std::path::PathBuf; @@ -1532,7 +1779,71 @@ mod tests { #[cfg(windows)] fn shell_snippet(script: &str) -> String { - script.replace('\'', "\"") + fn powershell_literal(value: &str) -> String { + format!("'{}'", value.replace('\'', "''")) + } + + fn powershell_snippet(script: &str) -> String { + format!( + "powershell -NoProfile -EncodedCommand {}", + encode_powershell(script) + ) + } + + fn encode_powershell(script: &str) -> String { + let bytes: Vec = script.encode_utf16().flat_map(u16::to_le_bytes).collect(); + encode_base64(&bytes) + } + + fn encode_base64(bytes: &[u8]) -> String { + const TABLE: &[u8; 64] = + b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4); + + for chunk in bytes.chunks(3) { + let b0 = chunk[0]; + let b1 = *chunk.get(1).unwrap_or(&0); + let b2 = *chunk.get(2).unwrap_or(&0); + let n = (u32::from(b0) << 16) | (u32::from(b1) << 8) | u32::from(b2); + + encoded.push(TABLE[((n >> 18) & 0x3F) as usize] as char); + encoded.push(TABLE[((n >> 12) & 0x3F) as usize] as char); + encoded.push(if chunk.len() > 1 { + TABLE[((n >> 6) & 0x3F) as usize] as char + } else { + '=' + }); + encoded.push(if chunk.len() > 2 { + TABLE[(n & 0x3F) as usize] as char + } else { + '=' + }); + } + + encoded + } + + if let Some((text, exit_code)) = script + .strip_prefix("printf '") + .and_then(|rest| rest.split_once("'; exit ")) + { + return powershell_snippet(&format!( + "[Console]::Out.Write({}); exit {exit_code}", + powershell_literal(text) + )); + } + + if let Some(text) = 
script + .strip_prefix("printf '") + .and_then(|rest| rest.strip_suffix('\'')) + { + return powershell_snippet(&format!( + "[Console]::Out.Write({})", + powershell_literal(text) + )); + } + + panic!("unsupported windows conversation test snippet: {script}"); } #[cfg(not(windows))] @@ -1878,13 +2189,18 @@ mod tests { struct FailingVerifier; impl Verifier for FailingVerifier { - fn verify(&self, tool_name: &str, _tool_input: &str) -> Option { - assert_eq!(tool_name, "edit_file"); - Some(VerificationResult { - passed: false, - summary: "[verifier] cargo check: FAIL\nerror[E0308]: mismatched types" - .to_string(), - }) + fn quick_verify(&self, context: &VerificationContext) -> Vec { + assert_eq!(context.tool_name, "edit_file"); + vec![test_verification_report( + VerificationPhase::Quick, + VerificationStatus::Failed, + &context.touched_paths, + "[verifier:quick:rust-cargo] failed (/workspace)\n[verifier] cargo check: FAIL\nerror[E0308]: mismatched types", + )] + } + + fn final_verify(&self, _target: &VerificationTarget) -> Option { + None } } @@ -1910,13 +2226,25 @@ mod tests { }; assert!(*is_error, "verifier failure should flip is_error to true"); assert!( - output.contains("[verifier]") && output.contains("mismatched types"), - "verifier summary must be surfaced to the model: {output:?}" + output.contains("[verifier:quick:rust-cargo] failed"), + "verifier short summary must be surfaced to the model: {output:?}" ); assert!( output.contains("edited"), "original tool output must be preserved: {output:?}" ); + assert_eq!(summary.verification_reports.len(), 1); + assert!( + summary.verification_reports[0] + .summary_text + .contains("mismatched types"), + "full verifier report should retain diagnostics" + ); + assert!(runtime + .session() + .messages + .iter() + .any(|message| message.role == MessageRole::Verification)); } #[test] @@ -1950,11 +2278,17 @@ mod tests { struct PassingVerifier; impl Verifier for PassingVerifier { - fn verify(&self, _tool_name: &str, 
_tool_input: &str) -> Option { - Some(VerificationResult { - passed: true, - summary: "[verifier] cargo check: ok".to_string(), - }) + fn quick_verify(&self, context: &VerificationContext) -> Vec { + vec![test_verification_report( + VerificationPhase::Quick, + VerificationStatus::Passed, + &context.touched_paths, + "[verifier:quick:rust-cargo] passed (/workspace)\n[verifier] cargo check: ok", + )] + } + + fn final_verify(&self, _target: &VerificationTarget) -> Option { + None } } @@ -1978,7 +2312,9 @@ mod tests { panic!("expected tool result"); }; assert!(!*is_error, "passing verifier must not flip is_error"); - assert!(output.contains("cargo check: ok")); + assert!(output.contains("[verifier:quick:rust-cargo] passed")); + assert_eq!(summary.verification_reports.len(), 1); + assert!(summary.verification_reports[0].is_success()); } #[test] @@ -2014,12 +2350,18 @@ mod tests { use std::sync::Arc; struct CountingVerifier(Arc); impl Verifier for CountingVerifier { - fn verify(&self, _tool_name: &str, _tool_input: &str) -> Option { + fn quick_verify(&self, _context: &VerificationContext) -> Vec { self.0.fetch_add(1, Ordering::SeqCst); - Some(VerificationResult { - passed: true, - summary: String::new(), - }) + vec![test_verification_report( + VerificationPhase::Quick, + VerificationStatus::Passed, + &[PathBuf::from("src/lib.rs")], + "[verifier:quick:rust-cargo] passed (/workspace)", + )] + } + + fn final_verify(&self, _target: &VerificationTarget) -> Option { + None } } @@ -2044,4 +2386,251 @@ mod tests { "verifier must be skipped when tool itself errored" ); } + + #[allow(clippy::too_many_lines)] + #[test] + fn staged_final_gate_blocks_completion_until_validation_passes() { + struct StagedApi { + calls: usize, + } + + impl ApiClient for StagedApi { + fn stream( + &mut self, + _request: ApiRequest, + ) -> Result, RuntimeError> { + self.calls += 1; + match self.calls { + 1 => Ok(vec![ + AssistantEvent::ToolUse { + id: "tool-1".to_string(), + name: "edit_file".to_string(), 
+ input: r#"{"file_path":"src/lib.rs"}"#.to_string(), + }, + AssistantEvent::MessageStop, + ]), + 2 => Ok(vec![ + AssistantEvent::TextDelta("done".to_string()), + AssistantEvent::MessageStop, + ]), + 3 => Ok(vec![ + AssistantEvent::ToolUse { + id: "tool-2".to_string(), + name: "edit_file".to_string(), + input: r#"{"file_path":"src/lib.rs"}"#.to_string(), + }, + AssistantEvent::MessageStop, + ]), + 4 => Ok(vec![ + AssistantEvent::TextDelta("done for real".to_string()), + AssistantEvent::MessageStop, + ]), + _ => unreachable!("extra API call"), + } + } + } + + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; + + struct StagedVerifier { + final_calls: Arc, + } + + impl Verifier for StagedVerifier { + fn quick_verify(&self, context: &VerificationContext) -> Vec { + vec![test_verification_report( + VerificationPhase::Quick, + VerificationStatus::Passed, + &context.touched_paths, + "[verifier:quick:rust-cargo] passed (/workspace)\n[verifier] cargo check: ok", + )] + } + + fn final_verify(&self, target: &VerificationTarget) -> Option { + let call = self.final_calls.fetch_add(1, Ordering::SeqCst); + let status = if call == 0 { + VerificationStatus::Failed + } else { + VerificationStatus::Passed + }; + Some(test_verification_report( + VerificationPhase::Final, + status, + &target.touched_paths, + if status == VerificationStatus::Failed { + "[verifier:final:rust-cargo] failed (/workspace)\n[verifier] cargo test: FAIL" + } else { + "[verifier:final:rust-cargo] passed (/workspace)\n[verifier] cargo test: ok" + }, + )) + } + + fn final_gate_enabled(&self) -> bool { + true + } + } + + let final_calls = Arc::new(AtomicUsize::new(0)); + let mut runtime = ConversationRuntime::new( + Session::new(), + StagedApi { calls: 0 }, + StaticToolExecutor::new().register("edit_file", |_| Ok("edited".to_string())), + PermissionPolicy::new(PermissionMode::DangerFullAccess), + vec!["system".to_string()], + ) + .with_verifier(Box::new(StagedVerifier { + final_calls: 
final_calls.clone(), + })); + + let summary = runtime + .run_turn("fix and verify", None) + .expect("turn should complete"); + + assert_eq!(summary.iterations, 4); + assert_eq!(summary.tool_results.len(), 2); + assert!(summary.verification_gate.attempted); + assert!(summary.verification_gate.passed); + assert_eq!(final_calls.load(Ordering::SeqCst), 2); + assert!(summary + .verification_reports + .iter() + .any(|report| report.phase == VerificationPhase::Final + && report.status == VerificationStatus::Failed)); + assert!(summary + .assistant_messages + .last() + .is_some_and(|message| message.blocks.iter().any( + |block| matches!(block, ContentBlock::Text { text } if text == "done for real") + ))); + } + + #[test] + fn staged_final_gate_dedupes_retries_without_new_mutation() { + struct ReminderApi { + calls: usize, + } + + impl ApiClient for ReminderApi { + fn stream( + &mut self, + _request: ApiRequest, + ) -> Result, RuntimeError> { + self.calls += 1; + match self.calls { + 1 => Ok(vec![ + AssistantEvent::ToolUse { + id: "tool-1".to_string(), + name: "edit_file".to_string(), + input: r#"{"file_path":"src/lib.rs"}"#.to_string(), + }, + AssistantEvent::MessageStop, + ]), + 2 | 3 => Ok(vec![ + AssistantEvent::TextDelta("done".to_string()), + AssistantEvent::MessageStop, + ]), + 4 => Ok(vec![ + AssistantEvent::ToolUse { + id: "tool-2".to_string(), + name: "edit_file".to_string(), + input: r#"{"file_path":"src/lib.rs"}"#.to_string(), + }, + AssistantEvent::MessageStop, + ]), + 5 => Ok(vec![ + AssistantEvent::TextDelta("done now".to_string()), + AssistantEvent::MessageStop, + ]), + _ => unreachable!("extra API call"), + } + } + } + + use std::sync::atomic::{AtomicUsize, Ordering}; + use std::sync::Arc; + + struct ReminderVerifier { + final_calls: Arc, + } + + impl Verifier for ReminderVerifier { + fn quick_verify(&self, context: &VerificationContext) -> Vec { + vec![test_verification_report( + VerificationPhase::Quick, + VerificationStatus::Passed, + 
&context.touched_paths, + "[verifier:quick:rust-cargo] passed (/workspace)\n[verifier] cargo check: ok", + )] + } + + fn final_verify(&self, target: &VerificationTarget) -> Option { + let call = self.final_calls.fetch_add(1, Ordering::SeqCst); + let status = if call == 0 { + VerificationStatus::Failed + } else { + VerificationStatus::Passed + }; + Some(test_verification_report( + VerificationPhase::Final, + status, + &target.touched_paths, + if status == VerificationStatus::Failed { + "[verifier:final:rust-cargo] failed (/workspace)\n[verifier] cargo clippy: FAIL" + } else { + "[verifier:final:rust-cargo] passed (/workspace)\n[verifier] cargo clippy: ok" + }, + )) + } + + fn final_gate_enabled(&self) -> bool { + true + } + } + + let final_calls = Arc::new(AtomicUsize::new(0)); + let mut runtime = ConversationRuntime::new( + Session::new(), + ReminderApi { calls: 0 }, + StaticToolExecutor::new().register("edit_file", |_| Ok("edited".to_string())), + PermissionPolicy::new(PermissionMode::DangerFullAccess), + vec!["system".to_string()], + ) + .with_verifier(Box::new(ReminderVerifier { + final_calls: final_calls.clone(), + })); + + let summary = runtime + .run_turn("fix and verify", None) + .expect("turn should complete"); + + assert_eq!(summary.iterations, 5); + assert_eq!(final_calls.load(Ordering::SeqCst), 2); + assert!(summary + .verification_reports + .iter() + .any(|report| report.phase == VerificationPhase::Final + && report.steps.is_empty() + && report.summary_text.contains("still failing"))); + assert!(summary.verification_gate.attempted); + assert!(summary.verification_gate.passed); + } + + fn test_verification_report( + phase: VerificationPhase, + status: VerificationStatus, + touched_paths: &[PathBuf], + summary_text: &str, + ) -> VerificationReport { + VerificationReport { + report_id: format!("test-{}-{}", phase.as_str(), status.as_str()), + phase, + adapter_id: "rust-cargo".to_string(), + project_root: PathBuf::from("/workspace"), + touched_paths: 
touched_paths.to_vec(), + status, + summary_text: summary_text.to_string(), + steps: Vec::new(), + } + } } diff --git a/rust/crates/runtime/src/file_ops.rs b/rust/crates/runtime/src/file_ops.rs index db51215ee3..f97a04c498 100644 --- a/rust/crates/runtime/src/file_ops.rs +++ b/rust/crates/runtime/src/file_ops.rs @@ -742,9 +742,9 @@ mod tests { let outside = temp_path("symlink-target.txt"); std::fs::write(&outside, "target content").expect("target should write"); - let link_path = workspace.join("escape-link.txt"); #[cfg(unix)] { + let link_path = workspace.join("escape-link.txt"); std::os::unix::fs::symlink(&outside, &link_path).expect("symlink should create"); assert!(is_symlink_escape(&link_path, &workspace).expect("check should succeed")); } diff --git a/rust/crates/runtime/src/hooks.rs b/rust/crates/runtime/src/hooks.rs index 6abd69fbbd..70a7970623 100644 --- a/rust/crates/runtime/src/hooks.rs +++ b/rust/crates/runtime/src/hooks.rs @@ -737,7 +737,7 @@ fn format_hook_failure(command: &str, code: i32, stdout: Option<&str>, stderr: & fn shell_command(command: &str) -> CommandWithStdin { #[cfg(windows)] - let mut command_builder = { + let command_builder = { let mut command_builder = Command::new("cmd"); command_builder.arg("/C").arg(command); CommandWithStdin::new(command_builder) @@ -957,11 +957,10 @@ mod tests { #[test] fn executes_hooks_in_configured_order() { // given + let first_command = shell_snippet("printf 'first'"); + let second_command = shell_snippet("printf 'second'"); let runner = HookRunner::new(RuntimeHookConfig::new( - vec![ - shell_snippet("printf 'first'"), - shell_snippet("printf 'second'"), - ], + vec![first_command.clone(), second_command.clone()], Vec::new(), Vec::new(), )); @@ -987,7 +986,7 @@ mod tests { event: HookEvent::PreToolUse, command, .. - } if command == "printf 'first'" + } if command == &first_command )); assert!(matches!( &reporter.events[1], @@ -995,7 +994,7 @@ mod tests { event: HookEvent::PreToolUse, command, .. 
- } if command == "printf 'first'" + } if command == &first_command )); assert!(matches!( &reporter.events[2], @@ -1003,7 +1002,7 @@ mod tests { event: HookEvent::PreToolUse, command, .. - } if command == "printf 'second'" + } if command == &second_command )); assert!(matches!( &reporter.events[3], @@ -1011,7 +1010,7 @@ mod tests { event: HookEvent::PreToolUse, command, .. - } if command == "printf 'second'" + } if command == &second_command )); } @@ -1041,10 +1040,10 @@ mod tests { #[test] fn malformed_nonempty_hook_output_reports_explicit_diagnostic_with_previews() { + let command = + shell_snippet("printf '{not-json\nsecond line'; printf 'stderr warning' >&2; exit 1"); let runner = HookRunner::new(RuntimeHookConfig::new( - vec![shell_snippet( - "printf '{not-json\nsecond line'; printf 'stderr warning' >&2; exit 1", - )], + vec![command.clone()], Vec::new(), Vec::new(), )); @@ -1056,8 +1055,11 @@ mod tests { assert!(rendered.contains("hook_invalid_json:")); assert!(rendered.contains("phase=PreToolUse")); assert!(rendered.contains("tool=Edit")); - assert!(rendered.contains("command=printf '{not-json")); - assert!(rendered.contains("printf 'stderr warning' >&2; exit 1")); + assert!(rendered.contains("command=")); + assert!(rendered.contains("stderr warning")); + assert!(rendered.contains( + &super::bounded_hook_preview(&command).unwrap_or_else(|| "".to_string()) + )); assert!(rendered.contains("detail=key must be a string")); assert!(rendered.contains("stdout_preview={not-json")); assert!(rendered.contains("second line stderr_preview=stderr warning")); @@ -1106,7 +1108,87 @@ mod tests { #[cfg(windows)] fn shell_snippet(script: &str) -> String { - script.replace('\'', "\"") + fn powershell_literal(value: &str) -> String { + format!("'{}'", value.replace('\'', "''")) + } + + fn powershell_snippet(script: &str) -> String { + format!( + "powershell -NoProfile -EncodedCommand {}", + encode_powershell(script) + ) + } + + fn encode_powershell(script: &str) -> String { + 
let bytes: Vec = script.encode_utf16().flat_map(u16::to_le_bytes).collect(); + encode_base64(&bytes) + } + + fn encode_base64(bytes: &[u8]) -> String { + const TABLE: &[u8; 64] = + b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4); + + for chunk in bytes.chunks(3) { + let b0 = chunk[0]; + let b1 = *chunk.get(1).unwrap_or(&0); + let b2 = *chunk.get(2).unwrap_or(&0); + let n = (u32::from(b0) << 16) | (u32::from(b1) << 8) | u32::from(b2); + + encoded.push(TABLE[((n >> 18) & 0x3F) as usize] as char); + encoded.push(TABLE[((n >> 12) & 0x3F) as usize] as char); + encoded.push(if chunk.len() > 1 { + TABLE[((n >> 6) & 0x3F) as usize] as char + } else { + '=' + }); + encoded.push(if chunk.len() > 2 { + TABLE[(n & 0x3F) as usize] as char + } else { + '=' + }); + } + + encoded + } + + if let Some(seconds) = script.strip_prefix("sleep ") { + return powershell_snippet(&format!("Start-Sleep -Seconds {seconds}")); + } + + if script == "printf '{not-json\nsecond line'; printf 'stderr warning' >&2; exit 1" { + return powershell_snippet( + "[Console]::Out.Write('{not-json' + [Environment]::NewLine + 'second line'); \ +[Console]::Error.Write('stderr warning'); exit 1", + ); + } + + if let Some(rest) = script.strip_prefix("printf '%s' '") { + if let Some(text) = rest.strip_suffix('\'') { + return powershell_snippet(&format!( + "[Console]::Out.Write({})", + powershell_literal(text) + )); + } + } + + if let Some(rest) = script.strip_prefix("printf '") { + if let Some((text, exit_code)) = rest.split_once("'; exit ") { + return powershell_snippet(&format!( + "[Console]::Out.Write({}); exit {exit_code}", + powershell_literal(text) + )); + } + + if let Some(text) = rest.strip_suffix('\'') { + return powershell_snippet(&format!( + "[Console]::Out.Write({})", + powershell_literal(text) + )); + } + } + + panic!("unsupported windows hook test snippet: {script}"); } #[cfg(not(windows))] diff --git 
a/rust/crates/runtime/src/lib.rs b/rust/crates/runtime/src/lib.rs index ec1ac4981a..5f11fb5641 100644 --- a/rust/crates/runtime/src/lib.rs +++ b/rust/crates/runtime/src/lib.rs @@ -63,7 +63,7 @@ pub use config::{ McpServerConfig, McpStdioServerConfig, McpTransport, McpWebSocketServerConfig, OAuthConfig, ProviderFallbackConfig, ResolvedPermissionMode, RuntimeConfig, RuntimeFeatureConfig, RuntimeHookConfig, RuntimePermissionRuleConfig, RuntimePluginConfig, RuntimeVerifierConfig, - ScopedMcpServerConfig, CLAW_SETTINGS_SCHEMA_NAME, + RuntimeVerifierMode, ScopedMcpServerConfig, CLAW_SETTINGS_SCHEMA_NAME, }; pub use config_validate::{ check_unsupported_format, format_diagnostics, validate_config_file, ConfigDiagnostic, @@ -169,7 +169,9 @@ pub use usage::{ format_usd, pricing_for_model, ModelPricing, TokenUsage, UsageCostEstimate, UsageTracker, }; pub use verifier::{ - prepend_verifier_summary, CargoVerifier, CargoVerifierConfig, VerificationResult, Verifier, + prepend_verifier_summary, CargoVerifier, CargoVerifierConfig, VerificationContext, + VerificationFailureKind, VerificationGateStatus, VerificationPhase, VerificationReport, + VerificationStatus, Verifier, }; pub use worker_boot::{ Worker, WorkerEvent, WorkerEventKind, WorkerEventPayload, WorkerFailure, WorkerFailureKind, diff --git a/rust/crates/runtime/src/mcp_stdio.rs b/rust/crates/runtime/src/mcp_stdio.rs index 5fbc31ba58..c2096b888f 100644 --- a/rust/crates/runtime/src/mcp_stdio.rs +++ b/rust/crates/runtime/src/mcp_stdio.rs @@ -19,12 +19,12 @@ use crate::mcp_lifecycle_hardened::{ }; #[cfg(test)] -const MCP_INITIALIZE_TIMEOUT_MS: u64 = 200; +const MCP_INITIALIZE_TIMEOUT_MS: u64 = 1_000; #[cfg(not(test))] const MCP_INITIALIZE_TIMEOUT_MS: u64 = 10_000; #[cfg(test)] -const MCP_LIST_TOOLS_TIMEOUT_MS: u64 = 300; +const MCP_LIST_TOOLS_TIMEOUT_MS: u64 = 1_000; #[cfg(not(test))] const MCP_LIST_TOOLS_TIMEOUT_MS: u64 = 30_000; @@ -1410,11 +1410,13 @@ mod tests { use std::collections::BTreeMap; use std::fs; use 
std::io::ErrorKind; - use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{SystemTime, UNIX_EPOCH}; + #[cfg(unix)] + use std::os::unix::fs::PermissionsExt; + use serde_json::json; use tokio::runtime::Builder; @@ -1443,6 +1445,34 @@ mod tests { std::env::temp_dir().join(format!("runtime-mcp-stdio-{nanos}-{unique_id}")) } + #[cfg(unix)] + fn make_executable(path: &Path) { + let mut permissions = fs::metadata(path).expect("metadata").permissions(); + permissions.set_mode(0o755); + fs::set_permissions(path, permissions).expect("chmod"); + } + + #[cfg(not(unix))] + fn make_executable(path: &Path) { + let _ = path; + } + + fn bash_command() -> &'static str { + if cfg!(windows) { + "bash" + } else { + "/bin/sh" + } + } + + fn python_command() -> &'static str { + if cfg!(windows) { + "python" + } else { + "python3" + } + } + fn write_echo_script() -> PathBuf { let root = temp_dir(); fs::create_dir_all(&root).expect("temp dir"); @@ -1452,9 +1482,7 @@ mod tests { "#!/bin/sh\nprintf 'READY:%s\\n' \"$MCP_TEST_TOKEN\"\nIFS= read -r line\nprintf 'ECHO:%s\\n' \"$line\"\n", ) .expect("write script"); - let mut permissions = fs::metadata(&script_path).expect("metadata").permissions(); - permissions.set_mode(0o755); - fs::set_permissions(&script_path, permissions).expect("chmod"); + make_executable(&script_path); script_path } @@ -1498,9 +1526,7 @@ mod tests { ] .join("\n"); fs::write(&script_path, script).expect("write script"); - let mut permissions = fs::metadata(&script_path).expect("metadata").permissions(); - permissions.set_mode(0o755); - fs::set_permissions(&script_path, permissions).expect("chmod"); + make_executable(&script_path); script_path } @@ -1632,9 +1658,7 @@ mod tests { ] .join("\n"); fs::write(&script_path, script).expect("write script"); - let mut permissions = fs::metadata(&script_path).expect("metadata").permissions(); - permissions.set_mode(0o755); - 
fs::set_permissions(&script_path, permissions).expect("chmod"); + make_executable(&script_path); script_path } @@ -1757,9 +1781,7 @@ mod tests { ] .join("\n"); fs::write(&script_path, script).expect("write script"); - let mut permissions = fs::metadata(&script_path).expect("metadata").permissions(); - permissions.set_mode(0o755); - fs::set_permissions(&script_path, permissions).expect("chmod"); + make_executable(&script_path); script_path } @@ -1767,7 +1789,7 @@ mod tests { let config = ScopedMcpServerConfig { scope: ConfigSource::Local, config: McpServerConfig::Stdio(McpStdioServerConfig { - command: "/bin/sh".to_string(), + command: bash_command().to_string(), args: vec![script_path.to_string_lossy().into_owned()], env: BTreeMap::from([("MCP_TEST_TOKEN".to_string(), "secret-value".to_string())]), tool_call_timeout_ms: None, @@ -1785,7 +1807,7 @@ mod tests { env: BTreeMap, ) -> crate::mcp_client::McpStdioTransport { crate::mcp_client::McpStdioTransport { - command: "python3".to_string(), + command: python_command().to_string(), args: vec![script_path.to_string_lossy().into_owned()], env, tool_call_timeout_ms: None, @@ -1834,7 +1856,7 @@ mod tests { ScopedMcpServerConfig { scope: ConfigSource::Local, config: McpServerConfig::Stdio(McpStdioServerConfig { - command: "python3".to_string(), + command: python_command().to_string(), args: vec![script_path.to_string_lossy().into_owned()], env, tool_call_timeout_ms: None, @@ -2053,7 +2075,7 @@ mod tests { runtime.block_on(async { let script_path = write_echo_script(); let transport = crate::mcp_client::McpStdioTransport { - command: "/bin/sh".to_string(), + command: bash_command().to_string(), args: vec![script_path.to_string_lossy().into_owned()], env: BTreeMap::from([("MCP_TEST_TOKEN".to_string(), "direct-secret".to_string())]), tool_call_timeout_ms: None, @@ -2312,7 +2334,7 @@ mod tests { ScopedMcpServerConfig { scope: ConfigSource::Local, config: McpServerConfig::Stdio(McpStdioServerConfig { - command: 
"python3".to_string(), + command: python_command().to_string(), args: vec![script_path.to_string_lossy().into_owned()], env: BTreeMap::from([( "MCP_TOOL_CALL_DELAY_MS".to_string(), @@ -2365,7 +2387,7 @@ mod tests { ScopedMcpServerConfig { scope: ConfigSource::Local, config: McpServerConfig::Stdio(McpStdioServerConfig { - command: "python3".to_string(), + command: python_command().to_string(), args: vec![script_path.to_string_lossy().into_owned()], env: BTreeMap::from([( "MCP_INVALID_TOOL_CALL_RESPONSE".to_string(), @@ -2676,9 +2698,7 @@ mod tests { ] .join("\n"); fs::write(&script_path, script).expect("write script"); - let mut permissions = fs::metadata(&script_path).expect("metadata").permissions(); - permissions.set_mode(0o755); - fs::set_permissions(&script_path, permissions).expect("chmod"); + make_executable(&script_path); script_path } @@ -2703,8 +2723,8 @@ mod tests { ScopedMcpServerConfig { scope: ConfigSource::Local, config: McpServerConfig::Stdio(McpStdioServerConfig { - command: broken_script_path.display().to_string(), - args: Vec::new(), + command: python_command().to_string(), + args: vec![broken_script_path.display().to_string()], env: BTreeMap::new(), tool_call_timeout_ms: None, }), diff --git a/rust/crates/runtime/src/mcp_tool_bridge.rs b/rust/crates/runtime/src/mcp_tool_bridge.rs index af637a98d1..72959b329a 100644 --- a/rust/crates/runtime/src/mcp_tool_bridge.rs +++ b/rust/crates/runtime/src/mcp_tool_bridge.rs @@ -314,11 +314,13 @@ impl McpToolRegistry { mod tests { use std::collections::BTreeMap; use std::fs; - use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{SystemTime, UNIX_EPOCH}; + #[cfg(unix)] + use std::os::unix::fs::PermissionsExt; + use super::*; use crate::config::{ ConfigSource, McpServerConfig, McpStdioServerConfig, ScopedMcpServerConfig, @@ -334,6 +336,26 @@ mod tests { std::env::temp_dir().join(format!("runtime-mcp-tool-bridge-{nanos}-{unique_id}")) 
} + #[cfg(unix)] + fn make_executable(path: &Path) { + let mut permissions = fs::metadata(path).expect("metadata").permissions(); + permissions.set_mode(0o755); + fs::set_permissions(path, permissions).expect("chmod"); + } + + #[cfg(not(unix))] + fn make_executable(path: &Path) { + let _ = path; + } + + fn python_command() -> &'static str { + if cfg!(windows) { + "python" + } else { + "python3" + } + } + fn cleanup_script(script_path: &Path) { if let Some(root) = script_path.parent() { let _ = fs::remove_dir_all(root); @@ -430,9 +452,7 @@ mod tests { ] .join("\n"); fs::write(&script_path, script).expect("write script"); - let mut permissions = fs::metadata(&script_path).expect("metadata").permissions(); - permissions.set_mode(0o755); - fs::set_permissions(&script_path, permissions).expect("chmod"); + make_executable(&script_path); script_path } @@ -444,7 +464,7 @@ mod tests { ScopedMcpServerConfig { scope: ConfigSource::Local, config: McpServerConfig::Stdio(McpStdioServerConfig { - command: "python3".to_string(), + command: python_command().to_string(), args: vec![script_path.to_string_lossy().into_owned()], env: BTreeMap::from([ ("MCP_SERVER_LABEL".to_string(), server_name.to_string()), diff --git a/rust/crates/runtime/src/oauth.rs b/rust/crates/runtime/src/oauth.rs index aa3ca158c7..8ef1a2210c 100644 --- a/rust/crates/runtime/src/oauth.rs +++ b/rust/crates/runtime/src/oauth.rs @@ -1,6 +1,6 @@ use std::collections::BTreeMap; -use std::fs::{self, File}; -use std::io::{self, Read}; +use std::fs; +use std::io; use std::path::PathBuf; use serde::{Deserialize, Serialize}; @@ -326,7 +326,8 @@ pub fn parse_oauth_callback_query(query: &str) -> Result io::Result { let mut buffer = vec![0_u8; bytes]; - File::open("/dev/urandom")?.read_exact(&mut buffer)?; + getrandom::fill(&mut buffer) + .map_err(|error| io::Error::other(format!("failed to gather random bytes: {error}")))?; Ok(base64url_encode(&buffer)) } diff --git a/rust/crates/runtime/src/session.rs 
b/rust/crates/runtime/src/session.rs index b97378e582..7bf7bdb794 100644 --- a/rust/crates/runtime/src/session.rs +++ b/rust/crates/runtime/src/session.rs @@ -8,6 +8,7 @@ use std::time::{SystemTime, UNIX_EPOCH}; use crate::json::{JsonError, JsonValue}; use crate::usage::TokenUsage; +use crate::verifier::{VerificationPhase, VerificationReport, VerificationStatus}; const SESSION_VERSION: u32 = 1; const ROTATE_AFTER_BYTES: u64 = 256 * 1024; @@ -22,6 +23,7 @@ pub enum MessageRole { User, Assistant, Tool, + Verification, } /// Structured message content stored inside a [`Session`]. @@ -41,6 +43,12 @@ pub enum ContentBlock { output: String, is_error: bool, }, + VerificationReport { + report_id: String, + phase: VerificationPhase, + status: VerificationStatus, + summary_text: String, + }, } /// One conversation message with optional token-usage metadata. @@ -668,6 +676,20 @@ impl ConversationMessage { } } + #[must_use] + pub fn verification_report(report: &VerificationReport) -> Self { + Self { + role: MessageRole::Verification, + blocks: vec![ContentBlock::VerificationReport { + report_id: report.report_id.clone(), + phase: report.phase, + status: report.status, + summary_text: report.summary_text.clone(), + }], + usage: None, + } + } + #[must_use] pub fn to_json(&self) -> JsonValue { let mut object = BTreeMap::new(); @@ -679,6 +701,7 @@ impl ConversationMessage { MessageRole::User => "user", MessageRole::Assistant => "assistant", MessageRole::Tool => "tool", + MessageRole::Verification => "verification", } .to_string(), ), @@ -706,6 +729,7 @@ impl ConversationMessage { "user" => MessageRole::User, "assistant" => MessageRole::Assistant, "tool" => MessageRole::Tool, + "verification" => MessageRole::Verification, other => { return Err(SessionError::Format(format!( "unsupported message role: {other}" @@ -767,6 +791,33 @@ impl ContentBlock { object.insert("output".to_string(), JsonValue::String(output.clone())); object.insert("is_error".to_string(), 
JsonValue::Bool(*is_error)); } + Self::VerificationReport { + report_id, + phase, + status, + summary_text, + } => { + object.insert( + "type".to_string(), + JsonValue::String("verification_report".to_string()), + ); + object.insert( + "report_id".to_string(), + JsonValue::String(report_id.clone()), + ); + object.insert( + "phase".to_string(), + JsonValue::String(phase.as_str().to_string()), + ); + object.insert( + "status".to_string(), + JsonValue::String(status.as_str().to_string()), + ); + object.insert( + "summary_text".to_string(), + JsonValue::String(summary_text.clone()), + ); + } } JsonValue::Object(object) } @@ -797,6 +848,12 @@ impl ContentBlock { .and_then(JsonValue::as_bool) .ok_or_else(|| SessionError::Format("missing is_error".to_string()))?, }), + "verification_report" => Ok(Self::VerificationReport { + report_id: required_string(object, "report_id")?, + phase: parse_verification_phase(&required_string(object, "phase")?)?, + status: parse_verification_status(&required_string(object, "status")?)?, + summary_text: required_string(object, "summary_text")?, + }), other => Err(SessionError::Format(format!( "unsupported block type: {other}" ))), @@ -970,6 +1027,28 @@ fn required_string( .ok_or_else(|| SessionError::Format(format!("missing {key}"))) } +fn parse_verification_phase(value: &str) -> Result { + match value { + "quick" => Ok(VerificationPhase::Quick), + "final" => Ok(VerificationPhase::Final), + other => Err(SessionError::Format(format!( + "unsupported verification phase: {other}" + ))), + } +} + +fn parse_verification_status(value: &str) -> Result { + match value { + "passed" => Ok(VerificationStatus::Passed), + "failed" => Ok(VerificationStatus::Failed), + "skipped" => Ok(VerificationStatus::Skipped), + "unavailable" => Ok(VerificationStatus::Unavailable), + other => Err(SessionError::Format(format!( + "unsupported verification status: {other}" + ))), + } +} + fn required_u32(object: &BTreeMap, key: &str) -> Result { let value = object 
.get(key) diff --git a/rust/crates/runtime/src/verifier.rs b/rust/crates/runtime/src/verifier.rs index 563f19cba3..f054bdebc1 100644 --- a/rust/crates/runtime/src/verifier.rs +++ b/rust/crates/runtime/src/verifier.rs @@ -1,165 +1,1529 @@ -//! Self-verification of code edits. +//! Structured self-verification of code edits. //! -//! After the model runs a writing tool (`edit_file` / `write_file`) on a Rust -//! source file, a [`Verifier`] is given the chance to run additional checks -//! (`cargo check`, `cargo clippy`, `cargo fmt --check`, `cargo test`) against -//! the affected crate and feed the result back into the tool output. The -//! assistant then sees any compilation, lint, formatting, or test failures on -//! the very next iteration and can correct them without the user having to -//! intervene. +//! The verifier consumes successful write/edit tool invocations, detects the +//! owning project for the touched file, and runs phase-aware validation. The +//! runtime uses the resulting structured reports both as model-visible +//! feedback and as a final gate before ending a turn in staged mode. use std::fmt::Write as _; +use std::fs; use std::path::{Path, PathBuf}; use std::process::Command; -use std::time::Duration; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{Duration, Instant}; use serde_json::Value; +use toml::Value as TomlValue; -/// Output of a single verification run injected back into the tool result. +/// Maximum bytes kept per verification step before the summary is truncated. 
+const DEFAULT_MAX_OUTPUT_BYTES: usize = 2_048; +const WRITE_TOOLS: &[&str] = &["edit_file", "write_file", "Edit", "Write"]; +const ESLINT_CONFIG_FILES: &[&str] = &[ + "eslint.config.js", + "eslint.config.cjs", + "eslint.config.mjs", + ".eslintrc", + ".eslintrc.js", + ".eslintrc.cjs", + ".eslintrc.json", + ".eslintrc.yml", + ".eslintrc.yaml", +]; +const PYTHON_ROOT_MARKERS: &[&str] = &[ + "pyproject.toml", + "uv.lock", + "poetry.lock", + "requirements.txt", + "requirements-dev.txt", + "requirements-test.txt", + "setup.py", + "setup.cfg", + "tox.ini", +]; +const RUFF_CONFIG_FILES: &[&str] = &["ruff.toml", ".ruff.toml"]; +const MYPY_CONFIG_FILES: &[&str] = &["mypy.ini", ".mypy.ini"]; +const PYTEST_CONFIG_FILES: &[&str] = &["pytest.ini", "tox.ini", "setup.cfg"]; +static REPORT_COUNTER: AtomicU64 = AtomicU64::new(1); + +/// High-level phase for a verification run. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum VerificationPhase { + Quick, + Final, +} + +impl VerificationPhase { + #[must_use] + pub fn as_str(self) -> &'static str { + match self { + Self::Quick => "quick", + Self::Final => "final", + } + } +} + +/// Outcome classification for a verification step or report. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum VerificationStatus { + Passed, + Failed, + Skipped, + Unavailable, +} + +/// Refined failure classification used by structured verification steps. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum VerificationFailureKind { + Code, + Environment, + ToolUnavailable, + Config, + Timeout, +} + +impl VerificationFailureKind { + #[must_use] + pub fn as_str(self) -> &'static str { + match self { + Self::Code => "code", + Self::Environment => "environment", + Self::ToolUnavailable => "tool_unavailable", + Self::Config => "config", + Self::Timeout => "timeout", + } + } +} + +impl VerificationStatus { + #[must_use] + pub fn as_str(self) -> &'static str { + match self { + Self::Passed => "passed", + Self::Failed => "failed", + Self::Skipped => "skipped", + Self::Unavailable => "unavailable", + } + } + + #[must_use] + pub fn is_success(self) -> bool { + matches!(self, Self::Passed | Self::Skipped) + } +} + +/// Mutable-work context supplied by the runtime for a verification decision. #[derive(Debug, Clone, PartialEq, Eq)] -pub struct VerificationResult { +pub struct VerificationContext { + pub phase: VerificationPhase, + pub workspace_root: Option, + pub tool_name: String, + pub tool_input: String, + pub touched_paths: Vec, + pub mutation_sequence: u64, +} + +impl VerificationContext { + #[must_use] + pub fn from_tool_invocation( + phase: VerificationPhase, + workspace_root: Option, + tool_name: impl Into, + tool_input: impl Into, + mutation_sequence: u64, + ) -> Option { + let tool_name = tool_name.into(); + let tool_input = tool_input.into(); + let touched_path = extract_file_path(&tool_input)?; + Some(Self { + phase, + workspace_root, + tool_name, + tool_input, + touched_paths: vec![touched_path], + mutation_sequence, + }) + } +} + +/// Deduplicated project target used by staged final-gate verification. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VerificationTarget { + pub adapter_id: String, + pub project_root: PathBuf, + pub touched_paths: Vec, + pub mutation_sequence: u64, +} + +/// Structured output of one verification step. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VerificationStepReport { + pub adapter: String, + pub project_root: PathBuf, + pub label: String, + pub command: String, + pub phase: VerificationPhase, + pub status: VerificationStatus, + pub failure_kind: Option, + pub duration_ms: u64, + pub truncated_output: String, +} + +/// Structured output of a full verification pass for one adapter/root pair. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VerificationReport { + pub report_id: String, + pub phase: VerificationPhase, + pub adapter_id: String, + pub project_root: PathBuf, + pub touched_paths: Vec, + pub status: VerificationStatus, + pub summary_text: String, + pub steps: Vec, +} + +impl VerificationReport { + #[must_use] + pub fn is_success(&self) -> bool { + self.status.is_success() + } + + #[must_use] + pub fn short_summary(&self) -> String { + let mut lines = self.summary_text.lines(); + let first = lines.next().unwrap_or_default().trim(); + if first.is_empty() { + format!( + "[verifier:{}:{}] {}", + self.phase.as_str(), + self.adapter_id, + self.status.as_str() + ) + } else { + first.to_string() + } + } + + #[must_use] + pub fn target(&self, mutation_sequence: u64) -> VerificationTarget { + VerificationTarget { + adapter_id: self.adapter_id.clone(), + project_root: self.project_root.clone(), + touched_paths: self.touched_paths.clone(), + mutation_sequence, + } + } +} + +/// Status of the staged final gate for the completed turn. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VerificationGateStatus { + pub attempted: bool, pub passed: bool, - pub summary: String, + pub report_ids: Vec, } -/// Strategy that inspects a completed tool invocation and optionally produces -/// additional diagnostics to inject into the tool result. 
+impl VerificationGateStatus { + #[must_use] + pub fn not_required() -> Self { + Self { + attempted: false, + passed: true, + report_ids: Vec::new(), + } + } +} + +/// Strategy that inspects completed mutations and produces verification +/// reports for the runtime. pub trait Verifier: Send { - /// Return `Some` when the tool/input pair is in scope for verification, - /// `None` otherwise (e.g. a `read_file` call, or an edit on `README.md`). - fn verify(&self, tool_name: &str, tool_input: &str) -> Option; + fn quick_verify(&self, context: &VerificationContext) -> Vec; + fn final_verify(&self, target: &VerificationTarget) -> Option; + fn final_gate_enabled(&self) -> bool { + false + } +} + +/// Declarative runtime config for the built-in multi-language verifier. +#[allow(clippy::struct_excessive_bools)] +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CargoVerifierConfig { + pub legacy_mode: bool, + pub quick_on_write: bool, + pub final_gate: bool, + pub max_output_bytes: usize, + pub rust_check: bool, + pub rust_clippy: bool, + pub rust_fmt: bool, + pub rust_test: bool, + pub rust_timeout: Duration, + pub node_enabled: bool, + pub node_timeout: Duration, + pub python_enabled: bool, + pub python_timeout: Duration, +} + +impl Default for CargoVerifierConfig { + fn default() -> Self { + Self { + legacy_mode: true, + quick_on_write: true, + final_gate: false, + max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES, + rust_check: true, + rust_clippy: true, + rust_fmt: true, + rust_test: true, + rust_timeout: Duration::from_mins(2), + node_enabled: true, + node_timeout: Duration::from_mins(2), + python_enabled: true, + python_timeout: Duration::from_mins(2), + } + } +} + +/// Built-in verifier registry for Rust, Node/TypeScript, and Python roots. 
+pub struct CargoVerifier { + config: CargoVerifierConfig, +} + +impl CargoVerifier { + #[must_use] + pub fn new(config: CargoVerifierConfig) -> Self { + Self { config } + } + + #[must_use] + pub fn final_gate_enabled(&self) -> bool { + !self.config.legacy_mode && self.config.final_gate + } +} + +impl Verifier for CargoVerifier { + fn quick_verify(&self, context: &VerificationContext) -> Vec { + if !WRITE_TOOLS.contains(&context.tool_name.as_str()) { + return Vec::new(); + } + let Some(path) = context.touched_paths.first() else { + return Vec::new(); + }; + + for adapter in [Adapter::Rust, Adapter::NodeTypeScript, Adapter::Python] { + if let Some(report) = adapter.quick_verify(path, context, &self.config) { + return vec![report]; + } + } + + Vec::new() + } + + fn final_verify(&self, target: &VerificationTarget) -> Option { + if self.config.legacy_mode || !self.config.final_gate { + return None; + } + let adapter = Adapter::by_id(&target.adapter_id)?; + adapter.final_verify(target, &self.config) + } + + fn final_gate_enabled(&self) -> bool { + !self.config.legacy_mode && self.config.final_gate + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum Adapter { + Rust, + NodeTypeScript, + Python, +} + +impl Adapter { + fn by_id(value: &str) -> Option { + match value { + "rust-cargo" => Some(Self::Rust), + "node-typescript" => Some(Self::NodeTypeScript), + "python" => Some(Self::Python), + _ => None, + } + } + + fn quick_verify( + self, + path: &Path, + context: &VerificationContext, + config: &CargoVerifierConfig, + ) -> Option { + match self { + Self::Rust => verify_rust(path, context, config), + Self::NodeTypeScript => verify_node(path, context, config, false), + Self::Python => verify_python(path, context, config, false), + } + } + + fn final_verify( + self, + target: &VerificationTarget, + config: &CargoVerifierConfig, + ) -> Option { + match self { + Self::Rust => Some(finalize_rust(target, config)), + Self::NodeTypeScript => finalize_node(target, config), + 
Self::Python => finalize_python(target, config), + } + } +} + +#[derive(Debug, Clone)] +struct PlannedStep { + label: String, + command: Vec, + diagnostics: StepDiagnostics, +} + +#[derive(Debug)] +enum StepOutcome { + Passed { + body: String, + duration_ms: u64, + }, + Failed { + body: String, + duration_ms: u64, + failure_kind: Option, + }, + Unavailable { + message: String, + duration_ms: u64, + failure_kind: Option, + }, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum PackageManager { + Npm, + Pnpm, + Yarn, + Bun, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum PythonLauncherKind { + Uv, + Poetry, + Venv, + Global, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +struct PythonRunner { + command_prefix: Vec, +} + +#[allow(clippy::struct_excessive_bools)] +#[derive(Debug, Clone, PartialEq, Eq)] +struct PythonProjectProfile { + project_root: PathBuf, + runner: PythonRunner, + launcher_kind: PythonLauncherKind, + pyproject_parsed: bool, + has_ruff: bool, + has_mypy: bool, + has_pytest: bool, + typed_targets: Vec, + test_root_present: bool, + pyproject_path: Option, + pyproject_parse_error: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum PythonStepKind { + RuffCheck, + Mypy, + Pytest, + PyCompile, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum StepDiagnostics { + Generic, + Python { + launcher_kind: PythonLauncherKind, + step_kind: PythonStepKind, + }, +} + +fn verify_rust( + path: &Path, + context: &VerificationContext, + config: &CargoVerifierConfig, +) -> Option { + let manifest = nearest_file(path, "Cargo.toml")?; + let project_root = manifest.parent()?.to_path_buf(); + let phase = context.phase; + let steps = if config.legacy_mode { + rust_legacy_steps(config) + } else if phase == VerificationPhase::Quick { + rust_quick_steps(config) + } else { + rust_final_steps(config) + }; + Some(run_rust_steps( + &project_root, + context.touched_paths.clone(), + phase, + steps, + config, + )) +} + +fn finalize_rust(target: 
&VerificationTarget, config: &CargoVerifierConfig) -> VerificationReport { + let steps = if config.legacy_mode { + rust_legacy_steps(config) + } else { + rust_final_steps(config) + }; + run_rust_steps( + &target.project_root, + target.touched_paths.clone(), + VerificationPhase::Final, + steps, + config, + ) +} + +fn run_rust_steps( + project_root: &Path, + touched_paths: Vec, + phase: VerificationPhase, + steps: Vec, + config: &CargoVerifierConfig, +) -> VerificationReport { + run_planned_steps( + "rust-cargo", + project_root, + touched_paths, + phase, + steps, + config.rust_timeout, + config.max_output_bytes, + ) +} + +fn rust_quick_steps(config: &CargoVerifierConfig) -> Vec { + if config.quick_on_write && config.rust_check { + vec![PlannedStep { + label: "cargo check".to_string(), + command: vec![ + "cargo".to_string(), + "check".to_string(), + "--quiet".to_string(), + "--message-format=short".to_string(), + ], + diagnostics: StepDiagnostics::Generic, + }] + } else { + Vec::new() + } +} + +fn rust_final_steps(config: &CargoVerifierConfig) -> Vec { + let mut steps = Vec::new(); + if config.rust_fmt { + steps.push(PlannedStep { + label: "cargo fmt --check".to_string(), + command: vec![ + "cargo".to_string(), + "fmt".to_string(), + "--".to_string(), + "--check".to_string(), + ], + diagnostics: StepDiagnostics::Generic, + }); + } + if config.rust_clippy { + steps.push(PlannedStep { + label: "cargo clippy".to_string(), + command: vec![ + "cargo".to_string(), + "clippy".to_string(), + "--quiet".to_string(), + "--message-format=short".to_string(), + "--".to_string(), + "-D".to_string(), + "warnings".to_string(), + ], + diagnostics: StepDiagnostics::Generic, + }); + } + if config.rust_test { + steps.push(PlannedStep { + label: "cargo test".to_string(), + command: vec![ + "cargo".to_string(), + "test".to_string(), + "--quiet".to_string(), + "--no-fail-fast".to_string(), + ], + diagnostics: StepDiagnostics::Generic, + }); + } + steps +} + +fn rust_legacy_steps(config: 
&CargoVerifierConfig) -> Vec { + let mut steps = rust_quick_steps(config); + if config.rust_clippy { + steps.push(PlannedStep { + label: "cargo clippy".to_string(), + command: vec![ + "cargo".to_string(), + "clippy".to_string(), + "--quiet".to_string(), + "--message-format=short".to_string(), + "--".to_string(), + "-D".to_string(), + "warnings".to_string(), + ], + diagnostics: StepDiagnostics::Generic, + }); + } + if config.rust_fmt { + steps.push(PlannedStep { + label: "cargo fmt --check".to_string(), + command: vec![ + "cargo".to_string(), + "fmt".to_string(), + "--".to_string(), + "--check".to_string(), + ], + diagnostics: StepDiagnostics::Generic, + }); + } + if config.rust_test { + steps.push(PlannedStep { + label: "cargo test".to_string(), + command: vec![ + "cargo".to_string(), + "test".to_string(), + "--quiet".to_string(), + "--no-fail-fast".to_string(), + ], + diagnostics: StepDiagnostics::Generic, + }); + } + steps +} + +fn verify_node( + path: &Path, + context: &VerificationContext, + config: &CargoVerifierConfig, + final_phase: bool, +) -> Option { + if !config.node_enabled { + return None; + } + let package_json = nearest_file(path, "package.json")?; + let project_root = package_json.parent()?.to_path_buf(); + let package_contents = fs::read_to_string(&package_json).ok()?; + let package_value: Value = serde_json::from_str(&package_contents).ok()?; + let phase = if final_phase { + VerificationPhase::Final + } else { + context.phase + }; + let package_manager = detect_package_manager(&project_root); + let steps = if config.legacy_mode { + node_legacy_steps(&project_root, &package_value, package_manager) + } else if phase == VerificationPhase::Quick { + if config.quick_on_write { + node_quick_steps(&project_root, &package_value, package_manager) + } else { + Vec::new() + } + } else { + node_final_steps(&project_root, &package_value, package_manager) + }; + Some(run_planned_steps( + "node-typescript", + &project_root, + context.touched_paths.clone(), + 
phase, + steps, + config.node_timeout, + config.max_output_bytes, + )) +} + +fn finalize_node( + target: &VerificationTarget, + config: &CargoVerifierConfig, +) -> Option { + if !config.node_enabled { + return None; + } + let package_json = target.project_root.join("package.json"); + let package_contents = fs::read_to_string(&package_json).ok()?; + let package_value: Value = serde_json::from_str(&package_contents).ok()?; + let package_manager = detect_package_manager(&target.project_root); + Some(run_planned_steps( + "node-typescript", + &target.project_root, + target.touched_paths.clone(), + VerificationPhase::Final, + node_final_steps(&target.project_root, &package_value, package_manager), + config.node_timeout, + config.max_output_bytes, + )) +} + +fn node_quick_steps( + root: &Path, + package_value: &Value, + manager: PackageManager, +) -> Vec { + if has_script(package_value, "typecheck") { + return vec![PlannedStep { + label: "typecheck".to_string(), + command: package_manager_run_script(manager, "typecheck"), + diagnostics: StepDiagnostics::Generic, + }]; + } + if root.join("tsconfig.json").is_file() { + return vec![PlannedStep { + label: "tsc --noEmit".to_string(), + command: package_manager_exec(manager, "tsc", &["--noEmit"]), + diagnostics: StepDiagnostics::Generic, + }]; + } + Vec::new() +} + +fn node_final_steps( + root: &Path, + package_value: &Value, + manager: PackageManager, +) -> Vec { + let mut steps = Vec::new(); + if has_script(package_value, "lint") { + steps.push(PlannedStep { + label: "lint".to_string(), + command: package_manager_run_script(manager, "lint"), + diagnostics: StepDiagnostics::Generic, + }); + } else if ESLINT_CONFIG_FILES + .iter() + .any(|name| root.join(name).is_file()) + { + steps.push(PlannedStep { + label: "eslint .".to_string(), + command: package_manager_exec(manager, "eslint", &["."]), + diagnostics: StepDiagnostics::Generic, + }); + } + if has_script(package_value, "test") { + steps.push(PlannedStep { + label: 
"test".to_string(), + command: package_manager_run_script(manager, "test"), + diagnostics: StepDiagnostics::Generic, + }); + } + steps +} + +fn node_legacy_steps( + root: &Path, + package_value: &Value, + manager: PackageManager, +) -> Vec { + let mut steps = node_quick_steps(root, package_value, manager); + steps.extend(node_final_steps(root, package_value, manager)); + steps +} + +fn verify_python( + path: &Path, + context: &VerificationContext, + config: &CargoVerifierConfig, + final_phase: bool, +) -> Option { + let phase = if final_phase { + VerificationPhase::Final + } else { + context.phase + }; + verify_python_for_phase(path, &context.touched_paths, phase, config) +} + +fn finalize_python( + target: &VerificationTarget, + config: &CargoVerifierConfig, +) -> Option { + if !config.python_enabled { + return None; + } + let profile = build_python_profile_for_root(&target.project_root)?; + Some(build_python_report( + &profile, + target.touched_paths.clone(), + VerificationPhase::Final, + config, + )) +} + +fn verify_python_for_phase( + path: &Path, + touched_paths: &[PathBuf], + phase: VerificationPhase, + config: &CargoVerifierConfig, +) -> Option { + if !config.python_enabled { + return None; + } + let profile = build_python_profile_for_path(path)?; + Some(build_python_report( + &profile, + touched_paths.to_vec(), + phase, + config, + )) +} + +fn build_python_report( + profile: &PythonProjectProfile, + touched_paths: Vec, + phase: VerificationPhase, + config: &CargoVerifierConfig, +) -> VerificationReport { + if let Some(report) = python_config_failure_report( + profile, + phase, + touched_paths.clone(), + config.max_output_bytes, + ) { + return report; + } + + let steps = if config.legacy_mode { + python_legacy_steps(profile, &touched_paths) + } else if phase == VerificationPhase::Quick { + if config.quick_on_write { + python_quick_steps(profile, &touched_paths) + } else { + Vec::new() + } + } else { + python_final_steps(profile, &touched_paths) + }; + + 
run_planned_steps( + "python", + &profile.project_root, + touched_paths, + phase, + steps, + config.python_timeout, + config.max_output_bytes, + ) +} + +fn python_config_failure_report( + profile: &PythonProjectProfile, + phase: VerificationPhase, + touched_paths: Vec, + max_output_bytes: usize, +) -> Option { + if profile.pyproject_parsed { + return None; + } + let error = profile.pyproject_parse_error.as_ref()?; + let pyproject_path = profile.pyproject_path.as_ref().map_or_else( + || "pyproject.toml".to_string(), + |path| path.display().to_string(), + ); + let steps = vec![VerificationStepReport { + adapter: "python".to_string(), + project_root: profile.project_root.clone(), + label: "pyproject.toml parse".to_string(), + command: pyproject_path, + phase, + status: VerificationStatus::Failed, + failure_kind: Some(VerificationFailureKind::Config), + duration_ms: 0, + truncated_output: truncate_output(error, max_output_bytes), + }]; + let summary_text = render_report_summary( + "python", + &profile.project_root, + phase, + VerificationStatus::Failed, + &steps, + ); + Some(VerificationReport { + report_id: next_report_id(), + phase, + adapter_id: "python".to_string(), + project_root: profile.project_root.clone(), + touched_paths, + status: VerificationStatus::Failed, + summary_text, + steps, + }) +} + +fn python_quick_steps( + profile: &PythonProjectProfile, + touched_paths: &[PathBuf], +) -> Vec { + let python_files = python_source_targets(&profile.project_root, touched_paths); + if profile.has_ruff && !python_files.is_empty() { + return vec![python_step( + profile, + PythonStepKind::RuffCheck, + "ruff check", + python_module_command(&profile.runner, "ruff", &["check"], &python_files), + )]; + } + if profile.has_mypy { + let targets = derive_mypy_targets_from_touched(&profile.project_root, touched_paths); + if !targets.is_empty() { + return vec![python_step( + profile, + PythonStepKind::Mypy, + "mypy", + python_module_command(&profile.runner, "mypy", &[], &targets), 
+ )]; + } + } + if python_files.is_empty() { + return Vec::new(); + } + vec![python_step( + profile, + PythonStepKind::PyCompile, + "python -m py_compile", + python_module_command(&profile.runner, "py_compile", &[], &python_files), + )] +} + +fn python_final_steps( + profile: &PythonProjectProfile, + touched_paths: &[PathBuf], +) -> Vec { + let mut steps = Vec::new(); + debug_assert_eq!( + profile.test_root_present, + profile.project_root.join("tests").is_dir() + ); + if profile.has_ruff { + steps.push(python_step( + profile, + PythonStepKind::RuffCheck, + "ruff check", + python_module_command( + &profile.runner, + "ruff", + &["check"], + std::slice::from_ref(&profile.project_root), + ), + )); + } + if profile.has_mypy { + let targets = if profile.typed_targets.is_empty() { + let derived = derive_mypy_targets_from_touched(&profile.project_root, touched_paths); + if derived.is_empty() { + vec![profile.project_root.clone()] + } else { + derived + } + } else { + profile.typed_targets.clone() + }; + steps.push(python_step( + profile, + PythonStepKind::Mypy, + "mypy", + python_module_command(&profile.runner, "mypy", &[], &targets), + )); + } + if profile.has_pytest { + steps.push(python_step( + profile, + PythonStepKind::Pytest, + "pytest", + python_module_command( + &profile.runner, + "pytest", + &[], + std::slice::from_ref(&profile.project_root), + ), + )); + } + steps +} + +fn python_legacy_steps( + profile: &PythonProjectProfile, + touched_paths: &[PathBuf], +) -> Vec { + let mut steps = python_quick_steps(profile, touched_paths); + steps.extend(python_final_steps(profile, touched_paths)); + dedupe_steps(&mut steps); + steps +} + +fn python_step( + profile: &PythonProjectProfile, + step_kind: PythonStepKind, + label: &str, + command: Vec, +) -> PlannedStep { + PlannedStep { + label: label.to_string(), + command, + diagnostics: StepDiagnostics::Python { + launcher_kind: profile.launcher_kind, + step_kind, + }, + } +} + +fn dedupe_steps(steps: &mut Vec) { + let mut 
seen = Vec::::new(); + steps.retain(|step| { + let key = step.label.clone(); + if seen.contains(&key) { + false + } else { + seen.push(key); + true + } + }); +} + +fn run_planned_steps( + adapter_id: &str, + project_root: &Path, + touched_paths: Vec, + phase: VerificationPhase, + steps: Vec, + timeout: Duration, + max_output_bytes: usize, +) -> VerificationReport { + let mut reports = Vec::new(); + let mut report_status = if steps.is_empty() { + VerificationStatus::Skipped + } else { + VerificationStatus::Passed + }; + let mut skip_remaining = false; + + for step in steps { + if skip_remaining { + reports.push(VerificationStepReport { + adapter: adapter_id.to_string(), + project_root: project_root.to_path_buf(), + label: step.label.clone(), + command: step.command.join(" "), + phase, + status: VerificationStatus::Skipped, + failure_kind: None, + duration_ms: 0, + truncated_output: String::new(), + }); + continue; + } + + let outcome = run_step(project_root, &step, timeout, max_output_bytes); + match outcome { + StepOutcome::Passed { body, duration_ms } => reports.push(VerificationStepReport { + adapter: adapter_id.to_string(), + project_root: project_root.to_path_buf(), + label: step.label.clone(), + command: step.command.join(" "), + phase, + status: VerificationStatus::Passed, + failure_kind: None, + duration_ms, + truncated_output: body, + }), + StepOutcome::Failed { + body, + duration_ms, + failure_kind, + } => { + report_status = VerificationStatus::Failed; + skip_remaining = true; + reports.push(VerificationStepReport { + adapter: adapter_id.to_string(), + project_root: project_root.to_path_buf(), + label: step.label.clone(), + command: step.command.join(" "), + phase, + status: VerificationStatus::Failed, + failure_kind, + duration_ms, + truncated_output: body, + }); + } + StepOutcome::Unavailable { + message, + duration_ms, + failure_kind, + } => { + report_status = VerificationStatus::Unavailable; + skip_remaining = true; + 
reports.push(VerificationStepReport { + adapter: adapter_id.to_string(), + project_root: project_root.to_path_buf(), + label: step.label.clone(), + command: step.command.join(" "), + phase, + status: VerificationStatus::Unavailable, + failure_kind, + duration_ms, + truncated_output: message, + }); + } + } + } + + let summary_text = + render_report_summary(adapter_id, project_root, phase, report_status, &reports); + VerificationReport { + report_id: next_report_id(), + phase, + adapter_id: adapter_id.to_string(), + project_root: project_root.to_path_buf(), + touched_paths, + status: report_status, + summary_text, + steps: reports, + } +} + +fn render_report_summary( + adapter_id: &str, + project_root: &Path, + phase: VerificationPhase, + status: VerificationStatus, + steps: &[VerificationStepReport], +) -> String { + if adapter_id == "python" { + return render_python_report_summary(adapter_id, project_root, phase, status, steps); + } + + let mut summary = format!( + "[verifier:{}:{}] {} ({})", + phase.as_str(), + adapter_id, + status.as_str(), + project_root.display() + ); + if steps.is_empty() { + summary.push_str("\n[verifier] no verification steps were planned"); + return summary; + } + for step in steps { + let label = match step.status { + VerificationStatus::Passed => "ok", + VerificationStatus::Failed => "FAIL", + VerificationStatus::Skipped => "skipped", + VerificationStatus::Unavailable => "unavailable", + }; + let _ = writeln!(summary, "\n[verifier] {}: {label}", step.label); + if !step.truncated_output.trim().is_empty() { + summary.push_str(&step.truncated_output); + } + } + summary.trim_end().to_string() +} + +fn render_python_report_summary( + adapter_id: &str, + project_root: &Path, + phase: VerificationPhase, + status: VerificationStatus, + steps: &[VerificationStepReport], +) -> String { + let mut summary = format!( + "[verifier:{}:{}] {} ({})", + phase.as_str(), + adapter_id, + status.as_str(), + project_root.display() + ); + if steps.is_empty() { + 
summary.push_str("\n[verifier] no verification steps were planned"); + return summary; + } + let primary = steps + .iter() + .find(|step| !step.status.is_success()) + .unwrap_or(&steps[0]); + let label = match primary.status { + VerificationStatus::Passed => "ok", + VerificationStatus::Failed => "FAIL", + VerificationStatus::Skipped => "skipped", + VerificationStatus::Unavailable => "unavailable", + }; + let failure_suffix = primary + .failure_kind + .map(|kind| format!(" ({})", kind.as_str())) + .unwrap_or_default(); + let _ = writeln!( + summary, + "\n[verifier] {}: {label}{failure_suffix}", + primary.label + ); + if !primary.truncated_output.trim().is_empty() { + summary.push_str(&primary.truncated_output); + } + summary.trim_end().to_string() +} + +fn package_manager_run_script(manager: PackageManager, script: &str) -> Vec { + match manager { + PackageManager::Npm => vec![ + "npm".to_string(), + "run".to_string(), + "--silent".to_string(), + script.to_string(), + ], + PackageManager::Pnpm => vec!["pnpm".to_string(), "run".to_string(), script.to_string()], + PackageManager::Yarn => vec!["yarn".to_string(), script.to_string()], + PackageManager::Bun => vec!["bun".to_string(), "run".to_string(), script.to_string()], + } +} + +fn package_manager_exec(manager: PackageManager, binary: &str, args: &[&str]) -> Vec { + let mut command = match manager { + PackageManager::Npm => vec![ + "npm".to_string(), + "exec".to_string(), + "--".to_string(), + binary.to_string(), + ], + PackageManager::Pnpm => vec!["pnpm".to_string(), "exec".to_string(), binary.to_string()], + PackageManager::Yarn => vec!["yarn".to_string(), "exec".to_string(), binary.to_string()], + PackageManager::Bun => vec!["bun".to_string(), "x".to_string(), binary.to_string()], + }; + command.extend(args.iter().map(ToString::to_string)); + command +} + +fn python_module_command( + runner: &PythonRunner, + module: &str, + extra_args: &[&str], + paths: &[PathBuf], +) -> Vec { + let mut command = 
runner.command_prefix.clone(); + command.push(module.to_string()); + command.extend(extra_args.iter().map(ToString::to_string)); + command.extend(paths.iter().map(|path| path.display().to_string())); + command } -/// Declarative configuration for the built-in cargo-based verifier. -#[allow(clippy::struct_excessive_bools)] -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct CargoVerifierConfig { - pub run_check: bool, - pub run_clippy: bool, - pub run_fmt: bool, - pub run_test: bool, - pub timeout: Duration, +fn has_script(package_value: &Value, script: &str) -> bool { + package_value + .get("scripts") + .and_then(Value::as_object) + .is_some_and(|scripts| scripts.get(script).and_then(Value::as_str).is_some()) } -impl Default for CargoVerifierConfig { - fn default() -> Self { - Self { - run_check: true, - run_clippy: true, - run_fmt: true, - run_test: true, - timeout: Duration::from_mins(2), - } +fn detect_package_manager(root: &Path) -> PackageManager { + if root.join("pnpm-lock.yaml").is_file() { + PackageManager::Pnpm + } else if root.join("yarn.lock").is_file() { + PackageManager::Yarn + } else if root.join("bun.lockb").is_file() || root.join("bun.lock").is_file() { + PackageManager::Bun + } else { + PackageManager::Npm } } -/// Maximum bytes kept per cargo step before the summary is truncated. -const MAX_STEP_OUTPUT_BYTES: usize = 2_048; +fn build_python_profile_for_path(path: &Path) -> Option { + let project_root = nearest_python_root(path)?; + build_python_profile_for_root(&project_root) +} -/// Tool names recognised as writing to a file on disk. 
-const WRITE_TOOLS: &[&str] = &["edit_file", "write_file", "Edit", "Write"]; +fn build_python_profile_for_root(root: &Path) -> Option { + let project_root = normalize_local_path(root)?; + let pyproject_path = project_root.join("pyproject.toml"); + let (pyproject_parsed, pyproject_value, pyproject_parse_error) = + parse_optional_pyproject(&pyproject_path); + let (runner, launcher_kind) = detect_python_runner(&project_root); + let test_root_present = project_root.join("tests").is_dir(); + let has_ruff = python_has_ruff(&project_root, pyproject_value.as_ref()); + let has_mypy = python_has_mypy(&project_root, pyproject_value.as_ref()); + let has_pytest = python_has_pytest(&project_root, pyproject_value.as_ref(), test_root_present); + let typed_targets = python_typed_targets(&project_root, pyproject_value.as_ref()); -/// Built-in verifier that drives cargo against the crate that owns the -/// edited file. Runs checks sequentially with early-exit on the first failure. -pub struct CargoVerifier { - config: CargoVerifierConfig, + Some(PythonProjectProfile { + project_root, + runner, + launcher_kind, + pyproject_parsed, + has_ruff, + has_mypy, + has_pytest, + typed_targets, + test_root_present, + pyproject_path: pyproject_path.is_file().then_some(pyproject_path), + pyproject_parse_error, + }) } -impl CargoVerifier { - #[must_use] - pub fn new(config: CargoVerifierConfig) -> Self { - Self { config } +fn parse_optional_pyproject(path: &Path) -> (bool, Option, Option) { + if !path.is_file() { + return (true, None, None); + } + match fs::read_to_string(path) { + Ok(contents) => match contents.parse::() { + Ok(value) => (true, Some(value), None), + Err(error) => ( + false, + None, + Some(format!("failed to parse pyproject.toml: {error}")), + ), + }, + Err(error) => ( + false, + None, + Some(format!("failed to read pyproject.toml: {error}")), + ), } } -impl Verifier for CargoVerifier { - fn verify(&self, tool_name: &str, tool_input: &str) -> Option { - if 
!WRITE_TOOLS.contains(&tool_name) { - return None; - } - let file_path = extract_file_path(tool_input)?; - if file_path.extension().and_then(|ext| ext.to_str()) != Some("rs") { - return None; - } - let manifest = nearest_cargo_manifest(&file_path)?; +fn detect_python_runner(root: &Path) -> (PythonRunner, PythonLauncherKind) { + if root.join("uv.lock").is_file() { + return ( + PythonRunner { + command_prefix: vec![ + "uv".to_string(), + "run".to_string(), + "python".to_string(), + "-m".to_string(), + ], + }, + PythonLauncherKind::Uv, + ); + } + if root.join("poetry.lock").is_file() { + return ( + PythonRunner { + command_prefix: vec![ + "poetry".to_string(), + "run".to_string(), + "python".to_string(), + "-m".to_string(), + ], + }, + PythonLauncherKind::Poetry, + ); + } + if let Some(interpreter) = find_venv_python(root) { + return ( + PythonRunner { + command_prefix: vec![interpreter.display().to_string(), "-m".to_string()], + }, + PythonLauncherKind::Venv, + ); + } + ( + PythonRunner { + command_prefix: vec!["python".to_string(), "-m".to_string()], + }, + PythonLauncherKind::Global, + ) +} - let steps = planned_steps(&self.config); - if steps.is_empty() { - return None; +fn find_venv_python(root: &Path) -> Option { + let env_names = [".venv", "venv", "env"]; + let candidate_suffixes = if cfg!(windows) { + vec![ + PathBuf::from("Scripts/python.exe"), + PathBuf::from("Scripts/python"), + ] + } else { + vec![PathBuf::from("bin/python"), PathBuf::from("bin/python3")] + }; + for env_name in env_names { + for suffix in &candidate_suffixes { + let candidate = root.join(env_name).join(suffix); + if candidate.is_file() { + return Some(candidate); + } } + } + None +} - let mut summary = String::new(); - let mut overall_passed = true; - let mut skip_remaining = false; +fn python_has_ruff(root: &Path, pyproject: Option<&TomlValue>) -> bool { + RUFF_CONFIG_FILES + .iter() + .any(|name| root.join(name).is_file()) + || pyproject.is_some_and(|value| toml_contains_path(value, 
&["tool", "ruff"])) +} - for step in steps { - if skip_remaining { - writeln!(summary, "[verifier] {}: skipped", step.label).ok(); - continue; - } - let outcome = run_step(&step, &manifest, self.config.timeout); - match outcome { - StepOutcome::Passed => { - writeln!(summary, "[verifier] {}: ok", step.label).ok(); - } - StepOutcome::Failed { body } => { - overall_passed = false; - skip_remaining = true; - writeln!(summary, "[verifier] {}: FAIL", step.label).ok(); - summary.push_str(&truncate_output(&body)); - summary.push('\n'); - } - StepOutcome::Unavailable { message } => { - overall_passed = false; - skip_remaining = true; - writeln!( - summary, - "[verifier] {}: could not run ({message})", - step.label - ) - .ok(); - } +fn python_has_mypy(root: &Path, pyproject: Option<&TomlValue>) -> bool { + MYPY_CONFIG_FILES + .iter() + .any(|name| root.join(name).is_file()) + || pyproject.is_some_and(|value| toml_contains_path(value, &["tool", "mypy"])) + || file_contains(root.join("setup.cfg"), "[mypy]") +} + +fn python_has_pytest(root: &Path, pyproject: Option<&TomlValue>, test_root_present: bool) -> bool { + PYTEST_CONFIG_FILES + .iter() + .any(|name| root.join(name).is_file()) + || root.join("conftest.py").is_file() + || test_root_present + || pyproject + .is_some_and(|value| toml_contains_path(value, &["tool", "pytest", "ini_options"])) +} + +fn python_typed_targets(root: &Path, pyproject: Option<&TomlValue>) -> Vec { + let Some(pyproject) = pyproject else { + return Vec::new(); + }; + let mut targets = toml_string_targets(pyproject, &["tool", "mypy", "files"]) + .into_iter() + .map(|value| { + let path = PathBuf::from(value); + if path.is_absolute() { + path + } else { + root.join(path) } - } + }) + .collect::>(); + dedupe_paths(&mut targets); + targets +} + +fn derive_mypy_targets_from_touched(root: &Path, touched_paths: &[PathBuf]) -> Vec { + let python_files = python_source_targets(root, touched_paths); + let mut targets = python_files + .iter() + .map(|path| 
package_root_for_python_file(root, path)) + .collect::>(); + dedupe_paths(&mut targets); + targets +} - Some(VerificationResult { - passed: overall_passed, - summary: summary.trim_end().to_string(), +fn python_source_targets(root: &Path, touched_paths: &[PathBuf]) -> Vec { + let mut paths = touched_paths + .iter() + .filter_map(|path| normalize_project_path(root, path)) + .filter(|path| { + path.extension() + .and_then(|ext| ext.to_str()) + .is_some_and(|ext| ext.eq_ignore_ascii_case("py")) }) + .collect::>(); + dedupe_paths(&mut paths); + paths +} + +fn package_root_for_python_file(root: &Path, path: &Path) -> PathBuf { + let Some(mut cursor) = path.parent().map(Path::to_path_buf) else { + return path.to_path_buf(); + }; + let mut package_root = None; + loop { + if !cursor.starts_with(root) || !cursor.join("__init__.py").is_file() { + break; + } + package_root = Some(cursor.clone()); + let Some(parent) = cursor.parent() else { + break; + }; + cursor = parent.to_path_buf(); } + package_root.unwrap_or_else(|| path.to_path_buf()) } -/// Prepend the verifier summary to the tool output using a visible delimiter. 
-#[must_use] -pub fn prepend_verifier_summary(summary: &str, output: String) -> String { - if summary.is_empty() { - return output; +fn dedupe_paths(paths: &mut Vec) { + let mut seen = Vec::::new(); + paths.retain(|path| { + if seen.iter().any(|existing| existing == path) { + false + } else { + seen.push(path.clone()); + true + } + }); +} + +fn normalize_local_path(path: &Path) -> Option { + if path.is_absolute() { + Some(path.to_path_buf()) + } else { + std::env::current_dir().ok().map(|cwd| cwd.join(path)) } - if output.trim().is_empty() { - return summary.to_string(); +} + +fn normalize_project_path(root: &Path, path: &Path) -> Option { + let absolute = normalize_local_path(path)?; + absolute.starts_with(root).then_some(absolute) +} + +fn toml_contains_path(value: &TomlValue, path: &[&str]) -> bool { + toml_value_at(value, path).is_some() +} + +fn toml_string_targets(value: &TomlValue, path: &[&str]) -> Vec { + let Some(value) = toml_value_at(value, path) else { + return Vec::new(); + }; + if let Some(raw) = value.as_str() { + return raw + .split([',', '\n']) + .map(str::trim) + .filter(|part| !part.is_empty()) + .map(ToOwned::to_owned) + .collect(); } - format!("{output}\n\n[verifier output]\n{summary}") + value + .as_array() + .into_iter() + .flatten() + .filter_map(TomlValue::as_str) + .map(str::trim) + .filter(|part| !part.is_empty()) + .map(ToOwned::to_owned) + .collect() } -fn extract_file_path(input: &str) -> Option { - let value: Value = serde_json::from_str(input).ok()?; - let path_str = value - .get("file_path") - .or_else(|| value.get("filePath")) - .or_else(|| value.get("path"))? 
- .as_str()?; - Some(PathBuf::from(path_str)) +fn toml_value_at<'a>(value: &'a TomlValue, path: &[&str]) -> Option<&'a TomlValue> { + let mut current = value; + for key in path { + current = current.get(*key)?; + } + Some(current) +} + +fn file_contains(path: impl AsRef, needle: &str) -> bool { + fs::read_to_string(path.as_ref()).is_ok_and(|contents| contents.contains(needle)) } -fn nearest_cargo_manifest(file_path: &Path) -> Option { - let start = if file_path.is_absolute() { - file_path.to_path_buf() +fn nearest_file(start: &Path, file_name: &str) -> Option { + let start = if start.is_absolute() { + start.to_path_buf() } else { - std::env::current_dir().ok()?.join(file_path) + std::env::current_dir().ok()?.join(start) + }; + let mut cursor = if start.is_dir() { + start + } else { + start.parent()?.to_path_buf() }; - let mut cursor = start.parent()?.to_path_buf(); loop { - let candidate = cursor.join("Cargo.toml"); + let candidate = cursor.join(file_name); if candidate.is_file() { return Some(candidate); } @@ -169,91 +1533,197 @@ fn nearest_cargo_manifest(file_path: &Path) -> Option { } } -#[derive(Debug, Clone)] -struct PlannedStep { - label: &'static str, - args: Vec<&'static str>, +fn nearest_python_root(start: &Path) -> Option { + let start = normalize_local_path(start)?; + let mut cursor = if start.is_dir() { + start + } else { + start.parent()?.to_path_buf() + }; + loop { + for marker in PYTHON_ROOT_MARKERS { + if cursor.join(marker).is_file() { + return Some(cursor.clone()); + } + } + if !cursor.pop() { + return None; + } + } } -enum StepOutcome { - Passed, - Failed { body: String }, - Unavailable { message: String }, +fn classify_step_failure(step: &PlannedStep, body: &str) -> Option { + match step.diagnostics { + StepDiagnostics::Generic => None, + StepDiagnostics::Python { + launcher_kind, + step_kind, + } => Some(classify_python_failure_kind(launcher_kind, step_kind, body)), + } } -fn planned_steps(config: &CargoVerifierConfig) -> Vec { - let mut steps 
= Vec::new(); - if config.run_check { - steps.push(PlannedStep { - label: "cargo check", - args: vec!["check", "--quiet", "--message-format=short"], - }); +fn classify_step_timeout(step: &PlannedStep) -> Option { + match step.diagnostics { + StepDiagnostics::Generic => None, + StepDiagnostics::Python { .. } => Some(VerificationFailureKind::Timeout), } - if config.run_clippy { - steps.push(PlannedStep { - label: "cargo clippy", - args: vec![ - "clippy", - "--quiet", - "--message-format=short", - "--", - "-D", - "warnings", - ], - }); +} + +fn classify_step_unavailable(step: &PlannedStep, message: &str) -> Option { + match step.diagnostics { + StepDiagnostics::Generic => None, + StepDiagnostics::Python { launcher_kind, .. } => { + Some(classify_python_unavailable_kind(launcher_kind, message)) + } } - if config.run_fmt { - steps.push(PlannedStep { - label: "cargo fmt --check", - args: vec!["fmt", "--", "--check"], - }); +} + +fn classify_python_failure_kind( + launcher_kind: PythonLauncherKind, + step_kind: PythonStepKind, + body: &str, +) -> VerificationFailureKind { + let lower = body.to_ascii_lowercase(); + if is_python_config_failure(&lower) { + return VerificationFailureKind::Config; } - if config.run_test { - steps.push(PlannedStep { - label: "cargo test", - args: vec!["test", "--quiet", "--no-fail-fast"], - }); + if is_python_tool_unavailable(step_kind, &lower) { + return VerificationFailureKind::ToolUnavailable; } - steps + if is_python_environment_failure(launcher_kind, &lower) { + return VerificationFailureKind::Environment; + } + VerificationFailureKind::Code } -fn run_step(step: &PlannedStep, manifest: &Path, timeout: Duration) -> StepOutcome { - let mut command = Command::new("cargo"); - command.arg(step.args[0]); - command.arg("--manifest-path").arg(manifest); - for arg in step.args.iter().skip(1) { - command.arg(arg); +fn classify_python_unavailable_kind( + launcher_kind: PythonLauncherKind, + message: &str, +) -> VerificationFailureKind { + let lower = 
message.to_ascii_lowercase(); + if matches!( + launcher_kind, + PythonLauncherKind::Uv | PythonLauncherKind::Poetry + ) && (lower.contains("not found") + || lower.contains("cannot find") + || lower.contains("could not find")) + { + VerificationFailureKind::ToolUnavailable + } else { + VerificationFailureKind::Environment + } +} + +fn is_python_config_failure(lower: &str) -> bool { + (lower.contains("pyproject.toml") + && (lower.contains("parse") || lower.contains("invalid") || lower.contains("config"))) + || lower.contains("invalid configuration") + || lower.contains("failed to parse") + || lower.contains("toml parse error") +} + +fn is_python_tool_unavailable(step_kind: PythonStepKind, lower: &str) -> bool { + let module_name = match step_kind { + PythonStepKind::RuffCheck => "ruff", + PythonStepKind::Mypy => "mypy", + PythonStepKind::Pytest => "pytest", + PythonStepKind::PyCompile => "py_compile", + }; + lower.contains(&format!("no module named {module_name}")) + || lower.contains(&format!("no module named '{module_name}'")) + || lower.contains(&format!("module named {module_name}")) + || lower.contains(&format!("{module_name} is not installed")) + || (lower.contains("command not found") && lower.contains(module_name)) + || (lower.contains("not recognized as an internal or external command") + && lower.contains(module_name)) +} + +fn is_python_environment_failure(launcher_kind: PythonLauncherKind, lower: &str) -> bool { + if lower.contains("virtualenv") + || lower.contains("venv") + || lower.contains("interpreter") + || lower.contains("dependency resolution") + || lower.contains("environment") + || lower.contains("failed to create") + || lower.contains("no such file or directory") + || lower.contains("cannot find the path specified") + || lower.contains("poetry could not find") + { + return true; } + matches!( + launcher_kind, + PythonLauncherKind::Venv | PythonLauncherKind::Global + ) && (lower.contains("python executable") || lower.contains("python was not 
found")) +} + +fn run_step( + cwd: &Path, + step: &PlannedStep, + timeout: Duration, + max_output_bytes: usize, +) -> StepOutcome { + let mut command = Command::new(&step.command[0]); + command.current_dir(cwd); + command.stdin(std::process::Stdio::null()); + command.stdout(std::process::Stdio::piped()); + command.stderr(std::process::Stdio::piped()); command.env("CARGO_TERM_COLOR", "never"); + for arg in step.command.iter().skip(1) { + command.arg(arg); + } + let started = Instant::now(); match spawn_with_timeout(command, timeout) { Ok(output) => { + let duration_ms = duration_millis_u64(started.elapsed()); + let mut body = String::new(); + body.push_str(&String::from_utf8_lossy(&output.stdout)); + if !output.stderr.is_empty() { + if !body.is_empty() { + body.push('\n'); + } + body.push_str(&String::from_utf8_lossy(&output.stderr)); + } + let body = truncate_output(&body, max_output_bytes); if output.status.success() { - StepOutcome::Passed + StepOutcome::Passed { body, duration_ms } } else { - let mut body = String::new(); - body.push_str(&String::from_utf8_lossy(&output.stdout)); - if !output.stderr.is_empty() { - if !body.is_empty() { - body.push('\n'); - } - body.push_str(&String::from_utf8_lossy(&output.stderr)); - } - if body.trim().is_empty() { - body = format!("exit status: {}", output.status); + let failure_kind = classify_step_failure(step, &body); + StepOutcome::Failed { + body, + duration_ms, + failure_kind, } - StepOutcome::Failed { body } } } - Err(SpawnError::Timeout) => StepOutcome::Failed { - body: format!("step timed out after {}s", timeout.as_secs()), - }, - Err(SpawnError::Io(error)) => StepOutcome::Unavailable { - message: error.to_string(), - }, + Err(SpawnError::Timeout) => { + let body = truncate_output( + &format!("step timed out after {}s", timeout.as_secs()), + max_output_bytes, + ); + StepOutcome::Failed { + failure_kind: classify_step_timeout(step), + body, + duration_ms: duration_millis_u64(started.elapsed()), + } + } + 
Err(SpawnError::Io(error)) => { + let message = truncate_output(&error.to_string(), max_output_bytes); + StepOutcome::Unavailable { + failure_kind: classify_step_unavailable(step, &message), + message, + duration_ms: duration_millis_u64(started.elapsed()), + } + } } } +fn duration_millis_u64(duration: Duration) -> u64 { + u64::try_from(duration.as_millis()).unwrap_or(u64::MAX) +} + +#[derive(Debug)] enum SpawnError { Timeout, Io(std::io::Error), @@ -266,58 +1736,54 @@ fn spawn_with_timeout( use std::sync::mpsc; use std::thread; - command.stdin(std::process::Stdio::null()); - command.stdout(std::process::Stdio::piped()); - command.stderr(std::process::Stdio::piped()); let mut child = command.spawn().map_err(SpawnError::Io)?; - let stdout = child.stdout.take(); let stderr = child.stderr.take(); let (tx, rx) = mpsc::channel(); - let stdout_handle = stdout.map(|mut s| { + let stdout_handle = stdout.map(|mut stream| { let tx = tx.clone(); thread::spawn(move || { - let mut buf = Vec::new(); - let _ = std::io::Read::read_to_end(&mut s, &mut buf); - let _ = tx.send(("stdout", buf)); + let mut bytes = Vec::new(); + let _ = std::io::Read::read_to_end(&mut stream, &mut bytes); + let _ = tx.send(("stdout", bytes)); }) }); - let stderr_handle = stderr.map(|mut s| { + let stderr_handle = stderr.map(|mut stream| { let tx = tx.clone(); thread::spawn(move || { - let mut buf = Vec::new(); - let _ = std::io::Read::read_to_end(&mut s, &mut buf); - let _ = tx.send(("stderr", buf)); + let mut bytes = Vec::new(); + let _ = std::io::Read::read_to_end(&mut stream, &mut bytes); + let _ = tx.send(("stderr", bytes)); }) }); drop(tx); - let deadline = std::time::Instant::now() + timeout; + let deadline = Instant::now() + timeout; loop { if let Some(status) = child.try_wait().map_err(SpawnError::Io)? 
{ - if let Some(h) = stdout_handle { - let _ = h.join(); + if let Some(handle) = stdout_handle { + let _ = handle.join(); } - if let Some(h) = stderr_handle { - let _ = h.join(); + if let Some(handle) = stderr_handle { + let _ = handle.join(); } - let mut stdout_bytes = Vec::new(); - let mut stderr_bytes = Vec::new(); + let mut stdout = Vec::new(); + let mut stderr = Vec::new(); while let Ok((which, bytes)) = rx.try_recv() { if which == "stdout" { - stdout_bytes = bytes; + stdout = bytes; } else { - stderr_bytes = bytes; + stderr = bytes; } } return Ok(std::process::Output { status, - stdout: stdout_bytes, - stderr: stderr_bytes, + stdout, + stderr, }); } - if std::time::Instant::now() >= deadline { + if Instant::now() >= deadline { let _ = child.kill(); let _ = child.wait(); return Err(SpawnError::Timeout); @@ -326,13 +1792,29 @@ fn spawn_with_timeout( } } -/// Trim output to `MAX_STEP_OUTPUT_BYTES`, preserving the head plus any lines -/// containing `error` or `warning` so the model keeps the actionable signal. -fn truncate_output(body: &str) -> String { - if body.len() <= MAX_STEP_OUTPUT_BYTES { +/// Merge a structured report back into the legacy tool-result channel. +#[must_use] +pub fn prepend_verifier_summary(summary: &str, output: String) -> String { + if summary.is_empty() { + return output; + } + if output.trim().is_empty() { + return summary.to_string(); + } + format!("{output}\n\n[verifier output]\n{summary}") +} + +/// Trim output to the configured byte budget, preserving the head, tail, and +/// diagnostically relevant lines. 
+#[must_use] +pub fn truncate_output(body: &str, max_bytes: usize) -> String { + if body.len() <= max_bytes { return body.to_string(); } - let head_budget = MAX_STEP_OUTPUT_BYTES * 2 / 3; + let head_budget = max_bytes / 2; + let tail_budget = max_bytes / 4; + let signal_budget = max_bytes.saturating_sub(head_budget + tail_budget + 32); + let mut head = String::new(); for line in body.lines() { if head.len() + line.len() + 1 > head_budget { @@ -341,132 +1823,328 @@ fn truncate_output(body: &str) -> String { head.push_str(line); head.push('\n'); } - let mut signal_lines = Vec::new(); - for line in body.lines() { - let lower = line.to_ascii_lowercase(); - if lower.contains("error") || lower.contains("warning") { - signal_lines.push(line); - } - } + let mut tail = String::new(); - let tail_budget = MAX_STEP_OUTPUT_BYTES - head.len(); - for line in signal_lines.into_iter().rev() { + for line in body.lines().rev() { if tail.len() + line.len() + 1 > tail_budget { break; } tail = format!("{line}\n{tail}"); } - format!("{head}... (truncated) ...\n{tail}") + + let mut signals = String::new(); + for line in body.lines() { + let lower = line.to_ascii_lowercase(); + if lower.contains("error") + || lower.contains("warning") + || lower.contains("failed") + || lower.contains("panic") + || lower.contains("traceback") + { + if signals.len() + line.len() + 1 > signal_budget { + break; + } + signals.push_str(line); + signals.push('\n'); + } + } + + let mut out = String::new(); + out.push_str(&head); + out.push_str("... (truncated) ...\n"); + if !signals.trim().is_empty() { + out.push_str(&signals); + out.push_str("... (tail) ...\n"); + } + out.push_str(&tail); + out.trim_end().to_string() +} + +fn extract_file_path(input: &str) -> Option { + let value: Value = serde_json::from_str(input).ok()?; + let path = value + .get("file_path") + .or_else(|| value.get("filePath")) + .or_else(|| value.get("path"))? 
+ .as_str()?; + Some(PathBuf::from(path)) +} + +fn next_report_id() -> String { + format!("vr-{}", REPORT_COUNTER.fetch_add(1, Ordering::Relaxed)) } #[cfg(test)] mod tests { use super::*; - use std::fs; + use std::time::{SystemTime, UNIX_EPOCH}; + + fn temp_dir(tag: &str) -> PathBuf { + let nanos = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("time should be after epoch") + .as_nanos(); + let root = std::env::temp_dir().join(format!("verifier-unit-{tag}-{nanos}")); + fs::create_dir_all(&root).expect("temp dir should create"); + root + } #[test] - fn verify_returns_none_for_non_write_tools() { - let v = CargoVerifier::new(CargoVerifierConfig::default()); - assert!(v.verify("read_file", r#"{"file_path":"lib.rs"}"#).is_none()); - assert!(v.verify("grep_search", "{}").is_none()); + fn truncate_output_keeps_signal_lines_and_tail() { + let mut body = String::new(); + for index in 0..400 { + let _ = writeln!(body, "noise line {index}"); + } + body.push_str("warning: some warning\n"); + body.push_str("Traceback (most recent call last):\n"); + body.push_str("panic: boom\n"); + let truncated = truncate_output(&body, 512); + assert!(truncated.contains("warning: some warning")); + assert!(truncated.contains("Traceback")); + assert!(truncated.contains("panic: boom")); + assert!(truncated.contains("... 
(truncated) ...")); } #[test] - fn verify_returns_none_for_non_rust_extension() { - let v = CargoVerifier::new(CargoVerifierConfig::default()); - assert!(v - .verify("edit_file", r#"{"file_path":"notes.md"}"#) - .is_none()); - assert!(v - .verify("write_file", r#"{"file_path":"config.json"}"#) - .is_none()); + fn prepend_verifier_summary_merges_with_output() { + let merged = prepend_verifier_summary("[verifier] ok", "edited".to_string()); + assert!(merged.contains("edited")); + assert!(merged.contains("[verifier output]")); } #[test] - fn verify_returns_none_when_no_manifest_found() { - let v = CargoVerifier::new(CargoVerifierConfig { - run_check: true, - run_clippy: false, - run_fmt: false, - run_test: false, - timeout: Duration::from_secs(1), - }); - // /tmp has no Cargo.toml up the tree. - let input = r#"{"file_path":"/tmp/__claw_verifier_test_missing.rs"}"#; - assert!(v.verify("edit_file", input).is_none()); + fn extract_file_path_supports_known_keys() { + assert_eq!( + extract_file_path(r#"{"file_path":"src/lib.rs"}"#), + Some(PathBuf::from("src/lib.rs")) + ); + assert_eq!( + extract_file_path(r#"{"filePath":"src/lib.rs"}"#), + Some(PathBuf::from("src/lib.rs")) + ); + assert_eq!( + extract_file_path(r#"{"path":"src/lib.rs"}"#), + Some(PathBuf::from("src/lib.rs")) + ); + } + + #[test] + fn gate_status_defaults_to_not_required() { + let gate = VerificationGateStatus::not_required(); + assert!(!gate.attempted); + assert!(gate.passed); + } + + #[test] + fn python_profile_parses_pyproject_and_detects_tools() { + let root = temp_dir("pyproject"); + fs::write( + root.join("pyproject.toml"), + r#" +[tool.ruff] +line-length = 100 + +[tool.mypy] +files = ["app", "tests"] + +[tool.pytest.ini_options] +addopts = "-q" +"#, + ) + .expect("pyproject should write"); + + let profile = build_python_profile_for_root(&root).expect("profile should build"); + + assert!(profile.pyproject_parsed); + assert!(profile.has_ruff); + assert!(profile.has_mypy); + 
assert!(profile.has_pytest); + assert_eq!( + profile.typed_targets, + vec![root.join("app"), root.join("tests")] + ); + + fs::remove_dir_all(root).expect("temp dir should clean up"); } #[test] - fn extract_file_path_supports_multiple_key_names() { + fn python_profile_marks_invalid_pyproject_as_config_failure() { + let root = temp_dir("bad-pyproject"); + fs::write(root.join("pyproject.toml"), "[tool.ruff\n").expect("pyproject should write"); + + let profile = build_python_profile_for_root(&root).expect("profile should build"); + let report = python_config_failure_report( + &profile, + VerificationPhase::Quick, + vec![root.join("pyproject.toml")], + 2_048, + ) + .expect("invalid pyproject should report failure"); + + assert!(!profile.pyproject_parsed); + assert_eq!(report.status, VerificationStatus::Failed); assert_eq!( - extract_file_path(r#"{"file_path":"a.rs"}"#), - Some(PathBuf::from("a.rs")) + report.steps[0].failure_kind, + Some(VerificationFailureKind::Config) ); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn detect_python_runner_prefers_uv_over_venv() { + let root = temp_dir("uv-runner"); + fs::write(root.join("uv.lock"), "").expect("uv lock should write"); + let venv_python = if cfg!(windows) { + root.join(".venv").join("Scripts").join("python.exe") + } else { + root.join(".venv").join("bin").join("python") + }; + fs::create_dir_all(venv_python.parent().expect("venv parent")) + .expect("venv dir should create"); + fs::write(&venv_python, "").expect("fake interpreter should write"); + + let (runner, launcher_kind) = detect_python_runner(&root); + + assert_eq!(launcher_kind, PythonLauncherKind::Uv); assert_eq!( - extract_file_path(r#"{"filePath":"b.rs"}"#), - Some(PathBuf::from("b.rs")) + runner.command_prefix, + vec![ + "uv".to_string(), + "run".to_string(), + "python".to_string(), + "-m".to_string() + ] ); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn 
detect_python_runner_uses_local_venv_when_present() { + let root = temp_dir("venv-runner"); + let venv_python = if cfg!(windows) { + root.join("venv").join("Scripts").join("python.exe") + } else { + root.join("venv").join("bin").join("python") + }; + fs::create_dir_all(venv_python.parent().expect("venv parent")) + .expect("venv dir should create"); + fs::write(&venv_python, "").expect("fake interpreter should write"); + + let (_runner, launcher_kind) = detect_python_runner(&root); + + assert_eq!(launcher_kind, PythonLauncherKind::Venv); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn detect_python_runner_uses_poetry_when_lock_present() { + let root = temp_dir("poetry-runner"); + fs::write(root.join("poetry.lock"), "").expect("poetry lock should write"); + + let (runner, launcher_kind) = detect_python_runner(&root); + + assert_eq!(launcher_kind, PythonLauncherKind::Poetry); assert_eq!( - extract_file_path(r#"{"path":"c.rs"}"#), - Some(PathBuf::from("c.rs")) + runner.command_prefix, + vec![ + "poetry".to_string(), + "run".to_string(), + "python".to_string(), + "-m".to_string() + ] ); - assert_eq!(extract_file_path("not json"), None); - assert_eq!(extract_file_path(r#"{"other":1}"#), None); + + fs::remove_dir_all(root).expect("temp dir should clean up"); } #[test] - fn nearest_cargo_manifest_walks_up_directories() { - let tmp = tempdir(); - let crate_root = tmp.join("my_crate"); - let nested = crate_root.join("src").join("nested"); - fs::create_dir_all(&nested).unwrap(); - fs::write(crate_root.join("Cargo.toml"), "[package]\nname = \"x\"\n").unwrap(); - let file = nested.join("thing.rs"); - fs::write(&file, "").unwrap(); + fn nearest_python_root_prefers_closest_matching_directory() { + let root = temp_dir("python-root"); + let nested = root.join("pkg").join("inner"); + fs::create_dir_all(&nested).expect("nested dir should create"); + fs::write(root.join("pyproject.toml"), "[project]\nname = 'root'\n") + .expect("root pyproject 
should write"); + fs::write(nested.join("requirements.txt"), "pytest\n") + .expect("nested requirements should write"); + + let detected = nearest_python_root(&nested.join("module.py")).expect("root should resolve"); - let manifest = nearest_cargo_manifest(&file).expect("manifest should be found"); - assert_eq!(manifest, crate_root.join("Cargo.toml")); + assert_eq!(detected, nested); + + fs::remove_dir_all(root).expect("temp dir should clean up"); } #[test] - fn truncate_preserves_error_lines() { - let mut body = String::new(); - for i in 0..2_000 { - writeln!(body, "noise line {i}").unwrap(); - } - body.push_str("error[E0308]: mismatched types\n"); - let truncated = truncate_output(&body); - assert!(truncated.len() <= MAX_STEP_OUTPUT_BYTES + 64); - assert!(truncated.contains("error[E0308]")); - assert!(truncated.contains("... (truncated) ...")); + fn python_quick_steps_fall_back_to_py_compile() { + let root = temp_dir("pycompile"); + fs::write(root.join("requirements.txt"), "pytest\n").expect("requirements should write"); + fs::write(root.join("main.py"), "print('ok')\n").expect("python file should write"); + + let profile = build_python_profile_for_root(&root).expect("profile should build"); + let steps = python_quick_steps(&profile, &[root.join("main.py")]); + + assert_eq!(steps.len(), 1); + assert_eq!(steps[0].label, "python -m py_compile"); + + fs::remove_dir_all(root).expect("temp dir should clean up"); } #[test] - fn prepend_verifier_summary_merges_with_existing_output() { - let merged = prepend_verifier_summary("[verifier] cargo check: ok", "edited 1 file".into()); - assert!(merged.contains("edited 1 file")); - assert!(merged.contains("[verifier output]")); - assert!(merged.contains("cargo check: ok")); + fn derive_mypy_targets_uses_package_root() { + let root = temp_dir("mypy-targets"); + let pkg = root.join("pkg"); + let sub = pkg.join("sub"); + fs::create_dir_all(&sub).expect("package dir should create"); + fs::write(pkg.join("__init__.py"), 
"").expect("init should write"); + fs::write(sub.join("__init__.py"), "").expect("sub init should write"); + fs::write(sub.join("mod.py"), "x = 1\n").expect("module should write"); + + let targets = derive_mypy_targets_from_touched(&root, &[sub.join("mod.py")]); + + assert_eq!(targets, vec![pkg]); + + fs::remove_dir_all(root).expect("temp dir should clean up"); } #[test] - fn prepend_verifier_summary_passes_through_empty_summary() { - let merged = prepend_verifier_summary("", "edited".into()); - assert_eq!(merged, "edited"); - } - - fn tempdir() -> PathBuf { - let base = std::env::temp_dir(); - let unique = format!( - "claw_verifier_{}_{}", - std::process::id(), - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_nanos() + fn classify_python_failures_distinguishes_tool_environment_and_config() { + assert_eq!( + classify_python_failure_kind( + PythonLauncherKind::Global, + PythonStepKind::RuffCheck, + "No module named ruff", + ), + VerificationFailureKind::ToolUnavailable + ); + assert_eq!( + classify_python_failure_kind( + PythonLauncherKind::Poetry, + PythonStepKind::Pytest, + "Poetry could not find a compatible environment", + ), + VerificationFailureKind::Environment + ); + assert_eq!( + classify_python_failure_kind( + PythonLauncherKind::Global, + PythonStepKind::Mypy, + "failed to parse pyproject.toml", + ), + VerificationFailureKind::Config + ); + assert_eq!( + classify_step_timeout(&PlannedStep { + label: "pytest".to_string(), + command: vec!["python".to_string(), "-m".to_string(), "pytest".to_string()], + diagnostics: StepDiagnostics::Python { + launcher_kind: PythonLauncherKind::Global, + step_kind: PythonStepKind::Pytest, + }, + }), + Some(VerificationFailureKind::Timeout) ); - let path = base.join(unique); - fs::create_dir_all(&path).unwrap(); - path } } diff --git a/rust/crates/runtime/tests/verifier_e2e.rs b/rust/crates/runtime/tests/verifier_e2e.rs index d46767c1d6..438cd43dca 100644 --- 
a/rust/crates/runtime/tests/verifier_e2e.rs +++ b/rust/crates/runtime/tests/verifier_e2e.rs @@ -1,13 +1,15 @@ -//! End-to-end tests for `CargoVerifier` — spawn a real temp crate and drive -//! the verifier against it so we catch regressions in manifest discovery, -//! subprocess handling, output truncation, and scope selection. +//! End-to-end tests for `CargoVerifier` using real temp projects. use std::fs; use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicUsize, Ordering}; use std::time::Duration; -use runtime::{CargoVerifier, CargoVerifierConfig, VerificationResult, Verifier}; +use runtime::{ + CargoVerifier, CargoVerifierConfig, VerificationContext, VerificationFailureKind, + VerificationPhase, VerificationReport, VerificationStatus, Verifier, +}; +use serde_json::json; static TMP_COUNTER: AtomicUsize = AtomicUsize::new(0); @@ -35,16 +37,33 @@ fn write_minimal_crate(root: &Path, name: &str, lib_rs: &str) { } fn tool_input(path: &Path) -> String { - format!(r#"{{"file_path":"{}"}}"#, path.display()) + json!({ "file_path": path }).to_string() } -fn check_only() -> CargoVerifierConfig { +fn context_for(tool_name: &str, input: &str) -> Option { + VerificationContext::from_tool_invocation(VerificationPhase::Quick, None, tool_name, input, 1) +} + +fn quick_report(v: &CargoVerifier, tool_name: &str, input: &str) -> Option { + let context = context_for(tool_name, input)?; + v.quick_verify(&context).into_iter().next() +} + +fn quick_only() -> CargoVerifierConfig { CargoVerifierConfig { - run_check: true, - run_clippy: false, - run_fmt: false, - run_test: false, - timeout: Duration::from_mins(2), + legacy_mode: false, + quick_on_write: true, + final_gate: false, + max_output_bytes: 2_048, + rust_check: true, + rust_clippy: false, + rust_fmt: false, + rust_test: false, + rust_timeout: Duration::from_mins(2), + node_enabled: false, + node_timeout: Duration::from_mins(2), + python_enabled: false, + python_timeout: Duration::from_mins(2), } } @@ -57,23 +76,49 @@ 
fn cargo_available() -> bool { .is_ok_and(|s| s.success()) } +fn python_available() -> bool { + std::process::Command::new("python") + .arg("--version") + .stdout(std::process::Stdio::null()) + .stderr(std::process::Stdio::null()) + .status() + .is_ok_and(|s| s.success()) +} + +fn python_only() -> CargoVerifierConfig { + CargoVerifierConfig { + legacy_mode: false, + quick_on_write: true, + final_gate: true, + max_output_bytes: 2_048, + rust_check: false, + rust_clippy: false, + rust_fmt: false, + rust_test: false, + rust_timeout: Duration::from_mins(2), + node_enabled: false, + node_timeout: Duration::from_mins(2), + python_enabled: true, + python_timeout: Duration::from_mins(2), + } +} + #[test] -fn passing_crate_reports_ok_and_passed_true() { +fn passing_crate_reports_passed_status() { if !cargo_available() { - eprintln!("cargo unavailable — skipping"); + eprintln!("cargo unavailable - skipping"); return; } let root = unique_tmpdir("pass"); write_minimal_crate(&root, "vpass", "pub fn two() -> i32 { 2 }\n"); let file = root.join("src/lib.rs"); - let v = CargoVerifier::new(check_only()); - let result: VerificationResult = v - .verify("edit_file", &tool_input(&file)) + let v = CargoVerifier::new(quick_only()); + let result = quick_report(&v, "edit_file", &tool_input(&file)) .expect("verifier should run for .rs edit"); - assert!(result.passed, "summary was: {}", result.summary); - assert!(result.summary.contains("cargo check: ok")); + assert_eq!(result.status, VerificationStatus::Passed); + assert!(result.summary_text.contains("cargo check: ok")); let _ = fs::remove_dir_all(&root); } @@ -83,20 +128,19 @@ fn type_error_fails_and_surfaces_error_text_in_summary() { return; } let root = unique_tmpdir("typeerr"); - // Type mismatch: declared i32 but returns &str. 
write_minimal_crate(&root, "vtype", "pub fn oops() -> i32 { \"nope\" }\n"); let file = root.join("src/lib.rs"); - let v = CargoVerifier::new(check_only()); - let result = v.verify("write_file", &tool_input(&file)).unwrap(); + let v = CargoVerifier::new(quick_only()); + let result = quick_report(&v, "write_file", &tool_input(&file)).unwrap(); - assert!(!result.passed); - assert!(result.summary.contains("cargo check: FAIL")); - let lower = result.summary.to_lowercase(); + assert_eq!(result.status, VerificationStatus::Failed); + assert!(result.summary_text.contains("cargo check: FAIL")); + let lower = result.summary_text.to_lowercase(); assert!( lower.contains("mismatched") || lower.contains("error"), "summary missing diagnostic: {}", - result.summary + result.summary_text ); let _ = fs::remove_dir_all(&root); } @@ -105,23 +149,23 @@ fn type_error_fails_and_surfaces_error_text_in_summary() { fn non_rust_file_is_out_of_scope() { let v = CargoVerifier::new(CargoVerifierConfig::default()); let input = r#"{"file_path":"/tmp/README.md"}"#; - assert!(v.verify("edit_file", input).is_none()); + assert!(quick_report(&v, "edit_file", input).is_none()); } #[test] fn unknown_tool_is_ignored() { let v = CargoVerifier::new(CargoVerifierConfig::default()); let input = r#"{"file_path":"/tmp/x.rs"}"#; - assert!(v.verify("read_file", input).is_none()); - assert!(v.verify("bash", input).is_none()); + assert!(quick_report(&v, "read_file", input).is_none()); + assert!(quick_report(&v, "bash", input).is_none()); } #[test] fn malformed_json_returns_none_without_panicking() { let v = CargoVerifier::new(CargoVerifierConfig::default()); - assert!(v.verify("edit_file", "not-json").is_none()); - assert!(v.verify("edit_file", "{}").is_none()); - assert!(v.verify("edit_file", r#"{"file_path": 42}"#).is_none()); + assert!(quick_report(&v, "edit_file", "not-json").is_none()); + assert!(quick_report(&v, "edit_file", "{}").is_none()); + assert!(quick_report(&v, "edit_file", r#"{"file_path": 
42}"#).is_none()); } #[test] @@ -133,53 +177,46 @@ fn accepts_alternate_path_keys() { write_minimal_crate(&root, "valt", "pub fn k() -> u8 { 1 }\n"); let file = root.join("src/lib.rs"); - let v = CargoVerifier::new(check_only()); + let v = CargoVerifier::new(quick_only()); - let a = format!(r#"{{"filePath":"{}"}}"#, file.display()); - let b = format!(r#"{{"path":"{}"}}"#, file.display()); - assert!(v.verify("edit_file", &a).is_some()); - assert!(v.verify("edit_file", &b).is_some()); + let a = json!({ "filePath": file }).to_string(); + let b = json!({ "path": file }).to_string(); + assert!(quick_report(&v, "edit_file", &a).is_some()); + assert!(quick_report(&v, "edit_file", &b).is_some()); let _ = fs::remove_dir_all(&root); } #[test] -fn file_outside_any_crate_is_skipped() { +fn file_outside_any_crate_is_skipped_or_ignored() { let root = unique_tmpdir("nocargo"); - // No Cargo.toml anywhere up the tree we control — but the tmp root's - // ancestors might have one. To guarantee "none found", create a file - // whose parent chain hits filesystem root without Cargo.toml only when - // the OS tmpdir itself isn't under a cargo project. Skip the strong - // assertion in that case. fs::create_dir_all(root.join("x")).unwrap(); let file = root.join("x/orphan.rs"); fs::write(&file, "pub fn z() {}\n").unwrap(); - let v = CargoVerifier::new(check_only()); - let result = v.verify("edit_file", &tool_input(&file)); - // Either no manifest found (None) — the preferred case — or one was - // discovered in an ancestor; both are acceptable. We just assert it - // doesn't panic and returns a well-formed value. 
- if let Some(r) = result { - assert!(r.summary.contains("cargo check")); + let v = CargoVerifier::new(quick_only()); + let result = quick_report(&v, "edit_file", &tool_input(&file)); + if let Some(report) = result { + assert!(report.summary_text.contains("cargo check")); } let _ = fs::remove_dir_all(&root); } #[test] -fn all_steps_disabled_yields_none() { +fn all_steps_disabled_yields_skipped_report() { let config = CargoVerifierConfig { - run_check: false, - run_clippy: false, - run_fmt: false, - run_test: false, - timeout: Duration::from_secs(5), + rust_check: false, + rust_clippy: false, + rust_fmt: false, + rust_test: false, + ..quick_only() }; let root = unique_tmpdir("nosteps"); write_minimal_crate(&root, "vnone", "pub fn n() {}\n"); let file = root.join("src/lib.rs"); let v = CargoVerifier::new(config); - assert!(v.verify("edit_file", &tool_input(&file)).is_none()); + let report = quick_report(&v, "edit_file", &tool_input(&file)).unwrap(); + assert_eq!(report.status, VerificationStatus::Skipped); let _ = fs::remove_dir_all(&root); } @@ -188,57 +225,58 @@ fn timeout_short_circuits_and_reports_failure() { if !cargo_available() { return; } - // 1ms timeout — cargo process spawn alone takes longer than this on any - // real machine, so the verifier should report a timeout failure. 
let config = CargoVerifierConfig { - run_check: true, - run_clippy: false, - run_fmt: false, - run_test: false, - timeout: Duration::from_millis(1), + rust_timeout: Duration::from_millis(1), + ..quick_only() }; let root = unique_tmpdir("timeout"); write_minimal_crate(&root, "vtime", "pub fn t() {}\n"); let file = root.join("src/lib.rs"); let v = CargoVerifier::new(config); - let result = v.verify("edit_file", &tool_input(&file)).unwrap(); - assert!(!result.passed); + let result = quick_report(&v, "edit_file", &tool_input(&file)).unwrap(); + assert_eq!(result.status, VerificationStatus::Failed); assert!( - result.summary.contains("cargo check: FAIL") - && result.summary.to_lowercase().contains("timed out"), + result.summary_text.contains("cargo check: FAIL") + && result.summary_text.to_lowercase().contains("timed out"), "unexpected summary: {}", - result.summary + result.summary_text ); let _ = fs::remove_dir_all(&root); } #[test] -fn later_steps_are_skipped_after_first_failure() { +fn later_steps_are_skipped_after_first_failure_in_legacy_mode() { if !cargo_available() { return; } let root = unique_tmpdir("skipchain"); - // Broken code so `cargo check` fails — the later steps (clippy/fmt/test) - // must all be recorded as `skipped` to save time. 
write_minimal_crate(&root, "vskip", "pub fn bad() -> i32 { return; }\n"); let file = root.join("src/lib.rs"); let config = CargoVerifierConfig { - run_check: true, - run_clippy: true, - run_fmt: true, - run_test: true, - timeout: Duration::from_mins(2), + legacy_mode: true, + quick_on_write: true, + final_gate: false, + max_output_bytes: 2_048, + rust_check: true, + rust_clippy: true, + rust_fmt: true, + rust_test: true, + rust_timeout: Duration::from_mins(2), + node_enabled: false, + node_timeout: Duration::from_mins(2), + python_enabled: false, + python_timeout: Duration::from_mins(2), }; let v = CargoVerifier::new(config); - let result = v.verify("edit_file", &tool_input(&file)).unwrap(); + let result = quick_report(&v, "edit_file", &tool_input(&file)).unwrap(); - assert!(!result.passed); - assert!(result.summary.contains("cargo check: FAIL")); - assert!(result.summary.contains("cargo clippy: skipped")); - assert!(result.summary.contains("cargo fmt --check: skipped")); - assert!(result.summary.contains("cargo test: skipped")); + assert_eq!(result.status, VerificationStatus::Failed); + assert!(result.summary_text.contains("cargo check: FAIL")); + assert!(result.summary_text.contains("cargo clippy: skipped")); + assert!(result.summary_text.contains("cargo fmt --check: skipped")); + assert!(result.summary_text.contains("cargo test: skipped")); let _ = fs::remove_dir_all(&root); } @@ -248,25 +286,30 @@ fn fmt_violation_is_detected_when_fmt_enabled() { return; } let root = unique_tmpdir("fmt"); - // Deliberately badly-formatted source that still compiles. 
let src = "pub fn f( )->i32{1 + 2}\n"; write_minimal_crate(&root, "vfmt", src); let file = root.join("src/lib.rs"); let config = CargoVerifierConfig { - run_check: false, - run_clippy: false, - run_fmt: true, - run_test: false, - timeout: Duration::from_mins(1), + legacy_mode: true, + quick_on_write: true, + final_gate: false, + max_output_bytes: 2_048, + rust_check: false, + rust_clippy: false, + rust_fmt: true, + rust_test: false, + rust_timeout: Duration::from_mins(1), + node_enabled: false, + node_timeout: Duration::from_mins(2), + python_enabled: false, + python_timeout: Duration::from_mins(2), }; let v = CargoVerifier::new(config); - let result = v.verify("edit_file", &tool_input(&file)).unwrap(); + let result = quick_report(&v, "edit_file", &tool_input(&file)).unwrap(); - // rustfmt may be unavailable in some toolchains — accept both "FAIL" - // (violations found) and "could not run" (binary missing), but never ok. - assert!(!result.passed, "summary: {}", result.summary); - assert!(result.summary.contains("cargo fmt --check")); + assert_ne!(result.status, VerificationStatus::Passed); + assert!(result.summary_text.contains("cargo fmt --check")); let _ = fs::remove_dir_all(&root); } @@ -280,16 +323,83 @@ fn nested_file_resolves_to_parent_crate_manifest() { fs::create_dir_all(root.join("src/sub")).unwrap(); let nested = root.join("src/sub/mod.rs"); fs::write(&nested, "pub fn inside() -> u8 { 7 }\n").unwrap(); - fs::write(root.join("src/lib.rs"), "pub mod sub;\n").unwrap(); - fs::remove_file(root.join("src/lib.rs")).ok(); fs::write( root.join("src/lib.rs"), "#[path = \"sub/mod.rs\"]\npub mod sub;\n", ) .unwrap(); - let v = CargoVerifier::new(check_only()); - let result = v.verify("edit_file", &tool_input(&nested)).unwrap(); - assert!(result.passed, "summary: {}", result.summary); + let v = CargoVerifier::new(quick_only()); + let result = quick_report(&v, "edit_file", &tool_input(&nested)).unwrap(); + assert_eq!(result.status, VerificationStatus::Passed); + let _ 
= fs::remove_dir_all(&root); +} + +#[test] +fn python_quick_fallback_py_compile_catches_syntax_error() { + if !python_available() { + return; + } + let root = unique_tmpdir("python-syntax"); + fs::write(root.join("requirements.txt"), "pytest\n").unwrap(); + let file = root.join("broken.py"); + fs::write(&file, "def broken(:\n pass\n").unwrap(); + + let v = CargoVerifier::new(python_only()); + let result = quick_report(&v, "edit_file", &tool_input(&file)).unwrap(); + + assert_eq!(result.status, VerificationStatus::Failed); + assert_eq!( + result.steps[0].failure_kind, + Some(VerificationFailureKind::Code) + ); + assert!(result.summary_text.contains("py_compile")); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn invalid_pyproject_reports_config_failure() { + let root = unique_tmpdir("python-bad-pyproject"); + let file = root.join("pyproject.toml"); + fs::write(&file, "[tool.ruff\n").unwrap(); + + let v = CargoVerifier::new(python_only()); + let result = quick_report(&v, "edit_file", &tool_input(&file)).unwrap(); + + assert_eq!(result.status, VerificationStatus::Failed); + assert_eq!( + result.steps[0].failure_kind, + Some(VerificationFailureKind::Config) + ); + assert!(result.summary_text.contains("pyproject.toml")); + let _ = fs::remove_dir_all(&root); +} + +#[test] +fn broken_local_venv_is_reported_as_environment() { + let root = unique_tmpdir("python-venv"); + fs::write(root.join("setup.py"), "from setuptools import setup\n").unwrap(); + let file = root.join("main.py"); + fs::write(&file, "print('ok')\n").unwrap(); + + let interpreter = if cfg!(windows) { + root.join(".venv").join("Scripts").join("python.exe") + } else { + root.join(".venv").join("bin").join("python") + }; + fs::create_dir_all(interpreter.parent().unwrap()).unwrap(); + fs::write(&interpreter, "").unwrap(); + + let v = CargoVerifier::new(python_only()); + let result = quick_report(&v, "edit_file", &tool_input(&file)).unwrap(); + + assert!( + matches!( + result.steps[0].failure_kind, + 
Some(VerificationFailureKind::Environment) + ), + "expected environment failure, got {:?}", + result.steps[0].failure_kind + ); let _ = fs::remove_dir_all(&root); } diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs index 572cc15f92..010dddd128 100644 --- a/rust/crates/rusty-claude-cli/src/main.rs +++ b/rust/crates/rusty-claude-cli/src/main.rs @@ -3188,6 +3188,7 @@ struct BuiltRuntime { plugins_active: bool, mcp_state: Option>>, mcp_active: bool, + buffer_output: bool, } impl BuiltRuntime { @@ -3195,6 +3196,7 @@ impl BuiltRuntime { runtime: ConversationRuntime, plugin_registry: PluginRegistry, mcp_state: Option>>, + buffer_output: bool, ) -> Self { Self { runtime: Some(runtime), @@ -3202,6 +3204,7 @@ impl BuiltRuntime { plugins_active: true, mcp_state, mcp_active: true, + buffer_output, } } @@ -3776,6 +3779,7 @@ impl LiveCli { hook_abort_monitor.stop(); match result { Ok(summary) => { + let should_print_buffered = runtime.buffer_output; self.replace_runtime(runtime)?; spinner.finish( "✨ Done", @@ -3783,6 +3787,9 @@ impl LiveCli { &mut stdout, )?; println!(); + if should_print_buffered { + print_buffered_turn_summary(&summary)?; + } if let Some(event) = summary.auto_compaction { println!( "{}", @@ -3850,6 +3857,12 @@ impl LiveCli { })), "tool_uses": collect_tool_uses(&summary), "tool_results": collect_tool_results(&summary), + "verification_reports": collect_verification_reports(&summary), + "verification_gate": { + "attempted": summary.verification_gate.attempted, + "passed": summary.verification_gate.passed, + "report_ids": summary.verification_gate.report_ids.clone(), + }, "prompt_cache_events": collect_prompt_cache_events(&summary), "usage": { "input_tokens": summary.usage.input_tokens, @@ -5923,6 +5936,7 @@ fn render_export_text(session: &Session) -> String { MessageRole::User => "user", MessageRole::Assistant => "assistant", MessageRole::Tool => "tool", + MessageRole::Verification => "verification", }; 
lines.push(format!("## {}. {role}", index + 1)); for block in &message.blocks { @@ -5941,6 +5955,16 @@ fn render_export_text(session: &Session) -> String { "[tool_result id={tool_use_id} name={tool_name} error={is_error}] {output}" )); } + ContentBlock::VerificationReport { + report_id, + phase, + status, + summary_text, + } => lines.push(format!( + "[verification_report id={report_id} phase={} status={}] {summary_text}", + phase.as_str(), + status.as_str() + )), } } lines.push(String::new()); @@ -6068,6 +6092,7 @@ fn run_export( Ok(()) } +#[allow(clippy::too_many_lines)] fn render_session_markdown(session: &Session, session_id: &str, session_path: &Path) -> String { let mut lines = vec![ "# Conversation Export".to_string(), @@ -6102,6 +6127,7 @@ fn render_session_markdown(session: &Session, session_id: &str, session_path: &P MessageRole::User => "User", MessageRole::Assistant => "Assistant", MessageRole::Tool => "Tool", + MessageRole::Verification => "Verification", }; lines.push(format!("## {}. 
{role}", index + 1)); lines.push(String::new()); @@ -6142,6 +6168,23 @@ fn render_session_markdown(session: &Session, session_id: &str, session_path: &P } lines.push(String::new()); } + ContentBlock::VerificationReport { + report_id, + phase, + status, + summary_text, + } => { + lines.push(format!( + "**Verification** `{}` _(id `{report_id}`, status `{}`)_", + phase.as_str(), + status.as_str() + )); + let summary = summarize_tool_payload_for_markdown(summary_text); + if !summary.is_empty() { + lines.push(format!("> {summary}")); + } + lines.push(String::new()); + } } } if let Some(usage) = message.usage { @@ -6631,6 +6674,13 @@ fn build_runtime_with_plugin_state( plugin_registry, mcp_state, } = runtime_plugin_state; + let verifier_config = feature_config.verifier().clone(); + let buffer_output = emit_output + && verifier_config.enabled() + && verifier_config.staged() + && verifier_config.final_gate(); + let assistant_emit_output = emit_output && !buffer_output; + let tool_emit_output = emit_output && !buffer_output; plugin_registry.initialize()?; let policy = permission_policy(permission_mode, &feature_config, &tool_registry) .map_err(std::io::Error::other)?; @@ -6640,14 +6690,14 @@ fn build_runtime_with_plugin_state( session_id, model, enable_tools, - emit_output, + assistant_emit_output, allowed_tools.clone(), tool_registry.clone(), progress_reporter, )?, CliToolExecutor::new( allowed_tools.clone(), - emit_output, + tool_emit_output, tool_registry.clone(), mcp_state.clone(), ), @@ -6658,18 +6708,30 @@ fn build_runtime_with_plugin_state( if emit_output { runtime = runtime.with_hook_progress_reporter(Box::new(CliHookProgressReporter)); } - let verifier_config = feature_config.verifier(); if verifier_config.enabled() { let cargo_config = runtime::CargoVerifierConfig { - run_check: verifier_config.run_check(), - run_clippy: verifier_config.run_clippy(), - run_fmt: verifier_config.run_fmt(), - run_test: verifier_config.run_test(), - timeout: 
std::time::Duration::from_secs(verifier_config.timeout_secs()), + legacy_mode: !verifier_config.staged(), + quick_on_write: verifier_config.quick_on_write(), + final_gate: verifier_config.final_gate(), + max_output_bytes: verifier_config.max_output_bytes(), + rust_check: verifier_config.run_check(), + rust_clippy: verifier_config.run_clippy(), + rust_fmt: verifier_config.run_fmt(), + rust_test: verifier_config.run_test(), + rust_timeout: std::time::Duration::from_secs(verifier_config.timeout_secs()), + node_enabled: verifier_config.node_enabled(), + node_timeout: std::time::Duration::from_secs(verifier_config.node_timeout_secs()), + python_enabled: verifier_config.python_enabled(), + python_timeout: std::time::Duration::from_secs(verifier_config.python_timeout_secs()), }; runtime = runtime.with_verifier(Box::new(runtime::CargoVerifier::new(cargo_config))); } - Ok(BuiltRuntime::new(runtime, plugin_registry, mcp_state)) + Ok(BuiltRuntime::new( + runtime, + plugin_registry, + mcp_state, + buffer_output, + )) } struct CliHookProgressReporter; @@ -7079,7 +7141,9 @@ impl AnthropicRuntimeClient { fn request_ends_with_tool_result(request: &ApiRequest) -> bool { request .messages - .last() + .iter() + .rev() + .find(|message| message.role != MessageRole::Verification) .is_some_and(|message| message.role == MessageRole::Tool) } @@ -7192,6 +7256,51 @@ fn final_assistant_text(summary: &runtime::TurnSummary) -> String { .unwrap_or_default() } +fn print_buffered_turn_summary( + summary: &runtime::TurnSummary, +) -> Result<(), Box> { + let renderer = TerminalRenderer::new(); + let mut stdout = io::stdout(); + + for message in &summary.tool_results { + for block in &message.blocks { + if let ContentBlock::ToolResult { + tool_name, + output, + is_error, + .. 
+ } = block + { + writeln!( + stdout, + "\n{}", + format_tool_result(tool_name, output, *is_error) + )?; + } + } + } + + for report in &summary.verification_reports { + writeln!( + stdout, + "\n[verification {}:{}] {}", + report.phase.as_str(), + report.adapter_id, + report.status.as_str() + )?; + writeln!(stdout, "{}", report.summary_text)?; + } + + let final_text = final_assistant_text(summary); + if !final_text.trim().is_empty() { + writeln!(stdout)?; + renderer.stream_markdown(&final_text, &mut stdout)?; + writeln!(stdout)?; + } + + Ok(()) +} + fn collect_tool_uses(summary: &runtime::TurnSummary) -> Vec { summary .assistant_messages @@ -7230,6 +7339,39 @@ fn collect_tool_results(summary: &runtime::TurnSummary) -> Vec Vec { + summary + .verification_reports + .iter() + .map(|report| { + json!({ + "report_id": report.report_id.clone(), + "phase": report.phase.as_str(), + "adapter_id": report.adapter_id.clone(), + "project_root": report.project_root.display().to_string(), + "touched_paths": report + .touched_paths + .iter() + .map(|path| path.display().to_string()) + .collect::>(), + "status": report.status.as_str(), + "summary_text": report.summary_text.clone(), + "steps": report.steps.iter().map(|step| json!({ + "adapter": step.adapter.clone(), + "project_root": step.project_root.display().to_string(), + "label": step.label.clone(), + "command": step.command.clone(), + "phase": step.phase.as_str(), + "status": step.status.as_str(), + "failure_kind": step.failure_kind.map(runtime::VerificationFailureKind::as_str), + "duration_ms": step.duration_ms, + "truncated_output": step.truncated_output.clone(), + })).collect::>(), + }) + }) + .collect() +} + fn collect_prompt_cache_events(summary: &runtime::TurnSummary) -> Vec { summary .prompt_cache_events @@ -8140,7 +8282,10 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec { .iter() .filter_map(|message| { let role = match message.role { - MessageRole::System | MessageRole::User | MessageRole::Tool => 
"user", + MessageRole::System + | MessageRole::User + | MessageRole::Tool + | MessageRole::Verification => "user", MessageRole::Assistant => "assistant", }; let content = message @@ -8166,6 +8311,11 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec { }], is_error: *is_error, }, + ContentBlock::VerificationReport { summary_text, .. } => { + InputContentBlock::Text { + text: summary_text.clone(), + } + } }) .collect::>(); (!content.is_empty()).then(|| InputMessage { @@ -8387,6 +8537,8 @@ mod tests { use std::fs; use std::io::{Read, Write}; use std::net::TcpListener; + #[cfg(unix)] + use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::process::Command; use std::sync::{Mutex, MutexGuard, OnceLock}; @@ -8628,45 +8780,89 @@ mod tests { .expect("skill file should write"); } + fn make_executable(path: &Path) { + #[cfg(unix)] + { + let mut permissions = fs::metadata(path) + .expect("script metadata should load") + .permissions(); + permissions.set_mode(0o755); + fs::set_permissions(path, permissions).expect("script permissions should update"); + } + } + + fn script_name(stem: &str) -> String { + if cfg!(windows) { + format!("{stem}.cmd") + } else { + format!("{stem}.sh") + } + } + + fn write_script(path: &Path, unix_body: &str, windows_body: &str) { + let body = if cfg!(windows) { + windows_body + } else { + unix_body + }; + fs::write(path, body).expect("script should write"); + make_executable(path); + } + + fn python_command() -> &'static str { + if cfg!(windows) { + "python" + } else { + "python3" + } + } + fn write_plugin_fixture(root: &Path, name: &str, include_hooks: bool, include_lifecycle: bool) { fs::create_dir_all(root.join(".claude-plugin")).expect("manifest dir"); + let pre_hook_name = script_name("pre"); + let init_name = script_name("init"); + let shutdown_name = script_name("shutdown"); if include_hooks { fs::create_dir_all(root.join("hooks")).expect("hooks dir"); - fs::write( - root.join("hooks").join("pre.sh"), + 
write_script( + &root.join("hooks").join(&pre_hook_name), "#!/bin/sh\nprintf 'plugin pre hook'\n", - ) - .expect("write hook"); + "@echo off\r\necho plugin pre hook\r\n", + ); } if include_lifecycle { fs::create_dir_all(root.join("lifecycle")).expect("lifecycle dir"); - fs::write( - root.join("lifecycle").join("init.sh"), + write_script( + &root.join("lifecycle").join(&init_name), "#!/bin/sh\nprintf 'init\\n' >> lifecycle.log\n", - ) - .expect("write init lifecycle"); - fs::write( - root.join("lifecycle").join("shutdown.sh"), + "@echo off\r\necho init>> lifecycle.log\r\n", + ); + write_script( + &root.join("lifecycle").join(&shutdown_name), "#!/bin/sh\nprintf 'shutdown\\n' >> lifecycle.log\n", - ) - .expect("write shutdown lifecycle"); + "@echo off\r\necho shutdown>> lifecycle.log\r\n", + ); } - let hooks = if include_hooks { - ",\n \"hooks\": {\n \"PreToolUse\": [\"./hooks/pre.sh\"]\n }" - } else { - "" - }; - let lifecycle = if include_lifecycle { - ",\n \"lifecycle\": {\n \"Init\": [\"./lifecycle/init.sh\"],\n \"Shutdown\": [\"./lifecycle/shutdown.sh\"]\n }" - } else { - "" - }; + let mut manifest = json!({ + "name": name, + "version": "1.0.0", + "description": "runtime plugin fixture", + }); + if include_hooks { + manifest["hooks"] = json!({ + "PreToolUse": [format!("./hooks/{pre_hook_name}")], + }); + } + if include_lifecycle { + manifest["lifecycle"] = json!({ + "Init": [format!("./lifecycle/{init_name}")], + "Shutdown": [format!("./lifecycle/{shutdown_name}")], + }); + } fs::write( root.join(".claude-plugin").join("plugin.json"), - format!( - "{{\n \"name\": \"{name}\",\n \"version\": \"1.0.0\",\n \"description\": \"runtime plugin fixture\"{hooks}{lifecycle}\n}}" - ), + serde_json::to_string_pretty(&manifest).expect("manifest should serialize"), ) .expect("write plugin manifest"); } @@ -9145,8 +9341,15 @@ mod tests { #[test] fn rejects_unknown_allowed_tools() { - let error = parse_args(&["--allowedTools".to_string(), "teleport".to_string()]) - 
.expect_err("tool should be rejected"); + let _guard = env_lock(); + let cwd = temp_dir(); + fs::create_dir_all(&cwd).expect("temp cwd should exist"); + std::env::remove_var("RUSTY_CLAUDE_PERMISSION_MODE"); + let error = with_current_dir(&cwd, || { + parse_args(&["--allowedTools".to_string(), "teleport".to_string()]) + .expect_err("tool should be rejected") + }); + let _ = fs::remove_dir_all(cwd); assert!(error.contains("unsupported tool in --allowedTools: teleport")); } @@ -9691,102 +9894,114 @@ mod tests { } #[test] + #[allow(clippy::too_many_lines)] fn parses_direct_agents_mcp_and_skills_slash_commands() { - assert_eq!( - parse_args(&["/agents".to_string()]).expect("/agents should parse"), - CliAction::Agents { - args: None, - output_format: CliOutputFormat::Text - } - ); - assert_eq!( - parse_args(&["/mcp".to_string(), "show".to_string(), "demo".to_string()]) - .expect("/mcp show demo should parse"), - CliAction::Mcp { - args: Some("show demo".to_string()), - output_format: CliOutputFormat::Text, - } - ); - assert_eq!( - parse_args(&["/skills".to_string()]).expect("/skills should parse"), - CliAction::Skills { - args: None, - output_format: CliOutputFormat::Text, - } - ); - assert_eq!( - parse_args(&["/skill".to_string()]).expect("/skill should parse"), - CliAction::Skills { - args: None, - output_format: CliOutputFormat::Text, - } - ); - assert_eq!( - parse_args(&["/skills".to_string(), "help".to_string()]) - .expect("/skills help should parse"), - CliAction::Skills { - args: Some("help".to_string()), - output_format: CliOutputFormat::Text, - } - ); - assert_eq!( - parse_args(&["/skill".to_string(), "list".to_string()]) - .expect("/skill list should parse"), - CliAction::Skills { - args: Some("list".to_string()), - output_format: CliOutputFormat::Text, - } - ); - assert_eq!( - parse_args(&[ - "/skills".to_string(), - "help".to_string(), - "overview".to_string() - ]) - .expect("/skills help overview should invoke"), - CliAction::Prompt { - prompt: "$help 
overview".to_string(), - model: DEFAULT_MODEL.to_string(), - output_format: CliOutputFormat::Text, - allowed_tools: None, - permission_mode: crate::default_permission_mode(), - compact: false, - base_commit: None, - reasoning_effort: None, - allow_broad_cwd: false, - } - ); - assert_eq!( - parse_args(&[ - "/skills".to_string(), - "install".to_string(), - "./fixtures/help-skill".to_string(), - ]) - .expect("/skills install should parse"), - CliAction::Skills { - args: Some("install ./fixtures/help-skill".to_string()), - output_format: CliOutputFormat::Text, - } - ); - assert_eq!( - parse_args(&["/skills".to_string(), "/test".to_string()]) - .expect("/skills /test should normalize to a single skill prompt prefix"), - CliAction::Prompt { - prompt: "$test".to_string(), - model: DEFAULT_MODEL.to_string(), - output_format: CliOutputFormat::Text, - allowed_tools: None, - permission_mode: crate::default_permission_mode(), - compact: false, - base_commit: None, - reasoning_effort: None, - allow_broad_cwd: false, - } - ); - let error = parse_args(&["/status".to_string()]) - .expect_err("/status should remain REPL-only when invoked directly"); - assert!(error.contains("interactive-only")); - assert!(error.contains("claw --resume SESSION.jsonl /status")); + let _guard = env_lock(); + let cwd = temp_dir(); + fs::create_dir_all(&cwd).expect("temp cwd should exist"); + std::env::remove_var("RUSTY_CLAUDE_PERMISSION_MODE"); + + with_current_dir(&cwd, || { + let permission_mode = crate::default_permission_mode(); + + assert_eq!( + parse_args(&["/agents".to_string()]).expect("/agents should parse"), + CliAction::Agents { + args: None, + output_format: CliOutputFormat::Text + } + ); + assert_eq!( + parse_args(&["/mcp".to_string(), "show".to_string(), "demo".to_string()]) + .expect("/mcp show demo should parse"), + CliAction::Mcp { + args: Some("show demo".to_string()), + output_format: CliOutputFormat::Text, + } + ); + assert_eq!( + parse_args(&["/skills".to_string()]).expect("/skills 
should parse"), + CliAction::Skills { + args: None, + output_format: CliOutputFormat::Text, + } + ); + assert_eq!( + parse_args(&["/skill".to_string()]).expect("/skill should parse"), + CliAction::Skills { + args: None, + output_format: CliOutputFormat::Text, + } + ); + assert_eq!( + parse_args(&["/skills".to_string(), "help".to_string()]) + .expect("/skills help should parse"), + CliAction::Skills { + args: Some("help".to_string()), + output_format: CliOutputFormat::Text, + } + ); + assert_eq!( + parse_args(&["/skill".to_string(), "list".to_string()]) + .expect("/skill list should parse"), + CliAction::Skills { + args: Some("list".to_string()), + output_format: CliOutputFormat::Text, + } + ); + assert_eq!( + parse_args(&[ + "/skills".to_string(), + "help".to_string(), + "overview".to_string() + ]) + .expect("/skills help overview should invoke"), + CliAction::Prompt { + prompt: "$help overview".to_string(), + model: DEFAULT_MODEL.to_string(), + output_format: CliOutputFormat::Text, + allowed_tools: None, + permission_mode, + compact: false, + base_commit: None, + reasoning_effort: None, + allow_broad_cwd: false, + } + ); + assert_eq!( + parse_args(&[ + "/skills".to_string(), + "install".to_string(), + "./fixtures/help-skill".to_string(), + ]) + .expect("/skills install should parse"), + CliAction::Skills { + args: Some("install ./fixtures/help-skill".to_string()), + output_format: CliOutputFormat::Text, + } + ); + assert_eq!( + parse_args(&["/skills".to_string(), "/test".to_string()]) + .expect("/skills /test should normalize to a single skill prompt prefix"), + CliAction::Prompt { + prompt: "$test".to_string(), + model: DEFAULT_MODEL.to_string(), + output_format: CliOutputFormat::Text, + allowed_tools: None, + permission_mode, + compact: false, + base_commit: None, + reasoning_effort: None, + allow_broad_cwd: false, + } + ); + let error = parse_args(&["/status".to_string()]) + .expect_err("/status should remain REPL-only when invoked directly"); + 
assert!(error.contains("interactive-only")); + assert!(error.contains("claw --resume SESSION.jsonl /status")); + }); + + let _ = fs::remove_dir_all(cwd); } #[test] @@ -11376,7 +11591,7 @@ UU conflicted.rs", let pre_hooks = state.feature_config.hooks().pre_tool_use(); assert_eq!(pre_hooks.len(), 1); assert!( - pre_hooks[0].ends_with("hooks/pre.sh"), + pre_hooks[0].ends_with(&format!("hooks/{}", script_name("pre"))), "expected installed plugin hook path, got {pre_hooks:?}" ); @@ -11388,6 +11603,7 @@ UU conflicted.rs", #[test] #[allow(clippy::too_many_lines)] fn build_runtime_plugin_state_discovers_mcp_tools_and_surfaces_pending_servers() { + let _guard = env_lock(); let config_home = temp_dir(); let workspace = temp_dir(); fs::create_dir_all(&config_home).expect("config home"); @@ -11396,21 +11612,19 @@ UU conflicted.rs", write_mcp_server_fixture(&script_path); fs::write( config_home.join("settings.json"), - format!( - r#"{{ - "mcpServers": {{ - "alpha": {{ - "command": "python3", - "args": ["{}"] - }}, - "broken": {{ - "command": "python3", - "args": ["-c", "import sys; sys.exit(0)"] - }} - }} - }}"#, - script_path.to_string_lossy() - ), + serde_json::to_string_pretty(&json!({ + "mcpServers": { + "alpha": { + "command": python_command(), + "args": [script_path.to_string_lossy().to_string()], + }, + "broken": { + "command": python_command(), + "args": ["-c", "import sys; sys.exit(0)"], + } + } + })) + .expect("mcp settings should serialize"), ) .expect("write mcp settings"); @@ -11596,7 +11810,9 @@ UU conflicted.rs", .expect("runtime should build"); assert_eq!( - fs::read_to_string(&log_path).expect("init log should exist"), + fs::read_to_string(&log_path) + .expect("init log should exist") + .replace("\r\n", "\n"), "init\n" ); @@ -11605,7 +11821,9 @@ UU conflicted.rs", .expect("plugin shutdown should succeed"); assert_eq!( - fs::read_to_string(&log_path).expect("shutdown log should exist"), + fs::read_to_string(&log_path) + .expect("shutdown log should exist") + 
.replace("\r\n", "\n"), "init\nshutdown\n" ); diff --git a/rust/crates/rusty-claude-cli/tests/compact_output.rs b/rust/crates/rusty-claude-cli/tests/compact_output.rs index 456862fa1f..a9ffd00b38 100644 --- a/rust/crates/rusty-claude-cli/tests/compact_output.rs +++ b/rust/crates/rusty-claude-cli/tests/compact_output.rs @@ -141,11 +141,24 @@ fn run_claw( .env("CLAW_CONFIG_HOME", config_home) .env("HOME", home) .env("NO_COLOR", "1") - .env("PATH", "/usr/bin:/bin") .args(args); + configure_clean_process_env(&mut command, home); command.output().expect("claw should launch") } +fn configure_clean_process_env(command: &mut Command, home: &std::path::Path) { + if cfg!(windows) { + command.env("USERPROFILE", home); + for key in ["PATH", "SystemRoot", "ComSpec", "PATHEXT", "TEMP", "TMP"] { + if let Ok(value) = std::env::var(key) { + command.env(key, value); + } + } + } else { + command.env("PATH", "/usr/bin:/bin"); + } +} + fn unique_temp_dir(label: &str) -> PathBuf { let millis = SystemTime::now() .duration_since(UNIX_EPOCH) diff --git a/rust/crates/rusty-claude-cli/tests/mock_parity_harness.rs b/rust/crates/rusty-claude-cli/tests/mock_parity_harness.rs index 066abb686b..502e683d76 100644 --- a/rust/crates/rusty-claude-cli/tests/mock_parity_harness.rs +++ b/rust/crates/rusty-claude-cli/tests/mock_parity_harness.rs @@ -1,17 +1,64 @@ use std::collections::BTreeMap; use std::fs; use std::io::Write; -use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::process::{Command, Output, Stdio}; use std::sync::atomic::{AtomicU64, Ordering}; use std::time::{SystemTime, UNIX_EPOCH}; +#[cfg(unix)] +use std::os::unix::fs::PermissionsExt; + use mock_anthropic_service::{MockAnthropicService, SCENARIO_PREFIX}; use serde_json::{json, Value}; static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0); +#[cfg(unix)] +fn make_executable(path: &Path) { + let mut permissions = fs::metadata(path) + .expect("plugin script metadata") + .permissions(); + permissions.set_mode(0o755); 
+ fs::set_permissions(path, permissions).expect("plugin script should be executable"); +} + +#[cfg(not(unix))] +fn make_executable(path: &Path) { + let _ = path; +} + +fn script_name(stem: &str) -> String { + if cfg!(windows) { + format!("{stem}.cmd") + } else { + format!("{stem}.sh") + } +} + +fn write_script(path: &Path, unix_body: &str, windows_body: &str) { + let body = if cfg!(windows) { + windows_body + } else { + unix_body + }; + fs::write(path, body).expect("script should write"); + make_executable(path); +} + +fn configure_clean_process_env(command: &mut Command, home: &Path) { + if cfg!(windows) { + command.env("USERPROFILE", home); + for key in ["PATH", "SystemRoot", "ComSpec", "PATHEXT", "TEMP", "TMP"] { + if let Ok(value) = std::env::var(key) { + command.env(key, value); + } + } + } else { + command.env("PATH", "/usr/bin:/bin"); + } +} + #[test] #[allow(clippy::too_many_lines)] fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios() { @@ -317,7 +364,6 @@ fn run_case(case: ScenarioCase, workspace: &HarnessWorkspace, base_url: &str) -> .env("CLAW_CONFIG_HOME", &workspace.config_home) .env("HOME", &workspace.home) .env("NO_COLOR", "1") - .env("PATH", "/usr/bin:/bin") .args([ "--model", "sonnet", @@ -325,6 +371,7 @@ fn run_case(case: ScenarioCase, workspace: &HarnessWorkspace, base_url: &str) -> case.permission_mode, "--output-format=json", ]); + configure_clean_process_env(&mut command, &workspace.home); if let Some(allowed_tools) = case.allowed_tools { command.args(["--allowedTools", allowed_tools]); @@ -420,41 +467,36 @@ fn prepare_plugin_fixture(workspace: &HarnessWorkspace) { fs::create_dir_all(&tool_dir).expect("plugin tools dir"); fs::create_dir_all(&manifest_dir).expect("plugin manifest dir"); - let script_path = tool_dir.join("echo-json.sh"); - fs::write( + let script_file = script_name("echo-json"); + let script_path = tool_dir.join(&script_file); + write_script( &script_path, "#!/bin/sh\nINPUT=$(cat)\nprintf 
'{\"plugin\":\"%s\",\"tool\":\"%s\",\"input\":%s}\\n' \"$CLAWD_PLUGIN_ID\" \"$CLAWD_TOOL_NAME\" \"$INPUT\"\n", - ) - .expect("plugin script should write"); - let mut permissions = fs::metadata(&script_path) - .expect("plugin script metadata") - .permissions(); - permissions.set_mode(0o755); - fs::set_permissions(&script_path, permissions).expect("plugin script should be executable"); + "@echo off\r\npowershell -NoProfile -Command \"$inputData = [Console]::In.ReadToEnd(); $payload = @{ plugin = $env:CLAWD_PLUGIN_ID; tool = $env:CLAWD_TOOL_NAME; input = ($inputData | ConvertFrom-Json) } | ConvertTo-Json -Compress -Depth 10; [Console]::Out.WriteLine($payload)\"\r\n", + ); fs::write( manifest_dir.join("plugin.json"), - r#"{ - "name": "parity-plugin", - "version": "1.0.0", - "description": "mock parity plugin", - "tools": [ - { - "name": "plugin_echo", - "description": "Echo JSON input", - "inputSchema": { - "type": "object", - "properties": { - "message": { "type": "string" } - }, - "required": ["message"], - "additionalProperties": false - }, - "command": "./tools/echo-json.sh", - "requiredPermission": "workspace-write" - } - ] -}"#, + serde_json::to_string_pretty(&json!({ + "name": "parity-plugin", + "version": "1.0.0", + "description": "mock parity plugin", + "tools": [{ + "name": "plugin_echo", + "description": "Echo JSON input", + "inputSchema": { + "type": "object", + "properties": { + "message": { "type": "string" } + }, + "required": ["message"], + "additionalProperties": false + }, + "command": format!("./tools/{script_file}"), + "requiredPermission": "workspace-write" + }] + })) + .expect("plugin manifest should serialize"), ) .expect("plugin manifest should write"); @@ -483,7 +525,7 @@ fn assert_streaming_text(_: &HarnessWorkspace, run: &ScenarioRun) { assert_eq!(run.response["tool_results"], Value::Array(Vec::new())); } -fn assert_read_file_roundtrip(workspace: &HarnessWorkspace, run: &ScenarioRun) { +fn assert_read_file_roundtrip(_workspace: 
&HarnessWorkspace, run: &ScenarioRun) { assert_eq!(run.response["iterations"], Value::from(2)); assert_eq!( run.response["tool_uses"][0]["name"], @@ -500,7 +542,10 @@ fn assert_read_file_roundtrip(workspace: &HarnessWorkspace, run: &ScenarioRun) { let output = run.response["tool_results"][0]["output"] .as_str() .expect("tool output"); - assert!(output.contains(&workspace.root.join("fixture.txt").display().to_string())); + assert!( + output.contains("fixture.txt"), + "expected read_file output to mention fixture.txt, got: {output}" + ); assert!(output.contains("alpha parity line")); } @@ -535,7 +580,7 @@ fn assert_write_file_allowed(workspace: &HarnessWorkspace, run: &ScenarioRun) { assert!(run.response["message"] .as_str() .expect("message text") - .contains("generated/output.txt")); + .contains("output.txt")); let generated = workspace.root.join("generated").join("output.txt"); let contents = fs::read_to_string(&generated).expect("generated file should exist"); assert_eq!(contents, "created by mock service\n"); diff --git a/rust/crates/tools/src/lib.rs b/rust/crates/tools/src/lib.rs index 3ffa7d0188..e63e22b1a1 100644 --- a/rust/crates/tools/src/lib.rs +++ b/rust/crates/tools/src/lib.rs @@ -4742,7 +4742,10 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec { .iter() .filter_map(|message| { let role = match message.role { - MessageRole::System | MessageRole::User | MessageRole::Tool => "user", + MessageRole::System + | MessageRole::User + | MessageRole::Tool + | MessageRole::Verification => "user", MessageRole::Assistant => "assistant", }; let content = message @@ -4768,6 +4771,11 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec { }], is_error: *is_error, }, + ContentBlock::VerificationReport { summary_text, .. 
} => { + InputContentBlock::Text { + text: summary_text.clone(), + } + } }) .collect::>(); (!content.is_empty()).then(|| InputMessage { @@ -6139,6 +6147,22 @@ mod tests { }; use serde_json::json; + #[cfg(unix)] + fn make_executable(path: &Path) { + use std::os::unix::fs::PermissionsExt; + + let mut permissions = fs::metadata(path) + .expect("script metadata should load") + .permissions(); + permissions.set_mode(0o755); + fs::set_permissions(path, permissions).expect("script permissions should update"); + } + + #[cfg(not(unix))] + fn make_executable(path: &Path) { + let _ = path; + } + fn env_lock() -> &'static Mutex<()> { static LOCK: OnceLock> = OnceLock::new(); LOCK.get_or_init(|| Mutex::new(())) @@ -6170,6 +6194,18 @@ mod tests { std::env::temp_dir().join(format!("clawd-tools-{unique}-{name}")) } + fn normalized_test_path(path: &str) -> String { + path.replace('\\', "/") + } + + fn assert_path_suffix(value: &serde_json::Value, suffix: &str) { + let path = value.as_str().expect("path"); + assert!( + normalized_test_path(path).ends_with(suffix), + "expected path `{path}` to end with `{suffix}`" + ); + } + fn run_git(cwd: &Path, args: &[&str]) { let status = Command::new("git") .args(args) @@ -6333,10 +6369,12 @@ mod tests { let worktree = temp_path("config-trust-worktree"); let claw_dir = worktree.join(".claw"); fs::create_dir_all(&claw_dir).expect("create .claw dir"); - // Use the actual OS temp dir so the worktree path matches the allowlist let tmp_root = std::env::temp_dir().to_str().expect("utf-8").to_string(); - let settings = format!("{{\"trustedRoots\": [\"{tmp_root}\"]}}"); - fs::write(claw_dir.join("settings.json"), settings).expect("write settings"); + fs::write( + claw_dir.join("settings.json"), + json!({ "trustedRoots": [tmp_root] }).to_string(), + ) + .expect("write settings"); // WorkerCreate with no per-call trusted_roots — config should supply them let cwd = worktree.to_str().expect("valid utf-8").to_string(); @@ -6350,7 +6388,6 @@ mod tests { 
.expect("WorkerCreate should succeed"); let output: serde_json::Value = serde_json::from_str(&created).expect("json"); - // worktree is under /tmp, so config roots auto-resolve trust assert_eq!( output["trust_auto_resolve"], true, "config-level trustedRoots should auto-resolve trust without per-call override" @@ -7305,10 +7342,7 @@ mod tests { let output: serde_json::Value = serde_json::from_str(&result).expect("valid json"); assert_eq!(output["skill"], "help"); - assert!(output["path"] - .as_str() - .expect("path") - .ends_with("/help/SKILL.md")); + assert_path_suffix(&output["path"], "/help/SKILL.md"); assert!(output["prompt"] .as_str() .expect("prompt") @@ -7324,10 +7358,7 @@ mod tests { let dollar_output: serde_json::Value = serde_json::from_str(&dollar_result).expect("valid json"); assert_eq!(dollar_output["skill"], "$help"); - assert!(dollar_output["path"] - .as_str() - .expect("path") - .ends_with("/help/SKILL.md")); + assert_path_suffix(&dollar_output["path"], "/help/SKILL.md"); if let Some(home) = original_home { std::env::set_var("HOME", home); @@ -7363,19 +7394,13 @@ mod tests { .expect("project-local skill should resolve"); let skill_output: serde_json::Value = serde_json::from_str(&skill_result).expect("valid json"); - assert!(skill_output["path"] - .as_str() - .expect("path") - .ends_with(".claw/skills/plan/SKILL.md")); + assert_path_suffix(&skill_output["path"], ".claw/skills/plan/SKILL.md"); let command_result = execute_tool("Skill", &json!({ "skill": "/handoff" })) .expect("legacy command should resolve"); let command_output: serde_json::Value = serde_json::from_str(&command_result).expect("valid json"); - assert!(command_output["path"] - .as_str() - .expect("path") - .ends_with(".claw/commands/handoff.md")); + assert_path_suffix(&command_output["path"], ".claw/commands/handoff.md"); std::env::set_current_dir(&original_dir).expect("restore cwd"); fs::remove_dir_all(root).expect("temp project should clean up"); @@ -7410,10 +7435,7 @@ mod tests { 
.expect("project-local skill should resolve"); let output: serde_json::Value = serde_json::from_str(&result).expect("valid json"); - assert!(output["path"] - .as_str() - .expect("path") - .ends_with(".claude/skills/trace/SKILL.md")); + assert_path_suffix(&output["path"], ".claude/skills/trace/SKILL.md"); assert_eq!(output["description"], "Project-local trace helper"); std::env::set_current_dir(&original_dir).expect("restore cwd"); @@ -7472,15 +7494,9 @@ mod tests { let omc_output: serde_json::Value = serde_json::from_str(&omc_result).expect("valid json"); let agents_output: serde_json::Value = serde_json::from_str(&agents_result).expect("valid json"); - assert!(omc_output["path"] - .as_str() - .expect("path") - .ends_with(".omc/skills/hud/SKILL.md")); + assert_path_suffix(&omc_output["path"], ".omc/skills/hud/SKILL.md"); assert_eq!(omc_output["description"], "Project-local OMC HUD helper"); - assert!(agents_output["path"] - .as_str() - .expect("path") - .ends_with(".agents/skills/trace/SKILL.md")); + assert_path_suffix(&agents_output["path"], ".agents/skills/trace/SKILL.md"); assert_eq!( agents_output["description"], "Project-local agents compatibility helper" @@ -7532,10 +7548,7 @@ mod tests { .expect("learned skill should resolve"); let output: serde_json::Value = serde_json::from_str(&result).expect("valid json"); - assert!(output["path"] - .as_str() - .expect("path") - .ends_with("skills/omc-learned/learned/SKILL.md")); + assert_path_suffix(&output["path"], "skills/omc-learned/learned/SKILL.md"); assert_eq!(output["description"], "Learned OMC skill"); match original_home { @@ -7591,20 +7604,14 @@ mod tests { execute_tool("Skill", &json!({ "skill": "statusline" })).expect("direct skill"); let direct_skill_output: serde_json::Value = serde_json::from_str(&direct_skill).expect("valid skill json"); - assert!(direct_skill_output["path"] - .as_str() - .expect("path") - .ends_with("skills/statusline/SKILL.md")); + assert_path_suffix(&direct_skill_output["path"], 
"skills/statusline/SKILL.md"); assert_eq!(direct_skill_output["description"], "Claude config skill"); let legacy_command = execute_tool("Skill", &json!({ "skill": "doctor-check" })).expect("direct command"); let legacy_command_output: serde_json::Value = serde_json::from_str(&legacy_command).expect("valid command json"); - assert!(legacy_command_output["path"] - .as_str() - .expect("path") - .ends_with("commands/doctor-check.md")); + assert_path_suffix(&legacy_command_output["path"], "commands/doctor-check.md"); assert_eq!( legacy_command_output["description"], "Claude config command" @@ -7658,10 +7665,7 @@ mod tests { .expect("legacy command markdown should resolve"); let output: serde_json::Value = serde_json::from_str(&result).expect("valid json"); - assert!(output["path"] - .as_str() - .expect("path") - .ends_with(".claude/commands/team.md")); + assert_path_suffix(&output["path"], ".claude/commands/team.md"); assert_eq!(output["description"], "Legacy team workflow"); std::env::set_current_dir(&original_dir).expect("restore cwd"); @@ -8858,10 +8862,7 @@ mod tests { .expect("glob should succeed"); let globbed_output: serde_json::Value = serde_json::from_str(&globbed).expect("json"); assert_eq!(globbed_output["numFiles"], 1); - assert!(globbed_output["filenames"][0] - .as_str() - .expect("filename") - .ends_with("nested/lib.rs")); + assert_path_suffix(&globbed_output["filenames"][0], "nested/lib.rs"); let glob_error = execute_tool("glob_search", &json!({ "pattern": "[" })) .expect_err("invalid glob should fail"); @@ -9240,24 +9241,18 @@ mod tests { .expect("time") .as_nanos() )); - std::fs::create_dir_all(&dir).expect("create dir"); - let script = dir.join("pwsh"); - std::fs::write( - &script, - r#"#!/bin/sh -while [ "$1" != "-Command" ] && [ $# -gt 0 ]; do shift; done -shift -printf 'pwsh:%s' "$1" -"#, - ) - .expect("write script"); - std::process::Command::new("/bin/chmod") - .arg("+x") - .arg(&script) - .status() - .expect("chmod"); let original_path = 
std::env::var("PATH").unwrap_or_default(); - std::env::set_var("PATH", format!("{}:{}", dir.display(), original_path)); + if !cfg!(windows) { + std::fs::create_dir_all(&dir).expect("create dir"); + let script = dir.join("powershell"); + std::fs::write( + &script, + "#!/bin/sh\nwhile [ \"$1\" != \"-Command\" ] && [ $# -gt 0 ]; do shift; done\nshift\nprintf 'pwsh:%s' \"$1\"\n", + ) + .expect("write script"); + make_executable(&script); + std::env::set_var("PATH", format!("{}:{}", dir.display(), original_path)); + } let result = execute_tool( "PowerShell", @@ -9271,11 +9266,21 @@ printf 'pwsh:%s' "$1" ) .expect("PowerShell background should succeed"); - std::env::set_var("PATH", original_path); - let _ = std::fs::remove_dir_all(dir); + if !cfg!(windows) { + std::env::set_var("PATH", original_path); + let _ = std::fs::remove_dir_all(dir); + } let output: serde_json::Value = serde_json::from_str(&result).expect("json"); - assert_eq!(output["stdout"], "pwsh:Write-Output hello"); + let stdout = output["stdout"] + .as_str() + .expect("stdout") + .replace("\r\n", "\n"); + if cfg!(windows) { + assert_eq!(stdout, "hello\n"); + } else { + assert_eq!(stdout, "pwsh:Write-Output hello"); + } assert!(output["stderr"].as_str().expect("stderr").is_empty()); let background_output: serde_json::Value = serde_json::from_str(&background).expect("json"); diff --git a/rust/scripts/run_mock_parity_diff.py b/rust/scripts/run_mock_parity_diff.py old mode 100755 new mode 100644 diff --git a/rust/scripts/run_mock_parity_harness.sh b/rust/scripts/run_mock_parity_harness.sh old mode 100755 new mode 100644 From 6facab32d38ed8697ab45d7ed8e4a69cbd8f29a7 Mon Sep 17 00:00:00 2001 From: Guajir0-code Date: Mon, 20 Apr 2026 22:34:27 +0000 Subject: [PATCH 3/6] fix: verifier final-gate error handling + unique report ids MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - verifier: emit Unavailable reports on Node package.json IO/parse errors instead of silent None 
(Bug 2) - verifier: drop dead final_phase param from verify_node/verify_python - verifier: scope CARGO_TERM_COLOR=never to cargo invocations only - verifier: remove dead VerificationReport::target() - conversation: make_final_gate_reminder report_id now includes adapter_id and mutation_sequence (Bug 3 — prevents collisions across adapters) - conversation: when run_final_verification returns None, emit synthetic Unavailable report and advance ledger instead of silent continue (Bug 1) - conversation: cap final-gate attempts at MAX_FINAL_GATE_ATTEMPTS=5 per (adapter, root); emit aborted Unavailable report on overflow (Bug 4) Co-Authored-By: Claude Opus 4.7 --- rust/crates/runtime/src/conversation.rs | 104 +++++++++++++++++--- rust/crates/runtime/src/verifier.rs | 121 ++++++++++++++++++------ 2 files changed, 181 insertions(+), 44 deletions(-) diff --git a/rust/crates/runtime/src/conversation.rs b/rust/crates/runtime/src/conversation.rs index 63de3fe5d9..0b91e33f77 100644 --- a/rust/crates/runtime/src/conversation.rs +++ b/rust/crates/runtime/src/conversation.rs @@ -16,10 +16,12 @@ use crate::permissions::{ use crate::session::{ContentBlock, ConversationMessage, Session}; use crate::usage::{TokenUsage, UsageTracker}; use crate::verifier::{ - prepend_verifier_summary, VerificationContext, VerificationGateStatus, VerificationPhase, - VerificationReport, VerificationStatus, Verifier, + prepend_verifier_summary, VerificationContext, VerificationFailureKind, VerificationGateStatus, + VerificationPhase, VerificationReport, VerificationStatus, VerificationStepReport, Verifier, }; +const MAX_FINAL_GATE_ATTEMPTS: u32 = 5; + const DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD: u32 = 100_000; const AUTO_COMPACTION_THRESHOLD_ENV_VAR: &str = "CLAUDE_CODE_AUTO_COMPACT_INPUT_TOKENS"; @@ -402,22 +404,27 @@ where }) } + fn sanitize_root_fragment(root: &std::path::Path) -> String { + root.display() + .to_string() + .chars() + .map(|ch| match ch { + '\\' | '/' | ':' | ' ' => '-', + 
other => other, + }) + .collect::() + } + fn make_final_gate_reminder(entry: &VerificationLedgerEntry) -> VerificationReport { let status = entry .last_final_status .unwrap_or(VerificationStatus::Failed); VerificationReport { report_id: format!( - "vr-reminder-{}", - entry.project_root - .display() - .to_string() - .chars() - .map(|ch| match ch { - '\\' | '/' | ':' | ' ' => '-', - other => other, - }) - .collect::() + "vr-reminder-{}-{}-{}", + entry.adapter_id, + Self::sanitize_root_fragment(&entry.project_root), + entry.last_mutation_sequence ), phase: VerificationPhase::Final, adapter_id: entry.adapter_id.clone(), @@ -434,6 +441,43 @@ where } } + fn make_final_gate_unavailable_report( + entry: &VerificationLedgerEntry, + reason: &str, + ) -> VerificationReport { + let summary_text = format!( + "[verifier:final:{}] unavailable ({})\n[verifier] {}", + entry.adapter_id, + entry.project_root.display(), + reason + ); + VerificationReport { + report_id: format!( + "vr-unavailable-{}-{}-{}", + entry.adapter_id, + Self::sanitize_root_fragment(&entry.project_root), + entry.last_mutation_sequence + ), + phase: VerificationPhase::Final, + adapter_id: entry.adapter_id.clone(), + project_root: entry.project_root.clone(), + touched_paths: entry.touched_paths.iter().cloned().collect(), + status: VerificationStatus::Unavailable, + summary_text, + steps: vec![VerificationStepReport { + adapter: entry.adapter_id.clone(), + project_root: entry.project_root.clone(), + label: "final-gate setup".to_string(), + command: String::new(), + phase: VerificationPhase::Final, + status: VerificationStatus::Unavailable, + failure_kind: Some(VerificationFailureKind::ToolUnavailable), + duration_ms: 0, + truncated_output: reason.to_string(), + }], + } + } + #[allow(clippy::too_many_lines)] pub fn run_turn( &mut self, @@ -467,6 +511,7 @@ where let mut mutation_sequence = 0_u64; let mut verification_ledger = BTreeMap::::new(); + let mut final_gate_attempts = BTreeMap::::new(); loop { iterations += 
1; @@ -558,13 +603,42 @@ where verification_gate.passed = false; for (key, should_run) in pending_final_gate_keys { + let attempts = final_gate_attempts.entry(key.clone()).or_insert(0); + *attempts += 1; + let attempts_now = *attempts; let Some(entry) = verification_ledger.get(&key).cloned() else { continue; }; + if attempts_now > MAX_FINAL_GATE_ATTEMPTS { + let report = Self::make_final_gate_unavailable_report( + &entry, + &format!( + "final gate aborted after {MAX_FINAL_GATE_ATTEMPTS} attempts without convergence" + ), + ); + if let Some(ledger_entry) = verification_ledger.get_mut(&key) { + ledger_entry.last_final_status = Some(report.status); + ledger_entry.last_final_verified_sequence = + Some(ledger_entry.last_mutation_sequence); + } + self.record_verifier_ran( + iterations, + &format!("final_gate:{}", entry.adapter_id), + false, + ); + verification_gate.report_ids.push(report.report_id.clone()); + self.persist_verification_report(&report)?; + gate_reports.push(report); + continue; + } let report = if should_run { - let Some(report) = self.run_final_verification(&entry) else { - continue; - }; + let report = + self.run_final_verification(&entry).unwrap_or_else(|| { + Self::make_final_gate_unavailable_report( + &entry, + "final verification is unavailable (no verifier configured for this target)", + ) + }); if let Some(ledger_entry) = verification_ledger.get_mut(&key) { ledger_entry.last_final_status = Some(report.status); ledger_entry.last_final_verified_sequence = diff --git a/rust/crates/runtime/src/verifier.rs b/rust/crates/runtime/src/verifier.rs index f054bdebc1..1043f2c66a 100644 --- a/rust/crates/runtime/src/verifier.rs +++ b/rust/crates/runtime/src/verifier.rs @@ -202,16 +202,6 @@ impl VerificationReport { first.to_string() } } - - #[must_use] - pub fn target(&self, mutation_sequence: u64) -> VerificationTarget { - VerificationTarget { - adapter_id: self.adapter_id.clone(), - project_root: self.project_root.clone(), - touched_paths: 
self.touched_paths.clone(), - mutation_sequence, - } - } } /// Status of the staged final gate for the completed turn. @@ -355,8 +345,8 @@ impl Adapter { ) -> Option { match self { Self::Rust => verify_rust(path, context, config), - Self::NodeTypeScript => verify_node(path, context, config, false), - Self::Python => verify_python(path, context, config, false), + Self::NodeTypeScript => verify_node(path, context, config), + Self::Python => verify_python(path, context, config), } } @@ -618,20 +608,25 @@ fn verify_node( path: &Path, context: &VerificationContext, config: &CargoVerifierConfig, - final_phase: bool, ) -> Option { if !config.node_enabled { return None; } let package_json = nearest_file(path, "package.json")?; let project_root = package_json.parent()?.to_path_buf(); - let package_contents = fs::read_to_string(&package_json).ok()?; - let package_value: Value = serde_json::from_str(&package_contents).ok()?; - let phase = if final_phase { - VerificationPhase::Final - } else { - context.phase + let package_value = match load_node_package(&package_json) { + Ok(value) => value, + Err(report) => { + return Some(node_setup_failure_report( + &project_root, + context.touched_paths.clone(), + context.phase, + &report, + config.max_output_bytes, + )); + } }; + let phase = context.phase; let package_manager = detect_package_manager(&project_root); let steps = if config.legacy_mode { node_legacy_steps(&project_root, &package_value, package_manager) @@ -663,8 +658,18 @@ fn finalize_node( return None; } let package_json = target.project_root.join("package.json"); - let package_contents = fs::read_to_string(&package_json).ok()?; - let package_value: Value = serde_json::from_str(&package_contents).ok()?; + let package_value = match load_node_package(&package_json) { + Ok(value) => value, + Err(report) => { + return Some(node_setup_failure_report( + &target.project_root, + target.touched_paths.clone(), + VerificationPhase::Final, + &report, + config.max_output_bytes, + )); + 
} + }; let package_manager = detect_package_manager(&target.project_root); Some(run_planned_steps( "node-typescript", @@ -677,6 +682,62 @@ fn finalize_node( )) } +struct NodeSetupFailure { + label: String, + kind: VerificationFailureKind, + message: String, +} + +fn load_node_package(package_json: &Path) -> Result { + let contents = fs::read_to_string(package_json).map_err(|error| NodeSetupFailure { + label: "package.json read".to_string(), + kind: VerificationFailureKind::Environment, + message: format!("failed to read {}: {error}", package_json.display()), + })?; + serde_json::from_str::(&contents).map_err(|error| NodeSetupFailure { + label: "package.json parse".to_string(), + kind: VerificationFailureKind::Config, + message: format!("failed to parse {}: {error}", package_json.display()), + }) +} + +fn node_setup_failure_report( + project_root: &Path, + touched_paths: Vec, + phase: VerificationPhase, + failure: &NodeSetupFailure, + max_output_bytes: usize, +) -> VerificationReport { + let steps = vec![VerificationStepReport { + adapter: "node-typescript".to_string(), + project_root: project_root.to_path_buf(), + label: failure.label.clone(), + command: project_root.join("package.json").display().to_string(), + phase, + status: VerificationStatus::Unavailable, + failure_kind: Some(failure.kind), + duration_ms: 0, + truncated_output: truncate_output(&failure.message, max_output_bytes), + }]; + let summary_text = render_report_summary( + "node-typescript", + project_root, + phase, + VerificationStatus::Unavailable, + &steps, + ); + VerificationReport { + report_id: next_report_id(), + phase, + adapter_id: "node-typescript".to_string(), + project_root: project_root.to_path_buf(), + touched_paths, + status: VerificationStatus::Unavailable, + summary_text, + steps, + } +} + fn node_quick_steps( root: &Path, package_value: &Value, @@ -745,14 +806,8 @@ fn verify_python( path: &Path, context: &VerificationContext, config: &CargoVerifierConfig, - final_phase: bool, ) -> 
Option { - let phase = if final_phase { - VerificationPhase::Final - } else { - context.phase - }; - verify_python_for_phase(path, &context.touched_paths, phase, config) + verify_python_for_phase(path, &context.touched_paths, context.phase, config) } fn finalize_python( @@ -1668,7 +1723,15 @@ fn run_step( command.stdin(std::process::Stdio::null()); command.stdout(std::process::Stdio::piped()); command.stderr(std::process::Stdio::piped()); - command.env("CARGO_TERM_COLOR", "never"); + if step.command.first().is_some_and(|bin| { + let name = std::path::Path::new(bin) + .file_stem() + .and_then(|s| s.to_str()) + .unwrap_or(bin); + name == "cargo" + }) { + command.env("CARGO_TERM_COLOR", "never"); + } for arg in step.command.iter().skip(1) { command.arg(arg); } From d0e82bba3c0886fdfa44816d6b7f3a9448cee8d3 Mon Sep 17 00:00:00 2001 From: Guajir0-code Date: Mon, 20 Apr 2026 22:39:21 +0000 Subject: [PATCH 4/6] fix: reset final-gate attempt counter on new mutation_sequence Previous fix keyed the counter only by (adapter, project_root), so when the model edited code mid-turn and mutation_sequence advanced, the prior attempts carried over and prematurely tripped the cap on otherwise-valid work. Key the counter by (adapter, root, mutation_sequence) so each snapshot gets its own budget. 
Co-Authored-By: Claude Opus 4.7 --- rust/crates/runtime/src/conversation.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/rust/crates/runtime/src/conversation.rs b/rust/crates/runtime/src/conversation.rs index 0b91e33f77..b378a69f72 100644 --- a/rust/crates/runtime/src/conversation.rs +++ b/rust/crates/runtime/src/conversation.rs @@ -511,7 +511,7 @@ where let mut mutation_sequence = 0_u64; let mut verification_ledger = BTreeMap::::new(); - let mut final_gate_attempts = BTreeMap::::new(); + let mut final_gate_attempts = BTreeMap::<(VerificationLedgerKey, u64), u32>::new(); loop { iterations += 1; @@ -603,12 +603,14 @@ where verification_gate.passed = false; for (key, should_run) in pending_final_gate_keys { - let attempts = final_gate_attempts.entry(key.clone()).or_insert(0); - *attempts += 1; - let attempts_now = *attempts; let Some(entry) = verification_ledger.get(&key).cloned() else { continue; }; + let attempts = final_gate_attempts + .entry((key.clone(), entry.last_mutation_sequence)) + .or_insert(0); + *attempts += 1; + let attempts_now = *attempts; if attempts_now > MAX_FINAL_GATE_ATTEMPTS { let report = Self::make_final_gate_unavailable_report( &entry, From d051ca76d1621a2c5b27713ca19fbbea10356eae Mon Sep 17 00:00:00 2001 From: Guajir0-code Date: Wed, 22 Apr 2026 01:33:55 +0000 Subject: [PATCH 5/6] =?UTF-8?q?feat:=20edit=E2=86=92verify=E2=86=92fix=20r?= =?UTF-8?q?oadmap=20trunk=20+=20post-trunk=20modules?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Merges complete edit→verify→fix pipeline into the repo: Trunk (phases 1-5): - Rich StepDiagnostics across Rust/Node/Python adapters - Change-scoped verification via nearest manifest walk - VerificationReport content block with shadow/text/typed report modes - RuntimeVerifierMode::Auto + CLAUDE_CODE_VERIFIER_AUTO - Parallel bash validation wiring (permission_enforcer + tools) - verifier_ran telemetry with 
adapter/phase/failure_kind/mutation_sequence Post-trunk modules: - runtime::critic — CriticPlanner with subagent_depth guard, diff thresholds (>=4 files OR >=200 lines OR >1 root), per-mutation dedup; wired into LiveCli post-turn pipeline - runtime::rollout_metrics — aggregate(), evaluate_budget_gates(), samples_from_traces() with 1pp/5%/10%/15% regression limits - promote_auto_skill + run_promote_auto_skill_cli — 3-fixture replay + human approval + 10% token budget gate for auto-generated skills - Explicit marker-based adapter detector for auto-mode verification - Dedicated bash permission parity tests Telemetry: turn_completed now emits turn_latency_ms + tokens_total. Validation: cargo fmt, clippy -D warnings, cargo test --workspace all green (+27 new tests across runtime, tools, cli). Co-Authored-By: Claude Opus 4.7 --- rust/crates/mock-anthropic-service/src/lib.rs | 308 ++++ rust/crates/runtime/src/bash.rs | 132 ++ rust/crates/runtime/src/config.rs | 183 ++- rust/crates/runtime/src/conversation.rs | 309 +++- rust/crates/runtime/src/critic.rs | 260 +++ rust/crates/runtime/src/lib.rs | 2 + .../crates/runtime/src/permission_enforcer.rs | 120 +- rust/crates/runtime/src/prompt.rs | 477 +++++- rust/crates/runtime/src/rollout_metrics.rs | 562 +++++++ rust/crates/runtime/src/session.rs | 402 ++++- rust/crates/runtime/src/verifier.rs | 1425 ++++++++++++++--- rust/crates/runtime/tests/verifier_e2e.rs | 4 + rust/crates/rusty-claude-cli/src/main.rs | 1005 +++++++++++- .../tests/mock_quality_harness.rs | 1025 ++++++++++++ rust/crates/tools/src/lane_completion.rs | 1 + rust/crates/tools/src/lib.rs | 624 +++++++- 16 files changed, 6439 insertions(+), 400 deletions(-) create mode 100644 rust/crates/runtime/src/critic.rs create mode 100644 rust/crates/runtime/src/rollout_metrics.rs create mode 100644 rust/crates/rusty-claude-cli/tests/mock_quality_harness.rs diff --git a/rust/crates/mock-anthropic-service/src/lib.rs b/rust/crates/mock-anthropic-service/src/lib.rs index 
68968eed2e..bb1a45d896 100644 --- a/rust/crates/mock-anthropic-service/src/lib.rs +++ b/rust/crates/mock-anthropic-service/src/lib.rs @@ -100,6 +100,13 @@ enum Scenario { PluginToolRoundtrip, AutoCompactTriggered, TokenCostReporting, + RustRedGreen, + NodeRedGreen, + PythonRedGreen, + RustConfigFailure, + NodeToolUnavailable, + PythonTimeout, + RustFinalGateRetry, } impl Scenario { @@ -117,6 +124,13 @@ impl Scenario { "plugin_tool_roundtrip" => Some(Self::PluginToolRoundtrip), "auto_compact_triggered" => Some(Self::AutoCompactTriggered), "token_cost_reporting" => Some(Self::TokenCostReporting), + "rust_red_green" => Some(Self::RustRedGreen), + "node_red_green" => Some(Self::NodeRedGreen), + "python_red_green" => Some(Self::PythonRedGreen), + "rust_config_failure" => Some(Self::RustConfigFailure), + "node_tool_unavailable" => Some(Self::NodeToolUnavailable), + "python_timeout" => Some(Self::PythonTimeout), + "rust_final_gate_retry" => Some(Self::RustFinalGateRetry), _ => None, } } @@ -135,6 +149,13 @@ impl Scenario { Self::PluginToolRoundtrip => "plugin_tool_roundtrip", Self::AutoCompactTriggered => "auto_compact_triggered", Self::TokenCostReporting => "token_cost_reporting", + Self::RustRedGreen => "rust_red_green", + Self::NodeRedGreen => "node_red_green", + Self::PythonRedGreen => "python_red_green", + Self::RustConfigFailure => "rust_config_failure", + Self::NodeToolUnavailable => "node_tool_unavailable", + Self::PythonTimeout => "python_timeout", + Self::RustFinalGateRetry => "rust_final_gate_retry", } } } @@ -296,6 +317,200 @@ fn tool_results_by_name(request: &MessageRequest) -> HashMap usize { + request + .messages + .iter() + .map(|message| { + message + .content + .iter() + .filter(|block| matches!(block, InputContentBlock::ToolResult { .. 
})) + .count() + }) + .sum() +} + +fn all_text_content(request: &MessageRequest) -> String { + request + .messages + .iter() + .flat_map(|message| message.content.iter()) + .filter_map(|block| match block { + InputContentBlock::Text { text } => Some(text.as_str()), + _ => None, + }) + .collect::>() + .join("\n") +} + +fn count_text_occurrences(request: &MessageRequest, needle: &str) -> usize { + all_text_content(request).matches(needle).count() +} + +enum QualityAction { + FinalText(String), + ToolUse { + message_id: &'static str, + tool_id: &'static str, + tool_name: &'static str, + input: Value, + }, +} + +impl QualityAction { + fn stream_body(self) -> String { + match self { + Self::FinalText(text) => final_text_sse(&text), + Self::ToolUse { + tool_id, + tool_name, + input, + .. + } => tool_use_sse_json(tool_id, tool_name, &input), + } + } + + fn message_response(self) -> MessageResponse { + match self { + Self::FinalText(text) => text_message_response("msg_quality_final", &text), + Self::ToolUse { + message_id, + tool_id, + tool_name, + input, + } => tool_message_response(message_id, tool_id, tool_name, input), + } + } +} + +fn write_file_input(path: &str, content: &str) -> Value { + json!({ + "path": path, + "content": content, + }) +} + +#[allow(clippy::too_many_lines)] +fn quality_action(request: &MessageRequest, scenario: Scenario) -> Option { + let writes = tool_result_count(request); + let reminder_count = count_text_occurrences(request, "still failing"); + match scenario { + Scenario::RustRedGreen => Some(match writes { + 0 => QualityAction::ToolUse { + message_id: "msg_rust_red_green_broken", + tool_id: "toolu_rust_red_green_broken", + tool_name: "write_file", + input: write_file_input( + "crates/app/src/lib.rs", + "pub fn answer() -> usize {\n \"wrong\"\n}\n", + ), + }, + 1 => QualityAction::ToolUse { + message_id: "msg_rust_red_green_fixed", + tool_id: "toolu_rust_red_green_fixed", + tool_name: "write_file", + input: write_file_input( + 
"crates/app/src/lib.rs", + "pub fn answer() -> usize {\n 42\n}\n", + ), + }, + _ => QualityAction::FinalText("rust quality red-green complete".to_string()), + }), + Scenario::NodeRedGreen => Some(match writes { + 0 => QualityAction::ToolUse { + message_id: "msg_node_red_green_broken", + tool_id: "toolu_node_red_green_broken", + tool_name: "write_file", + input: write_file_input( + "packages/web/src/index.ts", + "export const message: string = 42; // BROKEN_TYPECHECK\n", + ), + }, + 1 => QualityAction::ToolUse { + message_id: "msg_node_red_green_fixed", + tool_id: "toolu_node_red_green_fixed", + tool_name: "write_file", + input: write_file_input( + "packages/web/src/index.ts", + "export const message: string = \"ok\";\n", + ), + }, + _ => QualityAction::FinalText("node quality red-green complete".to_string()), + }), + Scenario::PythonRedGreen => Some(match writes { + 0 => QualityAction::ToolUse { + message_id: "msg_python_red_green_broken", + tool_id: "toolu_python_red_green_broken", + tool_name: "write_file", + input: write_file_input("services/api/app/main.py", "# BROKEN_PY_COMPILE\n"), + }, + 1 => QualityAction::ToolUse { + message_id: "msg_python_red_green_fixed", + tool_id: "toolu_python_red_green_fixed", + tool_name: "write_file", + input: write_file_input( + "services/api/app/main.py", + "def meaning() -> int:\n return 42\n", + ), + }, + _ => QualityAction::FinalText("python quality red-green complete".to_string()), + }), + Scenario::RustConfigFailure => Some(match writes { + 0 => QualityAction::ToolUse { + message_id: "msg_rust_config_failure", + tool_id: "toolu_rust_config_failure", + tool_name: "write_file", + input: write_file_input("Cargo.toml", "[package\nname = \"broken\"\n"), + }, + _ => QualityAction::FinalText("rust config failure captured".to_string()), + }), + Scenario::NodeToolUnavailable => Some(match writes { + 0 => QualityAction::ToolUse { + message_id: "msg_node_tool_unavailable", + tool_id: "toolu_node_tool_unavailable", + tool_name: 
"write_file", + input: write_file_input( + "packages/web/src/index.ts", + "export const message = \"TOOL_UNAVAILABLE_SENTINEL\";\n", + ), + }, + _ => QualityAction::FinalText("node tool unavailable captured".to_string()), + }), + Scenario::PythonTimeout => Some(match writes { + 0 => QualityAction::ToolUse { + message_id: "msg_python_timeout", + tool_id: "toolu_python_timeout", + tool_name: "write_file", + input: write_file_input("services/api/app/main.py", "# TIMEOUT_SENTINEL\n"), + }, + _ => QualityAction::FinalText("python timeout captured".to_string()), + }), + Scenario::RustFinalGateRetry => Some(match writes { + 0 => QualityAction::ToolUse { + message_id: "msg_rust_final_gate_retry_broken", + tool_id: "toolu_rust_final_gate_retry_broken", + tool_name: "write_file", + input: write_file_input("crates/app/src/lib.rs", "pub fn answer()->usize{2}\n"), + }, + 1 if reminder_count > 0 => QualityAction::ToolUse { + message_id: "msg_rust_final_gate_retry_fixed", + tool_id: "toolu_rust_final_gate_retry_fixed", + tool_name: "write_file", + input: write_file_input( + "crates/app/src/lib.rs", + "pub fn answer() -> usize {\n 2\n}\n", + ), + }, + 1 => { + QualityAction::FinalText("attempting to conclude the current rust fix".to_string()) + } + _ => QualityAction::FinalText("rust final gate retry complete".to_string()), + }), + _ => None, + } +} + fn flatten_tool_result_content(content: &[api::ToolResultContentBlock]) -> String { content .iter() @@ -331,6 +546,9 @@ fn build_http_response(request: &MessageRequest, scenario: Scenario) -> String { #[allow(clippy::too_many_lines)] fn build_stream_body(request: &MessageRequest, scenario: Scenario) -> String { + if let Some(action) = quality_action(request, scenario) { + return action.stream_body(); + } match scenario { Scenario::StreamingText => streaming_text_sse(), Scenario::ReadFileRoundtrip => match latest_tool_result(request) { @@ -464,11 +682,21 @@ fn build_stream_body(request: &MessageRequest, scenario: Scenario) -> String { 
Scenario::TokenCostReporting => { final_text_sse_with_usage("token cost reporting parity complete.", 1_000, 500) } + Scenario::RustRedGreen + | Scenario::NodeRedGreen + | Scenario::PythonRedGreen + | Scenario::RustConfigFailure + | Scenario::NodeToolUnavailable + | Scenario::PythonTimeout + | Scenario::RustFinalGateRetry => unreachable!("quality scenarios are handled above"), } } #[allow(clippy::too_many_lines)] fn build_message_response(request: &MessageRequest, scenario: Scenario) -> MessageResponse { + if let Some(action) = quality_action(request, scenario) { + return action.message_response(); + } match scenario { Scenario::StreamingText => text_message_response( "msg_streaming_text", @@ -634,6 +862,13 @@ fn build_message_response(request: &MessageRequest, scenario: Scenario) -> Messa 1_000, 500, ), + Scenario::RustRedGreen + | Scenario::NodeRedGreen + | Scenario::PythonRedGreen + | Scenario::RustConfigFailure + | Scenario::NodeToolUnavailable + | Scenario::PythonTimeout + | Scenario::RustFinalGateRetry => unreachable!("quality scenarios are handled above"), } } @@ -651,6 +886,13 @@ fn request_id_for(scenario: Scenario) -> &'static str { Scenario::PluginToolRoundtrip => "req_plugin_tool_roundtrip", Scenario::AutoCompactTriggered => "req_auto_compact_triggered", Scenario::TokenCostReporting => "req_token_cost_reporting", + Scenario::RustRedGreen => "req_rust_red_green", + Scenario::NodeRedGreen => "req_node_red_green", + Scenario::PythonRedGreen => "req_python_red_green", + Scenario::RustConfigFailure => "req_rust_config_failure", + Scenario::NodeToolUnavailable => "req_node_tool_unavailable", + Scenario::PythonTimeout => "req_python_timeout", + Scenario::RustFinalGateRetry => "req_rust_final_gate_retry", } } @@ -836,6 +1078,72 @@ fn tool_use_sse(tool_id: &str, tool_name: &str, partial_json_chunks: &[&str]) -> }]) } +fn tool_use_sse_json(tool_id: &str, tool_name: &str, input: &Value) -> String { + let mut body = String::new(); + append_sse( + &mut body, + 
"message_start", + json!({ + "type": "message_start", + "message": { + "id": format!("msg_{tool_id}"), + "type": "message", + "role": "assistant", + "content": [], + "model": DEFAULT_MODEL, + "stop_reason": null, + "stop_sequence": null, + "usage": usage_json(12, 0) + } + }), + ); + append_sse( + &mut body, + "content_block_start", + json!({ + "type": "content_block_start", + "index": 0, + "content_block": { + "type": "tool_use", + "id": tool_id, + "name": tool_name, + "input": {} + } + }), + ); + append_sse( + &mut body, + "content_block_delta", + json!({ + "type": "content_block_delta", + "index": 0, + "delta": { + "type": "input_json_delta", + "partial_json": input.to_string() + } + }), + ); + append_sse( + &mut body, + "content_block_stop", + json!({ + "type": "content_block_stop", + "index": 0 + }), + ); + append_sse( + &mut body, + "message_delta", + json!({ + "type": "message_delta", + "delta": {"stop_reason": "tool_use", "stop_sequence": null}, + "usage": usage_json(12, 4) + }), + ); + append_sse(&mut body, "message_stop", json!({"type": "message_stop"})); + body +} + struct ToolUseSse<'a> { tool_id: &'a str, tool_name: &'a str, diff --git a/rust/crates/runtime/src/bash.rs b/rust/crates/runtime/src/bash.rs index aad27f6662..6b2c0c9247 100644 --- a/rust/crates/runtime/src/bash.rs +++ b/rust/crates/runtime/src/bash.rs @@ -206,6 +206,18 @@ fn prepare_command( prepared.env("HOME", cwd.join(".sandbox-home")); prepared.env("TMPDIR", cwd.join(".sandbox-tmp")); } + #[cfg(windows)] + { + if !posix_shell_available() { + let mut prepared = Command::new("powershell"); + prepared.args(windows_shell_args(command)).current_dir(cwd); + if sandbox_status.filesystem_active { + prepared.env("HOME", cwd.join(".sandbox-home")); + prepared.env("TMPDIR", cwd.join(".sandbox-tmp")); + } + return prepared; + } + } prepared } @@ -233,6 +245,18 @@ fn prepare_tokio_command( prepared.env("HOME", cwd.join(".sandbox-home")); prepared.env("TMPDIR", cwd.join(".sandbox-tmp")); } + 
#[cfg(windows)] + { + if !posix_shell_available() { + let mut prepared = TokioCommand::new("powershell"); + prepared.args(windows_shell_args(command)).current_dir(cwd); + if sandbox_status.filesystem_active { + prepared.env("HOME", cwd.join(".sandbox-home")); + prepared.env("TMPDIR", cwd.join(".sandbox-tmp")); + } + return prepared; + } + } prepared } @@ -241,6 +265,114 @@ fn prepare_sandbox_dirs(cwd: &std::path::Path) { let _ = std::fs::create_dir_all(cwd.join(".sandbox-tmp")); } +#[cfg(windows)] +fn posix_shell_available() -> bool { + use std::sync::OnceLock; + + static HAS_SH: OnceLock = OnceLock::new(); + *HAS_SH.get_or_init(|| { + Command::new("sh") + .arg("-lc") + .arg("printf ok") + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .status() + .is_ok_and(|status| status.success()) + }) +} + +#[cfg(windows)] +fn windows_shell_args(command: &str) -> Vec { + if let Some(script) = translate_posix_snippet_to_powershell(command) { + vec![ + "-NoProfile".to_string(), + "-EncodedCommand".to_string(), + encode_powershell(&script), + ] + } else { + vec![ + "-NoProfile".to_string(), + "-Command".to_string(), + command.to_string(), + ] + } +} + +#[cfg(windows)] +fn translate_posix_snippet_to_powershell(command: &str) -> Option { + if let Some((text, exit_code)) = command + .strip_prefix("printf '") + .and_then(|rest| rest.split_once("'; exit ")) + { + return Some(format!( + "[Console]::Out.Write({}); exit {exit_code}", + powershell_literal(text) + )); + } + + if let Some((text, exit_code)) = command + .strip_prefix("printf '") + .and_then(|rest| rest.split_once("' >&2; exit ")) + { + return Some(format!( + "[Console]::Error.Write({}); exit {exit_code}", + powershell_literal(text) + )); + } + + if let Some(text) = command + .strip_prefix("printf '") + .and_then(|rest| rest.strip_suffix('\'')) + { + return Some(format!( + "[Console]::Out.Write({})", + powershell_literal(text) + )); + } + + None +} + +#[cfg(windows)] +fn powershell_literal(value: 
&str) -> String { + format!("'{}'", value.replace('\'', "''")) +} + +#[cfg(windows)] +fn encode_powershell(script: &str) -> String { + let bytes: Vec = script.encode_utf16().flat_map(u16::to_le_bytes).collect(); + encode_base64(&bytes) +} + +#[cfg(windows)] +fn encode_base64(bytes: &[u8]) -> String { + const TABLE: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + let mut encoded = String::with_capacity(bytes.len().div_ceil(3) * 4); + + for chunk in bytes.chunks(3) { + let b0 = chunk[0]; + let b1 = *chunk.get(1).unwrap_or(&0); + let b2 = *chunk.get(2).unwrap_or(&0); + let n = (u32::from(b0) << 16) | (u32::from(b1) << 8) | u32::from(b2); + + encoded.push(TABLE[((n >> 18) & 0x3F) as usize] as char); + encoded.push(TABLE[((n >> 12) & 0x3F) as usize] as char); + encoded.push(if chunk.len() > 1 { + TABLE[((n >> 6) & 0x3F) as usize] as char + } else { + '=' + }); + encoded.push(if chunk.len() > 2 { + TABLE[(n & 0x3F) as usize] as char + } else { + '=' + }); + } + + encoded +} + #[cfg(test)] mod tests { use super::{execute_bash, BashCommandInput}; diff --git a/rust/crates/runtime/src/config.rs b/rust/crates/runtime/src/config.rs index f356e38f43..e5ce013b84 100644 --- a/rust/crates/runtime/src/config.rs +++ b/rust/crates/runtime/src/config.rs @@ -8,6 +8,7 @@ use crate::sandbox::{FilesystemIsolationMode, SandboxConfig}; /// Schema name advertised by generated settings files. pub const CLAW_SETTINGS_SCHEMA_NAME: &str = "SettingsSchema"; +const VERIFIER_AUTO_ENV_VAR: &str = "CLAUDE_CODE_VERIFIER_AUTO"; /// Origin of a loaded settings file in the configuration precedence chain. 
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] @@ -76,6 +77,7 @@ pub struct RuntimeFeatureConfig { pub enum RuntimeVerifierMode { Legacy, Staged, + Auto, } impl RuntimeVerifierMode { @@ -84,6 +86,7 @@ impl RuntimeVerifierMode { match self { Self::Legacy => "legacy", Self::Staged => "staged", + Self::Auto => "auto", } } } @@ -141,7 +144,15 @@ impl RuntimeVerifierConfig { #[must_use] pub fn staged(&self) -> bool { - self.mode == RuntimeVerifierMode::Staged + matches!( + self.mode, + RuntimeVerifierMode::Staged | RuntimeVerifierMode::Auto + ) + } + + #[must_use] + pub fn auto(&self) -> bool { + self.mode == RuntimeVerifierMode::Auto } #[must_use] @@ -934,24 +945,30 @@ fn validate_optional_hooks_config( fn parse_optional_verifier_config(root: &JsonValue) -> Result { let Some(object) = root.as_object() else { - return Ok(RuntimeVerifierConfig::default()); + return Ok(default_verifier_config_with_env()); }; let Some(verifier_value) = object.get("verifier") else { - return Ok(RuntimeVerifierConfig::default()); + return Ok(default_verifier_config_with_env()); }; let verifier = expect_object(verifier_value, "merged settings.verifier")?; let mut config = RuntimeVerifierConfig::default(); + let mut enabled_explicit = false; + let mut mode_explicit = false; + let mut final_gate_explicit = false; if let Some(enabled) = optional_bool(verifier, "enabled", "merged settings.verifier")? { + enabled_explicit = true; config.enabled = enabled; } if let Some(mode) = optional_string(verifier, "mode", "merged settings.verifier")? 
{ + mode_explicit = true; config.mode = match mode { "legacy" => RuntimeVerifierMode::Legacy, "staged" => RuntimeVerifierMode::Staged, + "auto" => RuntimeVerifierMode::Auto, other => { return Err(ConfigError::Parse(format!( - "merged settings.verifier.mode must be legacy or staged, got `{other}`" + "merged settings.verifier.mode must be legacy, staged, or auto, got `{other}`" ))) } }; @@ -962,6 +979,7 @@ fn parse_optional_verifier_config(root: &JsonValue) -> Result Result RuntimeVerifierConfig { + let mut config = RuntimeVerifierConfig::default(); + if verifier_auto_env_enabled() { + config.enabled = true; + config.mode = RuntimeVerifierMode::Auto; + config.final_gate = true; + } + config +} + +fn verifier_auto_env_enabled() -> bool { + std::env::var(VERIFIER_AUTO_ENV_VAR) + .ok() + .is_some_and(|value| matches!(value.trim(), "1" | "true" | "TRUE" | "True")) +} + fn parse_optional_permission_rules( root: &JsonValue, ) -> Result { @@ -1484,11 +1525,12 @@ mod tests { use super::{ deep_merge_objects, parse_permission_mode_label, ConfigLoader, ConfigSource, McpServerConfig, McpTransport, ResolvedPermissionMode, RuntimeHookConfig, - RuntimePluginConfig, RuntimeVerifierMode, CLAW_SETTINGS_SCHEMA_NAME, + RuntimePluginConfig, RuntimeVerifierMode, CLAW_SETTINGS_SCHEMA_NAME, VERIFIER_AUTO_ENV_VAR, }; use crate::json::JsonValue; use crate::sandbox::FilesystemIsolationMode; use std::fs; + use std::sync::{Mutex, OnceLock}; use std::time::{SystemTime, UNIX_EPOCH}; fn temp_dir() -> std::path::PathBuf { @@ -1499,6 +1541,11 @@ mod tests { std::env::temp_dir().join(format!("runtime-config-{nanos}")) } + fn env_lock() -> &'static Mutex<()> { + static LOCK: OnceLock> = OnceLock::new(); + LOCK.get_or_init(|| Mutex::new(())) + } + #[test] fn rejects_non_object_settings_files() { let root = temp_dir(); @@ -2436,4 +2483,130 @@ mod tests { fs::remove_dir_all(root).expect("cleanup temp dir"); } + + #[test] + fn parses_auto_verifier_mode_from_config() { + let root = temp_dir(); + let cwd = 
root.join("project"); + let home = root.join("home").join(".claw"); + let user_settings = home.join("settings.json"); + fs::create_dir_all(&home).expect("home config dir"); + fs::create_dir_all(&cwd).expect("project dir"); + fs::write( + &user_settings, + r#"{ + "verifier": { + "enabled": true, + "mode": "auto", + "finalGate": true + } +}"#, + ) + .expect("write user settings"); + + let config = ConfigLoader::new(&cwd, &home) + .load() + .expect("config should parse"); + + assert_eq!(config.verifier().mode(), RuntimeVerifierMode::Auto); + assert!(config.verifier().staged()); + assert!(config.verifier().auto()); + assert!(config.verifier().final_gate()); + + fs::remove_dir_all(root).expect("cleanup temp dir"); + } + + #[test] + fn verifier_auto_env_enables_auto_mode_when_config_is_implicit() { + let _guard = env_lock() + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + std::env::set_var(VERIFIER_AUTO_ENV_VAR, "1"); + let root = temp_dir(); + let cwd = root.join("project"); + let home = root.join("home").join(".claw"); + fs::create_dir_all(&home).expect("home config dir"); + fs::create_dir_all(&cwd).expect("project dir"); + fs::write(home.join("settings.json"), "{}").expect("write empty settings"); + + let config = ConfigLoader::new(&cwd, &home) + .load() + .expect("config should parse"); + + assert!(config.verifier().enabled()); + assert_eq!(config.verifier().mode(), RuntimeVerifierMode::Auto); + assert!(config.verifier().final_gate()); + + std::env::remove_var(VERIFIER_AUTO_ENV_VAR); + fs::remove_dir_all(root).expect("cleanup temp dir"); + } + + #[test] + fn verifier_auto_env_does_not_override_explicit_disable() { + let _guard = env_lock() + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + std::env::set_var(VERIFIER_AUTO_ENV_VAR, "1"); + let root = temp_dir(); + let cwd = root.join("project"); + let home = root.join("home").join(".claw"); + let user_settings = home.join("settings.json"); + fs::create_dir_all(&home).expect("home 
config dir"); + fs::create_dir_all(&cwd).expect("project dir"); + fs::write( + &user_settings, + r#"{ + "verifier": { + "enabled": false + } +}"#, + ) + .expect("write settings"); + + let config = ConfigLoader::new(&cwd, &home) + .load() + .expect("config should parse"); + + assert!(!config.verifier().enabled()); + assert_eq!(config.verifier().mode(), RuntimeVerifierMode::Legacy); + + std::env::remove_var(VERIFIER_AUTO_ENV_VAR); + fs::remove_dir_all(root).expect("cleanup temp dir"); + } + + #[test] + fn verifier_auto_env_does_not_override_explicit_mode() { + let _guard = env_lock() + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + std::env::set_var(VERIFIER_AUTO_ENV_VAR, "1"); + let root = temp_dir(); + let cwd = root.join("project"); + let home = root.join("home").join(".claw"); + let user_settings = home.join("settings.json"); + fs::create_dir_all(&home).expect("home config dir"); + fs::create_dir_all(&cwd).expect("project dir"); + fs::write( + &user_settings, + r#"{ + "verifier": { + "enabled": true, + "mode": "staged", + "finalGate": false + } +}"#, + ) + .expect("write settings"); + + let config = ConfigLoader::new(&cwd, &home) + .load() + .expect("config should parse"); + + assert_eq!(config.verifier().mode(), RuntimeVerifierMode::Staged); + assert!(!config.verifier().auto()); + assert!(!config.verifier().final_gate()); + + std::env::remove_var(VERIFIER_AUTO_ENV_VAR); + fs::remove_dir_all(root).expect("cleanup temp dir"); + } } diff --git a/rust/crates/runtime/src/conversation.rs b/rust/crates/runtime/src/conversation.rs index b378a69f72..acdeb9da72 100644 --- a/rust/crates/runtime/src/conversation.rs +++ b/rust/crates/runtime/src/conversation.rs @@ -1,5 +1,6 @@ use std::collections::{BTreeMap, BTreeSet}; use std::fmt::{Display, Formatter}; +use std::hash::{Hash, Hasher}; use std::path::PathBuf; use serde_json::{Map, Value}; @@ -24,6 +25,7 @@ const MAX_FINAL_GATE_ATTEMPTS: u32 = 5; const DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD: u32 = 
100_000; const AUTO_COMPACTION_THRESHOLD_ENV_VAR: &str = "CLAUDE_CODE_AUTO_COMPACT_INPUT_TOKENS"; +const VERIFIER_REPORT_STAGE_ENV_VAR: &str = "CLAUDE_CODE_VERIFIER_REPORT_STAGE"; /// Fully assembled request payload sent to the upstream model client. #[derive(Debug, Clone, PartialEq, Eq)] @@ -384,9 +386,13 @@ where fn persist_verification_report( &mut self, report: &VerificationReport, + report_mode: &str, ) -> Result<(), RuntimeError> { self.session - .push_message(ConversationMessage::verification_report(report)) + .push_message(ConversationMessage::verification_report( + report, + Some(report_mode), + )) .map_err(|error| RuntimeError::new(error.to_string())) } @@ -474,6 +480,11 @@ where failure_kind: Some(VerificationFailureKind::ToolUnavailable), duration_ms: 0, truncated_output: reason.to_string(), + step_kind: None, + target_scope: None, + package_name: None, + package_manager: None, + launcher_kind: None, }], } } @@ -498,6 +509,7 @@ where } self.record_turn_started(&user_input); + let turn_started_at = std::time::Instant::now(); self.session .push_user_text(user_input) .map_err(|error| RuntimeError::new(error.to_string()))?; @@ -623,13 +635,16 @@ where ledger_entry.last_final_verified_sequence = Some(ledger_entry.last_mutation_sequence); } + let report_mode = verification_report_mode(&report); self.record_verifier_ran( iterations, &format!("final_gate:{}", entry.adapter_id), - false, + &report, + entry.last_mutation_sequence, + &report_mode, ); verification_gate.report_ids.push(report.report_id.clone()); - self.persist_verification_report(&report)?; + self.persist_verification_report(&report, &report_mode)?; gate_reports.push(report); continue; } @@ -650,13 +665,16 @@ where } else { Self::make_final_gate_reminder(&entry) }; + let report_mode = verification_report_mode(&report); self.record_verifier_ran( iterations, &format!("final_gate:{}", entry.adapter_id), - report.is_success(), + &report, + entry.last_mutation_sequence, + &report_mode, ); 
verification_gate.report_ids.push(report.report_id.clone()); - self.persist_verification_report(&report)?; + self.persist_verification_report(&report, &report_mode)?; gate_reports.push(report); } @@ -767,10 +785,13 @@ where mutation_sequence, ); for report in reports { + let report_mode = verification_report_mode(&report); self.record_verifier_ran( iterations, &tool_name, - report.is_success(), + &report, + mutation_sequence, + &report_mode, ); output = prepend_verifier_summary(&report.short_summary(), output); if !report.is_success() { @@ -817,7 +838,8 @@ where self.record_tool_finished(iterations, &result_message); tool_results.push(result_message); for report in pending_verification_reports { - self.persist_verification_report(&report)?; + let report_mode = verification_report_mode(&report); + self.persist_verification_report(&report, &report_mode)?; verification_reports.push(report); } } @@ -835,7 +857,10 @@ where usage: self.usage_tracker.cumulative_usage(), auto_compaction, }; - self.record_turn_completed(&summary); + self.record_turn_completed( + &summary, + u64::try_from(turn_started_at.elapsed().as_millis()).unwrap_or(u64::MAX), + ); Ok(summary) } @@ -974,7 +999,7 @@ where session_tracer.record("tool_execution_finished", attributes); } - fn record_turn_completed(&self, summary: &TurnSummary) { + fn record_turn_completed(&self, summary: &TurnSummary, turn_latency_ms: u64) { let Some(session_tracer) = &self.session_tracer else { return; }; @@ -1008,10 +1033,22 @@ where "prompt_cache_events".to_string(), Value::from(summary.prompt_cache_events.len() as u64), ); + attributes.insert("turn_latency_ms".to_string(), Value::from(turn_latency_ms)); + attributes.insert( + "tokens_total".to_string(), + Value::from(u64::from(summary.usage.total_tokens())), + ); session_tracer.record("turn_completed", attributes); } - fn record_verifier_ran(&self, iteration: usize, tool_name: &str, passed: bool) { + fn record_verifier_ran( + &self, + iteration: usize, + tool_name: &str, 
+ report: &VerificationReport, + mutation_sequence: u64, + report_mode: &str, + ) { let Some(session_tracer) = &self.session_tracer else { return; }; @@ -1022,7 +1059,41 @@ where "tool_name".to_string(), Value::String(tool_name.to_string()), ); - attributes.insert("passed".to_string(), Value::Bool(passed)); + attributes.insert("passed".to_string(), Value::Bool(report.is_success())); + attributes.insert( + "adapter_id".to_string(), + Value::String(report.adapter_id.clone()), + ); + attributes.insert( + "phase".to_string(), + Value::String(report.phase.as_str().to_string()), + ); + attributes.insert( + "mutation_sequence".to_string(), + Value::from(mutation_sequence), + ); + attributes.insert( + "report_mode".to_string(), + Value::String(report_mode.to_string()), + ); + if let Some(primary_step) = report.primary_step() { + attributes.insert( + "duration_ms".to_string(), + Value::from(primary_step.duration_ms), + ); + if let Some(target_scope) = &primary_step.target_scope { + attributes.insert( + "target_scope".to_string(), + Value::String(target_scope.clone()), + ); + } + if let Some(failure_kind) = primary_step.failure_kind { + attributes.insert( + "failure_kind".to_string(), + Value::String(failure_kind.as_str().to_string()), + ); + } + } session_tracer.record("verifier_ran", attributes); } @@ -1056,6 +1127,26 @@ fn parse_auto_compaction_threshold(value: Option<&str>) -> u32 { .unwrap_or(DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD) } +fn verification_report_mode(report: &VerificationReport) -> String { + let stage = std::env::var(VERIFIER_REPORT_STAGE_ENV_VAR).map_or_else( + |_| "shadow".to_string(), + |value| value.trim().to_ascii_lowercase(), + ); + let bucket = stable_percent_bucket(&report.report_id); + match stage.as_str() { + "typed" => "typed-primary".to_string(), + "ab" if bucket < 10 => "typed-primary".to_string(), + "shadow" if bucket < 10 => "shadow".to_string(), + _ => "text-primary".to_string(), + } +} + +fn stable_percent_bucket(value: &str) -> u64 
{ + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + value.hash(&mut hasher); + hasher.finish() % 100 +} + fn build_assistant_message( events: Vec, ) -> Result< @@ -1179,9 +1270,10 @@ impl ToolExecutor for StaticToolExecutor { #[cfg(test)] mod tests { use super::{ - build_assistant_message, parse_auto_compaction_threshold, ApiClient, ApiRequest, - AssistantEvent, AutoCompactionEvent, ConversationRuntime, PromptCacheEvent, RuntimeError, - StaticToolExecutor, ToolExecutor, DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD, + build_assistant_message, parse_auto_compaction_threshold, stable_percent_bucket, + verification_report_mode, ApiClient, ApiRequest, AssistantEvent, AutoCompactionEvent, + ConversationRuntime, PromptCacheEvent, RuntimeError, StaticToolExecutor, ToolExecutor, + DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD, VERIFIER_REPORT_STAGE_ENV_VAR, }; use crate::compact::CompactionConfig; use crate::config::{RuntimeFeatureConfig, RuntimeHookConfig}; @@ -1193,13 +1285,14 @@ mod tests { use crate::session::{ContentBlock, MessageRole, Session}; use crate::usage::TokenUsage; use crate::verifier::{ - VerificationContext, VerificationPhase, VerificationReport, VerificationStatus, - VerificationTarget, Verifier, + VerificationContext, VerificationFailureKind, VerificationPhase, VerificationReport, + VerificationStatus, VerificationStepReport, VerificationTarget, Verifier, }; use crate::ToolError; use std::fs; use std::path::PathBuf; use std::sync::Arc; + use std::sync::{Mutex, OnceLock}; use std::time::{SystemTime, UNIX_EPOCH}; use telemetry::{MemoryTelemetrySink, SessionTracer, TelemetryEvent}; @@ -1263,6 +1356,11 @@ mod tests { } } + fn env_lock() -> &'static Mutex<()> { + static LOCK: OnceLock> = OnceLock::new(); + LOCK.get_or_init(|| Mutex::new(())) + } + struct PromptAllowOnce; impl PermissionPrompter for PromptAllowOnce { @@ -1290,6 +1388,7 @@ mod tests { git_status: None, git_diff: None, git_context: None, + context_pack: None, 
instruction_files: Vec::new(), }) .with_os("linux", "6.8") @@ -2692,14 +2791,180 @@ mod tests { assert!(summary.verification_gate.passed); } + #[test] + fn verification_report_mode_supports_shadow_ab_and_typed_stages() { + let _guard = env_lock() + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + std::env::remove_var(VERIFIER_REPORT_STAGE_ENV_VAR); + let shadow_report = test_verification_report_with_id("shadow-bucket"); + let shadow_bucket = stable_percent_bucket(&shadow_report.report_id); + let shadow_mode = verification_report_mode(&shadow_report); + if shadow_bucket < 10 { + assert_eq!(shadow_mode, "shadow"); + } else { + assert_eq!(shadow_mode, "text-primary"); + } + + let typed_id = report_id_for_bucket(|bucket| bucket < 10); + std::env::set_var(VERIFIER_REPORT_STAGE_ENV_VAR, "ab"); + let typed_report = test_verification_report_with_id(&typed_id); + assert_eq!(verification_report_mode(&typed_report), "typed-primary"); + + let control_id = report_id_for_bucket(|bucket| bucket >= 10); + let control_report = test_verification_report_with_id(&control_id); + assert_eq!(verification_report_mode(&control_report), "text-primary"); + + std::env::set_var(VERIFIER_REPORT_STAGE_ENV_VAR, "typed"); + assert_eq!(verification_report_mode(&control_report), "typed-primary"); + std::env::remove_var(VERIFIER_REPORT_STAGE_ENV_VAR); + } + + #[test] + fn verifier_telemetry_records_report_mode_failure_kind_and_target_scope() { + struct EditApi { + done: bool, + } + + impl ApiClient for EditApi { + fn stream( + &mut self, + _request: ApiRequest, + ) -> Result, RuntimeError> { + if self.done { + Ok(vec![ + AssistantEvent::TextDelta("done".to_string()), + AssistantEvent::MessageStop, + ]) + } else { + self.done = true; + Ok(vec![ + AssistantEvent::ToolUse { + id: "tool-1".to_string(), + name: "edit_file".to_string(), + input: r#"{"file_path":"src/lib.rs"}"#.to_string(), + }, + AssistantEvent::MessageStop, + ]) + } + } + } + + struct TelemetryVerifier; + + impl Verifier 
for TelemetryVerifier { + fn quick_verify(&self, context: &VerificationContext) -> Vec { + vec![VerificationReport { + report_id: "report-typed".to_string(), + phase: VerificationPhase::Quick, + adapter_id: "rust-cargo".to_string(), + project_root: PathBuf::from("/workspace"), + touched_paths: context.touched_paths.clone(), + status: VerificationStatus::Failed, + summary_text: "[verifier:quick:rust-cargo] failed (/workspace)".to_string(), + steps: vec![VerificationStepReport { + adapter: "rust-cargo".to_string(), + project_root: PathBuf::from("/workspace"), + label: "cargo check".to_string(), + command: "cargo check -p demo".to_string(), + phase: VerificationPhase::Quick, + status: VerificationStatus::Failed, + failure_kind: Some(VerificationFailureKind::Code), + duration_ms: 42, + truncated_output: "error[E0308]".to_string(), + step_kind: Some("cargo_check".to_string()), + target_scope: Some("package".to_string()), + package_name: Some("demo".to_string()), + package_manager: None, + launcher_kind: None, + }], + }] + } + + fn final_verify(&self, _target: &VerificationTarget) -> Option { + None + } + } + + let _guard = env_lock() + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + std::env::set_var(VERIFIER_REPORT_STAGE_ENV_VAR, "typed"); + let sink = Arc::new(MemoryTelemetrySink::default()); + let tracer = SessionTracer::new("session-runtime", sink.clone()); + let mut runtime = ConversationRuntime::new( + Session::new(), + EditApi { done: false }, + StaticToolExecutor::new().register("edit_file", |_| Ok("edited".to_string())), + PermissionPolicy::new(PermissionMode::DangerFullAccess), + vec!["system".to_string()], + ) + .with_verifier(Box::new(TelemetryVerifier)) + .with_session_tracer(tracer); + + let _summary = runtime.run_turn("fix", None).expect("turn should complete"); + std::env::remove_var(VERIFIER_REPORT_STAGE_ENV_VAR); + + let events = sink.events(); + let verifier_trace = events + .into_iter() + .find_map(|event| match event { + 
TelemetryEvent::SessionTrace(trace) if trace.name == "verifier_ran" => Some(trace), + _ => None, + }) + .expect("verifier trace should exist"); + assert_eq!( + verifier_trace.attributes.get("report_mode"), + Some(&serde_json::Value::String("typed-primary".to_string())) + ); + assert_eq!( + verifier_trace.attributes.get("failure_kind"), + Some(&serde_json::Value::String("code".to_string())) + ); + assert_eq!( + verifier_trace.attributes.get("target_scope"), + Some(&serde_json::Value::String("package".to_string())) + ); + assert_eq!( + verifier_trace.attributes.get("mutation_sequence"), + Some(&serde_json::Value::Number(1_u64.into())) + ); + } + fn test_verification_report( phase: VerificationPhase, status: VerificationStatus, touched_paths: &[PathBuf], summary_text: &str, + ) -> VerificationReport { + test_verification_report_with_fields( + format!("test-{}-{}", phase.as_str(), status.as_str()), + phase, + status, + touched_paths, + summary_text, + ) + } + + fn test_verification_report_with_id(report_id: &str) -> VerificationReport { + test_verification_report_with_fields( + report_id.to_string(), + VerificationPhase::Quick, + VerificationStatus::Failed, + &[PathBuf::from("src/lib.rs")], + "[verifier:quick:rust-cargo] failed (/workspace)\n[verifier] cargo check: FAIL", + ) + } + + fn test_verification_report_with_fields( + report_id: String, + phase: VerificationPhase, + status: VerificationStatus, + touched_paths: &[PathBuf], + summary_text: &str, ) -> VerificationReport { VerificationReport { - report_id: format!("test-{}-{}", phase.as_str(), status.as_str()), + report_id, phase, adapter_id: "rust-cargo".to_string(), project_root: PathBuf::from("/workspace"), @@ -2709,4 +2974,14 @@ mod tests { steps: Vec::new(), } } + + fn report_id_for_bucket(predicate: impl Fn(u64) -> bool) -> String { + for index in 0..10_000 { + let candidate = format!("bucket-report-{index}"); + if predicate(stable_percent_bucket(&candidate)) { + return candidate; + } + } + panic!("failed to 
find report id for bucket predicate"); + } } diff --git a/rust/crates/runtime/src/critic.rs b/rust/crates/runtime/src/critic.rs new file mode 100644 index 0000000000..5fba290aa0 --- /dev/null +++ b/rust/crates/runtime/src/critic.rs @@ -0,0 +1,260 @@ +//! Critic subagent gate. +//! +//! A thin planner that decides whether to invoke a second-opinion "critic" +//! subagent on a mutation. The critic itself is expected to be a cheap-model +//! pass that re-reads a diff and reinjects at most P0/P1 findings back into the +//! main turn. This module only encodes: +//! * Diff-size thresholds (≥4 files OR ≥200 added/removed lines OR >1 root). +//! * A `subagent_depth` guard so critic calls never nest. +//! * A dedup set so each `mutation_sequence` triggers at most one critic run. +//! * The preferred cheap-model hint. +//! +//! Keeping the policy isolated lets the runtime wire it into `conversation.rs` +//! without that file having to know the threshold numerics. + +use std::collections::HashSet; + +/// Diff-size snapshot fed to the critic planner. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct DiffStats { + pub files_changed: usize, + pub lines_changed: usize, + pub distinct_roots: usize, +} + +/// Why the critic was (or was not) invoked. Emitted into telemetry. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum CriticDecision { + /// Critic should run for `mutation_sequence`. Carries the reason that + /// tipped the threshold. + Run { reason: String }, + /// Critic should not run. + Skip { reason: String }, +} + +/// Thresholds per spec: ≥4 files OR ≥200 lines OR >1 root. +pub const CRITIC_FILE_THRESHOLD: usize = 4; +pub const CRITIC_LINE_THRESHOLD: usize = 200; +pub const CRITIC_ROOT_THRESHOLD: usize = 1; // strictly more than 1 + +/// Model hint the runtime should use when spawning the critic subagent. +/// Kept as a free-form string to avoid a compile-time coupling to model IDs. 
+pub const CRITIC_MODEL_HINT: &str = "claude-haiku"; + +/// Planner that tracks which mutation sequences have already been audited. +/// +/// Callers construct one per conversation and call [`CriticPlanner::plan`] each +/// time a new mutation finalizes. The planner is intentionally infallible — +/// actually spawning the subagent and reinjecting findings stays in the caller. +#[derive(Debug, Default)] +pub struct CriticPlanner { + audited: HashSet, +} + +impl CriticPlanner { + #[must_use] + pub fn new() -> Self { + Self::default() + } + + /// Decide whether to invoke the critic for this mutation. Records the + /// sequence as audited only when the decision is [`CriticDecision::Run`] — + /// skips from depth or thresholds do NOT consume the slot, so a later + /// mutation with the same sequence number (shouldn't happen, but still) + /// would be evaluated fresh. + pub fn plan( + &mut self, + mutation_sequence: u64, + subagent_depth: u32, + stats: DiffStats, + ) -> CriticDecision { + if subagent_depth > 0 { + return CriticDecision::Skip { + reason: format!("nested subagent depth={subagent_depth}"), + }; + } + if self.audited.contains(&mutation_sequence) { + return CriticDecision::Skip { + reason: format!("already audited mutation_sequence={mutation_sequence}"), + }; + } + let tripped = trip_reason(stats); + match tripped { + Some(reason) => { + self.audited.insert(mutation_sequence); + CriticDecision::Run { reason } + } + None => CriticDecision::Skip { + reason: format!( + "below thresholds (files={}, lines={}, roots={})", + stats.files_changed, stats.lines_changed, stats.distinct_roots + ), + }, + } + } + + #[must_use] + pub fn has_audited(&self, mutation_sequence: u64) -> bool { + self.audited.contains(&mutation_sequence) + } +} + +fn trip_reason(stats: DiffStats) -> Option { + if stats.files_changed >= CRITIC_FILE_THRESHOLD { + return Some(format!( + "files_changed={} >= {}", + stats.files_changed, CRITIC_FILE_THRESHOLD + )); + } + if stats.lines_changed >= 
CRITIC_LINE_THRESHOLD { + return Some(format!( + "lines_changed={} >= {}", + stats.lines_changed, CRITIC_LINE_THRESHOLD + )); + } + if stats.distinct_roots > CRITIC_ROOT_THRESHOLD { + return Some(format!( + "distinct_roots={} > {}", + stats.distinct_roots, CRITIC_ROOT_THRESHOLD + )); + } + None +} + +#[cfg(test)] +mod tests { + use super::*; + + fn small() -> DiffStats { + DiffStats { + files_changed: 1, + lines_changed: 5, + distinct_roots: 1, + } + } + + #[test] + fn below_thresholds_skips() { + let mut planner = CriticPlanner::new(); + match planner.plan(1, 0, small()) { + CriticDecision::Skip { reason } => assert!(reason.starts_with("below thresholds")), + CriticDecision::Run { .. } => panic!("expected skip, got run"), + } + } + + #[test] + fn file_threshold_runs() { + let mut planner = CriticPlanner::new(); + let stats = DiffStats { + files_changed: 4, + lines_changed: 20, + distinct_roots: 1, + }; + assert!(matches!( + planner.plan(1, 0, stats), + CriticDecision::Run { .. } + )); + } + + #[test] + fn line_threshold_runs() { + let mut planner = CriticPlanner::new(); + let stats = DiffStats { + files_changed: 1, + lines_changed: 200, + distinct_roots: 1, + }; + assert!(matches!( + planner.plan(1, 0, stats), + CriticDecision::Run { .. } + )); + } + + #[test] + fn root_threshold_runs_when_strictly_more_than_one() { + let mut planner = CriticPlanner::new(); + let stats = DiffStats { + files_changed: 1, + lines_changed: 5, + distinct_roots: 2, + }; + assert!(matches!( + planner.plan(1, 0, stats), + CriticDecision::Run { .. } + )); + } + + #[test] + fn single_root_does_not_trip_root_threshold() { + let mut planner = CriticPlanner::new(); + let stats = DiffStats { + files_changed: 1, + lines_changed: 5, + distinct_roots: 1, + }; + assert!(matches!( + planner.plan(1, 0, stats), + CriticDecision::Skip { .. 
} + )); + } + + #[test] + fn nested_subagent_depth_blocks_run() { + let mut planner = CriticPlanner::new(); + let stats = DiffStats { + files_changed: 10, + lines_changed: 500, + distinct_roots: 5, + }; + let decision = planner.plan(1, 1, stats); + match decision { + CriticDecision::Skip { reason } => assert!(reason.contains("nested subagent depth")), + CriticDecision::Run { .. } => panic!("expected skip, got run"), + } + assert!( + !planner.has_audited(1), + "depth-blocked run must not consume the mutation slot" + ); + } + + #[test] + fn one_run_per_mutation_sequence() { + let mut planner = CriticPlanner::new(); + let stats = DiffStats { + files_changed: 4, + lines_changed: 20, + distinct_roots: 1, + }; + assert!(matches!( + planner.plan(42, 0, stats), + CriticDecision::Run { .. } + )); + match planner.plan(42, 0, stats) { + CriticDecision::Skip { reason } => assert!(reason.starts_with("already audited")), + CriticDecision::Run { .. } => panic!("expected skip on dup, got run"), + } + } + + #[test] + fn distinct_mutation_sequences_are_independent() { + let mut planner = CriticPlanner::new(); + let stats = DiffStats { + files_changed: 4, + lines_changed: 20, + distinct_roots: 1, + }; + assert!(matches!( + planner.plan(1, 0, stats), + CriticDecision::Run { .. } + )); + assert!(matches!( + planner.plan(2, 0, stats), + CriticDecision::Run { .. 
} + )); + } + + #[test] + fn model_hint_is_cheap() { + assert_eq!(CRITIC_MODEL_HINT, "claude-haiku"); + } +} diff --git a/rust/crates/runtime/src/lib.rs b/rust/crates/runtime/src/lib.rs index 5f11fb5641..3f0843717e 100644 --- a/rust/crates/runtime/src/lib.rs +++ b/rust/crates/runtime/src/lib.rs @@ -12,6 +12,7 @@ mod compact; mod config; pub mod config_validate; mod conversation; +pub mod critic; mod file_ops; mod git_context; pub mod green_contract; @@ -33,6 +34,7 @@ mod policy_engine; mod prompt; pub mod recovery_recipes; mod remote; +pub mod rollout_metrics; pub mod sandbox; mod session; pub mod session_control; diff --git a/rust/crates/runtime/src/permission_enforcer.rs b/rust/crates/runtime/src/permission_enforcer.rs index 6ff872bcc8..8cc2c6473c 100644 --- a/rust/crates/runtime/src/permission_enforcer.rs +++ b/rust/crates/runtime/src/permission_enforcer.rs @@ -6,6 +6,7 @@ //! Permission enforcement layer that gates tool execution based on the //! active `PermissionPolicy`. +use crate::bash_validation::{validate_read_only, ValidationResult}; use crate::permissions::{PermissionMode, PermissionOutcome, PermissionPolicy}; use serde::{Deserialize, Serialize}; @@ -146,21 +147,21 @@ impl PermissionEnforcer { let mode = self.policy.active_mode(); match mode { - PermissionMode::ReadOnly => { - if is_read_only_command(command) { - EnforcementResult::Allowed - } else { - EnforcementResult::Denied { - tool: "bash".to_owned(), - active_mode: mode.as_str().to_owned(), - required_mode: PermissionMode::WorkspaceWrite.as_str().to_owned(), - reason: format!( - "command may modify state; not allowed in '{}' mode", - mode.as_str() - ), - } - } - } + PermissionMode::ReadOnly => match validate_read_only(command, mode) { + ValidationResult::Allow => EnforcementResult::Allowed, + ValidationResult::Block { reason } => EnforcementResult::Denied { + tool: "bash".to_owned(), + active_mode: mode.as_str().to_owned(), + required_mode: PermissionMode::WorkspaceWrite.as_str().to_owned(), + 
reason, + }, + ValidationResult::Warn { message } => EnforcementResult::Denied { + tool: "bash".to_owned(), + active_mode: mode.as_str().to_owned(), + required_mode: PermissionMode::WorkspaceWrite.as_str().to_owned(), + reason: message, + }, + }, PermissionMode::Prompt => EnforcementResult::Denied { tool: "bash".to_owned(), active_mode: mode.as_str().to_owned(), @@ -190,85 +191,20 @@ fn is_within_workspace(path: &str, workspace_root: &str) -> bool { normalized.starts_with(&root) || normalized == workspace_root.trim_end_matches('/') } -/// Conservative heuristic: is this bash command read-only? +#[cfg(test)] fn is_read_only_command(command: &str) -> bool { - let first_token = command - .split_whitespace() - .next() - .unwrap_or("") - .rsplit('/') - .next() - .unwrap_or(""); - + let trimmed = command.trim(); + if trimmed.is_empty() { + return false; + } + if trimmed.contains(" -i ") || trimmed.contains(" --in-place") || trimmed.starts_with("sed -i") + { + return false; + } matches!( - first_token, - "cat" - | "head" - | "tail" - | "less" - | "more" - | "wc" - | "ls" - | "find" - | "grep" - | "rg" - | "awk" - | "sed" - | "echo" - | "printf" - | "which" - | "where" - | "whoami" - | "pwd" - | "env" - | "printenv" - | "date" - | "cal" - | "df" - | "du" - | "free" - | "uptime" - | "uname" - | "file" - | "stat" - | "diff" - | "sort" - | "uniq" - | "tr" - | "cut" - | "paste" - | "tee" - | "xargs" - | "test" - | "true" - | "false" - | "type" - | "readlink" - | "realpath" - | "basename" - | "dirname" - | "sha256sum" - | "md5sum" - | "b3sum" - | "xxd" - | "hexdump" - | "od" - | "strings" - | "tree" - | "jq" - | "yq" - | "python3" - | "python" - | "node" - | "ruby" - | "cargo" - | "rustc" - | "git" - | "gh" - ) && !command.contains("-i ") - && !command.contains("--in-place") - && !command.contains(" > ") - && !command.contains(" >> ") + validate_read_only(command, PermissionMode::ReadOnly), + ValidationResult::Allow + ) } #[cfg(test)] diff --git 
a/rust/crates/runtime/src/prompt.rs b/rust/crates/runtime/src/prompt.rs index e46b7ebee5..c270c62f23 100644 --- a/rust/crates/runtime/src/prompt.rs +++ b/rust/crates/runtime/src/prompt.rs @@ -42,6 +42,8 @@ pub const SYSTEM_PROMPT_DYNAMIC_BOUNDARY: &str = "__SYSTEM_PROMPT_DYNAMIC_BOUNDA pub const FRONTIER_MODEL_NAME: &str = "Claude Opus 4.6"; const MAX_INSTRUCTION_FILE_CHARS: usize = 4_000; const MAX_TOTAL_INSTRUCTION_CHARS: usize = 12_000; +const MAX_CONTEXT_PACK_CHARS: usize = 6_000; +const MAX_CONTEXT_PACK_FILES: usize = 8; /// Contents of an instruction file included in prompt construction. #[derive(Debug, Clone, PartialEq, Eq)] @@ -50,6 +52,23 @@ pub struct ContextFile { pub content: String, } +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct ContextPackFile { + pub status: String, + pub path: PathBuf, + pub project_kind: Option, + pub project_root: Option, + pub entrypoint: Option, + pub related_test: Option, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +pub struct ContextPack { + pub repo_root: Option, + pub branch: Option, + pub changed_files: Vec, +} + /// Project-local context injected into the rendered system prompt. 
#[derive(Debug, Clone, Default, PartialEq, Eq)] pub struct ProjectContext { @@ -58,6 +77,7 @@ pub struct ProjectContext { pub git_status: Option, pub git_diff: Option, pub git_context: Option, + pub context_pack: Option, pub instruction_files: Vec, } @@ -74,6 +94,7 @@ impl ProjectContext { git_status: None, git_diff: None, git_context: None, + context_pack: None, instruction_files, }) } @@ -86,6 +107,11 @@ impl ProjectContext { context.git_status = read_git_status(&context.cwd); context.git_diff = read_git_diff(&context.cwd); context.git_context = GitContext::detect(&context.cwd); + context.context_pack = build_context_pack( + &context.cwd, + context.git_status.as_deref(), + context.git_context.as_ref(), + ); Ok(context) } } @@ -285,6 +311,277 @@ fn read_git_output(cwd: &Path, args: &[&str]) -> Option { String::from_utf8(output.stdout).ok() } +#[derive(Debug, Clone, PartialEq, Eq)] +struct GitStatusEntry { + status: String, + path: PathBuf, +} + +fn build_context_pack( + cwd: &Path, + git_status: Option<&str>, + git_context: Option<&GitContext>, +) -> Option { + let repo_root = read_git_repo_root(cwd).or_else(|| Some(cwd.to_path_buf()))?; + let entries = parse_git_status_snapshot(git_status?); + if entries.is_empty() { + return None; + } + + let changed_files = entries + .into_iter() + .take(MAX_CONTEXT_PACK_FILES) + .map(|entry| { + let absolute_path = repo_root.join(&entry.path); + let (project_root, project_kind) = detect_project_root(&absolute_path, &repo_root) + .unwrap_or((repo_root.clone(), None)); + let entrypoint = find_entrypoint(&project_root, project_kind.as_deref()); + let related_test = + find_related_test(&absolute_path, &project_root, project_kind.as_deref()); + ContextPackFile { + status: entry.status, + path: entry.path, + project_kind, + project_root: Some(project_root), + entrypoint, + related_test, + } + }) + .collect::>(); + + Some(ContextPack { + repo_root: Some(repo_root), + branch: git_context.and_then(|context| context.branch.clone()), + 
changed_files, + }) +} + +fn read_git_repo_root(cwd: &Path) -> Option { + let output = Command::new("git") + .args(["rev-parse", "--show-toplevel"]) + .current_dir(cwd) + .output() + .ok()?; + if !output.status.success() { + return None; + } + let stdout = String::from_utf8(output.stdout).ok()?; + let trimmed = stdout.trim(); + if trimmed.is_empty() { + None + } else { + Some(PathBuf::from(trimmed)) + } +} + +fn parse_git_status_snapshot(status: &str) -> Vec { + status + .lines() + .filter_map(|line| { + let trimmed = line.trim_end(); + if trimmed.is_empty() || trimmed.starts_with("##") || trimmed.len() < 4 { + return None; + } + let status_code = &trimmed[..2]; + let raw_path = trimmed[3..].trim(); + if raw_path.is_empty() { + return None; + } + let path = raw_path + .split(" -> ") + .last() + .map(str::trim) + .filter(|path| !path.is_empty())?; + Some(GitStatusEntry { + status: classify_git_status(status_code), + path: PathBuf::from(path), + }) + }) + .collect() +} + +fn classify_git_status(status_code: &str) -> String { + if status_code == "??" 
{ + return "untracked".to_string(); + } + let significant = status_code + .chars() + .find(|ch| !ch.is_ascii_whitespace()) + .unwrap_or('M'); + match significant { + 'A' => "added", + 'M' => "modified", + 'D' => "deleted", + 'R' => "renamed", + 'C' => "copied", + 'T' => "typechange", + 'U' => "conflict", + _ => "changed", + } + .to_string() +} + +fn detect_project_root(path: &Path, repo_root: &Path) -> Option<(PathBuf, Option)> { + let mut current = if path.is_dir() { + path.to_path_buf() + } else { + path.parent()?.to_path_buf() + }; + loop { + for (marker, kind) in [ + ("Cargo.toml", "rust"), + ("package.json", "node"), + ("pyproject.toml", "python"), + ("go.mod", "go"), + ] { + if current.join(marker).is_file() { + return Some((current, Some(kind.to_string()))); + } + } + if current == repo_root { + break; + } + current = current.parent()?.to_path_buf(); + } + Some((repo_root.to_path_buf(), None)) +} + +fn find_entrypoint(project_root: &Path, project_kind: Option<&str>) -> Option { + let candidates: &[&str] = match project_kind { + Some("rust") => &["src/main.rs", "src/lib.rs", "src/bin/main.rs"], + Some("node") => &[ + "src/index.ts", + "src/main.ts", + "src/index.tsx", + "index.ts", + "src/index.js", + "src/main.js", + "index.js", + ], + Some("python") => &["app/main.py", "main.py", "src/main.py", "src/__init__.py"], + Some("go") => &["main.go", "cmd/main.go"], + _ => &["README.md"], + }; + candidates + .iter() + .map(|candidate| project_root.join(candidate)) + .find(|candidate| candidate.is_file()) +} + +fn find_related_test( + changed_path: &Path, + project_root: &Path, + project_kind: Option<&str>, +) -> Option { + let file_name = changed_path.file_name()?.to_string_lossy().to_lowercase(); + if is_probable_test_name(&file_name) { + return changed_path + .strip_prefix(project_root) + .ok() + .map(|relative| project_root.join(relative)); + } + + let stem = changed_path.file_stem()?.to_string_lossy(); + let extension = changed_path + .extension() + 
.and_then(|ext| ext.to_str()) + .unwrap_or(""); + let candidates = match project_kind { + Some("rust") => vec![format!("tests/{stem}.rs"), format!("tests/test_{stem}.rs")], + Some("node") => vec![ + format!("src/{stem}.test.{extension}"), + format!("src/{stem}.spec.{extension}"), + format!("tests/{stem}.test.{extension}"), + format!("tests/{stem}.spec.{extension}"), + ], + Some("python") => vec![format!("tests/test_{stem}.py"), format!("test_{stem}.py")], + Some("go") => vec![format!("{stem}_test.go"), format!("tests/{stem}_test.go")], + _ => Vec::new(), + }; + candidates + .into_iter() + .map(|candidate| project_root.join(candidate)) + .find(|candidate| candidate.is_file()) +} + +fn is_probable_test_name(file_name: &str) -> bool { + file_name.starts_with("test_") + || file_name.ends_with("_test.go") + || file_name.ends_with(".test.ts") + || file_name.ends_with(".test.tsx") + || file_name.ends_with(".test.js") + || file_name.ends_with(".spec.ts") + || file_name.ends_with(".spec.tsx") + || file_name.ends_with(".spec.js") + || file_name.ends_with("_test.rs") +} + +fn render_context_pack(pack: &ContextPack) -> String { + let repo_root = pack.repo_root.as_deref().unwrap_or_else(|| Path::new(".")); + let mut lines = Vec::new(); + lines.extend(prepend_bullets(vec![ + format!("Repo root: {}", repo_root.display()), + format!( + "Git branch: {}", + pack.branch.as_deref().unwrap_or("unknown") + ), + format!("Changed files in scope: {}.", pack.changed_files.len()), + ])); + lines.push("Changed file targets:".to_string()); + for changed in &pack.changed_files { + let mut detail = format!( + " - {} {}", + changed.status, + relative_or_display(repo_root, &changed.path) + ); + if let Some(kind) = &changed.project_kind { + use std::fmt::Write as _; + let _ = write!(detail, " [{kind}]"); + } + if let Some(project_root) = &changed.project_root { + use std::fmt::Write as _; + let _ = write!( + detail, + " (root: {})", + relative_or_display(repo_root, project_root) + ); + } + 
lines.push(detail); + if let Some(entrypoint) = &changed.entrypoint { + lines.push(format!( + " entrypoint: {}", + relative_or_display(repo_root, entrypoint) + )); + } + if let Some(related_test) = &changed.related_test { + lines.push(format!( + " related test: {}", + relative_or_display(repo_root, related_test) + )); + } + } + truncate_rendered_context_pack(&lines.join("\n")) +} + +fn relative_or_display(base: &Path, path: &Path) -> String { + path.strip_prefix(base) + .unwrap_or(path) + .display() + .to_string() +} + +fn truncate_rendered_context_pack(content: &str) -> String { + if content.chars().count() <= MAX_CONTEXT_PACK_CHARS { + return content.to_string(); + } + let mut shortened = content + .chars() + .take(MAX_CONTEXT_PACK_CHARS) + .collect::(); + shortened.push_str("\n... [context pack truncated]"); + shortened +} + fn render_project_context(project_context: &ProjectContext) -> String { let mut lines = vec!["# Project context".to_string()]; let mut bullets = vec![ @@ -298,7 +595,11 @@ fn render_project_context(project_context: &ProjectContext) -> String { )); } lines.extend(prepend_bullets(bullets)); - if let Some(status) = &project_context.git_status { + if let Some(context_pack) = &project_context.context_pack { + lines.push(String::new()); + lines.push("Workspace context pack:".to_string()); + lines.push(render_context_pack(context_pack)); + } else if let Some(status) = &project_context.git_status { lines.push(String::new()); lines.push("Git status snapshot:".to_string()); lines.push(status.clone()); @@ -312,18 +613,6 @@ fn render_project_context(project_context: &ProjectContext) -> String { } } } - if let Some(diff) = &project_context.git_diff { - lines.push(String::new()); - lines.push("Git diff snapshot:".to_string()); - lines.push(diff.clone()); - } - if let Some(git_context) = &project_context.git_context { - let rendered = git_context.render(); - if !rendered.is_empty() { - lines.push(String::new()); - lines.push(rendered); - } - } 
lines.join("\n") } @@ -521,8 +810,9 @@ fn get_actions_section() -> String { mod tests { use super::{ collapse_blank_lines, display_context_path, normalize_instruction_content, - render_instruction_content, render_instruction_files, truncate_instruction_content, - ContextFile, ProjectContext, SystemPromptBuilder, SYSTEM_PROMPT_DYNAMIC_BOUNDARY, + render_instruction_content, render_instruction_files, render_project_context, + truncate_instruction_content, truncate_rendered_context_pack, ContextFile, ProjectContext, + SystemPromptBuilder, MAX_CONTEXT_PACK_CHARS, SYSTEM_PROMPT_DYNAMIC_BOUNDARY, }; use crate::config::ConfigLoader; use std::fs; @@ -732,11 +1022,15 @@ mod tests { let status = context.git_status.as_deref().expect("status snapshot"); assert!(status.contains("## main")); assert!(status.contains("A d.txt")); + let context_pack = context.context_pack.as_ref().expect("context pack"); + assert_eq!(context_pack.changed_files.len(), 1); + assert_eq!(context_pack.changed_files[0].status, "added"); assert!(rendered.contains("Recent commits (last 5):")); assert!(rendered.contains("first commit")); - assert!(rendered.contains("Git status snapshot:")); - assert!(rendered.contains("## main")); + assert!(rendered.contains("Workspace context pack:")); + assert!(rendered.contains("Git branch: main")); + assert!(rendered.contains("added d.txt")); fs::remove_dir_all(root).expect("cleanup temp dir"); } @@ -785,6 +1079,74 @@ mod tests { fs::remove_dir_all(root).expect("cleanup temp dir"); } + #[test] + fn context_pack_detects_project_root_entrypoint_and_related_test() { + let _guard = env_lock(); + ensure_valid_cwd(); + let root = temp_dir(); + fs::create_dir_all(root.join("src")).expect("src dir"); + fs::create_dir_all(root.join("tests")).expect("tests dir"); + std::process::Command::new("git") + .args(["init", "--quiet", "-b", "main"]) + .current_dir(&root) + .status() + .expect("git init should run"); + std::process::Command::new("git") + .args(["config", "user.email", 
"tests@example.com"]) + .current_dir(&root) + .status() + .expect("git config email should run"); + std::process::Command::new("git") + .args(["config", "user.name", "Runtime Prompt Tests"]) + .current_dir(&root) + .status() + .expect("git config name should run"); + fs::write( + root.join("Cargo.toml"), + "[package]\nname = \"demo\"\nversion = \"0.1.0\"\nedition = \"2021\"\n", + ) + .expect("write Cargo.toml"); + fs::write(root.join("src/main.rs"), "fn main() {}\n").expect("write main"); + fs::write(root.join("src/lib.rs"), "pub fn value() -> usize { 1 }\n").expect("write lib"); + fs::write(root.join("tests/lib.rs"), "#[test]\nfn smoke() {}\n").expect("write test"); + std::process::Command::new("git") + .args(["add", "."]) + .current_dir(&root) + .status() + .expect("git add should run"); + std::process::Command::new("git") + .args(["commit", "-m", "init", "--quiet"]) + .current_dir(&root) + .status() + .expect("git commit should run"); + fs::write(root.join("src/lib.rs"), "pub fn value() -> usize { 2 }\n").expect("rewrite lib"); + + let context = + ProjectContext::discover_with_git(&root, "2026-03-31").expect("context should load"); + let rendered = render_project_context(&context); + let context_pack = context.context_pack.as_ref().expect("context pack"); + let changed = context_pack + .changed_files + .iter() + .find(|file| file.path == Path::new("src/lib.rs")) + .expect("changed file should be listed"); + + assert_eq!(changed.status, "modified"); + assert_eq!(changed.project_kind.as_deref(), Some("rust")); + assert_eq!(changed.project_root.as_ref(), Some(&root)); + assert_eq!(changed.entrypoint.as_ref(), Some(&root.join("src/main.rs"))); + assert_eq!( + changed.related_test.as_ref(), + Some(&root.join("tests/lib.rs")) + ); + assert!(rendered.contains("Workspace context pack:")); + assert!(rendered.contains("modified src/lib.rs [rust]")); + assert!(rendered.contains("entrypoint: src/main.rs")); + assert!(rendered.contains("related test: tests/lib.rs")); + + 
fs::remove_dir_all(root).expect("cleanup temp dir"); + } + #[test] fn load_system_prompt_reads_claude_files_and_config() { let root = temp_dir(); @@ -892,6 +1254,87 @@ mod tests { fs::remove_dir_all(root).expect("cleanup temp dir"); } + #[test] + fn rendered_context_pack_respects_6kb_cap() { + // Invariant: MAX_CONTEXT_PACK_CHARS is the spec'd 6KB budget. + assert_eq!(MAX_CONTEXT_PACK_CHARS, 6_000); + + let fat = "x".repeat(20_000); + let rendered = truncate_rendered_context_pack(&fat); + assert!( + rendered.chars().count() + <= MAX_CONTEXT_PACK_CHARS + "\n... [context pack truncated]".chars().count(), + "truncation must keep output within cap" + ); + assert!(rendered.ends_with("[context pack truncated]")); + } + + #[test] + fn context_pack_is_recomputed_each_discover_call() { + // Invariant: no cross-turn cache — each discover_with_git rebuilds the + // pack from the current workspace. The test mutates git between two + // discover calls and expects the second call to observe the new state. + let _guard = env_lock(); + ensure_valid_cwd(); + let root = temp_dir(); + fs::create_dir_all(&root).expect("root dir"); + std::process::Command::new("git") + .args(["init", "--quiet"]) + .current_dir(&root) + .status() + .expect("git init"); + std::process::Command::new("git") + .args(["config", "user.email", "t@example.com"]) + .current_dir(&root) + .status() + .expect("config email"); + std::process::Command::new("git") + .args(["config", "user.name", "T"]) + .current_dir(&root) + .status() + .expect("config name"); + fs::write(root.join("initial.txt"), "seed").expect("seed"); + std::process::Command::new("git") + .args(["add", "."]) + .current_dir(&root) + .status() + .expect("add seed"); + std::process::Command::new("git") + .args(["commit", "-m", "seed", "--quiet"]) + .current_dir(&root) + .status() + .expect("commit seed"); + + // First discover: no changed files. 
+ let first = ProjectContext::discover_with_git(&root, "2026-04-22").expect("first discover"); + let first_count = first + .context_pack + .as_ref() + .map_or(0, |p| p.changed_files.len()); + + // Introduce a change between calls. + fs::write(root.join("a.txt"), "hello").expect("a.txt"); + std::process::Command::new("git") + .args(["add", "a.txt"]) + .current_dir(&root) + .status() + .expect("stage a.txt"); + + let second = + ProjectContext::discover_with_git(&root, "2026-04-22").expect("second discover"); + let second_count = second + .context_pack + .as_ref() + .map_or(0, |p| p.changed_files.len()); + + assert!( + second_count > first_count, + "second discover must observe new changes (first={first_count}, second={second_count}) — no cross-turn caching", + ); + + fs::remove_dir_all(root).expect("cleanup temp dir"); + } + #[test] fn renders_instruction_file_metadata() { let rendered = render_instruction_files(&[ContextFile { diff --git a/rust/crates/runtime/src/rollout_metrics.rs b/rust/crates/runtime/src/rollout_metrics.rs new file mode 100644 index 0000000000..4a63dbe27b --- /dev/null +++ b/rust/crates/runtime/src/rollout_metrics.rs @@ -0,0 +1,562 @@ +//! Rollout metrics aggregator and budget gates for the edit→verify→fix loop. +//! +//! The runtime emits individual `verifier_ran` traces per step, plus per-turn +//! token/latency counts. This module collapses those samples into the handful +//! of rollout metrics named by the plan: +//! +//! * `quick_verify_latency_ms` — p50/p95 latency of quick-phase verification. +//! * `final_gate_pass_rate` — fraction of final-phase reports that succeeded. +//! * `repair_iterations_until_green` — mean repair iterations needed before +//! the first green final-gate report. +//! * `tokens_per_successful_fix` — total tokens spent divided by number of +//! successful fixes (green final reports). +//! * `turn_latency_ms` — mean end-to-end turn latency. +//! +//! It also codifies the rollout **budget gates** from the plan: +//! 
* pass-rate may not regress by more than 1 percentage point. +//! * repair-iteration mean may not regress by more than 5%. +//! * tokens-per-fix may not regress by more than 10%. +//! * turn-latency p50 may not regress by more than 15%. +//! +//! Keeping aggregation + gates in one crate-visible module lets CI wire it +//! without reaching into `conversation.rs` internals. + +#![allow( + clippy::cast_precision_loss, + clippy::cast_possible_truncation, + clippy::cast_sign_loss, + clippy::doc_markdown, + clippy::map_unwrap_or +)] + +use std::cmp::Ordering; + +use telemetry::SessionTraceRecord; + +/// Phase observed for a single verifier step. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum VerifierPhase { + Quick, + Final, +} + +/// A single verifier-step observation. The aggregator takes a slice of these. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct VerifierSample { + pub phase: VerifierPhase, + /// Identifies the repair episode (0-based index of a user turn, typically). + pub turn_index: u64, + /// Monotonic within a turn; the final successful report carries the + /// "final iteration count" for the episode. + pub iteration: u32, + pub duration_ms: u64, + pub succeeded: bool, +} + +/// Per-turn fact used for token / latency rollups. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct TurnSample { + pub turn_index: u64, + pub tokens_total: u64, + pub turn_latency_ms: u64, + /// True iff the turn ended with a green final-gate report. + pub successful_fix: bool, +} + +/// Aggregated rollout metrics. 
+#[derive(Debug, Clone, Copy, PartialEq)] +pub struct RolloutMetrics { + pub quick_verify_latency_p50_ms: f64, + pub quick_verify_latency_p95_ms: f64, + pub final_gate_pass_rate: f64, + pub repair_iterations_until_green_mean: f64, + pub tokens_per_successful_fix: f64, + pub turn_latency_p50_ms: f64, + pub turn_latency_mean_ms: f64, + pub samples: AggregateCounts, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct AggregateCounts { + pub quick_samples: usize, + pub final_samples: usize, + pub turns: usize, + pub successful_fixes: usize, +} + +/// Collapse raw samples into [`RolloutMetrics`]. Missing signals become `0.0` +/// (not NaN) so downstream gates can compare safely. +#[must_use] +pub fn aggregate(verifier: &[VerifierSample], turns: &[TurnSample]) -> RolloutMetrics { + let mut quick_latencies: Vec = verifier + .iter() + .filter(|s| s.phase == VerifierPhase::Quick) + .map(|s| s.duration_ms) + .collect(); + let final_samples: Vec<&VerifierSample> = verifier + .iter() + .filter(|s| s.phase == VerifierPhase::Final) + .collect(); + let final_success_count = final_samples.iter().filter(|s| s.succeeded).count(); + + // Repair iterations per turn: count of final-phase failures before the + // first green final-phase report (if any). If the turn never went green, + // treat the whole chain as the cost. 
+ let mut repair_iterations: Vec = Vec::new(); + for turn in turns { + let turn_finals: Vec<&&VerifierSample> = final_samples + .iter() + .filter(|s| s.turn_index == turn.turn_index) + .collect(); + if turn_finals.is_empty() { + continue; + } + let mut failures = 0_u32; + let mut saw_green = false; + for sample in &turn_finals { + if sample.succeeded { + saw_green = true; + break; + } + failures += 1; + } + if saw_green || !turn_finals.is_empty() { + repair_iterations.push(failures); + } + } + + let tokens_total: u64 = turns.iter().map(|t| t.tokens_total).sum(); + let successful_fixes = turns.iter().filter(|t| t.successful_fix).count(); + let tokens_per_successful_fix = if successful_fixes == 0 { + 0.0 + } else { + tokens_total as f64 / successful_fixes as f64 + }; + + let turn_latencies: Vec = turns.iter().map(|t| t.turn_latency_ms).collect(); + + RolloutMetrics { + quick_verify_latency_p50_ms: percentile(&mut quick_latencies.clone(), 0.50), + quick_verify_latency_p95_ms: percentile(&mut quick_latencies, 0.95), + final_gate_pass_rate: if final_samples.is_empty() { + 0.0 + } else { + final_success_count as f64 / final_samples.len() as f64 + }, + repair_iterations_until_green_mean: mean_u32(&repair_iterations), + tokens_per_successful_fix, + turn_latency_p50_ms: percentile(&mut turn_latencies.clone(), 0.50), + turn_latency_mean_ms: mean_u64(&turn_latencies), + samples: AggregateCounts { + quick_samples: verifier + .iter() + .filter(|s| s.phase == VerifierPhase::Quick) + .count(), + final_samples: final_samples.len(), + turns: turns.len(), + successful_fixes, + }, + } +} + +fn percentile(values: &mut [u64], p: f64) -> f64 { + if values.is_empty() { + return 0.0; + } + values.sort_unstable(); + let rank = (p * (values.len() as f64 - 1.0)).round() as usize; + values[rank.min(values.len() - 1)] as f64 +} + +fn mean_u32(values: &[u32]) -> f64 { + if values.is_empty() { + return 0.0; + } + values.iter().map(|v| f64::from(*v)).sum::() / values.len() as f64 +} + +fn 
mean_u64(values: &[u64]) -> f64 { + if values.is_empty() { + return 0.0; + } + values.iter().map(|v| *v as f64).sum::() / values.len() as f64 +} + +/// Extract `(VerifierSample, TurnSample)` vectors from raw session trace records. +/// +/// Recognises: +/// * `verifier_ran` records with attributes `phase` ("quick" | "final"), +/// `mutation_sequence`, `duration_ms`, `passed`. `mutation_sequence` +/// doubles as the `turn_index` so samples from the same repair episode +/// group together even if the caller did not set an explicit turn id. +/// * `turn_completed` records with optional `tokens_total`, `turn_latency_ms`, +/// `verification_gate_passed`. Records missing those fields are still +/// admitted with zeros so count-based aggregates stay honest. +/// +/// Any trace with an unexpected name is ignored. +#[must_use] +pub fn samples_from_traces( + traces: &[SessionTraceRecord], +) -> (Vec, Vec) { + let mut verifier = Vec::new(); + let mut turns = Vec::new(); + let mut turn_counter: u64 = 0; + + for record in traces { + match record.name.as_str() { + "verifier_ran" => { + let phase = record + .attributes + .get("phase") + .and_then(|v| v.as_str()) + .map(|s| match s { + "final" => VerifierPhase::Final, + _ => VerifierPhase::Quick, + }) + .unwrap_or(VerifierPhase::Quick); + let turn_index = record + .attributes + .get("mutation_sequence") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let iteration = record + .attributes + .get("iteration") + .and_then(serde_json::Value::as_u64) + .and_then(|v| u32::try_from(v).ok()) + .unwrap_or(0); + let duration_ms = record + .attributes + .get("duration_ms") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let succeeded = record + .attributes + .get("passed") + .and_then(serde_json::Value::as_bool) + .unwrap_or(false); + verifier.push(VerifierSample { + phase, + turn_index, + iteration, + duration_ms, + succeeded, + }); + } + "turn_completed" => { + let tokens_total = record + .attributes + 
.get("tokens_total") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let turn_latency_ms = record + .attributes + .get("turn_latency_ms") + .and_then(serde_json::Value::as_u64) + .unwrap_or(0); + let successful_fix = record + .attributes + .get("verification_gate_passed") + .and_then(serde_json::Value::as_bool) + .unwrap_or(false); + turns.push(TurnSample { + turn_index: turn_counter, + tokens_total, + turn_latency_ms, + successful_fix, + }); + turn_counter += 1; + } + _ => {} + } + } + + (verifier, turns) +} + +/// Budget gate thresholds codified from the rollout plan. +pub const MAX_PASS_RATE_REGRESSION_PP: f64 = 0.01; // 1 percentage point +pub const MAX_REPAIR_ITERATIONS_REGRESSION: f64 = 0.05; // 5% relative +pub const MAX_TOKENS_PER_FIX_REGRESSION: f64 = 0.10; // 10% relative +pub const MAX_TURN_LATENCY_P50_REGRESSION: f64 = 0.15; // 15% relative + +/// A single budget-gate violation. +#[derive(Debug, Clone, PartialEq)] +pub struct BudgetViolation { + pub metric: &'static str, + pub baseline: f64, + pub current: f64, + pub limit: f64, + pub actual: f64, +} + +/// Compare a rollout candidate against a baseline. Returns every violation so +/// reports can surface them all at once. +#[must_use] +pub fn evaluate_budget_gates( + baseline: &RolloutMetrics, + current: &RolloutMetrics, +) -> Vec { + let mut violations = Vec::new(); + + // Pass-rate: absolute percentage-point drop. 
+ let pass_rate_drop = baseline.final_gate_pass_rate - current.final_gate_pass_rate; + if pass_rate_drop > MAX_PASS_RATE_REGRESSION_PP { + violations.push(BudgetViolation { + metric: "final_gate_pass_rate", + baseline: baseline.final_gate_pass_rate, + current: current.final_gate_pass_rate, + limit: MAX_PASS_RATE_REGRESSION_PP, + actual: pass_rate_drop, + }); + } + + push_relative_regression( + &mut violations, + "repair_iterations_until_green_mean", + baseline.repair_iterations_until_green_mean, + current.repair_iterations_until_green_mean, + MAX_REPAIR_ITERATIONS_REGRESSION, + ); + push_relative_regression( + &mut violations, + "tokens_per_successful_fix", + baseline.tokens_per_successful_fix, + current.tokens_per_successful_fix, + MAX_TOKENS_PER_FIX_REGRESSION, + ); + push_relative_regression( + &mut violations, + "turn_latency_p50_ms", + baseline.turn_latency_p50_ms, + current.turn_latency_p50_ms, + MAX_TURN_LATENCY_P50_REGRESSION, + ); + + violations +} + +fn push_relative_regression( + violations: &mut Vec, + metric: &'static str, + baseline: f64, + current: f64, + limit: f64, +) { + if baseline <= 0.0 { + // No baseline signal — cannot compute a relative delta. 
+ return; + } + let delta = (current - baseline) / baseline; + if delta.partial_cmp(&limit) == Some(Ordering::Greater) { + violations.push(BudgetViolation { + metric, + baseline, + current, + limit, + actual: delta, + }); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn vs(phase: VerifierPhase, turn: u64, iter: u32, duration: u64, ok: bool) -> VerifierSample { + VerifierSample { + phase, + turn_index: turn, + iteration: iter, + duration_ms: duration, + succeeded: ok, + } + } + + fn ts(turn: u64, tokens: u64, latency: u64, ok: bool) -> TurnSample { + TurnSample { + turn_index: turn, + tokens_total: tokens, + turn_latency_ms: latency, + successful_fix: ok, + } + } + + #[test] + fn empty_inputs_produce_zeroed_metrics() { + let m = aggregate(&[], &[]); + assert!(m.quick_verify_latency_p50_ms.abs() < f64::EPSILON); + assert!(m.final_gate_pass_rate.abs() < f64::EPSILON); + assert!(m.tokens_per_successful_fix.abs() < f64::EPSILON); + assert_eq!(m.samples.turns, 0); + } + + #[test] + fn aggregates_basic_signals() { + let verifier = vec![ + vs(VerifierPhase::Quick, 0, 1, 10, true), + vs(VerifierPhase::Quick, 0, 2, 30, true), + vs(VerifierPhase::Final, 0, 1, 100, false), + vs(VerifierPhase::Final, 0, 2, 120, true), + vs(VerifierPhase::Final, 1, 1, 150, true), + ]; + let turns = vec![ts(0, 1_000, 500, true), ts(1, 500, 400, true)]; + let m = aggregate(&verifier, &turns); + assert!((m.final_gate_pass_rate - 2.0 / 3.0).abs() < 1e-9); + assert!((m.repair_iterations_until_green_mean - 0.5).abs() < 1e-9); + assert!((m.tokens_per_successful_fix - 750.0).abs() < 1e-9); + assert_eq!(m.samples.successful_fixes, 2); + } + + #[test] + fn pass_rate_regression_over_1pp_is_a_violation() { + let baseline = baseline(); + let mut current = baseline; + current.final_gate_pass_rate = 0.89; // dropped 2pp from 0.91 + let violations = evaluate_budget_gates(&baseline, ¤t); + assert!(violations + .iter() + .any(|v| v.metric == "final_gate_pass_rate")); + } + + #[test] + fn 
pass_rate_regression_under_1pp_is_ok() { + let baseline = baseline(); + let mut current = baseline; + current.final_gate_pass_rate = 0.905; // dropped 0.5pp + let violations = evaluate_budget_gates(&baseline, ¤t); + assert!(violations.is_empty()); + } + + #[test] + fn repair_regression_over_5pct_is_a_violation() { + let baseline = baseline(); + let mut current = baseline; + current.repair_iterations_until_green_mean = 2.2; // 10% up from 2.0 + let violations = evaluate_budget_gates(&baseline, ¤t); + assert!(violations + .iter() + .any(|v| v.metric == "repair_iterations_until_green_mean")); + } + + #[test] + fn tokens_regression_over_10pct_is_a_violation() { + let baseline = baseline(); + let mut current = baseline; + current.tokens_per_successful_fix = 1_200.0; // 20% up from 1000 + let violations = evaluate_budget_gates(&baseline, ¤t); + assert!(violations + .iter() + .any(|v| v.metric == "tokens_per_successful_fix")); + } + + #[test] + fn turn_latency_regression_over_15pct_is_a_violation() { + let baseline = baseline(); + let mut current = baseline; + current.turn_latency_p50_ms = 1_200.0; // 20% up from 1000 + let violations = evaluate_budget_gates(&baseline, ¤t); + assert!(violations.iter().any(|v| v.metric == "turn_latency_p50_ms")); + } + + #[test] + fn flat_metrics_emit_no_violations() { + let baseline = baseline(); + let current = baseline; + assert!(evaluate_budget_gates(&baseline, ¤t).is_empty()); + } + + fn verifier_trace( + name: &str, + phase: &str, + mutation_sequence: u64, + duration_ms: u64, + passed: bool, + ) -> SessionTraceRecord { + let mut attrs = serde_json::Map::new(); + attrs.insert("phase".into(), serde_json::Value::String(phase.into())); + attrs.insert( + "mutation_sequence".into(), + serde_json::Value::from(mutation_sequence), + ); + attrs.insert("duration_ms".into(), serde_json::Value::from(duration_ms)); + attrs.insert("passed".into(), serde_json::Value::Bool(passed)); + SessionTraceRecord { + session_id: "sess".into(), + sequence: 0, + 
name: name.into(), + timestamp_ms: 0, + attributes: attrs, + } + } + + fn turn_trace(tokens: u64, latency: u64, passed: bool) -> SessionTraceRecord { + let mut attrs = serde_json::Map::new(); + attrs.insert("tokens_total".into(), serde_json::Value::from(tokens)); + attrs.insert("turn_latency_ms".into(), serde_json::Value::from(latency)); + attrs.insert( + "verification_gate_passed".into(), + serde_json::Value::Bool(passed), + ); + SessionTraceRecord { + session_id: "sess".into(), + sequence: 0, + name: "turn_completed".into(), + timestamp_ms: 0, + attributes: attrs, + } + } + + #[test] + fn samples_from_traces_extracts_verifier_and_turn_records() { + let traces = vec![ + verifier_trace("verifier_ran", "quick", 0, 50, true), + verifier_trace("verifier_ran", "final", 0, 300, false), + verifier_trace("verifier_ran", "final", 0, 320, true), + turn_trace(1_000, 500, true), + verifier_trace("verifier_ran", "final", 1, 180, true), + turn_trace(500, 400, true), + verifier_trace("unrelated_event", "quick", 0, 0, true), + ]; + let (verifier, turns) = samples_from_traces(&traces); + assert_eq!(verifier.len(), 4, "unrelated events ignored"); + assert_eq!(turns.len(), 2); + let metrics = aggregate(&verifier, &turns); + assert_eq!(metrics.samples.final_samples, 3); + assert!((metrics.tokens_per_successful_fix - 750.0).abs() < 1e-9); + } + + #[test] + fn samples_from_traces_handles_missing_attributes() { + let record = SessionTraceRecord { + session_id: "sess".into(), + sequence: 0, + name: "verifier_ran".into(), + timestamp_ms: 0, + attributes: serde_json::Map::new(), + }; + let (verifier, turns) = samples_from_traces(&[record]); + assert_eq!(verifier.len(), 1); + assert_eq!(turns.len(), 0); + assert_eq!(verifier[0].duration_ms, 0); + assert!(!verifier[0].succeeded); + } + + fn baseline() -> RolloutMetrics { + RolloutMetrics { + quick_verify_latency_p50_ms: 100.0, + quick_verify_latency_p95_ms: 400.0, + final_gate_pass_rate: 0.91, + repair_iterations_until_green_mean: 2.0, + 
tokens_per_successful_fix: 1_000.0, + turn_latency_p50_ms: 1_000.0, + turn_latency_mean_ms: 1_100.0, + samples: AggregateCounts { + quick_samples: 10, + final_samples: 10, + turns: 10, + successful_fixes: 9, + }, + } + } +} diff --git a/rust/crates/runtime/src/session.rs b/rust/crates/runtime/src/session.rs index 7bf7bdb794..e12258d3d8 100644 --- a/rust/crates/runtime/src/session.rs +++ b/rust/crates/runtime/src/session.rs @@ -28,6 +28,34 @@ pub enum MessageRole { /// Structured message content stored inside a [`Session`]. #[derive(Debug, Clone, PartialEq, Eq)] +pub struct VerificationPrimaryFailureBlock { + pub label: String, + pub status: VerificationStatus, + pub failure_kind: Option, + pub output_excerpt: String, + pub step_kind: Option, + pub target_scope: Option, + pub package_name: Option, + pub package_manager: Option, + pub launcher_kind: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct VerificationCompactStepBlock { + pub label: String, + pub status: VerificationStatus, + pub failure_kind: Option, + pub duration_ms: u64, + pub step_kind: Option, + pub target_scope: Option, + pub package_name: Option, + pub package_manager: Option, + pub launcher_kind: Option, +} + +/// Structured message content stored inside a [`Session`]. 
+#[derive(Debug, Clone, PartialEq, Eq)] +#[allow(clippy::large_enum_variant)] // VerificationReport is intentionally structured; boxing would churn call sites pub enum ContentBlock { Text { text: String, @@ -48,6 +76,12 @@ pub enum ContentBlock { phase: VerificationPhase, status: VerificationStatus, summary_text: String, + adapter_id: Option, + project_root: Option, + touched_paths: Vec, + primary_failure: Option, + steps: Vec, + report_mode: Option, }, } @@ -677,7 +711,7 @@ impl ConversationMessage { } #[must_use] - pub fn verification_report(report: &VerificationReport) -> Self { + pub fn verification_report(report: &VerificationReport, report_mode: Option<&str>) -> Self { Self { role: MessageRole::Verification, blocks: vec![ContentBlock::VerificationReport { @@ -685,6 +719,42 @@ impl ConversationMessage { phase: report.phase, status: report.status, summary_text: report.summary_text.clone(), + adapter_id: Some(report.adapter_id.clone()), + project_root: Some(report.project_root.display().to_string()), + touched_paths: report + .touched_paths + .iter() + .map(|path| path.display().to_string()) + .collect(), + primary_failure: report.primary_step().and_then(|step| { + (!step.status.is_success()).then(|| VerificationPrimaryFailureBlock { + label: step.label.clone(), + status: step.status, + failure_kind: step.failure_kind.map(|kind| kind.as_str().to_string()), + output_excerpt: step.truncated_output.clone(), + step_kind: step.step_kind.clone(), + target_scope: step.target_scope.clone(), + package_name: step.package_name.clone(), + package_manager: step.package_manager.clone(), + launcher_kind: step.launcher_kind.clone(), + }) + }), + steps: report + .steps + .iter() + .map(|step| VerificationCompactStepBlock { + label: step.label.clone(), + status: step.status, + failure_kind: step.failure_kind.map(|kind| kind.as_str().to_string()), + duration_ms: step.duration_ms, + step_kind: step.step_kind.clone(), + target_scope: step.target_scope.clone(), + package_name: 
step.package_name.clone(), + package_manager: step.package_manager.clone(), + launcher_kind: step.launcher_kind.clone(), + }) + .collect(), + report_mode: report_mode.map(ToOwned::to_owned), }], usage: None, } @@ -754,6 +824,7 @@ impl ConversationMessage { impl ContentBlock { #[must_use] + #[allow(clippy::too_many_lines)] pub fn to_json(&self) -> JsonValue { let mut object = BTreeMap::new(); match self { @@ -796,6 +867,12 @@ impl ContentBlock { phase, status, summary_text, + adapter_id, + project_root, + touched_paths, + primary_failure, + steps, + report_mode, } => { object.insert( "type".to_string(), @@ -817,6 +894,49 @@ impl ContentBlock { "summary_text".to_string(), JsonValue::String(summary_text.clone()), ); + if let Some(adapter_id) = adapter_id { + object.insert( + "adapter_id".to_string(), + JsonValue::String(adapter_id.clone()), + ); + } + if let Some(project_root) = project_root { + object.insert( + "project_root".to_string(), + JsonValue::String(project_root.clone()), + ); + } + if !touched_paths.is_empty() { + object.insert( + "touched_paths".to_string(), + JsonValue::Array( + touched_paths + .iter() + .map(|path| JsonValue::String(path.clone())) + .collect(), + ), + ); + } + if let Some(primary_failure) = primary_failure { + object.insert("primary_failure".to_string(), primary_failure.to_json()); + } + if !steps.is_empty() { + object.insert( + "steps".to_string(), + JsonValue::Array( + steps + .iter() + .map(VerificationCompactStepBlock::to_json) + .collect(), + ), + ); + } + if let Some(report_mode) = report_mode { + object.insert( + "report_mode".to_string(), + JsonValue::String(report_mode.clone()), + ); + } } } JsonValue::Object(object) @@ -853,6 +973,25 @@ impl ContentBlock { phase: parse_verification_phase(&required_string(object, "phase")?)?, status: parse_verification_status(&required_string(object, "status")?)?, summary_text: required_string(object, "summary_text")?, + adapter_id: optional_string(object, "adapter_id"), + project_root: 
optional_string(object, "project_root"), + touched_paths: optional_string_array(object, "touched_paths")?, + primary_failure: object + .get("primary_failure") + .map(VerificationPrimaryFailureBlock::from_json) + .transpose()?, + steps: object + .get("steps") + .and_then(JsonValue::as_array) + .map(|steps| { + steps + .iter() + .map(VerificationCompactStepBlock::from_json) + .collect::, _>>() + }) + .transpose()? + .unwrap_or_default(), + report_mode: optional_string(object, "report_mode"), }), other => Err(SessionError::Format(format!( "unsupported block type: {other}" @@ -861,6 +1000,92 @@ impl ContentBlock { } } +impl VerificationPrimaryFailureBlock { + fn to_json(&self) -> JsonValue { + let mut object = BTreeMap::new(); + object.insert("label".to_string(), JsonValue::String(self.label.clone())); + object.insert( + "status".to_string(), + JsonValue::String(self.status.as_str().to_string()), + ); + object.insert( + "output_excerpt".to_string(), + JsonValue::String(self.output_excerpt.clone()), + ); + insert_optional_string(&mut object, "failure_kind", self.failure_kind.as_ref()); + insert_optional_string(&mut object, "step_kind", self.step_kind.as_ref()); + insert_optional_string(&mut object, "target_scope", self.target_scope.as_ref()); + insert_optional_string(&mut object, "package_name", self.package_name.as_ref()); + insert_optional_string( + &mut object, + "package_manager", + self.package_manager.as_ref(), + ); + insert_optional_string(&mut object, "launcher_kind", self.launcher_kind.as_ref()); + JsonValue::Object(object) + } + + fn from_json(value: &JsonValue) -> Result { + let object = value + .as_object() + .ok_or_else(|| SessionError::Format("primary_failure must be an object".to_string()))?; + Ok(Self { + label: required_string(object, "label")?, + status: parse_verification_status(&required_string(object, "status")?)?, + failure_kind: optional_string(object, "failure_kind"), + output_excerpt: required_string(object, "output_excerpt")?, + step_kind: 
optional_string(object, "step_kind"), + target_scope: optional_string(object, "target_scope"), + package_name: optional_string(object, "package_name"), + package_manager: optional_string(object, "package_manager"), + launcher_kind: optional_string(object, "launcher_kind"), + }) + } +} + +impl VerificationCompactStepBlock { + fn to_json(&self) -> JsonValue { + let mut object = BTreeMap::new(); + object.insert("label".to_string(), JsonValue::String(self.label.clone())); + object.insert( + "status".to_string(), + JsonValue::String(self.status.as_str().to_string()), + ); + object.insert( + "duration_ms".to_string(), + JsonValue::Number(i64::try_from(self.duration_ms).unwrap_or(i64::MAX)), + ); + insert_optional_string(&mut object, "failure_kind", self.failure_kind.as_ref()); + insert_optional_string(&mut object, "step_kind", self.step_kind.as_ref()); + insert_optional_string(&mut object, "target_scope", self.target_scope.as_ref()); + insert_optional_string(&mut object, "package_name", self.package_name.as_ref()); + insert_optional_string( + &mut object, + "package_manager", + self.package_manager.as_ref(), + ); + insert_optional_string(&mut object, "launcher_kind", self.launcher_kind.as_ref()); + JsonValue::Object(object) + } + + fn from_json(value: &JsonValue) -> Result { + let object = value.as_object().ok_or_else(|| { + SessionError::Format("verification step must be an object".to_string()) + })?; + Ok(Self { + label: required_string(object, "label")?, + status: parse_verification_status(&required_string(object, "status")?)?, + failure_kind: optional_string(object, "failure_kind"), + duration_ms: required_u64(object, "duration_ms")?, + step_kind: optional_string(object, "step_kind"), + target_scope: optional_string(object, "target_scope"), + package_name: optional_string(object, "package_name"), + package_manager: optional_string(object, "package_manager"), + launcher_kind: optional_string(object, "launcher_kind"), + }) + } +} + impl SessionCompaction { pub fn 
to_json(&self) -> Result { let mut object = BTreeMap::new(); @@ -1027,6 +1252,43 @@ fn required_string( .ok_or_else(|| SessionError::Format(format!("missing {key}"))) } +fn optional_string(object: &BTreeMap, key: &str) -> Option { + object + .get(key) + .and_then(JsonValue::as_str) + .map(ToOwned::to_owned) +} + +fn optional_string_array( + object: &BTreeMap, + key: &str, +) -> Result, SessionError> { + let Some(value) = object.get(key) else { + return Ok(Vec::new()); + }; + let array = value + .as_array() + .ok_or_else(|| SessionError::Format(format!("{key} must be an array")))?; + array + .iter() + .map(|item| { + item.as_str() + .map(ToOwned::to_owned) + .ok_or_else(|| SessionError::Format(format!("{key} entries must be strings"))) + }) + .collect() +} + +fn insert_optional_string( + object: &mut BTreeMap, + key: &str, + value: Option<&String>, +) { + if let Some(value) = value { + object.insert(key.to_string(), JsonValue::String(value.clone())); + } +} + fn parse_verification_phase(value: &str) -> Result { match value { "quick" => Ok(VerificationPhase::Quick), @@ -1227,6 +1489,10 @@ mod tests { }; use crate::json::JsonValue; use crate::usage::TokenUsage; + use crate::verifier::{ + VerificationFailureKind, VerificationPhase, VerificationReport, VerificationStatus, + VerificationStepReport, + }; use std::fs; use std::path::{Path, PathBuf}; use std::time::{SystemTime, UNIX_EPOCH}; @@ -1529,6 +1795,140 @@ mod tests { assert_eq!(forked.workspace_root(), Some(workspace_root.as_path())); } + #[test] + fn verification_report_round_trips_structured_fields() { + let report = sample_verification_report(); + let message = ConversationMessage::verification_report(&report, Some("typed-primary")); + + let json = message.to_json(); + let restored = ConversationMessage::from_json(&json).expect("message should parse"); + + let ContentBlock::VerificationReport { + adapter_id, + project_root, + touched_paths, + primary_failure, + steps, + report_mode, + .. 
+ } = &restored.blocks[0] + else { + panic!("expected verification report block"); + }; + assert_eq!(adapter_id.as_deref(), Some("rust-cargo")); + assert_eq!(project_root.as_deref(), Some("/tmp/demo-workspace")); + assert_eq!(touched_paths, &vec!["src/lib.rs".to_string()]); + assert_eq!(report_mode.as_deref(), Some("typed-primary")); + let primary_failure = primary_failure + .as_ref() + .expect("primary failure should be persisted"); + assert_eq!(primary_failure.failure_kind.as_deref(), Some("code")); + assert_eq!(primary_failure.step_kind.as_deref(), Some("cargo_clippy")); + assert_eq!(primary_failure.target_scope.as_deref(), Some("package")); + assert_eq!(primary_failure.package_name.as_deref(), Some("demo")); + assert_eq!(steps.len(), 2); + assert_eq!(steps[1].failure_kind.as_deref(), Some("code")); + assert_eq!(steps[1].package_manager, None); + } + + #[test] + fn verification_report_from_json_accepts_legacy_payload_without_structured_fields() { + let block = JsonValue::Object( + [ + ( + "type".to_string(), + JsonValue::String("verification_report".to_string()), + ), + ( + "report_id".to_string(), + JsonValue::String("legacy-report".to_string()), + ), + ("phase".to_string(), JsonValue::String("quick".to_string())), + ( + "status".to_string(), + JsonValue::String("failed".to_string()), + ), + ( + "summary_text".to_string(), + JsonValue::String("legacy verifier summary".to_string()), + ), + ] + .into_iter() + .collect(), + ); + + let parsed = ContentBlock::from_json(&block).expect("legacy payload should parse"); + + let ContentBlock::VerificationReport { + report_id, + summary_text, + adapter_id, + project_root, + touched_paths, + primary_failure, + steps, + report_mode, + .. 
+ } = parsed + else { + panic!("expected verification report block"); + }; + assert_eq!(report_id, "legacy-report"); + assert_eq!(summary_text, "legacy verifier summary"); + assert_eq!(adapter_id, None); + assert_eq!(project_root, None); + assert!(touched_paths.is_empty()); + assert_eq!(primary_failure, None); + assert!(steps.is_empty()); + assert_eq!(report_mode, None); + } + + fn sample_verification_report() -> VerificationReport { + VerificationReport { + report_id: "report-structured".to_string(), + phase: VerificationPhase::Quick, + adapter_id: "rust-cargo".to_string(), + project_root: PathBuf::from("/tmp/demo-workspace"), + touched_paths: vec![PathBuf::from("src/lib.rs")], + status: VerificationStatus::Failed, + summary_text: "[verifier:quick:rust-cargo] failed (/tmp/demo-workspace)".to_string(), + steps: vec![ + VerificationStepReport { + adapter: "rust-cargo".to_string(), + project_root: PathBuf::from("/tmp/demo-workspace"), + label: "cargo check".to_string(), + command: "cargo check -p demo".to_string(), + phase: VerificationPhase::Quick, + status: VerificationStatus::Passed, + failure_kind: None, + duration_ms: 11, + truncated_output: "ok".to_string(), + step_kind: Some("cargo_check".to_string()), + target_scope: Some("package".to_string()), + package_name: Some("demo".to_string()), + package_manager: None, + launcher_kind: None, + }, + VerificationStepReport { + adapter: "rust-cargo".to_string(), + project_root: PathBuf::from("/tmp/demo-workspace"), + label: "cargo clippy".to_string(), + command: "cargo clippy -p demo -- -D warnings".to_string(), + phase: VerificationPhase::Quick, + status: VerificationStatus::Failed, + failure_kind: Some(VerificationFailureKind::Code), + duration_ms: 22, + truncated_output: "error[E0308]: mismatched types".to_string(), + step_kind: Some("cargo_clippy".to_string()), + target_scope: Some("package".to_string()), + package_name: Some("demo".to_string()), + package_manager: None, + launcher_kind: None, + }, + ], + } + } + fn 
temp_session_path(label: &str) -> PathBuf { let nanos = SystemTime::now() .duration_since(UNIX_EPOCH) diff --git a/rust/crates/runtime/src/verifier.rs b/rust/crates/runtime/src/verifier.rs index 1043f2c66a..9fcdc53bd4 100644 --- a/rust/crates/runtime/src/verifier.rs +++ b/rust/crates/runtime/src/verifier.rs @@ -166,6 +166,11 @@ pub struct VerificationStepReport { pub failure_kind: Option, pub duration_ms: u64, pub truncated_output: String, + pub step_kind: Option, + pub target_scope: Option, + pub package_name: Option, + pub package_manager: Option, + pub launcher_kind: Option, } /// Structured output of a full verification pass for one adapter/root pair. @@ -202,6 +207,72 @@ impl VerificationReport { first.to_string() } } + + #[must_use] + pub fn primary_step(&self) -> Option<&VerificationStepReport> { + self.steps + .iter() + .find(|step| !step.status.is_success()) + .or_else(|| self.steps.first()) + } + + #[must_use] + pub fn primary_failure_kind(&self) -> Option { + self.primary_step().and_then(|step| step.failure_kind) + } + + #[must_use] + pub fn compact_payload(&self) -> Value { + let steps = self + .steps + .iter() + .map(|step| { + serde_json::json!({ + "label": step.label, + "status": step.status.as_str(), + "failure_kind": step.failure_kind.map(VerificationFailureKind::as_str), + "duration_ms": step.duration_ms, + "step_kind": step.step_kind, + "target_scope": step.target_scope, + "package_name": step.package_name, + "package_manager": step.package_manager, + "launcher_kind": step.launcher_kind, + }) + }) + .collect::>(); + let primary_failure = self.primary_step().and_then(|step| { + (!step.status.is_success()).then(|| { + serde_json::json!({ + "label": step.label, + "status": step.status.as_str(), + "failure_kind": step.failure_kind.map(VerificationFailureKind::as_str), + "output_excerpt": truncate_output(&step.truncated_output, 512), + "step_kind": step.step_kind, + "target_scope": step.target_scope, + "package_name": step.package_name, + 
"package_manager": step.package_manager, + "launcher_kind": step.launcher_kind, + }) + }) + }); + + serde_json::json!({ + "type": "verification_report", + "report_id": self.report_id, + "adapter_id": self.adapter_id, + "phase": self.phase.as_str(), + "status": self.status.as_str(), + "project_root": self.project_root.display().to_string(), + "touched_paths": self + .touched_paths + .iter() + .map(|path| path.display().to_string()) + .collect::>(), + "primary_failure": primary_failure, + "steps": steps, + "summary_text": self.short_summary(), + }) + } } /// Status of the staged final gate for the completed turn. @@ -238,6 +309,7 @@ pub trait Verifier: Send { #[derive(Debug, Clone, PartialEq, Eq)] pub struct CargoVerifierConfig { pub legacy_mode: bool, + pub auto_mode: bool, pub quick_on_write: bool, pub final_gate: bool, pub max_output_bytes: usize, @@ -256,6 +328,7 @@ impl Default for CargoVerifierConfig { fn default() -> Self { Self { legacy_mode: true, + auto_mode: false, quick_on_write: true, final_gate: false, max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES, @@ -298,8 +371,20 @@ impl Verifier for CargoVerifier { return Vec::new(); }; - for adapter in [Adapter::Rust, Adapter::NodeTypeScript, Adapter::Python] { + let adapters: Vec = if self.config.auto_mode { + match detect_adapter_by_marker(path) { + Some(adapter) => vec![adapter], + None => return Vec::new(), + } + } else { + vec![Adapter::Rust, Adapter::NodeTypeScript, Adapter::Python] + }; + + for adapter in adapters { if let Some(report) = adapter.quick_verify(path, context, &self.config) { + if self.config.auto_mode && report.steps.is_empty() { + continue; + } return vec![report]; } } @@ -396,6 +481,58 @@ enum PackageManager { Bun, } +impl PackageManager { + #[must_use] + fn as_str(self) -> &'static str { + match self { + Self::Npm => "npm", + Self::Pnpm => "pnpm", + Self::Yarn => "yarn", + Self::Bun => "bun", + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum VerificationTargetScope { + Workspace, 
+ Package, + FileSet, + Project, +} + +impl VerificationTargetScope { + #[must_use] + fn as_str(self) -> &'static str { + match self { + Self::Workspace => "workspace", + Self::Package => "package", + Self::FileSet => "file_set", + Self::Project => "project", + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum RustStepKind { + Check, + Clippy, + FmtCheck, + Test, +} + +impl RustStepKind { + #[must_use] + fn as_str(self) -> &'static str { + match self { + Self::Check => "cargo_check", + Self::Clippy => "cargo_clippy", + Self::FmtCheck => "cargo_fmt_check", + Self::Test => "cargo_test", + } + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum PythonLauncherKind { Uv, @@ -404,11 +541,43 @@ enum PythonLauncherKind { Global, } +impl PythonLauncherKind { + #[must_use] + fn as_str(self) -> &'static str { + match self { + Self::Uv => "uv", + Self::Poetry => "poetry", + Self::Venv => "venv", + Self::Global => "global", + } + } +} + #[derive(Debug, Clone, PartialEq, Eq)] struct PythonRunner { command_prefix: Vec, } +#[allow(clippy::struct_excessive_bools)] +#[derive(Debug, Clone, PartialEq, Eq)] +struct RustProjectProfile { + project_root: PathBuf, + manifest_path: PathBuf, + package_name: Option, + manifest_parsed: bool, + manifest_parse_error: Option, +} + +#[allow(clippy::struct_excessive_bools)] +#[derive(Debug, Clone, PartialEq, Eq)] +struct NodeProjectProfile { + project_root: PathBuf, + package_json_path: PathBuf, + package_value: Value, + package_manager: PackageManager, + package_name: Option, +} + #[allow(clippy::struct_excessive_bools)] #[derive(Debug, Clone, PartialEq, Eq)] struct PythonProjectProfile { @@ -433,32 +602,134 @@ enum PythonStepKind { PyCompile, } +impl PythonStepKind { + #[must_use] + fn as_str(self) -> &'static str { + match self { + Self::RuffCheck => "ruff_check", + Self::Mypy => "mypy", + Self::Pytest => "pytest", + Self::PyCompile => "py_compile", + } + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum NodeStepKind { 
+ Typecheck, + TscNoEmit, + Lint, + Eslint, + Test, +} + +impl NodeStepKind { + #[must_use] + fn as_str(self) -> &'static str { + match self { + Self::Typecheck => "typecheck", + Self::TscNoEmit => "tsc_no_emit", + Self::Lint => "lint", + Self::Eslint => "eslint", + Self::Test => "test", + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] enum StepDiagnostics { - Generic, + Rust { + step_kind: RustStepKind, + target_scope: VerificationTargetScope, + package_name: Option, + }, + NodeTypeScript { + step_kind: NodeStepKind, + target_scope: VerificationTargetScope, + package_manager: PackageManager, + package_name: Option, + }, Python { launcher_kind: PythonLauncherKind, step_kind: PythonStepKind, + target_scope: VerificationTargetScope, }, } +impl StepDiagnostics { + #[must_use] + #[allow(clippy::unnecessary_wraps)] // feeds Option field for session compat + fn step_kind(&self) -> Option { + match self { + Self::Rust { step_kind, .. } => Some(step_kind.as_str().to_string()), + Self::NodeTypeScript { step_kind, .. } => Some(step_kind.as_str().to_string()), + Self::Python { step_kind, .. } => Some(step_kind.as_str().to_string()), + } + } + + #[must_use] + #[allow(clippy::unnecessary_wraps)] // feeds Option field for session compat + fn target_scope(&self) -> Option { + match self { + Self::Rust { target_scope, .. } + | Self::NodeTypeScript { target_scope, .. } + | Self::Python { target_scope, .. } => Some(target_scope.as_str().to_string()), + } + } + + #[must_use] + fn package_name(&self) -> Option { + match self { + Self::Rust { package_name, .. } | Self::NodeTypeScript { package_name, .. } => { + package_name.clone() + } + Self::Python { .. } => None, + } + } + + #[must_use] + fn package_manager(&self) -> Option { + match self { + Self::NodeTypeScript { + package_manager, .. + } => Some(package_manager.as_str().to_string()), + Self::Rust { .. } | Self::Python { .. 
} => None, + } + } + + #[must_use] + fn launcher_kind(&self) -> Option { + match self { + Self::Python { launcher_kind, .. } => Some(launcher_kind.as_str().to_string()), + Self::Rust { .. } | Self::NodeTypeScript { .. } => None, + } + } +} + fn verify_rust( path: &Path, context: &VerificationContext, config: &CargoVerifierConfig, ) -> Option { - let manifest = nearest_file(path, "Cargo.toml")?; - let project_root = manifest.parent()?.to_path_buf(); + let profile = build_rust_profile_for_path(path)?; + if let Some(report) = rust_config_failure_report( + &profile, + context.phase, + context.touched_paths.clone(), + config.max_output_bytes, + ) { + return Some(report); + } let phase = context.phase; let steps = if config.legacy_mode { - rust_legacy_steps(config) + rust_legacy_steps(config, profile.package_name.clone()) } else if phase == VerificationPhase::Quick { - rust_quick_steps(config) + rust_quick_steps(config, profile.package_name.clone()) } else { - rust_final_steps(config) + rust_final_steps(config, profile.package_name.clone()) }; Some(run_rust_steps( - &project_root, + &profile.project_root, context.touched_paths.clone(), phase, steps, @@ -467,10 +738,14 @@ fn verify_rust( } fn finalize_rust(target: &VerificationTarget, config: &CargoVerifierConfig) -> VerificationReport { + let profile = build_rust_profile_for_root(&target.project_root); + let package_name = profile + .as_ref() + .and_then(|profile| profile.package_name.clone()); let steps = if config.legacy_mode { - rust_legacy_steps(config) + rust_legacy_steps(config, package_name.clone()) } else { - rust_final_steps(config) + rust_final_steps(config, package_name) }; run_rust_steps( &target.project_root, @@ -499,24 +774,42 @@ fn run_rust_steps( ) } -fn rust_quick_steps(config: &CargoVerifierConfig) -> Vec { +fn rust_quick_steps( + config: &CargoVerifierConfig, + package_name: Option, +) -> Vec { if config.quick_on_write && config.rust_check { + let mut command = vec![ + "cargo".to_string(), + 
"check".to_string(), + "--quiet".to_string(), + "--message-format=short".to_string(), + ]; + let target_scope = if let Some(package_name) = package_name.clone() { + command.extend(["-p".to_string(), package_name.clone()]); + VerificationTargetScope::Package + } else { + VerificationTargetScope::Workspace + }; vec![PlannedStep { label: "cargo check".to_string(), - command: vec![ - "cargo".to_string(), - "check".to_string(), - "--quiet".to_string(), - "--message-format=short".to_string(), - ], - diagnostics: StepDiagnostics::Generic, + command, + diagnostics: StepDiagnostics::Rust { + step_kind: RustStepKind::Check, + target_scope, + package_name, + }, }] } else { Vec::new() } } -fn rust_final_steps(config: &CargoVerifierConfig) -> Vec { +#[allow(clippy::needless_pass_by_value)] // signature matches sibling step builders +fn rust_final_steps( + config: &CargoVerifierConfig, + package_name: Option, +) -> Vec { let mut steps = Vec::new(); if config.rust_fmt { steps.push(PlannedStep { @@ -527,54 +820,90 @@ fn rust_final_steps(config: &CargoVerifierConfig) -> Vec { "--".to_string(), "--check".to_string(), ], - diagnostics: StepDiagnostics::Generic, + diagnostics: StepDiagnostics::Rust { + step_kind: RustStepKind::FmtCheck, + target_scope: VerificationTargetScope::Workspace, + package_name: package_name.clone(), + }, }); } if config.rust_clippy { + let mut command = vec![ + "cargo".to_string(), + "clippy".to_string(), + "--quiet".to_string(), + "--message-format=short".to_string(), + ]; + let target_scope = if let Some(package_name) = package_name.clone() { + command.extend(["-p".to_string(), package_name.clone()]); + VerificationTargetScope::Package + } else { + VerificationTargetScope::Workspace + }; + command.extend(["--".to_string(), "-D".to_string(), "warnings".to_string()]); steps.push(PlannedStep { label: "cargo clippy".to_string(), - command: vec![ - "cargo".to_string(), - "clippy".to_string(), - "--quiet".to_string(), - "--message-format=short".to_string(), - 
"--".to_string(), - "-D".to_string(), - "warnings".to_string(), - ], - diagnostics: StepDiagnostics::Generic, + command, + diagnostics: StepDiagnostics::Rust { + step_kind: RustStepKind::Clippy, + target_scope, + package_name: package_name.clone(), + }, }); } if config.rust_test { + let mut command = vec![ + "cargo".to_string(), + "test".to_string(), + "--quiet".to_string(), + "--no-fail-fast".to_string(), + ]; + let target_scope = if let Some(package_name) = package_name.clone() { + command.extend(["-p".to_string(), package_name.clone()]); + VerificationTargetScope::Package + } else { + VerificationTargetScope::Workspace + }; steps.push(PlannedStep { label: "cargo test".to_string(), - command: vec![ - "cargo".to_string(), - "test".to_string(), - "--quiet".to_string(), - "--no-fail-fast".to_string(), - ], - diagnostics: StepDiagnostics::Generic, + command, + diagnostics: StepDiagnostics::Rust { + step_kind: RustStepKind::Test, + target_scope, + package_name: package_name.clone(), + }, }); } steps } -fn rust_legacy_steps(config: &CargoVerifierConfig) -> Vec { - let mut steps = rust_quick_steps(config); +fn rust_legacy_steps( + config: &CargoVerifierConfig, + package_name: Option, +) -> Vec { + let mut steps = rust_quick_steps(config, package_name.clone()); if config.rust_clippy { + let mut command = vec![ + "cargo".to_string(), + "clippy".to_string(), + "--quiet".to_string(), + "--message-format=short".to_string(), + ]; + let target_scope = if let Some(package_name) = package_name.clone() { + command.extend(["-p".to_string(), package_name.clone()]); + VerificationTargetScope::Package + } else { + VerificationTargetScope::Workspace + }; + command.extend(["--".to_string(), "-D".to_string(), "warnings".to_string()]); steps.push(PlannedStep { label: "cargo clippy".to_string(), - command: vec![ - "cargo".to_string(), - "clippy".to_string(), - "--quiet".to_string(), - "--message-format=short".to_string(), - "--".to_string(), - "-D".to_string(), - "warnings".to_string(), - 
], - diagnostics: StepDiagnostics::Generic, + command, + diagnostics: StepDiagnostics::Rust { + step_kind: RustStepKind::Clippy, + target_scope, + package_name: package_name.clone(), + }, }); } if config.rust_fmt { @@ -586,19 +915,34 @@ fn rust_legacy_steps(config: &CargoVerifierConfig) -> Vec { "--".to_string(), "--check".to_string(), ], - diagnostics: StepDiagnostics::Generic, + diagnostics: StepDiagnostics::Rust { + step_kind: RustStepKind::FmtCheck, + target_scope: VerificationTargetScope::Workspace, + package_name: package_name.clone(), + }, }); } if config.rust_test { + let mut command = vec![ + "cargo".to_string(), + "test".to_string(), + "--quiet".to_string(), + "--no-fail-fast".to_string(), + ]; + let target_scope = if let Some(package_name) = package_name.clone() { + command.extend(["-p".to_string(), package_name.clone()]); + VerificationTargetScope::Package + } else { + VerificationTargetScope::Workspace + }; steps.push(PlannedStep { label: "cargo test".to_string(), - command: vec![ - "cargo".to_string(), - "test".to_string(), - "--quiet".to_string(), - "--no-fail-fast".to_string(), - ], - diagnostics: StepDiagnostics::Generic, + command, + diagnostics: StepDiagnostics::Rust { + step_kind: RustStepKind::Test, + target_scope, + package_name, + }, }); } steps @@ -614,8 +958,8 @@ fn verify_node( } let package_json = nearest_file(path, "package.json")?; let project_root = package_json.parent()?.to_path_buf(); - let package_value = match load_node_package(&package_json) { - Ok(value) => value, + let profile = match build_node_profile_for_root(&project_root) { + Ok(profile) => profile?, Err(report) => { return Some(node_setup_failure_report( &project_root, @@ -627,21 +971,20 @@ fn verify_node( } }; let phase = context.phase; - let package_manager = detect_package_manager(&project_root); let steps = if config.legacy_mode { - node_legacy_steps(&project_root, &package_value, package_manager) + node_legacy_steps(&profile) } else if phase == 
VerificationPhase::Quick { if config.quick_on_write { - node_quick_steps(&project_root, &package_value, package_manager) + node_quick_steps(&profile) } else { Vec::new() } } else { - node_final_steps(&project_root, &package_value, package_manager) + node_final_steps(&profile) }; Some(run_planned_steps( "node-typescript", - &project_root, + &profile.project_root, context.touched_paths.clone(), phase, steps, @@ -657,9 +1000,8 @@ fn finalize_node( if !config.node_enabled { return None; } - let package_json = target.project_root.join("package.json"); - let package_value = match load_node_package(&package_json) { - Ok(value) => value, + let profile = match build_node_profile_for_root(&target.project_root) { + Ok(profile) => profile?, Err(report) => { return Some(node_setup_failure_report( &target.project_root, @@ -670,18 +1012,18 @@ fn finalize_node( )); } }; - let package_manager = detect_package_manager(&target.project_root); Some(run_planned_steps( "node-typescript", &target.project_root, target.touched_paths.clone(), VerificationPhase::Final, - node_final_steps(&target.project_root, &package_value, package_manager), + node_final_steps(&profile), config.node_timeout, config.max_output_bytes, )) } +#[derive(Debug)] struct NodeSetupFailure { label: String, kind: VerificationFailureKind, @@ -708,97 +1050,115 @@ fn node_setup_failure_report( failure: &NodeSetupFailure, max_output_bytes: usize, ) -> VerificationReport { + let status = if failure.kind == VerificationFailureKind::Config { + VerificationStatus::Failed + } else { + VerificationStatus::Unavailable + }; let steps = vec![VerificationStepReport { adapter: "node-typescript".to_string(), project_root: project_root.to_path_buf(), label: failure.label.clone(), command: project_root.join("package.json").display().to_string(), phase, - status: VerificationStatus::Unavailable, + status, failure_kind: Some(failure.kind), duration_ms: 0, truncated_output: truncate_output(&failure.message, max_output_bytes), + step_kind: 
None, + target_scope: Some(VerificationTargetScope::Package.as_str().to_string()), + package_name: None, + package_manager: None, + launcher_kind: None, }]; - let summary_text = render_report_summary( - "node-typescript", - project_root, - phase, - VerificationStatus::Unavailable, - &steps, - ); + let summary_text = + render_report_summary("node-typescript", project_root, phase, status, &steps); VerificationReport { report_id: next_report_id(), phase, adapter_id: "node-typescript".to_string(), project_root: project_root.to_path_buf(), touched_paths, - status: VerificationStatus::Unavailable, + status, summary_text, steps, } } -fn node_quick_steps( - root: &Path, - package_value: &Value, - manager: PackageManager, -) -> Vec { - if has_script(package_value, "typecheck") { +fn node_quick_steps(profile: &NodeProjectProfile) -> Vec { + if has_script(&profile.package_value, "typecheck") { return vec![PlannedStep { label: "typecheck".to_string(), - command: package_manager_run_script(manager, "typecheck"), - diagnostics: StepDiagnostics::Generic, + command: package_manager_run_script(profile.package_manager, "typecheck"), + diagnostics: StepDiagnostics::NodeTypeScript { + step_kind: NodeStepKind::Typecheck, + target_scope: VerificationTargetScope::Package, + package_manager: profile.package_manager, + package_name: profile.package_name.clone(), + }, }]; } - if root.join("tsconfig.json").is_file() { + if profile.project_root.join("tsconfig.json").is_file() { return vec![PlannedStep { label: "tsc --noEmit".to_string(), - command: package_manager_exec(manager, "tsc", &["--noEmit"]), - diagnostics: StepDiagnostics::Generic, + command: package_manager_exec(profile.package_manager, "tsc", &["--noEmit"]), + diagnostics: StepDiagnostics::NodeTypeScript { + step_kind: NodeStepKind::TscNoEmit, + target_scope: VerificationTargetScope::Package, + package_manager: profile.package_manager, + package_name: profile.package_name.clone(), + }, }]; } Vec::new() } -fn node_final_steps( - 
root: &Path, - package_value: &Value, - manager: PackageManager, -) -> Vec { +fn node_final_steps(profile: &NodeProjectProfile) -> Vec { let mut steps = Vec::new(); - if has_script(package_value, "lint") { + if has_script(&profile.package_value, "lint") { steps.push(PlannedStep { label: "lint".to_string(), - command: package_manager_run_script(manager, "lint"), - diagnostics: StepDiagnostics::Generic, + command: package_manager_run_script(profile.package_manager, "lint"), + diagnostics: StepDiagnostics::NodeTypeScript { + step_kind: NodeStepKind::Lint, + target_scope: VerificationTargetScope::Package, + package_manager: profile.package_manager, + package_name: profile.package_name.clone(), + }, }); } else if ESLINT_CONFIG_FILES .iter() - .any(|name| root.join(name).is_file()) + .any(|name| profile.project_root.join(name).is_file()) { steps.push(PlannedStep { label: "eslint .".to_string(), - command: package_manager_exec(manager, "eslint", &["."]), - diagnostics: StepDiagnostics::Generic, + command: package_manager_exec(profile.package_manager, "eslint", &["."]), + diagnostics: StepDiagnostics::NodeTypeScript { + step_kind: NodeStepKind::Eslint, + target_scope: VerificationTargetScope::Package, + package_manager: profile.package_manager, + package_name: profile.package_name.clone(), + }, }); } - if has_script(package_value, "test") { + if has_script(&profile.package_value, "test") { steps.push(PlannedStep { label: "test".to_string(), - command: package_manager_run_script(manager, "test"), - diagnostics: StepDiagnostics::Generic, + command: package_manager_run_script(profile.package_manager, "test"), + diagnostics: StepDiagnostics::NodeTypeScript { + step_kind: NodeStepKind::Test, + target_scope: VerificationTargetScope::Package, + package_manager: profile.package_manager, + package_name: profile.package_name.clone(), + }, }); } steps } -fn node_legacy_steps( - root: &Path, - package_value: &Value, - manager: PackageManager, -) -> Vec { - let mut steps = 
node_quick_steps(root, package_value, manager); - steps.extend(node_final_steps(root, package_value, manager)); +fn node_legacy_steps(profile: &NodeProjectProfile) -> Vec { + let mut steps = node_quick_steps(profile); + steps.extend(node_final_steps(profile)); steps } @@ -906,6 +1266,11 @@ fn python_config_failure_report( failure_kind: Some(VerificationFailureKind::Config), duration_ms: 0, truncated_output: truncate_output(error, max_output_bytes), + step_kind: None, + target_scope: Some(VerificationTargetScope::Project.as_str().to_string()), + package_name: None, + package_manager: None, + launcher_kind: Some(profile.launcher_kind.as_str().to_string()), }]; let summary_text = render_report_summary( "python", @@ -1039,6 +1404,14 @@ fn python_step( diagnostics: StepDiagnostics::Python { launcher_kind: profile.launcher_kind, step_kind, + target_scope: if matches!( + step_kind, + PythonStepKind::Pytest | PythonStepKind::RuffCheck + ) { + VerificationTargetScope::Project + } else { + VerificationTargetScope::FileSet + }, }, } } @@ -1056,6 +1429,7 @@ fn dedupe_steps(steps: &mut Vec) { }); } +#[allow(clippy::too_many_lines)] fn run_planned_steps( adapter_id: &str, project_root: &Path, @@ -1085,6 +1459,11 @@ fn run_planned_steps( failure_kind: None, duration_ms: 0, truncated_output: String::new(), + step_kind: step.diagnostics.step_kind(), + target_scope: step.diagnostics.target_scope(), + package_name: step.diagnostics.package_name(), + package_manager: step.diagnostics.package_manager(), + launcher_kind: step.diagnostics.launcher_kind(), }); continue; } @@ -1101,6 +1480,11 @@ fn run_planned_steps( failure_kind: None, duration_ms, truncated_output: body, + step_kind: step.diagnostics.step_kind(), + target_scope: step.diagnostics.target_scope(), + package_name: step.diagnostics.package_name(), + package_manager: step.diagnostics.package_manager(), + launcher_kind: step.diagnostics.launcher_kind(), }), StepOutcome::Failed { body, @@ -1119,6 +1503,11 @@ fn 
run_planned_steps( failure_kind, duration_ms, truncated_output: body, + step_kind: step.diagnostics.step_kind(), + target_scope: step.diagnostics.target_scope(), + package_name: step.diagnostics.package_name(), + package_manager: step.diagnostics.package_manager(), + launcher_kind: step.diagnostics.launcher_kind(), }); } StepOutcome::Unavailable { @@ -1138,6 +1527,11 @@ fn run_planned_steps( failure_kind, duration_ms, truncated_output: message, + step_kind: step.diagnostics.step_kind(), + target_scope: step.diagnostics.target_scope(), + package_name: step.diagnostics.package_name(), + package_manager: step.diagnostics.package_manager(), + launcher_kind: step.diagnostics.launcher_kind(), }); } } @@ -1164,10 +1558,6 @@ fn render_report_summary( status: VerificationStatus, steps: &[VerificationStepReport], ) -> String { - if adapter_id == "python" { - return render_python_report_summary(adapter_id, project_root, phase, status, steps); - } - let mut summary = format!( "[verifier:{}:{}] {} ({})", phase.as_str(), @@ -1179,64 +1569,33 @@ fn render_report_summary( summary.push_str("\n[verifier] no verification steps were planned"); return summary; } - for step in steps { + let primary_idx = steps + .iter() + .position(|step| !step.status.is_success()) + .unwrap_or(0); + for (idx, step) in steps.iter().enumerate() { let label = match step.status { VerificationStatus::Passed => "ok", VerificationStatus::Failed => "FAIL", VerificationStatus::Skipped => "skipped", VerificationStatus::Unavailable => "unavailable", }; - let _ = writeln!(summary, "\n[verifier] {}: {label}", step.label); - if !step.truncated_output.trim().is_empty() { + let failure_suffix = step + .failure_kind + .map(|kind| format!(" ({})", kind.as_str())) + .unwrap_or_default(); + let _ = writeln!( + summary, + "\n[verifier] {}: {label}{failure_suffix}", + step.label + ); + if idx == primary_idx && !step.truncated_output.trim().is_empty() { summary.push_str(&step.truncated_output); } } 
summary.trim_end().to_string() } -fn render_python_report_summary( - adapter_id: &str, - project_root: &Path, - phase: VerificationPhase, - status: VerificationStatus, - steps: &[VerificationStepReport], -) -> String { - let mut summary = format!( - "[verifier:{}:{}] {} ({})", - phase.as_str(), - adapter_id, - status.as_str(), - project_root.display() - ); - if steps.is_empty() { - summary.push_str("\n[verifier] no verification steps were planned"); - return summary; - } - let primary = steps - .iter() - .find(|step| !step.status.is_success()) - .unwrap_or(&steps[0]); - let label = match primary.status { - VerificationStatus::Passed => "ok", - VerificationStatus::Failed => "FAIL", - VerificationStatus::Skipped => "skipped", - VerificationStatus::Unavailable => "unavailable", - }; - let failure_suffix = primary - .failure_kind - .map(|kind| format!(" ({})", kind.as_str())) - .unwrap_or_default(); - let _ = writeln!( - summary, - "\n[verifier] {}: {label}{failure_suffix}", - primary.label - ); - if !primary.truncated_output.trim().is_empty() { - summary.push_str(&primary.truncated_output); - } - summary.trim_end().to_string() -} - fn package_manager_run_script(manager: PackageManager, script: &str) -> Vec { match manager { PackageManager::Npm => vec![ @@ -1299,55 +1658,157 @@ fn detect_package_manager(root: &Path) -> PackageManager { } } -fn build_python_profile_for_path(path: &Path) -> Option { - let project_root = nearest_python_root(path)?; - build_python_profile_for_root(&project_root) +fn build_rust_profile_for_path(path: &Path) -> Option { + let manifest = nearest_file(path, "Cargo.toml")?; + build_rust_profile_from_manifest(&manifest) } -fn build_python_profile_for_root(root: &Path) -> Option { - let project_root = normalize_local_path(root)?; - let pyproject_path = project_root.join("pyproject.toml"); - let (pyproject_parsed, pyproject_value, pyproject_parse_error) = - parse_optional_pyproject(&pyproject_path); - let (runner, launcher_kind) = 
detect_python_runner(&project_root); - let test_root_present = project_root.join("tests").is_dir(); - let has_ruff = python_has_ruff(&project_root, pyproject_value.as_ref()); - let has_mypy = python_has_mypy(&project_root, pyproject_value.as_ref()); - let has_pytest = python_has_pytest(&project_root, pyproject_value.as_ref(), test_root_present); - let typed_targets = python_typed_targets(&project_root, pyproject_value.as_ref()); +fn build_rust_profile_for_root(root: &Path) -> Option { + let manifest = normalize_local_path(root)?.join("Cargo.toml"); + build_rust_profile_from_manifest(&manifest) +} - Some(PythonProjectProfile { +fn build_rust_profile_from_manifest(manifest_path: &Path) -> Option { + let manifest_path = normalize_local_path(manifest_path)?; + let project_root = manifest_path.parent()?.to_path_buf(); + let (manifest_parsed, manifest_value, manifest_parse_error) = + parse_optional_toml_file(&manifest_path, "Cargo.toml"); + let package_name = manifest_value + .as_ref() + .and_then(|value| toml_value_at(value, &["package", "name"])) + .and_then(TomlValue::as_str) + .map(ToOwned::to_owned); + Some(RustProjectProfile { project_root, - runner, - launcher_kind, - pyproject_parsed, - has_ruff, - has_mypy, - has_pytest, - typed_targets, - test_root_present, - pyproject_path: pyproject_path.is_file().then_some(pyproject_path), - pyproject_parse_error, + manifest_path, + package_name, + manifest_parsed, + manifest_parse_error, }) } -fn parse_optional_pyproject(path: &Path) -> (bool, Option, Option) { - if !path.is_file() { - return (true, None, None); +fn rust_config_failure_report( + profile: &RustProjectProfile, + phase: VerificationPhase, + touched_paths: Vec, + max_output_bytes: usize, +) -> Option { + if profile.manifest_parsed { + return None; } - match fs::read_to_string(path) { - Ok(contents) => match contents.parse::() { - Ok(value) => (true, Some(value), None), - Err(error) => ( - false, - None, - Some(format!("failed to parse pyproject.toml: 
{error}")), - ), - }, - Err(error) => ( - false, + let error = profile.manifest_parse_error.as_ref()?; + let steps = vec![VerificationStepReport { + adapter: "rust-cargo".to_string(), + project_root: profile.project_root.clone(), + label: "Cargo.toml parse".to_string(), + command: profile.manifest_path.display().to_string(), + phase, + status: VerificationStatus::Failed, + failure_kind: Some(VerificationFailureKind::Config), + duration_ms: 0, + truncated_output: truncate_output(error, max_output_bytes), + step_kind: None, + target_scope: Some(VerificationTargetScope::Workspace.as_str().to_string()), + package_name: profile.package_name.clone(), + package_manager: None, + launcher_kind: None, + }]; + let summary_text = render_report_summary( + "rust-cargo", + &profile.project_root, + phase, + VerificationStatus::Failed, + &steps, + ); + Some(VerificationReport { + report_id: next_report_id(), + phase, + adapter_id: "rust-cargo".to_string(), + project_root: profile.project_root.clone(), + touched_paths, + status: VerificationStatus::Failed, + summary_text, + steps, + }) +} + +fn build_node_profile_for_root( + root: &Path, +) -> Result, NodeSetupFailure> { + let Some(project_root) = normalize_local_path(root) else { + return Ok(None); + }; + let package_json_path = project_root.join("package.json"); + let package_value = load_node_package(&package_json_path)?; + let package_name = package_value + .get("name") + .and_then(Value::as_str) + .map(ToOwned::to_owned); + Ok(Some(NodeProjectProfile { + package_manager: detect_package_manager(&project_root), + package_json_path, + project_root, + package_value, + package_name, + })) +} + +fn build_python_profile_for_path(path: &Path) -> Option { + let project_root = nearest_python_root(path)?; + build_python_profile_for_root(&project_root) +} + +fn build_python_profile_for_root(root: &Path) -> Option { + let project_root = normalize_local_path(root)?; + let pyproject_path = project_root.join("pyproject.toml"); + let 
(pyproject_parsed, pyproject_value, pyproject_parse_error) = + parse_optional_pyproject(&pyproject_path); + let (runner, launcher_kind) = detect_python_runner(&project_root); + let test_root_present = project_root.join("tests").is_dir(); + let has_ruff = python_has_ruff(&project_root, pyproject_value.as_ref()); + let has_mypy = python_has_mypy(&project_root, pyproject_value.as_ref()); + let has_pytest = python_has_pytest(&project_root, pyproject_value.as_ref(), test_root_present); + let typed_targets = python_typed_targets(&project_root, pyproject_value.as_ref()); + + Some(PythonProjectProfile { + project_root, + runner, + launcher_kind, + pyproject_parsed, + has_ruff, + has_mypy, + has_pytest, + typed_targets, + test_root_present, + pyproject_path: pyproject_path.is_file().then_some(pyproject_path), + pyproject_parse_error, + }) +} + +fn parse_optional_pyproject(path: &Path) -> (bool, Option, Option) { + parse_optional_toml_file(path, "pyproject.toml") +} + +fn parse_optional_toml_file( + path: &Path, + display_name: &str, +) -> (bool, Option, Option) { + if !path.is_file() { + return (true, None, None); + } + match fs::read_to_string(path) { + Ok(contents) => match contents.parse::() { + Ok(value) => (true, Some(value), None), + Err(error) => ( + false, + None, + Some(format!("failed to parse {display_name}: {error}")), + ), + }, + Err(error) => ( + false, None, - Some(format!("failed to read pyproject.toml: {error}")), + Some(format!("failed to read {display_name}: {error}")), ), } } @@ -1566,6 +2027,38 @@ fn file_contains(path: impl AsRef, needle: &str) -> bool { fs::read_to_string(path.as_ref()).is_ok_and(|contents| contents.contains(needle)) } +/// Walks ancestors of `start` and returns the adapter whose marker file is found first +/// (closest wins). Explicit markers per spec Fase 5: Cargo.toml → Rust, package.json → Node, +/// any `PYTHON_ROOT_MARKERS` entry → Python. 
+fn detect_adapter_by_marker(start: &Path) -> Option { + let start = if start.is_absolute() { + start.to_path_buf() + } else { + std::env::current_dir().ok()?.join(start) + }; + let mut cursor = if start.is_dir() { + start + } else { + start.parent()?.to_path_buf() + }; + loop { + if cursor.join("Cargo.toml").is_file() { + return Some(Adapter::Rust); + } + if cursor.join("package.json").is_file() { + return Some(Adapter::NodeTypeScript); + } + for marker in PYTHON_ROOT_MARKERS { + if cursor.join(marker).is_file() { + return Some(Adapter::Python); + } + } + if !cursor.pop() { + return None; + } + } +} + fn nearest_file(start: &Path, file_name: &str) -> Option { let start = if start.is_absolute() { start.to_path_buf() @@ -1607,32 +2100,164 @@ fn nearest_python_root(start: &Path) -> Option { } } +#[allow(clippy::unnecessary_wraps)] // feeds Option field fn classify_step_failure(step: &PlannedStep, body: &str) -> Option { match step.diagnostics { - StepDiagnostics::Generic => None, + StepDiagnostics::Rust { + step_kind, + target_scope: _, + package_name: _, + } => Some(classify_rust_failure_kind(step_kind, body)), + StepDiagnostics::NodeTypeScript { + step_kind, + target_scope: _, + package_manager, + package_name: _, + } => Some(classify_node_failure_kind(step_kind, package_manager, body)), StepDiagnostics::Python { launcher_kind, step_kind, + target_scope: _, } => Some(classify_python_failure_kind(launcher_kind, step_kind, body)), } } +#[allow(clippy::unnecessary_wraps)] // feeds Option field fn classify_step_timeout(step: &PlannedStep) -> Option { match step.diagnostics { - StepDiagnostics::Generic => None, - StepDiagnostics::Python { .. } => Some(VerificationFailureKind::Timeout), + StepDiagnostics::Rust { .. } + | StepDiagnostics::NodeTypeScript { .. } + | StepDiagnostics::Python { .. 
} => Some(VerificationFailureKind::Timeout), } } +#[allow(clippy::unnecessary_wraps)] // feeds Option field fn classify_step_unavailable(step: &PlannedStep, message: &str) -> Option { match step.diagnostics { - StepDiagnostics::Generic => None, + StepDiagnostics::Rust { step_kind, .. } => { + Some(classify_rust_unavailable_kind(step_kind, message)) + } + StepDiagnostics::NodeTypeScript { + step_kind, + package_manager, + .. + } => Some(classify_node_unavailable_kind( + step_kind, + package_manager, + message, + )), StepDiagnostics::Python { launcher_kind, .. } => { Some(classify_python_unavailable_kind(launcher_kind, message)) } } } +fn classify_rust_failure_kind(step_kind: RustStepKind, body: &str) -> VerificationFailureKind { + let lower = body.to_ascii_lowercase(); + if lower.contains("failed to parse manifest") + || lower.contains("could not parse manifest") + || lower.contains("invalid table header") + || (lower.contains("cargo.toml") && lower.contains("parse")) + { + return VerificationFailureKind::Config; + } + if is_rust_tool_unavailable(step_kind, &lower) { + return VerificationFailureKind::ToolUnavailable; + } + if lower.contains("toolchain") + || lower.contains("rustup") + || lower.contains("linker") + || lower.contains("target may not be installed") + || lower.contains("failed to run custom build command") + { + return VerificationFailureKind::Environment; + } + VerificationFailureKind::Code +} + +fn classify_rust_unavailable_kind( + step_kind: RustStepKind, + message: &str, +) -> VerificationFailureKind { + let lower = message.to_ascii_lowercase(); + if is_rust_tool_unavailable(step_kind, &lower) || lower.contains("cargo") { + VerificationFailureKind::ToolUnavailable + } else { + VerificationFailureKind::Environment + } +} + +fn is_rust_tool_unavailable(step_kind: RustStepKind, lower: &str) -> bool { + let tool_name = match step_kind { + RustStepKind::Check | RustStepKind::Clippy | RustStepKind::Test => "cargo", + RustStepKind::FmtCheck => "rustfmt", + 
}; + lower.contains("no such subcommand") + || lower.contains("command not found") + || lower.contains("not recognized as an internal or external command") + || lower.contains("is not installed") + || lower.contains(tool_name) && lower.contains("not found") +} + +fn classify_node_failure_kind( + step_kind: NodeStepKind, + package_manager: PackageManager, + body: &str, +) -> VerificationFailureKind { + let lower = body.to_ascii_lowercase(); + if lower.contains("tsconfig") + && (lower.contains("parse") || lower.contains("unknown compiler option")) + { + return VerificationFailureKind::Config; + } + if lower.contains("eslint") && lower.contains("configuration") { + return VerificationFailureKind::Config; + } + if is_node_tool_unavailable(step_kind, package_manager, &lower) { + return VerificationFailureKind::ToolUnavailable; + } + if lower.contains("node_modules") + || lower.contains("lockfile") + || lower.contains("package manager") + || lower.contains("missing script") + { + return VerificationFailureKind::Environment; + } + VerificationFailureKind::Code +} + +fn classify_node_unavailable_kind( + step_kind: NodeStepKind, + package_manager: PackageManager, + message: &str, +) -> VerificationFailureKind { + let lower = message.to_ascii_lowercase(); + if is_node_tool_unavailable(step_kind, package_manager, &lower) { + VerificationFailureKind::ToolUnavailable + } else { + VerificationFailureKind::Environment + } +} + +fn is_node_tool_unavailable( + step_kind: NodeStepKind, + package_manager: PackageManager, + lower: &str, +) -> bool { + let tool_name = match step_kind { + NodeStepKind::Typecheck => "typecheck", + NodeStepKind::TscNoEmit => "tsc", + NodeStepKind::Lint => "lint", + NodeStepKind::Eslint => "eslint", + NodeStepKind::Test => "test", + }; + lower.contains("command not found") + || lower.contains("not recognized as an internal or external command") + || lower.contains("could not determine executable to run") + || lower.contains(package_manager.as_str()) && 
lower.contains("not found") + || lower.contains(tool_name) && lower.contains("not found") +} + fn classify_python_failure_kind( launcher_kind: PythonLauncherKind, step_kind: PythonStepKind, @@ -1718,7 +2343,7 @@ fn run_step( timeout: Duration, max_output_bytes: usize, ) -> StepOutcome { - let mut command = Command::new(&step.command[0]); + let mut command = spawnable_command(&step.command[0]); command.current_dir(cwd); command.stdin(std::process::Stdio::null()); command.stdout(std::process::Stdio::piped()); @@ -1782,6 +2407,95 @@ fn run_step( } } +fn spawnable_command(program: &str) -> Command { + #[cfg(windows)] + { + if let Some(resolved) = resolve_windows_program(program) { + if uses_cmd_wrapper(&resolved) { + let mut command = Command::new("cmd"); + command.arg("/C").arg(resolved); + return command; + } + return Command::new(resolved); + } + } + + Command::new(program) +} + +#[cfg(windows)] +fn resolve_windows_program(program: &str) -> Option { + let path = Path::new(program); + if path.components().count() > 1 || path.is_absolute() { + return resolve_windows_path_candidate(path); + } + + let path_entries = std::env::var_os("PATH") + .into_iter() + .flat_map(|paths| std::env::split_paths(&paths).collect::>()) + .collect::>(); + for entry in path_entries { + let candidate = entry.join(program); + if let Some(resolved) = resolve_windows_path_candidate(&candidate) { + return Some(resolved); + } + } + None +} + +#[cfg(windows)] +fn resolve_windows_path_candidate(candidate: &Path) -> Option { + if candidate.is_file() { + return Some(candidate.to_path_buf()); + } + if candidate.extension().is_some() { + return None; + } + + for extension in windows_path_extensions() { + let trimmed = extension.trim(); + if trimmed.is_empty() { + continue; + } + let suffix = trimmed.strip_prefix('.').unwrap_or(trimmed); + let path = candidate.with_extension(suffix); + if path.is_file() { + return Some(path); + } + } + None +} + +#[cfg(windows)] +fn windows_path_extensions() -> Vec { + 
std::env::var("PATHEXT") + .ok() + .map(|value| { + value + .split(';') + .map(str::trim) + .filter(|entry| !entry.is_empty()) + .map(ToOwned::to_owned) + .collect::>() + }) + .filter(|values| !values.is_empty()) + .unwrap_or_else(|| { + vec![ + ".COM".to_string(), + ".EXE".to_string(), + ".BAT".to_string(), + ".CMD".to_string(), + ] + }) +} + +#[cfg(windows)] +fn uses_cmd_wrapper(path: &Path) -> bool { + path.extension() + .and_then(|ext| ext.to_str()) + .is_some_and(|ext| ext.eq_ignore_ascii_case("cmd") || ext.eq_ignore_ascii_case("bat")) +} + fn duration_millis_u64(duration: Duration) -> u64 { u64::try_from(duration.as_millis()).unwrap_or(u64::MAX) } @@ -2205,9 +2919,310 @@ addopts = "-q" diagnostics: StepDiagnostics::Python { launcher_kind: PythonLauncherKind::Global, step_kind: PythonStepKind::Pytest, + target_scope: VerificationTargetScope::Project, }, }), Some(VerificationFailureKind::Timeout) ); } + + #[test] + fn rust_profile_extracts_package_name_from_nearest_manifest() { + let root = temp_dir("rust-profile"); + let crate_dir = root.join("crates").join("demo"); + fs::create_dir_all(crate_dir.join("src")).expect("crate dir should create"); + fs::write( + crate_dir.join("Cargo.toml"), + "[package]\nname = \"demo\"\nversion = \"0.1.0\"\n", + ) + .expect("manifest should write"); + fs::write(crate_dir.join("src").join("lib.rs"), "pub fn demo() {}\n") + .expect("lib should write"); + + let profile = build_rust_profile_for_path(&crate_dir.join("src").join("lib.rs")) + .expect("profile should resolve"); + + assert_eq!(profile.project_root, crate_dir); + assert_eq!(profile.package_name.as_deref(), Some("demo")); + assert!(profile.manifest_parsed); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn rust_quick_steps_scope_to_package_when_package_name_is_known() { + let steps = rust_quick_steps(&CargoVerifierConfig::default(), Some("demo".to_string())); + + assert_eq!(steps.len(), 1); + assert_eq!( + steps[0].command, + vec![ + 
"cargo".to_string(), + "check".to_string(), + "--quiet".to_string(), + "--message-format=short".to_string(), + "-p".to_string(), + "demo".to_string(), + ] + ); + assert_eq!( + steps[0].diagnostics, + StepDiagnostics::Rust { + step_kind: RustStepKind::Check, + target_scope: VerificationTargetScope::Package, + package_name: Some("demo".to_string()), + } + ); + } + + #[test] + fn node_profile_resolves_nearest_package_and_package_manager() { + let root = temp_dir("node-profile"); + let package_root = root.join("packages").join("web"); + fs::create_dir_all(&package_root).expect("package root should create"); + fs::write( + package_root.join("package.json"), + r#"{ + "name": "@demo/web", + "scripts": { + "typecheck": "tsc --noEmit", + "lint": "eslint ." + } +}"#, + ) + .expect("package should write"); + fs::write( + package_root.join("pnpm-lock.yaml"), + "lockfileVersion: '9.0'\n", + ) + .expect("lockfile should write"); + + let profile = build_node_profile_for_root(&package_root) + .expect("profile should not error") + .expect("profile should exist"); + + assert_eq!(profile.project_root, package_root); + assert_eq!(profile.package_name.as_deref(), Some("@demo/web")); + assert_eq!(profile.package_manager, PackageManager::Pnpm); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn node_quick_steps_prefer_package_scripts_and_carry_metadata() { + let profile = NodeProjectProfile { + project_root: PathBuf::from("/workspace/packages/web"), + package_json_path: PathBuf::from("/workspace/packages/web/package.json"), + package_value: serde_json::json!({ + "name": "@demo/web", + "scripts": { + "typecheck": "tsc --noEmit" + } + }), + package_manager: PackageManager::Pnpm, + package_name: Some("@demo/web".to_string()), + }; + + let steps = node_quick_steps(&profile); + + assert_eq!(steps.len(), 1); + assert_eq!( + steps[0].command, + vec![ + "pnpm".to_string(), + "run".to_string(), + "typecheck".to_string(), + ] + ); + assert_eq!( + 
steps[0].diagnostics, + StepDiagnostics::NodeTypeScript { + step_kind: NodeStepKind::Typecheck, + target_scope: VerificationTargetScope::Package, + package_manager: PackageManager::Pnpm, + package_name: Some("@demo/web".to_string()), + } + ); + } + + #[test] + fn classify_rust_failures_distinguishes_config_environment_and_tools() { + assert_eq!( + classify_rust_failure_kind( + RustStepKind::Check, + "failed to parse manifest at Cargo.toml" + ), + VerificationFailureKind::Config + ); + assert_eq!( + classify_rust_failure_kind( + RustStepKind::Check, + "error: linker `cc` not found while building crate", + ), + VerificationFailureKind::Environment + ); + assert_eq!( + classify_rust_failure_kind(RustStepKind::Clippy, "error: no such subcommand: `clippy`",), + VerificationFailureKind::ToolUnavailable + ); + assert_eq!( + classify_rust_failure_kind(RustStepKind::Check, "error[E0308]: mismatched types"), + VerificationFailureKind::Code + ); + } + + #[test] + fn classify_node_failures_distinguishes_config_environment_and_tools() { + assert_eq!( + classify_node_failure_kind( + NodeStepKind::TscNoEmit, + PackageManager::Npm, + "tsconfig parse error: unknown compiler option", + ), + VerificationFailureKind::Config + ); + assert_eq!( + classify_node_failure_kind( + NodeStepKind::Typecheck, + PackageManager::Pnpm, + "pnpm package manager could not install because lockfile is missing", + ), + VerificationFailureKind::Environment + ); + assert_eq!( + classify_node_failure_kind( + NodeStepKind::Eslint, + PackageManager::Npm, + "eslint not found", + ), + VerificationFailureKind::ToolUnavailable + ); + assert_eq!( + classify_node_failure_kind( + NodeStepKind::TscNoEmit, + PackageManager::Npm, + "src/index.ts(1,7): error TS2322: Type 'string' is not assignable", + ), + VerificationFailureKind::Code + ); + } + + #[test] + fn cargo_verifier_auto_mode_skips_empty_step_reports() { + let root = temp_dir("auto-node-empty"); + fs::write( + root.join("package.json"), + 
r#"{"name":"demo-node","scripts":{}}"#, + ) + .expect("package json should write"); + let source = root.join("index.ts"); + fs::write(&source, "export const value = 1;\n").expect("source should write"); + + let verifier = CargoVerifier::new(CargoVerifierConfig { + legacy_mode: false, + auto_mode: true, + quick_on_write: true, + final_gate: true, + max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES, + rust_check: true, + rust_clippy: true, + rust_fmt: true, + rust_test: true, + rust_timeout: Duration::from_secs(1), + node_enabled: true, + node_timeout: Duration::from_secs(1), + python_enabled: false, + python_timeout: Duration::from_secs(1), + }); + let context = VerificationContext { + phase: VerificationPhase::Quick, + workspace_root: Some(root.clone()), + tool_name: "edit_file".to_string(), + tool_input: format!(r#"{{"file_path":"{}"}}"#, source.display()), + touched_paths: vec![source], + mutation_sequence: 1, + }; + + let reports = verifier.quick_verify(&context); + + assert!( + reports.is_empty(), + "auto mode should skip empty-step reports" + ); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn detect_adapter_by_marker_prefers_closest_ancestor() { + let root = temp_dir("auto-marker-closest"); + // Outer repo marker is package.json; inner package has Cargo.toml closer to file. 
+ fs::write(root.join("package.json"), r#"{"name":"outer"}"#) + .expect("outer package.json should write"); + let inner = root.join("crate-a"); + fs::create_dir_all(&inner).expect("inner dir should create"); + fs::write( + inner.join("Cargo.toml"), + "[package]\nname=\"a\"\nversion=\"0.1.0\"\n", + ) + .expect("inner Cargo.toml should write"); + let source = inner.join("src").join("lib.rs"); + fs::create_dir_all(source.parent().unwrap()).expect("src dir"); + fs::write(&source, "pub fn x() {}\n").expect("src"); + + let detected = detect_adapter_by_marker(&source).expect("adapter should resolve"); + assert_eq!(detected, Adapter::Rust); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn detect_adapter_by_marker_returns_none_without_markers() { + let root = temp_dir("auto-marker-none"); + let source = root.join("note.txt"); + fs::write(&source, "plain\n").expect("note should write"); + + assert!(detect_adapter_by_marker(&source).is_none()); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } + + #[test] + fn auto_mode_without_marker_returns_no_reports() { + let root = temp_dir("auto-no-marker"); + let source = root.join("random.txt"); + fs::write(&source, "nope\n").expect("source should write"); + + let verifier = CargoVerifier::new(CargoVerifierConfig { + legacy_mode: false, + auto_mode: true, + quick_on_write: true, + final_gate: true, + max_output_bytes: DEFAULT_MAX_OUTPUT_BYTES, + rust_check: true, + rust_clippy: true, + rust_fmt: true, + rust_test: true, + rust_timeout: Duration::from_secs(1), + node_enabled: true, + node_timeout: Duration::from_secs(1), + python_enabled: true, + python_timeout: Duration::from_secs(1), + }); + let context = VerificationContext { + phase: VerificationPhase::Quick, + workspace_root: Some(root.clone()), + tool_name: "edit_file".to_string(), + tool_input: format!(r#"{{"file_path":"{}"}}"#, source.display()), + touched_paths: vec![source], + mutation_sequence: 1, + }; + + assert!( + 
verifier.quick_verify(&context).is_empty(), + "auto mode without marker should return no reports" + ); + + fs::remove_dir_all(root).expect("temp dir should clean up"); + } } diff --git a/rust/crates/runtime/tests/verifier_e2e.rs b/rust/crates/runtime/tests/verifier_e2e.rs index 438cd43dca..0415d6fc03 100644 --- a/rust/crates/runtime/tests/verifier_e2e.rs +++ b/rust/crates/runtime/tests/verifier_e2e.rs @@ -64,6 +64,7 @@ fn quick_only() -> CargoVerifierConfig { node_timeout: Duration::from_mins(2), python_enabled: false, python_timeout: Duration::from_mins(2), + auto_mode: false, } } @@ -100,6 +101,7 @@ fn python_only() -> CargoVerifierConfig { node_timeout: Duration::from_mins(2), python_enabled: true, python_timeout: Duration::from_mins(2), + auto_mode: false, } } @@ -268,6 +270,7 @@ fn later_steps_are_skipped_after_first_failure_in_legacy_mode() { node_timeout: Duration::from_mins(2), python_enabled: false, python_timeout: Duration::from_mins(2), + auto_mode: false, }; let v = CargoVerifier::new(config); let result = quick_report(&v, "edit_file", &tool_input(&file)).unwrap(); @@ -304,6 +307,7 @@ fn fmt_violation_is_detected_when_fmt_enabled() { node_timeout: Duration::from_mins(2), python_enabled: false, python_timeout: Duration::from_mins(2), + auto_mode: false, }; let v = CargoVerifier::new(config); let result = quick_report(&v, "edit_file", &tool_input(&file)).unwrap(); diff --git a/rust/crates/rusty-claude-cli/src/main.rs b/rust/crates/rusty-claude-cli/src/main.rs index 010dddd128..6c0d9cf712 100644 --- a/rust/crates/rusty-claude-cli/src/main.rs +++ b/rust/crates/rusty-claude-cli/src/main.rs @@ -10,7 +10,7 @@ mod init; mod input; mod render; -use std::collections::BTreeSet; +use std::collections::{BTreeMap, BTreeSet}; use std::env; use std::fs; use std::io::{self, IsTerminal, Read, Write}; @@ -53,7 +53,8 @@ use runtime::{ use serde::Deserialize; use serde_json::{json, Map, Value}; use tools::{ - execute_tool, mvp_tool_specs, GlobalToolRegistry, 
RuntimeToolDefinition, ToolSearchOutput, + execute_tool, mvp_tool_specs, run_inline_review, GlobalToolRegistry, ReviewFinding, + ReviewOutcome, RuntimeToolDefinition, ToolSearchOutput, }; const DEFAULT_MODEL: &str = "claude-opus-4-6"; @@ -83,6 +84,10 @@ const OFFICIAL_REPO_SLUG: &str = "ultraworkers/claw-code"; const DEPRECATED_INSTALL_COMMAND: &str = "cargo install claw-code"; const LATEST_SESSION_REFERENCE: &str = "latest"; const SESSION_REFERENCE_ALIASES: &[&str] = &[LATEST_SESSION_REFERENCE, "last", "recent"]; +const CRITIC_CHANGED_FILES_THRESHOLD: usize = 4; +const CRITIC_CHANGED_LINES_THRESHOLD: usize = 200; +const AUTO_SKILL_MIN_ITERATIONS: usize = 3; +const AUTO_SKILL_MIN_FAILURES: usize = 2; const CLI_OPTION_SUGGESTIONS: &[&str] = &[ "--help", "-h", @@ -3160,6 +3165,8 @@ struct LiveCli { runtime: BuiltRuntime, session: SessionHandle, prompt_history: Vec, + critic_planner: runtime::critic::CriticPlanner, + turn_mutation_counter: u64, } #[derive(Debug, Clone)] @@ -3671,6 +3678,8 @@ impl LiveCli { runtime, session, prompt_history: Vec::new(), + critic_planner: runtime::critic::CriticPlanner::new(), + turn_mutation_counter: 0, }; cli.persist_session()?; Ok(cli) @@ -3779,6 +3788,8 @@ impl LiveCli { hook_abort_monitor.stop(); match result { Ok(summary) => { + let summary = + self.apply_post_turn_pipeline(&mut runtime, summary, &mut permission_prompter)?; let should_print_buffered = runtime.buffer_output; self.replace_runtime(runtime)?; spinner.finish( @@ -3829,7 +3840,8 @@ impl LiveCli { let mut permission_prompter = CliPermissionPrompter::new(self.permission_mode); let result = runtime.run_turn(input, Some(&mut permission_prompter)); hook_abort_monitor.stop(); - let summary = result?; + let summary = + self.apply_post_turn_pipeline(&mut runtime, result?, &mut permission_prompter)?; self.replace_runtime(runtime)?; self.persist_session()?; let final_text = final_assistant_text(&summary); @@ -3842,7 +3854,8 @@ impl LiveCli { let mut permission_prompter = 
CliPermissionPrompter::new(self.permission_mode); let result = runtime.run_turn(input, Some(&mut permission_prompter)); hook_abort_monitor.stop(); - let summary = result?; + let summary = + self.apply_post_turn_pipeline(&mut runtime, result?, &mut permission_prompter)?; self.replace_runtime(runtime)?; self.persist_session()?; println!( @@ -3881,6 +3894,54 @@ impl LiveCli { Ok(()) } + fn apply_post_turn_pipeline( + &mut self, + runtime: &mut BuiltRuntime, + mut summary: runtime::TurnSummary, + permission_prompter: &mut CliPermissionPrompter, + ) -> Result> { + let change_stats = collect_turn_change_stats(&summary); + + self.turn_mutation_counter += 1; + let planner_stats = runtime::critic::DiffStats { + files_changed: change_stats.files.len(), + lines_changed: change_stats.total_changed_lines, + distinct_roots: change_stats.verified_roots.len(), + }; + let subagent_depth = std::env::var("CLAUDE_CODE_SUBAGENT_DEPTH") + .ok() + .and_then(|value| value.trim().parse::().ok()) + .unwrap_or(0); + let planner_decision = + self.critic_planner + .plan(self.turn_mutation_counter, subagent_depth, planner_stats); + let planner_allows = matches!( + planner_decision, + runtime::critic::CriticDecision::Run { .. 
} + ); + + if planner_allows && should_run_critic(&summary, &change_stats) { + let review = run_inline_review( + &build_critic_prompt(&summary, &change_stats), + Some(&critic_model_for(&self.model)), + ) + .map_err(std::io::Error::other)?; + let blocking = blocking_findings(&review.findings); + if blocking.is_empty() { + append_review_note(&mut summary, &review); + } else { + let fix_summary = runtime.run_turn( + build_critic_fix_prompt(&blocking), + Some(permission_prompter), + )?; + summary = merge_turn_summaries(summary, fix_summary); + } + } + + let _ = maybe_write_auto_skill_draft(&summary); + Ok(summary) + } + #[allow(clippy::too_many_lines)] fn handle_repl_command( &mut self, @@ -5960,6 +6021,7 @@ fn render_export_text(session: &Session) -> String { phase, status, summary_text, + .. } => lines.push(format!( "[verification_report id={report_id} phase={} status={}] {summary_text}", phase.as_str(), @@ -6173,6 +6235,7 @@ fn render_session_markdown(session: &Session, session_id: &str, session_path: &P phase, status, summary_text, + .. 
} => { lines.push(format!( "**Verification** `{}` _(id `{report_id}`, status `{}`)_", @@ -6711,6 +6774,7 @@ fn build_runtime_with_plugin_state( if verifier_config.enabled() { let cargo_config = runtime::CargoVerifierConfig { legacy_mode: !verifier_config.staged(), + auto_mode: verifier_config.auto(), quick_on_write: verifier_config.quick_on_write(), final_gate: verifier_config.final_gate(), max_output_bytes: verifier_config.max_output_bytes(), @@ -7256,6 +7320,604 @@ fn final_assistant_text(summary: &runtime::TurnSummary) -> String { .unwrap_or_default() } +#[derive(Debug, Clone, Default, PartialEq, Eq)] +struct FileChangeStats { + path: String, + added_lines: usize, + deleted_lines: usize, + preview_lines: Vec, +} + +#[derive(Debug, Clone, Default, PartialEq, Eq)] +struct TurnChangeStats { + files: Vec, + total_changed_lines: usize, + verified_roots: BTreeSet, +} + +fn collect_turn_change_stats(summary: &runtime::TurnSummary) -> TurnChangeStats { + let mut files = BTreeMap::::new(); + for message in &summary.tool_results { + for block in &message.blocks { + let ContentBlock::ToolResult { + tool_name, + output, + is_error, + .. 
+ } = block + else { + continue; + }; + if *is_error + || !matches!( + tool_name.as_str(), + "write_file" | "edit_file" | "Write" | "Edit" + ) + { + continue; + } + let Ok(parsed) = serde_json::from_str::(output) else { + continue; + }; + let Some(path) = parsed + .get("filePath") + .or_else(|| parsed.get("file_path")) + .and_then(Value::as_str) + else { + continue; + }; + + let entry = files + .entry(path.to_string()) + .or_insert_with(|| FileChangeStats { + path: path.to_string(), + ..FileChangeStats::default() + }); + if let Some(hunks) = parsed.get("structuredPatch").and_then(Value::as_array) { + for hunk in hunks { + if let Some(lines) = hunk.get("lines").and_then(Value::as_array) { + for line in lines.iter().filter_map(Value::as_str) { + if line.starts_with('+') { + entry.added_lines += 1; + } else if line.starts_with('-') { + entry.deleted_lines += 1; + } + if entry.preview_lines.len() < 8 { + entry.preview_lines.push(line.to_string()); + } + } + } + } + } + } + } + + let verified_roots = summary + .verification_reports + .iter() + .map(|report| report.project_root.display().to_string()) + .filter(|root| !root.is_empty()) + .collect::>(); + let files = files.into_values().collect::>(); + let total_changed_lines = files + .iter() + .map(|file| file.added_lines + file.deleted_lines) + .sum(); + + TurnChangeStats { + files, + total_changed_lines, + verified_roots, + } +} + +fn should_run_critic(summary: &runtime::TurnSummary, stats: &TurnChangeStats) -> bool { + let critic_enabled = env::var("CLAUDE_CODE_CRITIC").map_or(true, |value| value.trim() != "0"); + critic_enabled + && summary.verification_gate.passed + && (stats.files.len() >= CRITIC_CHANGED_FILES_THRESHOLD + || stats.total_changed_lines >= CRITIC_CHANGED_LINES_THRESHOLD + || stats.verified_roots.len() > 1) +} + +fn build_critic_prompt(summary: &runtime::TurnSummary, stats: &TurnChangeStats) -> String { + let mut lines = vec![ + "Review the recent code change.".to_string(), + "Return ONLY JSON with 
this exact shape:".to_string(), + r#"{"summary":"short summary","findings":[{"severity":"P0|P1|P2|P3","title":"short title","body":"one paragraph","file":"optional path"}]}"#.to_string(), + "Report only concrete bugs, regressions, security issues, or missing edge cases. If there are no real findings, return an empty findings array.".to_string(), + format!( + "Verification gate passed: {} (attempted: {}).", + summary.verification_gate.passed, summary.verification_gate.attempted + ), + format!( + "Changed files: {}. Total changed lines: {}. Verified roots: {}.", + stats.files.len(), + stats.total_changed_lines, + stats.verified_roots.len() + ), + String::new(), + "Diff summary:".to_string(), + ]; + + for file in &stats.files { + lines.push(format!( + "- {} (+{}, -{})", + file.path, file.added_lines, file.deleted_lines + )); + for preview in file.preview_lines.iter().take(4) { + lines.push(format!(" {preview}")); + } + } + + if !summary.verification_reports.is_empty() { + lines.push(String::new()); + lines.push("Verification summary:".to_string()); + for report in summary.verification_reports.iter().rev().take(6) { + lines.push(format!( + "- {} {} {}", + report.adapter_id, + report.phase.as_str(), + report.summary_text + )); + } + } + + let mut prompt = lines.join("\n"); + if prompt.chars().count() > 6_000 { + prompt = prompt.chars().take(6_000).collect(); + } + prompt +} + +fn critic_model_for(current_model: &str) -> String { + env::var("CLAUDE_CODE_CRITIC_MODEL").unwrap_or_else(|_| current_model.to_string()) +} + +fn blocking_findings(findings: &[ReviewFinding]) -> Vec { + findings + .iter() + .filter(|finding| matches!(finding.severity.as_str(), "P0" | "P1")) + .cloned() + .collect() +} + +fn build_critic_fix_prompt(findings: &[ReviewFinding]) -> String { + let mut lines = vec![ + "A post-change code review found blocking issues. 
Fix only the blocking findings below, preserve existing behavior unless the finding requires change, and rerun the minimal verification needed.".to_string(), + String::new(), + "Blocking findings:".to_string(), + ]; + for finding in findings { + lines.push(format!( + "- {} {}{}", + finding.severity, + finding.title, + finding + .file + .as_deref() + .map(|file| format!(" ({file})")) + .unwrap_or_default() + )); + lines.push(format!(" {}", finding.body)); + } + lines.join("\n") +} + +fn append_review_note(summary: &mut runtime::TurnSummary, review: &ReviewOutcome) { + use std::fmt::Write as _; + if review.findings.is_empty() { + return; + } + let mut note = format!("\n\nReview notes: {}\n", review.summary); + for finding in &review.findings { + let file_suffix = finding + .file + .as_deref() + .map(|file| format!(" ({file})")) + .unwrap_or_default(); + let _ = writeln!( + note, + "- {} {}{}: {}", + finding.severity, finding.title, file_suffix, finding.body + ); + } + + if let Some(message) = summary.assistant_messages.last_mut() { + message.blocks.push(ContentBlock::Text { text: note }); + } else { + summary.assistant_messages.push(ConversationMessage { + role: MessageRole::Assistant, + blocks: vec![ContentBlock::Text { text: note }], + usage: None, + }); + } +} + +fn merge_turn_summaries( + mut initial: runtime::TurnSummary, + follow_up: runtime::TurnSummary, +) -> runtime::TurnSummary { + initial + .assistant_messages + .extend(follow_up.assistant_messages); + initial.tool_results.extend(follow_up.tool_results); + initial + .verification_reports + .extend(follow_up.verification_reports); + initial.verification_gate = follow_up.verification_gate; + initial + .prompt_cache_events + .extend(follow_up.prompt_cache_events); + initial.iterations += follow_up.iterations; + initial.usage = TokenUsage { + input_tokens: initial.usage.input_tokens + follow_up.usage.input_tokens, + output_tokens: initial.usage.output_tokens + follow_up.usage.output_tokens, + 
cache_creation_input_tokens: initial.usage.cache_creation_input_tokens + + follow_up.usage.cache_creation_input_tokens, + cache_read_input_tokens: initial.usage.cache_read_input_tokens + + follow_up.usage.cache_read_input_tokens, + }; + initial.auto_compaction = follow_up.auto_compaction.or(initial.auto_compaction); + initial +} + +#[allow(clippy::too_many_lines)] +fn maybe_write_auto_skill_draft( + summary: &runtime::TurnSummary, +) -> Result, Box> { + let enabled = env::var("CLAUDE_CODE_AUTO_SKILLS").is_ok_and(|value| value.trim() == "1"); + if !enabled { + return Ok(None); + } + + let first_green_index = summary + .verification_reports + .iter() + .position(|report| report.phase == runtime::VerificationPhase::Final && report.is_success()) + .unwrap_or(summary.verification_reports.len()); + let failures_before_green = summary + .verification_reports + .iter() + .take(first_green_index) + .filter(|report| !report.is_success()) + .count(); + if summary.iterations < AUTO_SKILL_MIN_ITERATIONS + || failures_before_green < AUTO_SKILL_MIN_FAILURES + { + return Ok(None); + } + + let cwd = env::current_dir()?; + let mut root_counts = BTreeMap::::new(); + for report in &summary.verification_reports { + let root = report.project_root.display().to_string(); + if !root.is_empty() { + *root_counts.entry(root).or_default() += 1; + } + } + let Some((dominant_root, _)) = root_counts.into_iter().max_by_key(|(_, count)| *count) else { + return Ok(None); + }; + + let adapter_ids = summary + .verification_reports + .iter() + .map(|report| report.adapter_id.clone()) + .collect::>() + .into_iter() + .collect::>(); + let failure_kinds = summary + .verification_reports + .iter() + .filter_map(runtime::VerificationReport::primary_failure_kind) + .map(|kind| kind.as_str().to_string()) + .collect::>() + .into_iter() + .collect::>(); + let touched_files = summary + .verification_reports + .iter() + .flat_map(|report| report.touched_paths.iter()) + .filter_map(|path| 
path.strip_prefix(&cwd).ok().or(Some(path.as_path()))) + .map(|path| sanitize_skill_text(&path.display().to_string())) + .collect::>() + .into_iter() + .take(12) + .collect::>(); + + let root_name = Path::new(&dominant_root) + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or("workspace"); + let adapter_slug = adapter_ids + .first() + .map(|adapter| slugify_token(adapter)) + .filter(|slug| !slug.is_empty()) + .unwrap_or_else(|| String::from("mixed")); + let skill_slug = format!("repair-{}-{}", slugify_token(root_name), adapter_slug); + let skill_dir = cwd + .join(".claude") + .join("skills") + .join("generated") + .join(&skill_slug); + if skill_dir.exists() { + return Ok(None); + } + fs::create_dir_all(&skill_dir)?; + + let skill_name = format!("repair-{}-{}", sanitize_skill_text(root_name), adapter_slug); + let markdown = build_auto_skill_markdown( + &skill_name, + &adapter_ids, + &failure_kinds, + &touched_files, + failures_before_green, + summary.iterations, + ); + fs::write(skill_dir.join("SKILL.md"), markdown)?; + fs::write( + skill_dir.join("meta.json"), + serde_json::to_string_pretty(&json!({ + "status": "quarantined", + "confidence": "draft", + "requires_human_approval": true, + "promotion": { + "required_fixtures": 3, + "max_token_regression_pct": 10 + }, + "source_episode": { + "iterations": summary.iterations, + "failures_before_first_green": failures_before_green, + "adapters": adapter_ids, + "failure_kinds": failure_kinds, + "dominant_root": sanitize_skill_text(root_name), + "touched_files": touched_files + } + }))?, + )?; + + Ok(Some(skill_dir)) +} + +fn build_auto_skill_markdown( + skill_name: &str, + adapter_ids: &[String], + failure_kinds: &[String], + touched_files: &[String], + failures_before_green: usize, + iterations: usize, +) -> String { + let lines = vec![ + "---".to_string(), + format!("name: {skill_name}"), + "status: quarantined".to_string(), + "source: auto-generated".to_string(), + "---".to_string(), + String::new(), + "# 
Auto-generated Skill Draft".to_string(), + String::new(), + "This draft was generated from normalized verification metadata only. Human review is required before promotion.".to_string(), + String::new(), + "## Trigger".to_string(), + format!( + "- Repeated verification failures before green: {failures_before_green} across {iterations} iterations." + ), + format!("- Adapters involved: {}.", adapter_ids.join(", ")), + format!("- Failure kinds observed: {}.", failure_kinds.join(", ")), + String::new(), + "## Workflow".to_string(), + "1. Reproduce the failing verifier step in the smallest change-scoped target available.".to_string(), + "2. Inspect the touched files and adjacent tests before broadening scope.".to_string(), + "3. Fix the highest-confidence code or config fault first.".to_string(), + "4. Re-run quick verification before broader final-gate checks.".to_string(), + String::new(), + "## Validation".to_string(), + format!("- Prefer targeted adapter commands for: {}.", adapter_ids.join(", ")), + format!("- Re-check touched files: {}.", touched_files.join(", ")), + String::new(), + "## Safety".to_string(), + "- Do not promote this draft automatically.".to_string(), + "- Keep generated guidance limited to normalized metadata; do not paste raw tool output.".to_string(), + ]; + lines.join("\n") +} + +/// Outcome of replaying a generated auto-skill against a stored fixture. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct AutoSkillFixtureResult { + pub passed: bool, +} + +/// Errors from [`promote_auto_skill`]. Deliberately explicit so CI can report. 
+#[derive(Debug, Clone, PartialEq, Eq)] +pub enum AutoSkillPromotionError { + NotEnoughFixtures { have: usize, need: usize }, + FixtureFailed { index: usize }, + HumanApprovalMissing, + TokenRegressionExceeded { delta_pct: i64, limit_pct: i64 }, + MetaRead(String), + MetaWrite(String), +} + +impl std::fmt::Display for AutoSkillPromotionError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::NotEnoughFixtures { have, need } => { + write!(f, "auto-skill requires {need} fixture replays, got {have}") + } + Self::FixtureFailed { index } => { + write!(f, "auto-skill fixture #{index} failed during replay") + } + Self::HumanApprovalMissing => { + write!( + f, + "auto-skill requires explicit human approval before promotion" + ) + } + Self::TokenRegressionExceeded { + delta_pct, + limit_pct, + } => write!( + f, + "auto-skill token regression {delta_pct}% exceeds {limit_pct}% limit" + ), + Self::MetaRead(msg) => write!(f, "cannot read meta.json: {msg}"), + Self::MetaWrite(msg) => write!(f, "cannot update meta.json: {msg}"), + } + } +} + +impl std::error::Error for AutoSkillPromotionError {} + +pub const AUTO_SKILL_REQUIRED_FIXTURES: usize = 3; +pub const AUTO_SKILL_MAX_TOKEN_REGRESSION_PCT: i64 = 10; + +/// Promote a quarantined auto-skill draft to `status: active` after gating: +/// * every fixture in `fixtures` must have `passed = true`, +/// * there must be at least [`AUTO_SKILL_REQUIRED_FIXTURES`] of them, +/// * `human_approved` must be explicitly true, +/// * tokens spent with the skill must not regress more than +/// [`AUTO_SKILL_MAX_TOKEN_REGRESSION_PCT`]% vs. the baseline. +/// +/// On success, updates `meta.json` to `status: active` and returns the path. 
+pub fn promote_auto_skill( + skill_dir: &Path, + fixtures: &[AutoSkillFixtureResult], + baseline_tokens: u64, + current_tokens: u64, + human_approved: bool, +) -> Result { + if fixtures.len() < AUTO_SKILL_REQUIRED_FIXTURES { + return Err(AutoSkillPromotionError::NotEnoughFixtures { + have: fixtures.len(), + need: AUTO_SKILL_REQUIRED_FIXTURES, + }); + } + for (index, fixture) in fixtures.iter().enumerate() { + if !fixture.passed { + return Err(AutoSkillPromotionError::FixtureFailed { index }); + } + } + if !human_approved { + return Err(AutoSkillPromotionError::HumanApprovalMissing); + } + if baseline_tokens > 0 { + let delta = i128::from(current_tokens) - i128::from(baseline_tokens); + let delta_pct = (delta * 100) / i128::from(baseline_tokens); + if delta_pct > i128::from(AUTO_SKILL_MAX_TOKEN_REGRESSION_PCT) { + return Err(AutoSkillPromotionError::TokenRegressionExceeded { + delta_pct: i64::try_from(delta_pct).unwrap_or(i64::MAX), + limit_pct: AUTO_SKILL_MAX_TOKEN_REGRESSION_PCT, + }); + } + } + + let meta_path = skill_dir.join("meta.json"); + let raw = fs::read_to_string(&meta_path) + .map_err(|error| AutoSkillPromotionError::MetaRead(error.to_string()))?; + let mut meta: serde_json::Value = serde_json::from_str(&raw) + .map_err(|error| AutoSkillPromotionError::MetaRead(error.to_string()))?; + if let Some(obj) = meta.as_object_mut() { + obj.insert("status".to_string(), json!("active")); + obj.insert("requires_human_approval".to_string(), json!(false)); + obj.insert( + "promoted_at_unix_secs".to_string(), + json!(std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or_default()), + ); + obj.insert( + "promotion_evidence".to_string(), + json!({ + "fixtures_passed": fixtures.len(), + "baseline_tokens": baseline_tokens, + "current_tokens": current_tokens, + "human_approved": human_approved, + }), + ); + } + fs::write( + &meta_path, + serde_json::to_string_pretty(&meta) + .map_err(|error| 
AutoSkillPromotionError::MetaWrite(error.to_string()))?, + ) + .map_err(|error| AutoSkillPromotionError::MetaWrite(error.to_string()))?; + Ok(skill_dir.to_path_buf()) +} + +/// Parameters for [`run_promote_auto_skill_cli`]. Kept as a plain struct so +/// both the subcommand dispatcher and integration tests can drive it. +#[derive(Debug, Clone)] +pub struct PromoteAutoSkillArgs { + /// Path to the quarantined skill directory (`.claude/skills/generated//`). + pub skill_dir: PathBuf, + /// JSON file: array of `{ "passed": bool }` objects, one per replay fixture. + pub fixtures_json: PathBuf, + pub baseline_tokens: u64, + pub current_tokens: u64, + pub approved: bool, +} + +pub fn run_promote_auto_skill_cli( + args: &PromoteAutoSkillArgs, +) -> Result> { + let raw = fs::read_to_string(&args.fixtures_json)?; + let entries: Vec = serde_json::from_str(&raw)?; + let fixtures: Vec = entries + .into_iter() + .map(|entry| AutoSkillFixtureResult { + passed: entry + .get("passed") + .and_then(serde_json::Value::as_bool) + .unwrap_or(false), + }) + .collect(); + + let path = promote_auto_skill( + &args.skill_dir, + &fixtures, + args.baseline_tokens, + args.current_tokens, + args.approved, + )?; + Ok(path) +} + +fn sanitize_skill_text(value: &str) -> String { + value + .chars() + .map(|ch| { + if ch.is_ascii_alphanumeric() || matches!(ch, '/' | '\\' | '-' | '_' | '.' 
| ' ') { + ch + } else { + ' ' + } + }) + .collect::() + .split_whitespace() + .collect::>() + .join(" ") +} + +fn slugify_token(value: &str) -> String { + let mut slug = value + .chars() + .map(|ch| { + if ch.is_ascii_alphanumeric() { + ch.to_ascii_lowercase() + } else { + '-' + } + }) + .collect::(); + while slug.contains("--") { + slug = slug.replace("--", "-"); + } + slug.trim_matches('-').to_string() +} + fn print_buffered_turn_summary( summary: &runtime::TurnSummary, ) -> Result<(), Box> { @@ -7356,6 +8018,7 @@ fn collect_verification_reports(summary: &runtime::TurnSummary) -> Vec>(), "status": report.status.as_str(), "summary_text": report.summary_text.clone(), + "primary_failure_kind": report.primary_failure_kind().map(runtime::VerificationFailureKind::as_str), "steps": report.steps.iter().map(|step| json!({ "adapter": step.adapter.clone(), "project_root": step.project_root.display().to_string(), @@ -7366,6 +8029,11 @@ fn collect_verification_reports(summary: &runtime::TurnSummary) -> Vec>(), }) }) @@ -11986,8 +12654,15 @@ fn write_mcp_server_fixture(script_path: &Path) { #[cfg(test)] mod sandbox_report_tests { - use super::{format_sandbox_report, HookAbortMonitor}; - use runtime::HookAbortSignal; + use super::{ + collect_turn_change_stats, format_sandbox_report, maybe_write_auto_skill_draft, + promote_auto_skill, run_promote_auto_skill_cli, should_run_critic, AutoSkillFixtureResult, + AutoSkillPromotionError, HookAbortMonitor, PromoteAutoSkillArgs, + AUTO_SKILL_MAX_TOKEN_REGRESSION_PCT, AUTO_SKILL_REQUIRED_FIXTURES, + }; + use runtime::{ContentBlock, ConversationMessage, HookAbortSignal, MessageRole, TokenUsage}; + use serde_json::json; + use std::path::Path; use std::sync::mpsc; use std::time::Duration; @@ -12019,6 +12694,324 @@ mod sandbox_report_tests { assert!(!abort_signal.is_aborted()); } + fn sample_turn_summary() -> runtime::TurnSummary { + runtime::TurnSummary { + assistant_messages: vec![ConversationMessage { + role: MessageRole::Assistant, + 
blocks: vec![ContentBlock::Text { + text: "done".to_string(), + }], + usage: None, + }], + tool_results: vec![ConversationMessage { + role: MessageRole::User, + blocks: vec![ContentBlock::ToolResult { + tool_use_id: "tool-1".to_string(), + tool_name: "write_file".to_string(), + output: json!({ + "filePath": "src/lib.rs", + "structuredPatch": [{ + "lines": ["-old", "+new", "+extra"] + }] + }) + .to_string(), + is_error: false, + }], + usage: None, + }], + verification_reports: vec![], + verification_gate: runtime::VerificationGateStatus { + attempted: true, + passed: true, + report_ids: vec!["report-1".to_string()], + }, + prompt_cache_events: vec![], + iterations: 1, + usage: TokenUsage::default(), + auto_compaction: None, + } + } + + fn sample_verification_report( + report_id: &str, + phase: runtime::VerificationPhase, + status: runtime::VerificationStatus, + project_root: &Path, + ) -> runtime::VerificationReport { + runtime::VerificationReport { + report_id: report_id.to_string(), + phase, + adapter_id: "rust-cargo".to_string(), + project_root: project_root.to_path_buf(), + touched_paths: vec![project_root.join("src/lib.rs")], + status, + summary_text: "report".to_string(), + steps: vec![], + } + } + + #[test] + fn collect_turn_change_stats_counts_changed_files_and_lines() { + let summary = sample_turn_summary(); + let stats = collect_turn_change_stats(&summary); + + assert_eq!(stats.files.len(), 1); + assert_eq!(stats.files[0].path, "src/lib.rs"); + assert_eq!(stats.files[0].added_lines, 2); + assert_eq!(stats.files[0].deleted_lines, 1); + assert_eq!(stats.total_changed_lines, 3); + } + + #[test] + fn should_run_critic_requires_green_nontrivial_turn() { + let mut summary = sample_turn_summary(); + summary.tool_results = (0..4) + .map(|index| ConversationMessage { + role: MessageRole::User, + blocks: vec![ContentBlock::ToolResult { + tool_use_id: format!("tool-{index}"), + tool_name: "write_file".to_string(), + output: json!({ + "filePath": 
format!("src/file-{index}.rs"), + "structuredPatch": [{"lines": ["-old", "+new"]}] + }) + .to_string(), + is_error: false, + }], + usage: None, + }) + .collect(); + let stats = collect_turn_change_stats(&summary); + assert!(should_run_critic(&summary, &stats)); + + summary.verification_gate.passed = false; + assert!(!should_run_critic(&summary, &stats)); + } + + #[test] + fn auto_skill_draft_writes_quarantined_skill_when_enabled() { + let original_dir = std::env::current_dir().expect("cwd"); + let original_flag = std::env::var("CLAUDE_CODE_AUTO_SKILLS").ok(); + let root = std::env::temp_dir().join(format!( + "claw-auto-skill-{}", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("time") + .as_nanos() + )); + std::fs::create_dir_all(root.join("src")).expect("workspace should exist"); + std::env::set_current_dir(&root).expect("set cwd"); + std::env::set_var("CLAUDE_CODE_AUTO_SKILLS", "1"); + + let summary = runtime::TurnSummary { + assistant_messages: vec![], + tool_results: vec![], + verification_reports: vec![ + sample_verification_report( + "report-1", + runtime::VerificationPhase::Quick, + runtime::VerificationStatus::Failed, + &root, + ), + sample_verification_report( + "report-2", + runtime::VerificationPhase::Quick, + runtime::VerificationStatus::Failed, + &root, + ), + sample_verification_report( + "report-3", + runtime::VerificationPhase::Final, + runtime::VerificationStatus::Passed, + &root, + ), + ], + verification_gate: runtime::VerificationGateStatus { + attempted: true, + passed: true, + report_ids: vec!["report-3".to_string()], + }, + prompt_cache_events: vec![], + iterations: 3, + usage: TokenUsage::default(), + auto_compaction: None, + }; + + let generated = maybe_write_auto_skill_draft(&summary).expect("skill draft should write"); + let skill_dir = generated.expect("skill draft path"); + assert!(skill_dir.join("SKILL.md").is_file()); + assert!(skill_dir.join("meta.json").is_file()); + + 
std::env::set_current_dir(original_dir).expect("restore cwd"); + match original_flag { + Some(value) => std::env::set_var("CLAUDE_CODE_AUTO_SKILLS", value), + None => std::env::remove_var("CLAUDE_CODE_AUTO_SKILLS"), + } + let _ = std::fs::remove_dir_all(root); + } + + fn write_quarantined_skill(dir: &Path) { + std::fs::create_dir_all(dir).expect("skill dir"); + std::fs::write(dir.join("SKILL.md"), "# draft").expect("SKILL.md"); + let meta = json!({ + "status": "quarantined", + "requires_human_approval": true, + "promotion": { + "required_fixtures": AUTO_SKILL_REQUIRED_FIXTURES, + "max_token_regression_pct": AUTO_SKILL_MAX_TOKEN_REGRESSION_PCT + } + }); + std::fs::write( + dir.join("meta.json"), + serde_json::to_string_pretty(&meta).unwrap(), + ) + .expect("meta.json"); + } + + fn promo_tmpdir(label: &str) -> std::path::PathBuf { + let path = std::env::temp_dir().join(format!( + "claw-promo-{label}-{}", + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .expect("time") + .as_nanos() + )); + std::fs::create_dir_all(&path).expect("promo dir"); + path + } + + #[test] + fn promote_auto_skill_requires_three_fixtures() { + let dir = promo_tmpdir("few-fixtures"); + write_quarantined_skill(&dir); + let fixtures = vec![AutoSkillFixtureResult { passed: true }]; + let err = promote_auto_skill(&dir, &fixtures, 1000, 1000, true).expect_err("should fail"); + assert!(matches!( + err, + AutoSkillPromotionError::NotEnoughFixtures { have: 1, need: 3 } + )); + let _ = std::fs::remove_dir_all(dir); + } + + #[test] + fn promote_auto_skill_fails_when_any_fixture_fails() { + let dir = promo_tmpdir("fixture-fail"); + write_quarantined_skill(&dir); + let fixtures = vec![ + AutoSkillFixtureResult { passed: true }, + AutoSkillFixtureResult { passed: false }, + AutoSkillFixtureResult { passed: true }, + ]; + let err = promote_auto_skill(&dir, &fixtures, 1000, 1000, true).expect_err("should fail"); + assert!(matches!( + err, + AutoSkillPromotionError::FixtureFailed { 
index: 1 } + )); + let _ = std::fs::remove_dir_all(dir); + } + + #[test] + fn promote_auto_skill_requires_human_approval() { + let dir = promo_tmpdir("no-approval"); + write_quarantined_skill(&dir); + let fixtures = vec![AutoSkillFixtureResult { passed: true }; 3]; + let err = promote_auto_skill(&dir, &fixtures, 1000, 1000, false).expect_err("should fail"); + assert!(matches!(err, AutoSkillPromotionError::HumanApprovalMissing)); + let _ = std::fs::remove_dir_all(dir); + } + + #[test] + fn promote_auto_skill_enforces_10pct_token_budget() { + let dir = promo_tmpdir("token-regress"); + write_quarantined_skill(&dir); + let fixtures = vec![AutoSkillFixtureResult { passed: true }; 3]; + let err = promote_auto_skill(&dir, &fixtures, 1000, 1200, true).expect_err("should fail"); + match err { + AutoSkillPromotionError::TokenRegressionExceeded { + delta_pct, + limit_pct, + } => { + assert_eq!(delta_pct, 20); + assert_eq!(limit_pct, AUTO_SKILL_MAX_TOKEN_REGRESSION_PCT); + } + other => panic!("unexpected error: {other:?}"), + } + let _ = std::fs::remove_dir_all(dir); + } + + #[test] + fn promote_auto_skill_activates_meta_on_success() { + let dir = promo_tmpdir("promote-ok"); + write_quarantined_skill(&dir); + let fixtures = vec![AutoSkillFixtureResult { passed: true }; 3]; + let promoted = + promote_auto_skill(&dir, &fixtures, 1000, 1050, true).expect("should promote"); + let meta_raw = std::fs::read_to_string(promoted.join("meta.json")).expect("meta readable"); + let meta: serde_json::Value = serde_json::from_str(&meta_raw).expect("meta json"); + assert_eq!(meta["status"], "active"); + assert_eq!(meta["requires_human_approval"], false); + assert_eq!(meta["promotion_evidence"]["fixtures_passed"], 3); + let _ = std::fs::remove_dir_all(dir); + } + + #[test] + fn run_promote_auto_skill_cli_reads_fixtures_and_promotes() { + let dir = promo_tmpdir("cli-wrapper"); + write_quarantined_skill(&dir); + let fixtures_path = dir.join("fixtures.json"); + std::fs::write( + &fixtures_path, + 
serde_json::to_string(&json!([ + { "passed": true }, + { "passed": true }, + { "passed": true } + ])) + .unwrap(), + ) + .expect("fixtures.json"); + let args = PromoteAutoSkillArgs { + skill_dir: dir.clone(), + fixtures_json: fixtures_path, + baseline_tokens: 1_000, + current_tokens: 1_050, + approved: true, + }; + let promoted = run_promote_auto_skill_cli(&args).expect("cli wrapper should promote"); + assert_eq!(promoted, dir); + let meta: serde_json::Value = + serde_json::from_str(&std::fs::read_to_string(dir.join("meta.json")).expect("meta")) + .expect("meta json"); + assert_eq!(meta["status"], "active"); + let _ = std::fs::remove_dir_all(dir); + } + + #[test] + fn run_promote_auto_skill_cli_surfaces_fixture_failures() { + let dir = promo_tmpdir("cli-wrapper-fail"); + write_quarantined_skill(&dir); + let fixtures_path = dir.join("fixtures.json"); + std::fs::write( + &fixtures_path, + serde_json::to_string(&json!([ + { "passed": true }, + { "passed": false }, + { "passed": true } + ])) + .unwrap(), + ) + .expect("fixtures.json"); + let args = PromoteAutoSkillArgs { + skill_dir: dir.clone(), + fixtures_json: fixtures_path, + baseline_tokens: 1_000, + current_tokens: 1_000, + approved: true, + }; + let err = run_promote_auto_skill_cli(&args).expect_err("should fail"); + assert!(err.to_string().contains("fixture #1")); + let _ = std::fs::remove_dir_all(dir); + } + #[test] fn hook_abort_monitor_propagates_interrupt() { let abort_signal = HookAbortSignal::new(); diff --git a/rust/crates/rusty-claude-cli/tests/mock_quality_harness.rs b/rust/crates/rusty-claude-cli/tests/mock_quality_harness.rs new file mode 100644 index 0000000000..11d1deb36f --- /dev/null +++ b/rust/crates/rusty-claude-cli/tests/mock_quality_harness.rs @@ -0,0 +1,1025 @@ +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::{Command, Output}; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::time::{SystemTime, UNIX_EPOCH}; + +#[cfg(unix)] +use std::os::unix::fs::PermissionsExt; + 
+use mock_anthropic_service::{MockAnthropicService, SCENARIO_PREFIX}; +use serde_json::{json, Value}; + +static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0); + +#[cfg(unix)] +fn make_executable(path: &Path) { + let mut permissions = fs::metadata(path).expect("script metadata").permissions(); + permissions.set_mode(0o755); + fs::set_permissions(path, permissions).expect("script should be executable"); +} + +#[cfg(not(unix))] +fn make_executable(path: &Path) { + let _ = path; +} + +fn program_name(stem: &str) -> String { + if cfg!(windows) { + format!("{stem}.cmd") + } else { + stem.to_string() + } +} + +fn write_program_stub(dir: &Path, stem: &str, unix_body: &str, windows_ps1: &str) { + if cfg!(windows) { + let ps1_path = dir.join(format!("{stem}.ps1")); + fs::write(&ps1_path, windows_ps1).expect("powershell stub should write"); + let wrapper = format!( + "@echo off\r\npowershell -NoProfile -ExecutionPolicy Bypass -File \"%~dp0\\{stem}.ps1\" %*\r\n" + ); + let cmd_path = dir.join(program_name(stem)); + fs::write(&cmd_path, wrapper).expect("cmd wrapper should write"); + } else { + let script_path = dir.join(program_name(stem)); + fs::write(&script_path, unix_body).expect("shell stub should write"); + make_executable(&script_path); + } +} + +fn configure_quality_process_env(command: &mut Command, workspace: &HarnessWorkspace) { + if cfg!(windows) { + command.env("USERPROFILE", &workspace.home); + for key in [ + "SystemRoot", + "ComSpec", + "PATHEXT", + "TEMP", + "TMP", + "CARGO_HOME", + "RUSTUP_HOME", + "VCINSTALLDIR", + "VCToolsInstallDir", + "VCToolsVersion", + "VSINSTALLDIR", + "VisualStudioVersion", + "WindowsSdkDir", + "WindowsSDKVersion", + "UniversalCRTSdkDir", + "UCRTVersion", + "INCLUDE", + "LIB", + "LIBPATH", + ] { + if let Ok(value) = std::env::var(key) { + command.env(key, value); + } + } + let mut paths = vec![workspace.bin.display().to_string()]; + if let Ok(path) = std::env::var("PATH") { + paths.push(path); + } + command.env("PATH", paths.join(";")); + 
} else { + let path = std::env::var("PATH").unwrap_or_else(|_| "/usr/bin:/bin".to_string()); + command.env("PATH", format!("{}:{path}", workspace.bin.display())); + for key in ["CARGO_HOME", "RUSTUP_HOME"] { + if let Ok(value) = std::env::var(key) { + command.env(key, value); + } + } + } +} + +fn run_quality_case(case: ScenarioCase) { + let runtime = tokio::runtime::Runtime::new().expect("tokio runtime should build"); + let server = runtime + .block_on(MockAnthropicService::spawn()) + .expect("mock service should start"); + let base_url = server.base_url(); + let workspace = HarnessWorkspace::new(unique_temp_dir(case.name)); + workspace.create().expect("workspace should exist"); + (case.prepare)(&workspace); + + let run = match run_case(case, &workspace, &base_url) { + Ok(run) => run, + Err(output) => { + let captured = runtime.block_on(server.captured_requests()); + let messages_only = captured + .iter() + .filter(|request| request.path == "/v1/messages") + .collect::>(); + let last_request = messages_only + .last() + .map_or("", |request| { + request.raw_body.as_str() + }); + panic!( + "case {} failed\nmessages requests: {}\nstdout:\n{}\n\nstderr:\n{}\n\nlast request body:\n{}", + case.name, + messages_only.len(), + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr), + last_request + ); + } + }; + (case.assert)(&workspace, &run); + + fs::remove_dir_all(&workspace.root).expect("workspace cleanup should succeed"); +} + +#[test] +fn quality_harness_rust_red_green() { + run_quality_case(ScenarioCase { + name: "rust_red_green", + prepare: prepare_rust_red_green_fixture, + assert: assert_rust_red_green, + }); +} + +#[test] +fn quality_harness_node_red_green() { + run_quality_case(ScenarioCase { + name: "node_red_green", + prepare: prepare_node_red_green_fixture, + assert: assert_node_red_green, + }); +} + +#[test] +fn quality_harness_python_red_green() { + run_quality_case(ScenarioCase { + name: "python_red_green", + prepare: 
prepare_python_red_green_fixture, + assert: assert_python_red_green, + }); +} + +#[test] +fn quality_harness_rust_config_failure() { + run_quality_case(ScenarioCase { + name: "rust_config_failure", + prepare: prepare_rust_config_failure_fixture, + assert: assert_rust_config_failure, + }); +} + +#[test] +fn quality_harness_node_tool_unavailable() { + run_quality_case(ScenarioCase { + name: "node_tool_unavailable", + prepare: prepare_node_tool_unavailable_fixture, + assert: assert_node_tool_unavailable, + }); +} + +#[test] +fn quality_harness_python_timeout() { + run_quality_case(ScenarioCase { + name: "python_timeout", + prepare: prepare_python_timeout_fixture, + assert: assert_python_timeout, + }); +} + +#[test] +fn quality_harness_rust_final_gate_retry() { + run_quality_case(ScenarioCase { + name: "rust_final_gate_retry", + prepare: prepare_rust_final_gate_retry_fixture, + assert: assert_rust_final_gate_retry, + }); +} + +#[derive(Clone, Copy)] +struct ScenarioCase { + name: &'static str, + prepare: fn(&HarnessWorkspace), + assert: fn(&HarnessWorkspace, &ScenarioRun), +} + +struct HarnessWorkspace { + root: PathBuf, + config_home: PathBuf, + home: PathBuf, + bin: PathBuf, +} + +impl HarnessWorkspace { + fn new(root: PathBuf) -> Self { + Self { + config_home: root.join("config-home"), + home: root.join("home"), + bin: root.join("bin"), + root, + } + } + + fn create(&self) -> std::io::Result<()> { + fs::create_dir_all(&self.root)?; + fs::create_dir_all(&self.config_home)?; + fs::create_dir_all(&self.home)?; + fs::create_dir_all(&self.bin)?; + Ok(()) + } +} + +struct ScenarioRun { + response: Value, +} + +fn run_case( + case: ScenarioCase, + workspace: &HarnessWorkspace, + base_url: &str, +) -> Result { + let mut command = Command::new(env!("CARGO_BIN_EXE_claw")); + command + .current_dir(&workspace.root) + .env_clear() + .env("ANTHROPIC_API_KEY", "test-quality-key") + .env("ANTHROPIC_BASE_URL", base_url) + .env("CLAW_CONFIG_HOME", &workspace.config_home) + 
.env("HOME", &workspace.home) + .env("NO_COLOR", "1") + .args([ + "--model", + "sonnet", + "--permission-mode", + "workspace-write", + "--allowedTools", + "write_file", + "--output-format=json", + ]); + configure_quality_process_env(&mut command, workspace); + + let prompt = format!("{SCENARIO_PREFIX}{}", case.name); + command.arg(prompt); + + let output = command.output().expect("claw should launch"); + if !output.status.success() { + return Err(output); + } + Ok(ScenarioRun { + response: parse_json_output(&String::from_utf8_lossy(&output.stdout)), + }) +} + +fn write_verifier_settings(workspace: &HarnessWorkspace, settings: &Value) { + fs::write( + workspace.config_home.join("settings.json"), + settings.to_string(), + ) + .expect("settings should write"); +} + +fn rust_verifier_settings(final_gate: bool) -> Value { + json!({ + "verifier": { + "enabled": true, + "mode": "staged", + "finalGate": final_gate, + "cargo": { + "check": true, + "clippy": false, + "fmt": true, + "test": false, + "timeoutSecs": 60 + }, + "node": { "enabled": false }, + "python": { "enabled": false } + } + }) +} + +fn node_verifier_settings(final_gate: bool) -> Value { + json!({ + "verifier": { + "enabled": true, + "mode": "staged", + "finalGate": final_gate, + "cargo": { + "check": false, + "clippy": false, + "fmt": false, + "test": false, + "timeoutSecs": 5 + }, + "node": { + "enabled": true, + "timeoutSecs": 10 + }, + "python": { "enabled": false } + } + }) +} + +fn python_verifier_settings(final_gate: bool, timeout_secs: u64) -> Value { + json!({ + "verifier": { + "enabled": true, + "mode": "staged", + "finalGate": final_gate, + "cargo": { + "check": false, + "clippy": false, + "fmt": false, + "test": false, + "timeoutSecs": 5 + }, + "node": { "enabled": false }, + "python": { + "enabled": true, + "timeoutSecs": timeout_secs + } + } + }) +} + +fn prepare_rust_workspace(workspace: &HarnessWorkspace) { + fs::create_dir_all(workspace.root.join("crates").join("app").join("src")) + 
.expect("rust src dir should exist"); + fs::write( + workspace.root.join("Cargo.toml"), + "[workspace]\nmembers = [\"crates/app\"]\nresolver = \"2\"\n", + ) + .expect("workspace cargo manifest should write"); + fs::write( + workspace.root.join("crates").join("app").join("Cargo.toml"), + "[package]\nname = \"demo_app\"\nversion = \"0.1.0\"\nedition = \"2021\"\n", + ) + .expect("package cargo manifest should write"); + fs::write( + workspace + .root + .join("crates") + .join("app") + .join("src") + .join("lib.rs"), + "pub fn answer() -> usize {\n 0\n}\n", + ) + .expect("rust source should write"); +} + +fn prepare_rust_red_green_fixture(workspace: &HarnessWorkspace) { + prepare_rust_workspace(workspace); + write_verifier_settings(workspace, &rust_verifier_settings(true)); +} + +fn prepare_rust_final_gate_retry_fixture(workspace: &HarnessWorkspace) { + prepare_rust_workspace(workspace); + write_verifier_settings(workspace, &rust_verifier_settings(true)); +} + +fn prepare_rust_config_failure_fixture(workspace: &HarnessWorkspace) { + fs::create_dir_all(workspace.root.join("src")).expect("rust src dir should exist"); + fs::write( + workspace.root.join("Cargo.toml"), + "[package]\nname = \"config_demo\"\nversion = \"0.1.0\"\nedition = \"2021\"\n", + ) + .expect("cargo manifest should write"); + fs::write( + workspace.root.join("src").join("lib.rs"), + "pub fn answer() -> usize {\n 0\n}\n", + ) + .expect("rust source should write"); + write_verifier_settings(workspace, &rust_verifier_settings(false)); +} + +fn node_stub_unix() -> &'static str { + r#"#!/bin/sh +set -eu +if [ "$#" -lt 2 ] || [ "$1" != "run" ]; then + echo "unsupported npm invocation: $*" >&2 + exit 2 +fi +shift +if [ "$1" = "--silent" ]; then + shift +fi +script="$1" +content="" +if [ -f "src/index.ts" ]; then + content="$(cat "src/index.ts")" +fi +case "$content" in + *TOOL_UNAVAILABLE_SENTINEL*) + echo "npm not found" >&2 + exit 1 + ;; +esac +case "$script" in + typecheck) + case "$content" in + 
*BROKEN_TYPECHECK*) + echo "src/index.ts(1,14): error TS2322: Type 'number' is not assignable to type 'string'." >&2 + exit 1 + ;; + *) + exit 0 + ;; + esac + ;; + lint|test) + exit 0 + ;; + *) + echo "unsupported npm script: $script" >&2 + exit 2 + ;; +esac +"# +} + +fn node_stub_windows() -> &'static str { + r#"$arguments = @($args) +if ($arguments.Length -lt 2 -or $arguments[0] -ne 'run') { + [Console]::Error.WriteLine("unsupported npm invocation: $($arguments -join ' ')") + exit 2 +} +$index = 1 +if ($arguments.Length -gt 2 -and $arguments[1] -eq '--silent') { + $index = 2 +} +$script = $arguments[$index] +$sourcePath = Join-Path (Get-Location) 'src/index.ts' +$content = if (Test-Path $sourcePath) { Get-Content -LiteralPath $sourcePath -Raw } else { '' } +if ($content -match 'TOOL_UNAVAILABLE_SENTINEL') { + [Console]::Error.WriteLine('npm not found') + exit 1 +} +switch ($script) { + 'typecheck' { + if ($content -match 'BROKEN_TYPECHECK') { + [Console]::Error.WriteLine("src/index.ts(1,14): error TS2322: Type 'number' is not assignable to type 'string'.") + exit 1 + } + exit 0 + } + 'lint' { exit 0 } + 'test' { exit 0 } + default { + [Console]::Error.WriteLine("unsupported npm script: $script") + exit 2 + } +} +"# +} + +fn prepare_node_workspace(workspace: &HarnessWorkspace) { + let package_root = workspace.root.join("packages").join("web"); + fs::create_dir_all(package_root.join("src")).expect("node src dir should exist"); + fs::write( + package_root.join("package.json"), + json!({ + "name": "web-app", + "version": "1.0.0", + "scripts": { + "typecheck": "tsc --noEmit", + "lint": "eslint .", + "test": "vitest run" + } + }) + .to_string(), + ) + .expect("package.json should write"); + fs::write( + package_root.join("src").join("index.ts"), + "export const message = 'seed';\n", + ) + .expect("node source should write"); + write_program_stub(&workspace.bin, "npm", node_stub_unix(), node_stub_windows()); +} + +fn prepare_node_red_green_fixture(workspace: 
&HarnessWorkspace) { + prepare_node_workspace(workspace); + write_verifier_settings(workspace, &node_verifier_settings(true)); +} + +fn prepare_node_tool_unavailable_fixture(workspace: &HarnessWorkspace) { + prepare_node_workspace(workspace); + write_verifier_settings(workspace, &node_verifier_settings(false)); +} + +fn python_stub_unix() -> &'static str { + r#"#!/bin/sh +set -eu +if [ "$#" -lt 2 ] || [ "$1" != "-m" ]; then + echo "unsupported python invocation: $*" >&2 + exit 2 +fi +module="$2" +content="" +if [ -f "app/main.py" ]; then + content="$(cat "app/main.py")" +fi +case "$content" in + *TIMEOUT_SENTINEL*) + sleep 3 + exit 0 + ;; +esac +case "$module" in + py_compile) + case "$content" in + *BROKEN_PY_COMPILE*) + echo "SyntaxError: invalid syntax" >&2 + exit 1 + ;; + *) + exit 0 + ;; + esac + ;; + pytest) + exit 0 + ;; + *) + echo "unsupported python module: $module" >&2 + exit 2 + ;; +esac +"# +} + +fn python_stub_windows() -> &'static str { + r#"$arguments = @($args) +if ($arguments.Length -lt 2 -or $arguments[0] -ne '-m') { + [Console]::Error.WriteLine("unsupported python invocation: $($arguments -join ' ')") + exit 2 +} +$module = $arguments[1] +$sourcePath = Join-Path (Get-Location) 'app/main.py' +$content = if (Test-Path $sourcePath) { Get-Content -LiteralPath $sourcePath -Raw } else { '' } +if ($content -match 'TIMEOUT_SENTINEL') { + Start-Sleep -Seconds 3 + exit 0 +} +switch ($module) { + 'py_compile' { + if ($content -match 'BROKEN_PY_COMPILE') { + [Console]::Error.WriteLine('SyntaxError: invalid syntax') + exit 1 + } + exit 0 + } + 'pytest' { exit 0 } + default { + [Console]::Error.WriteLine("unsupported python module: $module") + exit 2 + } +} +"# +} + +fn prepare_python_workspace(workspace: &HarnessWorkspace) { + let project_root = workspace.root.join("services").join("api"); + fs::create_dir_all(project_root.join("app")).expect("python app dir should exist"); + fs::create_dir_all(project_root.join("tests")).expect("python tests dir should 
exist"); + fs::write( + project_root.join("pyproject.toml"), + "[project]\nname = \"quality-api\"\nversion = \"0.1.0\"\n", + ) + .expect("pyproject should write"); + fs::write( + project_root.join("app").join("main.py"), + "def meaning() -> int:\n return 1\n", + ) + .expect("python source should write"); + fs::write( + project_root.join("tests").join("test_smoke.py"), + "def test_smoke():\n assert True\n", + ) + .expect("python test should write"); + write_program_stub( + &workspace.bin, + "python", + python_stub_unix(), + python_stub_windows(), + ); +} + +fn prepare_python_red_green_fixture(workspace: &HarnessWorkspace) { + prepare_python_workspace(workspace); + write_verifier_settings(workspace, &python_verifier_settings(true, 10)); +} + +fn prepare_python_timeout_fixture(workspace: &HarnessWorkspace) { + prepare_python_workspace(workspace); + write_verifier_settings(workspace, &python_verifier_settings(false, 1)); +} + +fn verification_reports(response: &Value) -> &[Value] { + response["verification_reports"] + .as_array() + .expect("verification reports array") +} + +fn report_steps(report: &Value) -> &[Value] { + report["steps"].as_array().expect("report steps array") +} + +fn find_report<'a>(response: &'a Value, phase: &str, status: &str, adapter: &str) -> &'a Value { + verification_reports(response) + .iter() + .find(|report| { + report["phase"] == phase + && report["status"] == status + && report["adapter_id"] == adapter + }) + .unwrap_or_else(|| { + panic!( + "missing report phase={phase} status={status} adapter={adapter}: {}", + serde_json::to_string_pretty(response).expect("response should serialize") + ) + }) +} + +fn has_report_matching(response: &Value, predicate: F) -> bool +where + F: Fn(&Value) -> bool, +{ + verification_reports(response).iter().any(predicate) +} + +fn path_value_ends_with(value: &Value, suffix: &Path) -> bool { + value + .as_str() + .map(PathBuf::from) + .is_some_and(|path| path.ends_with(suffix)) +} + +fn 
assert_rust_red_green(workspace: &HarnessWorkspace, run: &ScenarioRun) { + assert_eq!(run.response["iterations"], Value::from(3)); + assert_eq!(run.response["tool_uses"].as_array().map(Vec::len), Some(2)); + assert_eq!( + run.response["tool_results"].as_array().map(Vec::len), + Some(2) + ); + assert_eq!( + run.response["verification_gate"]["attempted"], + Value::Bool(true) + ); + assert_eq!( + run.response["verification_gate"]["passed"], + Value::Bool(true) + ); + + let quick_failed = find_report(&run.response, "quick", "failed", "rust-cargo"); + assert_eq!( + quick_failed["primary_failure_kind"], + Value::String("code".to_string()) + ); + assert!(path_value_ends_with( + &quick_failed["project_root"], + Path::new("crates").join("app").as_path() + )); + let quick_step = &report_steps(quick_failed)[0]; + assert!(quick_step["command"] + .as_str() + .is_some_and(|command| command.contains("cargo check") && command.contains("-p demo_app"))); + assert_eq!( + quick_step["step_kind"], + Value::String("cargo_check".to_string()) + ); + assert_eq!( + quick_step["target_scope"], + Value::String("package".to_string()) + ); + assert_eq!( + quick_step["package_name"], + Value::String("demo_app".to_string()) + ); + + let final_passed = find_report(&run.response, "final", "passed", "rust-cargo"); + let final_step = report_steps(final_passed) + .iter() + .find(|step| step["step_kind"] == Value::String("cargo_fmt_check".to_string())) + .expect("cargo fmt step should exist"); + assert_eq!( + final_step["target_scope"], + Value::String("workspace".to_string()) + ); + assert_eq!( + final_step["package_name"], + Value::String("demo_app".to_string()) + ); + assert!(fs::read_to_string( + workspace + .root + .join("crates") + .join("app") + .join("src") + .join("lib.rs") + ) + .expect("rust source should exist") + .contains("42")); + assert!(run.response["message"] + .as_str() + .expect("message text") + .contains("rust quality red-green complete")); +} + +fn 
assert_node_red_green(workspace: &HarnessWorkspace, run: &ScenarioRun) { + assert_eq!(run.response["iterations"], Value::from(3)); + assert_eq!(run.response["tool_uses"].as_array().map(Vec::len), Some(2)); + assert_eq!( + run.response["verification_gate"]["attempted"], + Value::Bool(true) + ); + assert_eq!( + run.response["verification_gate"]["passed"], + Value::Bool(true) + ); + + let quick_failed = find_report(&run.response, "quick", "failed", "node-typescript"); + assert_eq!( + quick_failed["primary_failure_kind"], + Value::String("code".to_string()) + ); + assert!(path_value_ends_with( + &quick_failed["project_root"], + Path::new("packages").join("web").as_path() + )); + let quick_step = &report_steps(quick_failed)[0]; + assert!(quick_step["command"] + .as_str() + .is_some_and(|command| command.contains("npm run --silent typecheck"))); + assert_eq!( + quick_step["step_kind"], + Value::String("typecheck".to_string()) + ); + assert_eq!( + quick_step["target_scope"], + Value::String("package".to_string()) + ); + assert_eq!( + quick_step["package_manager"], + Value::String("npm".to_string()) + ); + assert_eq!( + quick_step["package_name"], + Value::String("web-app".to_string()) + ); + + let final_passed = find_report(&run.response, "final", "passed", "node-typescript"); + assert!(report_steps(final_passed) + .iter() + .any(|step| step["step_kind"] == Value::String("lint".to_string()))); + assert!(report_steps(final_passed) + .iter() + .any(|step| step["step_kind"] == Value::String("test".to_string()))); + assert!(fs::read_to_string( + workspace + .root + .join("packages") + .join("web") + .join("src") + .join("index.ts") + ) + .expect("node source should exist") + .contains("\"ok\"")); + assert!(run.response["message"] + .as_str() + .expect("message text") + .contains("node quality red-green complete")); +} + +fn assert_python_red_green(workspace: &HarnessWorkspace, run: &ScenarioRun) { + assert_eq!(run.response["iterations"], Value::from(3)); + 
assert_eq!(run.response["tool_uses"].as_array().map(Vec::len), Some(2)); + assert_eq!( + run.response["verification_gate"]["attempted"], + Value::Bool(true) + ); + assert_eq!( + run.response["verification_gate"]["passed"], + Value::Bool(true) + ); + + let quick_failed = find_report(&run.response, "quick", "failed", "python"); + assert_eq!( + quick_failed["primary_failure_kind"], + Value::String("code".to_string()) + ); + assert!(path_value_ends_with( + &quick_failed["project_root"], + Path::new("services").join("api").as_path() + )); + let quick_step = &report_steps(quick_failed)[0]; + assert_eq!( + quick_step["step_kind"], + Value::String("py_compile".to_string()) + ); + assert_eq!( + quick_step["target_scope"], + Value::String("file_set".to_string()) + ); + assert_eq!( + quick_step["launcher_kind"], + Value::String("global".to_string()) + ); + + let final_passed = find_report(&run.response, "final", "passed", "python"); + assert!(report_steps(final_passed) + .iter() + .any(|step| step["step_kind"] == Value::String("pytest".to_string()))); + assert!(fs::read_to_string( + workspace + .root + .join("services") + .join("api") + .join("app") + .join("main.py") + ) + .expect("python source should exist") + .contains("return 42")); + assert!(run.response["message"] + .as_str() + .expect("message text") + .contains("python quality red-green complete")); +} + +fn assert_rust_config_failure(_: &HarnessWorkspace, run: &ScenarioRun) { + assert_eq!(run.response["iterations"], Value::from(2)); + assert_eq!(run.response["tool_uses"].as_array().map(Vec::len), Some(1)); + assert_eq!( + run.response["verification_gate"]["attempted"], + Value::Bool(false) + ); + assert_eq!( + run.response["verification_gate"]["passed"], + Value::Bool(true) + ); + + let quick_failed = find_report(&run.response, "quick", "failed", "rust-cargo"); + assert_eq!( + quick_failed["primary_failure_kind"], + Value::String("config".to_string()) + ); + let quick_step = &report_steps(quick_failed)[0]; + 
assert_eq!( + quick_step["failure_kind"], + Value::String("config".to_string()) + ); + assert!(run.response["message"] + .as_str() + .expect("message text") + .contains("rust config failure captured")); +} + +fn assert_node_tool_unavailable(_: &HarnessWorkspace, run: &ScenarioRun) { + assert_eq!(run.response["iterations"], Value::from(2)); + assert_eq!( + run.response["verification_gate"]["attempted"], + Value::Bool(false) + ); + let quick_failed = find_report(&run.response, "quick", "failed", "node-typescript"); + assert_eq!( + quick_failed["primary_failure_kind"], + Value::String("tool_unavailable".to_string()) + ); + let quick_step = &report_steps(quick_failed)[0]; + assert_eq!( + quick_step["failure_kind"], + Value::String("tool_unavailable".to_string()) + ); + assert_eq!( + quick_step["step_kind"], + Value::String("typecheck".to_string()) + ); + assert_eq!( + quick_step["package_manager"], + Value::String("npm".to_string()) + ); + assert!(run.response["message"] + .as_str() + .expect("message text") + .contains("node tool unavailable captured")); +} + +fn assert_python_timeout(_: &HarnessWorkspace, run: &ScenarioRun) { + assert_eq!(run.response["iterations"], Value::from(2)); + assert_eq!( + run.response["verification_gate"]["attempted"], + Value::Bool(false) + ); + let quick_failed = find_report(&run.response, "quick", "failed", "python"); + assert_eq!( + quick_failed["primary_failure_kind"], + Value::String("timeout".to_string()) + ); + let quick_step = &report_steps(quick_failed)[0]; + assert_eq!( + quick_step["failure_kind"], + Value::String("timeout".to_string()) + ); + assert_eq!( + quick_step["step_kind"], + Value::String("py_compile".to_string()) + ); + assert_eq!( + quick_step["launcher_kind"], + Value::String("global".to_string()) + ); + assert!(run.response["message"] + .as_str() + .expect("message text") + .contains("python timeout captured")); +} + +fn assert_rust_final_gate_retry(workspace: &HarnessWorkspace, run: &ScenarioRun) { + assert!( + 
run.response["iterations"] + .as_u64() + .is_some_and(|iterations| iterations >= 5), + "expected retry flow to require at least five iterations: {}", + serde_json::to_string_pretty(&run.response).expect("response should serialize") + ); + assert_eq!(run.response["tool_uses"].as_array().map(Vec::len), Some(2)); + assert_eq!( + run.response["verification_gate"]["attempted"], + Value::Bool(true) + ); + assert_eq!( + run.response["verification_gate"]["passed"], + Value::Bool(true) + ); + + assert!(has_report_matching(&run.response, |report| { + report["phase"] == "final" + && report["status"] == "failed" + && report["adapter_id"] == "rust-cargo" + && !report_steps(report).is_empty() + })); + assert!(has_report_matching(&run.response, |report| { + report["phase"] == "final" + && report["adapter_id"] == "rust-cargo" + && report_steps(report).is_empty() + && report["summary_text"] + .as_str() + .is_some_and(|summary| summary.contains("still failing")) + })); + assert!(has_report_matching(&run.response, |report| { + report["phase"] == "final" + && report["status"] == "passed" + && report["adapter_id"] == "rust-cargo" + })); + + let final_failed = find_report(&run.response, "final", "failed", "rust-cargo"); + let failing_fmt_step = report_steps(final_failed) + .iter() + .find(|step| step["step_kind"] == Value::String("cargo_fmt_check".to_string())) + .expect("failing cargo fmt step should exist"); + assert_eq!( + failing_fmt_step["target_scope"], + Value::String("workspace".to_string()) + ); + assert_eq!( + failing_fmt_step["package_name"], + Value::String("demo_app".to_string()) + ); + assert!(fs::read_to_string( + workspace + .root + .join("crates") + .join("app") + .join("src") + .join("lib.rs") + ) + .expect("rust source should exist") + .contains("pub fn answer() -> usize")); + assert!(run.response["message"] + .as_str() + .expect("message text") + .contains("rust final gate retry complete")); +} + +fn parse_json_output(stdout: &str) -> Value { + stdout + .lines() + 
.rev() + .find_map(|line| { + let trimmed = line.trim(); + if trimmed.starts_with('{') && trimmed.ends_with('}') { + serde_json::from_str(trimmed).ok() + } else { + None + } + }) + .unwrap_or_else(|| panic!("no JSON response line found in stdout:\n{stdout}")) +} + +fn unique_temp_dir(label: &str) -> PathBuf { + let millis = SystemTime::now() + .duration_since(UNIX_EPOCH) + .expect("clock should be after epoch") + .as_millis(); + let counter = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed); + std::env::temp_dir().join(format!( + "claw-mock-quality-{label}-{}-{millis}-{counter}", + std::process::id() + )) +} diff --git a/rust/crates/tools/src/lane_completion.rs b/rust/crates/tools/src/lane_completion.rs index e4eecce7df..8413e08c73 100644 --- a/rust/crates/tools/src/lane_completion.rs +++ b/rust/crates/tools/src/lane_completion.rs @@ -109,6 +109,7 @@ mod tests { completed_at: Some("2024-01-01T00:00:00Z".to_string()), lane_events: vec![], derived_state: "working".to_string(), + subagent_depth: 0, current_blocker: None, error: None, } diff --git a/rust/crates/tools/src/lib.rs b/rust/crates/tools/src/lib.rs index e63e22b1a1..575a45706a 100644 --- a/rust/crates/tools/src/lib.rs +++ b/rust/crates/tools/src/lib.rs @@ -1,3 +1,4 @@ +use std::cell::Cell; use std::collections::{BTreeMap, BTreeSet}; use std::path::{Path, PathBuf}; use std::process::Command; @@ -1842,66 +1843,56 @@ fn from_value Deserialize<'de>>(input: &Value) -> Result /// ROADMAP #50: Read-only commands targeting CWD paths get `WorkspaceWrite`, /// all others remain `DangerFullAccess`. 
fn classify_bash_permission(command: &str) -> PermissionMode { - // Read-only commands that are safe when targeting workspace paths - const READ_ONLY_COMMANDS: &[&str] = &[ - "cat", "head", "tail", "less", "more", "ls", "ll", "dir", "find", "test", "[", "[[", - "grep", "rg", "awk", "sed", "file", "stat", "readlink", "wc", "sort", "uniq", "cut", "tr", - "pwd", "echo", "printf", - ]; - - // Get the base command (first word before any args or pipes) - let base_cmd = command.split_whitespace().next().unwrap_or(""); - let base_cmd = base_cmd.split('|').next().unwrap_or("").trim(); - let base_cmd = base_cmd.split(';').next().unwrap_or("").trim(); - let base_cmd = base_cmd.split('>').next().unwrap_or("").trim(); - let base_cmd = base_cmd.split('<').next().unwrap_or("").trim(); - - // Check if it's a read-only command - let cmd_name = base_cmd.split('/').next_back().unwrap_or(base_cmd); - let is_read_only = READ_ONLY_COMMANDS.contains(&cmd_name); + let intent = runtime::bash_validation::classify_command(command); + if intent != runtime::bash_validation::CommandIntent::ReadOnly { + return PermissionMode::DangerFullAccess; + } - if !is_read_only { + if !matches!( + runtime::bash_validation::validate_read_only(command, PermissionMode::ReadOnly), + runtime::bash_validation::ValidationResult::Allow + ) { return PermissionMode::DangerFullAccess; } - // Check if any path argument is outside workspace - // Simple heuristic: check for absolute paths not starting with CWD - if has_dangerous_paths(command) { + let workspace = std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")); + if !matches!( + runtime::bash_validation::validate_paths(command, &workspace), + runtime::bash_validation::ValidationResult::Allow + ) { + return PermissionMode::DangerFullAccess; + } + if tool_targets_outside_workspace(command, &workspace) { return PermissionMode::DangerFullAccess; } PermissionMode::WorkspaceWrite } -/// Check if command has dangerous paths (outside workspace). 
/// Reports whether any argument of `command` points outside `workspace`.
///
/// The command is split on whitespace and tokens starting with `-` are
/// skipped as flags. Each remaining token is stripped of surrounding quotes
/// and shell punctuation (`"`, `'`, `;`, `,`, `(`, `)`) and counts as
/// "outside" when either:
///
/// * it contains a `..` path component or the `../..` substring. The prefix
///   comparison below is purely lexical, so a path such as
///   `/workspace/../etc` would otherwise look like it is inside;
/// * it is `~`, or starts with `~/` or `/`, and the resulting path does not
///   start with `workspace`.
///
/// The check is deliberately conservative: a stray `..` token is flagged
/// even when it is not used as a path.
///
/// An empty `workspace` disables the absolute-path comparison (traversal is
/// still reported).
fn tool_targets_outside_workspace(command: &str, workspace: &Path) -> bool {
    // `~` expands to HOME, falling back to USERPROFILE. When neither is set
    // the expansion is empty, so `~/x` is compared as `/x`.
    let home = std::env::var("HOME")
        .or_else(|_| std::env::var("USERPROFILE"))
        .unwrap_or_default();

    command
        .split_whitespace()
        .filter(|token| !token.starts_with('-'))
        .any(|token| {
            // Strip quoting first so `"../secret"` is inspected the same way
            // as `../secret`.
            let candidate =
                token.trim_matches(|ch| matches!(ch, '"' | '\'' | ';' | ',' | ')' | '('));

            let traverses_upward = candidate.contains("../..")
                || Path::new(candidate)
                    .components()
                    .any(|part| matches!(part, std::path::Component::ParentDir));
            if traverses_upward {
                return true;
            }

            let resolved = if candidate == "~" || candidate.starts_with("~/") {
                PathBuf::from(candidate.replacen('~', &home, 1))
            } else if candidate.starts_with('/') {
                PathBuf::from(candidate)
            } else {
                // Relative token without traversal: stays under the workspace.
                return false;
            };

            !workspace.as_os_str().is_empty() && !resolved.starts_with(workspace)
        })
}
thread_local! {
    /// Sub-agent nesting depth recorded for the current thread; starts at 0.
    static SUBAGENT_DEPTH: Cell<u32> = const { Cell::new(0) };
}

/// Scope guard returned by [`enter_subagent_depth`].
///
/// Dropping it writes the depth that was active before the call back into
/// the thread-local, so nested scopes unwind in order.
#[must_use = "dropping the guard immediately restores the previous sub-agent depth"]
struct SubagentDepthGuard {
    previous_depth: u32,
}

impl Drop for SubagentDepthGuard {
    fn drop(&mut self) {
        SUBAGENT_DEPTH.with(|depth| depth.set(self.previous_depth));
    }
}

/// Returns the sub-agent depth currently recorded for this thread.
fn current_subagent_depth() -> u32 {
    SUBAGENT_DEPTH.with(Cell::get)
}

/// Sets this thread's sub-agent depth to `depth` until the returned guard is
/// dropped.
///
/// Bind the result to a named variable (`let _guard = ...`). An unbound call
/// drops the guard at once and the depth change has no effect.
fn enter_subagent_depth(depth: u32) -> SubagentDepthGuard {
    // `Cell::replace` stores the new value and hands back the old one.
    let previous_depth = SUBAGENT_DEPTH.with(|current| current.replace(depth));
    SubagentDepthGuard { previous_depth }
}
build_subagent_runtime(resolved_model, allowed_tools.clone(), system_prompt)? + .with_max_iterations(DEFAULT_AGENT_MAX_ITERATIONS); + let _depth_guard = enter_subagent_depth(subagent_depth); + let summary = runtime + .run_turn(prompt.to_string(), None) + .map_err(|error| error.to_string())?; + Ok(parse_review_outcome( + &final_assistant_text(&summary), + subagent_depth, + )) +} + fn execute_agent_with_spawn(input: AgentInput, spawn_fn: F) -> Result where F: FnOnce(AgentJob) -> Result<(), String>, @@ -3494,6 +3561,13 @@ where let output_file = output_dir.join(format!("{agent_id}.md")); let manifest_file = output_dir.join(format!("{agent_id}.json")); let normalized_subagent_type = normalize_subagent_type(input.subagent_type.as_deref()); + let parent_depth = current_subagent_depth(); + if normalized_subagent_type == "Review" && parent_depth > 0 { + return Err(String::from( + "review subagents cannot be spawned from another subagent", + )); + } + let subagent_depth = parent_depth.saturating_add(1); let model = resolve_agent_model(input.model.as_deref()); let agent_name = input .name @@ -3537,6 +3611,7 @@ where lane_events: vec![LaneEvent::started(iso8601_now())], current_blocker: None, derived_state: String::from("working"), + subagent_depth, error: None, }; write_agent_manifest(&manifest)?; @@ -3547,6 +3622,7 @@ where prompt: input.prompt, system_prompt, allowed_tools, + subagent_depth, }; if let Err(error) = spawn_fn(job) { let error = format!("failed to spawn sub-agent: {error}"); @@ -3586,6 +3662,7 @@ fn spawn_agent_job(job: AgentJob) -> Result<(), String> { fn run_agent_job(job: &AgentJob) -> Result<(), String> { let mut runtime = build_agent_runtime(job)?.with_max_iterations(DEFAULT_AGENT_MAX_ITERATIONS); + let _depth_guard = enter_subagent_depth(job.subagent_depth); let summary = runtime .run_turn(job.prompt.clone(), None) .map_err(|error| error.to_string())?; @@ -3601,7 +3678,14 @@ fn build_agent_runtime( .model .clone() .unwrap_or_else(|| 
DEFAULT_AGENT_MODEL.to_string()); - let allowed_tools = job.allowed_tools.clone(); + build_subagent_runtime(model, job.allowed_tools.clone(), job.system_prompt.clone()) +} + +fn build_subagent_runtime( + model: String, + allowed_tools: BTreeSet, + system_prompt: Vec, +) -> Result, String> { let api_client = ProviderRuntimeClient::new(model, allowed_tools.clone())?; let permission_policy = agent_permission_policy(); let tool_executor = SubagentToolExecutor::new(allowed_tools) @@ -3611,7 +3695,7 @@ fn build_agent_runtime( api_client, tool_executor, permission_policy, - job.system_prompt.clone(), + system_prompt, )) } @@ -3675,6 +3759,17 @@ fn allowed_tools_for_subagent(subagent_type: &str) -> BTreeSet { "SendUserMessage", "PowerShell", ], + "Review" => vec![ + "bash", + "read_file", + "glob_search", + "grep_search", + "WebFetch", + "WebSearch", + "ToolSearch", + "StructuredOutput", + "PowerShell", + ], "claw-guide" => vec![ "read_file", "glob_search", @@ -4383,6 +4478,130 @@ fn derive_agent_state( "truly_idle" } +fn parse_review_outcome(raw_response: &str, subagent_depth: u32) -> ReviewOutcome { + let parsed = extract_json_value(raw_response) + .and_then(|value| value.as_object().cloned()) + .unwrap_or_default(); + + let mut findings = parsed + .get("findings") + .and_then(serde_json::Value::as_array) + .map(|items| { + items + .iter() + .filter_map(parse_review_finding) + .collect::>() + }) + .unwrap_or_default(); + + findings.retain(|finding| !finding.title.trim().is_empty() && !finding.body.trim().is_empty()); + + let summary = parsed + .get("summary") + .and_then(serde_json::Value::as_str) + .map(str::trim) + .filter(|value| !value.is_empty()) + .map_or_else( + || { + if findings.is_empty() { + String::from("No review findings.") + } else { + format!("Reviewer reported {} finding(s).", findings.len()) + } + }, + ToString::to_string, + ); + + ReviewOutcome { + summary, + findings, + raw_response: raw_response.trim().to_string(), + subagent_depth, + } +} + +fn 
parse_review_finding(value: &serde_json::Value) -> Option { + let object = value.as_object()?; + let severity = object + .get("severity") + .or_else(|| object.get("priority")) + .and_then(serde_json::Value::as_str) + .map_or_else(|| String::from("P2"), normalize_review_severity); + let title = object + .get("title") + .and_then(serde_json::Value::as_str) + .unwrap_or_default() + .trim() + .to_string(); + let body = object + .get("body") + .or_else(|| object.get("message")) + .and_then(serde_json::Value::as_str) + .unwrap_or_default() + .trim() + .to_string(); + let file = object + .get("file") + .and_then(serde_json::Value::as_str) + .map(str::trim) + .filter(|value| !value.is_empty()) + .map(ToString::to_string); + Some(ReviewFinding { + severity, + title, + body, + file, + }) +} + +fn normalize_review_severity(value: &str) -> String { + match value.trim().to_ascii_uppercase().as_str() { + "P0" => String::from("P0"), + "P1" => String::from("P1"), + "P3" => String::from("P3"), + _ => String::from("P2"), + } +} + +fn extract_json_value(raw: &str) -> Option { + let trimmed = raw.trim(); + if trimmed.is_empty() { + return None; + } + if let Ok(value) = serde_json::from_str(trimmed) { + return Some(value); + } + + for fence in ["```json", "```JSON", "```"] { + if let Some(start) = trimmed.find(fence) { + let after_fence = &trimmed[start + fence.len()..]; + if let Some(end) = after_fence.find("```") { + let candidate = after_fence[..end].trim(); + if let Ok(value) = serde_json::from_str(candidate) { + return Some(value); + } + } + } + } + + extract_braced_json(trimmed) +} + +fn extract_braced_json(raw: &str) -> Option { + let mut starts = raw + .char_indices() + .filter_map(|(index, ch)| matches!(ch, '{' | '[').then_some(index)) + .collect::>(); + starts.reverse(); + for start in starts { + let candidate = raw[start..].trim(); + if let Ok(value) = serde_json::from_str(candidate) { + return Some(value); + } + } + None +} + fn maybe_commit_provenance(result: Option<&str>) 
-> Option { let commit = extract_commit_sha(result?)?; let branch = current_git_branch().unwrap_or_else(|| "unknown".to_string()); @@ -4771,9 +4990,54 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec { }], is_error: *is_error, }, - ContentBlock::VerificationReport { summary_text, .. } => { - InputContentBlock::Text { - text: summary_text.clone(), + ContentBlock::VerificationReport { + summary_text, + adapter_id, + project_root, + touched_paths, + primary_failure, + steps, + report_mode, + .. + } => { + if report_mode.as_deref() == Some("typed-primary") { + let payload = json!({ + "type": "verification_report", + "adapter_id": adapter_id, + "project_root": project_root, + "touched_paths": touched_paths, + "primary_failure": primary_failure.as_ref().map(|failure| json!({ + "label": failure.label, + "status": failure.status.as_str(), + "failure_kind": failure.failure_kind, + "output_excerpt": failure.output_excerpt, + "step_kind": failure.step_kind, + "target_scope": failure.target_scope, + "package_name": failure.package_name, + "package_manager": failure.package_manager, + "launcher_kind": failure.launcher_kind, + })), + "steps": steps.iter().map(|step| json!({ + "label": step.label, + "status": step.status.as_str(), + "failure_kind": step.failure_kind, + "duration_ms": step.duration_ms, + "step_kind": step.step_kind, + "target_scope": step.target_scope, + "package_name": step.package_name, + "package_manager": step.package_manager, + "launcher_kind": step.launcher_kind, + })).collect::>(), + }); + let payload = serde_json::to_string(&payload) + .unwrap_or_else(|_| summary_text.clone()); + InputContentBlock::Text { + text: format!("{payload}\n\n[verifier summary]\n{summary_text}"), + } + } else { + InputContentBlock::Text { + text: summary_text.clone(), + } } } }) @@ -5043,6 +5307,7 @@ fn normalize_subagent_type(subagent_type: Option<&str>) -> String { "verification" | "verificationagent" | "verify" | "verifier" => { String::from("Verification") } + 
"review" | "reviewagent" | "critic" | "criticagent" => String::from("Review"), "clawguide" | "clawguideagent" | "guide" => String::from("claw-guide"), "statusline" | "statuslinesetup" => String::from("statusline-setup"), _ => trimmed.to_string(), @@ -6132,18 +6397,24 @@ mod tests { use std::time::Duration; use super::{ - agent_permission_policy, allowed_tools_for_subagent, classify_lane_failure, - derive_agent_state, execute_agent_with_spawn, execute_tool, extract_recovery_outcome, - final_assistant_text, global_cron_registry, maybe_commit_provenance, mvp_tool_specs, - permission_mode_from_plugin, persist_agent_terminal_state, push_output_block, - run_task_packet, AgentInput, AgentJob, GlobalToolRegistry, LaneEventName, LaneFailureClass, - ProviderRuntimeClient, SubagentToolExecutor, + agent_permission_policy, allowed_tools_for_subagent, classify_bash_permission, + classify_lane_failure, convert_messages, derive_agent_state, enter_subagent_depth, + execute_agent_with_spawn, execute_tool, extract_recovery_outcome, final_assistant_text, + global_cron_registry, maybe_commit_provenance, mvp_tool_specs, normalize_subagent_type, + parse_review_outcome, permission_mode_from_plugin, persist_agent_terminal_state, + push_output_block, run_inline_review, run_task_packet, AgentInput, AgentJob, + GlobalToolRegistry, LaneEventName, LaneFailureClass, ProviderRuntimeClient, + SubagentToolExecutor, DEFAULT_AGENT_MODEL, }; - use api::OutputContentBlock; + use api::{InputContentBlock, OutputContentBlock}; + use runtime::verifier::VerificationStepReport; use runtime::ProviderFallbackConfig; use runtime::{ - permission_enforcer::PermissionEnforcer, ApiRequest, AssistantEvent, ConversationRuntime, - PermissionMode, PermissionPolicy, RuntimeError, Session, TaskPacket, ToolExecutor, + permission_enforcer::{EnforcementResult, PermissionEnforcer}, + ApiRequest, AssistantEvent, ContentBlock, ConversationMessage, ConversationRuntime, + MessageRole, PermissionMode, PermissionPolicy, 
RuntimeError, Session, TaskPacket, + ToolExecutor, VerificationFailureKind, VerificationPhase, VerificationReport, + VerificationStatus, }; use serde_json::json; @@ -8386,6 +8657,62 @@ mod tests { assert!(verification.contains("bash")); assert!(verification.contains("PowerShell")); assert!(!verification.contains("write_file")); + + let review = allowed_tools_for_subagent("Review"); + assert!(review.contains("bash")); + assert!(review.contains("read_file")); + assert!(review.contains("StructuredOutput")); + assert!(!review.contains("write_file")); + assert!(!review.contains("Agent")); + } + + #[test] + fn review_outcome_parses_json_and_normalizes_severity() { + let outcome = parse_review_outcome( + r#"{ + "summary": "Found one blocking issue.", + "findings": [ + { + "severity": "p1", + "title": "Null dereference", + "body": "The new path dereferences an optional value without checking it.", + "file": "src/lib.rs" + } + ] + }"#, + 1, + ); + + assert_eq!(outcome.summary, "Found one blocking issue."); + assert_eq!(outcome.subagent_depth, 1); + assert_eq!(outcome.findings.len(), 1); + assert_eq!(outcome.findings[0].severity, "P1"); + assert_eq!(outcome.findings[0].file.as_deref(), Some("src/lib.rs")); + } + + #[test] + fn review_outcome_extracts_fenced_json_payloads() { + let outcome = parse_review_outcome( + "Review complete.\n```json\n{\"findings\":[],\"summary\":\"No issues.\"}\n```", + 1, + ); + + assert!(outcome.findings.is_empty()); + assert_eq!(outcome.summary, "No issues."); + } + + #[test] + fn normalize_subagent_type_maps_review_aliases() { + assert_eq!(normalize_subagent_type(Some("review")), "Review"); + assert_eq!(normalize_subagent_type(Some("critic")), "Review"); + } + + #[test] + fn inline_review_rejects_nested_depth() { + let _guard = enter_subagent_depth(1); + let error = run_inline_review("{}", Some(DEFAULT_AGENT_MODEL)) + .expect_err("nested inline review should fail"); + assert!(error.contains("top-level depth")); } #[derive(Debug)] @@ -8940,6 
+9267,159 @@ mod tests { assert_eq!(output["duration_ms"], 0); } + #[test] + fn convert_messages_keeps_text_primary_verification_reports_as_summary_text() { + let report = sample_verification_report(); + let messages = vec![ConversationMessage { + role: MessageRole::Verification, + blocks: vec![ContentBlock::VerificationReport { + report_id: report.report_id.clone(), + phase: report.phase, + status: report.status, + summary_text: report.summary_text.clone(), + adapter_id: Some(report.adapter_id.clone()), + project_root: Some(report.project_root.display().to_string()), + touched_paths: report + .touched_paths + .iter() + .map(|path| path.display().to_string()) + .collect(), + primary_failure: None, + steps: Vec::new(), + report_mode: Some("text-primary".to_string()), + }], + usage: None, + }]; + + let converted = convert_messages(&messages); + + assert_eq!(converted.len(), 1); + match &converted[0].content[0] { + InputContentBlock::Text { text } => assert_eq!(text, report.summary_text.as_str()), + other => panic!("expected text block, got {other:?}"), + } + } + + #[test] + fn convert_messages_emits_compact_json_for_typed_primary_verification_reports() { + let report = sample_verification_report(); + let message = ConversationMessage::verification_report(&report, Some("typed-primary")); + + let converted = convert_messages(&[message]); + + assert_eq!(converted.len(), 1); + match &converted[0].content[0] { + InputContentBlock::Text { text } => { + assert!(text.contains("\"type\":\"verification_report\"")); + assert!(text.contains("\"adapter_id\":\"rust-cargo\"")); + assert!(text.contains("\"failure_kind\":\"code\"")); + assert!(text.contains("[verifier summary]")); + assert!(text.contains(&report.summary_text)); + } + other => panic!("expected text block, got {other:?}"), + } + } + + #[test] + fn bash_validation_parity_read_only_primitive_is_shared() { + // Parity: tools/src/lib.rs:classify_bash_permission (call site ~1846) and + // 
runtime::permission_enforcer::check_bash (call site ~151) must agree + // on the "read-only" classification because both delegate to + // `runtime::bash_validation::validate_read_only`. + // + // Invariants: + // (1) validate_read_only(cmd, ReadOnly) == Allow ⇔ + // enforcer(ReadOnly).check_bash(cmd) == Allowed + // (2) validate_read_only(cmd, ReadOnly) != Allow ⇒ + // classify_bash_permission(cmd) == DangerFullAccess + // (3) When (1) holds and validate_paths(cmd, cwd) == Allow, + // classify_bash_permission(cmd) == WorkspaceWrite. + let enforcer = PermissionEnforcer::new(PermissionPolicy::new(PermissionMode::ReadOnly)); + let cwd = std::env::current_dir().unwrap_or_else(|_| std::path::PathBuf::from(".")); + + let commands = [ + "ls", + "cat src/lib.rs", + "grep -n alpha src/lib.rs", + "cat /etc/passwd", + "rm src/lib.rs", + "echo hi > out.txt", + "pwd", + "git status", + "mkdir new", + "head -n 5 Cargo.toml", + ]; + + for command in commands { + let read_only_allow = matches!( + runtime::bash_validation::validate_read_only(command, PermissionMode::ReadOnly), + runtime::bash_validation::ValidationResult::Allow + ); + let enforcer_allow = matches!(enforcer.check_bash(command), EnforcementResult::Allowed); + assert_eq!( + read_only_allow, enforcer_allow, + "invariant (1) violated for `{command}`: read_only_allow={read_only_allow}, enforcer_allow={enforcer_allow}" + ); + + let tool_mode = classify_bash_permission(command); + if !read_only_allow { + assert_eq!( + tool_mode, + PermissionMode::DangerFullAccess, + "invariant (2) violated for `{command}`: tool_mode={tool_mode:?}" + ); + continue; + } + + let paths_allow = matches!( + runtime::bash_validation::validate_paths(command, &cwd), + runtime::bash_validation::ValidationResult::Allow + ); + if paths_allow && !crate::tool_targets_outside_workspace(command, &cwd) { + assert_eq!( + tool_mode, + PermissionMode::WorkspaceWrite, + "invariant (3) violated for `{command}`: tool_mode={tool_mode:?}" + ); + } else { + 
assert_eq!( + tool_mode, + PermissionMode::DangerFullAccess, + "out-of-workspace path must escalate for `{command}`: tool_mode={tool_mode:?}" + ); + } + } + } + + #[test] + fn bash_permission_matrix_documents_current_overlap_with_runtime_enforcer() { + let enforcer = PermissionEnforcer::new(PermissionPolicy::new(PermissionMode::ReadOnly)); + let cases = [ + ("cat src/lib.rs", PermissionMode::WorkspaceWrite, true), + ( + "grep -n alpha src/lib.rs", + PermissionMode::WorkspaceWrite, + true, + ), + ("cat /etc/passwd", PermissionMode::DangerFullAccess, true), + ("rm src/lib.rs", PermissionMode::DangerFullAccess, false), + ("echo hi > out.txt", PermissionMode::DangerFullAccess, false), + ]; + + for (command, expected_mode, enforcer_allows) in cases { + assert_eq!( + classify_bash_permission(command), + expected_mode, + "unexpected tool-side classification for {command}" + ); + let allowed = matches!(enforcer.check_bash(command), EnforcementResult::Allowed); + assert_eq!( + allowed, enforcer_allows, + "unexpected enforcer outcome for {command}" + ); + } + } + #[test] fn brief_returns_sent_message_and_attachment_metadata() { let attachment = std::env::temp_dir().join(format!( @@ -9685,4 +10165,34 @@ mod tests { .into_bytes() } } + + fn sample_verification_report() -> VerificationReport { + VerificationReport { + report_id: "tool-typed-report".to_string(), + phase: VerificationPhase::Quick, + adapter_id: "rust-cargo".to_string(), + project_root: PathBuf::from("/workspace"), + touched_paths: vec![PathBuf::from("src/lib.rs")], + status: VerificationStatus::Failed, + summary_text: + "[verifier:quick:rust-cargo] failed (/workspace)\n[verifier] cargo check: FAIL" + .to_string(), + steps: vec![VerificationStepReport { + adapter: "rust-cargo".to_string(), + project_root: PathBuf::from("/workspace"), + label: "cargo check".to_string(), + command: "cargo check -p demo".to_string(), + phase: VerificationPhase::Quick, + status: VerificationStatus::Failed, + failure_kind: 
Some(VerificationFailureKind::Code), + duration_ms: 41, + truncated_output: "error[E0308]: mismatched types".to_string(), + step_kind: Some("cargo_check".to_string()), + target_scope: Some("package".to_string()), + package_name: Some("demo".to_string()), + package_manager: None, + launcher_kind: None, + }], + } + } } From c4acdfabcdf024c55b5a50b5ca38d9863cfd997d Mon Sep 17 00:00:00 2001 From: Yeachan-Heo Date: Wed, 8 Apr 2026 11:04:27 +0000 Subject: [PATCH 6/6] Lock in Linux hook stdin BrokenPipe coverage Latest main already contains the functional BrokenPipe tolerance in plugins::hooks::CommandWithStdin::output_with_stdin, but the only coverage for the original CI failure was the higher-level plugin hook test. Add a deterministic regression that exercises the exact low-level EPIPE path by spawning a hook child that closes stdin immediately while the parent writes an oversized payload. This keeps the real root cause explicit: Linux surfaced BrokenPipe from the parent's stdin write after the hook child closed fd 0 early. Missing execute bits were not the primary bug. 
Constraint: Keep the change surgical on top of latest main Rejected: Re-open the production code path | latest main already contains the runtime fix Rejected: Inflate HookRunner payloads in the regression | HOOK_* env injection hit ARG_MAX before the pipe path Confidence: high Scope-risk: narrow Reversibility: clean Directive: Keep BrokenPipe coverage near CommandWithStdin so future refactors do not regress the Linux EPIPE path Tested: cargo test -p plugins hooks::tests::collects_and_runs_hooks_from_enabled_plugins -- --exact (10x) Tested: cargo test -p plugins hooks::tests::output_with_stdin_tolerates_broken_pipe_when_child_closes_stdin_early -- --exact (10x) Tested: cargo test --workspace Not-tested: GitHub Actions rerun on the PR branch --- rust/crates/plugins/src/hooks.rs | 39 ++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/rust/crates/plugins/src/hooks.rs b/rust/crates/plugins/src/hooks.rs index 5932123ca0..a6e6dfaa18 100644 --- a/rust/crates/plugins/src/hooks.rs +++ b/rust/crates/plugins/src/hooks.rs @@ -579,4 +579,43 @@ mod tests { ); } } + + #[test] + fn output_with_stdin_tolerates_broken_pipe_when_child_closes_stdin_early() { + // given: a hook that immediately closes stdin without consuming the + // JSON payload. Use an oversized payload so the parent keeps writing + // long enough for Linux to surface EPIPE on the old implementation. 
+ let root = temp_dir("stdin-close"); + let script = root.join("close-stdin.sh"); + fs::create_dir_all(&root).expect("temp hook dir"); + fs::write( + &script, + "#!/bin/sh\nexec 0<&-\nprintf 'stdin closed early\\n'\nsleep 0.05\n", + ) + .expect("write stdin-closing hook"); + make_executable(&script); + + let mut child = super::shell_command(script.to_str().expect("utf8 path")); + child.stdin(std::process::Stdio::piped()); + child.stdout(std::process::Stdio::piped()); + child.stderr(std::process::Stdio::piped()); + let large_input = vec![b'x'; 2 * 1024 * 1024]; + + // when + let output = child + .output_with_stdin(&large_input) + .expect("broken pipe should be tolerated"); + + // then + assert!( + output.status.success(), + "child should still exit cleanly: {output:?}" + ); + assert_eq!( + String::from_utf8_lossy(&output.stdout).trim(), + "stdin closed early" + ); + + let _ = fs::remove_dir_all(root); + } }