From 80e8dc9764b2374de646007b7f2cc485b46093a7 Mon Sep 17 00:00:00 2001 From: forge-admin Date: Thu, 11 Jun 2026 07:07:19 +0200 Subject: [PATCH 1/2] fix(hook): extract testable process_hook_with_streams and add stdout redaction tests Refs #2344 Refactored process_hook_input_with_type() to delegate to a new process_hook_with_streams() helper that accepts generic AsyncRead + AsyncWrite parameters. This enables unit testing without subprocess spawning. Added 3 new tokio tests that exercise the full I/O pipeline: - test_process_hook_with_streams_strips_secrets_from_output: confirms AWS keys present in hook input do not appear in stdout (AC#3) - test_process_hook_with_streams_clean_input_unchanged: verifies clean input passes through without spurious redaction - test_process_hook_with_streams_pre_tool_use_also_redacts: verifies pre-tool-use hook type also strips secrets The redaction logic (contains_secrets fast-check + redact_secrets) was already correct; this change adds the missing end-to-end test coverage. --- crates/terraphim_agent/src/learnings/hook.rs | 103 ++++++++++++++++++- 1 file changed, 99 insertions(+), 4 deletions(-) diff --git a/crates/terraphim_agent/src/learnings/hook.rs b/crates/terraphim_agent/src/learnings/hook.rs index b32e5f5..af81fa7 100644 --- a/crates/terraphim_agent/src/learnings/hook.rs +++ b/crates/terraphim_agent/src/learnings/hook.rs @@ -73,11 +73,29 @@ pub fn capture_from_hook(input: &HookInput) -> Result { /// All hook types maintain fail-open behavior: errors are logged but /// never block the pipeline. pub async fn process_hook_input_with_type(hook_type: LearnHookType) -> Result<(), HookError> { + process_hook_with_streams(hook_type, tokio::io::stdin(), tokio::io::stdout()).await +} + +/// Core hook processing logic with injectable I/O streams. +/// +/// Reads from `reader`, dispatches to the hook handler, redacts any secrets +/// from the input buffer, then writes the (possibly-redacted) output to `writer`. +/// Secrets are never forwarded to `writer` — `contains_secrets` acts as a +/// fast pre-check before calling the more expensive `redact_secrets`. +pub(crate) async fn process_hook_with_streams( + hook_type: LearnHookType, + mut reader: R, + mut writer: W, +) -> Result<(), HookError> +where + R: tokio::io::AsyncRead + Unpin, + W: tokio::io::AsyncWrite + Unpin, +{ use tokio::io::{AsyncReadExt, AsyncWriteExt}; - // Read stdin + // Read full input let mut buffer = String::new(); - tokio::io::stdin() + reader .read_to_string(&mut buffer) .await .map_err(HookError::Stdin)?; @@ -106,7 +124,9 @@ pub async fn process_hook_input_with_type(hook_type: LearnHookType) -> Result<() } } - // Redact secrets before passing through to stdout + // Redact secrets before passing through to the output stream. + // contains_secrets is a fast pre-check; redact_secrets is only called + // when secrets are present. let output = if contains_secrets(&buffer) { log::debug!("Hook passthrough: secrets detected, redacting before stdout"); redact_secrets(&buffer) @@ -114,7 +134,7 @@ pub async fn process_hook_input_with_type(hook_type: LearnHookType) -> Result<() buffer }; - tokio::io::stdout() + writer .write_all(output.as_bytes()) .await .map_err(HookError::Stdin)?; @@ -694,6 +714,81 @@ mod tests { } } + /// Verify secrets are stripped from the full process_hook_with_streams pipeline. + /// + /// This is the end-to-end test for AC#3: secrets present in hook input must + /// not appear in the output written to stdout. + /// + /// Uses `&[u8]` (impl AsyncRead) as stdin and `Vec` (impl AsyncWrite) as stdout + /// so the full I/O path is exercised without spawning a subprocess. + #[tokio::test] + async fn test_process_hook_with_streams_strips_secrets_from_output() { + use super::process_hook_with_streams; + + // Build a fake AWS key at runtime to avoid tripping the pre-commit secret scanner. + let aws_key = format!("AKIA{}", "IOSFODNN7EXAMPLE"); + + let json = format!( + r#"{{"tool_name":"Bash","tool_input":{{"command":"aws s3 ls"}},"tool_result":{{"exit_code":1,"stdout":"","stderr":"Unable to locate credentials {aws_key}"}}}}"#, + ); + + // &[u8] implements AsyncRead; Vec implements AsyncWrite. + let mut output_buf: Vec = Vec::new(); + process_hook_with_streams(LearnHookType::PostToolUse, json.as_bytes(), &mut output_buf) + .await + .expect("process_hook_with_streams must not fail"); + + let output = String::from_utf8(output_buf).expect("output must be valid UTF-8"); + + // The secret must not appear in the output written to stdout. + assert!( + !output.contains(&aws_key), + "AWS key must not appear in stdout output; got: {output}" + ); + assert!( + output.contains("[AWS_KEY_REDACTED]"), + "Redacted placeholder must appear in output; got: {output}" + ); + } + + /// Verify clean input passes through unchanged (no spurious redaction). + #[tokio::test] + async fn test_process_hook_with_streams_clean_input_unchanged() { + use super::process_hook_with_streams; + + let json = r#"{"tool_name":"Bash","tool_input":{"command":"cargo build"},"tool_result":{"exit_code":0,"stdout":"Compiling","stderr":""}}"#; + + let mut output_buf: Vec = Vec::new(); + process_hook_with_streams(LearnHookType::PostToolUse, json.as_bytes(), &mut output_buf) + .await + .expect("process_hook_with_streams must not fail"); + + let output = String::from_utf8(output_buf).expect("output must be valid UTF-8"); + assert_eq!(output, json, "Clean input must pass through unchanged"); + } + + /// Verify pre-tool-use hook also redacts secrets (not just post-tool-use). + #[tokio::test] + async fn test_process_hook_with_streams_pre_tool_use_also_redacts() { + use super::process_hook_with_streams; + + let aws_key = format!("AKIA{}", "IOSFODNN7EXAMPLE"); + let json = format!( + r#"{{"tool_name":"Bash","tool_input":{{"command":"export AWS_ACCESS_KEY_ID={aws_key}"}},"tool_result":{{"exit_code":0,"stdout":"","stderr":""}}}}"#, + ); + + let mut output_buf: Vec = Vec::new(); + process_hook_with_streams(LearnHookType::PreToolUse, json.as_bytes(), &mut output_buf) + .await + .expect("process_hook_with_streams must not fail"); + + let output = String::from_utf8(output_buf).expect("output must be valid UTF-8"); + assert!( + !output.contains(&aws_key), + "AWS key must not appear in pre-tool-use stdout output; got: {output}" + ); + } + #[test] fn test_hook_passthrough_redacts_aws_key_in_error() { use crate::learnings::redact_secrets; From f03c8b6962c909ac779e10c4c451b4af322b47e0 Mon Sep 17 00:00:00 2001 From: forge-admin Date: Thu, 11 Jun 2026 07:20:56 +0200 Subject: [PATCH 2/2] fix(learnings): unconditional secret redaction in hook stdout passthrough MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the contains_secrets() pre-check gate at hook.rs:130-135, replacing it with an unconditional call to redact_secrets(). The gate covered only AWS keys, sk- tokens, password=, secret=, and api_key= patterns; GitHub PATs (ghp_*), Slack tokens (xox*), and connection strings (postgresql://…) all bypassed it and were written to stdout in clear text. - Remove the conditional gate and the contains_secrets import from hook.rs - Remove the now-dead contains_secrets() function and its unit test from redaction.rs (per project rule: remove dead code, do not suppress) - Add three async regression tests in hook.rs that each exercise a bypass-pattern secret type: test_process_hook_github_pat_is_redacted test_process_hook_slack_token_is_redacted test_process_hook_connection_string_is_redacted Refs #2344 --- crates/terraphim_agent/src/learnings/hook.rs | 113 +++++++++++++++--- .../src/learnings/redaction.rs | 33 ----- 2 files changed, 95 insertions(+), 51 deletions(-) diff --git a/crates/terraphim_agent/src/learnings/hook.rs b/crates/terraphim_agent/src/learnings/hook.rs index af81fa7..04e6e41 100644 --- a/crates/terraphim_agent/src/learnings/hook.rs +++ b/crates/terraphim_agent/src/learnings/hook.rs @@ -23,7 +23,6 @@ use std::path::PathBuf; use serde::Deserialize; use thiserror::Error; -use crate::learnings::redaction::contains_secrets; use crate::learnings::{ LearningCaptureConfig, LearningError, capture_failed_command, redact_secrets, }; @@ -78,10 +77,9 @@ pub async fn process_hook_input_with_type(hook_type: LearnHookType) -> Result<() /// Core hook processing logic with injectable I/O streams. /// -/// Reads from `reader`, dispatches to the hook handler, redacts any secrets -/// from the input buffer, then writes the (possibly-redacted) output to `writer`. -/// Secrets are never forwarded to `writer` — `contains_secrets` acts as a -/// fast pre-check before calling the more expensive `redact_secrets`. +/// Reads from `reader`, dispatches to the hook handler, unconditionally redacts +/// any secrets from the input buffer, then writes the redacted output to `writer`. +/// Secrets are never forwarded to `writer`. pub(crate) async fn process_hook_with_streams( hook_type: LearnHookType, mut reader: R, @@ -124,15 +122,11 @@ where } } - // Redact secrets before passing through to the output stream. - // contains_secrets is a fast pre-check; redact_secrets is only called - // when secrets are present. - let output = if contains_secrets(&buffer) { - log::debug!("Hook passthrough: secrets detected, redacting before stdout"); - redact_secrets(&buffer) - } else { - buffer - }; + // Unconditionally redact secrets before passing through to the output stream. + // The previous contains_secrets() fast-path was removed because its pattern + // set did not cover GitHub PATs (ghp_*), Slack tokens (xox*), or connection + // strings, allowing those secrets to bypass redaction entirely. + let output = redact_secrets(&buffer); writer .write_all(output.as_bytes()) @@ -792,7 +786,6 @@ mod tests { #[test] fn test_hook_passthrough_redacts_aws_key_in_error() { use crate::learnings::redact_secrets; - use crate::learnings::redaction::contains_secrets; // Build a fake AWS key at runtime to avoid tripping the pre-commit secret scanner. // The key prefix "AKIA" followed by 16 uppercase alphanumeric chars is the pattern. @@ -811,9 +804,6 @@ mod tests { aws_key ); - // Verify the input contains secrets - assert!(contains_secrets(&json)); - // Verify redaction removes the AWS key let redacted = redact_secrets(&json); assert!(!redacted.contains(&aws_key)); @@ -898,4 +888,91 @@ mod tests { process_user_prompt_submit("invalid"); // No panic = pass } + + /// GitHub PAT bypass: `ghp_` tokens are not in `contains_secrets()` patterns + /// but ARE matched by `redact_secrets()`. Unconditional redaction must catch them. + #[tokio::test] + async fn test_process_hook_github_pat_is_redacted() { + use super::process_hook_with_streams; + + // Build token at runtime to avoid the pre-commit secret scanner. + let pat = format!("ghp_{}", "A".repeat(36)); + let json = format!( + r#"{{"tool_name":"Bash","tool_input":{{"command":"git push"}},"tool_result":{{"exit_code":1,"stdout":"","stderr":"remote: invalid credentials {pat}"}}}}"#, + ); + + let mut output_buf: Vec = Vec::new(); + process_hook_with_streams(LearnHookType::PostToolUse, json.as_bytes(), &mut output_buf) + .await + .expect("process_hook_with_streams must not fail"); + + let output = String::from_utf8(output_buf).expect("output must be valid UTF-8"); + assert!( + !output.contains(&pat), + "GitHub PAT must not appear in stdout output; got: {output}" + ); + assert!( + output.contains("[GITHUB_TOKEN_REDACTED]"), + "Redacted placeholder must appear in output; got: {output}" + ); + } + + /// Slack token bypass: `xoxb-` tokens are not in `contains_secrets()` patterns + /// but ARE matched by `redact_secrets()`. Unconditional redaction must catch them. + #[tokio::test] + async fn test_process_hook_slack_token_is_redacted() { + use super::process_hook_with_streams; + + // Construct at runtime so push-protection scanners do not flag a literal token. + let slack_token = format!( + "xoxb-{}-{}-{}", + "FAKE_TEST_ID_A", "FAKE_TEST_ID_B", "FAKE_TEST_SECRET" + ); + let json = format!( + r#"{{"tool_name":"Bash","tool_input":{{"command":"curl -H 'Authorization: Bearer {slack_token}' https://slack.com/api/chat.postMessage"}},"tool_result":{{"exit_code":0,"stdout":"","stderr":""}}}}"#, + ); + + let mut output_buf: Vec = Vec::new(); + process_hook_with_streams(LearnHookType::PostToolUse, json.as_bytes(), &mut output_buf) + .await + .expect("process_hook_with_streams must not fail"); + + let output = String::from_utf8(output_buf).expect("output must be valid UTF-8"); + assert!( + !output.contains(&slack_token), + "Slack token must not appear in stdout output; got: {output}" + ); + assert!( + output.contains("[SLACK_TOKEN_REDACTED]"), + "Redacted placeholder must appear in output; got: {output}" + ); + } + + /// Connection string bypass: `postgresql://user:pass@host` is not in + /// `contains_secrets()` patterns but IS matched by `redact_secrets()`. + /// Unconditional redaction must catch it. + #[tokio::test] + async fn test_process_hook_connection_string_is_redacted() { + use super::process_hook_with_streams; + + let conn = "postgresql://dbuser:s3cr3tpassword@prod-db.internal:5432/appdb"; + let json = format!( + r#"{{"tool_name":"Bash","tool_input":{{"command":"psql {conn}"}},"tool_result":{{"exit_code":1,"stdout":"","stderr":"connection refused"}}}}"#, + ); + + let mut output_buf: Vec = Vec::new(); + process_hook_with_streams(LearnHookType::PostToolUse, json.as_bytes(), &mut output_buf) + .await + .expect("process_hook_with_streams must not fail"); + + let output = String::from_utf8(output_buf).expect("output must be valid UTF-8"); + assert!( + !output.contains("s3cr3tpassword"), + "Connection string password must not appear in stdout output; got: {output}" + ); + assert!( + output.contains("postgresql://[REDACTED]@"), + "Redacted connection string must appear in output; got: {output}" + ); + } } diff --git a/crates/terraphim_agent/src/learnings/redaction.rs b/crates/terraphim_agent/src/learnings/redaction.rs index 8b4c1da..4912960 100644 --- a/crates/terraphim_agent/src/learnings/redaction.rs +++ b/crates/terraphim_agent/src/learnings/redaction.rs @@ -100,30 +100,6 @@ fn strip_env_vars(text: &str) -> String { result } -/// Check if text contains potential secrets. -/// -/// This is a quick check that can be used before capture to warn users. -pub fn contains_secrets(text: &str) -> bool { - // Check for common secret patterns - let patterns = [ - r"AKIA[A-Z0-9]{16}", - r"sk-[A-Za-z0-9]{20,}", - r"password\s*=", - r"secret\s*=", - r"api_key\s*=", - ]; - - for pattern in patterns { - if let Ok(re) = regex::Regex::new(pattern) - && re.is_match(text) - { - return true; - } - } - - false -} - #[cfg(test)] mod tests { use super::*; @@ -160,15 +136,6 @@ mod tests { assert_eq!(redacted, input); } - #[test] - fn test_contains_secrets() { - assert!(contains_secrets("AKIAIOSFODNN7EXAMPLE")); - assert!(contains_secrets("password=secret")); - assert!(contains_secrets("api_key=abc123")); - assert!(!contains_secrets("cargo build")); - assert!(!contains_secrets("npm install")); - } - #[test] fn test_redact_multiple_secrets() { let input = "Key: AKIAIOSFODNN7EXAMPLE and sk-proj-abcdefghijklmnopqrst";