diff --git a/crates/terraphim_agent/src/learnings/hook.rs b/crates/terraphim_agent/src/learnings/hook.rs index b32e5f5..04e6e41 100644 --- a/crates/terraphim_agent/src/learnings/hook.rs +++ b/crates/terraphim_agent/src/learnings/hook.rs @@ -23,7 +23,6 @@ use std::path::PathBuf; use serde::Deserialize; use thiserror::Error; -use crate::learnings::redaction::contains_secrets; use crate::learnings::{ LearningCaptureConfig, LearningError, capture_failed_command, redact_secrets, }; @@ -73,11 +72,28 @@ pub fn capture_from_hook(input: &HookInput) -> Result { /// All hook types maintain fail-open behavior: errors are logged but /// never block the pipeline. pub async fn process_hook_input_with_type(hook_type: LearnHookType) -> Result<(), HookError> { + process_hook_with_streams(hook_type, tokio::io::stdin(), tokio::io::stdout()).await +} + +/// Core hook processing logic with injectable I/O streams. +/// +/// Reads from `reader`, dispatches to the hook handler, unconditionally redacts +/// any secrets from the input buffer, then writes the redacted output to `writer`. +/// Secrets are never forwarded to `writer`. +pub(crate) async fn process_hook_with_streams( + hook_type: LearnHookType, + mut reader: R, + mut writer: W, +) -> Result<(), HookError> +where + R: tokio::io::AsyncRead + Unpin, + W: tokio::io::AsyncWrite + Unpin, +{ use tokio::io::{AsyncReadExt, AsyncWriteExt}; - // Read stdin + // Read full input let mut buffer = String::new(); - tokio::io::stdin() + reader .read_to_string(&mut buffer) .await .map_err(HookError::Stdin)?; @@ -106,15 +122,13 @@ pub async fn process_hook_input_with_type(hook_type: LearnHookType) -> Result<() } } - // Redact secrets before passing through to stdout - let output = if contains_secrets(&buffer) { - log::debug!("Hook passthrough: secrets detected, redacting before stdout"); - redact_secrets(&buffer) - } else { - buffer - }; + // Unconditionally redact secrets before passing through to the output stream. + // The previous contains_secrets() fast-path was removed because its pattern + // set did not cover GitHub PATs (ghp_*), Slack tokens (xox*), or connection + // strings, allowing those secrets to bypass redaction entirely. + let output = redact_secrets(&buffer); - tokio::io::stdout() + writer .write_all(output.as_bytes()) .await .map_err(HookError::Stdin)?; @@ -694,10 +708,84 @@ mod tests { } } + /// Verify secrets are stripped from the full process_hook_with_streams pipeline. + /// + /// This is the end-to-end test for AC#3: secrets present in hook input must + /// not appear in the output written to stdout. + /// + /// Uses `&[u8]` (impl AsyncRead) as stdin and `Vec` (impl AsyncWrite) as stdout + /// so the full I/O path is exercised without spawning a subprocess. + #[tokio::test] + async fn test_process_hook_with_streams_strips_secrets_from_output() { + use super::process_hook_with_streams; + + // Build a fake AWS key at runtime to avoid tripping the pre-commit secret scanner. + let aws_key = format!("AKIA{}", "IOSFODNN7EXAMPLE"); + + let json = format!( + r#"{{"tool_name":"Bash","tool_input":{{"command":"aws s3 ls"}},"tool_result":{{"exit_code":1,"stdout":"","stderr":"Unable to locate credentials {aws_key}"}}}}"#, + ); + + // &[u8] implements AsyncRead; Vec implements AsyncWrite. + let mut output_buf: Vec = Vec::new(); + process_hook_with_streams(LearnHookType::PostToolUse, json.as_bytes(), &mut output_buf) + .await + .expect("process_hook_with_streams must not fail"); + + let output = String::from_utf8(output_buf).expect("output must be valid UTF-8"); + + // The secret must not appear in the output written to stdout. + assert!( + !output.contains(&aws_key), + "AWS key must not appear in stdout output; got: {output}" + ); + assert!( + output.contains("[AWS_KEY_REDACTED]"), + "Redacted placeholder must appear in output; got: {output}" + ); + } + + /// Verify clean input passes through unchanged (no spurious redaction). + #[tokio::test] + async fn test_process_hook_with_streams_clean_input_unchanged() { + use super::process_hook_with_streams; + + let json = r#"{"tool_name":"Bash","tool_input":{"command":"cargo build"},"tool_result":{"exit_code":0,"stdout":"Compiling","stderr":""}}"#; + + let mut output_buf: Vec = Vec::new(); + process_hook_with_streams(LearnHookType::PostToolUse, json.as_bytes(), &mut output_buf) + .await + .expect("process_hook_with_streams must not fail"); + + let output = String::from_utf8(output_buf).expect("output must be valid UTF-8"); + assert_eq!(output, json, "Clean input must pass through unchanged"); + } + + /// Verify pre-tool-use hook also redacts secrets (not just post-tool-use). + #[tokio::test] + async fn test_process_hook_with_streams_pre_tool_use_also_redacts() { + use super::process_hook_with_streams; + + let aws_key = format!("AKIA{}", "IOSFODNN7EXAMPLE"); + let json = format!( + r#"{{"tool_name":"Bash","tool_input":{{"command":"export AWS_ACCESS_KEY_ID={aws_key}"}},"tool_result":{{"exit_code":0,"stdout":"","stderr":""}}}}"#, + ); + + let mut output_buf: Vec = Vec::new(); + process_hook_with_streams(LearnHookType::PreToolUse, json.as_bytes(), &mut output_buf) + .await + .expect("process_hook_with_streams must not fail"); + + let output = String::from_utf8(output_buf).expect("output must be valid UTF-8"); + assert!( + !output.contains(&aws_key), + "AWS key must not appear in pre-tool-use stdout output; got: {output}" + ); + } + #[test] fn test_hook_passthrough_redacts_aws_key_in_error() { use crate::learnings::redact_secrets; - use crate::learnings::redaction::contains_secrets; // Build a fake AWS key at runtime to avoid tripping the pre-commit secret scanner. // The key prefix "AKIA" followed by 16 uppercase alphanumeric chars is the pattern. @@ -716,9 +804,6 @@ mod tests { aws_key ); - // Verify the input contains secrets - assert!(contains_secrets(&json)); - // Verify redaction removes the AWS key let redacted = redact_secrets(&json); assert!(!redacted.contains(&aws_key)); @@ -803,4 +888,91 @@ mod tests { process_user_prompt_submit("invalid"); // No panic = pass } + + /// GitHub PAT bypass: `ghp_` tokens are not in `contains_secrets()` patterns + /// but ARE matched by `redact_secrets()`. Unconditional redaction must catch them. + #[tokio::test] + async fn test_process_hook_github_pat_is_redacted() { + use super::process_hook_with_streams; + + // Build token at runtime to avoid the pre-commit secret scanner. + let pat = format!("ghp_{}", "A".repeat(36)); + let json = format!( + r#"{{"tool_name":"Bash","tool_input":{{"command":"git push"}},"tool_result":{{"exit_code":1,"stdout":"","stderr":"remote: invalid credentials {pat}"}}}}"#, + ); + + let mut output_buf: Vec = Vec::new(); + process_hook_with_streams(LearnHookType::PostToolUse, json.as_bytes(), &mut output_buf) + .await + .expect("process_hook_with_streams must not fail"); + + let output = String::from_utf8(output_buf).expect("output must be valid UTF-8"); + assert!( + !output.contains(&pat), + "GitHub PAT must not appear in stdout output; got: {output}" + ); + assert!( + output.contains("[GITHUB_TOKEN_REDACTED]"), + "Redacted placeholder must appear in output; got: {output}" + ); + } + + /// Slack token bypass: `xoxb-` tokens are not in `contains_secrets()` patterns + /// but ARE matched by `redact_secrets()`. Unconditional redaction must catch them. + #[tokio::test] + async fn test_process_hook_slack_token_is_redacted() { + use super::process_hook_with_streams; + + // Construct at runtime so push-protection scanners do not flag a literal token. + let slack_token = format!( + "xoxb-{}-{}-{}", + "FAKE_TEST_ID_A", "FAKE_TEST_ID_B", "FAKE_TEST_SECRET" + ); + let json = format!( + r#"{{"tool_name":"Bash","tool_input":{{"command":"curl -H 'Authorization: Bearer {slack_token}' https://slack.com/api/chat.postMessage"}},"tool_result":{{"exit_code":0,"stdout":"","stderr":""}}}}"#, + ); + + let mut output_buf: Vec = Vec::new(); + process_hook_with_streams(LearnHookType::PostToolUse, json.as_bytes(), &mut output_buf) + .await + .expect("process_hook_with_streams must not fail"); + + let output = String::from_utf8(output_buf).expect("output must be valid UTF-8"); + assert!( + !output.contains(&slack_token), + "Slack token must not appear in stdout output; got: {output}" + ); + assert!( + output.contains("[SLACK_TOKEN_REDACTED]"), + "Redacted placeholder must appear in output; got: {output}" + ); + } + + /// Connection string bypass: `postgresql://user:pass@host` is not in + /// `contains_secrets()` patterns but IS matched by `redact_secrets()`. + /// Unconditional redaction must catch it. + #[tokio::test] + async fn test_process_hook_connection_string_is_redacted() { + use super::process_hook_with_streams; + + let conn = "postgresql://dbuser:s3cr3tpassword@prod-db.internal:5432/appdb"; + let json = format!( + r#"{{"tool_name":"Bash","tool_input":{{"command":"psql {conn}"}},"tool_result":{{"exit_code":1,"stdout":"","stderr":"connection refused"}}}}"#, + ); + + let mut output_buf: Vec = Vec::new(); + process_hook_with_streams(LearnHookType::PostToolUse, json.as_bytes(), &mut output_buf) + .await + .expect("process_hook_with_streams must not fail"); + + let output = String::from_utf8(output_buf).expect("output must be valid UTF-8"); + assert!( + !output.contains("s3cr3tpassword"), + "Connection string password must not appear in stdout output; got: {output}" + ); + assert!( + output.contains("postgresql://[REDACTED]@"), + "Redacted connection string must appear in output; got: {output}" + ); + } } diff --git a/crates/terraphim_agent/src/learnings/redaction.rs b/crates/terraphim_agent/src/learnings/redaction.rs index 8b4c1da..4912960 100644 --- a/crates/terraphim_agent/src/learnings/redaction.rs +++ b/crates/terraphim_agent/src/learnings/redaction.rs @@ -100,30 +100,6 @@ fn strip_env_vars(text: &str) -> String { result } -/// Check if text contains potential secrets. -/// -/// This is a quick check that can be used before capture to warn users. -pub fn contains_secrets(text: &str) -> bool { - // Check for common secret patterns - let patterns = [ - r"AKIA[A-Z0-9]{16}", - r"sk-[A-Za-z0-9]{20,}", - r"password\s*=", - r"secret\s*=", - r"api_key\s*=", - ]; - - for pattern in patterns { - if let Ok(re) = regex::Regex::new(pattern) - && re.is_match(text) - { - return true; - } - } - - false -} - #[cfg(test)] mod tests { use super::*; @@ -160,15 +136,6 @@ mod tests { assert_eq!(redacted, input); } - #[test] - fn test_contains_secrets() { - assert!(contains_secrets("AKIAIOSFODNN7EXAMPLE")); - assert!(contains_secrets("password=secret")); - assert!(contains_secrets("api_key=abc123")); - assert!(!contains_secrets("cargo build")); - assert!(!contains_secrets("npm install")); - } - #[test] fn test_redact_multiple_secrets() { let input = "Key: AKIAIOSFODNN7EXAMPLE and sk-proj-abcdefghijklmnopqrst";