Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
202 changes: 187 additions & 15 deletions crates/terraphim_agent/src/learnings/hook.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ use std::path::PathBuf;
use serde::Deserialize;
use thiserror::Error;

use crate::learnings::redaction::contains_secrets;
use crate::learnings::{
LearningCaptureConfig, LearningError, capture_failed_command, redact_secrets,
};
Expand Down Expand Up @@ -73,11 +72,28 @@ pub fn capture_from_hook(input: &HookInput) -> Result<PathBuf, LearningError> {
/// All hook types maintain fail-open behavior: errors are logged but
/// never block the pipeline.
pub async fn process_hook_input_with_type(hook_type: LearnHookType) -> Result<(), HookError> {
process_hook_with_streams(hook_type, tokio::io::stdin(), tokio::io::stdout()).await
}

/// Core hook processing logic with injectable I/O streams.
///
/// Reads from `reader`, dispatches to the hook handler, unconditionally redacts
/// any secrets from the input buffer, then writes the redacted output to `writer`.
/// Secrets are never forwarded to `writer`.
pub(crate) async fn process_hook_with_streams<R, W>(
hook_type: LearnHookType,
mut reader: R,
mut writer: W,
) -> Result<(), HookError>
where
R: tokio::io::AsyncRead + Unpin,
W: tokio::io::AsyncWrite + Unpin,
{
use tokio::io::{AsyncReadExt, AsyncWriteExt};

// Read stdin
// Read full input
let mut buffer = String::new();
tokio::io::stdin()
reader
.read_to_string(&mut buffer)
.await
.map_err(HookError::Stdin)?;
Expand Down Expand Up @@ -106,15 +122,13 @@ pub async fn process_hook_input_with_type(hook_type: LearnHookType) -> Result<()
}
}

// Redact secrets before passing through to stdout
let output = if contains_secrets(&buffer) {
log::debug!("Hook passthrough: secrets detected, redacting before stdout");
redact_secrets(&buffer)
} else {
buffer
};
// Unconditionally redact secrets before passing through to the output stream.
// The previous contains_secrets() fast-path was removed because its pattern
// set did not cover GitHub PATs (ghp_*), Slack tokens (xox*), or connection
// strings, allowing those secrets to bypass redaction entirely.
let output = redact_secrets(&buffer);

tokio::io::stdout()
writer
.write_all(output.as_bytes())
.await
.map_err(HookError::Stdin)?;
Expand Down Expand Up @@ -694,10 +708,84 @@ mod tests {
}
}

/// Verify secrets are stripped from the full process_hook_with_streams pipeline.
///
/// This is the end-to-end test for AC#3: secrets present in hook input must
/// not appear in the output written to stdout.
///
/// Uses `&[u8]` (impl AsyncRead) as stdin and `Vec<u8>` (impl AsyncWrite) as stdout
/// so the full I/O path is exercised without spawning a subprocess.
#[tokio::test]
async fn test_process_hook_with_streams_strips_secrets_from_output() {
use super::process_hook_with_streams;

// Build a fake AWS key at runtime to avoid tripping the pre-commit secret scanner.
let aws_key = format!("AKIA{}", "IOSFODNN7EXAMPLE");

let json = format!(
r#"{{"tool_name":"Bash","tool_input":{{"command":"aws s3 ls"}},"tool_result":{{"exit_code":1,"stdout":"","stderr":"Unable to locate credentials {aws_key}"}}}}"#,
);

// &[u8] implements AsyncRead; Vec<u8> implements AsyncWrite.
let mut output_buf: Vec<u8> = Vec::new();
process_hook_with_streams(LearnHookType::PostToolUse, json.as_bytes(), &mut output_buf)
.await
.expect("process_hook_with_streams must not fail");

let output = String::from_utf8(output_buf).expect("output must be valid UTF-8");

// The secret must not appear in the output written to stdout.
assert!(
!output.contains(&aws_key),
"AWS key must not appear in stdout output; got: {output}"
);
assert!(
output.contains("[AWS_KEY_REDACTED]"),
"Redacted placeholder must appear in output; got: {output}"
);
}

/// Verify clean input passes through unchanged (no spurious redaction).
#[tokio::test]
async fn test_process_hook_with_streams_clean_input_unchanged() {
use super::process_hook_with_streams;

let json = r#"{"tool_name":"Bash","tool_input":{"command":"cargo build"},"tool_result":{"exit_code":0,"stdout":"Compiling","stderr":""}}"#;

let mut output_buf: Vec<u8> = Vec::new();
process_hook_with_streams(LearnHookType::PostToolUse, json.as_bytes(), &mut output_buf)
.await
.expect("process_hook_with_streams must not fail");

let output = String::from_utf8(output_buf).expect("output must be valid UTF-8");
assert_eq!(output, json, "Clean input must pass through unchanged");
}

/// Verify pre-tool-use hook also redacts secrets (not just post-tool-use).
#[tokio::test]
async fn test_process_hook_with_streams_pre_tool_use_also_redacts() {
use super::process_hook_with_streams;

let aws_key = format!("AKIA{}", "IOSFODNN7EXAMPLE");
let json = format!(
r#"{{"tool_name":"Bash","tool_input":{{"command":"export AWS_ACCESS_KEY_ID={aws_key}"}},"tool_result":{{"exit_code":0,"stdout":"","stderr":""}}}}"#,
);

let mut output_buf: Vec<u8> = Vec::new();
process_hook_with_streams(LearnHookType::PreToolUse, json.as_bytes(), &mut output_buf)
.await
.expect("process_hook_with_streams must not fail");

let output = String::from_utf8(output_buf).expect("output must be valid UTF-8");
assert!(
!output.contains(&aws_key),
"AWS key must not appear in pre-tool-use stdout output; got: {output}"
);
}

#[test]
fn test_hook_passthrough_redacts_aws_key_in_error() {
use crate::learnings::redact_secrets;
use crate::learnings::redaction::contains_secrets;

// Build a fake AWS key at runtime to avoid tripping the pre-commit secret scanner.
// The key prefix "AKIA" followed by 16 uppercase alphanumeric chars is the pattern.
Expand All @@ -716,9 +804,6 @@ mod tests {
aws_key
);

// Verify the input contains secrets
assert!(contains_secrets(&json));

// Verify redaction removes the AWS key
let redacted = redact_secrets(&json);
assert!(!redacted.contains(&aws_key));
Expand Down Expand Up @@ -803,4 +888,91 @@ mod tests {
process_user_prompt_submit("invalid");
// No panic = pass
}

/// GitHub PAT bypass: `ghp_` tokens are not in `contains_secrets()` patterns
/// but ARE matched by `redact_secrets()`. Unconditional redaction must catch them.
#[tokio::test]
async fn test_process_hook_github_pat_is_redacted() {
use super::process_hook_with_streams;

// Build token at runtime to avoid the pre-commit secret scanner.
let pat = format!("ghp_{}", "A".repeat(36));
let json = format!(
r#"{{"tool_name":"Bash","tool_input":{{"command":"git push"}},"tool_result":{{"exit_code":1,"stdout":"","stderr":"remote: invalid credentials {pat}"}}}}"#,
);

let mut output_buf: Vec<u8> = Vec::new();
process_hook_with_streams(LearnHookType::PostToolUse, json.as_bytes(), &mut output_buf)
.await
.expect("process_hook_with_streams must not fail");

let output = String::from_utf8(output_buf).expect("output must be valid UTF-8");
assert!(
!output.contains(&pat),
"GitHub PAT must not appear in stdout output; got: {output}"
);
assert!(
output.contains("[GITHUB_TOKEN_REDACTED]"),
"Redacted placeholder must appear in output; got: {output}"
);
}

/// Slack token bypass: `xoxb-` tokens are not in `contains_secrets()` patterns
/// but ARE matched by `redact_secrets()`. Unconditional redaction must catch them.
#[tokio::test]
async fn test_process_hook_slack_token_is_redacted() {
use super::process_hook_with_streams;

// Construct at runtime so push-protection scanners do not flag a literal token.
let slack_token = format!(
"xoxb-{}-{}-{}",
"FAKE_TEST_ID_A", "FAKE_TEST_ID_B", "FAKE_TEST_SECRET"
);
let json = format!(
r#"{{"tool_name":"Bash","tool_input":{{"command":"curl -H 'Authorization: Bearer {slack_token}' https://slack.com/api/chat.postMessage"}},"tool_result":{{"exit_code":0,"stdout":"","stderr":""}}}}"#,
);

let mut output_buf: Vec<u8> = Vec::new();
process_hook_with_streams(LearnHookType::PostToolUse, json.as_bytes(), &mut output_buf)
.await
.expect("process_hook_with_streams must not fail");

let output = String::from_utf8(output_buf).expect("output must be valid UTF-8");
assert!(
!output.contains(&slack_token),
"Slack token must not appear in stdout output; got: {output}"
);
assert!(
output.contains("[SLACK_TOKEN_REDACTED]"),
"Redacted placeholder must appear in output; got: {output}"
);
}

/// Connection string bypass: `postgresql://user:pass@host` is not in
/// `contains_secrets()` patterns but IS matched by `redact_secrets()`.
/// Unconditional redaction must catch it.
#[tokio::test]
async fn test_process_hook_connection_string_is_redacted() {
use super::process_hook_with_streams;

let conn = "postgresql://dbuser:s3cr3tpassword@prod-db.internal:5432/appdb";
let json = format!(
r#"{{"tool_name":"Bash","tool_input":{{"command":"psql {conn}"}},"tool_result":{{"exit_code":1,"stdout":"","stderr":"connection refused"}}}}"#,
);

let mut output_buf: Vec<u8> = Vec::new();
process_hook_with_streams(LearnHookType::PostToolUse, json.as_bytes(), &mut output_buf)
.await
.expect("process_hook_with_streams must not fail");

let output = String::from_utf8(output_buf).expect("output must be valid UTF-8");
assert!(
!output.contains("s3cr3tpassword"),
"Connection string password must not appear in stdout output; got: {output}"
);
assert!(
output.contains("postgresql://[REDACTED]@"),
"Redacted connection string must appear in output; got: {output}"
);
}
}
33 changes: 0 additions & 33 deletions crates/terraphim_agent/src/learnings/redaction.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,30 +100,6 @@ fn strip_env_vars(text: &str) -> String {
result
}

/// Check if text contains potential secrets.
///
/// This is a quick check that can be used before capture to warn users.
pub fn contains_secrets(text: &str) -> bool {
// Check for common secret patterns
let patterns = [
r"AKIA[A-Z0-9]{16}",
r"sk-[A-Za-z0-9]{20,}",
r"password\s*=",
r"secret\s*=",
r"api_key\s*=",
];

for pattern in patterns {
if let Ok(re) = regex::Regex::new(pattern)
&& re.is_match(text)
{
return true;
}
}

false
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down Expand Up @@ -160,15 +136,6 @@ mod tests {
assert_eq!(redacted, input);
}

#[test]
fn test_contains_secrets() {
assert!(contains_secrets("AKIAIOSFODNN7EXAMPLE"));
assert!(contains_secrets("password=secret"));
assert!(contains_secrets("api_key=abc123"));
assert!(!contains_secrets("cargo build"));
assert!(!contains_secrets("npm install"));
}

#[test]
fn test_redact_multiple_secrets() {
let input = "Key: AKIAIOSFODNN7EXAMPLE and sk-proj-abcdefghijklmnopqrst";
Expand Down
Loading