From 502d4491044b462f680f1a6628b8dac3667c56f1 Mon Sep 17 00:00:00 2001 From: Nehan Tanwar Date: Sat, 11 Apr 2026 02:58:37 +0530 Subject: [PATCH 1/3] debug: log full mcp_call item to inspect for embedded Tavily result --- app/services/review_service.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/services/review_service.py b/app/services/review_service.py index f5920ab..617b994 100644 --- a/app/services/review_service.py +++ b/app/services/review_service.py @@ -96,6 +96,8 @@ def _log_output_items(iteration: int, output: list) -> None: f"tool={getattr(item, 'name', '?')} " f"input={getattr(item, 'arguments', '?')}" ) + # Log full item to discover if Tavily result is embedded in any attribute + logger.info(f"[iter {iteration}][mcp:call:full] {item}") elif item_type == "mcp_call_result": logger.info( f"[iter {iteration}][mcp:result] " From 3690ea267f7f4ecfccfba5dc86ee7369794d34bd Mon Sep 17 00:00:00 2001 From: Nehan Tanwar Date: Sat, 11 Apr 2026 03:06:49 +0530 Subject: [PATCH 2/3] Log Tavily search results from mcp_call.output (URL + score per result --- app/services/review_service.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/app/services/review_service.py b/app/services/review_service.py index 617b994..b8a8f75 100644 --- a/app/services/review_service.py +++ b/app/services/review_service.py @@ -96,8 +96,15 @@ def _log_output_items(iteration: int, output: list) -> None: f"tool={getattr(item, 'name', '?')} " f"input={getattr(item, 'arguments', '?')}" ) - # Log full item to discover if Tavily result is embedded in any attribute - logger.info(f"[iter {iteration}][mcp:call:full] {item}") + raw_output = getattr(item, "output", None) + if raw_output: + try: + parsed = json.loads(raw_output) + results = parsed.get("results", []) + summary = [{"url": r.get("url"), "score": r.get("score")} for r in results] + logger.info(f"[iter {iteration}][mcp:result] {len(results)} result(s): {json.dumps(summary)}") + except Exception: + logger.info(f"[iter {iteration}][mcp:result] {raw_output[:500]}") elif item_type == "mcp_call_result": logger.info( f"[iter {iteration}][mcp:result] " From 6edadd3ab4ec4bfc18d7a4d69ac505ef96e787e6 Mon Sep 17 00:00:00 2001 From: Nehan Tanwar Date: Sat, 11 Apr 2026 03:16:03 +0530 Subject: [PATCH 3/3] feat: log tool execution order with colors before review result --- app/main.py | 37 +++++++++++++++++++++++--- app/services/review_service.py | 47 +++++++++++++++++++++++++++++++--- 2 files changed, 76 insertions(+), 8 deletions(-) diff --git a/app/main.py b/app/main.py index d460b8d..71e7dc8 100644 --- a/app/main.py +++ b/app/main.py @@ -2,10 +2,39 @@ from fastapi import FastAPI from app.routes import health, repos, webhook -logging.basicConfig( - level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", -) +# ANSI color codes +_RESET = "\033[0m" +_GREY = "\033[90m" +_CYAN = "\033[96m" +_GREEN = "\033[92m" +_YELLOW = "\033[93m" +_RED = "\033[91m" +_BOLD = "\033[1m" + +_LEVEL_COLORS = { + "DEBUG": _GREY, + "INFO": _GREEN, + "WARNING": _YELLOW, + "ERROR": _RED, + "CRITICAL": _RED + _BOLD, +} + + +class _ColorFormatter(logging.Formatter): + def format(self, record: logging.LogRecord) -> str: + level_color = _LEVEL_COLORS.get(record.levelname, "") + record.levelname = f"{level_color}{record.levelname}{_RESET}" + record.name = f"{_CYAN}{record.name}{_RESET}" + record.asctime = self.formatTime(record, self.datefmt) + record.asctime = f"{_GREY}{record.asctime}{_RESET}" + return ( + f"{record.asctime} [{record.levelname}] {record.name}: {record.getMessage()}" + ) + + +_handler = logging.StreamHandler() +_handler.setFormatter(_ColorFormatter()) +logging.basicConfig(level=logging.INFO, handlers=[_handler]) app = FastAPI( title="CodeSentinel", diff --git a/app/services/review_service.py b/app/services/review_service.py index b8a8f75..2c3a356 100644 --- a/app/services/review_service.py +++ b/app/services/review_service.py @@ -101,10 +101,18 @@ def _log_output_items(iteration: int, output: list) -> None: try: parsed = json.loads(raw_output) results = parsed.get("results", []) - summary = [{"url": r.get("url"), "score": r.get("score")} for r in results] - logger.info(f"[iter {iteration}][mcp:result] {len(results)} result(s): {json.dumps(summary)}") - except Exception: - logger.info(f"[iter {iteration}][mcp:result] {raw_output[:500]}") + logger.info(f"[iter {iteration}][mcp:result] {len(results)} result(s)") + for i, r in enumerate(results, 1): + logger.info( + f"[iter {iteration}][mcp:result:{i}] " + f"score={r.get('score', '?'):.3f} " + f"url={r.get('url', '?')}" + ) + content = r.get("content", "").strip() + if content: + logger.info(f"[iter {iteration}][mcp:result:{i}:content] {content}") + except json.JSONDecodeError as e: + logger.warning(f"[iter {iteration}][mcp:result] failed to parse output: {e}") elif item_type == "mcp_call_result": logger.info( f"[iter {iteration}][mcp:result] " @@ -170,6 +178,28 @@ def _strip_removed_lines(diff: str) -> str: return "\n".join(result) +_TOOL_COLORS = { + "run_linter": "\033[93m", # yellow + "tavily_search": "\033[96m", # cyan + "tavily_extract": "\033[96m", # cyan + "tavily_research":"\033[96m", # cyan + "tavily_skill": "\033[96m", # cyan +} +_RESET = "\033[0m" +_BOLD = "\033[1m" + + +def _log_tools_used(tools: list[str]) -> None: + if not tools: + logger.info(f"[review] tools used: {_BOLD}(none){_RESET}") + return + colored = [] + for t in tools: + color = _TOOL_COLORS.get(t, "\033[90m") + colored.append(f"{color}{t}{_RESET}") + logger.info(f"[review] tools used: {_BOLD}" + " → ".join(colored) + _RESET) + + def review_diff(diff: str, repo_full_name: str, pr_number: int) -> list[Finding]: """Run the agent loop: diff → tool calls (linter + Tavily MCP) → final findings.""" if not diff.strip(): @@ -214,6 +244,7 @@ def review_diff(diff: str, repo_full_name: str, pr_number: int) -> list[Finding] # the model from calling it repeatedly when it receives an empty result. active_tools = list(TOOLS) linter_called = False + tools_used: list[str] = [] for iteration in range(1, 6): response = _client.responses.create( @@ -224,6 +255,7 @@ def review_diff(diff: str, repo_full_name: str, pr_number: int) -> list[Finding] ) function_calls = [item for item in response.output if item.type == "function_call"] + mcp_calls = [item for item in response.output if item.type == "mcp_call"] logger.info( f"[review] iteration {iteration} — " @@ -232,8 +264,13 @@ def review_diff(diff: str, repo_full_name: str, pr_number: int) -> list[Finding] ) _log_output_items(iteration, response.output) + # Track MCP tool calls in execution order + for mc in mcp_calls: + tools_used.append(getattr(mc, "name", "mcp")) + if not function_calls: # No local tools pending — model produced its final answer + _log_tools_used(tools_used) return _parse_findings(response.output_text or "{}") # Extend with the full assistant turn (includes any completed MCP call items). @@ -242,6 +279,7 @@ def review_diff(diff: str, repo_full_name: str, pr_number: int) -> list[Finding] # Dispatch each local function call and append its result for fc in function_calls: + tools_used.append(fc.name) args = json.loads(fc.arguments) result = _dispatch_tool(fc.name, args, file_contents, iteration) input_items.append({ @@ -255,4 +293,5 @@ def review_diff(diff: str, repo_full_name: str, pr_number: int) -> list[Finding] active_tools = [t for t in active_tools if t.get("name") != "run_linter"] logger.warning("[review] Agent loop hit max iterations without a final answer") + _log_tools_used(tools_used) return []