Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
69a4757
Improve MCP failure diagnostics
TomProkop May 4, 2026
5a727cb
Address MCP review feedback
TomProkop May 4, 2026
dfe5606
Add CLI-friendly failure diagnostics
TomProkop May 17, 2026
8bcae69
Rename get_failure_details to get_execution_log
TomProkop May 17, 2026
03d5f2d
Remove unintentionally committed files
TomProkop May 17, 2026
8c9e23c
feat(mcp): structured execution logs with filtering, search, and paging
TomProkop May 17, 2026
da4fa18
refactor: make CliSubprocessResult constructor accept McpLogForwarder…
TomProkop May 17, 2026
910e8d9
fix: logging consistency, banned API enforcement, and analyzer guardr…
TomProkop May 17, 2026
d7dd9ee
feat: add OTel telemetry with Azure Monitor exporter
TomProkop May 17, 2026
d40b9b2
fix: suppress subprocess telemetry to prevent duplicates
TomProkop May 17, 2026
db4f998
fix: propagate trace context from MCP to CLI subprocess
TomProkop May 17, 2026
97742ff
ci: inject telemetry connection string into NuGet packages
TomProkop May 17, 2026
f664d03
feat: telemetry on by default, no easy opt-out
TomProkop May 17, 2026
0929cf8
fix: address PR review comments
TomProkop May 17, 2026
d92fa37
fix: add missing OTel using statements for Release build
TomProkop May 17, 2026
04fb508
fix: proper OpenTelemetry implementation for App Insights
TomProkop May 18, 2026
4e1985d
fix: upgrade OTel packages to latest and add explicit ForceFlush
TomProkop May 18, 2026
faa8837
fix: record exceptions in all error paths, remove sampling override
TomProkop May 18, 2026
25c5e1e
refactor: unified observability via ILogger as single source of truth
TomProkop May 18, 2026
d8d2fec
feat: tag telemetry spans with user/tenant identity
TomProkop May 18, 2026
1b35246
fix: tag enduser.id (UPN), enduser.scope (tenant), environment URL
TomProkop May 18, 2026
31c53d6
fix: use Entra object ID for enduser.id, add UPN and environment name
TomProkop May 18, 2026
fe7ec13
fix: disable SDK-side adaptive sampling (SamplingRatio=1.0)
TomProkop May 18, 2026
3820c8f
feat: tag all spans with user identity and CLI/MCP version
TomProkop May 18, 2026
1c862d3
fix: clean version in telemetry, disable rate-limited sampling
TomProkop May 18, 2026
beeea73
feat: set request URL and response code for App Insights
TomProkop May 18, 2026
2ae3ba4
chore: cleanup dead code and duplicates for PR review
TomProkop May 19, 2026
e792ae4
fix: filter out Azure Monitor self-telemetry from HTTP instrumentation
TomProkop May 19, 2026
435af8a
refactor: SOLID — extract telemetry into host-agnostic services
TomProkop May 19, 2026
4f4f1db
fix: use enduser.name tag, remove UTC offset from console timestamps
TomProkop May 19, 2026
726f65f
fix: use txc.* namespace for identity tags instead of enduser.*
TomProkop May 19, 2026
ca6fd50
feat: set both OTel enduser.* and txc.* identity tags
TomProkop May 19, 2026
0c6a6a1
fix: propagate subprocess failure to MCP Server span
TomProkop May 19, 2026
eedba51
fix: distinguish talxis-cli vs talxis-mcp service name, label subproc…
TomProkop May 19, 2026
582c60a
fix: remove telemetry.enabled gate — telemetry is always on in Release
TomProkop May 19, 2026
408a13b
refactor: dedupe telemetry bootstrap and logging
TomProkop May 20, 2026
440a204
docs: address PR review comments — fix stale docs, dead code, URI exa…
TomProkop May 20, 2026
8f9ad3e
feat: add session ID resolver for terminal session correlation
TomProkop May 20, 2026
9a89202
fix: add visualstudio.com to HTTP telemetry filter
TomProkop May 20, 2026
4137095
fix: stamp session ID on every span, remove identity duplication, add…
TomProkop May 20, 2026
91a4c10
fix: consistent entry_point/session source across MCP→CLI, filter exp…
TomProkop May 20, 2026
21bbdaa
feat: populate native App Insights session_Id via microsoft.session.i…
TomProkop May 20, 2026
4cf0fb6
fix: CLI subprocess spans report txc.entry_point=mcp when invoked fro…
TomProkop May 20, 2026
e1c3df3
feat: add support escalation info to error output
TomProkop May 20, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/nuget-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,9 @@ jobs:
run: |
$pkgVer = "${{ needs.resolve-version.outputs.package-version }}"
$asmVer = "${{ needs.resolve-version.outputs.assembly-version }}"
dotnet pack src/TALXIS.CLI/TALXIS.CLI.csproj --configuration Release -p:Version=$pkgVer -p:PackageVersion=$pkgVer -p:AssemblyVersion=$asmVer -p:FileVersion=$asmVer
dotnet pack src/TALXIS.CLI.MCP/TALXIS.CLI.MCP.csproj --configuration Release -p:Version=$pkgVer -p:PackageVersion=$pkgVer -p:AssemblyVersion=$asmVer -p:FileVersion=$asmVer
$telemetry = "${{ secrets.TXC_TELEMETRY_CONNECTION_STRING }}"
dotnet pack src/TALXIS.CLI/TALXIS.CLI.csproj --configuration Release -p:Version=$pkgVer -p:PackageVersion=$pkgVer -p:AssemblyVersion=$asmVer -p:FileVersion=$asmVer -p:TxcTelemetryConnectionString="$telemetry"
dotnet pack src/TALXIS.CLI.MCP/TALXIS.CLI.MCP.csproj --configuration Release -p:Version=$pkgVer -p:PackageVersion=$pkgVer -p:AssemblyVersion=$asmVer -p:FileVersion=$asmVer -p:TxcTelemetryConnectionString="$telemetry"
- uses: actions/upload-artifact@v4
with:
name: nuget
Expand Down
53 changes: 50 additions & 3 deletions docs/output-contract.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,55 @@ In text mode, only the human-readable message is printed.

The MCP server (`txc-mcp`) spawns `txc` as a subprocess with `TXC_LOG_FORMAT=json` and `TXC_NON_INTERACTIVE=1`:
- **stdout** → captured as the MCP tool result (JSON by default since stdout is redirected)
- **stderr** → parsed as JSON log lines, forwarded as MCP log notifications
- **stderr** → parsed as structured JSON log lines, forwarded as MCP log/progress notifications
- **exit code** → determines `isError` in the MCP tool result

Since commands default to JSON when stdout is redirected, the MCP server gets structured data automatically — no `--json` injection needed.

### Structured execution logs

Every MCP tool call produces a retrievable execution log stored in memory. The MCP client can fetch it via the `get_execution_log` tool using the diagnostics URI returned in the tool response.

**Data flow:** Command → `ILogger` → `JsonStderrLogger` → stderr JSON line → `McpLogForwarder` → `RedactedLogEntry` list → `ToolLogStore` → `get_execution_log`

Each log entry preserves its original structure:

| Field | Description |
|-------|-------------|
| `timestamp` | ISO-8601 UTC timestamp |
| `level` | Log level: `Trace`, `Debug`, `Information`, `Warning`, `Error`, `Critical` |
| `category` | Logger category (typically `nameof(CommandClass)`) |
| `message` | Redacted log message |
| `data` | Structured data dictionary from message template placeholders (nullable) |

The `get_execution_log` tool supports filtering and pagination:
- `level` — minimum level filter (e.g., `"Error"` returns only Error + Critical)
- `category` — filter by category substring
- `search` — full-text search across messages and data values
- `skip` / `take` — pagination (defaults: skip=0, take=50)

### Progress streaming

Long-running commands should emit frequent `ILogger` messages so MCP clients see real-time progress. `McpLogForwarder` converts each stderr log line into an MCP `notifications/progress` notification (rate-limited to 500ms). If a log entry includes a `Progress` integer property, that semantic value is used instead of the raw line count.

**Good pattern for progress:**
```csharp
Logger.LogInformation("Importing solution {Current}/{Total}: {SolutionName}",
current, total, name);
```

### Structured logging requirement

All `ILogger` calls must use **message templates**, not string interpolation. This ensures the `data` dictionary in structured log entries is populated with named values.

```csharp
// ✅ Correct — populates data: { "Entity": "account", "Error": "..." }
Logger.LogError("Failed to process {Entity}: {Error}", entityName, ex.Message);

// ❌ Wrong — data dict is empty, structured filtering loses information
Logger.LogError($"Failed to process {entityName}: {ex.Message}");
```

## Enforcement

| Mechanism | What it catches | When |
Expand All @@ -142,6 +186,7 @@ Since commands default to JSON when stdout is redirected, the MCP server gets st
| `TXC001` (Roslyn analyzer) | Leaf `[CliCommand]` not inheriting `TxcLeafCommand` | Build time (error) |
| `TXC002` (Roslyn analyzer) | Leaf command defining own `RunAsync()` | Build time (error) |
| `TXC003` (Roslyn analyzer) | Direct `OutputWriter` calls in command code (auto-suppresses text-renderer lambdas) | Build time (error) |
| `TXC014` (Roslyn analyzer) | String interpolation in `ILogger` calls (must use message templates) | Build time (warning) |
| `CommandConventionTests` | Non-conforming commands, stale `--json` flags, local `JsonSerializerOptions` | Test time |
| `LayeringTests` | Feature→Feature project references, `--yes` commands missing `[McpIgnore]` | Test time |

Expand All @@ -150,5 +195,7 @@ Since commands default to JSON when stdout is redirected, the MCP server gets st
1. Create a class with `[CliCommand]` extending `TxcLeafCommand` (or `ProfiledCliCommand`)
2. Implement `protected override ILogger Logger { get; }` and `protected override Task<int> ExecuteAsync()`
3. Use `OutputFormatter` for all output
4. Return `ExitSuccess`, `ExitError`, or `ExitValidationError`
5. Run tests to verify convention compliance
4. Use message templates (not string interpolation) in all `ILogger` calls
5. For long-running operations, emit periodic `Logger.LogInformation` messages for progress visibility
6. Return `ExitSuccess`, `ExitError`, or `ExitValidationError`
7. Run tests to verify convention compliance
4 changes: 4 additions & 0 deletions src/BannedSymbols.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ M:System.Console.WriteLine(System.Object);Use OutputWriter.WriteLine() or Output
M:System.Console.WriteLine(System.String,System.Object);Use OutputWriter.WriteLine() or OutputFormatter for command output
M:System.Console.WriteLine(System.String,System.Object[]);Use OutputWriter.WriteLine() or OutputFormatter for command output

# Console.Error / Console.Out — prevent direct stream access outside approved sinks
P:System.Console.Error;Use ILogger for diagnostics or an approved logging sink
P:System.Console.Out;Use OutputWriter/OutputFormatter for command output or an approved stdout sink

# Console.ReadKey / Console.ReadLine — interactive I/O forbidden in command code
# Use IHeadlessDetector to guard interactive flows and prompt via a dedicated abstraction.
M:System.Console.ReadKey;Use IHeadlessDetector and a prompt abstraction instead
Expand Down
17 changes: 16 additions & 1 deletion src/Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,24 @@
<WarningsAsErrors>$(WarningsAsErrors);RS0030;TXC001;TXC002;TXC003;TXC004;TXC005;TXC006;TXC007;TXC008;TXC009</WarningsAsErrors>
</PropertyGroup>

<!-- Telemetry is only active in Release builds. Debug/local 'dotnet run' never emits telemetry. -->
<PropertyGroup Condition="'$(Configuration)' == 'Release'">
<DefineConstants>$(DefineConstants);TELEMETRY_ENABLED</DefineConstants>
</PropertyGroup>

<!-- Pipeline injects: -p:TxcTelemetryConnectionString="InstrumentationKey=...;IngestionEndpoint=..."
Embedded as assembly metadata so NuGet-distributed builds carry the default. -->
<ItemGroup Condition="'$(TxcTelemetryConnectionString)' != ''">
<AssemblyAttribute Include="System.Reflection.AssemblyMetadataAttribute">
<_Parameter1>TxcTelemetryConnectionString</_Parameter1>
<_Parameter2>$(TxcTelemetryConnectionString)</_Parameter2>
</AssemblyAttribute>
</ItemGroup>

<ItemGroup>
<!-- Banned API analyzer: prevents Console.Write*/ReadKey/ReadLine in command code.
Infrastructure projects (Logging, MCP, Core) opt out via their own .csproj. -->
Infrastructure code that legitimately uses Console has surgical #pragma RS0030 exemptions
at each call site (e.g. TxcConsoleFormatter, JsonStderrLoggerProvider, Program.cs bootstrap). -->
<PackageReference Include="Microsoft.CodeAnalysis.BannedApiAnalyzers" Version="3.3.4">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers</IncludeAssets>
Expand Down
3 changes: 3 additions & 0 deletions src/TALXIS.CLI.Analyzers/AnalyzerReleases.Unshipped.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,6 @@ TXC010 | TALXIS.CLI.Design | Error | CLI command description must be at least 20
TXC011 | TALXIS.CLI.Design | Warning | Profiled command description should mention profile or environment
TXC012 | TALXIS.CLI.Design | Warning | [CliDestructive] command description should signal danger
TXC013 | TALXIS.CLI.Design | Warning | Command with ambiguous name should declare [CliWorkflow]
TXC014 | TALXIS.CLI.Design | Warning | Use message templates instead of string interpolation in logger calls
TXC016 | TALXIS.CLI.Design | Warning | CreateLogger must use nameof() for the category name
TXC015 | TALXIS.CLI.Design | Warning | Catch blocks must not silently swallow exceptions
9 changes: 9 additions & 0 deletions src/TALXIS.CLI.Analyzers/DiagnosticIds.cs
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,13 @@ internal static class DiagnosticIds

/// <summary>Leaf command with ambiguous name should declare [CliWorkflow] to prevent workflow misclassification.</summary>
public const string WorkflowRecommended = "TXC013";

/// <summary>Logger calls must use message templates, not string interpolation, to preserve structured data.</summary>
public const string NoInterpolatedLogMessage = "TXC014";

/// <summary>Catch blocks must not silently swallow exceptions — must log, rethrow, or return error.</summary>
public const string NoBareExceptionSwallow = "TXC015";

/// <summary>CreateLogger calls must use nameof() for consistent category names.</summary>
public const string LoggerMustUseNameof = "TXC016";
}
102 changes: 102 additions & 0 deletions src/TALXIS.CLI.Analyzers/LoggerMustUseNameofAnalyzer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
using System.Collections.Immutable;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.Diagnostics;
using Microsoft.CodeAnalysis.Operations;

namespace TALXIS.CLI.Analyzers;

/// <summary>
/// TXC016: CreateLogger calls must use <c>nameof()</c> for the category name.
/// <para>
/// The generic overload <c>CreateLogger&lt;T&gt;()</c> uses the full namespace-qualified
/// type name as the category, which produces noisy log output. The correct pattern is
/// <c>TxcLoggerFactory.CreateLogger(nameof(ClassName))</c>.
/// </para>
/// <para>
/// String-literal arguments are fragile — they silently go stale when a class is renamed.
/// Using <c>nameof()</c> keeps category names in sync with the code.
/// </para>
/// </summary>
[DiagnosticAnalyzer(LanguageNames.CSharp)]
public sealed class LoggerMustUseNameofAnalyzer : DiagnosticAnalyzer
{
private static readonly DiagnosticDescriptor GenericRule = new(
id: DiagnosticIds.LoggerMustUseNameof,
title: "CreateLogger must use nameof() for the category name",
messageFormat: "Use TxcLoggerFactory.CreateLogger(nameof({0})) instead of CreateLogger<{0}>() to ensure consistent short category names",
category: "TALXIS.CLI.Design",
defaultSeverity: DiagnosticSeverity.Warning,
isEnabledByDefault: true);

private static readonly DiagnosticDescriptor LiteralRule = new(
id: DiagnosticIds.LoggerMustUseNameof,
title: "CreateLogger must use nameof() for the category name",
messageFormat: "Use nameof() instead of a string literal in CreateLogger(\"{0}\") to prevent stale category names after refactoring",
category: "TALXIS.CLI.Design",
defaultSeverity: DiagnosticSeverity.Warning,
isEnabledByDefault: true);

public override ImmutableArray<DiagnosticDescriptor> SupportedDiagnostics =>
ImmutableArray.Create(GenericRule, LiteralRule);

public override void Initialize(AnalysisContext context)
{
context.ConfigureGeneratedCodeAnalysis(GeneratedCodeAnalysisFlags.None);
context.EnableConcurrentExecution();
context.RegisterOperationAction(AnalyzeInvocation, OperationKind.Invocation);
}

private static void AnalyzeInvocation(OperationAnalysisContext context)
{
var invocation = (IInvocationOperation)context.Operation;
var method = invocation.TargetMethod;

if (method.Name != "CreateLogger")
return;

var containingTypeName = method.ContainingType?.Name;
if (containingTypeName != "TxcLoggerFactory" && containingTypeName != "ILoggerFactory")
return;

// Do not flag calls inside TxcLoggerFactory itself — it is the factory implementation.
if (context.ContainingSymbol?.ContainingType?.Name == "TxcLoggerFactory")
return;

// Generic overload: CreateLogger<T>() — always flag.
if (method.IsGenericMethod)
{
var typeArg = method.TypeArguments[0];
context.ReportDiagnostic(Diagnostic.Create(
GenericRule,
invocation.Syntax.GetLocation(),
typeArg.Name));
return;
}

// Non-generic overload: CreateLogger(string categoryName)
if (invocation.Arguments.Length == 0)
return;

var firstArg = invocation.Arguments[0];

// nameof(...) compiles to INameOfOperation — this is the correct pattern; skip it.
if (firstArg.Value is INameOfOperation)
return;

// String literal — flag it.
if (firstArg.Value is ILiteralOperation literal
&& literal.Type?.SpecialType == SpecialType.System_String)
{
var literalText = literal.ConstantValue.HasValue
? literal.ConstantValue.Value?.ToString() ?? ""
: "";
context.ReportDiagnostic(Diagnostic.Create(
LiteralRule,
firstArg.Syntax.GetLocation(),
literalText));
return;
}

// Interpolation, concatenation, or other dynamic expressions — intentional; skip.
}
}
118 changes: 118 additions & 0 deletions src/TALXIS.CLI.Analyzers/NoBareExceptionSwallowAnalyzer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
using System.Collections.Immutable;
using System.Linq;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.Diagnostics;
using Microsoft.CodeAnalysis.Operations;

namespace TALXIS.CLI.Analyzers;

/// <summary>
/// TXC015: Catch blocks must not silently swallow exceptions. Broad catch clauses
/// (catching <c>Exception</c>, <c>SystemException</c>, or bare <c>catch</c>) must
/// contain at least one of: a call to a Log* method, a throw/rethrow, or a return statement.
/// Narrowly-typed catches (e.g. <c>OperationCanceledException</c>, <c>IOException</c>) are
/// not flagged — they represent intentional, targeted error handling.
/// </summary>
[DiagnosticAnalyzer(LanguageNames.CSharp)]
public sealed class NoBareExceptionSwallowAnalyzer : DiagnosticAnalyzer
{
private static readonly DiagnosticDescriptor Rule = new(
id: DiagnosticIds.NoBareExceptionSwallow,
title: "Catch blocks must not silently swallow exceptions",
messageFormat: "Catch block in '{0}' silently swallows {1}. Add logging (Logger.LogError), rethrow, or return an error result.",
category: "TALXIS.CLI.Design",
defaultSeverity: DiagnosticSeverity.Warning,
isEnabledByDefault: true);

/// <summary>
/// Broad exception types that are flagged when caught without logging, rethrowing, or returning.
/// </summary>
private static readonly ImmutableHashSet<string> BroadExceptionTypeNames = ImmutableHashSet.Create(
"Exception",
"SystemException");

public override ImmutableArray<DiagnosticDescriptor> SupportedDiagnostics =>
ImmutableArray.Create(Rule);

public override void Initialize(AnalysisContext context)
{
context.ConfigureGeneratedCodeAnalysis(GeneratedCodeAnalysisFlags.None);
context.EnableConcurrentExecution();
context.RegisterOperationAction(AnalyzeCatchClause, OperationKind.CatchClause);
}

private static void AnalyzeCatchClause(OperationAnalysisContext context)
{
var catchOp = (ICatchClauseOperation)context.Operation;

// Determine whether this is a broad catch.
// A bare "catch { }" has ExceptionType == null → broad.
// Otherwise check against the known broad type names.
var exceptionType = catchOp.ExceptionType;
if (exceptionType != null && !IsBroadExceptionType(exceptionType))
return;

// Walk the catch handler body looking for an acceptable action.
var handler = catchOp.Handler;
if (handler != null && ContainsAcceptableAction(handler))
return;

// Build diagnostic arguments.
var caughtDescription = exceptionType != null
? exceptionType.Name
: "all exceptions";

var containingTypeName = context.ContainingSymbol is IMethodSymbol ms
? ms.ContainingType?.Name ?? ms.Name
: context.ContainingSymbol?.ContainingType?.Name ?? context.ContainingSymbol?.Name ?? "Unknown";

context.ReportDiagnostic(Diagnostic.Create(
Rule,
catchOp.Syntax.GetLocation(),
containingTypeName,
caughtDescription));
}

/// <summary>
/// Returns <c>true</c> when the caught type is <c>System.Exception</c> or <c>System.SystemException</c>.
/// We check the simple name AND verify the type lives in the <c>System</c> namespace to avoid
/// false positives on user-defined types that happen to share the name.
/// </summary>
private static bool IsBroadExceptionType(ITypeSymbol type)
{
if (!BroadExceptionTypeNames.Contains(type.Name))
return false;

return type.ContainingNamespace?.ToDisplayString() == "System";
}

/// <summary>
/// Walks all descendant operations of <paramref name="operation"/> looking for at least one
/// acceptable action: a Log* method call, a throw/rethrow, or a return statement.
/// </summary>
private static bool ContainsAcceptableAction(IOperation operation)
{
foreach (var descendant in operation.Descendants())
{
switch (descendant.Kind)
{
// throw or rethrow (bare "throw;" is also OperationKind.Throw with null exception)
case OperationKind.Throw:
return true;

// return statement
case OperationKind.Return:
return true;

// method call whose name starts with "Log"
case OperationKind.Invocation:
var invocation = (IInvocationOperation)descendant;
if (invocation.TargetMethod.Name.StartsWith("Log"))
return true;
break;
}
}

return false;
}
}
Loading