From 0464ed4571be9164f6ba8d1c8422801005d1a86f Mon Sep 17 00:00:00 2001 From: Danil Tarasov Date: Fri, 3 Jul 2026 03:18:15 +0300 Subject: [PATCH] feat(csharp): resolve member-call receivers by type + modularize the C# extractor --- graphify/extract.py | 919 ++++--------------- graphify/extractors/MIGRATION.md | 32 +- graphify/extractors/base.py | 48 + graphify/extractors/csharp.py | 536 ++++++++--- graphify/extractors/csharp_extract.py | 1187 +++++++++++++++++++++++++ graphify/extractors/csharp_resolve.py | 762 ++++++++++++++++ tests/test_csharp_member_calls.py | 1058 +++++++++++++++++++--- tests/test_csharp_type_resolution.py | 100 +++ uv.lock | 2 +- 9 files changed, 3613 insertions(+), 1031 deletions(-) create mode 100644 graphify/extractors/csharp_extract.py create mode 100644 graphify/extractors/csharp_resolve.py diff --git a/graphify/extract.py b/graphify/extract.py index 267cf3664..1b3cbbd6e 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -23,15 +23,73 @@ # --- migrated to graphify/extractors/ (see graphify/extractors/MIGRATION.md) --- from graphify.extractors.base import ( # noqa: F401 + LanguageConfig, _LANGUAGE_BUILTIN_GLOBALS, _file_stem, _make_id, _read_text, ) from graphify.extractors.blade import extract_blade # noqa: F401 -from graphify.extractors.csharp import ( +from graphify.extractors.csharp import ( # noqa: F401 + _canonicalize_csharp_namespace_nodes, + _csharp_base_identifier, + _csharp_preserve_scoped_stub_rewire, + _is_cs_file, + _metadata, _resolve_cross_file_csharp_imports, _resolve_csharp_type_references, + build_csharp_name_resolver, +) +from graphify.extractors.csharp_extract import ( # noqa: F401 + _csharp_namespace_id, + _CSHARP_SCOPE_NODES, + _CSHARP_TYPE_DECLARATION_NODES, + _CSHARP_PARAMETER_LIST_NODES, + _CSHARP_CALLABLE_PARAMETER_OWNER_NODES, + _csharp_scope_chain, + _csharp_scope_id, + _CSHARP_BINDING_PATTERN_NODES, + _csharp_designator_names, + _bare_type_node, + _csharp_declared_bare_type, + 
_csharp_parameter_scope_owner, + _csharp_parameter_is_callable_scoped, + _build_csharp_type_table, + _build_csharp_var_call_inits, + _csharp_unique_sorted, + _csharp_shadow_bucket, + _csharp_add_shadow, + _csharp_first_child, + _csharp_enclosing_scope_id, + _csharp_first_identifier_child, + _csharp_names_from_variable_declaration, + _csharp_direct_member_names, + _csharp_direct_member_types, + _build_csharp_shadow_names, + _csharp_pre_scan_interfaces, + _csharp_classify_base, + _CSHARP_TYPE_PARAMETER_SCOPE_DECLARATIONS, + _csharp_type_parameters_in_scope, + _csharp_collect_type_refs, + _csharp_attribute_names, + _csharp_import_target_kind, + _import_csharp, + _csharp_namespace_name, + _csharp_extra_walk, + _CSHARP_CONFIG, + _read_csharp_type_name, + csharp_base_list_facts, + csharp_class_member_metadata, + csharp_field_type_ref_facts, + csharp_file_facts, + csharp_invocation_callee, + csharp_method_reference_facts, + csharp_property_type_ref_facts, +) +from graphify.extractors.csharp_resolve import ( # noqa: F401 + _is_placeholder_node, + _lookup_type_table, + _resolve_csharp_member_calls, ) from graphify.extractors.elixir import extract_elixir # noqa: F401 from graphify.extractors.razor import extract_razor # noqa: F401 @@ -81,11 +139,6 @@ def _file_node_id(rel_path: Path) -> str: return _make_id(_file_stem(rel_path)) -def _csharp_namespace_id(dotted_name: str) -> str: - digest = hashlib.sha1(dotted_name.encode("utf-8")).hexdigest()[:16] - return f"csharp_namespace:{digest}" - - _TSCONFIG_ALIAS_CACHE: dict[str, dict[str, list[str]]] = {} _WORKSPACE_PACKAGE_CACHE: dict[str, dict[str, Path]] = {} _WORKSPACE_MANIFEST_NAMES = ("pnpm-workspace.yaml", "package.json") @@ -575,52 +628,6 @@ def _resolve_js_module_path(raw: str | Path, start_dir: Path | None = None) -> P return _resolve_workspace_import(raw, start_dir) -# ── LanguageConfig dataclass ───────────────────────────────────────────────── - -@dataclass -class LanguageConfig: - ts_module: str # e.g. 
"tree_sitter_python" - ts_language_fn: str = "language" # attr to call: e.g. tslang.language() - - class_types: frozenset = frozenset() - function_types: frozenset = frozenset() - import_types: frozenset = frozenset() - call_types: frozenset = frozenset() - static_prop_types: frozenset = frozenset() - helper_fn_names: frozenset = frozenset() - container_bind_methods: frozenset = frozenset() - event_listener_properties: frozenset = frozenset() - - # Name extraction - name_field: str = "name" - name_fallback_child_types: tuple = () - - # Body detection - body_field: str = "body" - body_fallback_child_types: tuple = () # e.g. ("declaration_list", "compound_statement") - - # Call name extraction - call_function_field: str = "function" # field on call node for callee - call_accessor_node_types: frozenset = frozenset() # member/attribute nodes - call_accessor_field: str = "attribute" # field on accessor for method name - call_accessor_object_field: str = "" # field on accessor for the receiver/object - - # Stop recursion at these types in walk_calls - function_boundary_types: frozenset = frozenset() - - # Import handler: called for import nodes instead of generic handling - import_handler: Callable | None = None - - # Optional custom name resolver for functions (C, C++ declarator unwrapping) - resolve_function_name_fn: Callable | None = None - - # Extra label formatting for functions: if True, functions get "name()" label - function_label_parens: bool = True - - # Extra walk hook called after generic dispatch (for JS arrow functions, C# namespaces, etc.) 
- extra_walk_fn: Callable | None = None - - # ── Generic helpers ─────────────────────────────────────────────────────────── @@ -698,146 +705,6 @@ def _python_collect_type_refs(node, source: bytes, generic: bool, out: list[tupl _python_collect_type_refs(c, source, generic, out) -def _csharp_pre_scan_interfaces(root_node, source: bytes) -> set[str]: - """Return names declared as `interface` in this C# compilation unit.""" - out: set[str] = set() - stack = [root_node] - while stack: - n = stack.pop() - if n.type == "interface_declaration": - name_node = n.child_by_field_name("name") - if name_node is not None: - text = _read_text(name_node, source) - if text: - out.add(text) - stack.extend(n.children) - return out - - -def _csharp_classify_base(name: str, interface_names: set[str]) -> str: - """`implements` if the base name is an interface (declared or by I-prefix convention), else `inherits`.""" - if name in interface_names: - return "implements" - if len(name) >= 2 and name[0] == "I" and name[1].isupper(): - return "implements" - return "inherits" - - -_CSHARP_TYPE_PARAMETER_SCOPE_DECLARATIONS = frozenset({ - "class_declaration", - "interface_declaration", - "record_declaration", - "struct_declaration", - "method_declaration", -}) - - -def _csharp_type_parameters_in_scope(node, source: bytes) -> frozenset[str]: - """Return C# type-parameter names visible from ``node``.""" - names: set[str] = set() - scope = node - while scope is not None: - if scope.type in _CSHARP_TYPE_PARAMETER_SCOPE_DECLARATIONS: - for child in scope.children: - if child.type != "type_parameter_list": - continue - for param in child.children: - if param.type == "type_parameter": - name_node = next( - (sub for sub in param.children if sub.type == "identifier"), - None, - ) - if name_node is not None: - name = _read_text(name_node, source) - if name: - names.add(name) - elif param.type == "identifier": - name = _read_text(param, source) - if name: - names.add(name) - scope = scope.parent - return 
frozenset(names) - - -def _csharp_collect_type_refs( - node, - source: bytes, - generic: bool, - out: list[tuple[str, str, bool, str]], - skip: frozenset[str] | None = None, -) -> None: - """Walk a C# type expression; append (name, role, qualified, qualifier) tuples.""" - if node is None: - return - if skip is None: - skip = _csharp_type_parameters_in_scope(node, source) - t = node.type - if t == "predefined_type": - return - if t == "identifier": - name = _read_text(node, source) - if name and name not in skip: - out.append((name, "generic_arg" if generic else "type", False, "")) - return - if t == "qualified_name": - prefix, _, text = _read_text(node, source).rpartition(".") - text = text.split("<", 1)[0] - if text and text not in skip: - out.append((text, "generic_arg" if generic else "type", True, prefix)) - return - if t == "generic_name": - name_child = node.child_by_field_name("name") - if name_child is None: - for sub in node.children: - if sub.type == "identifier": - name_child = sub - break - if name_child is not None: - qualified = name_child.type == "qualified_name" - prefix, _, name = _read_text(name_child, source).rpartition(".") - if name and name not in skip: - out.append((name, "generic_arg" if generic else "type", qualified, prefix if qualified else "")) - for sub in node.children: - if sub.type == "type_argument_list": - for arg in sub.children: - if arg.is_named: - _csharp_collect_type_refs(arg, source, True, out, skip) - return - if t in ("nullable_type", "array_type", "pointer_type", "ref_type"): - for c in node.children: - if c.is_named: - _csharp_collect_type_refs(c, source, generic, out, skip) - return - if node.is_named: - for c in node.children: - if c.is_named: - _csharp_collect_type_refs(c, source, generic, out, skip) - - -def _csharp_attribute_names(method_node, source: bytes) -> list[tuple[str, bool, str]]: - """Collect attribute names from a C# method/declaration's attribute_list children.""" - names: list[tuple[str, bool, str]] = [] 
- skip = _csharp_type_parameters_in_scope(method_node, source) - for child in method_node.children: - if child.type != "attribute_list": - continue - for attr in child.children: - if attr.type != "attribute": - continue - name_node = attr.child_by_field_name("name") - if name_node is None: - for sub in attr.children: - if sub.type in ("identifier", "qualified_name"): - name_node = sub - break - if name_node is not None: - qualified = name_node.type == "qualified_name" - prefix, _, text = _read_text(name_node, source).rpartition(".") - if text and text not in skip: - names.append((text, qualified, prefix if qualified else "")) - return names - - _JAVA_TYPE_PARAMETER_SCOPE_DECLARATIONS = frozenset({ "class_declaration", "interface_declaration", @@ -2023,37 +1890,6 @@ def _import_c(node, source: bytes, file_nid: str, stem: str, edges: list, str_pa break -def _import_csharp(node, source: bytes, file_nid: str, stem: str, edges: list, str_path: str, scope_stack: list[str] | None = None) -> None: - text = _read_text(node, source).strip().rstrip(";") - if text.startswith("global "): - text = text[len("global "):].strip() - if not text.startswith("using"): - return - body = text[len("using"):].strip() - using_kind, alias, target_fqn = "namespace", None, body - if body.startswith("static "): - using_kind, target_fqn = "static", body[len("static "):].strip() - elif "=" in body: - lhs, rhs = body.split("=", 1) - using_kind, alias, target_fqn = "alias", lhs.strip(), rhs.strip() - if not target_fqn: - return - edges.append({ - "source": file_nid, - "target": _make_id(target_fqn), - "relation": "imports", - "context": "import", - "confidence": "EXTRACTED", - "source_file": str_path, - "source_location": f"L{node.start_point[0] + 1}", - "weight": 1.0, - "metadata": sanitize_metadata({k: v for k, v in - {"using_kind": using_kind, "alias": alias, "target_fqn": target_fqn, - "scope_kind": "namespace" if scope_stack else "file", - "scope_id": scope_stack[-1] if scope_stack else 
None}.items() if v is not None}), - }) - - def _import_kotlin(node, source: bytes, file_nid: str, stem: str, edges: list, str_path: str, scope_stack: list[str] | None = None) -> None: path_node = node.child_by_field_name("path") if path_node: @@ -2270,76 +2106,6 @@ def _swift_local_var_types(body_node, source: bytes, table: dict[str, str]) -> N stack.append(c) -def _csharp_member_type_table(root, source: bytes) -> dict[str, str]: - """Collect ``name -> TypeName`` for C# receiver typing (#1609): class fields, - properties, method parameters, and local variable declarations. - - File-scoped, first-binding-wins (like the C++ table): a field declared once at - class scope is visible to every method's `field.Method()`, and a param/local - shadowing the same name is a conservative approximation graphify already accepts - for receiver typing. Only a resolvable, non-`var` type name is recorded; `var` - without a `new T()` initializer, and predefined/lower-cased primitives, are - skipped (precision over recall — an untypable receiver is left for the resolver - to drop rather than guess). `var v = new T()` is typed from the object-creation. - """ - table: dict[str, str] = {} - - def _typed(type_node) -> str | None: - info = _read_csharp_type_name(type_node, source) - if not info: - return None - name = info[0] - # A genuine C# class name is Pascal-cased; skip predefined primitives - # (int/bool/string) which never own a resolvable method definition here. - return name if name and name[:1].isupper() else None - - def _decl_names(var_decl): - for c in var_decl.children: - if c.type == "variable_declarator": - nm = c.child_by_field_name("name") or next( - (g for g in c.children if g.type == "identifier"), None) - if nm is not None: - yield _read_text(nm, source), c - - def _new_type(declarator) -> str | None: - # `var v = new Server()` — recover the type from the object_creation_expression. 
- for g in declarator.children: - if g.type == "object_creation_expression": - return _typed(g.child_by_field_name("type")) - return None - - stack = [root] - while stack: - n = stack.pop() - t = n.type - if t in ("field_declaration", "local_declaration_statement"): - vd = next((c for c in n.children if c.type == "variable_declaration"), None) - if vd is not None: - type_node = vd.child_by_field_name("type") - declared = _typed(type_node) - for name, decl in _decl_names(vd): - resolved = declared or _new_type(decl) - if name and resolved and name not in table: - table[name] = resolved - elif t == "property_declaration": - nm = n.child_by_field_name("name") - resolved = _typed(n.child_by_field_name("type")) - if nm is not None and resolved: - pname = _read_text(nm, source) - if pname not in table: - table[pname] = resolved - elif t == "parameter": - nm = n.child_by_field_name("name") - resolved = _typed(n.child_by_field_name("type")) - if nm is not None and resolved: - pname = _read_text(nm, source) - if pname not in table: - table[pname] = resolved - for c in n.children: - stack.append(c) - return table - - def _objc_local_var_types(body_node, source: bytes, table: dict[str, str]) -> None: """Collect ``var -> ClassName`` from ObjC local declarations (``Foo *f = ...;``) in a method body, for receiver typing in the cross-file message-send pass @@ -2707,62 +2473,6 @@ def _ts_extra_walk(node, source: bytes, file_nid: str, stem: str, str_path: str, return False -# ── C# extra walk for namespace declarations ────────────────────────────────── - -def _csharp_namespace_name(node, source: bytes) -> str: - name_node = node.child_by_field_name("name") - if name_node is not None: - return _read_text(name_node, source).strip() - for child in node.children: - if child.type in ("identifier", "qualified_name"): - return _read_text(child, source).strip() - return "" - - -def _csharp_extra_walk(node, source: bytes, file_nid: str, stem: str, str_path: str, - nodes: list, edges: list, 
seen_ids: set, function_bodies: list, - parent_class_nid: str | None, add_node_fn, add_edge_fn, - walk_fn, namespace_stack: list[str], scope_stack: list[str]) -> bool: - """Handle namespace declarations for C#. Returns True if handled.""" - if node.type == "namespace_declaration": - ns_name = _csharp_namespace_name(node, source) - pushed = False - if ns_name: - namespace_stack.append(ns_name) - scope_stack.append(f"s{node.start_byte}") - pushed = True - ns_label = ".".join(namespace_stack) - ns_nid = _csharp_namespace_id(ns_label) - line = node.start_point[0] + 1 - add_node_fn(ns_nid, ns_label, line, node_type="namespace", metadata={"kind": "csharp_namespace"}) - add_edge_fn(file_nid, ns_nid, "contains", line) - body = node.child_by_field_name("body") - if body: - try: - for child in body.children: - walk_fn(child, parent_class_nid) - finally: - if pushed: - namespace_stack.pop() - scope_stack.pop() - elif pushed: - namespace_stack.pop() - scope_stack.pop() - return True - if node.type == "file_scoped_namespace_declaration": - ns_name = _csharp_namespace_name(node, source) - if ns_name: - namespace_stack.append(ns_name) - scope_stack.append(f"s{node.start_byte}") - ns_label = ".".join(namespace_stack) - ns_nid = _csharp_namespace_id(ns_label) - line = node.start_point[0] + 1 - add_node_fn(ns_nid, ns_label, line, node_type="namespace", metadata={"kind": "csharp_namespace"}) - add_edge_fn(file_nid, ns_nid, "contains", line) - return True - return False - - # ── Swift extra walk for enum cases ────────────────────────────────────────── def _swift_extra_walk(node, source: bytes, file_nid: str, stem: str, str_path: str, @@ -2945,26 +2655,6 @@ def _swift_extra_walk(node, source: bytes, file_nid: str, stem: str, str_path: s function_boundary_types=frozenset({"method", "singleton_method"}), ) -_CSHARP_CONFIG = LanguageConfig( - ts_module="tree_sitter_c_sharp", - class_types=frozenset({ - "class_declaration", - "interface_declaration", - "enum_declaration", - 
"struct_declaration", - "record_declaration", - }), - function_types=frozenset({"method_declaration"}), - import_types=frozenset({"using_directive"}), - call_types=frozenset({"invocation_expression"}), - call_function_field="function", - call_accessor_node_types=frozenset({"member_access_expression"}), - call_accessor_field="name", - body_fallback_child_types=("declaration_list",), - function_boundary_types=frozenset({"method_declaration"}), - import_handler=_import_csharp, -) - _KOTLIN_CONFIG = LanguageConfig( ts_module="tree_sitter_kotlin", class_types=frozenset({"class_declaration", "object_declaration"}), @@ -3127,31 +2817,6 @@ def _import_swift(node, source: bytes, file_nid: str, stem: str, edges: list, st return modules -def _read_csharp_type_name(node, source: bytes) -> tuple[str, bool, str] | None: - """Resolve a C# type name, whether it was qualified, and its qualifier prefix.""" - if node is None: - return None - if node.type in ("identifier", "predefined_type"): - return (_read_text(node, source), False, "") - if node.type == "qualified_name": - prefix, _, tail = _read_text(node, source).rpartition(".") - tail = tail.split("<", 1)[0] - return (tail, True, prefix) - if node.type == "generic_name": - name_node = node.child_by_field_name("name") - if name_node is not None: - qualified = name_node.type == "qualified_name" - prefix, _, tail = _read_text(name_node, source).rpartition(".") - return (tail, qualified, prefix if qualified else "") - for child in node.children: - if not child.is_named: - continue - result = _read_csharp_type_name(child, source) - if result: - return result - return None - - _SWIFT_CONFIG = LanguageConfig( ts_module="tree_sitter_swift", class_types=frozenset({"class_declaration", "protocol_declaration"}), @@ -3431,11 +3096,13 @@ def walk(node, parent_class_nid: str | None = None) -> None: class_nid = _make_id(stem, ".".join(namespace_stack), class_name) line = node.start_point[0] + 1 metadata = None - if config.ts_module == 
"tree_sitter_c_sharp" and parent_class_nid: - metadata = {"is_nested_type": True} + if config.ts_module == "tree_sitter_c_sharp": + metadata = csharp_class_member_metadata(node, source, parent_class_nid) add_node(class_nid, class_name, line, metadata=metadata) callable_def_nids.add(class_nid) # a class is callable (constructor) add_edge(file_nid, class_nid, "contains", line) + if config.ts_module == "tree_sitter_c_sharp" and parent_class_nid: + add_edge(parent_class_nid, class_nid, "contains", line, context="nested_type") # TS/JS decorators on the class and its members (@Component, @Injectable, # @Input, @Inject, @Entity, …). Decorators live only in class subtrees. @@ -3656,57 +3323,36 @@ def _php_emit_base(base_name: str, rel: str, at_line: int) -> None: # C#-specific: inheritance / interface implementation via base_list if config.ts_module == "tree_sitter_c_sharp": csharp_type_params = _csharp_type_parameters_in_scope(node, source) - for child in node.children: - if child.type != "base_list": - continue - for sub in child.children: - if sub.type not in ("identifier", "generic_name", "qualified_name"): - continue - base_info = _read_csharp_type_name(sub, source) - if base_info is None: - continue - base, qualified, qualifier = base_info - if not base or base in csharp_type_params: - continue - base_nid = _make_id(stem, ".".join(namespace_stack), base) + for base, qualified, qualifier, relation, generic_refs in csharp_base_list_facts( + node, source, csharp_interface_names, csharp_type_params + ): + base_nid = _make_id(stem, ".".join(namespace_stack), base) + if base_nid not in seen_ids: + base_nid = _make_id(base) if base_nid not in seen_ids: - base_nid = _make_id(base) - if base_nid not in seen_ids: - nodes.append({ - "id": base_nid, - "label": base, - "file_type": "code", - "source_file": "", - "source_location": "", - }) - seen_ids.add(base_nid) - relation = _csharp_classify_base(base, csharp_interface_names) - metadata = {"ref_token": base} - if qualified: + 
nodes.append({ + "id": base_nid, + "label": base, + "file_type": "code", + "source_file": "", + "source_location": "", + }) + seen_ids.add(base_nid) + metadata = {"ref_token": base} + if qualified: + metadata["qualified"] = True + if qualifier: + metadata["ref_qualifier"] = qualifier + add_edge(class_nid, base_nid, relation, line, metadata=metadata) + for ref_name, ref_qualified, ref_qualifier in generic_refs: + target = ensure_named_node(ref_name, line) + metadata = {"ref_token": ref_name} + if ref_qualified: metadata["qualified"] = True - if qualifier: - metadata["ref_qualifier"] = qualifier - add_edge(class_nid, base_nid, relation, line, metadata=metadata) - if sub.type == "generic_name": - for tal in sub.children: - if tal.type != "type_argument_list": - continue - for arg in tal.children: - if not arg.is_named: - continue - refs: list[tuple[str, str, bool, str]] = [] - _csharp_collect_type_refs( - arg, source, True, refs, csharp_type_params - ) - for ref_name, _role, ref_qualified, ref_qualifier in refs: - target = ensure_named_node(ref_name, line) - metadata = {"ref_token": ref_name} - if ref_qualified: - metadata["qualified"] = True - if ref_qualifier: - metadata["ref_qualifier"] = ref_qualifier - add_edge(class_nid, target, "references", line, - context="generic_arg", metadata=metadata) + if ref_qualifier: + metadata["ref_qualifier"] = ref_qualifier + add_edge(class_nid, target, "references", line, + context="generic_arg", metadata=metadata) # Java-specific: extends (superclass) / implements (interfaces) / interface-extends if config.ts_module in ("tree_sitter_java", "tree_sitter_groovy"): @@ -3977,29 +3623,15 @@ def _emit_java_parent_type(type_node, rel: str, at_line: int) -> None: if (config.ts_module == "tree_sitter_c_sharp" and t == "field_declaration" and parent_class_nid): - type_node = node.child_by_field_name("type") - if type_node is None: - for child in node.children: - if child.type == "variable_declaration": - type_node = 
child.child_by_field_name("type") - if type_node is not None: - break - type_info = _read_csharp_type_name(type_node, source) - if type_info: - type_name, qualified, qualifier = type_info - csharp_type_params = _csharp_type_parameters_in_scope( - type_node if type_node is not None else node, source - ) - if not type_name or type_name in csharp_type_params: - return - line = node.start_point[0] + 1 - metadata = {"ref_token": type_name} + line = node.start_point[0] + 1 + for ref_name, context, qualified, qualifier in csharp_field_type_ref_facts(node, source): + metadata = {"ref_token": ref_name} if qualified: metadata["qualified"] = True if qualifier: metadata["ref_qualifier"] = qualifier - add_edge(parent_class_nid, ensure_named_node(type_name, line), - "references", line, context="field", metadata=metadata) + add_edge(parent_class_nid, ensure_named_node(ref_name, line), + "references", line, context=context, metadata=metadata) return if (config.ts_module == "tree_sitter_c_sharp" @@ -4013,22 +3645,17 @@ def _emit_java_parent_type(type_node, rel: str, at_line: int) -> None: # field. Use _csharp_collect_type_refs (like the Java/PHP/Kotlin # siblings) so `List` yields both the List field ref and the # Widget generic_arg ref. 
- type_node = node.child_by_field_name("type") - if type_node is not None: - line = node.start_point[0] + 1 - refs: list[tuple[str, str, bool, str]] = [] - _csharp_collect_type_refs(type_node, source, False, refs) - for ref_name, role, qualified, qualifier in refs: - ctx = "generic_arg" if role == "generic_arg" else "field" - target_nid = ensure_named_node(ref_name, line) - if target_nid != parent_class_nid: - metadata = {"ref_token": ref_name} - if qualified: - metadata["qualified"] = True - if qualifier: - metadata["ref_qualifier"] = qualifier - add_edge(parent_class_nid, target_nid, "references", - line, context=ctx, metadata=metadata) + line = node.start_point[0] + 1 + for ref_name, context, qualified, qualifier in csharp_property_type_ref_facts(node, source): + target_nid = ensure_named_node(ref_name, line) + if target_nid != parent_class_nid: + metadata = {"ref_token": ref_name} + if qualified: + metadata["qualified"] = True + if qualifier: + metadata["ref_qualifier"] = qualifier + add_edge(parent_class_nid, target_nid, "references", + line, context=context, metadata=metadata) return if (config.ts_module == "tree_sitter_java" @@ -4204,13 +3831,23 @@ def _emit_java_parent_type(type_node, rel: str, at_line: int) -> None: return line = node.start_point[0] + 1 + func_metadata = None + if config.ts_module == "tree_sitter_c_sharp" and t == "method_declaration": + func_metadata = { + "csharp_return_type": _csharp_declared_bare_type( + node.child_by_field_name("returns"), + node, + source, + ) + } + if parent_class_nid: func_nid = _make_id(parent_class_nid, func_name) - add_node(func_nid, f".{func_name}()", line) + add_node(func_nid, f".{func_name}()", line, metadata=func_metadata) add_edge(parent_class_nid, func_nid, "method", line) else: func_nid = _make_id(stem, func_name) - add_node(func_nid, f"{func_name}()", line) + add_node(func_nid, f"{func_name}()", line, metadata=func_metadata) add_edge(file_nid, func_nid, "contains", line) callable_def_nids.add(func_nid) # 
function / method def is callable if config.ts_module == "tree_sitter_python": @@ -4240,55 +3877,16 @@ def _emit_java_parent_type(type_node, rel: str, at_line: int) -> None: ) if config.ts_module == "tree_sitter_c_sharp": - csharp_type_params = _csharp_type_parameters_in_scope(node, source) - params_node = node.child_by_field_name("parameters") - if params_node is not None: - for p in params_node.children: - if p.type != "parameter": - continue - type_node = p.child_by_field_name("type") - refs: list[tuple[str, str, bool, str]] = [] - _csharp_collect_type_refs( - type_node, source, False, refs, csharp_type_params - ) - for ref_name, role, qualified, qualifier in refs: - ctx = "generic_arg" if role == "generic_arg" else "parameter_type" - target_nid = ensure_named_node(ref_name, line) - if target_nid != func_nid: - metadata = {"ref_token": ref_name} - if qualified: - metadata["qualified"] = True - if qualifier: - metadata["ref_qualifier"] = qualifier - add_edge(func_nid, target_nid, "references", line, - context=ctx, metadata=metadata) - return_node = node.child_by_field_name("returns") - if return_node is not None: - refs: list[tuple[str, str, bool, str]] = [] - _csharp_collect_type_refs( - return_node, source, False, refs, csharp_type_params - ) - for ref_name, role, qualified, qualifier in refs: - ctx = "generic_arg" if role == "generic_arg" else "return_type" - target_nid = ensure_named_node(ref_name, line) - if target_nid != func_nid: - metadata = {"ref_token": ref_name} - if qualified: - metadata["qualified"] = True - if qualifier: - metadata["ref_qualifier"] = qualifier - add_edge(func_nid, target_nid, "references", line, - context=ctx, metadata=metadata) - for attr_name, qualified, qualifier in _csharp_attribute_names(node, source): - target_nid = ensure_named_node(attr_name, line) + for ref_name, context, qualified, qualifier in csharp_method_reference_facts(node, source): + target_nid = ensure_named_node(ref_name, line) if target_nid != func_nid: - 
metadata = {"ref_token": attr_name} + metadata = {"ref_token": ref_name} if qualified: metadata["qualified"] = True if qualifier: metadata["ref_qualifier"] = qualifier add_edge(func_nid, target_nid, "references", line, - context="attribute", metadata=metadata) + context=context, metadata=metadata) if config.ts_module == "tree_sitter_java": params_node = node.child_by_field_name("parameters") @@ -4846,45 +4444,11 @@ def walk_calls(node, caller_nid: str) -> None: callee_name = _read_text(child, source) break elif config.ts_module == "tree_sitter_c_sharp" and node.type == "invocation_expression": - # C#: the invoked function is the `function` field. A member call - # `recv.Method(...)` is a member_access_expression (receiver in its - # `expression` field, method in `name`). Capture a simple-identifier - # or `this` receiver + set is_member_call so the receiver-typed - # resolver (_resolve_csharp_member_calls) can bind it to the - # receiver's declared type. Without this the bare method name matched - # any same-named method in the corpus, silently mis-resolving - # `_server.Save()` to an unrelated `Cache.Save()` (#1609). - fn_node = node.child_by_field_name("function") - if fn_node is not None and fn_node.type == "member_access_expression": - mname = fn_node.child_by_field_name("name") - recv = fn_node.child_by_field_name("expression") - if mname is not None: - callee_name = _read_text(mname, source) - is_member_call = True - if recv is not None and recv.type == "identifier": - member_receiver = _read_text(recv, source) - elif recv is not None and recv.type == "this_expression": - member_receiver = "this" - elif fn_node is not None and fn_node.type == "identifier": - callee_name = _read_text(fn_node, source) - else: - # Fallback: original name-field / first-named-child scan. 
- name_node = node.child_by_field_name("name") - if name_node: - callee_name = _read_text(name_node, source) - else: - for child in node.children: - if child.is_named: - raw = _read_text(child, source) - if "." in raw: - callee_name = raw.split(".")[-1] - is_member_call = True - parts = raw.split(".") - if len(parts) == 2 and parts[0]: - member_receiver = parts[0] - else: - callee_name = raw - break + # C#: the callee is the `function` field. For `obj.Method()` it is a + # member_access_expression (expression=receiver, name=member); capture the + # receiver so the member-call resolver can type it (L2). Force is_member_call + # so the same-file bare-name fast path is suppressed below (tgt_nid=None). + callee_name, is_member_call, member_receiver = csharp_invocation_callee(node, source) elif config.ts_module == "tree_sitter_php": # PHP: distinguish call expression subtypes if node.type == "function_call_expression": @@ -4998,17 +4562,10 @@ def walk_calls(node, caller_nid: str) -> None: # viewset action delegates to a same-named service action — which would # match `tgt_nid == caller_nid` and silently drop the call (#1446). The # captured receiver is resolved later in _resolve_python_member_calls. - # C#: ANY member call with a captured receiver defers to the - # receiver-typed resolver — a bare method-name match ignores the - # receiver's declared type and mis-binds to an unrelated same-named - # method (#1609). The receiver may be lowercase (`_server.Save()`), - # so this is broader than the capitalized/this-field Python rule. 
- _csharp_defer = ( + if is_member_call and member_receiver is not None and ( config.ts_module == "tree_sitter_c_sharp" - and is_member_call and member_receiver - ) - if is_member_call and member_receiver and ( - member_receiver[:1].isupper() or is_this_field_call or _csharp_defer + or member_receiver[:1].isupper() + or is_this_field_call ): tgt_nid = None else: @@ -5038,6 +4595,10 @@ def walk_calls(node, caller_nid: str) -> None: "source_location": f"L{node.start_point[0] + 1}", "receiver": swift_receiver or member_receiver, } + if config.ts_module == "tree_sitter_c_sharp": + rc_entry["lang"] = "csharp" + rc_entry["scope_chain"] = _csharp_scope_chain(node) + rc_entry["call_byte"] = node.start_byte # Ruby: attach the receiver's inferred type from the method's # local `var = Const.new` bindings, when unambiguously known. if member_receiver and config.ts_module == "tree_sitter_ruby": @@ -5050,9 +4611,9 @@ def walk_calls(node, caller_nid: str) -> None: # suffix sets, so a source_file suffix alone can't separate them. if config.ts_module == "tree_sitter_cpp": rc_entry["lang"] = "cpp" - # C#: tag the raw_call so _resolve_csharp_member_calls claims it - # and types the receiver against the file's field/param/local - # type table (#1609). + # C#: tag the raw_call so the C# member-call resolver claims it + # and types the receiver (declared locals/fields/params/static/ + # method-return vars) against the per-file type table. if config.ts_module == "tree_sitter_c_sharp": rc_entry["lang"] = "csharp" raw_calls.append(rc_entry) @@ -5395,13 +4956,8 @@ def _scan_js_module_dispatch(n) -> None: result["ts_type_table"] = {"path": str_path, "table": type_table} elif config.ts_module == "tree_sitter_cpp": result["cpp_type_table"] = {"path": str_path, "table": type_table} - # C#: a file-wide receiver type table (field/property/param/local -> Type) for - # _resolve_csharp_member_calls (#1609). 
Built from the whole tree, not just - # function bodies, so class-level fields/properties are in scope for every method. if config.ts_module == "tree_sitter_c_sharp": - cs_table = _csharp_member_type_table(root, source) - if cs_table: - result["csharp_type_table"] = {"path": str_path, "table": cs_table} + result.update(csharp_file_facts(root, source, str_path)) return result @@ -6055,11 +5611,24 @@ def extract_ruby(path: Path) -> dict: return _extract_generic(path, _RUBY_CONFIG) -def extract_csharp(path: Path) -> dict: - """Extract C# type declarations, methods, namespaces, and usings from a .cs file.""" +def _extract_csharp_file(path: Path) -> dict: + """Extract raw C# type declarations, methods, namespaces, and usings from a .cs file.""" return _extract_generic(path, _CSHARP_CONFIG) +def extract_csharp(path: Path) -> dict: + """Extract C# type declarations, methods, namespaces, usings, and member calls from a .cs file.""" + result = _extract_csharp_file(path) + if not isinstance(result, dict) or result.get("error"): + return result + nodes = result.get("nodes") + edges = result.get("edges") + if not isinstance(nodes, list) or not isinstance(edges, list): + return result + run_language_resolvers([path], [result], nodes, edges) + return result + + def extract_apex(path: Path) -> dict: """Extract classes, interfaces, enums, methods, and Salesforce constructs from Apex .cls and .trigger files using regex (no tree-sitter grammar on PyPI).""" @@ -9338,49 +8907,6 @@ def _disambiguate_colliding_node_ids( raw_call["caller_nid"] = unambiguous_remaps[str(raw_call["caller_nid"])] -def _canonicalize_csharp_namespace_nodes(all_nodes: list[dict], all_edges: list[dict]) -> None: - """Collapse duplicate C# namespace node entries to one canonical node per label.""" - by_label: dict[str, list[dict]] = {} - for node in all_nodes: - if node.get("type") != "namespace": - continue - label = node.get("label") - if isinstance(label, str): - by_label.setdefault(label, []).append(node) - - 
remap: dict[str, str] = {} - drop_node_ids: set[int] = set() - for group in by_label.values(): - if len(group) < 2: - continue - canonical = sorted( - group, - key=lambda node: ( - str(node.get("source_file") or ""), - str(node.get("source_location") or ""), - str(node.get("id") or ""), - ), - )[0] - canonical_id = canonical.get("id") - for node in group: - if node is canonical: - continue - drop_node_ids.add(id(node)) - dup_id = node.get("id") - if isinstance(dup_id, str) and isinstance(canonical_id, str): - remap[dup_id] = canonical_id - - if remap: - for edge in all_edges: - if edge.get("source") in remap: - edge["source"] = remap[str(edge["source"])] - if edge.get("target") in remap: - edge["target"] = remap[str(edge["target"])] - - if drop_node_ids: - all_nodes[:] = [node for node in all_nodes if id(node) not in drop_node_ids] - - # Languages whose identifiers are case-insensitive, so cross-file name resolution # may fold case. Everywhere else, case is semantic (`Path` the class vs `PATH` the # env var are distinct) and folding manufactures false edges / super-hubs (#1581). 
@@ -9460,27 +8986,16 @@ def _rewire_unique_stub_nodes(nodes: list[dict], edges: list[dict]) -> None: return by_id = {node.get("id"): node for node in nodes if node.get("id")} - csharp_scoped_relations = {"inherits", "implements", "references", "imports"} for edge in edges: - is_csharp_scoped_edge = ( - str(edge.get("source_file", "")).endswith(".cs") - and edge.get("relation") in csharp_scoped_relations - ) source = edge.get("source") if source in remap: remapped_source = remap[str(source)] - if not ( - is_csharp_scoped_edge - and str(by_id.get(remapped_source, {}).get("source_file", "")).endswith(".cs") - ): + if not _csharp_preserve_scoped_stub_rewire(edge, remapped_source, by_id): edge["source"] = remapped_source target = edge.get("target") if target in remap: remapped_target = remap[str(target)] - if not ( - is_csharp_scoped_edge - and str(by_id.get(remapped_target, {}).get("source_file", "")).endswith(".cs") - ): + if not _csharp_preserve_scoped_stub_rewire(edge, remapped_target, by_id): edge["target"] = remapped_target referenced = {x for e in edges for x in (e.get("source"), e.get("target"))} @@ -11707,120 +11222,6 @@ def _key(label: str) -> str: }) -def _resolve_csharp_member_calls( - per_file: list[dict], - all_nodes: list[dict], - all_edges: list[dict], -) -> None: - """Resolve C# member calls (``recv.Method()``) to the receiver's declared type - (#1609). - - The shared cross-file pass drops every ``is_member_call`` because a bare method - name collides across the corpus — and for C# an in-file bare match silently - mis-bound ``_server.Save()`` to an unrelated ``Cache.Save()``. The C# extractor - now records each member call's receiver plus a per-file ``name -> Type`` table - (``csharp_type_table``) of fields/properties/params/locals. This pass types the - receiver, then emits an edge ONLY when that type resolves to exactly ONE - definition (the god-node guard); an untypable receiver is skipped (no guess). 
- - Receiver typing, by precision tier: - * ``this.M()`` — receiver is the caller's own enclosing class -> EXTRACTED. - * ``Type.M()`` (capitalized) — the type is named explicitly in source -> EXTRACTED. - * ``recv.M()`` — ``recv`` typed via the file's field/param/local table -> INFERRED. - - Must run after id-disambiguation so node ids and caller_nids are final. - """ - type_table_by_file: dict[str, dict[str, str]] = {} - for result in per_file: - tt = result.get("csharp_type_table") - if tt and tt.get("path"): - type_table_by_file[tt["path"]] = tt.get("table", {}) - - def _key(label: str) -> str: - return re.sub(r"[^a-zA-Z0-9]+", "", str(label)).lower() - - contained = {e.get("target") for e in all_edges if e.get("relation") == "contains"} - - type_def_nids: dict[str, list[str]] = {} - node_by_id: dict[str, dict] = {} - for n in all_nodes: - node_by_id[n.get("id")] = n - if n.get("source_file") and n.get("id") in contained and _is_type_like_definition(n): - type_def_nids.setdefault(_key(n.get("label", "")), []).append(n["id"]) - - # (type_node_id, method_key) -> method_node_id, and caller -> enclosing type. - # C# owns its methods via `method` edges. 
- method_index: dict[tuple[str, str], str] = {} - enclosing_type: dict[str, str] = {} - for e in all_edges: - if e.get("relation") != "method": - continue - src, tgt = e.get("source"), e.get("target") - tnode = node_by_id.get(tgt) - if tnode is None: - continue - enclosing_type.setdefault(tgt, src) - method_index[(src, _key(tnode.get("label", "")))] = tgt - - all_raw_calls: list[dict] = [] - for result in per_file: - all_raw_calls.extend(result.get("raw_calls", [])) - - existing_pairs = {(e.get("source"), e.get("target")) for e in all_edges} - for rc in all_raw_calls: - if rc.get("lang") != "csharp" or not rc.get("is_member_call"): - continue - receiver = rc.get("receiver") - callee = rc.get("callee") - caller = rc.get("caller_nid") - if not receiver or not callee or not caller: - continue - src_file = rc.get("source_file", "") - if receiver == "this": - type_nid = enclosing_type.get(caller) - if not type_nid: - continue - type_qualified = True - elif receiver[:1].isupper(): - # Type.M() — the type is named explicitly (also covers a Pascal-cased - # local whose name equals its type, resolved via the table below if the - # explicit-type lookup misses). 
- type_defs = type_def_nids.get(_key(receiver), []) - if len(type_defs) != 1: - type_name = type_table_by_file.get(src_file, {}).get(receiver) - type_defs = type_def_nids.get(_key(type_name), []) if type_name else [] - if len(type_defs) != 1: - continue - type_nid = type_defs[0] - type_qualified = True - else: - type_name = type_table_by_file.get(src_file, {}).get(receiver) - if not type_name: - continue - type_defs = type_def_nids.get(_key(type_name), []) - if len(type_defs) != 1: # ambiguous or absent -> bail (god-node guard) - continue - type_nid = type_defs[0] - type_qualified = False - method_nid = method_index.get((type_nid, _key(callee))) - if not method_nid: - continue # receiver typed, but the type has no such method — skip - if method_nid == caller or (caller, method_nid) in existing_pairs: - continue - existing_pairs.add((caller, method_nid)) - all_edges.append({ - "source": caller, - "target": method_nid, - "relation": "calls", - "context": "call", - "confidence": "EXTRACTED" if type_qualified else "INFERRED", - "confidence_score": 1.0 if type_qualified else 0.8, - "source_file": src_file, - "source_location": rc.get("source_location"), - "weight": 1.0, - }) - - def _resolve_objc_member_calls( per_file: list[dict], all_nodes: list[dict], @@ -11966,8 +11367,6 @@ def _key(label: str) -> str: _resolve_objc_member_calls, ) ) -# C# receiver-typed member-call resolution (#1609): `field/param/local.Method()` -# bound to the receiver's declared type instead of a bare same-named match. 
register_language_resolver( LanguageResolver("csharp_member_calls", frozenset({".cs"}), _resolve_csharp_member_calls) ) @@ -14094,7 +13493,7 @@ def _xaml_codebehind_symbols( codebehind = _xaml_codebehind_path(path) if not codebehind: return None, {}, [] - result = extract_csharp(codebehind) + result = _extract_csharp_file(codebehind) if result.get("error"): return None, {}, [] @@ -14253,7 +13652,7 @@ def _xaml_csharp_class_nodes(path: Path) -> dict[str, list[dict]]: continue if patterns and _is_ignored(cs_path, root, patterns, _cache=ignore_cache): continue - result = extract_csharp(cs_path) + result = _extract_csharp_file(cs_path) if result.get("error"): continue for node in result.get("nodes", []): @@ -15493,7 +14892,7 @@ def _body_of(block): ".cuh": extract_cpp, ".metal": extract_cpp, ".rb": extract_ruby, - ".cs": extract_csharp, + ".cs": _extract_csharp_file, ".kt": extract_kotlin, ".kts": extract_kotlin, ".scala": extract_scala, diff --git a/graphify/extractors/MIGRATION.md b/graphify/extractors/MIGRATION.md index 1ddf6badf..b3d187c66 100644 --- a/graphify/extractors/MIGRATION.md +++ b/graphify/extractors/MIGRATION.md @@ -12,12 +12,38 @@ written so an AI agent can execute it in a single session. | zig | yes | | elixir | yes | | razor | yes | -| (40 more in extract.py) | no | +| csharp | partial — helpers + cross-file resolver split into `extractors/csharp*.py`; config-driven `extract_csharp` entry stays (see Middle path) | +| (39 more in extract.py) | no | Note: config-driven extractors (python, js, java, c, cpp, ruby, csharp, kotlin, scala, php, lua, swift, groovy) depend on the shared -`_extract_generic` core (~1,300 lines). Do NOT port them one-by-one; the core -must move first as its own coordinated batch. Pick a bespoke extractor. +`_extract_generic` core (~1,300 lines). Do NOT move the config-driven +`extract_<lang>` ENTRY POINT one-by-one; the core must move first as its own +coordinated batch. Pick a bespoke extractor for a full port.
+ +### Middle path (config-driven helper split — the C# pattern) + +Even for a config-driven language you can give it a real module home *before* the +`_extract_generic` batch: split the language-specific **helpers** (per-file +binding / type-table / shadow model, type references, imports) and the cross-file +**member-call resolver** into their own modules — see `extractors/csharp_extract.py`, +`extractors/csharp_resolve.py`, and `extractors/csharp.py` — facade-re-exported +from `extract.py`, while the thin `extract_<lang>` → `_extract_generic` entry +point stays inline. Guardrails: + +- **Prove the split is behavior-preserving** with a normalized node/edge snapshot + over BOTH the single-file `extract_<lang>` and the multi-file `extract` entry + points (order-preserving — a list-sorting canonical hides a fact-order + regression). +- **Lift shared config** (`LanguageConfig`) to `base.py`; keep the import + direction `extract.py -> extractors/` (never import `graphify.extract` here). +- Only pull C#-only logic out of `_extract_generic` where the helper **returns + facts** and the core keeps emission (no `add_node`/`add_edge`/`nodes`/`seen_ids` + threaded in) — otherwise leave a thin inline hook. + +Unlike a verbatim entry-point port, a middle-path split may ship alongside a +feature (e.g. the C# member-call resolver) with its own tests; the snapshot + +full suite are the preservation proof. ## Invariants (non-negotiable) diff --git a/graphify/extractors/base.py b/graphify/extractors/base.py index fa2e5b514..bb43e58ae 100644 --- a/graphify/extractors/base.py +++ b/graphify/extractors/base.py @@ -1,7 +1,9 @@ # DO NOT import from graphify.extract here — direction is extract.py → extractors/ only.
from __future__ import annotations +from dataclasses import dataclass from pathlib import Path +from typing import Callable from graphify.ids import make_id @@ -64,3 +66,49 @@ def _file_stem(path: Path) -> str: def _read_text(node, source: bytes) -> str: return source[node.start_byte:node.end_byte].decode("utf-8", errors="replace") + + +# ── LanguageConfig dataclass ───────────────────────────────────────────────── + +@dataclass +class LanguageConfig: + ts_module: str # e.g. "tree_sitter_python" + ts_language_fn: str = "language" # attr to call: e.g. tslang.language() + + class_types: frozenset = frozenset() + function_types: frozenset = frozenset() + import_types: frozenset = frozenset() + call_types: frozenset = frozenset() + static_prop_types: frozenset = frozenset() + helper_fn_names: frozenset = frozenset() + container_bind_methods: frozenset = frozenset() + event_listener_properties: frozenset = frozenset() + + # Name extraction + name_field: str = "name" + name_fallback_child_types: tuple = () + + # Body detection + body_field: str = "body" + body_fallback_child_types: tuple = () # e.g. 
("declaration_list", "compound_statement") + + # Call name extraction + call_function_field: str = "function" # field on call node for callee + call_accessor_node_types: frozenset = frozenset() # member/attribute nodes + call_accessor_field: str = "attribute" # field on accessor for method name + call_accessor_object_field: str = "" # field on accessor for the receiver/object + + # Stop recursion at these types in walk_calls + function_boundary_types: frozenset = frozenset() + + # Import handler: called for import nodes instead of generic handling + import_handler: Callable | None = None + + # Optional custom name resolver for functions (C, C++ declarator unwrapping) + resolve_function_name_fn: Callable | None = None + + # Extra label formatting for functions: if True, functions get "name()" label + function_label_parens: bool = True + + # Extra walk hook called after generic dispatch (for JS arrow functions, C# namespaces, etc.) + extra_walk_fn: Callable | None = None diff --git a/graphify/extractors/csharp.py b/graphify/extractors/csharp.py index 2cda37263..0cbc8f3fe 100644 --- a/graphify/extractors/csharp.py +++ b/graphify/extractors/csharp.py @@ -11,11 +11,24 @@ from __future__ import annotations import html +from dataclasses import dataclass from pathlib import Path from graphify.extractors.base import _make_id +_CSHARP_SCOPED_STUB_REWIRE_RELATIONS = frozenset({"inherits", "implements", "references", "imports"}) + + +def _csharp_preserve_scoped_stub_rewire(edge: dict, remapped_id: str, by_id: dict) -> bool: + """Return True when generic stub rewiring must not cross C# scoped resolution.""" + return ( + str(edge.get("source_file", "")).endswith(".cs") + and edge.get("relation") in _CSHARP_SCOPED_STUB_REWIRE_RELATIONS + and str(by_id.get(remapped_id, {}).get("source_file", "")).endswith(".cs") + ) + + def _build_csharp_type_def_index(all_nodes: list[dict]) -> dict[tuple[str, str], str]: """Return deterministic ``(namespace, name) -> node_id`` C# type 
definitions.""" candidates: dict[tuple[str, str], list[dict]] = {} @@ -58,6 +71,55 @@ def _build_csharp_type_def_index(all_nodes: list[dict]) -> dict[tuple[str, str], } +def _build_csharp_type_def_groups(all_nodes: list[dict]) -> dict[tuple[str, str], list[str]]: + """Like _build_csharp_type_def_index but keeps ALL node ids per (namespace, name). + + Partial classes are split into multiple same-key nodes; L2 aggregates their + `method` edges. Returns (namespace, name) -> [node_id, ...] deterministically + sorted. + """ + groups: dict[tuple[str, str], list[dict]] = {} + for node in all_nodes: + if node.get("type") == "namespace": + continue + metadata = node.get("metadata") or {} + if not isinstance(metadata, dict): + metadata = {} + if metadata.get("is_nested_type"): + continue + nid = node.get("id") + label = node.get("label") + if not (isinstance(nid, str) and nid and isinstance(label, str) and label): + continue + source_file = node.get("source_file") + if ( + not isinstance(source_file, str) + or not source_file.endswith(".cs") + or node.get("file_type") != "code" + ): + continue + if label.endswith(")") or label.startswith(".") or "." in label: + continue + namespace = metadata.get("namespace", "") + if not isinstance(namespace, str): + namespace = "" + groups.setdefault((namespace, label), []).append(node) + return { + key: [ + n["id"] + for n in sorted( + nodes, + key=lambda node: ( + str(node.get("source_file") or ""), + str(node.get("source_location") or ""), + str(node.get("id") or ""), + ), + ) + ] + for key, nodes in groups.items() + } + + def _strip_trailing_csharp_generic_args(target_fqn: str) -> str: target_fqn = target_fqn.strip() if not target_fqn.endswith(">"): @@ -74,6 +136,294 @@ def _strip_trailing_csharp_generic_args(target_fqn: str) -> str: return target_fqn +def _csharp_base_identifier(name: str) -> str: + """`Foo` -> `Foo`; a plain identifier is returned unchanged. 
Prevents a + generic method callee from colliding with a same-spelled non-generic (`Fooint`).""" + name = name.strip() + lt = name.find("<") + return name[:lt].strip() if lt != -1 else name + + +def _is_cs_file(value: object) -> bool: + return isinstance(value, str) and value.endswith(".cs") + + +def _metadata(value: object) -> dict: + return value if isinstance(value, dict) else {} + + +def _namespace(node: dict | None) -> str: + metadata = _metadata((node or {}).get("metadata")) + namespace = metadata.get("namespace", "") + return namespace if isinstance(namespace, str) else "" + + +def _append_unique(items: list[str], value: str) -> None: + if value not in items: + items.append(value) + + +@dataclass(frozen=True) +class CsharpImportEntry: + target_fqn: str + scope_kind: str + scope_id: str | None + using_kind: str + alias: str | None = None + target_kind: str | None = None + + @property + def is_extern(self) -> bool: + return self.using_kind == "extern_alias" + + +class CsharpNameResolver: + """Shared, module-scope C# name resolution built from a full node+edge set.""" + + def __init__(self, all_nodes, all_edges): + safe_nodes = all_nodes if isinstance(all_nodes, list) else [] + safe_edges = all_edges if isinstance(all_edges, list) else [] + self.node_by_id = { + node["id"]: node + for node in safe_nodes + if isinstance(node, dict) and isinstance(node.get("id"), str) and node.get("id") + } + self.type_def_index = _build_csharp_type_def_index(safe_nodes) + self.type_def_groups = _build_csharp_type_def_groups(safe_nodes) + self.known_namespaces = { + node.get("label") + for node in safe_nodes + if isinstance(node, dict) + and node.get("type") == "namespace" + and isinstance(node.get("label"), str) + } + self.namespace_usings_by_file: dict[str, list[CsharpImportEntry]] = {} + self.static_usings_by_file: dict[str, list[CsharpImportEntry]] = {} + self.global_namespace_usings: list[CsharpImportEntry] = [] + self.global_static_usings: list[CsharpImportEntry] = [] + 
self.aliases_by_file: dict[str, dict[str, list[CsharpImportEntry]]] = {} + self.global_aliases: dict[str, list[CsharpImportEntry]] = {} + self.extern_aliases_by_file: dict[str, list[CsharpImportEntry]] = {} + + for edge in safe_edges: + if not isinstance(edge, dict) or edge.get("relation") != "imports": + continue + source_node = self.node_by_id.get(edge.get("source")) + if not ( + source_node + and isinstance(source_node.get("label"), str) + and source_node.get("label", "").endswith(".cs") + ): + continue + source_file = source_node.get("source_file") + if not _is_cs_file(source_file): + continue + metadata = _metadata(edge.get("metadata")) + target_fqn = metadata.get("target_fqn") + if not isinstance(target_fqn, str) or not target_fqn: + continue + scope_kind = metadata.get("scope_kind") or "file" + scope_id = metadata.get("scope_id") + scope_id = scope_id if isinstance(scope_id, str) else None + using_kind = metadata.get("using_kind") or "namespace" + target_kind = metadata.get("target_kind") + target_kind = target_kind if target_kind in ("type", "namespace") else None + alias = metadata.get("alias") + alias = alias if isinstance(alias, str) and alias else None + entry = CsharpImportEntry( + target_fqn=target_fqn, + scope_kind=str(scope_kind), + scope_id=scope_id, + using_kind=str(using_kind), + alias=alias, + target_kind=target_kind, + ) + if using_kind == "namespace": + bucket = self.global_namespace_usings if entry.scope_kind == "global" else self.namespace_usings_by_file.setdefault(source_file, []) + if entry not in bucket: + bucket.append(entry) + elif using_kind == "static": + bucket = self.global_static_usings if entry.scope_kind == "global" else self.static_usings_by_file.setdefault(source_file, []) + if entry not in bucket: + bucket.append(entry) + elif using_kind == "alias" and alias: + if entry.scope_kind == "global": + bucket = self.global_aliases.setdefault(alias, []) + else: + bucket = self.aliases_by_file.setdefault(source_file, 
{}).setdefault(alias, []) + if entry not in bucket: + bucket.append(entry) + elif using_kind == "extern_alias" and alias: + bucket = self.extern_aliases_by_file.setdefault(source_file, []) + if entry not in bucket: + bucket.append(entry) + + def _scope_chain(self, source_node): + if not isinstance(source_node, dict): + return [] + chain = _metadata(source_node.get("metadata")).get("scope_chain") + return chain if isinstance(chain, list) else [] + + def _using_in_scope(self, scope_kind, scope_id, source_node): + if scope_kind in ("global", "file"): + return True + return scope_id is not None and scope_id in self._scope_chain(source_node) + + def _entry_in_scope(self, entry: CsharpImportEntry, source_node) -> bool: + return self._using_in_scope(entry.scope_kind, entry.scope_id, source_node) + + def _alias_entries(self, label, source_file): + entries = list(self.global_aliases.get(label, [])) + entries.extend(self.aliases_by_file.get(source_file, {}).get(label, [])) + return entries + + def _extern_alias_entries(self, label, source_file): + return [ + entry for entry in self.extern_aliases_by_file.get(source_file, []) + if entry.alias == label + ] + + def _scopes_for(self, source_node, source_file): + scopes = [] + _append_unique(scopes, _namespace(source_node)) + _append_unique(scopes, "") + for entry in self.global_namespace_usings: + _append_unique(scopes, entry.target_fqn) + for entry in self.namespace_usings_by_file.get(source_file, []): + if self._entry_in_scope(entry, source_node): + _append_unique(scopes, entry.target_fqn) + return scopes + + def _type_candidates(self, label, source_node, source_file): + candidates = [] + for namespace in self._scopes_for(source_node, source_file): + hit = self.type_def_index.get((namespace, label)) + if hit and hit not in candidates: + candidates.append(hit) + return candidates + + def _classify_alias_entry(self, entry: CsharpImportEntry, source_node, source_file) -> tuple[str | None, str | None, str | None]: + base_fqn = 
_strip_trailing_csharp_generic_args(html.unescape(entry.target_fqn)) + if base_fqn in self.known_namespaces: + return "namespace", base_fqn, None + namespace, sep, simple_name = base_fqn.rpartition(".") + if sep and self.type_def_index.get((namespace, simple_name)): + return "type", namespace, simple_name + if sep and namespace in self.known_namespaces: + return "type", namespace, simple_name + if not sep: + candidates = self._type_candidates(simple_name or namespace, source_node, source_file) + if len(candidates) == 1: + node = self.node_by_id.get(candidates[0]) + return "type", _namespace(node), node.get("label") if node else None + if entry.target_kind in ("type", "namespace"): + return entry.target_kind, namespace if entry.target_kind == "type" else base_fqn, simple_name if entry.target_kind == "type" else None + return None, None, None + + def is_alias_in_scope(self, label, source_node, source_file) -> bool: + scoped = [ + entry for entry in self._alias_entries(label, source_file) + if self._entry_in_scope(entry, source_node) + ] + scoped.extend( + entry for entry in self._extern_alias_entries(label, source_file) + if self._entry_in_scope(entry, source_node) + ) + return bool(scoped) + + def using_static_in_scope(self, source_node, source_file) -> list[CsharpImportEntry]: + entries = list(self.global_static_usings) + entries.extend( + entry for entry in self.static_usings_by_file.get(source_file, []) + if self._entry_in_scope(entry, source_node) + ) + return entries + + def namespace_may_bind(self, label, source_node, source_file) -> bool: + for namespace in self._scopes_for(source_node, source_file): + candidate = f"{namespace}.{label}" if namespace else label + if candidate in self.known_namespaces: + return True + return False + + def qualifier_is_namespace_in_scope(self, qualifier, source_node, source_file) -> bool: + if not isinstance(qualifier, str) or not qualifier: + return False + first, sep, rest = qualifier.partition(".") + has_alias, alias_ns = 
self._alias_namespace_for_leading(first, source_node, source_file) + if has_alias: + if not alias_ns: + return False + candidate = f"{alias_ns}.{rest}" if rest else alias_ns + return not rest or candidate in self.known_namespaces + if qualifier in self.known_namespaces: + return True + return self.namespace_may_bind(first, source_node, source_file) + + def resolve_alias(self, label, source_node, source_file): + entries = [ + entry for entry in self._alias_entries(label, source_file) + if self._entry_in_scope(entry, source_node) + ] + if not entries: + return None + hits = set() + for entry in entries: + target_kind, namespace, simple_name = self._classify_alias_entry(entry, source_node, source_file) + if target_kind != "type" or not simple_name: + continue + hit = self.type_def_index.get((namespace or "", simple_name)) + if hit: + hits.add(hit) + return next(iter(hits)) if len(hits) == 1 else None + + def resolve_label(self, label, source_node, source_file): + if self.is_alias_in_scope(label, source_node, source_file): + resolved = self.resolve_alias(label, source_node, source_file) + return resolved + candidates = self._type_candidates(label, source_node, source_file) + return candidates[0] if len(candidates) == 1 else None + + def _alias_namespace_for_leading(self, qualifier, source_node, source_file) -> tuple[bool, str | None]: + entries = [ + entry for entry in self._alias_entries(qualifier, source_file) + if self._entry_in_scope(entry, source_node) + ] + entries.extend( + entry for entry in self._extern_alias_entries(qualifier, source_file) + if self._entry_in_scope(entry, source_node) + ) + if not entries: + return False, None + hits = set() + for entry in entries: + if entry.is_extern: + return True, None + target_kind, namespace, simple_name = self._classify_alias_entry(entry, source_node, source_file) + if target_kind == "namespace" and namespace: + hits.add(namespace) + else: + return True, None + return True, next(iter(hits)) if len(hits) == 1 else None 
+ + def resolve_qualified(self, label, qualifier, source_node, source_file): + if not isinstance(qualifier, str) or not qualifier: + return None + first, sep, rest = qualifier.partition(".") + has_alias, alias_ns = self._alias_namespace_for_leading(first, source_node, source_file) + if has_alias: + if not alias_ns: + return None + qualifier = f"{alias_ns}.{rest}" if rest else alias_ns + if qualifier in self.known_namespaces: + return self.type_def_index.get((qualifier, label)) + return None + + +def build_csharp_name_resolver(all_nodes, all_edges): + return CsharpNameResolver(all_nodes, all_edges) + + def _resolve_cross_file_csharp_imports( per_file: list[dict], paths: list[Path], @@ -161,140 +511,13 @@ def _resolve_csharp_type_references( pass is the single soundness gate: it uses only graph-stamped namespace/import facts, keeps a binding only when the referenced simple name resolves to one in-scope real type definition, and otherwise leaves the edge on a dangling stub. + Name resolution is delegated to the shared CsharpNameResolver (#1562 lifted). 
""" _ = (per_file, paths) - def _is_cs_file(value: object) -> bool: - return isinstance(value, str) and value.endswith(".cs") - - def _metadata(value: object) -> dict: - return value if isinstance(value, dict) else {} - - def _namespace(node: dict | None) -> str: - metadata = _metadata((node or {}).get("metadata")) - namespace = metadata.get("namespace", "") - return namespace if isinstance(namespace, str) else "" - - def _append_unique(items: list[str], value: str) -> None: - if value not in items: - items.append(value) - - node_by_id = { - node["id"]: node - for node in all_nodes - if isinstance(node.get("id"), str) and node.get("id") - } - type_def_index = _build_csharp_type_def_index(all_nodes) - known_namespaces = { - node.get("label") - for node in all_nodes - if node.get("type") == "namespace" and isinstance(node.get("label"), str) - } - - # Each using carries its lexical scope: ("file", None) applies file-wide; - # ("namespace", scope_id) applies only where scope_id is in the ref's scope_chain. 
- namespace_usings_by_file: dict[str, list[tuple[str, str, str | None]]] = {} - aliases_by_file: dict[str, dict[str, list[tuple[str, str, str | None]]]] = {} - - for edge in all_edges: - if edge.get("relation") != "imports": - continue - source_node = node_by_id.get(edge.get("source")) - if not ( - source_node - and isinstance(source_node.get("label"), str) - and source_node.get("label", "").endswith(".cs") - ): - continue - source_file = source_node.get("source_file") - if not _is_cs_file(source_file): - continue - metadata = _metadata(edge.get("metadata")) - target_fqn = metadata.get("target_fqn") - if not isinstance(target_fqn, str) or not target_fqn: - continue - scope_kind = metadata.get("scope_kind") or "file" - scope_id = metadata.get("scope_id") - using_kind = metadata.get("using_kind") - if using_kind == "namespace": - entry = (target_fqn, scope_kind, scope_id) - bucket = namespace_usings_by_file.setdefault(source_file, []) - if entry not in bucket: - bucket.append(entry) - elif using_kind == "alias": - alias = metadata.get("alias") - if isinstance(alias, str) and alias: - entry = (target_fqn, scope_kind, scope_id) - bucket = aliases_by_file.setdefault(source_file, {}).setdefault(alias, []) - if entry not in bucket: - bucket.append(entry) - - def _scope_chain(node: dict) -> list[str]: - chain = _metadata(node.get("metadata")).get("scope_chain") - return chain if isinstance(chain, list) else [] - - def _using_in_scope(scope_kind: str, scope_id: str | None, source_node: dict) -> bool: - if scope_kind == "file": - return True - return scope_id is not None and scope_id in _scope_chain(source_node) - - def _scopes_for(source_node: dict, source_file: str) -> list[str]: - scopes: list[str] = [] - _append_unique(scopes, _namespace(source_node)) - _append_unique(scopes, "") - for namespace, scope_kind, scope_id in namespace_usings_by_file.get(source_file, []): - if _using_in_scope(scope_kind, scope_id, source_node): - _append_unique(scopes, namespace) - return 
scopes - - def _resolve_alias(label: str, source_node: dict, source_file: str) -> str | None: - hits = set() - for target_fqn, scope_kind, scope_id in aliases_by_file.get(source_file, {}).get(label, []): - if not _using_in_scope(scope_kind, scope_id, source_node): - continue - base_fqn = _strip_trailing_csharp_generic_args(html.unescape(target_fqn)) - namespace, sep, simple_name = base_fqn.rpartition(".") - if not sep: - simple_name = namespace - namespace = "" - if not simple_name: - continue - hit = type_def_index.get((namespace, simple_name)) - if hit: - hits.add(hit) - return next(iter(hits)) if len(hits) == 1 else None - - def _resolve_label(label: str, source_node: dict, source_file: str) -> str | None: - if label in aliases_by_file.get(source_file, {}): - return _resolve_alias(label, source_node, source_file) - candidates: list[str] = [] - for namespace in _scopes_for(source_node, source_file): - hit = type_def_index.get((namespace, label)) - if hit and hit not in candidates: - candidates.append(hit) - return candidates[0] if len(candidates) == 1 else None - - def _resolve_qualified(label: str, qualifier: object, source_node: dict, source_file: str) -> str | None: - # Sound qualified resolution: an in-scope alias for Q shadows the namespace Q. For a qualified - # ref Q.label, look up (alias_target_namespace, label). If no in-scope alias, fall through to an - # exact known namespace. Dangle on ambiguity / no hit / unknown qualifier. 
- if not isinstance(qualifier, str) or not qualifier: - return None - in_scope = [ - entry for entry in aliases_by_file.get(source_file, {}).get(qualifier, []) - if _using_in_scope(entry[1], entry[2], source_node) - ] - if in_scope: - hits = set() - for target_fqn, _scope_kind, _scope_id in in_scope: - alias_ns = _strip_trailing_csharp_generic_args(html.unescape(target_fqn)) - hit = type_def_index.get((alias_ns, label)) - if hit: - hits.add(hit) - return next(iter(hits)) if len(hits) == 1 else None - if qualifier in known_namespaces: - return type_def_index.get((qualifier, label)) - return None + resolver = build_csharp_name_resolver(all_nodes, all_edges) + node_by_id = resolver.node_by_id + aliases_by_file = resolver.aliases_by_file def _is_placeholder(node: dict | None) -> bool: return bool(node) and not node.get("source_file") @@ -311,7 +534,6 @@ def _label_for_type_ref_target(target_node: dict, source_file: str) -> str | Non return None if not label.endswith(".cs"): return label - stem = label[:-3] for alias in aliases_by_file.get(source_file, {}): if alias.lower() == stem.lower() or _make_id(alias) == _make_id(stem): @@ -322,7 +544,6 @@ def _dangling_stub_id(label: str, current_target: object) -> str: current = node_by_id.get(current_target) if _is_placeholder(current) and current.get("label") == label: return str(current_target) - for node in all_nodes: nid = node.get("id") if ( @@ -370,9 +591,11 @@ def _dangling_stub_id(label: str, current_target: object) -> str: if not label: continue if metadata.get("qualified"): - resolved = _resolve_qualified(label, metadata.get("ref_qualifier"), source_node, source_file) + resolved = resolver.resolve_qualified( + label, metadata.get("ref_qualifier"), source_node, source_file + ) else: - resolved = _resolve_label(label, source_node, source_file) + resolved = resolver.resolve_label(label, source_node, source_file) target = edge.get("target") desired = resolved or _dangling_stub_id(label, target) if desired != target: @@ 
-391,3 +614,46 @@ def _dangling_stub_id(label: str, current_target: object) -> str: node for node in all_nodes if node.get("id") not in repointed_from or node.get("id") in still_referenced ] + + +def _canonicalize_csharp_namespace_nodes(all_nodes: list[dict], all_edges: list[dict]) -> None: + """Collapse duplicate C# namespace node entries to one canonical node per label.""" + by_label: dict[str, list[dict]] = {} + for node in all_nodes: + if node.get("type") != "namespace": + continue + label = node.get("label") + if isinstance(label, str): + by_label.setdefault(label, []).append(node) + + remap: dict[str, str] = {} + drop_node_ids: set[int] = set() + for group in by_label.values(): + if len(group) < 2: + continue + canonical = sorted( + group, + key=lambda node: ( + str(node.get("source_file") or ""), + str(node.get("source_location") or ""), + str(node.get("id") or ""), + ), + )[0] + canonical_id = canonical.get("id") + for node in group: + if node is canonical: + continue + drop_node_ids.add(id(node)) + dup_id = node.get("id") + if isinstance(dup_id, str) and isinstance(canonical_id, str): + remap[dup_id] = canonical_id + + if remap: + for edge in all_edges: + if edge.get("source") in remap: + edge["source"] = remap[str(edge["source"])] + if edge.get("target") in remap: + edge["target"] = remap[str(edge["target"])] + + if drop_node_ids: + all_nodes[:] = [node for node in all_nodes if id(node) not in drop_node_ids] diff --git a/graphify/extractors/csharp_extract.py b/graphify/extractors/csharp_extract.py new file mode 100644 index 000000000..0b47f58c9 --- /dev/null +++ b/graphify/extractors/csharp_extract.py @@ -0,0 +1,1187 @@ +"""C# per-file extractor helpers, split out of graphify/extract.py.""" +from __future__ import annotations + +import hashlib + +from graphify.extractors.base import _make_id, _read_text, LanguageConfig +from graphify.extractors.csharp import _csharp_base_identifier +from graphify.security import sanitize_metadata + + +def 
_csharp_namespace_id(dotted_name: str) -> str: + digest = hashlib.sha1(dotted_name.encode("utf-8")).hexdigest()[:16] + return f"csharp_namespace:{digest}" + + +_CSHARP_SCOPE_NODES = frozenset({ + "block", + "checked_statement", + "compilation_unit", + "constructor_declaration", + "class_declaration", + "conversion_operator_declaration", + "delegate_declaration", + "destructor_declaration", + "do_statement", + "enum_declaration", + "event_declaration", + "file_scoped_namespace_declaration", + "fixed_statement", + "finally_clause", + "for_statement", + "foreach_statement", + "if_statement", + "indexer_declaration", + "interface_declaration", + "lambda_expression", + "lock_statement", + "local_function_statement", + "method_declaration", + "namespace_declaration", + "operator_declaration", + "property_declaration", + "query_expression", + "record_declaration", + "struct_declaration", + "switch_expression", + "switch_expression_arm", + "switch_section", + "switch_statement", + "try_statement", + "unsafe_statement", + "using_statement", + "while_statement", + "anonymous_method_expression", + "accessor_declaration", + "catch_clause", +}) + + +_CSHARP_TYPE_DECLARATION_NODES = frozenset({ + "class_declaration", + "enum_declaration", + "interface_declaration", + "record_declaration", + "struct_declaration", +}) + + +_CSHARP_PARAMETER_LIST_NODES = frozenset({ + "bracketed_parameter_list", + "parameter_list", +}) + + +_CSHARP_CALLABLE_PARAMETER_OWNER_NODES = frozenset({ + "accessor_declaration", + "anonymous_method_expression", + "constructor_declaration", + "conversion_operator_declaration", + "delegate_declaration", + "indexer_declaration", + "lambda_expression", + "local_function_statement", + "method_declaration", + "operator_declaration", +}) + + +def _csharp_scope_chain(node) -> list[str]: + """Innermost-first lexical scope ids (f"s{start_byte}") from `node` up.""" + chain, cur = [], node + while cur is not None: + if cur.type in _CSHARP_SCOPE_NODES: + 
chain.append(f"s{cur.start_byte}") + cur = cur.parent + return chain + + +def _csharp_scope_id(node) -> str: + ch = _csharp_scope_chain(node) + return ch[0] if ch else "s0" + + +_CSHARP_BINDING_PATTERN_NODES = frozenset({ + "declaration_expression", + "declaration_pattern", + "list_pattern", + "parenthesized_pattern", + "parenthesized_variable_designation", + "positional_pattern_clause", + "property_pattern_clause", + "recursive_pattern", + "subpattern", + "tuple_pattern", + "var_pattern", +}) + + +def _csharp_designator_names(node, source: bytes) -> list[str]: + """Collect local value names from C# binding designators/patterns. + + The installed grammar has `parenthesized_variable_designation` but no + `single_variable_designation` node; concrete single-name designators are + `identifier` leaves in binding positions. This collector is deliberately + pattern-recursive, but it only treats direct designator/name-field + identifiers as bindings, so recursive-pattern property names (`P:`) and + type identifiers (`Actual`) are not collected. 
+ """ + if node is None: + return [] + + out: list[str] = [] + + def _add(name: str) -> None: + if name and name != "_" and name not in out: + out.append(name) + + def _collect(cur, direct_identifier: bool = False) -> None: + if cur is None: + return + if cur.type == "identifier": + if direct_identifier: + _add(_read_text(cur, source)) + return + if cur.type == "implicit_parameter": + _add(_read_text(cur, source)) + return + + name_node = cur.child_by_field_name("name") + if name_node is not None: + _collect(name_node, direct_identifier=True) + + if cur.type in ("tuple_pattern", "parenthesized_variable_designation"): + for i, child in enumerate(cur.children): + if child.type == "identifier" and ( + cur.type == "parenthesized_variable_designation" + or cur.field_name_for_child(i) == "name" + ): + _collect(child, direct_identifier=True) + elif child.is_named: + _collect(child) + return + + if cur.type in _CSHARP_BINDING_PATTERN_NODES: + for i, child in enumerate(cur.children): + if cur.field_name_for_child(i) in ("type", "expression", "qualifier"): + continue + if cur.type == "var_pattern" and child.type == "identifier": + _collect(child, direct_identifier=True) + elif child.type in _CSHARP_BINDING_PATTERN_NODES: + _collect(child) + + _collect(node, direct_identifier=node.type in ("identifier", "implicit_parameter")) + return out + + +def _bare_type_node(type_node, source: bytes) -> str | None: + if type_node is None or type_node.type != "identifier": + return None + text = _read_text(type_node, source).strip() + return text if text and text != "var" else None + + +def _csharp_declared_bare_type(type_node, decl_node, source: bytes) -> str | None: + bare = _bare_type_node(type_node, source) + if bare is None: + return None + return None if bare in _csharp_type_parameters_in_scope(decl_node, source) else bare + + +def _csharp_parameter_scope_owner(node): + cur = node.parent + while cur is not None and cur.type in _CSHARP_PARAMETER_LIST_NODES: + cur = cur.parent + 
return cur + + +def _csharp_parameter_is_callable_scoped(node) -> bool: + owner = _csharp_parameter_scope_owner(node) + if owner is None or owner.type in _CSHARP_TYPE_DECLARATION_NODES: + return False + return owner.type in _CSHARP_CALLABLE_PARAMETER_OWNER_NODES + + +def _build_csharp_type_table(root, source: bytes) -> dict[str, list[tuple[str, str | None, int]]]: + """Per-lexical-scope C# value binders: + Entries have the shape `(var_name, bare_unqualified_type_or_None, decl_start_byte)`. + + The table records every non-member value binder collected by + `_build_csharp_shadow_names`, minus type-scoped binders. Bare declared types + are accepted only when they are identifiers and not visible type parameters. + Unknown/unaccepted types are recorded as None so the resolver can poison + shadowed receiver names instead of falling through to fields/properties. + """ + table: dict[str, list[tuple[str, str | None, int]]] = {} + + def _put(scope_id: str, name: str, type_name: str | None, decl_start_byte: int) -> None: + if name and name != "_": + table.setdefault(scope_id, []).append((name, type_name, decl_start_byte)) + + def _first_tuple_pattern(node): + return next((c for c in node.children if c.type == "tuple_pattern"), None) + + def _put_explicit_designators(scope_node, designator_node, type_name: str | None, decl_start_byte: int) -> None: + for name in _csharp_designator_names(designator_node, source): + _put(_csharp_scope_id(scope_node), name, type_name, decl_start_byte) + + def _put_query_name(scope_node, name_node) -> None: + if name_node is not None and name_node.type == "identifier": + _put(_csharp_scope_id(scope_node), _read_text(name_node, source), None, name_node.start_byte) + + def _walk(node) -> None: + if node.type == "parameter": + if _csharp_parameter_is_callable_scoped(node): + type_name = _csharp_declared_bare_type(node.child_by_field_name("type"), node, source) + _put_explicit_designators(node, node, type_name, node.start_byte) + elif node.type == 
"implicit_parameter": + if _csharp_parameter_is_callable_scoped(node): + _put_explicit_designators(node, node, None, node.start_byte) + elif node.type == "variable_declaration": + is_type_member_decl = node.parent is not None and node.parent.type in ( + "field_declaration", + "event_field_declaration", + ) + if not is_type_member_decl: + type_node = node.child_by_field_name("type") + is_var = type_node is not None and type_node.type == "implicit_type" + declared = _csharp_declared_bare_type(type_node, node, source) + for child in node.children: + if child.type != "variable_declarator": + continue + name_node = child.child_by_field_name("name") or _first_tuple_pattern(child) + if name_node is None: + continue + if name_node.type == "tuple_pattern": + for var_name in _csharp_designator_names(name_node, source): + _put(_csharp_scope_id(child), var_name, None, child.start_byte) + continue + names = _csharp_designator_names(name_node, source) + if not names: + continue + type_name = declared + if declared is None and is_var: + # var f = new Bar(args): in tree_sitter_c_sharp, the + # variable_declarator has only a `name` field; the RHS + # object_creation_expression is a named child after `=`. 
+ creation = next( + (c for c in child.named_children if c.type == "object_creation_expression"), + None, + ) + if creation is not None: + ctype = creation.child_by_field_name("type") + type_name = _csharp_declared_bare_type(ctype, child, source) + else: + type_name = None + for var_name in names: + _put(_csharp_scope_id(child), var_name, type_name, child.start_byte) + elif node.type == "foreach_statement": + type_name = _csharp_declared_bare_type(node.child_by_field_name("type"), node, source) + _put_explicit_designators(node, node.child_by_field_name("left"), type_name, node.start_byte) + elif node.type in ("catch_declaration", "declaration_pattern", "declaration_expression"): + type_name = _csharp_declared_bare_type(node.child_by_field_name("type"), node, source) + _put_explicit_designators(node, node, type_name, node.start_byte) + elif node.type == "var_pattern": + for name in _csharp_designator_names(node, source): + _put(_csharp_scope_id(node), name, None, node.start_byte) + elif node.type == "from_clause": + _put_query_name(node, node.child_by_field_name("name")) + elif node.type in ("let_clause", "join_clause", "join_into_clause"): + _put_query_name(node, _csharp_first_identifier_child(node)) + elif node.type == "query_continuation": + _put_query_name(node, _csharp_first_identifier_child(node)) + elif node.type == "query_expression": + for index, child in enumerate(node.children): + if child.type != "into": + continue + for next_child in node.children[index + 1:]: + if next_child.type == "identifier": + _put_query_name(node, next_child) + break + if next_child.is_named: + break + + for child in node.children: + _walk(child) + + _walk(root) + return table + + +def _csharp_direct_invocation_initializer(declarator): + seen_equals = False + for child in declarator.children: + if child.type == "=": + seen_equals = True + continue + if seen_equals and child.is_named: + return child if child.type == "invocation_expression" else None + return None + + +def 
_build_csharp_var_call_inits(root, source: bytes) -> dict[str, list[dict[str, object]]]: + """Structural facts for `var x = ();` locals. + + Facts are keyed by the same lexical binding identity as `csharp_type_table`: + `(scope_id, name, decl_start_byte)`. Poison markers are scope/name pairs; + the resolver skips every init for a poisoned pair. + """ + inits: list[dict[str, object]] = [] + decl_counts: dict[tuple[str, str], int] = {} + assignments: list[dict[str, object]] = [] + + def _record_decl(scope_id: str, name: str) -> None: + if name and name != "_": + key = (scope_id, name) + decl_counts[key] = decl_counts.get(key, 0) + 1 + + def _walk(node) -> None: + if node.type == "variable_declaration": + is_type_member_decl = node.parent is not None and node.parent.type in ( + "field_declaration", + "event_field_declaration", + ) + if not is_type_member_decl: + type_node = node.child_by_field_name("type") + is_var = type_node is not None and type_node.type == "implicit_type" + for child in node.children: + if child.type != "variable_declarator": + continue + scope_id = _csharp_scope_id(child) + name_node = child.child_by_field_name("name") or _csharp_first_child(child, "tuple_pattern") + names = _csharp_designator_names(name_node, source) + for name in names: + _record_decl(scope_id, name) + if not (is_var and len(names) == 1): + continue + init_call = _csharp_direct_invocation_initializer(child) + if init_call is None: + continue + inits.append({ + "scope_id": scope_id, + "name": names[0], + "decl_start_byte": child.start_byte, + "call_byte": init_call.start_byte, + }) + elif node.type == "assignment_expression": + left = node.child_by_field_name("left") + if left is None: + left = next((child for child in node.children if child.is_named), None) + if left is not None and left.type == "identifier": + assignments.append({ + "name": _read_text(left, source), + "scope_chain": _csharp_scope_chain(node), + "assignment_byte": node.start_byte, + }) + + for child in 
node.children: + _walk(child) + + _walk(root) + + poisoned: dict[tuple[str, str], set[str]] = {} + for key, count in decl_counts.items(): + if count > 1: + poisoned.setdefault(key, set()).add("redeclaration") + + for assignment in assignments: + name = assignment.get("name") + scope_chain = assignment.get("scope_chain") + assignment_byte = assignment.get("assignment_byte") + if not (isinstance(name, str) and isinstance(scope_chain, list) and isinstance(assignment_byte, int)): + continue + for fact in inits: + scope_id = fact.get("scope_id") + decl_start_byte = fact.get("decl_start_byte") + if ( + fact.get("name") == name + and isinstance(scope_id, str) + and scope_id in scope_chain + and isinstance(decl_start_byte, int) + and decl_start_byte < assignment_byte + ): + poisoned.setdefault((scope_id, name), set()).add("assignment") + + poisoned_facts = [ + {"scope_id": scope_id, "name": name, "reason": reason} + for (scope_id, name), reasons in sorted(poisoned.items()) + for reason in sorted(reasons) + ] + return {"inits": inits, "poisoned": poisoned_facts} + + +def _csharp_unique_sorted(values: list[str] | set[str]) -> list[str]: + return sorted({v for v in values if isinstance(v, str) and v and v != "_"}) + + +def _csharp_shadow_bucket() -> dict[str, list[str]]: + return { + "values": [], + "namespaces": [], + "methods": [], + "typeparams": [], + "nested_types": [], + } + + +def _csharp_add_shadow( + scopes: dict[str, dict[str, list[str]]], + scope_id: str, + bucket: str, + name: str | None, +) -> None: + if not name or name == "_": + return + entry = scopes.setdefault(scope_id, _csharp_shadow_bucket()) + if name not in entry[bucket]: + entry[bucket].append(name) + + +def _csharp_first_child(node, node_type: str): + return next((child for child in node.children if child.type == node_type), None) + + +def _csharp_enclosing_scope_id(node) -> str: + cur = node.parent + while cur is not None: + if cur.type in _CSHARP_SCOPE_NODES and cur.type != node.type: + return 
f"s{cur.start_byte}" + cur = cur.parent + return "s0" + + +def _csharp_first_identifier_child(node): + return next((child for child in node.children if child.type == "identifier"), None) + + +def _csharp_names_from_variable_declaration(node, source: bytes) -> list[str]: + names: list[str] = [] + for child in node.children: + if child.type != "variable_declarator": + continue + name_node = child.child_by_field_name("name") or _csharp_first_child(child, "tuple_pattern") + names.extend(_csharp_designator_names(name_node, source)) + return _csharp_unique_sorted(names) + + +def _csharp_direct_member_names(type_node, source: bytes) -> dict[str, list[str]]: + """Direct C# member names needed for inherited shadow checks. + + Values are fields, properties, events, enum members, and record positional + parameters. Methods are regular method declarations. Nested types are direct + type declarations inside this type. The resolver later walks resolved + internal base chains and treats an unresolved base as "cannot prove absence". 
+ """ + members: dict[str, set[str]] = { + "values": set(), + "methods": set(), + "nested_types": set(), + } + if type_node.type == "record_declaration": + for child in type_node.children: + if child.type != "parameter_list": + continue + for param in child.children: + if param.type != "parameter": + continue + name_node = param.child_by_field_name("name") + if name_node is not None: + members["values"].add(_read_text(name_node, source)) + + body = type_node.child_by_field_name("body") + if body is not None: + for child in body.children: + if child.type in ("field_declaration", "event_field_declaration"): + decl = child.child_by_field_name("declaration") or _csharp_first_child(child, "variable_declaration") + if decl is not None: + members["values"].update(_csharp_names_from_variable_declaration(decl, source)) + elif child.type in ("property_declaration", "event_declaration"): + name_node = child.child_by_field_name("name") + if name_node is not None: + members["values"].add(_read_text(name_node, source)) + elif child.type == "enum_member_declaration": + name_node = child.child_by_field_name("name") + if name_node is not None: + members["values"].add(_read_text(name_node, source)) + elif child.type == "method_declaration": + name_node = child.child_by_field_name("name") + if name_node is not None: + members["methods"].add(_csharp_base_identifier(_read_text(name_node, source))) + elif child.type in _CSHARP_CONFIG.class_types: + name_node = child.child_by_field_name("name") + if name_node is not None: + members["nested_types"].add(_read_text(name_node, source)) + return {key: _csharp_unique_sorted(values) for key, values in members.items()} + + +def _csharp_direct_member_types(type_node, source: bytes) -> dict[str, str | None]: + """Direct C# member receiver types for fields, properties, and record positional properties.""" + members: dict[str, str | None] = {} + + def _put(name_node, type_name: str | None) -> None: + if name_node is None: + return + name = 
_read_text(name_node, source) + if name and name != "_": + members[name] = type_name + + if type_node.type == "record_declaration": + for child in type_node.children: + if child.type != "parameter_list": + continue + for param in child.children: + if param.type != "parameter": + continue + _put( + param.child_by_field_name("name"), + _csharp_declared_bare_type(param.child_by_field_name("type"), param, source), + ) + + body = type_node.child_by_field_name("body") + if body is not None: + for child in body.children: + if child.type == "field_declaration": + decl = child.child_by_field_name("declaration") or _csharp_first_child(child, "variable_declaration") + if decl is None: + continue + type_name = _csharp_declared_bare_type(decl.child_by_field_name("type"), decl, source) + for decl_child in decl.children: + if decl_child.type != "variable_declarator": + continue + name_node = decl_child.child_by_field_name("name") or _csharp_first_child(decl_child, "tuple_pattern") + for name in _csharp_designator_names(name_node, source): + if name and name != "_": + members[name] = type_name + elif child.type == "property_declaration": + _put( + child.child_by_field_name("name"), + _csharp_declared_bare_type(child.child_by_field_name("type"), child, source), + ) + + return dict(sorted(members.items())) + + +def _build_csharp_shadow_names(root, source: bytes) -> dict[str, dict[str, list[str]]]: + """Collect C# simple-name lookup shadow facts by lexical scope id. + + The result is keyed by `_csharp_scope_id`, with buckets: + values, namespaces, methods, typeparams, nested_types. This mirrors the + binder coverage used by `_build_csharp_type_table` and adds type members + that are not local variable declarations. 
+ """ + scopes: dict[str, dict[str, list[str]]] = {} + + def _add_designators(scope_node, designator_node, bucket: str = "values") -> None: + for name in _csharp_designator_names(designator_node, source): + _csharp_add_shadow(scopes, _csharp_scope_id(scope_node), bucket, name) + + def _add_type_parameters(scope_node) -> None: + for child in scope_node.children: + if child.type != "type_parameter_list": + continue + for param in child.children: + if param.type == "type_parameter": + name_node = param.child_by_field_name("name") or _csharp_first_child(param, "identifier") + if name_node is not None: + _csharp_add_shadow(scopes, _csharp_scope_id(scope_node), "typeparams", _read_text(name_node, source)) + elif param.type == "identifier": + _csharp_add_shadow(scopes, _csharp_scope_id(scope_node), "typeparams", _read_text(param, source)) + + def _add_query_name(scope_node, name_node) -> None: + if name_node is not None and name_node.type == "identifier": + _csharp_add_shadow(scopes, _csharp_scope_id(scope_node), "values", _read_text(name_node, source)) + + def _walk(node) -> None: + if node.type in ("namespace_declaration", "file_scoped_namespace_declaration"): + ns_name = _csharp_namespace_name(node, source) + if ns_name: + first = ns_name.split(".", 1)[0] + _csharp_add_shadow(scopes, _csharp_scope_id(node), "namespaces", first) + + if node.type in _CSHARP_TYPE_PARAMETER_SCOPE_DECLARATIONS: + _add_type_parameters(node) + + if node.type == "parameter": + _add_designators(node, node) + elif node.type == "implicit_parameter": + _add_designators(node, node) + elif node.type == "variable_declaration": + if node.parent is not None and node.parent.type in ("field_declaration", "event_field_declaration"): + bucket_scope = node.parent.parent if node.parent.parent is not None else node.parent + for name in _csharp_names_from_variable_declaration(node, source): + _csharp_add_shadow(scopes, _csharp_scope_id(bucket_scope), "values", name) + else: + for child in node.children: + if 
child.type == "variable_declarator": + name_node = child.child_by_field_name("name") or _csharp_first_child(child, "tuple_pattern") + _add_designators(child, name_node) + elif node.type == "foreach_statement": + _add_designators(node, node.child_by_field_name("left")) + elif node.type in ("catch_declaration", "declaration_pattern", "declaration_expression", "var_pattern"): + _add_designators(node, node) + elif node.type == "local_function_statement": + _add_type_parameters(node) + name_node = node.child_by_field_name("name") + if name_node is not None: + _csharp_add_shadow(scopes, _csharp_enclosing_scope_id(node), "methods", _read_text(name_node, source)) + elif node.type == "from_clause": + _add_query_name(node, node.child_by_field_name("name")) + elif node.type in ("let_clause", "join_clause", "join_into_clause"): + _add_query_name(node, _csharp_first_identifier_child(node)) + elif node.type == "query_continuation": + _add_query_name(node, _csharp_first_identifier_child(node)) + elif node.type == "query_expression": + for index, child in enumerate(node.children): + if child.type != "into": + continue + for next_child in node.children[index + 1:]: + if next_child.type == "identifier": + _add_query_name(node, next_child) + break + if next_child.is_named: + break + elif node.type == "property_declaration": + name_node = node.child_by_field_name("name") + if name_node is not None and node.parent is not None: + _csharp_add_shadow(scopes, _csharp_scope_id(node.parent), "values", _read_text(name_node, source)) + elif node.type == "event_declaration": + name_node = node.child_by_field_name("name") + if name_node is not None and node.parent is not None: + _csharp_add_shadow(scopes, _csharp_scope_id(node.parent), "values", _read_text(name_node, source)) + elif node.type == "enum_member_declaration": + name_node = node.child_by_field_name("name") + if name_node is not None: + _csharp_add_shadow(scopes, _csharp_scope_id(node), "values", _read_text(name_node, source)) + elif 
node.type in _CSHARP_CONFIG.class_types and node.parent is not None and node.parent.type == "declaration_list": + name_node = node.child_by_field_name("name") + parent = node.parent.parent + if parent is not None and parent.type in _CSHARP_CONFIG.class_types and name_node is not None: + _csharp_add_shadow(scopes, _csharp_scope_id(parent), "nested_types", _read_text(name_node, source)) + + for child in node.children: + _walk(child) + + _walk(root) + return { + scope_id: {bucket: _csharp_unique_sorted(names) for bucket, names in buckets.items()} + for scope_id, buckets in scopes.items() + } + + +def _csharp_pre_scan_interfaces(root_node, source: bytes) -> set[str]: + """Return names declared as `interface` in this C# compilation unit.""" + out: set[str] = set() + stack = [root_node] + while stack: + n = stack.pop() + if n.type == "interface_declaration": + name_node = n.child_by_field_name("name") + if name_node is not None: + text = _read_text(name_node, source) + if text: + out.add(text) + stack.extend(n.children) + return out + + +def _csharp_classify_base(name: str, interface_names: set[str]) -> str: + """`implements` if the base name is an interface (declared or by I-prefix convention), else `inherits`.""" + if name in interface_names: + return "implements" + if len(name) >= 2 and name[0] == "I" and name[1].isupper(): + return "implements" + return "inherits" + + +_CSHARP_TYPE_PARAMETER_SCOPE_DECLARATIONS = _CSHARP_TYPE_DECLARATION_NODES | frozenset({ + "delegate_declaration", + "local_function_statement", + "method_declaration", +}) + + +def _csharp_type_parameters_in_scope(node, source: bytes) -> frozenset[str]: + """Return C# type-parameter names visible from ``node``.""" + names: set[str] = set() + scope = node + while scope is not None: + if scope.type in _CSHARP_TYPE_PARAMETER_SCOPE_DECLARATIONS: + for child in scope.children: + if child.type != "type_parameter_list": + continue + for param in child.children: + if param.type == "type_parameter": + 
name_node = next( + (sub for sub in param.children if sub.type == "identifier"), + None, + ) + if name_node is not None: + name = _read_text(name_node, source) + if name: + names.add(name) + elif param.type == "identifier": + name = _read_text(param, source) + if name: + names.add(name) + scope = scope.parent + return frozenset(names) + + +def _csharp_collect_type_refs( + node, + source: bytes, + generic: bool, + out: list[tuple[str, str, bool, str]], + skip: frozenset[str] | None = None, +) -> None: + """Walk a C# type expression; append (name, role, qualified, qualifier) tuples.""" + if node is None: + return + if skip is None: + skip = _csharp_type_parameters_in_scope(node, source) + t = node.type + if t == "predefined_type": + return + if t == "identifier": + name = _read_text(node, source) + if name and name not in skip: + out.append((name, "generic_arg" if generic else "type", False, "")) + return + if t == "qualified_name": + prefix, _, text = _read_text(node, source).rpartition(".") + text = text.split("<", 1)[0] + if text and text not in skip: + out.append((text, "generic_arg" if generic else "type", True, prefix)) + return + if t == "generic_name": + name_child = node.child_by_field_name("name") + if name_child is None: + for sub in node.children: + if sub.type == "identifier": + name_child = sub + break + if name_child is not None: + qualified = name_child.type == "qualified_name" + prefix, _, name = _read_text(name_child, source).rpartition(".") + if name and name not in skip: + out.append((name, "generic_arg" if generic else "type", qualified, prefix if qualified else "")) + for sub in node.children: + if sub.type == "type_argument_list": + for arg in sub.children: + if arg.is_named: + _csharp_collect_type_refs(arg, source, True, out, skip) + return + if t in ("nullable_type", "array_type", "pointer_type", "ref_type"): + for c in node.children: + if c.is_named: + _csharp_collect_type_refs(c, source, generic, out, skip) + return + if node.is_named: + 
for c in node.children: + if c.is_named: + _csharp_collect_type_refs(c, source, generic, out, skip) + + +def _csharp_attribute_names(method_node, source: bytes) -> list[tuple[str, bool, str]]: + """Collect attribute names from a C# method/declaration's attribute_list children.""" + names: list[tuple[str, bool, str]] = [] + skip = _csharp_type_parameters_in_scope(method_node, source) + for child in method_node.children: + if child.type != "attribute_list": + continue + for attr in child.children: + if attr.type != "attribute": + continue + name_node = attr.child_by_field_name("name") + if name_node is None: + for sub in attr.children: + if sub.type in ("identifier", "qualified_name"): + name_node = sub + break + if name_node is not None: + qualified = name_node.type == "qualified_name" + prefix, _, text = _read_text(name_node, source).rpartition(".") + if text and text not in skip: + names.append((text, qualified, prefix if qualified else "")) + return names + + +def _csharp_import_target_kind(using_kind: str, target_fqn: str) -> str: + if using_kind == "namespace": + return "namespace" + if using_kind == "static": + return "type" + if "<" in target_fqn or target_fqn.endswith("]"): + return "type" + tail = target_fqn.rsplit(".", 1)[-1].strip() + return "type" if tail[:1].isupper() else "namespace" + + +def _import_csharp(node, source: bytes, file_nid: str, stem: str, edges: list, str_path: str, scope_stack: list[str] | None = None) -> None: + text = _read_text(node, source).strip().rstrip(";") + if node.type == "extern_alias_directive": + alias_node = node.child_by_field_name("name") + alias = _read_text(alias_node, source).strip() if alias_node is not None else "" + if not alias: + return + edges.append({ + "source": file_nid, + "target": _make_id(alias), + "relation": "imports", + "context": "import", + "confidence": "EXTRACTED", + "source_file": str_path, + "source_location": f"L{node.start_point[0] + 1}", + "weight": 1.0, + "metadata": sanitize_metadata({ + 
"using_kind": "extern_alias", + "alias": alias, + "target_fqn": alias, + "target_kind": "namespace", + "scope_kind": "global", + }), + }) + return + + is_global = text.startswith("global ") + if is_global: + text = text[len("global "):].strip() + if not text.startswith("using"): + return + body = text[len("using"):].strip() + using_kind, alias, target_fqn = "namespace", None, body + if body.startswith("static "): + using_kind, target_fqn = "static", body[len("static "):].strip() + elif "=" in body: + lhs, rhs = body.split("=", 1) + using_kind, alias, target_fqn = "alias", lhs.strip(), rhs.strip() + if not target_fqn: + return + scope_kind = "global" if is_global else ("namespace" if scope_stack else "file") + metadata = { + "using_kind": using_kind, + "target_fqn": target_fqn, + "target_kind": _csharp_import_target_kind(using_kind, target_fqn), + "scope_kind": scope_kind, + "is_global": is_global, + } + if alias: + metadata["alias"] = alias + if scope_stack and not is_global: + metadata["scope_id"] = scope_stack[-1] + edges.append({ + "source": file_nid, + "target": _make_id(target_fqn), + "relation": "imports", + "context": "import", + "confidence": "EXTRACTED", + "source_file": str_path, + "source_location": f"L{node.start_point[0] + 1}", + "weight": 1.0, + "metadata": sanitize_metadata(metadata), + }) + + +def _csharp_namespace_name(node, source: bytes) -> str: + name_node = node.child_by_field_name("name") + if name_node is not None: + return _read_text(name_node, source).strip() + for child in node.children: + if child.type in ("identifier", "qualified_name"): + return _read_text(child, source).strip() + return "" + + +def _csharp_extra_walk(node, source: bytes, file_nid: str, stem: str, str_path: str, + nodes: list, edges: list, seen_ids: set, function_bodies: list, + parent_class_nid: str | None, add_node_fn, add_edge_fn, + walk_fn, namespace_stack: list[str], scope_stack: list[str]) -> bool: + """Handle namespace declarations for C#. 
Returns True if handled.""" + if node.type == "namespace_declaration": + ns_name = _csharp_namespace_name(node, source) + pushed = False + if ns_name: + namespace_stack.append(ns_name) + scope_stack.append(f"s{node.start_byte}") + pushed = True + ns_label = ".".join(namespace_stack) + ns_nid = _csharp_namespace_id(ns_label) + line = node.start_point[0] + 1 + add_node_fn(ns_nid, ns_label, line, node_type="namespace", metadata={"kind": "csharp_namespace"}) + add_edge_fn(file_nid, ns_nid, "contains", line) + body = node.child_by_field_name("body") + if body: + try: + for child in body.children: + walk_fn(child, parent_class_nid) + finally: + if pushed: + namespace_stack.pop() + scope_stack.pop() + elif pushed: + namespace_stack.pop() + scope_stack.pop() + return True + if node.type == "file_scoped_namespace_declaration": + ns_name = _csharp_namespace_name(node, source) + if ns_name: + namespace_stack.append(ns_name) + scope_stack.append(f"s{node.start_byte}") + ns_label = ".".join(namespace_stack) + ns_nid = _csharp_namespace_id(ns_label) + line = node.start_point[0] + 1 + add_node_fn(ns_nid, ns_label, line, node_type="namespace", metadata={"kind": "csharp_namespace"}) + add_edge_fn(file_nid, ns_nid, "contains", line) + return True + return False + + +_CSHARP_CONFIG = LanguageConfig( + ts_module="tree_sitter_c_sharp", + class_types=frozenset({ + "class_declaration", + "interface_declaration", + "enum_declaration", + "struct_declaration", + "record_declaration", + }), + function_types=frozenset({"method_declaration"}), + import_types=frozenset({"using_directive", "extern_alias_directive"}), + call_types=frozenset({"invocation_expression"}), + call_function_field="function", + call_accessor_node_types=frozenset({"member_access_expression"}), + call_accessor_field="name", + body_fallback_child_types=("declaration_list",), + function_boundary_types=frozenset({"method_declaration"}), + import_handler=_import_csharp, +) + + +def _read_csharp_type_name(node, source: bytes) 
-> tuple[str, bool, str] | None: + """Resolve a C# type name, whether it was qualified, and its qualifier prefix.""" + if node is None: + return None + if node.type in ("identifier", "predefined_type"): + return (_read_text(node, source), False, "") + if node.type == "qualified_name": + prefix, _, tail = _read_text(node, source).rpartition(".") + tail = tail.split("<", 1)[0] + return (tail, True, prefix) + if node.type == "generic_name": + name_node = node.child_by_field_name("name") + if name_node is not None: + qualified = name_node.type == "qualified_name" + prefix, _, tail = _read_text(name_node, source).rpartition(".") + return (tail, qualified, prefix if qualified else "") + for child in node.children: + if not child.is_named: + continue + result = _read_csharp_type_name(child, source) + if result: + return result + return None + + +CsharpTypeRefFact = tuple[str, str, bool, str] +CsharpBaseListFact = tuple[str, bool, str, str, list[tuple[str, bool, str]]] + + +def csharp_class_member_metadata(type_node, source: bytes, parent_class_nid: str | None) -> dict: + """Return C# type metadata collected from the declaration node.""" + metadata = { + "csharp_member_names": _csharp_direct_member_names(type_node, source), + "csharp_member_types": _csharp_direct_member_types(type_node, source), + } + if parent_class_nid: + metadata["is_nested_type"] = True + metadata["parent_class_nid"] = parent_class_nid + return metadata + + +def csharp_base_list_facts( + type_node, + source: bytes, + csharp_interface_names: set[str], + csharp_type_params: frozenset[str], +) -> list[CsharpBaseListFact]: + """Return ordered base-list facts for C# class/interface emission.""" + facts: list[CsharpBaseListFact] = [] + for child in type_node.children: + if child.type != "base_list": + continue + for sub in child.children: + base_type_node = sub + if sub.type == "primary_constructor_base_type": + base_type_node = sub.child_by_field_name("type") + if base_type_node is None or 
base_type_node.type not in ( + "identifier", + "generic_name", + "qualified_name", + ): + continue + base_info = _read_csharp_type_name(base_type_node, source) + if base_info is None: + continue + base, qualified, qualifier = base_info + if not base or base in csharp_type_params: + continue + generic_refs: list[tuple[str, bool, str]] = [] + if base_type_node.type == "generic_name": + for tal in base_type_node.children: + if tal.type != "type_argument_list": + continue + for arg in tal.children: + if not arg.is_named: + continue + refs: list[tuple[str, str, bool, str]] = [] + _csharp_collect_type_refs(arg, source, True, refs, csharp_type_params) + for ref_name, _role, ref_qualified, ref_qualifier in refs: + generic_refs.append((ref_name, ref_qualified, ref_qualifier)) + facts.append(( + base, + qualified, + qualifier, + _csharp_classify_base(base, csharp_interface_names), + generic_refs, + )) + return facts + + +def csharp_field_type_ref_facts(field_node, source: bytes) -> list[CsharpTypeRefFact]: + """Return C# field type reference facts in emission order.""" + type_node = field_node.child_by_field_name("type") + if type_node is None: + for child in field_node.children: + if child.type == "variable_declaration": + type_node = child.child_by_field_name("type") + if type_node is not None: + break + type_info = _read_csharp_type_name(type_node, source) + if not type_info: + return [] + type_name, qualified, qualifier = type_info + csharp_type_params = _csharp_type_parameters_in_scope( + type_node if type_node is not None else field_node, source + ) + if not type_name or type_name in csharp_type_params: + return [] + return [(type_name, "field", qualified, qualifier)] + + +def csharp_property_type_ref_facts(property_node, source: bytes) -> list[CsharpTypeRefFact]: + """Return C# property type reference facts in emission order.""" + type_node = property_node.child_by_field_name("type") + if type_node is None: + return [] + refs: list[tuple[str, str, bool, str]] = [] + 
_csharp_collect_type_refs(type_node, source, False, refs) + return [ + (ref_name, "generic_arg" if role == "generic_arg" else "field", qualified, qualifier) + for ref_name, role, qualified, qualifier in refs + ] + + +def csharp_method_reference_facts(method_node, source: bytes) -> list[CsharpTypeRefFact]: + """Return ordered C# parameter, return, and attribute reference facts.""" + csharp_type_params = _csharp_type_parameters_in_scope(method_node, source) + facts: list[CsharpTypeRefFact] = [] + params_node = method_node.child_by_field_name("parameters") + if params_node is not None: + for param in params_node.children: + if param.type != "parameter": + continue + refs: list[tuple[str, str, bool, str]] = [] + _csharp_collect_type_refs( + param.child_by_field_name("type"), source, False, refs, csharp_type_params + ) + for ref_name, role, qualified, qualifier in refs: + facts.append(( + ref_name, + "generic_arg" if role == "generic_arg" else "parameter_type", + qualified, + qualifier, + )) + return_node = method_node.child_by_field_name("returns") + if return_node is not None: + refs = [] + _csharp_collect_type_refs(return_node, source, False, refs, csharp_type_params) + for ref_name, role, qualified, qualifier in refs: + facts.append(( + ref_name, + "generic_arg" if role == "generic_arg" else "return_type", + qualified, + qualifier, + )) + for attr_name, qualified, qualifier in _csharp_attribute_names(method_node, source): + facts.append((attr_name, "attribute", qualified, qualifier)) + return facts + + +def csharp_invocation_callee(node, source: bytes) -> tuple[str | None, bool, str | None]: + """Parse a C# invocation node into `(callee_name, is_member_call, member_receiver)`.""" + callee_name: str | None = None + is_member_call = False + member_receiver: str | None = None + + func_node = node.child_by_field_name("function") + if func_node is not None and func_node.type == "conditional_access_expression": + is_member_call = True + recv_node = 
func_node.child_by_field_name("condition") + if recv_node is not None: + member_receiver = _read_text(recv_node, source) + binding_node = next( + (child for child in func_node.named_children if child.type == "member_binding_expression"), + None, + ) + name_node = binding_node.child_by_field_name("name") if binding_node is not None else None + if name_node is not None: + callee_name = _csharp_base_identifier(_read_text(name_node, source)) + elif func_node is not None and func_node.type == "member_access_expression": + is_member_call = True + recv_node = func_node.child_by_field_name("expression") + if recv_node is not None: + member_receiver = _read_text(recv_node, source) + name_node = func_node.child_by_field_name("name") + if name_node is not None: + callee_name = _csharp_base_identifier(_read_text(name_node, source)) + else: + if func_node is not None and func_node.type in ("identifier", "generic_name"): + callee_name = _csharp_base_identifier(_read_text(func_node, source)) + is_member_call = True + member_receiver = "" + else: + name_node = node.child_by_field_name("name") + if name_node: + callee_name = _read_text(name_node, source) + else: + for child in node.children: + if child.is_named: + raw = _read_text(child, source) + if "." 
in raw: + callee_name = _csharp_base_identifier(raw.split(".")[-1]) + is_member_call = True + member_receiver = raw.rsplit(".", 1)[0] + else: + callee_name = _csharp_base_identifier(raw) + break + return callee_name, is_member_call, member_receiver + + +def csharp_file_facts(root, source: bytes, str_path: str) -> dict[str, dict]: + """Return per-file C# fact tables for the shared extraction result.""" + csharp_var_call_inits = _build_csharp_var_call_inits(root, source) + return { + "csharp_type_table": { + "path": str_path, + "scopes": _build_csharp_type_table(root, source), + }, + "csharp_shadow_names": { + "path": str_path, + "scopes": _build_csharp_shadow_names(root, source), + }, + "csharp_var_call_inits": { + "path": str_path, + "inits": csharp_var_call_inits["inits"], + "poisoned": csharp_var_call_inits["poisoned"], + }, + } diff --git a/graphify/extractors/csharp_resolve.py b/graphify/extractors/csharp_resolve.py new file mode 100644 index 000000000..f126867cf --- /dev/null +++ b/graphify/extractors/csharp_resolve.py @@ -0,0 +1,762 @@ +"""C# member-call resolution (moved out of extract.py — no behavior change).""" +from __future__ import annotations + +from graphify.extractors.csharp import ( + _csharp_base_identifier, + _is_cs_file, + _metadata, + build_csharp_name_resolver, +) +from graphify.security import sanitize_metadata + + +def _resolve_csharp_member_calls( + per_file: list[dict], + all_nodes: list[dict], + all_edges: list[dict], +) -> None: + """Resolve C# member calls, preserving the never-wrong-edge bar.""" + safe_per_file = per_file if isinstance(per_file, list) else [] + resolver = build_csharp_name_resolver(all_nodes, all_edges) + node_by_id = resolver.node_by_id + + def _key(label: str) -> str: + s = str(label).strip() + if s.endswith("()"): + s = s[:-2] + return _csharp_base_identifier(s.lstrip(".")) + + # Public CsharpNameResolver type lookup deliberately excludes nested types. 
+ # The member model below is separate and includes nested type nodes, so + # enclosing-type/base-chain/member-shadow checks work for nested callers + # without perturbing #1562 type-reference behavior. + member_key_by_nid: dict[str, tuple[str, str]] = {} + member_type_nid_by_key: dict[tuple[str, str], str] = {} + for key, ids in resolver.type_def_groups.items(): + for nid in ids: + member_key_by_nid[nid] = key + member_type_nid_by_key.setdefault(key, nid) + for node in all_nodes: + if not isinstance(node, dict): + continue + metadata = _metadata(node.get("metadata")) + nid = node.get("id") + label = node.get("label") + source_file = node.get("source_file") + if not ( + metadata.get("is_nested_type") + and isinstance(nid, str) + and nid + and isinstance(label, str) + and label + and _is_cs_file(source_file) + ): + continue + key = ("__nested__", nid) + member_key_by_nid[nid] = key + member_type_nid_by_key.setdefault(key, nid) + + method_index: dict[tuple[tuple[str, str], str], str] = {} + methods_by_group: dict[tuple[str, str], dict[str, set[str]]] = {} + method_decl_count_by_nid: dict[str, int] = {} + method_return_type_by_nid: dict[str, str | None] = {} + enclosing_type: dict[str, str] = {} + inherits_of: dict[tuple[str, str], set[tuple[str, str]]] = {} + unresolved_base: set[tuple[str, str]] = set() + direct_member_names: dict[tuple[str, str], dict[str, set[str]]] = {} + member_types_by_nid: dict[str, dict[str, str | None]] = {} + parent_class_by_nid: dict[str, str] = {} + parent_class_fallback_by_nid: dict[str, str] = {} + nested_type_ids_by_parent: dict[str, dict[str, set[str]]] = {} + type_decl_count_by_nid: dict[str, int] = {} + inherit_candidate_nids: dict[str, set[str]] = {} + + for node in all_nodes: + if not isinstance(node, dict): + continue + nid = node.get("id") + metadata = _metadata(node.get("metadata")) + label = node.get("label") + source_file = node.get("source_file") + if ( + isinstance(nid, str) + and nid + and isinstance(label, str) + and 
label.startswith(".") + and label.endswith("()") + and _is_cs_file(source_file) + and "csharp_return_type" in metadata + ): + return_type = metadata.get("csharp_return_type") + method_return_type_by_nid[nid] = return_type if isinstance(return_type, str) and return_type else None + group = member_key_by_nid.get(nid) + if not group: + continue + if isinstance(nid, str): + parent_nid = metadata.get("parent_class_nid") + if isinstance(parent_nid, str) and parent_nid: + parent_class_fallback_by_nid.setdefault(nid, parent_nid) + raw_members = _metadata(metadata.get("csharp_member_names")) + bucket = direct_member_names.setdefault(group, {"values": set(), "methods": set(), "nested_types": set()}) + for kind in ("values", "methods", "nested_types"): + values = raw_members.get(kind) + if isinstance(values, list): + bucket[kind].update(str(v) for v in values if v) + + raw_member_types = _metadata(metadata.get("csharp_member_types")) + if isinstance(nid, str) and nid: + typed_bucket = member_types_by_nid.setdefault(nid, {}) + for name, type_name in raw_member_types.items(): + if isinstance(name, str) and name: + typed_bucket[name] = type_name if isinstance(type_name, str) and type_name else None + + def _record_nested_type(parent_nid: str, child_nid: str) -> None: + target_node = node_by_id.get(child_nid, {}) + label = target_node.get("label") + if not isinstance(label, str) or not label: + return + simple = _csharp_base_identifier(label) + if not simple: + return + nested_type_ids_by_parent.setdefault(parent_nid, {}).setdefault(simple, set()).add(child_nid) + + for e in all_edges: + if e.get("relation") != "contains": + continue + src, tgt = e.get("source"), e.get("target") + if not (isinstance(src, str) and isinstance(tgt, str) and tgt in member_key_by_nid): + continue + target_node = node_by_id.get(tgt, {}) + target_metadata = _metadata(target_node.get("metadata")) + if e.get("context") == "nested_type" or target_metadata.get("is_nested_type"): + parent_class_by_nid[tgt] = 
src + _record_nested_type(src, tgt) + continue + type_decl_count_by_nid[tgt] = type_decl_count_by_nid.get(tgt, 0) + 1 + + for nid, parent_nid in parent_class_fallback_by_nid.items(): + parent_class_by_nid.setdefault(nid, parent_nid) + + for e in all_edges: + rel, src, tgt = e.get("relation"), e.get("source"), e.get("target") + if not (rel == "inherits" and _is_cs_file(e.get("source_file")) and isinstance(src, str) and isinstance(tgt, str)): + continue + if src not in member_key_by_nid: + continue + tgt_group = member_key_by_nid.get(tgt) + if tgt_group is not None and not _is_placeholder_node(node_by_id.get(tgt)): + inherit_candidate_nids.setdefault(src, set()).add(tgt) + + def _visible_nested_type_targets(type_nid: str | None) -> dict[str, set[str]]: + visible: dict[str, set[str]] = {} + seen_lexical: set[str] = set() + + def add_declared_nested(owner_nid: str) -> None: + for name, ids in nested_type_ids_by_parent.get(owner_nid, {}).items(): + visible.setdefault(name, set()).update(ids) + + def walk_base_chain(owner_nid: str, seen_bases: set[str]) -> None: + for base_nid in inherit_candidate_nids.get(owner_nid, set()): + if base_nid in seen_bases: + continue + seen_bases.add(base_nid) + add_declared_nested(base_nid) + walk_base_chain(base_nid, seen_bases) + + current = type_nid + while current and current not in seen_lexical: + seen_lexical.add(current) + add_declared_nested(current) + walk_base_chain(current, set()) + current = parent_class_by_nid.get(current) + return visible + + def _base_reference_is_ambiguous_nested_collision(src: str, tgt: str) -> bool: + if not isinstance(tgt, str): + return False + target_node = node_by_id.get(tgt) + if not isinstance(target_node, dict): + return False + label = target_node.get("label") + if not isinstance(label, str) or not label: + return False + simple = _csharp_base_identifier(label) + visible_targets = _visible_nested_type_targets(src).get(simple, set()) + if not visible_targets: + return False + target_metadata = 
_metadata(target_node.get("metadata")) + target_is_exact_visible_nested = ( + tgt in visible_targets + and target_metadata.get("is_nested_type") + and type_decl_count_by_nid.get(tgt, 0) <= 1 + ) + return not target_is_exact_visible_nested + + for e in all_edges: + rel, src, tgt = e.get("relation"), e.get("source"), e.get("target") + if rel == "method" and src in member_key_by_nid: + if isinstance(tgt, str) and tgt: + method_decl_count_by_nid[tgt] = method_decl_count_by_nid.get(tgt, 0) + 1 + tnode = node_by_id.get(tgt) + if tnode is not None: + group = member_key_by_nid[src] + enclosing_type.setdefault(tgt, src) + method_key = _key(tnode.get("label", "")) + method_index[(group, method_key)] = tgt + methods_by_group.setdefault(group, {}).setdefault(method_key, set()).add(tgt) + direct_member_names.setdefault(group, {"values": set(), "methods": set(), "nested_types": set()})["methods"].add(method_key) + elif rel == "inherits" and _is_cs_file(e.get("source_file")) and src in member_key_by_nid: + src_group = member_key_by_nid[src] + tgt_group = member_key_by_nid.get(tgt) + if tgt_group is None or _is_placeholder_node(node_by_id.get(tgt)) or _base_reference_is_ambiguous_nested_collision(src, tgt): + unresolved_base.add(src_group) + else: + inherits_of.setdefault(src_group, set()).add(tgt_group) + elif rel == "contains" and (e.get("context") == "nested_type" or _metadata(node_by_id.get(tgt, {}).get("metadata")).get("is_nested_type")): + src_group = member_key_by_nid.get(src) + target_node = node_by_id.get(tgt) + if src_group and target_node is not None: + label = target_node.get("label") + if isinstance(label, str) and label: + direct_member_names.setdefault(src_group, {"values": set(), "methods": set(), "nested_types": set()})["nested_types"].add(label) + + all_raw_calls: list[dict] = [] + shadow_by_file: dict[str, dict[str, dict[str, list[str]]]] = {} + type_table_by_file: dict[str, dict[str, list[tuple[str, str | None, int]]]] = {} + var_call_inits_by_file: dict[str, 
list[dict]] = {} + poisoned_var_call_inits_by_file: dict[str, set[tuple[str, str]]] = {} + for result in safe_per_file: + if not isinstance(result, dict): + continue + raw_calls = result.get("raw_calls") + if isinstance(raw_calls, list): + all_raw_calls.extend(rc for rc in raw_calls if isinstance(rc, dict)) + tt = result.get("csharp_type_table") + if isinstance(tt, dict): + path = tt.get("path") + scopes = tt.get("scopes") + if isinstance(path, str) and path and isinstance(scopes, dict): + type_table_by_file[path] = scopes + sf = result.get("csharp_shadow_names") + if isinstance(sf, dict): + path = sf.get("path") + scopes = sf.get("scopes") + if isinstance(path, str) and path and isinstance(scopes, dict): + shadow_by_file[path] = scopes + vi = result.get("csharp_var_call_inits") + if isinstance(vi, dict): + path = vi.get("path") + inits = vi.get("inits") + poisoned = vi.get("poisoned") + if isinstance(path, str) and path: + if isinstance(inits, list): + var_call_inits_by_file.setdefault(path, []).extend(fact for fact in inits if isinstance(fact, dict)) + if isinstance(poisoned, list): + poison_bucket = poisoned_var_call_inits_by_file.setdefault(path, set()) + for fact in poisoned: + if not isinstance(fact, dict): + continue + scope_id = fact.get("scope_id") + name = fact.get("name") + if isinstance(scope_id, str) and isinstance(name, str) and name: + poison_bucket.add((scope_id, name)) + + member_kind_cache: dict[tuple[str, str], tuple[dict[str, set[str]], bool]] = {} + method_cache: dict[tuple[tuple[str, str], str], tuple[set[str], bool]] = {} + + def _member_names_by_kind_for_group( + group: tuple[str, str], + seen: set[tuple[str, str]] | None = None, + ) -> tuple[dict[str, set[str]], bool]: + if group in member_kind_cache: + cached, cached_unknown = member_kind_cache[group] + return {kind: set(values) for kind, values in cached.items()}, cached_unknown + seen = set(seen or set()) + if group in seen: + return {"values": set(), "methods": set(), "nested_types": 
set()}, False + seen.add(group) + direct = direct_member_names.get(group, {}) + names_by_kind = { + "values": set(direct.get("values", set())), + "methods": set(direct.get("methods", set())), + "nested_types": set(direct.get("nested_types", set())), + } + unknown = group in unresolved_base + for base in inherits_of.get(group, set()): + base_names, base_unknown = _member_names_by_kind_for_group(base, seen) + for kind in ("values", "methods", "nested_types"): + names_by_kind[kind].update(base_names.get(kind, set())) + unknown = unknown or base_unknown + member_kind_cache[group] = ({kind: set(values) for kind, values in names_by_kind.items()}, unknown) + return names_by_kind, unknown + + def _member_names_for_group(group: tuple[str, str], seen: set[tuple[str, str]] | None = None) -> tuple[set[str], bool]: + names_by_kind, unknown = _member_names_by_kind_for_group(group, seen) + names = set() + for values in names_by_kind.values(): + names.update(values) + return names, unknown + + def _method_ids_for_group(group: tuple[str, str], method_key: str, seen: set[tuple[str, str]] | None = None) -> tuple[set[str], bool]: + cache_key = (group, method_key) + if cache_key in method_cache: + return method_cache[cache_key] + seen = set(seen or set()) + if group in seen: + return set(), False + seen.add(group) + hits = set(methods_by_group.get(group, {}).get(method_key, set())) + unknown = group in unresolved_base + for base in inherits_of.get(group, set()): + base_hits, base_unknown = _method_ids_for_group(base, method_key, seen) + hits.update(base_hits) + unknown = unknown or base_unknown + method_cache[cache_key] = (hits, unknown) + return hits, unknown + + def _direct_method_id_for_group(group: tuple[str, str], method_key: str) -> str | None: + direct_hits = set(methods_by_group.get(group, {}).get(method_key, set())) + if len(direct_hits) == 1: + return next(iter(direct_hits)) + return None + + def _instance_method_id_for_group(group: tuple[str, str], method_key: str) -> str | 
None: + direct_hits = set(methods_by_group.get(group, {}).get(method_key, set())) + if len(direct_hits) == 1: + return next(iter(direct_hits)) + if len(direct_hits) > 1: + return None + method_ids, unknown = _method_ids_for_group(group, method_key) + if unknown or len(method_ids) != 1: + return None + return next(iter(method_ids)) + + def _shadow_bucket_has(rc: dict, src_file: str, bucket: str, name: str) -> bool: + scopes = shadow_by_file.get(src_file, {}) + chain = rc.get("scope_chain", []) + if not isinstance(chain, list): + return False + for sid in chain: + values = scopes.get(sid, {}).get(bucket, []) + if name in values: + return True + return False + + def _type_group_for_nid(nid: str | None) -> tuple[str, str] | None: + return member_key_by_nid.get(nid) if nid else None + + def _declared_type_group( + type_name: str | None, + source_node, + src_file: str, + enclosing_nid: str | None, + ) -> tuple[str, str] | None: + if not type_name: + return None + simple = _csharp_base_identifier(type_name) + if enclosing_nid: + visible_targets = _visible_nested_type_targets(enclosing_nid).get(simple, set()) + if visible_targets: + resolved_nid = resolver.resolve_label(simple, source_node, src_file) + resolved_node = node_by_id.get(resolved_nid, {}) if resolved_nid else {} + resolved_metadata = _metadata(resolved_node.get("metadata")) + exact_visible_nested = ( + resolved_nid in visible_targets + and resolved_metadata.get("is_nested_type") + and type_decl_count_by_nid.get(resolved_nid, 0) <= 1 + ) + if not exact_visible_nested: + return None + return member_key_by_nid.get(resolved_nid) + source_namespace = _metadata(source_node.get("metadata") if isinstance(source_node, dict) else None).get("namespace") + if isinstance(source_namespace, str): + same_namespace_nid = resolver.type_def_index.get((source_namespace, simple)) + if same_namespace_nid: + return member_key_by_nid.get(same_namespace_nid) + nid = resolver.resolve_label(simple, source_node, src_file) + return 
member_key_by_nid.get(nid) if nid else None + + def _static_import_group(entry, source_node, src_file) -> tuple[str, str] | None: + target = _csharp_base_identifier(str(entry.target_fqn).strip()) + namespace, sep, simple = target.rpartition(".") + nid = resolver.type_def_index.get((namespace, simple)) if sep else resolver.resolve_label(target, source_node, src_file) + return _type_group_for_nid(nid) + + def _using_static_may_shadow(name: str, source_node, src_file: str) -> bool: + for entry in resolver.using_static_in_scope(source_node, src_file): + group = _static_import_group(entry, source_node, src_file) + if group is None: + return True + names, unknown = _member_names_for_group(group) + if unknown or name in names: + return True + return False + + def _complete_static_shadow(name: str, rc: dict, source_node, src_file: str, enclosing_group: tuple[str, str] | None) -> bool: + if resolver.namespace_may_bind(name, source_node, src_file): + return True + if resolver.is_alias_in_scope(name, source_node, src_file): + return True + for bucket in ("values", "methods", "typeparams", "nested_types"): + if _shadow_bucket_has(rc, src_file, bucket, name): + return True + if _using_static_may_shadow(name, source_node, src_file): + return True + if enclosing_group is not None: + names, unknown = _member_names_for_group(enclosing_group) + if unknown or name in names: + return True + return False + + def _implicit_shadow(name: str, rc: dict, source_node, src_file: str, enclosing_group: tuple[str, str]) -> bool: + if resolver.namespace_may_bind(name, source_node, src_file): + return True + if resolver.is_alias_in_scope(name, source_node, src_file): + return True + for bucket in ("values", "methods", "typeparams", "nested_types"): + if _shadow_bucket_has(rc, src_file, bucket, name): + return True + if _using_static_may_shadow(name, source_node, src_file): + return True + direct = direct_member_names.get(enclosing_group, {}) + value_or_nested = set(direct.get("values", set())) | 
set(direct.get("nested_types", set())) + if name in value_or_nested: + return True + names_by_kind, unknown = _member_names_by_kind_for_group(enclosing_group) + if unknown: + return True + inherited_values = set(names_by_kind.get("values", set())) - set(direct.get("values", set())) + inherited_nested_types = set(names_by_kind.get("nested_types", set())) - set(direct.get("nested_types", set())) + return name in inherited_values or name in inherited_nested_types + + def _resolve_implicit_target(rc, callee_key: str, source_node, src_file: str) -> tuple[tuple[str, str] | None, str | None]: + caller = rc["caller_nid"] + enclosing_nid = enclosing_type.get(caller) + enclosing_group = member_key_by_nid.get(enclosing_nid) if enclosing_nid else None + if enclosing_group is None: + return None, None + if _implicit_shadow(callee_key, rc, source_node, src_file, enclosing_group): + return None, None + method_ids, unknown = _method_ids_for_group(enclosing_group, callee_key) + if unknown or len(method_ids) != 1: + return None, None + method_nid = next(iter(method_ids)) + owner = enclosing_type.get(method_nid) + return member_key_by_nid.get(owner) if owner else enclosing_group, method_nid + + def _simple_non_namespace_shadow(name: str, rc: dict, source_node, src_file: str, enclosing_group: tuple[str, str] | None) -> bool: + found, _typed = _lookup_type_table( + type_table_by_file.get(src_file, {}), + name, + rc.get("scope_chain", []), + rc.get("call_byte", 1 << 62), + ) + if found: + return True + for bucket in ("values", "methods", "typeparams", "nested_types"): + if _shadow_bucket_has(rc, src_file, bucket, name): + return True + if _using_static_may_shadow(name, source_node, src_file): + return True + if enclosing_group is not None: + names, unknown = _member_names_for_group(enclosing_group) + if unknown or name in names: + return True + return False + + def _leading_qualifier_guard(leading: str, qualifier: str, rc: dict, source_node, src_file: str, enclosing_group: tuple[str, 
str] | None) -> bool: + if _simple_non_namespace_shadow(leading, rc, source_node, src_file, enclosing_group): + return True + if resolver.resolve_label(leading, source_node, src_file): + return True + if resolver.is_alias_in_scope(leading, source_node, src_file): + return not resolver.qualifier_is_namespace_in_scope(leading, source_node, src_file) + return not ( + resolver.qualifier_is_namespace_in_scope(qualifier, source_node, src_file) + or resolver.qualifier_is_namespace_in_scope(leading, source_node, src_file) + ) + + inferred_local_groups: dict[tuple[str, str, str, int], tuple[str, str]] = {} + + def _lookup_inferred_local_group( + src_file: str, + name: str, + scope_chain: list[str], + call_byte: int, + type_table: dict[str, list[tuple[str, str | None, int]]], + ) -> tuple[str, str] | None: + if not isinstance(scope_chain, list): + return None + for sid in scope_chain: + visible_inferred = [ + group + for (path, scope_id, local_name, decl_byte), group in inferred_local_groups.items() + if path == src_file and scope_id == sid and local_name == name and decl_byte < call_byte + ] + if visible_inferred: + groups = set(visible_inferred) + if len(groups) == 1: + return next(iter(groups)) + return None + for decl_name, _type_name, decl_start_byte in type_table.get(sid, []): + if decl_name == name and decl_start_byte < call_byte: + return None + return None + + def _resolve_receiver_type( + rc, + receiver, + source_node, + src_file, + *, + type_tables=None, + use_inferred_locals: bool = True, + ): + caller = rc["caller_nid"] + enclosing_nid = enclosing_type.get(caller) + enclosing_group = member_key_by_nid.get(enclosing_nid) if enclosing_nid else None + active_type_tables = type_table_by_file if type_tables is None else type_tables + if receiver == "this": + return enclosing_group, False, {}, None + if receiver == "base": + if enclosing_group is None or enclosing_group in unresolved_base: + return None, False, {}, None + bases = inherits_of.get(enclosing_group, 
set()) + if len(bases) != 1: + return None, False, {}, None + return next(iter(bases)), False, {}, None + + if isinstance(receiver, str) and "." not in receiver: + type_table = active_type_tables.get(src_file, {}) + if use_inferred_locals: + inferred_group = _lookup_inferred_local_group( + src_file, + receiver, + rc.get("scope_chain", []), + rc.get("call_byte", 1 << 62), + type_table, + ) + if inferred_group is not None: + return inferred_group, True, {}, None + found, typed = _lookup_type_table( + type_table, + receiver, + rc.get("scope_chain", []), + rc.get("call_byte", 1 << 62), + ) + if found: + if typed is None: + return None, True, {}, None + return _declared_type_group(typed, source_node, src_file, enclosing_nid), True, {}, None + if enclosing_nid is not None: + member_types = member_types_by_nid.get(enclosing_nid, {}) + if receiver in member_types: + typed = member_types.get(receiver) + if typed is None: + return None, True, {}, None + return _declared_type_group(typed, source_node, src_file, enclosing_nid), True, {}, None + + if isinstance(receiver, str) and "." 
in receiver: + parts = receiver.split(".") + if len(parts) == 2 and parts[0] == "this": + if enclosing_nid is None: + return None, True, {}, None + member_types = member_types_by_nid.get(enclosing_nid, {}) + if parts[1] not in member_types: + return None, True, {}, None + typed = member_types.get(parts[1]) + if typed is None: + return None, True, {}, None + return _declared_type_group(typed, source_node, src_file, enclosing_nid), True, {}, None + + qualifier, _, label = receiver.rpartition(".") + leading = qualifier.split(".", 1)[0] + if _leading_qualifier_guard(leading, qualifier, rc, source_node, src_file, enclosing_group): + return None, False, {}, None + nid = resolver.resolve_qualified(label, qualifier, source_node, src_file) + return member_key_by_nid.get(nid) if nid else None, False, {"csharp_static": True}, None + if isinstance(receiver, str) and receiver[:1].isupper(): + if _complete_static_shadow(receiver, rc, source_node, src_file, enclosing_group): + return None, False, {}, None + nid = resolver.resolve_label(receiver, source_node, src_file) + return member_key_by_nid.get(nid) if nid else None, False, {"csharp_static": True}, None + return None, False, {}, None + + def _resolved_method_for_raw_call( + rc: dict, + source_node, + src_file: str, + *, + type_tables=None, + use_inferred_locals: bool = True, + ) -> tuple[tuple[str, str] | None, str | None]: + receiver, callee = rc.get("receiver"), rc.get("callee") + if receiver is None or not callee: + return None, None + callee_key = _key(callee) + if receiver == "": + group, forced_method_nid = _resolve_implicit_target(rc, callee_key, source_node, src_file) + metadata = {} + else: + group, _inferred_local, metadata, forced_method_nid = _resolve_receiver_type( + rc, + receiver, + source_node, + src_file, + type_tables=type_tables, + use_inferred_locals=use_inferred_locals, + ) + if group is None: + return None, None + method_nid = forced_method_nid + if method_nid is None: + if metadata.get("csharp_static") 
is True: + method_nid = _direct_method_id_for_group(group, callee_key) + else: + method_nid = _instance_method_id_for_group(group, callee_key) + return group, method_nid + + def _method_return_rhs_allowed(receiver) -> bool: + if receiver in ("", "this"): + return True + if not isinstance(receiver, str) or not receiver: + return False + if not receiver[:1].isupper(): + return False + return all(part.isidentifier() and part[:1].isupper() for part in receiver.split(".")) + + raw_call_by_file_byte: dict[tuple[str, int], dict] = {} + for rc in all_raw_calls: + if not isinstance(rc, dict) or rc.get("lang") != "csharp" or not rc.get("is_member_call"): + continue + src_file = rc.get("source_file") + call_byte = rc.get("call_byte") + if isinstance(src_file, str) and isinstance(call_byte, int): + raw_call_by_file_byte[(src_file, call_byte)] = rc + + frozen_type_table_by_file = { + path: {scope_id: list(entries) for scope_id, entries in scopes.items()} + for path, scopes in type_table_by_file.items() + } + + for src_file, init_facts in var_call_inits_by_file.items(): + poisoned = poisoned_var_call_inits_by_file.get(src_file, set()) + for fact in init_facts: + scope_id = fact.get("scope_id") + name = fact.get("name") + decl_byte = fact.get("decl_start_byte") + call_byte = fact.get("call_byte") + if not ( + isinstance(scope_id, str) + and isinstance(name, str) + and name + and isinstance(decl_byte, int) + and isinstance(call_byte, int) + ): + continue + if (scope_id, name) in poisoned: + continue + init_rc = raw_call_by_file_byte.get((src_file, call_byte)) + if init_rc is None or not _method_return_rhs_allowed(init_rc.get("receiver")): + continue + caller = init_rc.get("caller_nid") + source_node = node_by_id.get(caller) + if source_node is None: + continue + _group, method_nid = _resolved_method_for_raw_call( + init_rc, + source_node, + src_file, + type_tables=frozen_type_table_by_file, + use_inferred_locals=False, + ) + if method_nid is None or 
method_decl_count_by_nid.get(method_nid, 0) != 1: + continue + return_type = method_return_type_by_nid.get(method_nid) + if not return_type: + continue + method_node = node_by_id.get(method_nid) + if not isinstance(method_node, dict): + continue + method_src_file = method_node.get("source_file") + if not isinstance(method_src_file, str): + continue + method_enclosing_nid = enclosing_type.get(method_nid) + return_group = _declared_type_group(return_type, method_node, method_src_file, method_enclosing_nid) + if return_group is None or member_type_nid_by_key.get(return_group) is None: + continue + inferred_local_groups[(src_file, scope_id, name, decl_byte)] = return_group + + existing_pairs = {(e.get("source"), e.get("target")) for e in all_edges} + for rc in all_raw_calls: + if not isinstance(rc, dict) or rc.get("lang") != "csharp" or not rc.get("is_member_call"): + continue + receiver, callee, caller = rc.get("receiver"), rc.get("callee"), rc.get("caller_nid") + if receiver is None or not callee or not caller: + continue + source_node = node_by_id.get(caller) + if source_node is None: + continue + src_file = rc.get("source_file", "") + callee_key = _key(callee) + forced_method_nid = None + if receiver == "": + group, forced_method_nid = _resolve_implicit_target(rc, callee_key, source_node, src_file) + inferred_local = False + metadata = {} + else: + group, inferred_local, metadata, forced_method_nid = _resolve_receiver_type(rc, receiver, source_node, src_file) + if group is None: + continue + method_nid = forced_method_nid + if method_nid is None: + if metadata.get("csharp_static") is True: + method_nid = _direct_method_id_for_group(group, callee_key) + else: + method_nid = _instance_method_id_for_group(group, callee_key) + target = method_nid or member_type_nid_by_key.get(group) + if not target or target == caller or (caller, target) in existing_pairs: + continue + existing_pairs.add((caller, target)) + edge = { + "source": caller, + "target": target, + "relation": 
"calls" if method_nid else "references", + "context": "call", + "confidence": "INFERRED" if inferred_local else "EXTRACTED", + "confidence_score": 0.8 if inferred_local else 1.0, + "source_file": src_file, + "source_location": rc.get("source_location"), + "weight": 1.0, + } + if metadata: + edge["metadata"] = sanitize_metadata(metadata) + all_edges.append(edge) + + +def _is_placeholder_node(node: dict | None) -> bool: + return bool(node) and not node.get("source_file") + + +def _lookup_type_table( + scopes: dict[str, list[tuple[str, str | None, int]]], + name: str, + scope_chain: list[str], + call_byte: int, +) -> tuple[bool, str | None]: + """Nearest visible C# lexical binding for `name`, preserving unknown-type poisoning.""" + chain = scope_chain + if not isinstance(chain, list): + return False, None + for sid in chain: + visible: set[str | None] = set() + for decl_name, type_name, decl_start_byte in scopes.get(sid, []): + if decl_name == name and decl_start_byte < call_byte: + visible.add(type_name) + if not visible: + continue + typed = {type_name for type_name in visible if type_name is not None} + if len(typed) == 1 and None not in visible: + return True, next(iter(typed)) + return True, None + return False, None diff --git a/tests/test_csharp_member_calls.py b/tests/test_csharp_member_calls.py index d83265d9b..97215d140 100644 --- a/tests/test_csharp_member_calls.py +++ b/tests/test_csharp_member_calls.py @@ -1,143 +1,937 @@ -"""C# receiver-typed member-call resolution (#1609). - -`recv.Method()` where `recv` is a typed field / property / parameter / local must -resolve to the receiver TYPE's method — not a bare same-named match. Before this, -C# had no member-call resolver: the bare method name matched any same-named method -in the corpus, so `_server.Save()` silently mis-bound to an unrelated `Cache.Save()` -(a WRONG edge, not just a missing one). 
Resolution is by receiver type with the -single-definition god-node guard; an untypable receiver produces no edge. -""" from __future__ import annotations -import os +from collections import Counter from pathlib import Path from graphify.extract import extract -def _calls(tmp_path, files: dict[str, str]): - for name, body in files.items(): - p = tmp_path / name - p.parent.mkdir(parents=True, exist_ok=True) - p.write_text(body) - old = os.getcwd() - try: - os.chdir(tmp_path) - r = extract([Path(n) for n in files], cache_root=tmp_path / ".cache") - finally: - os.chdir(old) - calls = {(e["source"], e["target"]) for e in r["edges"] if e["relation"] == "calls"} - return calls, r - - -_AMBIG = { - "S.cs": ( - "public class Server { public bool Save() => true; }\n" - "public class Cache { public bool Save() => false; }\n" - "public class Repo {\n" - " private Server _server = new Server();\n" - " public bool Commit() { return _server.Save(); }\n" - "}\n" +def _write(path: Path, text: str) -> Path: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(text, encoding="utf-8") + return path + + +def _node_by_id(result: dict, nid: str) -> dict | None: + return next((n for n in result["nodes"] if n.get("id") == nid), None) + + +def _parse_csharp_root(source: bytes): + from tree_sitter import Language, Parser + import tree_sitter_c_sharp + + parser = Parser(Language(tree_sitter_c_sharp.language())) + return parser.parse(source).root_node + + +def _walk_tree(node): + yield node + for child in node.children: + yield from _walk_tree(child) + + +def _calls(result: dict, callee_label: str) -> list[dict]: + """Every `calls`/`references` edge whose target node has `callee_label`. 
+ Method targets are labelled `.Name()`.""" + out = [] + for e in result["edges"]: + if e.get("relation") not in ("calls", "references"): + continue + tgt = _node_by_id(result, e.get("target")) + if tgt is not None and tgt.get("label") == callee_label: + out.append(e) + return out + + +def _method_owner(result: dict, method_nid: str) -> dict | None: + for e in result["edges"]: + if e.get("relation") == "method" and e.get("target") == method_nid: + return _node_by_id(result, e.get("source")) + return None + + +def _call_owner_labels(result: dict, callee_label: str) -> list[str | None]: + return [ + (_method_owner(result, e["target"]) or {}).get("label") + for e in _calls(result, callee_label) + ] + + +def test_same_file_bare_name_no_misbind(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Other { public void Bar() {} }\n" + "class C { public void M() { var b = new B(); b.Bar(); } }\n" + "class B { public void Run() {} } }\n") + result = extract([src], cache_root=tmp_path) + # b.Bar() must not misbind to same-file Other.Bar; b IS a B, so a `references`-to-B + # (method-not-found on the correct receiver type) is fine — only a `calls` misbind is wrong. 
+ _assert_no_calls_edge_from(result, "C", ".M()") + + +def test_local_var_resolves_not_same_named(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N {\n" + "class A { public void Run() {} }\n" + "class B { public void Run() {} }\n" + "class C { public void M() { var a = new A(); a.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + hits = _calls(result, ".Run()") + assert len(hits) == 1, f"expected exactly one .Run() edge, got {hits}" + owner = _method_owner(result, hits[0]["target"]) + assert owner is not None and owner.get("label") == "A", owner + assert hits[0].get("confidence") == "INFERRED" + assert hits[0].get("confidence_score") == 0.8 + assert "metadata" not in hits[0] + + +def test_scope_keyed_no_contamination(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class A { public void Run() {} }\n" + "class B { public void Run() {} }\n" + "class C { void M1(){ var svc = new A(); svc.Run(); }\n" + "void M2(){ var svc = new B(); svc.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + assert Counter(_call_owner_labels(result, ".Run()")) == Counter({"A": 1, "B": 1}) + + +def test_redeclare_poison_skips(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class A { public void Run() {} }\n" + "class B { public void Run() {} }\n" + "class C { void M(){ var svc = new A(); var svc = new B(); svc.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_from(result, "C", ".M()") + + +def test_block_local_leak_skips(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class A { public void Run(){} } class B { public void Run(){} }\n" + "class C { A svc; void M(){ { B svc = new B(); } svc.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + # The inner block-local `B svc` is out of scope at svc.Run(); svc binds to the field + # `A svc`, so field-receiver inference resolves A.Run. The block-local must NOT leak to B.Run. 
+ _edge_from_to_owner(result, "C", ".M()", "A", ".Run()") + assert "B" not in _call_owner_labels(result, ".Run()") + + +def test_call_position_ignores_future_decl(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Service { public void Run(){} }\n" + "class C { void M(){ svc.Run(); var svc = new Service(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_from(result, "C", ".M()") + + +def test_this_and_base_resolve(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Base { public virtual void OnInit(){} }\n" + "class C : Base { void Helper(){} void M(){ this.Helper(); base.OnInit(); } } }\n") + result = extract([src], cache_root=tmp_path) + helper = _calls(result, ".Helper()") + base = _calls(result, ".OnInit()") + assert _call_owner_labels(result, ".Helper()") == ["C"] + assert _call_owner_labels(result, ".OnInit()") == ["Base"] + assert helper[0].get("confidence") == "EXTRACTED" + assert base[0].get("confidence") == "EXTRACTED" + + +def test_unresolved_base_skips(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class C : ExternalBase { void M(){ base.OnInit(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_from(result, "C", ".M()") + + +def test_partial_class_aggregates(tmp_path: Path): + p1 = _write(tmp_path / "p1.cs", "namespace N { partial class P { public void A(){} } }\n") + p2 = _write(tmp_path / "p2.cs", + "namespace N { partial class P { public void B(){} }\n" + "class C { void M(){ var p = new P(); p.A(); } } }\n") + result = extract([p1, p2], cache_root=tmp_path) + assert _call_owner_labels(result, ".A()") == ["P"] + + +def test_qualified_local_decl_skips(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace A { class Bar { public void Run(){} } }\n" + "namespace B { class Bar { public void Run(){} } }\n" + "namespace Use { class C { void M(){ A.Bar f = new A.Bar(); f.Run(); } } }\n") + result = extract([src], 
cache_root=tmp_path) + _assert_no_call_from(result, "C", ".M()") + + +def test_generic_callee_normalizes(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class G { public void Foo(){} public void Fooint(){} }\n" + "class C { void M(){ var g = new G(); g.Foo(); } } }\n") + result = extract([src], cache_root=tmp_path) + assert _call_owner_labels(result, ".Foo()") == ["G"] + assert _calls(result, ".Fooint()") == [] + + +def test_underscore_method_names_distinct(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class S { public void Foo_Bar(){} public void FooBar(){} }\n" + "class C { void M(){ var s = new S(); s.Foo_Bar(); } } }\n") + result = extract([src], cache_root=tmp_path) + assert _call_owner_labels(result, ".Foo_Bar()") == ["S"] + assert _calls(result, ".FooBar()") == [] + + +def test_method_return_var_implicit_rhs_resolves(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Service { public void Run(){} }\n" + "class C { Service Get(){ return null; } void M(){ var svc = Get(); svc.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + edge = _edge_from_to_owner(result, "C", ".M()", "Service", ".Run()") + assert edge.get("confidence") == "INFERRED" + assert edge.get("confidence_score") == 0.8 + assert "metadata" not in edge + + +def _method_id(result: dict, owner_label: str, method_label: str) -> str: + for e in result["edges"]: + if e.get("relation") != "method": + continue + owner = _node_by_id(result, e.get("source")) + target = _node_by_id(result, e.get("target")) + if owner and target and owner.get("label") == owner_label and target.get("label") == method_label: + return e["target"] + raise AssertionError(f"missing method {owner_label}.{method_label}") + + +def _calls_from_method(result: dict, owner_label: str, method_label: str) -> list[dict]: + caller = _method_id(result, owner_label, method_label) + return [ + e for e in result["edges"] + if e.get("source") == caller and 
e.get("relation") in ("calls", "references") + ] + + +def _assert_no_call_from(result: dict, owner_label: str, method_label: str) -> None: + edges = _calls_from_method(result, owner_label, method_label) + assert edges == [], f"expected no member-call edge from {owner_label}.{method_label}, got {edges}" + + +def _assert_no_calls_edge_from(result: dict, owner_label: str, method_label: str) -> None: + # A member-call MISBIND is a `calls` edge. A `references` edge to the receiver's own + # type (the method-not-found fallback) or to a parameter type is correct, not a + # misbind, so this asserts only that no wrong `calls` resolution was emitted. + caller = _method_id(result, owner_label, method_label) + calls = [e for e in result["edges"] if e.get("source") == caller and e.get("relation") == "calls"] + assert calls == [], f"expected no calls edge from {owner_label}.{method_label}, got {calls}" + + +def _edge_from_to_owner(result: dict, caller_owner: str, caller_method: str, target_owner: str, target_method: str) -> dict: + target = _method_id(result, target_owner, target_method) + for e in _calls_from_method(result, caller_owner, caller_method): + if e.get("target") == target: + return e + raise AssertionError(f"missing call from {caller_owner}.{caller_method} to {target_owner}.{target_method}") + + +def test_static_receiver_resolves_with_metadata(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Logger { public static void Warn(){} }\n" + "class C { void M(){ Logger.Warn(); } } }\n") + result = extract([src], cache_root=tmp_path) + edge = _edge_from_to_owner(result, "C", ".M()", "Logger", ".Warn()") + assert edge.get("confidence") == "EXTRACTED" + assert edge.get("metadata", {}).get("csharp_static") is True + + +def test_static_value_shadow_local_param_field_property_event_skip(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Logger { public static void Warn(){} }\n" + "class Actual { public void Warn(){} }\n" + "class C 
{ object Logger; event System.Action Ev; int Prop { get; }\n" + "void Local(){ Actual Logger = new Actual(); Logger.Warn(); }\n" + "void Param(Actual Logger){ Logger.Warn(); }\n" + "void Field(){ Logger.Warn(); }\n" + "void Property(){ Prop.ToString(); }\n" + "void Event(){ Ev(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_from(result, "C", ".Field()") + _assert_no_call_from(result, "C", ".Property()") + _assert_no_call_from(result, "C", ".Event()") + # Local() and Param() resolve Logger:Actual -> Actual.Warn. Field() has an + # object-typed member named Logger, which shadows the static Logger type but + # has no accepted bare declared receiver type. + assert _call_owner_labels(result, ".Warn()").count("Actual") == 2 + assert "Logger" not in _call_owner_labels(result, ".Warn()") + + +def test_static_namespace_method_typeparam_nested_using_static_alias_shadows_skip(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { namespace Logger { class X{} }\n" + "class Tools { public static void Logger(){} }\n" + "class Target { public static void Warn(){} }\n" + "using Logger = Target;\n" + "using static Tools;\n" + "class C { class Logger { public static void Warn(){} }\n" + "void M(){ void Logger(){} Logger.Warn(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_from(result, "C", ".M()") + + +def test_inherited_member_shadows_static_receiver(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Logger { public static void Warn(){} }\n" + "class Base { public int Logger; public void Other(){} public class Nested{} }\n" + "class Derived : Base { void M(){ Logger.Warn(); Other.Warn(); Nested.Warn(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_from(result, "Derived", ".M()") + + +def test_inherited_nested_type_shadow_static_receiver(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Logger { public static void Warn(){} }\n" + "class 
Base { public class Logger { public static void Warn(){} } }\n" + "class Derived : Base { void M(){ Logger.Warn(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_from(result, "Derived", ".M()") + + +def test_dotted_receiver_resolves_namespace_qualified_and_skips_nested_type_qualifier(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace Lib { class Logger { public static void Warn(){} } }\n" + "namespace Use { using Lib; class Outer { public class Inner { public static void Warn(){} }\n" + "void Good(){ Lib.Logger.Warn(); }\n" + "void Bad(){ Outer.Inner.Warn(); } } }\n") + result = extract([src], cache_root=tmp_path) + _edge_from_to_owner(result, "Outer", ".Good()", "Logger", ".Warn()") + _assert_no_call_from(result, "Outer", ".Bad()") + + +def test_dotted_receiver_resolves_namespace_alias(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace Lib { class Logger { public static void Warn(){} } }\n" + "namespace Use { using L = Lib; class C { void M(){ L.Logger.Warn(); } } }\n") + result = extract([src], cache_root=tmp_path) + _edge_from_to_owner(result, "C", ".M()", "Logger", ".Warn()") + + +def test_dotted_receiver_type_alias_takes_precedence_over_namespace_and_skips(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace B { class Logger { public static void Warn(){} } }\n" + "namespace Use { using B = X.Target; class C { void M(){ B.Logger.Warn(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_from(result, "C", ".M()") + + +def test_query_range_variable_shadows_static_receiver(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Logger { public static void Warn(){} }\n" + "class C { object[] xs; void M(){ var q = from Logger in xs select Logger.Warn(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_from(result, "C", ".M()") + + +def test_query_let_join_and_into_variables_shadows_static_receiver(tmp_path: Path): + src = 
_write(tmp_path / "s.cs", + "namespace N { class Logger { public static void Warn(){} }\n" + "class C { object[] xs; object[] ys;\n" + "void M(){ var q = from x in xs let Logger = x select Logger.Warn(); }\n" + "void J(){ var q = from x in xs join Logger in ys on x equals Logger into Logger select Logger.Warn(); }\n" + "void I(){ var q = from x in xs select x into Logger select Logger.Warn(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_from(result, "C", ".M()") + _assert_no_call_from(result, "C", ".J()") + _assert_no_call_from(result, "C", ".I()") + + +def test_nested_type_caller_inherited_member_shadows_static_receiver(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Logger { public static void Warn(){} }\n" + "class Base { public int Logger; }\n" + "class Outer { class Inner : Base { void M(){ Logger.Warn(); } } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_from(result, "Inner", ".M()") + + +def test_nested_type_base_id_collision_skips(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Logger { public static void Warn(){} }\n" + "class Base { }\n" + "class Outer { class Base { protected int Logger; }\n" + "class Inner : Base { void M(){ Logger.Warn(); } } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_from(result, "Inner", ".M()") + + +def test_global_using_static_shadows_across_files(tmp_path: Path): + g = _write(tmp_path / "global.cs", + "global using static N.Tools;\n" + "namespace N { class Tools { public static void Logger(){} } }\n") + u = _write(tmp_path / "user.cs", + "namespace N { class Logger { public static void Warn(){} }\n" + "class C { void M(){ Logger.Warn(); } } }\n") + result = extract([g, u], cache_root=tmp_path) + _assert_no_call_from(result, "C", ".M()") + + +def test_global_namespace_using_resolves_static_receiver_across_files(tmp_path: Path): + g = _write(tmp_path / "global.cs", + "global using Lib;\n" + 
"namespace Lib { class Logger { public static void Warn(){} } }\n") + u = _write(tmp_path / "user.cs", + "namespace Use { class C { void M(){ Logger.Warn(); } } }\n") + result = extract([g, u], cache_root=tmp_path) + _edge_from_to_owner(result, "C", ".M()", "Logger", ".Warn()") + + +def test_conditional_access_invocation_helper_and_resolution(tmp_path: Path): + from graphify.extractors import csharp_extract + + helper = getattr(csharp_extract, "csharp_invocation_callee", None) + assert callable(helper), "expected the C# invocation parser to be extracted" + + source = ( + b"namespace N { class Service { public void Run(){} } " + b"class C { void M(Service svc){ svc?.Run(); } } }" + ) + invocation = next(node for node in _walk_tree(_parse_csharp_root(source)) if node.type == "invocation_expression") + assert helper(invocation, source) == ("Run", True, "svc") + + src = _write(tmp_path / "s.cs", source.decode("utf-8")) + result = extract([src], cache_root=tmp_path) + _edge_from_to_owner(result, "C", ".M()", "Service", ".Run()") + + +def test_csharp_type_table_records_only_identifier_bare_types() -> None: + from graphify.extract import _build_csharp_type_table + + source = b"class Bar{} class C { void M(){ Bar f; A.Bar q; Bar[] arr; } }" + root = _parse_csharp_root(source) + table = _build_csharp_type_table(root, source) + entries = { + name: type_name + for scope_entries in table.values() + for name, type_name, _ in scope_entries + if name in {"f", "q", "arr"} + } + assert entries == {"f": "Bar", "q": None, "arr": None} + + +def test_csharp_type_table_matches_shadow_value_binders_minus_type_scoped_binders() -> None: + from graphify.extract import ( + _build_csharp_shadow_names, + _build_csharp_type_table, + _csharp_designator_names, + _csharp_names_from_variable_declaration, ) + + source = b""" +namespace N { +class Service { public void Run() {} } +record R(Service recordParam); +class Primary(Service classPrimary) {} +struct S(Service structPrimary) {} +class C { + 
Service fieldName; + event System.Action eventFieldName; + Service PropertyName { get; } + event System.Action EventName { add { } remove { } } + enum E { EnumValue } + Service[] xs; + object o; + object pair; + void M(Service paramName) { + Service localName = new Service(); + var objectCreated = new Service(); + var untypedLocal = fieldName; + foreach (Service foreachName in xs) { } + try { } catch (Service catchName) { } + if (o is Service patternName) { } + if (o is var varPatternName) { } + var (deconA, deconB) = pair; + Out(out Service outName); + Out(out var outVarName); + var q = from queryFrom in xs + let queryLet = queryFrom + join queryJoin in xs on queryFrom equals queryJoin into queryInto + select queryInto into queryContinuation + select queryContinuation; + System.Action anon = delegate(Service anonymousParam) { anonymousParam.ToString(); }; + xs.Select(implicitLambda => implicitLambda.ToString()); + xs.Select((lambdaA, lambdaB) => lambdaA.ToString()); + xs.Select((Service typedLambdaParam) => typedLambdaParam.ToString()); + void LocalFn(Service localFunctionParam) { localFunctionParam.ToString(); } + } + void Out(out Service value) { value = null; } + Service this[int indexerParam] { get { return fieldName; } } +} } +""" + root = _parse_csharp_root(source) + + shadow_values = { + name + for buckets in _build_csharp_shadow_names(root, source).values() + for name in buckets.get("values", []) + } + type_table_values = { + name + for entries in _build_csharp_type_table(root, source).values() + for name, _type_name, _decl_start in entries + } + + def text(node) -> str: + return source[node.start_byte:node.end_byte].decode("utf-8") + + def parameter_owner(node): + cur = node.parent + while cur is not None and cur.type in {"bracketed_parameter_list", "parameter_list"}: + cur = cur.parent + return cur + + type_scoped_values: set[str] = set() + + def walk(node) -> None: + if node.type == "parameter": + owner = parameter_owner(node) + if owner is not None and 
owner.type in { + "class_declaration", + "enum_declaration", + "interface_declaration", + "record_declaration", + "struct_declaration", + }: + type_scoped_values.update(_csharp_designator_names(node, source)) + elif node.type == "variable_declaration": + if node.parent is not None and node.parent.type in {"field_declaration", "event_field_declaration"}: + type_scoped_values.update(_csharp_names_from_variable_declaration(node, source)) + elif node.type in {"property_declaration", "event_declaration", "enum_member_declaration"}: + name_node = node.child_by_field_name("name") + if name_node is not None: + type_scoped_values.add(text(name_node)) + + for child in node.children: + walk(child) + + walk(root) + + assert type_table_values == shadow_values - type_scoped_values + + +def test_implicit_enclosing_and_base_methods_resolve(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Base { public void BaseRun(){} }\n" + "class C : Base { public void Run(){} void M(){ Run(); BaseRun(); } } }\n") + result = extract([src], cache_root=tmp_path) + _edge_from_to_owner(result, "C", ".M()", "C", ".Run()") + _edge_from_to_owner(result, "C", ".M()", "Base", ".BaseRun()") + + +def test_implicit_shadows_skip_delegate_param_local_static_local_using_static_and_field_event(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Tools { public static void FromStatic(){} }\n" + "using static Tools;\n" + "class C { System.Action Run; event System.Action Ev; public void RunMethod(){} public void FromStatic(){}\n" + "void LocalDelegate(){ System.Action RunMethod = null; RunMethod(); }\n" + "void ParamDelegate(System.Action RunMethod){ RunMethod(); }\n" + "void FieldDelegate(){ Run(); }\n" + "void EventDelegate(){ Ev(); }\n" + "void StaticLocal(){ static void RunMethod(){} RunMethod(); }\n" + "void LocalTypeParam(){ void Local(){ RunMethod(); } Local(); }\n" + "void UsingStatic(){ FromStatic(); } } }\n") + result = extract([src], cache_root=tmp_path) + # 
Each bare call is shadowed and must not resolve to a method (`calls`). ParamDelegate's + # `System.Action RunMethod` parameter yields a param-TYPE `references` edge (not a member + # call), so assert no wrong `calls` resolution rather than zero edges. + for method in (".LocalDelegate()", ".ParamDelegate()", ".FieldDelegate()", ".EventDelegate()", ".StaticLocal()", ".LocalTypeParam()", ".UsingStatic()"): + _assert_no_calls_edge_from(result, "C", method) + + +def test_partial_class_base_aggregation_for_implicit_base_method(tmp_path: Path): + p1 = _write(tmp_path / "p1.cs", "namespace N { class Base { public void BaseRun(){} } partial class P : Base { } }\n") + p2 = _write(tmp_path / "p2.cs", "namespace N { partial class P { void M(){ BaseRun(); } } }\n") + result = extract([p1, p2], cache_root=tmp_path) + _edge_from_to_owner(result, "P", ".M()", "Base", ".BaseRun()") + + +def test_implicit_wrong_bare_name_regression_skips_unrelated_same_file_method(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class B { public void Run(){} }\n" + "class C { void M(){ Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_from(result, "C", ".M()") + + +def test_implicit_generic_wrong_bare_name_regression_skips_unrelated_same_file_method(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class B { public void Run(){} }\n" + "class C { void M(){ Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_from(result, "C", ".M()") + + +def test_typed_foreach_catch_pattern_locals_resolve(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Actual { public void Run() {} }\n" + "class C { Actual[] xs; object o;\n" + "void Foreach(){ foreach (Actual item in xs) { item.Run(); } }\n" + "void Catch(){ try { } catch (Actual caught) { caught.Run(); } }\n" + "void Pattern(){ if (o is Actual pat) { pat.Run(); } } } }\n") + result = extract([src], cache_root=tmp_path) + assert 
Counter(_call_owner_labels(result, ".Run()")) == Counter({"Actual": 3}) + + +def test_declared_parameter_receiver_resolves(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Service { public void Run() {} }\n" + "class C { void M(Service svc){ svc.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + edge = _edge_from_to_owner(result, "C", ".M()", "Service", ".Run()") + assert edge.get("confidence") == "INFERRED" + assert edge.get("confidence_score") == 0.8 + assert "metadata" not in edge + + +def test_field_property_and_this_member_receivers_resolve(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Service { public void Run() {} }\n" + "class C { Service field; Service Prop { get; }\n" + "void BareField(){ field.Run(); }\n" + "void ThisField(){ this.field.Run(); }\n" + "void BareProp(){ Prop.Run(); }\n" + "void ThisProp(){ this.Prop.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + for method in (".BareField()", ".ThisField()", ".BareProp()", ".ThisProp()"): + edge = _edge_from_to_owner(result, "C", method, "Service", ".Run()") + assert edge.get("confidence") == "INFERRED" + assert edge.get("confidence_score") == 0.8 + + +def test_local_param_shadow_field_and_untyped_local_poisons(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class FieldSvc { public void Run() {} }\n" + "class LocalSvc { public void Run() {} }\n" + "class ParamSvc { public void Run() {} }\n" + "class C { FieldSvc svc; FieldSvc field;\n" + "void Local(){ LocalSvc svc = new LocalSvc(); svc.Run(); }\n" + "void Param(ParamSvc svc){ svc.Run(); }\n" + "void Untyped(){ var svc = field; svc.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _edge_from_to_owner(result, "C", ".Local()", "LocalSvc", ".Run()") + _edge_from_to_owner(result, "C", ".Param()", "ParamSvc", ".Run()") + _assert_no_calls_edge_from(result, "C", ".Untyped()") + + +def 
test_query_lambda_and_anonymous_method_binders_shadow_field(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class FieldSvc { public void Run() {} }\n" + "class OtherSvc { public void Run() {} }\n" + "class C { FieldSvc svc; object[] xs;\n" + "void Query(){ var q = from svc in xs select svc.Run(); }\n" + "void Lambda(){ xs.Select(svc => svc.Run()); }\n" + "void Anonymous(){ System.Action d = delegate(OtherSvc svc){ svc.Run(); }; } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_calls_edge_from(result, "C", ".Query()") + _assert_no_calls_edge_from(result, "C", ".Lambda()") + _edge_from_to_owner(result, "C", ".Anonymous()", "OtherSvc", ".Run()") + assert "FieldSvc" not in _call_owner_labels(result, ".Run()") + + +def test_type_parameter_declared_receiver_skips(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class T { public void Run() {} }\n" + "class C { void Param(T svc){ svc.Run(); }\n" + "void Local(){ T svc; svc.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_calls_edge_from(result, "C", ".Param()") + _assert_no_calls_edge_from(result, "C", ".Local()") + + +def test_visible_nested_declared_receiver_skips_top_level_same_name(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Logger { public void Run(){} }\n" + "class Outer { class Logger { public void Run(){} }\n" + "void M(Logger logger){ logger.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_calls_edge_from(result, "Outer", ".M()") + + +def test_instance_receiver_walks_base_chain_for_inherited_method(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Base { public void Run(){} }\n" + "class Derived : Base { }\n" + "class C { void M(Derived d){ d.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _edge_from_to_owner(result, "C", ".M()", "Base", ".Run()") + + +def 
test_static_and_dotted_type_receivers_do_not_walk_instance_base_chain(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "using Lib;\n" + "namespace Lib { class Base { public void Run(){} } class Derived : Base { } }\n" + "namespace Use { class C { void Bare(){ Derived.Run(); }\n" + "void Dotted(){ Lib.Derived.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_calls_edge_from(result, "C", ".Bare()") + _assert_no_calls_edge_from(result, "C", ".Dotted()") + + +def test_direct_first_skips_unknown_base_without_direct_and_keeps_direct_with_external_base(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Mid : ExternalBase { public void Run(){} }\n" + "class Derived : Mid { }\n" + "class Direct : ExternalBase { public void Run(){} }\n" + "class C { void Shadowed(Derived svc){ svc.Run(); }\n" + "void DirectHit(Direct svc){ svc.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_calls_edge_from(result, "C", ".Shadowed()") + _edge_from_to_owner(result, "C", ".DirectHit()", "Direct", ".Run()") + + +def test_positional_record_parameter_receiver_resolves_as_member(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Service { public void Run(){} }\n" + "record R(Service svc) { void Bare(){ svc.Run(); }\n" + "void This(){ this.svc.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _edge_from_to_owner(result, "R", ".Bare()", "Service", ".Run()") + _edge_from_to_owner(result, "R", ".This()", "Service", ".Run()") + + +def test_record_primary_constructor_base_type_emits_inherits_edge(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Logger {} record Base(Logger Logger);\n" + "record Derived(Logger Other) : Base(Other) { } }\n") + result = extract([src], cache_root=tmp_path) + inherits = { + ((_node_by_id(result, edge.get("source")) or {}).get("label"), + (_node_by_id(result, edge.get("target")) or {}).get("label")) + for edge in result["edges"] + 
if edge.get("relation") == "inherits" + } + assert ("Derived", "Base") in inherits + + +def test_inherited_record_positional_param_shadows_static_receiver(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Logger { public static void Warn(){} }\n" + "record Base(Logger Logger);\n" + "record Derived(Logger Other) : Base(Other) { void M(){ Logger.Warn(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_calls_edge_from(result, "Derived", ".M()") + + +def test_class_and_struct_primary_constructor_params_do_not_resolve_as_members(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Service { public void Run(){} }\n" + "class C(Service svc) { void M(){ svc.Run(); } }\n" + "struct S(Service svc) { void M(){ svc.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_calls_edge_from(result, "C", ".M()") + _assert_no_calls_edge_from(result, "S", ".M()") + + +def test_partial_sibling_member_type_alias_does_not_resolve_at_other_part_call_site(tmp_path: Path): + a = _write(tmp_path / "a.cs", + "using S = N.Good;\n" + "namespace N { partial class C { S svc; }\n" + "class Good { public void Run(){} } }\n") + b = _write(tmp_path / "b.cs", + "using S = N.Bad;\n" + "namespace N { partial class C { void M(){ svc.Run(); } }\n" + "class Bad { public void Run(){} } }\n") + result = extract([a, b], cache_root=tmp_path) + _assert_no_calls_edge_from(result, "C", ".M()") + assert "Bad" not in _call_owner_labels(result, ".Run()") + assert "Good" not in _call_owner_labels(result, ".Run()") + + +def _method_id_with_owner_namespace(result: dict, owner_label: str, owner_namespace: str, method_label: str) -> str: + for e in result["edges"]: + if e.get("relation") != "method": + continue + owner = _node_by_id(result, e.get("source")) + target = _node_by_id(result, e.get("target")) + metadata = (owner or {}).get("metadata") or {} + if ( + owner + and target + and owner.get("label") == owner_label + and 
metadata.get("namespace", "") == owner_namespace + and target.get("label") == method_label + ): + return e["target"] + raise AssertionError(f"missing method {owner_namespace}.{owner_label}.{method_label}") + + +def _edge_from_to_owner_namespace( + result: dict, + caller_owner: str, + caller_method: str, + target_owner: str, + target_namespace: str, + target_method: str, +) -> dict: + target = _method_id_with_owner_namespace(result, target_owner, target_namespace, target_method) + for e in _calls_from_method(result, caller_owner, caller_method): + if e.get("target") == target: + return e + raise AssertionError( + f"missing call from {caller_owner}.{caller_method} to {target_namespace}.{target_owner}.{target_method}" + ) + + +def _assert_no_call_label_from(result: dict, owner_label: str, method_label: str, callee_label: str) -> None: + labels = [ + (_node_by_id(result, edge.get("target")) or {}).get("label") + for edge in _calls_from_method(result, owner_label, method_label) + if edge.get("relation") == "calls" + ] + assert callee_label not in labels, f"unexpected {callee_label} call from {owner_label}.{method_label}: {labels}" + + +def _assert_no_call_to_owner( + result: dict, + caller_owner: str, + caller_method: str, + target_owner: str, + target_method: str, +) -> None: + target = _method_id(result, target_owner, target_method) + unexpected = [ + edge + for edge in _calls_from_method(result, caller_owner, caller_method) + if edge.get("relation") == "calls" and edge.get("target") == target + ] + assert unexpected == [], ( + f"unexpected call from {caller_owner}.{caller_method} to {target_owner}.{target_method}: {unexpected}" + ) + + +def test_method_return_var_this_rhs_resolves(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class ThisSvc { public void Run(){} }\n" + "class C { ThisSvc Get(){ return null; } void M(){ var svc = this.Get(); svc.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + edge = _edge_from_to_owner(result, 
"C", ".M()", "ThisSvc", ".Run()") + assert edge.get("confidence") == "INFERRED" + assert edge.get("confidence_score") == 0.8 + + +def test_method_return_var_static_and_dotted_rhs_resolve(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace StaticNs { class StaticSvc { public void Run(){} }\n" + "class Factory { public static StaticSvc Get(){ return null; } } }\n" + "namespace DottedNs { class DottedSvc { public void Run(){} }\n" + "class Factory { public static DottedSvc Get(){ return null; } } }\n" + "namespace Use { using StaticNs; class C { void M(){ var a = Factory.Get(); a.Run();\n" + "var b = DottedNs.Factory.Get(); b.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + assert _edge_from_to_owner(result, "C", ".M()", "StaticSvc", ".Run()").get("confidence") == "INFERRED" + assert _edge_from_to_owner(result, "C", ".M()", "DottedSvc", ".Run()").get("confidence") == "INFERRED" + + +def test_method_return_var_resolves_return_type_in_callee_context(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace Lib { class Service { public void Run(){} }\n" + "class Factory { public static Service Get(){ return null; } } }\n" + "namespace App { using Lib; class Service { public void Run(){} }\n" + "class C { void M(){ var svc = Lib.Factory.Get(); svc.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + edge = _edge_from_to_owner_namespace(result, "C", ".M()", "Service", "Lib", ".Run()") + assert edge.get("confidence") == "INFERRED" + assert "App" not in [ + ((_method_owner(result, e["target"]) or {}).get("metadata") or {}).get("namespace") + for e in _calls_from_method(result, "C", ".M()") + if (_node_by_id(result, e.get("target")) or {}).get("label") == ".Run()" + ] + + +def test_method_return_var_overloaded_method_skips(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Service { public void Run(){} } class Other { public void Run(){} }\n" + "class C { Service Get(){ return null; } Other Get(int i){ 
return null; }\n" + "void M(){ var svc = Get(); svc.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_label_from(result, "C", ".M()", ".Run()") + + +def test_method_return_var_same_file_partial_overload_skips(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Service { public void Run(){} } class Other { public void Run(){} }\n" + "partial class C { Service Get(){ return null; } }\n" + "partial class C { Other Get(int i){ return null; } }\n" + "partial class C { void M(){ var svc = Get(); svc.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_label_from(result, "C", ".M()", ".Run()") + + +def test_method_return_var_non_bare_returns_skip(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "using System.Collections.Generic;\n" + "namespace N { class Service { public void Run(){} } class Box {}\n" + "class C {\n" + "Service[] ArrayGet(){ return null; } void ArrayCase(){ var svc = ArrayGet(); svc.Run(); }\n" + "Service? 
NullableGet(){ return null; } void NullableCase(){ var svc = NullableGet(); svc.Run(); }\n" + "N.Service QualifiedGet(){ return null; } void QualifiedCase(){ var svc = QualifiedGet(); svc.Run(); }\n" + "Box GenericGet(){ return null; } void GenericCase(){ var svc = GenericGet(); svc.Run(); }\n" + "void VoidGet(){ } void VoidCase(){ var svc = VoidGet(); svc.Run(); }\n" + "T TypeParamGet(){ return default; } void TypeParamCase(){ var svc = TypeParamGet(); svc.Run(); }\n" + "ref Service RefGet(){ throw null; } void RefCase(){ var svc = RefGet(); svc.Run(); }\n" + "(Service a, Service b) TupleGet(){ throw null; } void TupleCase(){ var svc = TupleGet(); svc.Run(); }\n" + "} }\n") + result = extract([src], cache_root=tmp_path) + for method in ( + ".ArrayCase()", ".NullableCase()", ".QualifiedCase()", ".GenericCase()", + ".VoidCase()", ".TypeParamCase()", ".RefCase()", ".TupleCase()", + ): + _assert_no_call_label_from(result, "C", method, ".Run()") + + +def test_method_return_var_reassignment_poison_skips(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Service { public void Run(){} } class Other { public void Run(){} }\n" + "class C { Service Get(){ return null; } Other Other(){ return null; }\n" + "void M(){ var svc = Get(); svc = Other(); svc.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_label_from(result, "C", ".M()", ".Run()") + + +def test_method_return_var_inner_declared_local_shadows_outer_inferred(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Service { public void Run(){} } class Other { public void Run(){} }\n" + "class C { Service Get(){ return null; }\n" + "void M(){ var svc = Get(); { Other svc; svc.Run(); } } } }\n") + result = extract([src], cache_root=tmp_path) + _edge_from_to_owner(result, "C", ".M()", "Other", ".Run()") + _assert_no_call_to_owner(result, "C", ".M()", "Service", ".Run()") + + +def test_method_return_var_transitive_inference_skips_second_var(tmp_path: 
Path): + src = _write(tmp_path / "s.cs", + "namespace N { class NextSvc { public void Run(){} }\n" + "class Service { public NextSvc Next(){ return null; } public void Run(){} }\n" + "class C { Service Get(){ return null; }\n" + "void M(){ var a = Get(); var b = a.Next(); a.Run(); b.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _edge_from_to_owner(result, "C", ".M()", "Service", ".Run()") + run_owners = [ + (_method_owner(result, e["target"]) or {}).get("label") + for e in _calls_from_method(result, "C", ".M()") + if (_node_by_id(result, e.get("target")) or {}).get("label") == ".Run()" + ] + assert Counter(run_owners) == Counter({"Service": 1}) + + +def test_method_return_var_lowercase_instance_rhs_skips(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Service { public void Run(){} }\n" + "class Factory { public Service Get(){ return null; } }\n" + "class C { void M(){ var obj = new Factory(); var svc = obj.Get(); svc.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_label_from(result, "C", ".M()", ".Run()") -def _find(r, label, id_contains): - return next(n["id"] for n in r["nodes"] - if n["label"] == label and id_contains in n["id"]) - - -def test_field_receiver_resolves_to_declared_type_not_bare_match(tmp_path): - calls, r = _calls(tmp_path, _AMBIG) - commit = _find(r, ".Commit()", "commit") - server_save = _find(r, ".Save()", "server") - cache_save = _find(r, ".Save()", "cache") - assert (commit, server_save) in calls, "field.Method() must resolve to the field's type" - assert (commit, cache_save) not in calls, "must NOT mis-bind to an unrelated same-named method" - - -def test_parameter_receiver_resolves(tmp_path): - calls, r = _calls(tmp_path, { - "S.cs": ( - "public class Server { public bool Save() => true; }\n" - "public class Cache { public bool Save() => false; }\n" - "public class Svc { public static bool Copy(Server server) { return server.Save(); } }\n" - ) - }) - assert 
any("copy" in s and "server_save" in t for s, t in calls) - assert not any("copy" in s and "cache_save" in t for s, t in calls) - - -def test_local_var_receiver_resolves(tmp_path): - calls, r = _calls(tmp_path, { - "S.cs": ( - "public class Server { public bool Save() => true; }\n" - "public class R {\n" - " public bool A() { Server s = new Server(); return s.Save(); }\n" - " public bool B() { var v = new Server(); return v.Save(); }\n" - "}\n" - ) - }) - assert any("_r_a" in s and "server_save" in t for s, t in calls), "explicit-typed local" - assert any("_r_b" in s and "server_save" in t for s, t in calls), "var = new T() local" - - -def test_cross_file_receiver_resolves(tmp_path): - calls, r = _calls(tmp_path, { - "Server.cs": ( - "public class Server { public bool Save() => true; }\n" - "public class Cache { public bool Save() => false; }\n" - ), - "Repo.cs": ( - "public class Repo { private Server _s = new Server(); " - "public bool Commit() { return _s.Save(); } }\n" - ), - }) - assert any("commit" in s and "server_save" in t for s, t in calls) - assert not any("commit" in s and "cache_save" in t for s, t in calls) - - -def test_this_and_static_receivers(tmp_path): - calls, r = _calls(tmp_path, { - "S.cs": ( - "public class Util { public static int F() => 1; }\n" - "public class R {\n" - " public bool A() { return this.B(); }\n" - " public bool B() => true;\n" - " public int G() { return Util.F(); }\n" - "}\n" - ) - }) - assert any("_r_a" in s and "_r_b" in t for s, t in calls), "this.B() -> R.B" - assert any("_r_g" in s and "util_f" in t for s, t in calls), "Util.F() -> Util.F" - - -def test_untyped_receiver_emits_no_edge(tmp_path): - calls, r = _calls(tmp_path, { - "S.cs": ( - "public class Server { public bool Save() => true; }\n" - "public class R { public bool C(dynamic x) { return x.Save(); } }\n" - ) - }) - assert not any("save" in t.lower() for _s, t in calls), "dynamic receiver must not resolve" - - -def 
test_method_absent_on_type_emits_no_edge(tmp_path): - calls, r = _calls(tmp_path, { - "S.cs": ( - "public class Server { public bool Save() => true; }\n" - "public class R { private Server _s = new Server(); " - "public bool C() { return _s.Missing(); } }\n" - ) - }) - assert not any("_r_c" in s and "save" in t.lower() for s, t in calls) - - -def test_unqualified_call_still_resolves(tmp_path): - calls, r = _calls(tmp_path, { - "S.cs": ( - "public class R { public bool A() { Helper(); return true; } " - "private void Helper() {} }\n" - ) - }) - assert any("_r_a" in s and "helper" in t for s, t in calls), "no regression on unqualified calls" +def test_method_return_var_property_and_field_rhs_skip(tmp_path: Path): + src = _write(tmp_path / "s.cs", + "namespace N { class Service { public void Run(){} }\n" + "class C { Service field; Service Prop { get; }\n" + "void M(){ var a = field; a.Run(); var b = Prop; b.Run(); } } }\n") + result = extract([src], cache_root=tmp_path) + _assert_no_call_label_from(result, "C", ".M()", ".Run()") diff --git a/tests/test_csharp_type_resolution.py b/tests/test_csharp_type_resolution.py index 694a491d2..69ddc499c 100644 --- a/tests/test_csharp_type_resolution.py +++ b/tests/test_csharp_type_resolution.py @@ -15,6 +15,32 @@ def _node_by_id(result: dict, nid: str) -> dict | None: return next((n for n in result["nodes"] if n.get("id") == nid), None) +def _parse_csharp_root(source: bytes): + from tree_sitter import Language, Parser + import tree_sitter_c_sharp + + parser = Parser(Language(tree_sitter_c_sharp.language())) + return parser.parse(source).root_node + + +def _walk_tree(node): + yield node + for child in node.children: + yield from _walk_tree(child) + + +def _edge_labels(result: dict, relation: str, context: str | None = None) -> set[tuple[str, str]]: + labels = {node["id"]: node["label"] for node in result["nodes"]} + pairs = set() + for edge in result["edges"]: + if edge.get("relation") != relation: + continue + if context is not 
None and edge.get("context") != context: + continue + pairs.add((labels.get(edge["source"], edge["source"]), labels.get(edge["target"], edge["target"]))) + return pairs + + def _targets(result: dict, relation: str, label: str) -> list[dict]: out = [] for e in result["edges"]: @@ -226,6 +252,55 @@ def test_csharp_import_edges_carry_using_kind(tmp_path: Path): assert ("alias", "Game.Core.Damage", "X") in imports, imports +def test_csharp_file_facts_helper_and_extern_alias_import(tmp_path: Path): + from graphify.extractors import csharp_extract + + helper = getattr(csharp_extract, "csharp_file_facts", None) + assert callable(helper), "expected the C# file fact assembly helper to be extracted" + + source = ( + b"extern alias Legacy;\n" + b"class Foo { public void Bar() {} }\n" + b"class Z {\n" + b" void M() {\n" + b" Foo x = new Foo();\n" + b" x.Bar();\n" + b" }\n" + b"}\n" + ) + facts = helper(_parse_csharp_root(source), source, "a.cs") + assert set(facts) == {"csharp_type_table", "csharp_shadow_names", "csharp_var_call_inits"} + assert facts["csharp_type_table"]["path"] == "a.cs" + assert facts["csharp_shadow_names"]["path"] == "a.cs" + assert facts["csharp_var_call_inits"]["path"] == "a.cs" + assert facts["csharp_var_call_inits"]["inits"] == [] + assert facts["csharp_var_call_inits"]["poisoned"] == [] + # The helper must carry the real per-file binding SCOPES, not just the keys: + # the typed local `Foo x` must land in the type table and the shadow values, + # so a helper returning empty/wrong scopes would fail here. 
+ tt_bindings = { + (name, typ) + for entries in facts["csharp_type_table"]["scopes"].values() + for (name, typ, _byte) in entries + } + assert ("x", "Foo") in tt_bindings, tt_bindings + shadow_values = { + value + for bucket in facts["csharp_shadow_names"]["scopes"].values() + for value in bucket.get("values", []) + } + assert "x" in shadow_values, shadow_values + + f = _write(tmp_path / "a.cs", source.decode("utf-8")) + result = extract([f], cache_root=tmp_path) + imports = { + (e["metadata"].get("using_kind"), e["metadata"].get("target_fqn"), e["metadata"].get("alias")) + for e in result["edges"] + if e.get("relation") == "imports" and e.get("metadata") + } + assert ("extern_alias", "Legacy", "Legacy") in imports, imports + + def test_csharp_import_edges_resolve_internal_namespace_and_alias(tmp_path: Path): core = _write( tmp_path / "core.cs", @@ -274,6 +349,31 @@ def test_csharp_import_edges_resolve_internal_namespace_and_alias(tmp_path: Path ] +def test_csharp_generic_base_list_helper_and_generic_arg_reference(tmp_path: Path): + from graphify.extractors import csharp_extract + + helper = getattr(csharp_extract, "csharp_base_list_facts", None) + assert callable(helper), "expected the C# base-list parser to be extracted" + + source = ( + b"namespace N { class Dep {} class Base<T> {} " + b"class Derived : Base<Dep> {} }" + ) + derived = next( + node for node in _walk_tree(_parse_csharp_root(source)) + if node.type == "class_declaration" + and any(child.type == "identifier" and source[child.start_byte:child.end_byte] == b"Derived" for child in node.children) + ) + assert helper(derived, source, set(), frozenset()) == [ + ("Base", False, "", "inherits", [("Dep", False, "")]) + ] + + f = _write(tmp_path / "a.cs", source.decode("utf-8")) + result = extract([f], cache_root=tmp_path) + assert ("Derived", "Base") in _edge_labels(result, "inherits") + assert ("Derived", "Dep") in _edge_labels(result, "references", "generic_arg") + + def
test_csharp_qualified_base_ref_is_flagged(tmp_path: Path): f = _write(tmp_path / "a.cs", "namespace N { class T {} class Use : B.T {} }\n") result = extract([f], cache_root=tmp_path) diff --git a/uv.lock b/uv.lock index fcd87008b..c28eebf68 100644 --- a/uv.lock +++ b/uv.lock @@ -1090,7 +1090,7 @@ wheels = [ [[package]] name = "graphifyy" -version = "0.9.3" +version = "0.9.5" source = { editable = "." } dependencies = [ { name = "networkx", version = "3.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },