Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions graphify/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -15839,7 +15839,7 @@ def _is_cpp_header(path: Path) -> bool:

def _get_extractor(path: Path) -> Any | None:
"""Return the correct extractor function for a file, or None if unsupported."""
if path.name.endswith(".blade.php"):
if path.name.lower().endswith(".blade.php"):
return extract_blade
# MCP config files (.mcp.json, claude_desktop_config.json, ...) are routed
# by filename before generic .json dispatch so they get MCP-aware nodes
Expand All @@ -15855,14 +15855,17 @@ def _get_extractor(path: Path) -> Any | None:
# (the suffix map sends `.h` to extract_c, which can't read @interface etc.).
# ObjC sniffing has priority over the C++ sniff: an Objective-C++ header can
# contain both `@interface` and inline C++ (`::`), and it must parse as ObjC.
if path.suffix == ".h":
suffix = path.suffix
if suffix not in _DISPATCH and suffix.lower() in _DISPATCH:
suffix = suffix.lower()
if suffix == ".h":
if _is_objc_header(path):
return extract_objc
# A C++ class header routed to extract_c loses the class entirely (the C
# grammar has no class_specifier). Reroute to extract_cpp (#1547).
if _is_cpp_header(path):
return extract_cpp
return _DISPATCH.get(path.suffix)
return _DISPATCH.get(suffix)


def _safe_extract_with_xaml_root(extractor, path: Path, root: Path) -> dict:
Expand Down Expand Up @@ -16614,7 +16617,8 @@ def _ignored(p: Path) -> bool:
]
for fname in filenames:
p = dp / fname
if p.suffix in _EXTENSIONS and not _ignored(p) and _resolves_under_root(p, containment_root):
suffix = p.suffix
if (suffix in _EXTENSIONS or suffix.lower() in _EXTENSIONS) and not _ignored(p) and _resolves_under_root(p, containment_root):
results.append(p)
return sorted(results)
# Walk with symlink following + cycle detection
Expand All @@ -16634,7 +16638,8 @@ def _ignored(p: Path) -> bool:
]
for fname in filenames:
p = dp / fname
if p.suffix in _EXTENSIONS and not _ignored(p) and _resolves_under_root(p, containment_root):
suffix = p.suffix
if (suffix in _EXTENSIONS or suffix.lower() in _EXTENSIONS) and not _ignored(p) and _resolves_under_root(p, containment_root):
results.append(p)
return sorted(results)

Expand Down
28 changes: 27 additions & 1 deletion tests/test_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,8 @@ def _legacy_collect_files(target, *, root=None):
for ext in sorted(extensions):
results.extend(
p for p in target.rglob(f"*{ext}")
if not any(_is_noise_dir(part) for part in p.parts)
if p.suffix == ext
and not any(_is_noise_dir(part) for part in p.parts)
and not (patterns and _is_ignored(p, ignore_root, patterns))
)
return sorted(results)
Expand Down Expand Up @@ -1693,3 +1694,28 @@ def test_non_colliding_path_id_is_not_salted(tmp_path):
result = extract([p], cache_root=tmp_path)
file_id = next(n["id"] for n in result["nodes"] if n.get("source_location") == "L1")
assert file_id == make_id(_file_stem(Path("src/auth/session.py"))) == "src_auth_session"


def test_case_insensitive_suffix_filtering(tmp_path):
    """Files whose suffix differs only by letter case are collected and extracted."""
    # Filename -> source text; each suffix is an upper- or mixed-case variant.
    sources = {
        "app.PY": "class MyPythonClass:\n pass\n",
        "script.JS": "function myJSFunction() {}\n",
        "lib.Ts": "export class MyTSClass {}\n",
    }
    for filename, body in sources.items():
        (tmp_path / filename).write_text(body)

    # Collection must keep every file under its original (un-lowered) name.
    collected = collect_files(tmp_path)
    found_names = {entry.name for entry in collected}
    for filename in sources:
        assert filename in found_names

    # Extraction must yield the symbol declared in each file.
    result = extract(collected, cache_root=tmp_path)
    labels = {node.get("label") for node in result["nodes"] if "label" in node}
    for expected in ("MyPythonClass", "myJSFunction()", "MyTSClass"):
        assert expected in labels