Skip to content

Commit 0f0037d

Browse files
committed
style: Add macro preprocessing to fix tree-sitter parse errors
1 parent 5a76b38 commit 0f0037d

1 file changed

Lines changed: 30 additions & 1 deletion

File tree

scripts/cpp/convert_leading_spaces_to_tabs.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import argparse
1919
import glob
2020
import os
21+
import re
2122

2223
import tree_sitter_cpp as tscpp
2324
from tree_sitter import Language, Parser
@@ -26,6 +27,33 @@
2627
CPP_LANGUAGE = Language(tscpp.language())
2728

2829

30+
# Macros that confuse tree-sitter's C++ parser. They are rewritten in the copy
# of the source that is handed to the parser (but not in the output).
#
# Each entry is a (compiled pattern, replacement) pair. The replacement is
# space-padded to exactly the length of the macro name it replaces, so byte
# offsets in the parsed text stay identical to those in the original text.
# The padding is computed from the name rather than typed by hand so the
# lengths cannot drift apart when an entry is added or edited.
MACRO_EXPANSIONS = [
    (re.compile(rb'\b' + name + rb'\b'), text.ljust(len(name)))
    for name, text in (
        # Macros that are simply blanked out.
        (b'CALLBACK', b''),
        (b'GCALL', b''),
        (b'WINAPI', b''),
        (b'IN', b''),
        (b'OUT', b''),
        (b'RO', b''),
        # Allocation macros are rewritten to a plain `new`.
        (b'W3DNEW', b'new'),
        (b'NEW', b'new'),
        # More macros that are blanked out.
        (b'__RPC_FAR', b''),
        (b'__RPC_STUB', b''),
        (b'__asm', b''),
        (b'_asm', b''),
    )
]
47+
48+
49+
def preprocess_for_parsing(code_bytes, expansions=None):
    """Apply macro expansions to help tree-sitter parse the code.

    The result is meant to be parsed in place of the original bytes, so it
    must have exactly the same length: every match is replaced by its
    replacement text padded with spaces to the length of the matched macro.

    Args:
        code_bytes: The source text as bytes.
        expansions: Optional iterable of (compiled bytes pattern, replacement
            bytes) pairs. Defaults to the module-level MACRO_EXPANSIONS.

    Returns:
        A bytes object with the macros rewritten. It has the same length as
        code_bytes as long as no replacement is longer than the macro it
        replaces.
    """
    if expansions is None:
        expansions = MACRO_EXPANSIONS

    result = code_bytes
    for pattern, replacement in expansions:
        # A callable replacement lets each match be padded to its own length,
        # so byte offsets are preserved even if a table entry is not padded.
        result = pattern.sub(
            lambda match, text=replacement: text.ljust(len(match.group())),
            result,
        )
    return result
55+
56+
2957
# Node types that create an indentation level for their children.
3058
SCOPE_CREATING_TYPES = frozenset({
3159
'compound_statement',
@@ -116,7 +144,8 @@ def process_file(filepath, dry_run=False, verbose=False):
116144
lines = lines[:-1] # split adds an empty string after trailing \n
117145

118146
code_bytes = content.encode('utf-8')
119-
tree = parser.parse(code_bytes)
147+
parse_bytes = preprocess_for_parsing(code_bytes)
148+
tree = parser.parse(parse_bytes)
120149

121150
# If the file has excessive parse errors, the AST is unreliable - skip it.
122151
# Only count top-level errors (not deeply nested ones from macro expansions).

0 commit comments

Comments
 (0)