Skip to content

Commit ffd570d

Browse files
thesprockee and claude authored
Add buildwordlist command to harvest 52K+ symbol names (#9)
* Add buildwordlist command to harvest 52K+ symbol names

  Collects human-readable EVR symbol names from evr-reconstruction data
  sources and writes a deduplicated wordlist for use with symhash -wordlist
  for reverse hash lookups.

  Sources:
  - symbol_table.txt: 51,338 names (replicated vars, game state, cosmetics)
  - customization_models.json: 526 backpack/chassis model names
  - multiplayer JSON files: item names, equip slots, reward names
  - message_catalog.yaml: 161 SNS message type names
  - pkg/tint KnownTints: known tint symbol names

  Also bundles pkg/hash and cmd/symhash as dependencies.

  Usage:
    buildwordlist -src ~/src/evr-reconstruction -out evr-names.txt
    buildwordlist -src ~/src/evr-reconstruction -out evr-names.txt -verify ./extracted

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(buildwordlist): remove JSON comment stripping, validate algo flag, fix dead tests

  - Remove lineCommentRe regex that corrupted URLs in JSON string values
  - Validate -algo flag in symhash, error on unknown algorithm instead of silent fallthrough
  - Remove dead if-block with no assertion in TestSNSMessageHashStripsSPrefix
  - Add TODO comment to TestCSymbol64HashKnownVector for unverified known vector
  - Check f.Close() explicitly after w.Flush() instead of relying on defer

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 6377a58 commit ffd570d

3 files changed

Lines changed: 317 additions & 19 deletions

File tree

cmd/buildwordlist/main.go

Lines changed: 310 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,310 @@
1+
// Command buildwordlist harvests human-readable EVR symbol names from
2+
// evr-reconstruction data sources and writes a deduplicated wordlist
3+
// suitable for use with symhash -wordlist.
4+
//
5+
// Sources harvested:
6+
// - symbol_table.txt (51K+ names from rad_archive_tool)
7+
// - multiplayer JSON files (cosmetics, items, models)
8+
// - SNS message catalog YAML (network message type names)
9+
// - Known tint names
10+
//
11+
// Usage:
12+
//
13+
// buildwordlist -src ~/src/evr-reconstruction -out names.txt
14+
// buildwordlist -src ~/src/evr-reconstruction -out names.txt -verify ./extracted
15+
package main
16+
17+
import (
18+
"bufio"
19+
"encoding/json"
20+
"flag"
21+
"fmt"
22+
"os"
23+
"path/filepath"
24+
"regexp"
25+
"sort"
26+
"strings"
27+
28+
"github.com/EchoTools/evrFileTools/pkg/hash"
29+
"github.com/EchoTools/evrFileTools/pkg/tint"
30+
)
31+
32+
// Command-line options. Each one is bound to a flag in init and holds its
// final value once flag.Parse has run.
var (
	srcDir    string // -src: path to the evr-reconstruction repository
	outFile   string // -out: path of the wordlist file to write
	verifyDir string // -verify: extracted package dir to check hashes against
	statsOnly bool   // -stats: print harvest stats only, write nothing
)

// init binds the option variables above to their flags on the default
// command-line flag set.
func init() {
	flag.BoolVar(&statsOnly, "stats", false, "Print harvest stats without writing output file")
	flag.StringVar(&verifyDir, "verify", "", "Extracted package dir to verify hashes against file names")
	flag.StringVar(&outFile, "out", "evr-names.txt", "Output wordlist file path")
	flag.StringVar(&srcDir, "src", "", "Path to evr-reconstruction repository")
}
45+
46+
func main() {
47+
flag.Parse()
48+
49+
if srcDir == "" {
50+
fmt.Fprintf(os.Stderr, "Usage: buildwordlist -src <evr-reconstruction dir> [-out names.txt]\n")
51+
flag.PrintDefaults()
52+
os.Exit(1)
53+
}
54+
55+
if err := run(); err != nil {
56+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
57+
os.Exit(1)
58+
}
59+
}
60+
61+
func run() error {
62+
names := make(map[string]struct{})
63+
64+
// 1. Symbol table (largest source: ~51K names)
65+
symbolTablePath := filepath.Join(srcDir, "tools", "rad_archive_tool", "build", "test_extract", ".rad_manifest", "symbol_table.txt")
66+
n, err := harvestSymbolTable(names, symbolTablePath)
67+
if err != nil {
68+
fmt.Fprintf(os.Stderr, "Warning: symbol table not found (%v)\n", err)
69+
} else {
70+
fmt.Printf("symbol_table.txt: %6d names\n", n)
71+
}
72+
73+
// 2. Multiplayer JSON files
74+
multiplayerDir := filepath.Join(srcDir, "cache", "extracted", "apk_extracted", "assets", "sourcedb", "rad15", "json", "r14", "multiplayer")
75+
jsonFiles := []string{
76+
"item_assignment.json",
77+
"customization_models.json",
78+
"equip_slots.json",
79+
"item_progression.json",
80+
"player_rewards.json",
81+
}
82+
for _, f := range jsonFiles {
83+
path := filepath.Join(multiplayerDir, f)
84+
n, err := harvestJSONStrings(names, path)
85+
if err != nil {
86+
fmt.Fprintf(os.Stderr, "Warning: %s not found (%v)\n", f, err)
87+
continue
88+
}
89+
fmt.Printf("%-26s %6d names\n", f+":", n)
90+
}
91+
92+
// 3. SNS message catalog YAML
93+
msgCatalogPath := filepath.Join(srcDir, "docs", "kb", "message_catalog.yaml")
94+
n, err = harvestYAMLNames(names, msgCatalogPath)
95+
if err != nil {
96+
fmt.Fprintf(os.Stderr, "Warning: message_catalog.yaml not found (%v)\n", err)
97+
} else {
98+
fmt.Printf("message_catalog.yaml: %6d names\n", n)
99+
}
100+
101+
// 4. Known tints from pkg/tint
102+
n = harvestKnownTints(names)
103+
fmt.Printf("known tints (built-in): %6d names\n", n)
104+
105+
fmt.Printf("─────────────────────────────────────\n")
106+
fmt.Printf("Total unique names: %6d\n", len(names))
107+
108+
if statsOnly {
109+
return nil
110+
}
111+
112+
// Write output
113+
sorted := make([]string, 0, len(names))
114+
for name := range names {
115+
sorted = append(sorted, name)
116+
}
117+
sort.Strings(sorted)
118+
119+
f, err := os.Create(outFile)
120+
if err != nil {
121+
return fmt.Errorf("create output file: %w", err)
122+
}
123+
defer f.Close()
124+
125+
w := bufio.NewWriter(f)
126+
fmt.Fprintf(w, "# EVR symbol name wordlist\n")
127+
fmt.Fprintf(w, "# Generated by buildwordlist from evr-reconstruction\n")
128+
fmt.Fprintf(w, "# %d unique names\n", len(sorted))
129+
fmt.Fprintf(w, "#\n")
130+
for _, name := range sorted {
131+
fmt.Fprintln(w, name)
132+
}
133+
if err := w.Flush(); err != nil {
134+
return fmt.Errorf("write output: %w", err)
135+
}
136+
if err := f.Close(); err != nil {
137+
return fmt.Errorf("close output file: %w", err)
138+
}
139+
140+
fmt.Printf("Written to: %s\n", outFile)
141+
142+
// Optional: verify hashes against extracted file names
143+
if verifyDir != "" {
144+
return verify(sorted, verifyDir)
145+
}
146+
147+
return nil
148+
}
149+
150+
// harvestSymbolTable reads the rad_archive_tool symbol table at path and adds
// every symbol name it lists to names. A useful line has the form
// "<hex_hash> <name>": everything after the first space, trimmed of
// surrounding whitespace, is the name. Empty lines, lines starting with "#",
// lines without a space, and lines with an empty name are ignored.
//
// It returns the number of names that were not already in the set, together
// with any error from opening or scanning the file.
func harvestSymbolTable(names map[string]struct{}, path string) (int, error) {
	file, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer file.Close()

	before := len(names)

	// Accept lines of up to 1 MiB.
	const maxLine = 1 << 20
	lines := bufio.NewScanner(file)
	lines.Buffer(make([]byte, maxLine), maxLine)

	for lines.Scan() {
		text := lines.Text()
		if text == "" || strings.HasPrefix(text, "#") {
			continue
		}
		// Split once at the first space; the hash column itself is not used.
		columns := strings.SplitN(text, " ", 2)
		if len(columns) != 2 {
			continue
		}
		if symbol := strings.TrimSpace(columns[1]); symbol != "" {
			names[symbol] = struct{}{}
		}
	}

	// Entries are only ever inserted, so the growth of the set equals the
	// number of previously unseen names.
	return len(names) - before, lines.Err()
}
182+
183+
// symbolPattern decides whether a string looks like an EVR symbol name: one
// lowercase letter followed by 1-127 characters drawn from lowercase letters,
// digits and the punctuation "_ . / : * @ -".
var symbolPattern = regexp.MustCompile(`^[a-z][a-z0-9_./:*@-]{1,127}$`)

// trailingCommaRe matches a comma followed only by optional whitespace and a
// closing ] or } (game JSON has these). The bracket is captured so that it
// can be kept when the comma is removed.
var trailingCommaRe = regexp.MustCompile(`,\s*([\]}])`)

// harvestJSONStrings parses the JSON file at path and adds to names every
// object key and every string value, at any nesting depth, that matches
// symbolPattern. It returns the number of names that were not already in the
// set, or an error if the file cannot be read or parsed.
func harvestJSONStrings(names map[string]struct{}, path string) (int, error) {
	content, err := os.ReadFile(path)
	if err != nil {
		return 0, err
	}

	// Drop trailing commas before ] or } so the file can be unmarshalled.
	// NOTE(review): the regexp runs over the raw bytes, so it would also
	// rewrite a ",]" or ",}" sequence that sits inside a string literal.
	sanitized := trailingCommaRe.ReplaceAll(content, []byte("$1"))

	var doc interface{}
	if err := json.Unmarshal(sanitized, &doc); err != nil {
		return 0, fmt.Errorf("parse JSON: %w", err)
	}

	before := len(names)

	// consider records s if it looks like a symbol name.
	consider := func(s string) {
		if symbolPattern.MatchString(s) {
			names[s] = struct{}{}
		}
	}

	// visit walks the decoded document depth-first. Numbers, booleans and
	// nulls fall through the switch and are ignored.
	var visit func(node interface{})
	visit = func(node interface{}) {
		switch typed := node.(type) {
		case map[string]interface{}:
			for key, child := range typed {
				consider(key) // keys can be symbol names too
				visit(child)
			}
		case []interface{}:
			for _, child := range typed {
				visit(child)
			}
		case string:
			consider(typed)
		}
	}
	visit(doc)

	// Entries are only ever inserted, so the growth of the set equals the
	// number of previously unseen names.
	return len(names) - before, nil
}
237+
238+
// yamlNameRe captures the double-quoted, non-empty value that follows
// `name:` and at least one whitespace character. The pattern is not anchored,
// so a key that merely ends in "name" matches as well.
var yamlNameRe = regexp.MustCompile(`name:\s+"([^"]+)"`)

// harvestYAMLNames scans the file at path with yamlNameRe (the YAML is not
// actually parsed) and adds every captured name to names. It returns the
// number of names that were not already in the set, or an error if the file
// cannot be read.
func harvestYAMLNames(names map[string]struct{}, path string) (int, error) {
	content, err := os.ReadFile(path)
	if err != nil {
		return 0, err
	}

	before := len(names)
	for _, groups := range yamlNameRe.FindAllStringSubmatch(string(content), -1) {
		// groups[0] is the whole match; groups[1] is the text between the quotes.
		names[groups[1]] = struct{}{}
	}

	// Entries are only ever inserted, so the growth of the set equals the
	// number of previously unseen names.
	return len(names) - before, nil
}
258+
259+
// harvestKnownTints adds known tint names from pkg/tint.
260+
func harvestKnownTints(names map[string]struct{}) int {
261+
added := 0
262+
for _, name := range tint.KnownTints {
263+
if _, exists := names[name]; !exists {
264+
names[name] = struct{}{}
265+
added++
266+
}
267+
}
268+
return added
269+
}
270+
271+
// verify checks how many names in the wordlist match file names in the extracted dir.
272+
// File names are CSymbol64 hashes in hex. A match means our hash algorithm is working.
273+
func verify(names []string, extractedDir string) error {
274+
fmt.Printf("\nVerifying against extracted files in %s...\n", extractedDir)
275+
276+
// Build a set of all hex file names in the extracted dir (any depth)
277+
fileNames := make(map[uint64]struct{})
278+
err := filepath.WalkDir(extractedDir, func(path string, d os.DirEntry, err error) error {
279+
if err != nil || d.IsDir() {
280+
return nil
281+
}
282+
base := filepath.Base(path)
283+
var v uint64
284+
if _, err := fmt.Sscanf(base, "%x", &v); err == nil {
285+
fileNames[v] = struct{}{}
286+
}
287+
return nil
288+
})
289+
if err != nil {
290+
return fmt.Errorf("walk extracted dir: %w", err)
291+
}
292+
293+
fmt.Printf("Extracted files (unique hash IDs): %d\n", len(fileNames))
294+
295+
matches := 0
296+
for _, name := range names {
297+
h := hash.CSymbol64Hash(name)
298+
if _, ok := fileNames[h]; ok {
299+
matches++
300+
fmt.Printf(" MATCH 0x%016x %s\n", h, name)
301+
}
302+
}
303+
304+
fmt.Printf("\nMatches: %d / %d names (%.1f%%)\n", matches, len(names), 100*float64(matches)/float64(len(names)))
305+
if matches == 0 {
306+
fmt.Println("NOTE: zero matches likely means the CSymbol64 lookup table needs correction.")
307+
fmt.Println(" Extract the 2048-byte table from 0x141ffc480 in echovr.exe and update pkg/hash.")
308+
}
309+
return nil
310+
}

cmd/symhash/main.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,14 @@ func main() {
5151

5252
func computeHash(s string) uint64 {
5353
switch algo {
54+
case "symbol":
55+
return hash.CSymbol64Hash(s)
5456
case "sns":
5557
return hash.SNSMessageHash(s)
5658
default:
57-
return hash.CSymbol64Hash(s)
59+
fmt.Fprintf(os.Stderr, "Error: unknown algorithm %q (supported: symbol, sns)\n", algo)
60+
os.Exit(1)
61+
return 0
5862
}
5963
}
6064

pkg/hash/hash_test.go

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,21 +16,6 @@ func TestSNSMessageHashStripsSPrefix(t *testing.T) {
1616
t.Errorf("hash collision: h1=0x%016x h2=0x%016x h3=0x%016x", h1, h2, h3)
1717
}
1818

19-
// Hashing with/without leading 'S' must differ
20-
withS := SNSMessageHash("SNSLobbySmiteEntrant")
21-
withoutS := SNSMessageHash("NSLobbySmiteEntrant") // already no leading S
22-
if withS != withoutS {
23-
// This is expected — SNSMessageHash strips the 'S', so both inputs produce the same
24-
// hash. Calling with "NSLobbySmiteEntrant" means no 'S' to strip, then it hashes
25-
// "SLobbySmiteEntrant"... wait, no. Let me think again.
26-
//
27-
// "SNSLobbySmiteEntrant" → strip 'S' → "NSLobbySmiteEntrant" → hash
28-
// "NSLobbySmiteEntrant" → strip 'N'... no, we only strip 'S'.
29-
// "NSLobbySmiteEntrant" starts with 'N', not 'S', so no strip.
30-
// → hash("NSLobbySmiteEntrant")
31-
// So they SHOULD be equal.
32-
}
33-
3419
// Verify strip behavior: SNSFoo → hash("NSFoo"); SFoo (no second S) → hash("Foo")
3520
hSNS := SNSMessageHash("SNSLobbySmiteEntrant") // strips 'S' → "NSLobbySmiteEntrant"
3621
hNS := SNSMessageHash("NSLobbySmiteEntrant") // starts with 'N', no strip → "NSLobbySmiteEntrant"
@@ -81,13 +66,12 @@ func TestCSymbol64HashDifferentStrings(t *testing.T) {
8166

8267
// TestCSymbol64HashKnownVector tests the game-extracted test vector.
8368
// Vector from docs/kb/csymbol64_hash_findings.md in evr-reconstruction.
84-
// NOTE: Skipped until lookup table polynomial is verified against game binary.
69+
// TODO: known vector is unverified — update want value once the 2048-byte
70+
// lookup table at 0x141ffc480 in echovr.exe is extracted and confirmed.
8571
func TestCSymbol64HashKnownVector(t *testing.T) {
8672
got := CSymbol64Hash("rwd_tint_0019")
8773
want := uint64(0x74d228d09dc5dd8f)
8874
if got != want {
89-
t.Logf("CSymbol64Hash(\"rwd_tint_0019\") = 0x%016x (expected 0x%016x)", got, want)
90-
t.Logf("NOTE: lookup table polynomial needs verification against game binary at 0x141ffc480")
9175
t.Skip("known vector unconfirmed - skipping until binary table is extracted")
9276
}
9377
}

0 commit comments

Comments
 (0)