|
| 1 | +// Command buildwordlist harvests human-readable EVR symbol names from |
| 2 | +// evr-reconstruction data sources and writes a deduplicated wordlist |
| 3 | +// suitable for use with symhash -wordlist. |
| 4 | +// |
| 5 | +// Sources harvested: |
| 6 | +// - symbol_table.txt (51K+ names from rad_archive_tool) |
| 7 | +// - multiplayer JSON files (cosmetics, items, models) |
| 8 | +// - SNS message catalog YAML (network message type names) |
| 9 | +// - Known tint names |
| 10 | +// |
| 11 | +// Usage: |
| 12 | +// |
| 13 | +// buildwordlist -src ~/src/evr-reconstruction -out names.txt |
| 14 | +// buildwordlist -src ~/src/evr-reconstruction -out names.txt -verify ./extracted |
| 15 | +package main |
| 16 | + |
| 17 | +import ( |
| 18 | + "bufio" |
| 19 | + "encoding/json" |
| 20 | + "flag" |
| 21 | + "fmt" |
| 22 | + "os" |
| 23 | + "path/filepath" |
| 24 | + "regexp" |
| 25 | + "sort" |
| 26 | + "strings" |
| 27 | + |
| 28 | + "github.com/EchoTools/evrFileTools/pkg/hash" |
| 29 | + "github.com/EchoTools/evrFileTools/pkg/tint" |
| 30 | +) |
| 31 | + |
// Command-line options. They hold their zero values until the flag package
// has registered and parsed the corresponding flags.
var (
	// srcDir (-src) is the evr-reconstruction checkout to harvest from,
	// outFile (-out) is where the wordlist is written, and verifyDir
	// (-verify) is an optional extracted package directory to check
	// hashes against.
	srcDir, outFile, verifyDir string

	// statsOnly (-stats) requests harvest statistics without writing outFile.
	statsOnly bool
)
| 38 | + |
| 39 | +func init() { |
| 40 | + flag.StringVar(&srcDir, "src", "", "Path to evr-reconstruction repository") |
| 41 | + flag.StringVar(&outFile, "out", "evr-names.txt", "Output wordlist file path") |
| 42 | + flag.StringVar(&verifyDir, "verify", "", "Extracted package dir to verify hashes against file names") |
| 43 | + flag.BoolVar(&statsOnly, "stats", false, "Print harvest stats without writing output file") |
| 44 | +} |
| 45 | + |
| 46 | +func main() { |
| 47 | + flag.Parse() |
| 48 | + |
| 49 | + if srcDir == "" { |
| 50 | + fmt.Fprintf(os.Stderr, "Usage: buildwordlist -src <evr-reconstruction dir> [-out names.txt]\n") |
| 51 | + flag.PrintDefaults() |
| 52 | + os.Exit(1) |
| 53 | + } |
| 54 | + |
| 55 | + if err := run(); err != nil { |
| 56 | + fmt.Fprintf(os.Stderr, "Error: %v\n", err) |
| 57 | + os.Exit(1) |
| 58 | + } |
| 59 | +} |
| 60 | + |
| 61 | +func run() error { |
| 62 | + names := make(map[string]struct{}) |
| 63 | + |
| 64 | + // 1. Symbol table (largest source: ~51K names) |
| 65 | + symbolTablePath := filepath.Join(srcDir, "tools", "rad_archive_tool", "build", "test_extract", ".rad_manifest", "symbol_table.txt") |
| 66 | + n, err := harvestSymbolTable(names, symbolTablePath) |
| 67 | + if err != nil { |
| 68 | + fmt.Fprintf(os.Stderr, "Warning: symbol table not found (%v)\n", err) |
| 69 | + } else { |
| 70 | + fmt.Printf("symbol_table.txt: %6d names\n", n) |
| 71 | + } |
| 72 | + |
| 73 | + // 2. Multiplayer JSON files |
| 74 | + multiplayerDir := filepath.Join(srcDir, "cache", "extracted", "apk_extracted", "assets", "sourcedb", "rad15", "json", "r14", "multiplayer") |
| 75 | + jsonFiles := []string{ |
| 76 | + "item_assignment.json", |
| 77 | + "customization_models.json", |
| 78 | + "equip_slots.json", |
| 79 | + "item_progression.json", |
| 80 | + "player_rewards.json", |
| 81 | + } |
| 82 | + for _, f := range jsonFiles { |
| 83 | + path := filepath.Join(multiplayerDir, f) |
| 84 | + n, err := harvestJSONStrings(names, path) |
| 85 | + if err != nil { |
| 86 | + fmt.Fprintf(os.Stderr, "Warning: %s not found (%v)\n", f, err) |
| 87 | + continue |
| 88 | + } |
| 89 | + fmt.Printf("%-26s %6d names\n", f+":", n) |
| 90 | + } |
| 91 | + |
| 92 | + // 3. SNS message catalog YAML |
| 93 | + msgCatalogPath := filepath.Join(srcDir, "docs", "kb", "message_catalog.yaml") |
| 94 | + n, err = harvestYAMLNames(names, msgCatalogPath) |
| 95 | + if err != nil { |
| 96 | + fmt.Fprintf(os.Stderr, "Warning: message_catalog.yaml not found (%v)\n", err) |
| 97 | + } else { |
| 98 | + fmt.Printf("message_catalog.yaml: %6d names\n", n) |
| 99 | + } |
| 100 | + |
| 101 | + // 4. Known tints from pkg/tint |
| 102 | + n = harvestKnownTints(names) |
| 103 | + fmt.Printf("known tints (built-in): %6d names\n", n) |
| 104 | + |
| 105 | + fmt.Printf("─────────────────────────────────────\n") |
| 106 | + fmt.Printf("Total unique names: %6d\n", len(names)) |
| 107 | + |
| 108 | + if statsOnly { |
| 109 | + return nil |
| 110 | + } |
| 111 | + |
| 112 | + // Write output |
| 113 | + sorted := make([]string, 0, len(names)) |
| 114 | + for name := range names { |
| 115 | + sorted = append(sorted, name) |
| 116 | + } |
| 117 | + sort.Strings(sorted) |
| 118 | + |
| 119 | + f, err := os.Create(outFile) |
| 120 | + if err != nil { |
| 121 | + return fmt.Errorf("create output file: %w", err) |
| 122 | + } |
| 123 | + defer f.Close() |
| 124 | + |
| 125 | + w := bufio.NewWriter(f) |
| 126 | + fmt.Fprintf(w, "# EVR symbol name wordlist\n") |
| 127 | + fmt.Fprintf(w, "# Generated by buildwordlist from evr-reconstruction\n") |
| 128 | + fmt.Fprintf(w, "# %d unique names\n", len(sorted)) |
| 129 | + fmt.Fprintf(w, "#\n") |
| 130 | + for _, name := range sorted { |
| 131 | + fmt.Fprintln(w, name) |
| 132 | + } |
| 133 | + if err := w.Flush(); err != nil { |
| 134 | + return fmt.Errorf("write output: %w", err) |
| 135 | + } |
| 136 | + if err := f.Close(); err != nil { |
| 137 | + return fmt.Errorf("close output file: %w", err) |
| 138 | + } |
| 139 | + |
| 140 | + fmt.Printf("Written to: %s\n", outFile) |
| 141 | + |
| 142 | + // Optional: verify hashes against extracted file names |
| 143 | + if verifyDir != "" { |
| 144 | + return verify(sorted, verifyDir) |
| 145 | + } |
| 146 | + |
| 147 | + return nil |
| 148 | +} |
| 149 | + |
// harvestSymbolTable adds the names listed in a rad_archive_tool symbol table
// to names and returns how many of them were not already present.
//
// Each useful line has the form "<hex_hash> <name>": everything after the
// first space, with surrounding whitespace trimmed, is the name. Empty lines,
// lines starting with '#', lines without a space and lines whose name is
// empty are ignored. Lines may be up to 1 MiB long.
//
// If the file cannot be opened the error is returned with a zero count. A
// read error encountered while scanning is returned alongside the number of
// names added before it occurred.
func harvestSymbolTable(names map[string]struct{}, path string) (int, error) {
	file, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer file.Close()

	const maxLine = 1 << 20
	lines := bufio.NewScanner(file)
	lines.Buffer(make([]byte, maxLine), maxLine)

	fresh := 0
	for lines.Scan() {
		entry := lines.Text()
		if entry == "" || entry[0] == '#' {
			continue
		}

		// Split once: the hash is before the first space, the name after it.
		fields := strings.SplitN(entry, " ", 2)
		if len(fields) != 2 {
			continue
		}
		symbol := strings.TrimSpace(fields[1])
		if symbol == "" {
			continue
		}

		if _, seen := names[symbol]; seen {
			continue
		}
		names[symbol] = struct{}{}
		fresh++
	}
	return fresh, lines.Err()
}
| 182 | + |
// symbolPattern recognizes strings shaped like EVR symbol names: a lowercase
// letter followed by 1 to 127 characters, each a lowercase letter, a digit or
// one of the punctuation characters _ . / : * @ -
var symbolPattern = regexp.MustCompile(`^[a-z][a-z0-9_./:*@-]{1,127}$`)

// trailingCommaRe matches a comma, optionally followed by whitespace, that
// sits directly before a closing ] or } (game JSON has these). The closing
// bracket is captured so a replacement can keep it.
var trailingCommaRe = regexp.MustCompile(`,\s*([\]}])`)

// harvestJSONStrings reads the JSON file at path and adds every object key
// and every string value that matches symbolPattern to names. It returns the
// number of names that were not already present.
//
// Trailing commas before ] or } are removed before decoding because
// encoding/json rejects them. A file that cannot be read, or that still does
// not parse after that clean-up, yields a zero count and an error; names is
// left untouched in both cases.
func harvestJSONStrings(names map[string]struct{}, path string) (int, error) {
	contents, err := os.ReadFile(path)
	if err != nil {
		return 0, err
	}

	var doc interface{}
	if err := json.Unmarshal(trailingCommaRe.ReplaceAll(contents, []byte("$1")), &doc); err != nil {
		return 0, fmt.Errorf("parse JSON: %w", err)
	}

	fresh := 0
	// consider records s if it looks like a symbol name and is new.
	consider := func(s string) {
		if !symbolPattern.MatchString(s) {
			return
		}
		if _, seen := names[s]; seen {
			return
		}
		names[s] = struct{}{}
		fresh++
	}

	// visit walks the decoded document depth-first. Numbers, booleans and
	// nulls carry no names and fall through the switch.
	var visit func(node interface{})
	visit = func(node interface{}) {
		switch t := node.(type) {
		case map[string]interface{}:
			for key, child := range t {
				consider(key) // keys can be symbol names too
				visit(child)
			}
		case []interface{}:
			for i := range t {
				visit(t[i])
			}
		case string:
			consider(t)
		}
	}
	visit(doc)

	return fresh, nil
}
| 237 | + |
// yamlNameRe captures the quoted value of entries written as
//
//	name: "SomeName"
//
// It is a plain text match, not a YAML parse: any occurrence of `name:`
// followed by whitespace and a non-empty double-quoted string matches, which
// includes keys that merely end in "name".
var yamlNameRe = regexp.MustCompile(`name:\s+"([^"]+)"`)

// harvestYAMLNames adds every value captured by yamlNameRe in the file at
// path to names and returns how many of them were not already present. A
// file that cannot be read yields a zero count and the read error.
func harvestYAMLNames(names map[string]struct{}, path string) (int, error) {
	content, err := os.ReadFile(path)
	if err != nil {
		return 0, err
	}

	// Inserting unconditionally and measuring the growth of the set counts
	// exactly the names that were new.
	before := len(names)
	for _, groups := range yamlNameRe.FindAllSubmatch(content, -1) {
		names[string(groups[1])] = struct{}{}
	}
	return len(names) - before, nil
}
| 258 | + |
| 259 | +// harvestKnownTints adds known tint names from pkg/tint. |
| 260 | +func harvestKnownTints(names map[string]struct{}) int { |
| 261 | + added := 0 |
| 262 | + for _, name := range tint.KnownTints { |
| 263 | + if _, exists := names[name]; !exists { |
| 264 | + names[name] = struct{}{} |
| 265 | + added++ |
| 266 | + } |
| 267 | + } |
| 268 | + return added |
| 269 | +} |
| 270 | + |
| 271 | +// verify checks how many names in the wordlist match file names in the extracted dir. |
| 272 | +// File names are CSymbol64 hashes in hex. A match means our hash algorithm is working. |
| 273 | +func verify(names []string, extractedDir string) error { |
| 274 | + fmt.Printf("\nVerifying against extracted files in %s...\n", extractedDir) |
| 275 | + |
| 276 | + // Build a set of all hex file names in the extracted dir (any depth) |
| 277 | + fileNames := make(map[uint64]struct{}) |
| 278 | + err := filepath.WalkDir(extractedDir, func(path string, d os.DirEntry, err error) error { |
| 279 | + if err != nil || d.IsDir() { |
| 280 | + return nil |
| 281 | + } |
| 282 | + base := filepath.Base(path) |
| 283 | + var v uint64 |
| 284 | + if _, err := fmt.Sscanf(base, "%x", &v); err == nil { |
| 285 | + fileNames[v] = struct{}{} |
| 286 | + } |
| 287 | + return nil |
| 288 | + }) |
| 289 | + if err != nil { |
| 290 | + return fmt.Errorf("walk extracted dir: %w", err) |
| 291 | + } |
| 292 | + |
| 293 | + fmt.Printf("Extracted files (unique hash IDs): %d\n", len(fileNames)) |
| 294 | + |
| 295 | + matches := 0 |
| 296 | + for _, name := range names { |
| 297 | + h := hash.CSymbol64Hash(name) |
| 298 | + if _, ok := fileNames[h]; ok { |
| 299 | + matches++ |
| 300 | + fmt.Printf(" MATCH 0x%016x %s\n", h, name) |
| 301 | + } |
| 302 | + } |
| 303 | + |
| 304 | + fmt.Printf("\nMatches: %d / %d names (%.1f%%)\n", matches, len(names), 100*float64(matches)/float64(len(names))) |
| 305 | + if matches == 0 { |
| 306 | + fmt.Println("NOTE: zero matches likely means the CSymbol64 lookup table needs correction.") |
| 307 | + fmt.Println(" Extract the 2048-byte table from 0x141ffc480 in echovr.exe and update pkg/hash.") |
| 308 | + } |
| 309 | + return nil |
| 310 | +} |
0 commit comments