Skip to content

Commit 6377a58

Browse files
thesprockee and claude authored
Add batchextract command for parallel package extraction (#6)
* Add batchextract command for parallel package extraction

  Extracts all packages from a _data directory in one command, using a
  configurable worker pool (default: runtime.NumCPU()).

      batchextract -data ./rad/_data -output ./extracted
      batchextract -data ./rad/_data -output ./extracted -workers 8

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(batchextract): remove unused filter flag, validate workers, handle empty manifests

  Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 89f00a9 commit 6377a58

1 file changed

Lines changed: 183 additions & 0 deletions

File tree

cmd/batchextract/main.go

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
// Command batchextract extracts all packages from an EVR data directory in parallel.
//
// Usage:
//
//	batchextract -data ./ready-at-dawn/_data -output ./extracted
//	batchextract -data ./ready-at-dawn/_data -output ./extracted -workers 8
7+
package main
8+
9+
import (
10+
"flag"
11+
"fmt"
12+
"os"
13+
"path/filepath"
14+
"runtime"
15+
"sync"
16+
"sync/atomic"
17+
"time"
18+
19+
"github.com/EchoTools/evrFileTools/pkg/manifest"
20+
)
21+
22+
// Command-line settings. Each variable is bound to a flag in init and filled
// in when main calls flag.Parse.
var (
	// dataDir is the -data flag; outputDir is the -output flag.
	dataDir, outputDir string

	// workers is the -workers flag: how many extraction goroutines to run.
	workers int

	// verbose is the -verbose flag: print one line per package instead of a
	// single progress counter.
	verbose bool
)
28+
29+
func init() {
30+
flag.StringVar(&dataDir, "data", "", "Path to _data directory containing manifests/ and packages/")
31+
flag.StringVar(&outputDir, "output", "", "Output directory for extracted files")
32+
flag.IntVar(&workers, "workers", runtime.NumCPU(), "Number of parallel extraction workers")
33+
flag.BoolVar(&verbose, "verbose", false, "Print each package as it is extracted")
34+
}
35+
36+
func main() {
37+
flag.Parse()
38+
39+
if dataDir == "" || outputDir == "" {
40+
fmt.Fprintf(os.Stderr, "Usage: batchextract -data <_data dir> -output <output dir>\n")
41+
flag.PrintDefaults()
42+
os.Exit(1)
43+
}
44+
45+
if workers <= 0 {
46+
workers = runtime.NumCPU()
47+
}
48+
49+
if err := run(); err != nil {
50+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
51+
os.Exit(1)
52+
}
53+
}
54+
55+
// result is the outcome of extracting a single package. Workers in run send
// one result per manifest back to the collecting loop.
type result struct {
	name  string // manifest file name the worker was given
	files int    // file count returned by extractOne (0 on failure)
	err   error  // nil when the extraction succeeded
}
60+
61+
func run() error {
62+
manifestDir := filepath.Join(dataDir, "manifests")
63+
entries, err := os.ReadDir(manifestDir)
64+
if err != nil {
65+
return fmt.Errorf("read manifests directory: %w", err)
66+
}
67+
68+
if len(entries) == 0 {
69+
return fmt.Errorf("no manifests found in %s", manifestDir)
70+
}
71+
72+
if err := os.MkdirAll(outputDir, 0755); err != nil {
73+
return fmt.Errorf("create output directory: %w", err)
74+
}
75+
76+
// Collect manifest names (skip directories)
77+
var names []string
78+
for _, e := range entries {
79+
if !e.IsDir() {
80+
names = append(names, e.Name())
81+
}
82+
}
83+
84+
if len(names) == 0 {
85+
return fmt.Errorf("no manifest files found in %s (directory contains only subdirectories)", manifestDir)
86+
}
87+
88+
fmt.Printf("Found %d manifests, extracting with %d workers...\n", len(names), workers)
89+
start := time.Now()
90+
91+
// Feed work through a channel
92+
work := make(chan string, len(names))
93+
for _, n := range names {
94+
work <- n
95+
}
96+
close(work)
97+
98+
results := make(chan result, len(names))
99+
var wg sync.WaitGroup
100+
101+
for range workers {
102+
wg.Add(1)
103+
go func() {
104+
defer wg.Done()
105+
for name := range work {
106+
files, err := extractOne(name)
107+
results <- result{name: name, files: files, err: err}
108+
}
109+
}()
110+
}
111+
112+
// Close results when all workers done
113+
go func() {
114+
wg.Wait()
115+
close(results)
116+
}()
117+
118+
var (
119+
done atomic.Int64
120+
totalFiles atomic.Int64
121+
errors []string
122+
)
123+
124+
total := int64(len(names))
125+
for r := range results {
126+
done.Add(1)
127+
if r.err != nil {
128+
errors = append(errors, fmt.Sprintf("%s: %v", r.name, r.err))
129+
if verbose {
130+
fmt.Printf("[%d/%d] FAILED %s: %v\n", done.Load(), total, r.name, r.err)
131+
} else {
132+
fmt.Printf("\r[%d/%d] extracting... ", done.Load(), total)
133+
}
134+
} else {
135+
totalFiles.Add(int64(r.files))
136+
if verbose {
137+
fmt.Printf("[%d/%d] %s (%d files)\n", done.Load(), total, r.name, r.files)
138+
} else {
139+
fmt.Printf("\r[%d/%d] extracting... ", done.Load(), total)
140+
}
141+
}
142+
}
143+
144+
elapsed := time.Since(start)
145+
fmt.Printf("\r%-40s\n", "") // clear progress line
146+
fmt.Printf("Extracted %d files from %d packages in %s\n", totalFiles.Load(), len(names)-len(errors), elapsed.Round(time.Millisecond))
147+
148+
if len(errors) > 0 {
149+
fmt.Fprintf(os.Stderr, "\n%d packages failed:\n", len(errors))
150+
for _, e := range errors {
151+
fmt.Fprintf(os.Stderr, " %s\n", e)
152+
}
153+
return fmt.Errorf("%d packages failed", len(errors))
154+
}
155+
156+
return nil
157+
}
158+
159+
func extractOne(name string) (int, error) {
160+
manifestPath := filepath.Join(dataDir, "manifests", name)
161+
m, err := manifest.ReadFile(manifestPath)
162+
if err != nil {
163+
return 0, fmt.Errorf("read manifest: %w", err)
164+
}
165+
166+
packagePath := filepath.Join(dataDir, "packages", name)
167+
pkg, err := manifest.OpenPackage(m, packagePath)
168+
if err != nil {
169+
return 0, fmt.Errorf("open package: %w", err)
170+
}
171+
defer pkg.Close()
172+
173+
dest := filepath.Join(outputDir, name)
174+
if err := os.MkdirAll(dest, 0755); err != nil {
175+
return 0, fmt.Errorf("create output dir: %w", err)
176+
}
177+
178+
if err := pkg.Extract(dest); err != nil {
179+
return 0, fmt.Errorf("extract: %w", err)
180+
}
181+
182+
return m.FileCount(), nil
183+
}

0 commit comments

Comments
 (0)