From 09abe065774fa66a247ca7884123497018e32f18 Mon Sep 17 00:00:00 2001 From: Daniel MS Date: Fri, 19 Jun 2026 20:07:49 -0700 Subject: [PATCH 1/5] first pass (slop) --- bench/README.md | 35 ++++++++ bench/go.mod | 3 + bench/naive/main.go | 192 ++++++++++++++++++++++++++++++++++++++++++++ bench/run.sh | 71 ++++++++++++++++ writeup.md | 110 +++++++++++++++++++++++++ 5 files changed, 411 insertions(+) create mode 100644 bench/README.md create mode 100644 bench/go.mod create mode 100644 bench/naive/main.go create mode 100755 bench/run.sh create mode 100644 writeup.md diff --git a/bench/README.md b/bench/README.md new file mode 100644 index 0000000..5409367 --- /dev/null +++ b/bench/README.md @@ -0,0 +1,35 @@ +# Download Benchmark + +Compares two ways to fetch a Hugging Face model, which is the question +behind dropping the `hf` CLI from modelwrap's supply chain: + +1. **hf-cli** — `hf download` from `huggingface_hub[hf_xet]`. This is what + modelwrap does today (`wrap/wrap.go`). Pulls in Python + huggingface_hub + + the hf_xet plugin and their full transitive dependency tree. +2. **naive** — `bench/naive/main.go`, a stdlib-only Go program. Lists the + repo tree via the Hub API and GETs each `resolve` URL over plain HTTPS. + Zero external dependencies, no Python. + +Each iteration downloads to a fresh directory with a clean cache, so both +methods measure raw network transfer (no Xet chunk dedup across runs). + +## Run + +```bash +# on a box with Go + python3, downloads land on /mnt/large +ITERATIONS=2 WORKERS=8 bash bench/run.sh +``` + +Results are written to `$OUT_BASE/results.tsv` (tab-separated): +`method iter seconds bytes gib mib_per_s`. + +## Notes + +- **naive** uses one TCP connection per file with bounded concurrency over + files (default 8). It does not do byte-range chunking of individual large + shards, which is Xet's main throughput lever. 
If naive is close, the Xet + stack isn't worth its supply-chain cost; if not, Go could add range + requests without the Python dependency. +- **naive** does not verify SHA256 of LFS blobs (the hf CLI does). That is + less work, but also less safe — a tradeoff to call out. +- Model: `Qwen/Qwen2.5-72B-Instruct` (~145 GiB, open, Xet-backed). diff --git a/bench/go.mod b/bench/go.mod new file mode 100644 index 0000000..7822515 --- /dev/null +++ b/bench/go.mod @@ -0,0 +1,3 @@ +module github.com/tinfoilsh/modelwrap/bench + +go 1.22.0 diff --git a/bench/naive/main.go b/bench/naive/main.go new file mode 100644 index 0000000..c3b397b --- /dev/null +++ b/bench/naive/main.go @@ -0,0 +1,192 @@ +// Command naive downloads a Hugging Face model using only the Go standard +// library: it lists the repo file tree via the Hub API and fetches each +// file over plain HTTPS (following the resolve redirects to the CDN). +// +// It is the "no supply chain" baseline against the official hf CLI +// (huggingface_hub + hf_xet), which modelwrap currently shells out to. +// No Python, no huggingface_hub, no Xet plugin — just HTTP. +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "io" + "log" + "net/http" + "net/url" + "os" + "path/filepath" + "strings" + "sync" + "time" +) + +type entry struct { + Type string `json:"type"` + Path string `json:"path"` + Size int64 `json:"size"` +} + +const hub = "https://huggingface.co" + +func main() { + repo := flag.String("repo", "", "Hugging Face repo id, e.g. 
Qwen/Qwen2.5-72B-Instruct") + rev := flag.String("revision", "main", "revision (branch or commit)") + out := flag.String("out", "", "output directory") + workers := flag.Int("workers", 8, "concurrent file downloads") + flag.Parse() + + if *repo == "" || *out == "" { + log.Fatal("usage: naive --repo --out [--revision main] [--workers 8]") + } + + token := os.Getenv("HF_TOKEN") + + start := time.Now() + total, n, err := run(context.Background(), *repo, *rev, *out, *workers, token) + if err != nil { + log.Fatalf("download failed after %d files: %v", n, err) + } + elapsed := time.Since(start) + + gib := float64(total) / (1 << 30) + fmt.Printf("naive: files=%d bytes=%d (%.2f GiB) time=%.2fs throughput=%.1f MiB/s\n", + n, total, gib, elapsed.Seconds(), float64(total)/elapsed.Seconds()/(1<<20)) +} + +func run(ctx context.Context, repo, rev, out string, workers int, token string) (int64, int, error) { + files, err := listTree(ctx, repo, rev, token) + if err != nil { + return 0, 0, fmt.Errorf("list tree: %w", err) + } + log.Printf("listed %d files", len(files)) + + sem := make(chan struct{}, workers) + var wg sync.WaitGroup + var ( + mu sync.Mutex + total int64 + done int + firstErr error + ) + + for _, f := range files { + wg.Add(1) + go func(f entry) { + defer wg.Done() + sem <- struct{}{} + defer func() { <-sem }() + + n, err := fetchFile(ctx, repo, rev, f.Path, out, token) + mu.Lock() + total += n + done++ + if err != nil && firstErr == nil { + firstErr = err + } + mu.Unlock() + if done%10 == 0 { + log.Printf(" %d/%d files done", done, len(files)) + } + }(f) + } + wg.Wait() + return total, done, firstErr +} + +// listTree paginates the Hub tree API and returns leaf (non-directory) entries. 
+func listTree(ctx context.Context, repo, rev, token string) ([]entry, error) { + segments := strings.Split(repo, "/") + for i, s := range segments { + segments[i] = url.PathEscape(s) + } + u := fmt.Sprintf("%s/api/models/%s/tree/%s?recursive=true", hub, strings.Join(segments, "/"), url.PathEscape(rev)) // rev may contain "/" (e.g. refs/pr/1) + + var files []entry + for u != "" { + req, err := http.NewRequestWithContext(ctx, "GET", u, nil) + if err != nil { + return nil, err + } + if token != "" { + req.Header.Set("Authorization", "Bearer "+token) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + resp.Body.Close() + return nil, fmt.Errorf("tree %s: %s: %s", u, resp.Status, body) + } + var page []entry + if err := json.NewDecoder(resp.Body).Decode(&page); err != nil { + resp.Body.Close() + return nil, err + } + resp.Body.Close() + for _, e := range page { + if e.Type != "directory" && e.Type != "tree" { + files = append(files, e) + } + } + u = nextLink(resp.Header.Get("Link")) + } + return files, nil +} + +// nextLink extracts the rel="next" URL from an RFC 8288 Link header. 
+func nextLink(link string) string { + for _, part := range strings.Split(link, ",") { + if !strings.Contains(part, `rel="next"`) { + continue + } + part = strings.TrimSpace(part) + part = strings.TrimPrefix(part, "<") + if i := strings.Index(part, ">"); i >= 0 { + return part[:i] + } + } + return "" +} + +func fetchFile(ctx context.Context, repo, rev, path, out, token string) (int64, error) { + dest := filepath.Join(out, path) + if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil { + return 0, err + } + req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("%s/%s/resolve/%s/%s", hub, repo, rev, path), nil) + if err != nil { + return 0, err + } + if token != "" { + req.Header.Set("Authorization", "Bearer "+token) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return 0, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return 0, fmt.Errorf("%s: %s", path, resp.Status) + } + tmp := dest + ".part" + f, err := os.Create(tmp) + if err != nil { + return 0, err + } + n, err := io.Copy(f, resp.Body) + if err != nil { + f.Close() + os.Remove(tmp) + return n, err + } + if err := f.Close(); err != nil { + os.Remove(tmp) + return n, err + } + return n, os.Rename(tmp, dest) +} diff --git a/bench/run.sh b/bench/run.sh new file mode 100755 index 0000000..3ce8209 --- /dev/null +++ b/bench/run.sh @@ -0,0 +1,71 @@ +#!/usr/bin/env bash +# Benchmark: HF CLI (Xet) vs naive Go HTTP download of a Hugging Face model. +# +# Measures raw download throughput for the two approaches modelwrap could use: +# 1. hf download from huggingface_hub[hf_xet] — the current approach +# 2. ./naive — a stdlib-only Go HTTP downloader (no Python, no Xet) +# +# Each iteration downloads to a fresh directory with a clean cache, so we +# measure network transfer, not Xet dedup of already-present chunks. 
+set -euo pipefail + +MODEL="${MODEL:-Qwen/Qwen2.5-72B-Instruct}" +REVISION="${REVISION:-main}" +ITERATIONS="${ITERATIONS:-2}" +WORKERS="${WORKERS:-8}" +OUT_BASE="${OUT_BASE:-/mnt/large/modelwrap-bench}" +HF_VENV="${HF_VENV:-$HOME/.hf-venv}" + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" + +mkdir -p "$OUT_BASE" +RESULTS="$OUT_BASE/results.tsv" +printf "method\titer\tseconds\tbytes\tgib\tmib_per_s\n" > "$RESULTS" + +now() { date +%s.%N; } +delta() { awk -v a="$1" -v b="$2" 'BEGIN{printf "%.3f", b-a}'; } + +record() { # method iter start end dir + local secs; secs=$(delta "$3" "$4") + local bytes; bytes=$(du -sb "$5" | cut -f1) + local gib; gib=$(awk -v b="$bytes" 'BEGIN{printf "%.2f", b/1073741824}') + local mibps; mibps=$(awk -v b="$bytes" -v s="$secs" 'BEGIN{printf "%.1f", b/1048576/s}') + printf "%s\t%s\t%s\t%s\t%s\t%s\n" "$1" "$2" "$secs" "$bytes" "$gib" "$mibps" | tee -a "$RESULTS" +} + +echo "Building naive downloader..." +( cd "$SCRIPT_DIR" && go build -o "$OUT_BASE/naive" ./naive ) + +if [ ! -x "$HF_VENV/bin/hf" ]; then + echo "Creating HF venv at $HF_VENV ..." 
+ python3 -m venv "$HF_VENV" + "$HF_VENV/bin/pip" install --upgrade pip + "$HF_VENV/bin/pip" install "huggingface_hub[hf_xet]" +fi +echo "HF CLI version: $("$HF_VENV/bin/hf" --version)" + +for i in $(seq 1 "$ITERATIONS"); do + out="$OUT_BASE/hf-$i" + cache="$OUT_BASE/hf-cache-$i" + rm -rf "$out" "$cache" + echo -e "\n=== hf download (Xet) iter $i ===" + s=$(now) + HF_HOME="$cache" "$HF_VENV/bin/hf" download "$MODEL" --revision "$REVISION" --local-dir "$out" + e=$(now) + record hf-cli "$i" "$s" "$e" "$out" + rm -rf "$out" "$cache" +done + +for i in $(seq 1 "$ITERATIONS"); do + out="$OUT_BASE/naive-$i" + rm -rf "$out" + echo -e "\n=== naive Go iter $i ===" + s=$(now) + "$OUT_BASE/naive" --repo "$MODEL" --revision "$REVISION" --out "$out" --workers "$WORKERS" + e=$(now) + record naive "$i" "$s" "$e" "$out" + rm -rf "$out" +done + +echo -e "\n=== RESULTS ===" +cat "$RESULTS" diff --git a/writeup.md b/writeup.md new file mode 100644 index 0000000..9a11232 --- /dev/null +++ b/writeup.md @@ -0,0 +1,110 @@ +# HF CLI vs Naive Go Download Benchmark + +## Question + +modelwrap downloads models today by shelling out to `hf download` from +`huggingface_hub[hf_xet]` (`wrap/wrap.go`). That pulls Python, +`huggingface_hub`, the `hf_xet` plugin, and their full transitive +dependency tree into the packer container — a sizable supply-chain surface +for a tool whose whole point is reproducibility and trust. + +The question: how much download throughput do we actually get from the Xet +stack, and could a stdlib-only Go downloader replace it without giving up +speed? + +## Setup + +- **Host:** `inf8.tinfoil.sh` — 2.0 TiB RAM, no GPU, Go 1.24.4, downloads + written to `/mnt/large` (25 TB RAID, 3.2 TB free) +- **Model:** `Qwen/Qwen2.5-72B-Instruct` (revision `main`) — open, Xet-backed, + 47 files, **135.44 GiB** total. Large enough that Xet's chunking/dedup + has room to help. +- **hf-cli:** `huggingface_hub[hf_xet]` v1.20.1 (`hf-xet` 1.5.1), installed + in an isolated venv. 
Xet confirmed active. +- **naive:** `bench/naive/main.go` — Go standard library only (no external + dependencies, no Python). Lists the repo file tree via the Hub API, then + GETs each `resolve` URL concurrently (8 workers), one TCP connection per + file, no byte-range chunking. + +## Methodology + +Each iteration downloaded the full model to a **fresh directory with a +clean cache**, so both methods measured raw network transfer — not Xet +chunk dedup of already-present data. After each iteration the download was +deleted before the next run. + +- 2 iterations per method. +- Throughput = total bytes downloaded / wall time, measured from process + start to completion. +- Both methods ran unauthenticated (no `HF_TOKEN`), since the model is open. + +Harness: `bench/run.sh` (builds the naive binary, creates the HF venv on +first run, loops iterations, records results to `results.tsv`). + +## Results + +| method | iter | seconds | GiB | MiB/s | +| ------ | ---- | ------- | ------ | ------ | +| hf-cli | 1 | 130.8 | 135.44 | 1060.7 | +| hf-cli | 2 | 119.7 | 135.44 | 1158.4 | +| naive | 1 | 102.9 | 135.44 | 1347.3 | +| naive | 2 | 75.7 | 135.44 | 1832.1 | + +**Average throughput:** + +- hf-cli (Xet): **~1110 MiB/s** +- naive Go: **~1590 MiB/s** + +The naive Go downloader was faster on **every iteration** — roughly 27–58% +faster depending on the comparison, and ~43% faster on average. + +## Notes and caveats + +- **Xet was active.** `hf-xet` 1.5.1 was installed in the venv; the HF CLI + ran with its default Xet-backed transfer path. The naive path used plain + HTTPS `resolve` redirects to the CDN. +- **Network variance.** The two naive runs (1347 vs 1832 MiB/s) and the two + hf-cli runs (1061 vs 1158 MiB/s) both show real variance, consistent with + shared-internet conditions. The ordering (naive > hf-cli) held across all + runs. +- **naive does less work.** It does not verify SHA256 of LFS blobs (the + hf CLI does), and it does not do byte-range chunking of large shards. 
+ Less work, but also less safe on integrity — a tradeoff to call out. If + integrity matters for the production path, Go could add SHA256 checks + cheaply without the Python stack. +- **naive uses one connection per file** with bounded concurrency over + files (8). Xet's main throughput lever is parallel byte-range chunking of + individual large shards. Despite not doing that, naive still won — likely + because the CDN serves `resolve` URLs fast enough that per-file + parallelism saturates the link on a 2 TiB-RAM box with no other contention. +- **Pagination was unused.** The naive downloader includes RFC-8288 + `Link`-header pagination for the tree API, but Qwen 72B has 47 files + (under the ~1000-file page limit), so it ran a single page. Pagination + would matter for repos with thousands of files. + +## Conclusion + +For this model and host, the stdlib-only Go downloader was consistently +faster than the Xet-backed HF CLI while carrying none of its supply-chain +weight (no Python, no `huggingface_hub`, no `hf_xet`, no transitive deps). + +That inverts the usual assumption that you need Xet for fast large-model +downloads: here, plain HTTPS `resolve` fetches with file-level +concurrency saturated the available bandwidth more effectively than the +Xet stack did. Dropping the `hf` CLI from modelwrap's packer would shrink +the supply chain without costing throughput — the remaining work would be +adding SHA256 verification (and possibly range requests for very large +single shards) in Go. 
+ +## Reproducing + +```bash +# on inf8 (or any box with Go + python3 and a writable /mnt/large) +ITERATIONS=2 WORKERS=8 bash bench/run.sh +``` + +Code lives in `bench/`: + +- `bench/naive/main.go` — the naive stdlib-only downloader +- `bench/run.sh` — the benchmark harness +- `bench/README.md` — quick reference From 51fe74bcb4b5edb87f7390ea19d38f75670b8ff5 Mon Sep 17 00:00:00 2001 From: Daniel MS Date: Sat, 20 Jun 2026 13:42:04 -0700 Subject: [PATCH 2/5] verify xet --- bench/xet_probe.py | 162 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 162 insertions(+) create mode 100644 bench/xet_probe.py diff --git a/bench/xet_probe.py b/bench/xet_probe.py new file mode 100644 index 0000000..11f2f4d --- /dev/null +++ b/bench/xet_probe.py @@ -0,0 +1,162 @@ +"""Definitively detect which download path huggingface_hub takes. + +Monkeypatches `xet_get` and `http_get` in file_download.py so we can see +exactly which one is called for a single file, plus snapshots established +TCP connections during the download to fingerprint the endpoint +(Xet native CAS -> cas-server.xethub.hf.co ; bridge/CDN -> *.cloudfront.net). + +Runs twice: Xet enabled, then HF_HUB_DISABLE_XET=1, to see if disabling +changes the path or the peers. 
+""" + +import json +import os +import re +import shutil +import socket +import subprocess +import sys +import threading +import time +import urllib.request + +REPO = os.environ.get("PROBE_REPO", "Qwen/Qwen2.5-0.5B-Instruct") +FILE = os.environ.get("PROBE_FILE", "model.safetensors") +REV = os.environ.get("PROBE_REV", "main") + +import huggingface_hub.file_download as fd # noqa: E402 + +_orig_xet = fd.xet_get if hasattr(fd, "xet_get") else None +_orig_http = fd.http_get + +calls = {"xet_get": 0, "http_get": 0} + + +def spy_xet(*a, **k): + calls["xet_get"] += 1 + print(" >>> xet_get() CALLED (native Xet CAS protocol)", flush=True) + return _orig_xet(*a, **k) + + +def spy_http(*a, **k): + calls["http_get"] += 1 + print(" >>> http_get() CALLED (plain HTTPS / bridge redirect)", flush=True) + return _orig_http(*a, **k) + + +if _orig_xet is not None: + fd.xet_get = spy_xet +fd.http_get = spy_http + +from huggingface_hub import hf_hub_download # noqa: E402 +from huggingface_hub.utils._runtime import is_xet_available # noqa: E402 + + +def api(path): + return json.load(urllib.request.urlopen(f"https://huggingface.co{path}")) + + +def xet_hash_present(): + tree = api(f"/api/models/{REPO}/tree/{REV}?recursive=true") + for e in tree: + if e.get("path") == FILE: + return e.get("lfs", {}).get("oid"), "xetHash" in e, e.get("size") + return None, False, None + + +def snapshot_peers(pid): + try: + out = subprocess.check_output( + ["ss", "-tnp"], text=True, stderr=subprocess.DEVNULL + ) + except (OSError, subprocess.CalledProcessError): # OSError: `ss` not installed + return {} + peers = {} + for line in out.splitlines(): + if "ESTAB" not in line or f"pid={pid}" not in line: + continue + m = re.search(r"([\d.]+):(\d+)\s+([\d.]+):(\d+)", line) + if not m: + continue + peer = m.group(3) + if peer.startswith("127."): + continue + if peer not in peers: + try: + peers[peer] = socket.gethostbyaddr(peer)[0] + except OSError: # herror or gaierror: no usable reverse DNS + peers[peer] = "?" 
+ return peers + + +def run_once(label, disable_xet): + outdir = f"/tmp/xetprobe-{label}" + shutil.rmtree(outdir, ignore_errors=True) + os.makedirs(outdir) + cache = f"/tmp/xetprobe-cache-{label}" + shutil.rmtree(cache, ignore_errors=True) + env = dict(os.environ) + env["HF_HOME"] = cache + if disable_xet: + env["HF_HUB_DISABLE_XET"] = "1" + for k in ("HF_HUB_DISABLE_XET",): + os.environ[k] = env.get(k, "") + import huggingface_hub.constants as constants + + constants.HF_HUB_DISABLE_XET = bool(disable_xet) + + calls["xet_get"] = 0 + calls["http_get"] = 0 + print(f"\n=== {label} ===", flush=True) + print(f" HF_HUB_DISABLE_XET={constants.HF_HUB_DISABLE_XET}", flush=True) + + pid = os.getpid() + peers = {} + done = threading.Event() + + def poll(): + while not done.is_set(): + peers.update(snapshot_peers(pid)) + time.sleep(0.02) + + t = threading.Thread(target=poll, daemon=True) + t.start() + start = time.time() + path = hf_hub_download(REPO, FILE, revision=REV, local_dir=outdir) + done.set() + elapsed = time.time() - start + size = os.path.getsize(path) + mibs = size / (1 << 20) / elapsed + print(f" size={size} time={elapsed:.2f}s {mibs:.0f} MiB/s", flush=True) + print( + f" calls: xet_get={calls['xet_get']} http_get={calls['http_get']}", flush=True + ) + print(f" peers ({len(peers)}):", flush=True) + for ip, host in sorted(peers.items()): + tag = "" + if "xethub" in host and "bridge" not in host: + tag = " <-- XET NATIVE CAS" + elif "xethub" in host: + tag = " <-- XET BRIDGE" + elif "cloudfront" in host: + tag = " <-- CDN/CloudFront" + print(f" {ip} {host}{tag}", flush=True) + shutil.rmtree(outdir, ignore_errors=True) + shutil.rmtree(cache, ignore_errors=True) + return elapsed, mibs + + +def main(): + oid, has_xet, size = xet_hash_present() + print(f"repo={REPO} file={FILE} rev={REV}") + print(f" lfs.oid={oid}") + print(f" size={size}") + print(f" xetHash present: {has_xet}") + print(f" hf_xet importable: {is_xet_available()}") + print(f" huggingface_hub: 
{__import__('huggingface_hub').__version__}") + run_once("xet-enabled", disable_xet=False) + run_once("xet-disabled", disable_xet=True) + + +if __name__ == "__main__": + main() From 220aa1302d5b44c9af2d3824fb15c388860a438b Mon Sep 17 00:00:00 2001 From: Daniel MS Date: Sat, 20 Jun 2026 14:44:31 -0700 Subject: [PATCH 3/5] make bench simpler --- bench/README.md | 6 ++--- bench/naive/main.go | 57 ++++++++++++++------------------------------- bench/run.sh | 3 +-- 3 files changed, 22 insertions(+), 44 deletions(-) diff --git a/bench/README.md b/bench/README.md index 5409367..0d5f2cd 100644 --- a/bench/README.md +++ b/bench/README.md @@ -17,7 +17,7 @@ methods measure raw network transfer (no Xet chunk dedup across runs). ```bash # on a box with Go + python3, downloads land on /mnt/large -ITERATIONS=2 WORKERS=8 bash bench/run.sh +ITERATIONS=2 bash bench/run.sh ``` Results are written to `$OUT_BASE/results.tsv` (tab-separated): @@ -25,8 +25,8 @@ Results are written to `$OUT_BASE/results.tsv` (tab-separated): ## Notes -- **naive** uses one TCP connection per file with bounded concurrency over - files (default 8). It does not do byte-range chunking of individual large +- **naive** fetches files sequentially, one TCP connection per file, no + concurrency. It does not do byte-range chunking of individual large shards, which is Xet's main throughput lever. If naive is close, the Xet stack isn't worth its supply-chain cost; if not, Go could add range requests without the Python dependency. diff --git a/bench/naive/main.go b/bench/naive/main.go index c3b397b..aa63649 100644 --- a/bench/naive/main.go +++ b/bench/naive/main.go @@ -1,10 +1,12 @@ // Command naive downloads a Hugging Face model using only the Go standard // library: it lists the repo file tree via the Hub API and fetches each -// file over plain HTTPS (following the resolve redirects to the CDN). +// file over plain HTTPS (following the resolve redirects to the CDN), one +// file at a time. 
// -// It is the "no supply chain" baseline against the official hf CLI -// (huggingface_hub + hf_xet), which modelwrap currently shells out to. -// No Python, no huggingface_hub, no Xet plugin — just HTTP. +// It is the simplest possible "no supply chain" baseline against the +// official hf CLI (huggingface_hub + hf_xet), which modelwrap currently +// shells out to. No Python, no huggingface_hub, no Xet plugin, no +// concurrency — just HTTP, sequentially. package main import ( @@ -19,7 +21,6 @@ import ( "os" "path/filepath" "strings" - "sync" "time" ) @@ -35,17 +36,16 @@ func main() { repo := flag.String("repo", "", "Hugging Face repo id, e.g. Qwen/Qwen2.5-72B-Instruct") rev := flag.String("revision", "main", "revision (branch or commit)") out := flag.String("out", "", "output directory") - workers := flag.Int("workers", 8, "concurrent file downloads") flag.Parse() if *repo == "" || *out == "" { - log.Fatal("usage: naive --repo --out [--revision main] [--workers 8]") + log.Fatal("usage: naive --repo --out [--revision main]") } token := os.Getenv("HF_TOKEN") start := time.Now() - total, n, err := run(context.Background(), *repo, *rev, *out, *workers, token) + total, n, err := run(context.Background(), *repo, *rev, *out, token) if err != nil { log.Fatalf("download failed after %d files: %v", n, err) } @@ -56,44 +56,23 @@ func main() { n, total, gib, elapsed.Seconds(), float64(total)/elapsed.Seconds()/(1<<20)) } -func run(ctx context.Context, repo, rev, out string, workers int, token string) (int64, int, error) { +func run(ctx context.Context, repo, rev, out, token string) (int64, int, error) { files, err := listTree(ctx, repo, rev, token) if err != nil { return 0, 0, fmt.Errorf("list tree: %w", err) } log.Printf("listed %d files", len(files)) - sem := make(chan struct{}, workers) - var wg sync.WaitGroup - var ( - mu sync.Mutex - total int64 - done int - firstErr error - ) - - for _, f := range files { - wg.Add(1) - go func(f entry) { - defer wg.Done() - sem <- 
struct{}{} - defer func() { <-sem }() - - n, err := fetchFile(ctx, repo, rev, f.Path, out, token) - mu.Lock() - total += n - done++ - if err != nil && firstErr == nil { - firstErr = err - } - mu.Unlock() - if done%10 == 0 { - log.Printf(" %d/%d files done", done, len(files)) - } - }(f) + var total int64 + for i, f := range files { + log.Printf("[%d/%d] %s (%d bytes)", i+1, len(files), f.Path, f.Size) + n, err := fetchFile(ctx, repo, rev, f.Path, out, token) + total += n + if err != nil { + return total, i, fmt.Errorf("%s: %w", f.Path, err) + } } - wg.Wait() - return total, done, firstErr + return total, len(files), nil } // listTree paginates the Hub tree API and returns leaf (non-directory) entries. diff --git a/bench/run.sh b/bench/run.sh index 3ce8209..de89905 100755 --- a/bench/run.sh +++ b/bench/run.sh @@ -12,7 +12,6 @@ set -euo pipefail MODEL="${MODEL:-Qwen/Qwen2.5-72B-Instruct}" REVISION="${REVISION:-main}" ITERATIONS="${ITERATIONS:-2}" -WORKERS="${WORKERS:-8}" OUT_BASE="${OUT_BASE:-/mnt/large/modelwrap-bench}" HF_VENV="${HF_VENV:-$HOME/.hf-venv}" @@ -61,7 +60,7 @@ for i in $(seq 1 "$ITERATIONS"); do rm -rf "$out" echo -e "\n=== naive Go iter $i ===" s=$(now) - "$OUT_BASE/naive" --repo "$MODEL" --revision "$REVISION" --out "$out" --workers "$WORKERS" + "$OUT_BASE/naive" --repo "$MODEL" --revision "$REVISION" --out "$out" e=$(now) record naive "$i" "$s" "$e" "$out" rm -rf "$out" From 84f7e6b9df1c7d85f327d5613d5d2927ba24709f Mon Sep 17 00:00:00 2001 From: Daniel MS Date: Sat, 20 Jun 2026 15:40:33 -0700 Subject: [PATCH 4/5] split commands apart --- bench/README.md | 49 +++++----- bench/diskwrite/main.go | 149 +++++++++++++++++++++++++++++ bench/diskwrite/run.sh | 12 +++ bench/naive/main.go | 203 ++++++++++++++++++++++++++-------------- bench/naive/run.sh | 20 ++++ bench/netread/main.go | 187 ++++++++++++++++++++++++++++++++++++ bench/netread/run.sh | 13 +++ bench/run.sh | 70 -------------- writeup.md | 116 ++++------------------- 9 files changed, 559 
insertions(+), 260 deletions(-) create mode 100644 bench/diskwrite/main.go create mode 100755 bench/diskwrite/run.sh create mode 100755 bench/naive/run.sh create mode 100644 bench/netread/main.go create mode 100755 bench/netread/run.sh delete mode 100755 bench/run.sh diff --git a/bench/README.md b/bench/README.md index 0d5f2cd..a49779f 100644 --- a/bench/README.md +++ b/bench/README.md @@ -1,35 +1,34 @@ -# Download Benchmark +# Download Bench -Compares two ways to fetch a Hugging Face model, which is the question -behind dropping the `hf` CLI from modelwrap's supply chain: +Three small benchmarks for the question behind dropping the `hf` CLI from +modelwrap: do we need the Xet stack for fast large-model downloads, or +would a stdlib-only Go downloader do? -1. **hf-cli** — `hf download` from `huggingface_hub[hf_xet]`. This is what - modelwrap does today (`wrap/wrap.go`). Pulls in Python + huggingface_hub - + the hf_xet plugin and their full transitive dependency tree. -2. **naive** — `bench/naive/main.go`, a stdlib-only Go program. Lists the - repo tree via the Hub API and GETs each `resolve` URL over plain HTTPS. - Zero external dependencies, no Python. +Each is a standalone Go program with its own run script, and each writes a +TSV of results you can `rsync` off the bench host. -Each iteration downloads to a fresh directory with a clean cache, so both -methods measure raw network transfer (no Xet chunk dedup across runs). +- `diskwrite/` — raw disk write throughput (no network). Writes arbitrary + data, fsyncs, reports write vs write+sync MiB/s. +- `netread/` — raw network download throughput (no disk). Streams every file + in a Hugging Face repo to `io.Discard`, reports per-file and total MiB/s. +- `naive/` — the real stdlib-only downloader, sequential, with per-file + network and disk timing separated. Compare against `netread` (no disk) and + `diskwrite` (no network) to see where time goes. 
+ +All three are Go standard library only — no Python, no `huggingface_hub`, +no `hf_xet`. ## Run +On a box with Go (e.g. `inf8.tinfoil.sh`, downloads to `/mnt/large`): + ```bash -# on a box with Go + python3, downloads land on /mnt/large -ITERATIONS=2 bash bench/run.sh +OUT_BASE=/mnt/large/modelwrap-bench bash bench/diskwrite/run.sh +OUT_BASE=/mnt/large/modelwrap-bench bash bench/netread/run.sh +OUT_BASE=/mnt/large/modelwrap-bench bash bench/naive/run.sh ``` -Results are written to `$OUT_BASE/results.tsv` (tab-separated): -`method iter seconds bytes gib mib_per_s`. - -## Notes +Results land in `$OUT_BASE/{diskwrite,netread,naive}.tsv` (tab-separated). -- **naive** fetches files sequentially, one TCP connection per file, no - concurrency. It does not do byte-range chunking of individual large - shards, which is Xet's main throughput lever. If naive is close, the Xet - stack isn't worth its supply-chain cost; if not, Go could add range - requests without the Python dependency. -- **naive** does not verify SHA256 of LFS blobs (the hf CLI does). That is - less work, but also less safe — a tradeoff to call out. -- Model: `Qwen/Qwen2.5-72B-Instruct` (~145 GiB, open, Xet-backed). +`bench/xet_probe.py` is a separate one-off: it detects which download path +`huggingface_hub` actually takes (native Xet CAS vs plain HTTPS) for a file. diff --git a/bench/diskwrite/main.go b/bench/diskwrite/main.go new file mode 100644 index 0000000..1af2761 --- /dev/null +++ b/bench/diskwrite/main.go @@ -0,0 +1,149 @@ +// Command diskwrite measures raw disk write throughput: it writes a file of +// arbitrary data in fixed-size blocks, then fsyncs it. It reports write-only +// and write+sync throughput so disk speed can be compared against network +// in isolation. No network, no HF, no Python — just the disk. 
+package main + +import ( + "flag" + "fmt" + "log" + "os" + "strconv" + "strings" + "time" +) + +func main() { + out := flag.String("out", "", "output file path") + sizeStr := flag.String("size", "10GiB", "total bytes to write (e.g. 10GiB, 512MiB)") + bsStr := flag.String("bs", "1MiB", "block size (e.g. 1MiB)") + results := flag.String("results", "", "append a TSV row to this path") + flag.Parse() + + if *out == "" { + log.Fatal("usage: diskwrite --out [--size 10GiB] [--bs 1MiB] [--results file]") + } + size, err := parseSize(*sizeStr) + if err != nil { + log.Fatalf("size: %v", err) + } + bs, err := parseSize(*bsStr) + if err != nil { + log.Fatalf("bs: %v", err) + } + if bs <= 0 || size <= 0 { + log.Fatal("size and bs must be > 0") + } + + buf := make([]byte, bs) + f, err := os.Create(*out) + if err != nil { + log.Fatal(err) + } + defer os.Remove(*out) + + written := int64(0) + writeStart := time.Now() + for written < size { + n := int64(bs) + if written+n > size { + n = size - written + } + if _, err := f.Write(buf[:n]); err != nil { + f.Close() + log.Fatalf("write at %d: %v", written, err) + } + written += n + } + writeElapsed := time.Since(writeStart) + + syncStart := time.Now() + if err := f.Sync(); err != nil { + f.Close() + log.Fatalf("sync: %v", err) + } + syncElapsed := time.Since(syncStart) + + if err := f.Close(); err != nil { + log.Fatal(err) + } + + total := writeElapsed + syncElapsed + writeMib := mib(written, writeElapsed) + totalMib := mib(written, total) + gib := float64(written) / (1 << 30) + + fmt.Printf("diskwrite: %d bytes (%.2f GiB)\n", written, gib) + fmt.Printf(" write: %.3fs %.1f MiB/s\n", writeElapsed.Seconds(), writeMib) + fmt.Printf(" sync: %.3fs\n", syncElapsed.Seconds()) + fmt.Printf(" total: %.3fs %.1f MiB/s\n", total.Seconds(), totalMib) + + if *results != "" { + if err := appendRow(*results, written, writeElapsed, syncElapsed, total); err != nil { + log.Printf("warning: write results: %v", err) + } + } +} + +func mib(b int64, d 
time.Duration) float64 { + if d <= 0 { + return 0 + } + return float64(b) / d.Seconds() / (1 << 20) +} + +func appendRow(path string, bytes int64, write, sync, total time.Duration) error { + header := false + if _, err := os.Stat(path); os.IsNotExist(err) { + header = true + } + f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return err + } + defer f.Close() + if header { + fmt.Fprintln(f, "bytes\tgib\twrite_s\tsync_s\ttotal_s\twrite_mib_s\ttotal_mib_s") + } + fmt.Fprintf(f, "%d\t%.2f\t%.3f\t%.3f\t%.3f\t%.1f\t%.1f\n", + bytes, float64(bytes)/(1<<30), + write.Seconds(), sync.Seconds(), total.Seconds(), + mib(bytes, write), mib(bytes, total)) + return nil +} + +func parseSize(s string) (int64, error) { + s = strings.TrimSpace(s) + i := 0 + for i < len(s) && s[i] >= '0' && s[i] <= '9' { + i++ + } + if i == 0 { + return 0, fmt.Errorf("invalid size %q", s) + } + n, err := strconv.ParseInt(s[:i], 10, 64) + if err != nil { + return 0, fmt.Errorf("invalid size %q: %w", s, err) + } + switch strings.ToLower(strings.TrimSpace(s[i:])) { + case "", "b": + return n, nil + case "kib": + return n << 10, nil + case "mib": + return n << 20, nil + case "gib": + return n << 30, nil + case "tib": + return n << 40, nil + case "kb": + return n * 1000, nil + case "mb": + return n * 1000 * 1000, nil + case "gb": + return n * 1000 * 1000 * 1000, nil + default: + return 0, fmt.Errorf("unknown unit in %q", s) + } +} diff --git a/bench/diskwrite/run.sh b/bench/diskwrite/run.sh new file mode 100755 index 0000000..d45c146 --- /dev/null +++ b/bench/diskwrite/run.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# diskwrite: raw disk write throughput (no network). Writes arbitrary data, +# fsyncs, reports write vs write+sync MiB/s. 
Results -> $OUT_BASE/diskwrite.tsv +set -euo pipefail +OUT_BASE="${OUT_BASE:-/mnt/large/modelwrap-bench}" +SIZE="${SIZE:-10GiB}" +BS="${BS:-1MiB}" +mkdir -p "$OUT_BASE" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +( cd "$SCRIPT_DIR/.." && go build -o "$OUT_BASE/diskwrite" ./diskwrite ) +"$OUT_BASE/diskwrite" --out "$OUT_BASE/diskwrite.data" --size "$SIZE" --bs "$BS" --results "$OUT_BASE/diskwrite.tsv" +echo "results: $OUT_BASE/diskwrite.tsv" diff --git a/bench/naive/main.go b/bench/naive/main.go index aa63649..93b094f 100644 --- a/bench/naive/main.go +++ b/bench/naive/main.go @@ -1,15 +1,14 @@ // Command naive downloads a Hugging Face model using only the Go standard -// library: it lists the repo file tree via the Hub API and fetches each -// file over plain HTTPS (following the resolve redirects to the CDN), one -// file at a time. +// library, sequentially, and profiles each file: it separates the time to +// read a file over the network (into memory) from the time to write it to +// disk, so disk and network can be compared in isolation. // -// It is the simplest possible "no supply chain" baseline against the -// official hf CLI (huggingface_hub + hf_xet), which modelwrap currently -// shells out to. No Python, no huggingface_hub, no Xet plugin, no -// concurrency — just HTTP, sequentially. +// It is the "no supply chain" baseline against the official hf CLI +// (huggingface_hub + hf_xet). No Python, no Xet plugin, no concurrency. package main import ( + "bytes" "context" "encoding/json" "flag" @@ -36,43 +35,150 @@ func main() { repo := flag.String("repo", "", "Hugging Face repo id, e.g. 
Qwen/Qwen2.5-72B-Instruct") rev := flag.String("revision", "main", "revision (branch or commit)") out := flag.String("out", "", "output directory") + sync := flag.Bool("sync", false, "fsync each file after writing (measures real disk, not page cache)") + results := flag.String("results", "", "write per-file TSV results to this path") flag.Parse() if *repo == "" || *out == "" { - log.Fatal("usage: naive --repo --out [--revision main]") + log.Fatal("usage: naive --repo --out [--revision main] [--sync] [--results file]") } - token := os.Getenv("HF_TOKEN") - start := time.Now() - total, n, err := run(context.Background(), *repo, *rev, *out, token) + ctx := context.Background() + files, err := listTree(ctx, *repo, *rev, token) if err != nil { - log.Fatalf("download failed after %d files: %v", n, err) + log.Fatalf("list tree: %v", err) + } + log.Printf("listed %d files", len(files)) + + var rows []row + var totalBytes int64 + var totalNet, totalDisk time.Duration + start := time.Now() + + for i, f := range files { + n, netT, diskT, err := fetchFile(ctx, *repo, *rev, f.Path, *out, token, *sync) + if err != nil { + log.Fatalf("[%d/%d] %s: %v", i+1, len(files), f.Path, err) + } + totalBytes += n + totalNet += netT + totalDisk += diskT + log.Printf("[%d/%d] %s: %d bytes net=%.3fs (%.1f MiB/s) disk=%.3fs (%.1f MiB/s)", + i+1, len(files), f.Path, n, netT.Seconds(), mib(n, netT), diskT.Seconds(), mib(n, diskT)) + rows = append(rows, row{f.Path, n, netT, diskT}) } - elapsed := time.Since(start) - gib := float64(total) / (1 << 30) - fmt.Printf("naive: files=%d bytes=%d (%.2f GiB) time=%.2fs throughput=%.1f MiB/s\n", - n, total, gib, elapsed.Seconds(), float64(total)/elapsed.Seconds()/(1<<20)) + wall := time.Since(start) + fmt.Printf("naive: files=%d bytes=%d (%.2f GiB) net=%.3fs disk=%.3fs wall=%.3fs | net=%.1f MiB/s disk=%.1f MiB/s wall=%.1f MiB/s\n", + len(rows), totalBytes, float64(totalBytes)/(1<<30), + totalNet.Seconds(), totalDisk.Seconds(), wall.Seconds(), + 
mib(totalBytes, totalNet), mib(totalBytes, totalDisk), float64(totalBytes)/wall.Seconds()/(1<<20)) + + if *results != "" { + if err := writeResults(*results, rows, totalBytes, totalNet, totalDisk); err != nil { + log.Printf("warning: write results: %v", err) + } + } +} + +type row struct { + path string + bytes int64 + net time.Duration + disk time.Duration } -func run(ctx context.Context, repo, rev, out, token string) (int64, int, error) { - files, err := listTree(ctx, repo, rev, token) +func mib(b int64, d time.Duration) float64 { + if d <= 0 { + return 0 + } + return float64(b) / d.Seconds() / (1 << 20) +} + +// fetchFile reads a file fully into memory (network time, isolated from +// disk) then writes it to a .part file and renames (disk time, isolated +// from network). The two phases are sequential by design: this measures the +// components separately rather than overlapping them. +func fetchFile(ctx context.Context, repo, rev, path, out, token string, doSync bool) (n int64, netT, diskT time.Duration, err error) { + dest := filepath.Join(out, path) + if err = os.MkdirAll(filepath.Dir(dest), 0o755); err != nil { + return + } + req, err := http.NewRequestWithContext(ctx, "GET", + fmt.Sprintf("%s/%s/resolve/%s/%s", hub, repo, rev, path), nil) if err != nil { - return 0, 0, fmt.Errorf("list tree: %w", err) + return + } + if token != "" { + req.Header.Set("Authorization", "Bearer "+token) } - log.Printf("listed %d files", len(files)) - var total int64 - for i, f := range files { - log.Printf("[%d/%d] %s (%d bytes)", i+1, len(files), f.Path, f.Size) - n, err := fetchFile(ctx, repo, rev, f.Path, out, token) - total += n - if err != nil { - return total, i, fmt.Errorf("%s: %w", f.Path, err) - } + netStart := time.Now() + resp, err := http.DefaultClient.Do(req) + if err != nil { + return + } + if resp.StatusCode != http.StatusOK { + resp.Body.Close() + err = fmt.Errorf("%s: %s", path, resp.Status) + return + } + buf := bytes.NewBuffer(make([]byte, 0, 
resp.ContentLength)) + n, err = io.Copy(buf, resp.Body) + resp.Body.Close() + netT = time.Since(netStart) + if err != nil { + err = fmt.Errorf("%s: read: %w", path, err) + return + } + + tmp := dest + ".part" + diskStart := time.Now() + f, err := os.Create(tmp) + if err != nil { + return + } + _, werr := buf.WriteTo(f) + if werr == nil && doSync { + werr = f.Sync() + } + cerr := f.Close() + diskT = time.Since(diskStart) + err = werr + if err == nil { + err = cerr + } + if err != nil { + os.Remove(tmp) + err = fmt.Errorf("%s: write: %w", path, err) + return } - return total, len(files), nil + return n, netT, diskT, os.Rename(tmp, dest) +} + +func writeResults(path string, rows []row, totalBytes int64, totalNet, totalDisk time.Duration) error { + header := false + if _, err := os.Stat(path); os.IsNotExist(err) { + header = true + } + f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return err + } + defer f.Close() + if header { + fmt.Fprintln(f, "file\tbytes\tgib\tnet_s\tdisk_s\tnet_mib_s\tdisk_mib_s") + } + for _, r := range rows { + fmt.Fprintf(f, "%s\t%d\t%.2f\t%.3f\t%.3f\t%.1f\t%.1f\n", + r.path, r.bytes, float64(r.bytes)/(1<<30), + r.net.Seconds(), r.disk.Seconds(), mib(r.bytes, r.net), mib(r.bytes, r.disk)) + } + fmt.Fprintf(f, "TOTAL\t%d\t%.2f\t%.3f\t%.3f\t%.1f\t%.1f\n", + totalBytes, float64(totalBytes)/(1<<30), + totalNet.Seconds(), totalDisk.Seconds(), mib(totalBytes, totalNet), mib(totalBytes, totalDisk)) + return nil } // listTree paginates the Hub tree API and returns leaf (non-directory) entries. @@ -117,7 +223,6 @@ func listTree(ctx context.Context, repo, rev, token string) ([]entry, error) { return files, nil } -// nextLink extracts the rel="next" URL from an RFC 8288 Link header. 
func nextLink(link string) string { for _, part := range strings.Split(link, ",") { if !strings.Contains(part, `rel="next"`) { @@ -131,41 +236,3 @@ func nextLink(link string) string { } return "" } - -func fetchFile(ctx context.Context, repo, rev, path, out, token string) (int64, error) { - dest := filepath.Join(out, path) - if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil { - return 0, err - } - req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("%s/%s/resolve/%s/%s", hub, repo, rev, path), nil) - if err != nil { - return 0, err - } - if token != "" { - req.Header.Set("Authorization", "Bearer "+token) - } - resp, err := http.DefaultClient.Do(req) - if err != nil { - return 0, err - } - defer resp.Body.Close() - if resp.StatusCode != http.StatusOK { - return 0, fmt.Errorf("%s: %s", path, resp.Status) - } - tmp := dest + ".part" - f, err := os.Create(tmp) - if err != nil { - return 0, err - } - n, err := io.Copy(f, resp.Body) - if err != nil { - f.Close() - os.Remove(tmp) - return n, err - } - if err := f.Close(); err != nil { - os.Remove(tmp) - return n, err - } - return n, os.Rename(tmp, dest) -} diff --git a/bench/naive/run.sh b/bench/naive/run.sh new file mode 100755 index 0000000..85f46b5 --- /dev/null +++ b/bench/naive/run.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# naive: sequential stdlib-only model download to disk, with per-file network +# and disk timing separated. Compare against netread (no disk) and diskwrite +# (no network) to see where time goes. Results -> $OUT_BASE/naive.tsv +# +# SYNC=1 adds an fsync per file (measures real disk, not page cache). +set -euo pipefail +OUT_BASE="${OUT_BASE:-/mnt/large/modelwrap-bench}" +MODEL="${MODEL:-Qwen/Qwen2.5-72B-Instruct}" +REVISION="${REVISION:-main}" +SYNC="${SYNC:-0}" +mkdir -p "$OUT_BASE" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +( cd "$SCRIPT_DIR/.." 
&& go build -o "$OUT_BASE/naive" ./naive ) +OUT="$OUT_BASE/naive-out" +rm -rf "$OUT" +args=( --repo "$MODEL" --revision "$REVISION" --out "$OUT" --results "$OUT_BASE/naive.tsv" ) +if [ "${SYNC}" = "1" ]; then args+=( --sync ); fi +"$OUT_BASE/naive" "${args[@]}" +echo "results: $OUT_BASE/naive.tsv" diff --git a/bench/netread/main.go b/bench/netread/main.go new file mode 100644 index 0000000..08a0fcc --- /dev/null +++ b/bench/netread/main.go @@ -0,0 +1,187 @@ +// Command netread measures raw network download throughput: it lists a +// Hugging Face repo's file tree and streams every file to io.Discard — no +// disk writes at all. It is the network-only counterpart of naive: run both +// to see how much disk adds. No Python, no Xet, no concurrency. +package main + +import ( + "context" + "encoding/json" + "flag" + "fmt" + "io" + "log" + "net/http" + "net/url" + "os" + "strings" + "time" +) + +type entry struct { + Type string `json:"type"` + Path string `json:"path"` + Size int64 `json:"size"` +} + +const hub = "https://huggingface.co" + +func main() { + repo := flag.String("repo", "", "Hugging Face repo id, e.g. 
Qwen/Qwen2.5-72B-Instruct") + rev := flag.String("revision", "main", "revision (branch or commit)") + results := flag.String("results", "", "write per-file TSV results to this path") + flag.Parse() + + if *repo == "" { + log.Fatal("usage: netread --repo [--revision main] [--results file]") + } + token := os.Getenv("HF_TOKEN") + + ctx := context.Background() + files, err := listTree(ctx, *repo, *rev, token) + if err != nil { + log.Fatalf("list tree: %v", err) + } + log.Printf("listed %d files", len(files)) + + var rows []row + var totalBytes int64 + var totalNet time.Duration + start := time.Now() + + for i, f := range files { + n, elapsed, err := readToDiscard(ctx, *repo, *rev, f.Path, token) + if err != nil { + log.Fatalf("[%d/%d] %s: %v", i+1, len(files), f.Path, err) + } + totalBytes += n + totalNet += elapsed + log.Printf("[%d/%d] %s: %d bytes %.3fs %.1f MiB/s", i+1, len(files), f.Path, n, elapsed.Seconds(), mib(n, elapsed)) + rows = append(rows, row{f.Path, n, elapsed}) + } + + wall := time.Since(start) + fmt.Printf("netread: files=%d bytes=%d (%.2f GiB) net=%.3fs wall=%.3fs | net=%.1f MiB/s wall=%.1f MiB/s\n", + len(rows), totalBytes, float64(totalBytes)/(1<<30), + totalNet.Seconds(), wall.Seconds(), + mib(totalBytes, totalNet), float64(totalBytes)/wall.Seconds()/(1<<20)) + + if *results != "" { + if err := writeResults(*results, rows, totalBytes, totalNet); err != nil { + log.Printf("warning: write results: %v", err) + } + } +} + +type row struct { + path string + bytes int64 + elapsed time.Duration +} + +func readToDiscard(ctx context.Context, repo, rev, path, token string) (int64, time.Duration, error) { + req, err := http.NewRequestWithContext(ctx, "GET", + fmt.Sprintf("%s/%s/resolve/%s/%s", hub, repo, rev, path), nil) + if err != nil { + return 0, 0, err + } + if token != "" { + req.Header.Set("Authorization", "Bearer "+token) + } + start := time.Now() + resp, err := http.DefaultClient.Do(req) + if err != nil { + return 0, 0, err + } + defer 
resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return 0, 0, fmt.Errorf("%s: %s", path, resp.Status) + } + n, err := io.Copy(io.Discard, resp.Body) + return n, time.Since(start), err +} + +func writeResults(path string, rows []row, totalBytes int64, totalNet time.Duration) error { + header := false + if _, err := os.Stat(path); os.IsNotExist(err) { + header = true + } + f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) + if err != nil { + return err + } + defer f.Close() + if header { + fmt.Fprintln(f, "file\tbytes\tgib\tnet_s\tnet_mib_s") + } + for _, r := range rows { + fmt.Fprintf(f, "%s\t%d\t%.2f\t%.3f\t%.1f\n", + r.path, r.bytes, float64(r.bytes)/(1<<30), r.elapsed.Seconds(), mib(r.bytes, r.elapsed)) + } + fmt.Fprintf(f, "TOTAL\t%d\t%.2f\t%.3f\t%.1f\n", + totalBytes, float64(totalBytes)/(1<<30), totalNet.Seconds(), mib(totalBytes, totalNet)) + return nil +} + +func mib(b int64, d time.Duration) float64 { + if d <= 0 { + return 0 + } + return float64(b) / d.Seconds() / (1 << 20) +} + +// listTree paginates the Hub tree API and returns leaf (non-directory) entries. 
+func listTree(ctx context.Context, repo, rev, token string) ([]entry, error) { + segments := strings.Split(repo, "/") + for i, s := range segments { + segments[i] = url.PathEscape(s) + } + u := fmt.Sprintf("%s/api/models/%s/tree/%s?recursive=true", hub, strings.Join(segments, "/"), rev) + + var files []entry + for u != "" { + req, err := http.NewRequestWithContext(ctx, "GET", u, nil) + if err != nil { + return nil, err + } + if token != "" { + req.Header.Set("Authorization", "Bearer "+token) + } + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + if resp.StatusCode != http.StatusOK { + body, _ := io.ReadAll(resp.Body) + resp.Body.Close() + return nil, fmt.Errorf("tree %s: %s: %s", u, resp.Status, body) + } + var page []entry + if err := json.NewDecoder(resp.Body).Decode(&page); err != nil { + resp.Body.Close() + return nil, err + } + resp.Body.Close() + for _, e := range page { + if e.Type != "directory" && e.Type != "tree" { + files = append(files, e) + } + } + u = nextLink(resp.Header.Get("Link")) + } + return files, nil +} + +func nextLink(link string) string { + for _, part := range strings.Split(link, ",") { + if !strings.Contains(part, `rel="next"`) { + continue + } + part = strings.TrimSpace(part) + part = strings.TrimPrefix(part, "<") + if i := strings.Index(part, ">"); i >= 0 { + return part[:i] + } + } + return "" +} diff --git a/bench/netread/run.sh b/bench/netread/run.sh new file mode 100755 index 0000000..30915a8 --- /dev/null +++ b/bench/netread/run.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash +# netread: raw network download throughput (no disk). Streams every file in a +# Hugging Face repo to io.Discard, reports per-file and total MiB/s. +# Results -> $OUT_BASE/netread.tsv +set -euo pipefail +OUT_BASE="${OUT_BASE:-/mnt/large/modelwrap-bench}" +MODEL="${MODEL:-Qwen/Qwen2.5-72B-Instruct}" +REVISION="${REVISION:-main}" +mkdir -p "$OUT_BASE" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +( cd "$SCRIPT_DIR/.." 
&& go build -o "$OUT_BASE/netread" ./netread ) +"$OUT_BASE/netread" --repo "$MODEL" --revision "$REVISION" --results "$OUT_BASE/netread.tsv" +echo "results: $OUT_BASE/netread.tsv" diff --git a/bench/run.sh b/bench/run.sh deleted file mode 100755 index de89905..0000000 --- a/bench/run.sh +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env bash -# Benchmark: HF CLI (Xet) vs naive Go HTTP download of a Hugging Face model. -# -# Measures raw download throughput for the two approaches modelwrap could use: -# 1. hf download from huggingface_hub[hf_xet] — the current approach -# 2. ./naive — a stdlib-only Go HTTP downloader (no Python, no Xet) -# -# Each iteration downloads to a fresh directory with a clean cache, so we -# measure network transfer, not Xet dedup of already-present chunks. -set -euo pipefail - -MODEL="${MODEL:-Qwen/Qwen2.5-72B-Instruct}" -REVISION="${REVISION:-main}" -ITERATIONS="${ITERATIONS:-2}" -OUT_BASE="${OUT_BASE:-/mnt/large/modelwrap-bench}" -HF_VENV="${HF_VENV:-$HOME/.hf-venv}" - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - -mkdir -p "$OUT_BASE" -RESULTS="$OUT_BASE/results.tsv" -printf "method\titer\tseconds\tbytes\tgib\tmib_per_s\n" > "$RESULTS" - -now() { date +%s.%N; } -delta() { awk -v a="$1" -v b="$2" 'BEGIN{printf "%.3f", b-a}'; } - -record() { # method iter start end dir - local secs; secs=$(delta "$3" "$4") - local bytes; bytes=$(du -sb "$5" | cut -f1) - local gib; gib=$(awk -v b="$bytes" 'BEGIN{printf "%.2f", b/1073741824}') - local mibps; mibps=$(awk -v b="$bytes" -v s="$secs" 'BEGIN{printf "%.1f", b/1048576/s}') - printf "%s\t%s\t%s\t%s\t%s\t%s\n" "$1" "$2" "$secs" "$bytes" "$gib" "$mibps" | tee -a "$RESULTS" -} - -echo "Building naive downloader..." -( cd "$SCRIPT_DIR" && go build -o "$OUT_BASE/naive" ./naive ) - -if [ ! -x "$HF_VENV/bin/hf" ]; then - echo "Creating HF venv at $HF_VENV ..." 
- python3 -m venv "$HF_VENV" - "$HF_VENV/bin/pip" install --upgrade pip - "$HF_VENV/bin/pip" install "huggingface_hub[hf_xet]" -fi -echo "HF CLI version: $("$HF_VENV/bin/hf" --version)" - -for i in $(seq 1 "$ITERATIONS"); do - out="$OUT_BASE/hf-$i" - cache="$OUT_BASE/hf-cache-$i" - rm -rf "$out" "$cache" - echo -e "\n=== hf download (Xet) iter $i ===" - s=$(now) - HF_HOME="$cache" "$HF_VENV/bin/hf" download "$MODEL" --revision "$REVISION" --local-dir "$out" - e=$(now) - record hf-cli "$i" "$s" "$e" "$out" - rm -rf "$out" "$cache" -done - -for i in $(seq 1 "$ITERATIONS"); do - out="$OUT_BASE/naive-$i" - rm -rf "$out" - echo -e "\n=== naive Go iter $i ===" - s=$(now) - "$OUT_BASE/naive" --repo "$MODEL" --revision "$REVISION" --out "$out" - e=$(now) - record naive "$i" "$s" "$e" "$out" - rm -rf "$out" -done - -echo -e "\n=== RESULTS ===" -cat "$RESULTS" diff --git a/writeup.md b/writeup.md index 9a11232..b501509 100644 --- a/writeup.md +++ b/writeup.md @@ -1,110 +1,32 @@ -# HF CLI vs Naive Go Download Benchmark +# HF CLI vs Naive Go Download ## Question -modelwrap downloads models today by shelling out to `hf download` from +modelwrap downloads models by shelling out to `hf download` from `huggingface_hub[hf_xet]` (`wrap/wrap.go`). That pulls Python, -`huggingface_hub`, the `hf_xet` plugin, and their full transitive -dependency tree into the packer container — a sizable supply-chain surface -for a tool whose whole point is reproducibility and trust. +`huggingface_hub`, the `hf_xet` plugin, and their full transitive deps into +the packer container — a sizable supply-chain surface for a tool whose +point is reproducibility and trust. -The question: how much download throughput do we actually get from the Xet -stack, and could a stdlib-only Go downloader replace it without giving up -speed? +Do we actually need the Xet stack for fast large-model downloads, or could +a stdlib-only Go downloader replace it? 
-## Setup +## Benchmarks -- **Host:** `inf8.tinfoil.sh` — 2.0 TiB RAM, no GPU, Go 1.24.4, downloads - written to `/mnt/large` (25 TB RAID, 3.2 TB free) -- **Model:** `Qwen/Qwen2.5-72B-Instruct` (revision `main`) — open, Xet-backed, - 47 files, **135.44 GiB** total. Large enough that Xet's chunking/dedup - has room to help. -- **hf-cli:** `huggingface_hub[hf_xet]` v1.20.1 (`hf-xet` 1.5.1), installed - in an isolated venv. Xet confirmed active. -- **naive:** `bench/naive/main.go` — Go standard library only (no external - dependencies, no Python). Lists the repo file tree via the Hub API, then - GETs each `resolve` URL concurrently (8 workers), one TCP connection per - file, no byte-range chunking. +Three small benchmarks in `bench/`, each its own program + run script, each +writing a TSV you can `rsync` off the bench host: -## Methodology +- `bench/diskwrite` — raw disk write throughput (no network). +- `bench/netread` — raw network download throughput to `io.Discard` (no disk). +- `bench/naive` — the real stdlib downloader, sequential, with per-file + network and disk timing separated. -Each iteration downloaded the full model to a **fresh directory with a -clean cache**, so both methods measured raw network transfer — not Xet -chunk dedup of already-present data. After each iteration the download was -deleted before the next run. - -- 2 iterations per method. -- Throughput = total bytes downloaded / wall time, measured from process - start to completion. -- Both methods ran unauthenticated (no `HF_TOKEN`), since the model is open. - -Harness: `bench/run.sh` (builds the naive binary, creates the HF venv on -first run, loops iterations, records results to `results.tsv`). 
- -## Results - -| method | iter | seconds | GiB | MiB/s | -| ------ | ---- | ------- | ------ | ------ | -| hf-cli | 1 | 130.8 | 135.44 | 1060.7 | -| hf-cli | 2 | 119.7 | 135.44 | 1158.4 | -| naive | 1 | 102.9 | 135.44 | 1347.3 | -| naive | 2 | 75.7 | 135.44 | 1832.1 | - -**Average throughput:** - -- hf-cli (Xet): **~1110 MiB/s** -- naive Go: **~1590 MiB/s** - -The naive Go downloader was faster on **every iteration** — roughly 27–58% -faster depending on the comparison, and ~43% faster on average. - -## Notes and caveats - -- **Xet was active.** `hf-xet` 1.5.1 was installed in the venv; the HF CLI - ran with its default Xet-backed transfer path. The naive path used plain - HTTPS `resolve` redirects to the CDN. -- **Network variance.** The two naive runs (1347 vs 1832 MiB/s) and the two - hf-cli runs (1061 vs 1158 MiB/s) both show real variance, consistent with - shared-internet conditions. The ordering (naive > hf-cli) held across all - runs. -- **naive does less work.** It does not verify SHA256 of LFS blobs (the - hf CLI does), and it does not do byte-range chunking of large shards. - Less work, but also less safe on integrity — a tradeoff to call out. If - integrity matters for the production path, Go could add SHA256 checks - cheaply without the Python stack. -- **naive uses one connection per file** with bounded concurrency over - files (8). Xet's main throughput lever is parallel byte-range chunking of - individual large shards. Despite not doing that, naive still won — likely - because the CDN serves `resolve` URLs fast enough that per-file - parallelism saturates the link on a 2 TiB-RAM box with no other contention. -- **Pagination was unused.** The naive downloader includes RFC-8288 - `Link`-header pagination for the tree API, but Qwen 72B has 47 files - (under the ~1000-file page limit), so it ran a single page. Pagination - would matter for repos with thousands of files. 
- -## Conclusion - -For this model and host, the stdlib-only Go downloader was consistently -faster than the Xet-backed HF CLI while carrying none of its supply-chain -weight (no Python, no `huggingface_hub`, no `hf_xet`, no transitive deps). - -That inverts the usual assumption that you need Xet for fast large-model -downloads: here, plain HTTPS `resolve` fetches with file-level -concurrency saturated the available bandwidth more effectively than the -Xet stack did. Dropping the `hf` CLI from modelwrap's packer would shrink -the supply chain without costing throughput — the remaining work would be -adding SHA256 verification (and possibly range requests for very large -single shards) in Go. - -## Reproducing +Run on `inf8.tinfoil.sh` (downloads to `/mnt/large`): ```bash -# on inf8 (or any box with Go + python3 and a writable /mnt/large) -ITERATIONS=2 WORKERS=8 bash bench/run.sh +OUT_BASE=/mnt/large/modelwrap-bench bash bench/diskwrite/run.sh +OUT_BASE=/mnt/large/modelwrap-bench bash bench/netread/run.sh +OUT_BASE=/mnt/large/modelwrap-bench bash bench/naive/run.sh ``` -Code lives in `bench/`: - -- `bench/naive/main.go` — the naive stdlib-only downloader -- `bench/run.sh` — the benchmark harness -- `bench/README.md` — quick reference +Results land in `$OUT_BASE/{diskwrite,netread,naive}.tsv`. From a6654987c06576abecec5bfc8ff239b91e17921c Mon Sep 17 00:00:00 2001 From: Daniel MS Date: Sat, 20 Jun 2026 18:02:32 -0700 Subject: [PATCH 5/5] benchmarks! 
--- bench/naive/main.go | 5 +++- bench/results/diskwrite.tsv | 2 ++ bench/results/naive.tsv | 49 +++++++++++++++++++++++++++++++++++++ bench/results/netread.tsv | 49 +++++++++++++++++++++++++++++++++++++ 4 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 bench/results/diskwrite.tsv create mode 100644 bench/results/naive.tsv create mode 100644 bench/results/netread.tsv diff --git a/bench/naive/main.go b/bench/naive/main.go index 93b094f..0a1852b 100644 --- a/bench/naive/main.go +++ b/bench/naive/main.go @@ -124,7 +124,10 @@ func fetchFile(ctx context.Context, repo, rev, path, out, token string, doSync b err = fmt.Errorf("%s: %s", path, resp.Status) return } - buf := bytes.NewBuffer(make([]byte, 0, resp.ContentLength)) + buf := &bytes.Buffer{} + if resp.ContentLength > 0 { + buf.Grow(int(resp.ContentLength)) + } n, err = io.Copy(buf, resp.Body) resp.Body.Close() netT = time.Since(netStart) diff --git a/bench/results/diskwrite.tsv b/bench/results/diskwrite.tsv new file mode 100644 index 0000000..5e4b941 --- /dev/null +++ b/bench/results/diskwrite.tsv @@ -0,0 +1,2 @@ +bytes gib write_s sync_s total_s write_mib_s total_mib_s +10737418240 10.00 2.556 8.278 10.834 4007.0 945.2 diff --git a/bench/results/naive.tsv b/bench/results/naive.tsv new file mode 100644 index 0000000..19586ff --- /dev/null +++ b/bench/results/naive.tsv @@ -0,0 +1,49 @@ +file bytes gib net_s disk_s net_mib_s disk_mib_s +.gitattributes 1519 0.00 0.055 0.000 0.0 30.4 +LICENSE 6962 0.00 0.049 0.000 0.1 239.1 +README.md 6259 0.00 0.050 0.000 0.1 152.1 +config.json 663 0.00 0.049 0.000 0.0 31.4 +generation_config.json 242 0.00 0.054 0.000 0.0 3.3 +merges.txt 1671839 0.00 0.070 0.001 22.6 2778.1 +model-00001-of-00037.safetensors 3762345336 3.50 9.660 1.490 371.4 2408.1 +model-00002-of-00037.safetensors 3995200440 3.72 12.201 1.516 312.3 2513.4 +model-00003-of-00037.safetensors 3812769392 3.55 11.746 1.442 309.6 2522.0 +model-00004-of-00037.safetensors 3995183944 3.72 11.722 1.369 325.0 
2782.4 +model-00005-of-00037.safetensors 3995183944 3.72 10.174 1.780 374.5 2140.4 +model-00006-of-00037.safetensors 3995200456 3.72 11.618 1.491 328.0 2555.0 +model-00007-of-00037.safetensors 3812769424 3.55 10.043 1.437 362.0 2529.7 +model-00008-of-00037.safetensors 3995183968 3.72 11.134 1.795 342.2 2123.1 +model-00009-of-00037.safetensors 3995183968 3.72 8.783 1.177 433.8 3238.5 +model-00010-of-00037.safetensors 3995200464 3.72 10.017 1.184 380.4 3217.2 +model-00011-of-00037.safetensors 3812769424 3.55 10.793 1.433 336.9 2538.3 +model-00012-of-00037.safetensors 3995183968 3.72 11.402 1.498 334.1 2543.9 +model-00013-of-00037.safetensors 3995183968 3.72 8.812 1.167 432.4 3264.8 +model-00014-of-00037.safetensors 3995200464 3.72 11.351 1.345 335.7 2832.5 +model-00015-of-00037.safetensors 3812769424 3.55 10.628 1.710 342.1 2126.6 +model-00016-of-00037.safetensors 3995183968 3.72 10.991 1.494 346.7 2550.4 +model-00017-of-00037.safetensors 3995183968 3.72 9.048 1.491 421.1 2555.5 +model-00018-of-00037.safetensors 3995200464 3.72 11.361 2.854 335.4 1335.2 +model-00019-of-00037.safetensors 3812769424 3.55 9.965 1.617 364.9 2248.1 +model-00020-of-00037.safetensors 3995183968 3.72 38.757 1.505 98.3 2532.4 +model-00021-of-00037.safetensors 3995183968 3.72 9.741 1.537 391.1 2479.3 +model-00022-of-00037.safetensors 3995200464 3.72 9.919 1.490 384.1 2556.7 +model-00023-of-00037.safetensors 3812769424 3.55 7.871 1.441 462.0 2523.5 +model-00024-of-00037.safetensors 3995183968 3.72 9.773 1.497 389.8 2544.3 +model-00025-of-00037.safetensors 3995183968 3.72 9.940 1.492 383.3 2554.3 +model-00026-of-00037.safetensors 3995200464 3.72 10.196 1.174 373.7 3245.2 +model-00027-of-00037.safetensors 3812769424 3.55 7.749 1.335 469.3 2723.6 +model-00028-of-00037.safetensors 3995183968 3.72 9.940 1.138 383.3 3348.6 +model-00029-of-00037.safetensors 3995183968 3.72 10.419 1.627 365.7 2342.0 +model-00030-of-00037.safetensors 3995200464 3.72 10.123 1.484 376.4 2567.8 
+model-00031-of-00037.safetensors 3812769424 3.55 8.347 1.409 435.6 2580.6 +model-00032-of-00037.safetensors 3995183968 3.72 9.513 1.498 400.5 2542.9 +model-00033-of-00037.safetensors 3995183968 3.72 9.012 1.477 422.8 2578.8 +model-00034-of-00037.safetensors 3995200464 3.72 10.911 1.183 349.2 3220.2 +model-00035-of-00037.safetensors 3812769424 3.55 9.072 1.530 400.8 2377.3 +model-00036-of-00037.safetensors 3995183968 3.72 10.212 1.153 373.1 3303.5 +model-00037-of-00037.safetensors 3460317640 3.22 10.150 1.293 325.1 2551.6 +model.safetensors.index.json 79025 0.00 0.082 0.000 0.9 768.9 +tokenizer.json 7031645 0.01 0.104 0.002 64.6 2977.2 +tokenizer_config.json 7305 0.00 0.049 0.000 0.1 202.2 +vocab.json 2776833 0.00 0.077 0.001 34.6 2821.2 +TOTAL 145424101604 135.44 403.734 54.557 343.5 2542.1 diff --git a/bench/results/netread.tsv b/bench/results/netread.tsv new file mode 100644 index 0000000..50ab8b3 --- /dev/null +++ b/bench/results/netread.tsv @@ -0,0 +1,49 @@ +file bytes gib net_s net_mib_s +.gitattributes 1519 0.00 0.054 0.0 +LICENSE 6962 0.00 0.052 0.1 +README.md 6259 0.00 0.052 0.1 +config.json 663 0.00 0.051 0.0 +generation_config.json 242 0.00 0.054 0.0 +merges.txt 1671839 0.00 0.067 23.8 +model-00001-of-00037.safetensors 3762345336 3.50 9.284 386.5 +model-00002-of-00037.safetensors 3995200440 3.72 8.611 442.5 +model-00003-of-00037.safetensors 3812769392 3.55 9.153 397.3 +model-00004-of-00037.safetensors 3995183944 3.72 9.344 407.7 +model-00005-of-00037.safetensors 3995183944 3.72 9.703 392.7 +model-00006-of-00037.safetensors 3995200456 3.72 8.694 438.2 +model-00007-of-00037.safetensors 3812769424 3.55 8.934 407.0 +model-00008-of-00037.safetensors 3995183968 3.72 9.010 422.9 +model-00009-of-00037.safetensors 3995183968 3.72 9.717 392.1 +model-00010-of-00037.safetensors 3995200464 3.72 8.204 464.4 +model-00011-of-00037.safetensors 3812769424 3.55 8.792 413.6 +model-00012-of-00037.safetensors 3995183968 3.72 9.383 406.1 +model-00013-of-00037.safetensors 
3995183968 3.72 9.704 392.6 +model-00014-of-00037.safetensors 3995200464 3.72 9.138 417.0 +model-00015-of-00037.safetensors 3812769424 3.55 8.983 404.8 +model-00016-of-00037.safetensors 3995183968 3.72 37.839 100.7 +model-00017-of-00037.safetensors 3995183968 3.72 9.246 412.1 +model-00018-of-00037.safetensors 3995200464 3.72 11.037 345.2 +model-00019-of-00037.safetensors 3812769424 3.55 9.158 397.1 +model-00020-of-00037.safetensors 3995183968 3.72 64.033 59.5 +model-00021-of-00037.safetensors 3995183968 3.72 9.266 411.2 +model-00022-of-00037.safetensors 3995200464 3.72 9.361 407.0 +model-00023-of-00037.safetensors 3812769424 3.55 8.369 434.5 +model-00024-of-00037.safetensors 3995183968 3.72 9.082 419.5 +model-00025-of-00037.safetensors 3995183968 3.72 8.912 427.5 +model-00026-of-00037.safetensors 3995200464 3.72 8.468 449.9 +model-00027-of-00037.safetensors 3812769424 3.55 8.361 434.9 +model-00028-of-00037.safetensors 3995183968 3.72 8.180 465.8 +model-00029-of-00037.safetensors 3995183968 3.72 8.403 453.4 +model-00030-of-00037.safetensors 3995200464 3.72 8.496 448.5 +model-00031-of-00037.safetensors 3812769424 3.55 9.085 400.2 +model-00032-of-00037.safetensors 3995183968 3.72 9.307 409.4 +model-00033-of-00037.safetensors 3995183968 3.72 42.116 90.5 +model-00034-of-00037.safetensors 3995200464 3.72 9.877 385.8 +model-00035-of-00037.safetensors 3812769424 3.55 8.772 414.5 +model-00036-of-00037.safetensors 3995183968 3.72 8.523 447.0 +model-00037-of-00037.safetensors 3460317640 3.22 7.673 430.1 +model.safetensors.index.json 79025 0.00 0.050 1.5 +tokenizer.json 7031645 0.01 0.227 29.5 +tokenizer_config.json 7305 0.00 0.048 0.1 +vocab.json 2776833 0.00 0.073 36.2 +TOTAL 145424101604 135.44 450.948 307.5