From 09abe065774fa66a247ca7884123497018e32f18 Mon Sep 17 00:00:00 2001
From: Daniel MS <danielmccannsayles@gmail.com>
Date: Fri, 19 Jun 2026 20:07:49 -0700
Subject: [PATCH 1/5] first pass (slop)

---
 bench/README.md     |  35 ++++++++
 bench/go.mod        |   3 +
 bench/naive/main.go | 192 ++++++++++++++++++++++++++++++++++++++++++++
 bench/run.sh        |  71 ++++++++++++++++
 writeup.md          | 110 +++++++++++++++++++++++++
 5 files changed, 411 insertions(+)
 create mode 100644 bench/README.md
 create mode 100644 bench/go.mod
 create mode 100644 bench/naive/main.go
 create mode 100755 bench/run.sh
 create mode 100644 writeup.md
diff --git a/bench/README.md b/bench/README.md
new file mode 100644
index 0000000..5409367
--- /dev/null
+++ b/bench/README.md
@@ -0,0 +1,35 @@
+# Download Benchmark
+
+Compares two ways to fetch a Hugging Face model, which is the question
+behind dropping the `hf` CLI from modelwrap's supply chain:
+
+1. **hf-cli** — `hf download` from `huggingface_hub[hf_xet]`. This is what
+   modelwrap does today (`wrap/wrap.go`). Pulls in Python, huggingface_hub,
+   and the hf_xet plugin, plus their full transitive dependency tree.
+2. **naive** — `bench/naive/main.go`, a stdlib-only Go program. Lists the
+   repo tree via the Hub API and GETs each `resolve` URL over plain HTTPS.
+   Zero external dependencies, no Python.
+
+Each iteration downloads to a fresh directory with a clean cache, so both
+methods measure raw network transfer (no Xet chunk dedup across runs).
+
+## Run
+
+```bash
+# on a box with Go + python3, downloads land on /mnt/large
+ITERATIONS=2 WORKERS=8 bash bench/run.sh
+```
+
+Results are written to `$OUT_BASE/results.tsv` (tab-separated):
+`method iter seconds bytes gib mib_per_s`.
+
+## Notes
+
+- **naive** uses one TCP connection per file with bounded concurrency over
+  files (default 8). It does not do byte-range chunking of individual large
+  shards, which is Xet's main throughput lever. If naive is close, the Xet
+  stack isn't worth its supply-chain cost; if not, Go could add range
+  requests without the Python dependency.
+- **naive** does not verify SHA256 of LFS blobs (the hf CLI does). That is
+  less work, but also less safe — a tradeoff to call out.
+- Model: `Qwen/Qwen2.5-72B-Instruct` (~135 GiB / ~145 GB, open, Xet-backed).
diff --git a/bench/go.mod b/bench/go.mod
new file mode 100644
index 0000000..7822515
--- /dev/null
+++ b/bench/go.mod
@@ -0,0 +1,3 @@
+module github.com/tinfoilsh/modelwrap/bench
+
+go 1.22.0
diff --git a/bench/naive/main.go b/bench/naive/main.go
new file mode 100644
index 0000000..c3b397b
--- /dev/null
+++ b/bench/naive/main.go
@@ -0,0 +1,192 @@
+// Command naive downloads a Hugging Face model using only the Go standard
+// library: it lists the repo file tree via the Hub API and fetches each
+// file over plain HTTPS (following the resolve redirects to the CDN).
+//
+// It is the "no supply chain" baseline against the official hf CLI
+// (huggingface_hub + hf_xet), which modelwrap currently shells out to.
+// No Python, no huggingface_hub, no Xet plugin — just HTTP.
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"net/url"
+	"os"
+	"path/filepath"
+	"strings"
+	"sync"
+	"time"
+)
+
+// entry is one element of the JSON array returned by the Hub tree API. Only
+// the fields this program reads are declared; any others are ignored on decode.
+type entry struct {
+	Type string `json:"type"` // listTree drops entries whose type is "directory" or "tree"
+	Path string `json:"path"` // repo-relative path; used for the resolve URL and the local destination
+	Size int64  `json:"size"` // size reported by the listing (presumably bytes — confirm against the Hub API)
+}
+
+// hub is the base URL that every API and resolve request is built from.
+const hub = "https://huggingface.co"
+
+func main() {
+	repo := flag.String("repo", "", "Hugging Face repo id, e.g. Qwen/Qwen2.5-72B-Instruct")
+	rev := flag.String("revision", "main", "revision (branch or commit)")
+	out := flag.String("out", "", "output directory")
+	workers := flag.Int("workers", 8, "concurrent file downloads")
+	flag.Parse()
+
+	if *repo == "" || *out == "" {
+		log.Fatal("usage: naive --repo <org/name> --out <dir> [--revision main] [--workers 8]")
+	}
+
+	token := os.Getenv("HF_TOKEN")
+
+	start := time.Now()
+	total, n, err := run(context.Background(), *repo, *rev, *out, *workers, token)
+	if err != nil {
+		log.Fatalf("download failed after %d files: %v", n, err)
+	}
+	elapsed := time.Since(start)
+
+	gib := float64(total) / (1 << 30)
+	fmt.Printf("naive: files=%d bytes=%d (%.2f GiB) time=%.2fs throughput=%.1f MiB/s\n",
+		n, total, gib, elapsed.Seconds(), float64(total)/elapsed.Seconds()/(1<<20))
+}
+
+// run lists every file of repo at rev and downloads them into out, running at
+// most workers transfers at a time.
+//
+// It returns the total number of bytes written, the number of files whose
+// fetch finished (successfully or not), and the first error any fetch
+// reported. After the first failure the shared context is cancelled so the
+// remaining transfers stop instead of running to completion.
+func run(ctx context.Context, repo, rev, out string, workers int, token string) (int64, int, error) {
+	files, err := listTree(ctx, repo, rev, token)
+	if err != nil {
+		return 0, 0, fmt.Errorf("list tree: %w", err)
+	}
+	log.Printf("listed %d files", len(files))
+
+	// A zero-capacity semaphore would block every goroutine forever and a
+	// negative capacity panics in make, so fall back to a single worker.
+	if workers < 1 {
+		workers = 1
+	}
+
+	ctx, cancel := context.WithCancel(ctx)
+	defer cancel()
+
+	var (
+		wg       sync.WaitGroup
+		mu       sync.Mutex // guards total, done and firstErr
+		total    int64
+		done     int
+		firstErr error
+	)
+	sem := make(chan struct{}, workers)
+
+	for _, f := range files {
+		wg.Add(1)
+		go func(f entry) {
+			defer wg.Done()
+			sem <- struct{}{}
+			defer func() { <-sem }()
+
+			n, err := fetchFile(ctx, repo, rev, f.Path, out, token)
+
+			mu.Lock()
+			total += n
+			done++
+			finished := done // snapshot under the lock; done itself must not be read unlocked
+			if err != nil && firstErr == nil {
+				firstErr = err
+				cancel()
+			}
+			mu.Unlock()
+
+			if finished%10 == 0 {
+				log.Printf("  %d/%d files done", finished, len(files))
+			}
+		}(f)
+	}
+	wg.Wait()
+	return total, done, firstErr
+}
+
+// listTree paginates the Hub tree API and returns leaf (non-directory) entries.
+//
+// repo is split on "/" and each segment is path-escaped. rev is escaped as a
+// single path segment, so a revision that itself contains "/" stays one
+// component of the URL. Pages are followed through the rel="next" Link header
+// until a response carries none. token, when non-empty, is sent as a Bearer
+// credential on every page request.
+func listTree(ctx context.Context, repo, rev, token string) ([]entry, error) {
+	segments := strings.Split(repo, "/")
+	for i, s := range segments {
+		segments[i] = url.PathEscape(s)
+	}
+	u := fmt.Sprintf("%s/api/models/%s/tree/%s?recursive=true",
+		hub, strings.Join(segments, "/"), url.PathEscape(rev))
+
+	var files []entry
+	for u != "" {
+		page, next, err := fetchTreePage(ctx, u, token)
+		if err != nil {
+			return nil, err
+		}
+		for _, e := range page {
+			if e.Type != "directory" && e.Type != "tree" {
+				files = append(files, e)
+			}
+		}
+		u = next
+	}
+	return files, nil
+}
+
+// fetchTreePage GETs one page of the tree listing at u and returns its
+// entries together with the URL of the following page ("" on the last page).
+func fetchTreePage(ctx context.Context, u, token string) ([]entry, string, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
+	if err != nil {
+		return nil, "", err
+	}
+	if token != "" {
+		req.Header.Set("Authorization", "Bearer "+token)
+	}
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return nil, "", err
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		// Best effort: quote a bounded prefix of the body for diagnostics. A
+		// read failure here is ignored because the status is reported anyway.
+		body, _ := io.ReadAll(io.LimitReader(resp.Body, 4<<10))
+		return nil, "", fmt.Errorf("tree %s: %s: %s", u, resp.Status, body)
+	}
+	var page []entry
+	if err := json.NewDecoder(resp.Body).Decode(&page); err != nil {
+		return nil, "", fmt.Errorf("tree %s: decode: %w", u, err)
+	}
+	return page, nextLink(resp.Header.Get("Link")), nil
+}
+
+// nextLink extracts the rel="next" URL from an RFC 8288 Link header.
+//
+// Each comma-separated link-value has the form `<target>; param; param`. Only
+// the parameters after the closing ">" are inspected, so a target that merely
+// contains the text rel="next" is not mistaken for the next page. Both the
+// quoted form rel="next" and the unquoted form rel=next are accepted, as is a
+// rel value listing several space-separated relation types. It returns ""
+// when no next link is present.
+//
+// Limitation: the header is split on every ",", so a target URL containing an
+// unencoded comma is not recognised.
+func nextLink(link string) string {
+	for _, part := range strings.Split(link, ",") {
+		part = strings.TrimSpace(part)
+		if !strings.HasPrefix(part, "<") {
+			continue
+		}
+		target, params, ok := strings.Cut(part[1:], ">")
+		if !ok {
+			continue
+		}
+		for _, p := range strings.Split(params, ";") {
+			name, value, _ := strings.Cut(strings.TrimSpace(p), "=")
+			if !strings.EqualFold(strings.TrimSpace(name), "rel") {
+				continue
+			}
+			value = strings.Trim(strings.TrimSpace(value), `"`)
+			for _, relType := range strings.Fields(value) {
+				if strings.EqualFold(relType, "next") {
+					return target
+				}
+			}
+		}
+	}
+	return ""
+}
+
+// fetchFile downloads the repo file at path (for repo at rev) into out,
+// preserving the repo-relative directory layout, and returns the number of
+// body bytes copied.
+//
+// The body is streamed to "<dest>.part" and renamed into place only after a
+// complete, successfully closed write, so dest never holds a partial file;
+// the .part file is removed on any failure. token, when non-empty, is sent as
+// a Bearer credential. A non-200 final response is reported as an error.
+func fetchFile(ctx context.Context, repo, rev, path, out, token string) (int64, error) {
+	// path comes from the remote listing: refuse absolute paths and ".."
+	// components so nothing can be written outside out.
+	rel := filepath.FromSlash(path)
+	if !filepath.IsLocal(rel) {
+		return 0, fmt.Errorf("%s: refusing path outside the output directory", path)
+	}
+	dest := filepath.Join(out, rel)
+	if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
+		return 0, err
+	}
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, resolveURL(repo, rev, path), nil)
+	if err != nil {
+		return 0, err
+	}
+	if token != "" {
+		req.Header.Set("Authorization", "Bearer "+token)
+	}
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return 0, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return 0, fmt.Errorf("%s: %s", path, resp.Status)
+	}
+
+	tmp := dest + ".part"
+	f, err := os.Create(tmp)
+	if err != nil {
+		return 0, err
+	}
+	n, err := io.Copy(f, resp.Body)
+	// Always close; a close error only matters if the copy itself succeeded.
+	if cerr := f.Close(); err == nil {
+		err = cerr
+	}
+	if err == nil {
+		err = os.Rename(tmp, dest)
+	}
+	if err != nil {
+		// Best-effort cleanup; the error already being returned is the one
+		// the caller needs, so a failed Remove is deliberately ignored.
+		os.Remove(tmp)
+		return n, err
+	}
+	return n, nil
+}
+
+// resolveURL builds the Hub "resolve" URL for one file. repo and path are
+// escaped segment by segment (keeping their "/" separators); rev is escaped
+// as a single segment.
+func resolveURL(repo, rev, path string) string {
+	return fmt.Sprintf("%s/%s/resolve/%s/%s",
+		hub, escapeSegments(repo), url.PathEscape(rev), escapeSegments(path))
+}
+
+// escapeSegments path-escapes every "/"-separated segment of p.
+func escapeSegments(p string) string {
+	segs := strings.Split(p, "/")
+	for i, s := range segs {
+		segs[i] = url.PathEscape(s)
+	}
+	return strings.Join(segs, "/")
+}
diff --git a/bench/run.sh b/bench/run.sh
new file mode 100755
index 0000000..3ce8209
--- /dev/null
+++ b/bench/run.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+# Benchmark: HF CLI (Xet) vs naive Go HTTP download of a Hugging Face model.
+#
+# Measures raw download throughput for the two approaches modelwrap could use:
+#   1. hf download from huggingface_hub[hf_xet] — the current approach
+#   2. ./naive — a stdlib-only Go HTTP downloader (no Python, no Xet)
+#
+# Each iteration downloads to a fresh directory with a clean cache, so we
+# measure network transfer, not Xet dedup of already-present chunks.
+set -euo pipefail
+
+MODEL="${MODEL:-Qwen/Qwen2.5-72B-Instruct}"
+REVISION="${REVISION:-main}"
+ITERATIONS="${ITERATIONS:-2}"
+WORKERS="${WORKERS:-8}"
+OUT_BASE="${OUT_BASE:-/mnt/large/modelwrap-bench}"
+HF_VENV="${HF_VENV:-$HOME/.hf-venv}"
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
+mkdir -p "$OUT_BASE"
+RESULTS="$OUT_BASE/results.tsv"
+printf "method\titer\tseconds\tbytes\tgib\tmib_per_s\n" > "$RESULTS"
+
+# now prints the current time as seconds.nanoseconds. This relies on a `date`
+# that understands %N (e.g. GNU coreutils).
+now()   { date +%s.%N; }
+# delta START END prints END-START to three decimals; awk does the arithmetic
+# because bash has no floating-point math.
+delta() { awk -v a="$1" -v b="$2" 'BEGIN{printf "%.3f", b-a}'; }
+
+# record METHOD ITER START END DIR
+# Appends one TSV row to $RESULTS and echoes it: elapsed seconds, the byte
+# size of DIR as reported by `du -sb`, that size in GiB, and MiB per second.
+record() { # method iter start end dir
+	local secs; secs=$(delta "$3" "$4")
+	# Everything under DIR is counted, whatever the tool wrote there.
+	local bytes; bytes=$(du -sb "$5" | cut -f1)
+	local gib; gib=$(awk -v b="$bytes" 'BEGIN{printf "%.2f", b/1073741824}')
+	local mibps; mibps=$(awk -v b="$bytes" -v s="$secs" 'BEGIN{printf "%.1f", b/1048576/s}')
+	printf "%s\t%s\t%s\t%s\t%s\t%s\n" "$1" "$2" "$secs" "$bytes" "$gib" "$mibps" | tee -a "$RESULTS"
+}
+
+# Build the Go downloader into OUT_BASE; the subshell keeps the cd local.
+echo "Building naive downloader..."
+( cd "$SCRIPT_DIR" && go build -o "$OUT_BASE/naive" ./naive )
+
+# Create the Python venv only when the hf executable is not already there, so
+# repeat runs reuse whatever version was installed the first time.
+if [ ! -x "$HF_VENV/bin/hf" ]; then
+	echo "Creating HF venv at $HF_VENV ..."
+	python3 -m venv "$HF_VENV"
+	"$HF_VENV/bin/pip" install --upgrade pip
+	"$HF_VENV/bin/pip" install "huggingface_hub[hf_xet]"
+fi
+echo "HF CLI version: $("$HF_VENV/bin/hf" --version)"
+
+# hf CLI iterations. Each one gets its own output and HF_HOME directories,
+# both removed before and after, so no run can reuse data from another.
+for i in $(seq 1 "$ITERATIONS"); do
+	out="$OUT_BASE/hf-$i"
+	cache="$OUT_BASE/hf-cache-$i"
+	rm -rf "$out" "$cache"
+	echo -e "\n=== hf download (Xet) iter $i ==="
+	s=$(now)
+	HF_HOME="$cache" "$HF_VENV/bin/hf" download "$MODEL" --revision "$REVISION" --local-dir "$out"
+	e=$(now)
+	record hf-cli "$i" "$s" "$e" "$out"
+	rm -rf "$out" "$cache"
+done
+
+# naive iterations run after all hf iterations (the two methods are not
+# interleaved). The timed span is the whole process, listing included.
+for i in $(seq 1 "$ITERATIONS"); do
+	out="$OUT_BASE/naive-$i"
+	rm -rf "$out"
+	echo -e "\n=== naive Go iter $i ==="
+	s=$(now)
+	"$OUT_BASE/naive" --repo "$MODEL" --revision "$REVISION" --out "$out" --workers "$WORKERS"
+	e=$(now)
+	record naive "$i" "$s" "$e" "$out"
+	rm -rf "$out"
+done
+
+# Print the accumulated table once more at the end.
+echo -e "\n=== RESULTS ==="
+cat "$RESULTS"
diff --git a/writeup.md b/writeup.md
new file mode 100644
index 0000000..9a11232
--- /dev/null
+++ b/writeup.md
@@ -0,0 +1,110 @@
+# HF CLI vs Naive Go Download Benchmark
+
+## Question
+
+modelwrap downloads models today by shelling out to `hf download` from
+`huggingface_hub[hf_xet]` (`wrap/wrap.go`). That pulls Python,
+`huggingface_hub`, the `hf_xet` plugin, and their full transitive
+dependency tree into the packer container — a sizable supply-chain surface
+for a tool whose whole point is reproducibility and trust.
+
+The question: how much download throughput do we actually get from the Xet
+stack, and could a stdlib-only Go downloader replace it without giving up
+speed?
+
+## Setup
+
+- **Host:** `inf8.tinfoil.sh` — 2.0 TiB RAM, no GPU, Go 1.24.4, downloads
+  written to `/mnt/large` (25 TB RAID, 3.2 TB free)
+- **Model:** `Qwen/Qwen2.5-72B-Instruct` (revision `main`) — open, Xet-backed,
+  47 files, **135.44 GiB** total. Large enough that Xet's chunking/dedup
+  has room to help.
+- **hf-cli:** `huggingface_hub[hf_xet]` v1.20.1 (`hf-xet` 1.5.1), installed
+  in an isolated venv. Xet confirmed active.
+- **naive:** `bench/naive/main.go` — Go standard library only (no external
+  dependencies, no Python). Lists the repo file tree via the Hub API, then
+  GETs each `resolve` URL concurrently (8 workers), one whole-file request
+  per file, no byte-range chunking.
+
+## Methodology
+
+Each iteration downloaded the full model to a **fresh directory with a
+clean cache**, so both methods measured raw network transfer — not Xet
+chunk dedup of already-present data. After each iteration the download was
+deleted before the next run.
+
+- 2 iterations per method.
+- Throughput = total bytes downloaded / wall time, measured from process
+  start to completion.
+- Both methods ran unauthenticated (no `HF_TOKEN`), since the model is open.
+
+Harness: `bench/run.sh` (builds the naive binary, creates the HF venv on
+first run, loops iterations, records results to `results.tsv`).
+
+## Results
+
+| method | iter | seconds | GiB    | MiB/s  |
+| ------ | ---- | ------- | ------ | ------ |
+| hf-cli | 1    | 130.8   | 135.44 | 1060.7 |
+| hf-cli | 2    | 119.7   | 135.44 | 1158.4 |
+| naive  | 1    | 102.9   | 135.44 | 1347.3 |
+| naive  | 2    | 75.7    | 135.44 | 1832.1 |
+
+**Average throughput:**
+
+- hf-cli (Xet): **~1110 MiB/s**
+- naive Go: **~1590 MiB/s**
+
+The naive Go downloader was faster on **every iteration** — 27% faster in
+iteration 1 and 58% faster in iteration 2, and ~43% faster on average.
+
+## Notes and caveats
+
+- **Xet was active.** `hf-xet` 1.5.1 was installed in the venv; the HF CLI
+  ran with its default Xet-backed transfer path. The naive path used plain
+  HTTPS `resolve` redirects to the CDN.
+- **Network variance.** The two naive runs (1347 vs 1832 MiB/s) and the two
+  hf-cli runs (1061 vs 1158 MiB/s) both show real variance, consistent with
+  shared-internet conditions. The ordering (naive > hf-cli) held across all
+  runs.
+- **naive does less work.** It does not verify SHA256 of LFS blobs (the
+  hf CLI does), and it does not do byte-range chunking of large shards.
+  Less work, but also less safe on integrity — a tradeoff to call out. If
+  integrity matters for the production path, Go could add SHA256 checks
+  cheaply without the Python stack.
+- **naive issues one whole-file GET per file** with bounded concurrency over
+  files (8). Xet's main throughput lever is parallel byte-range chunking of
+  individual large shards. Despite not doing that, naive still won — likely
+  because the CDN serves `resolve` URLs fast enough that per-file
+  parallelism saturates the link on a 2 TiB-RAM box with no other contention.
+- **Pagination was unused.** The naive downloader includes RFC-8288
+  `Link`-header pagination for the tree API, but Qwen 72B has 47 files
+  (under the ~1000-file page limit), so it ran a single page. Pagination
+  would matter for repos with thousands of files.
+
+## Conclusion
+
+For this model and host, the stdlib-only Go downloader was consistently
+faster than the Xet-backed HF CLI while carrying none of its supply-chain
+weight (no Python, no `huggingface_hub`, no `hf_xet`, no transitive deps).
+
+That inverts the usual assumption that you need Xet for fast large-model
+downloads: here, plain HTTPS `resolve` fetches with file-level
+concurrency saturated the available bandwidth more effectively than the
+Xet stack did. Dropping the `hf` CLI from modelwrap's packer would shrink
+the supply chain without costing throughput — the remaining work would be
+adding SHA256 verification (and possibly range requests for very large
+single shards) in Go.
+
+## Reproducing
+
+```bash
+# on inf8 (or any box with Go + python3 and a writable /mnt/large)
+ITERATIONS=2 WORKERS=8 bash bench/run.sh
+```
+
+Code lives in `bench/`:
+
+- `bench/naive/main.go` — the naive stdlib-only downloader
+- `bench/run.sh` — the benchmark harness
+- `bench/README.md` — quick reference

From 51fe74bcb4b5edb87f7390ea19d38f75670b8ff5 Mon Sep 17 00:00:00 2001
From: Daniel MS <danielmccannsayles@gmail.com>
Date: Sat, 20 Jun 2026 13:42:04 -0700
Subject: [PATCH 2/5] verify xet

---
 bench/xet_probe.py | 162 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 162 insertions(+)
 create mode 100644 bench/xet_probe.py

diff --git a/bench/xet_probe.py b/bench/xet_probe.py
new file mode 100644
index 0000000..11f2f4d
--- /dev/null
+++ b/bench/xet_probe.py
@@ -0,0 +1,162 @@
+"""Definitively detect which download path huggingface_hub takes.
+
+Monkeypatches `xet_get` and `http_get` in file_download.py so we can see
+exactly which one is called for a single file, plus snapshots established
+TCP connections during the download to fingerprint the endpoint
+(Xet native CAS -> cas-server.xethub.hf.co ; bridge/CDN -> *.cloudfront.net).
+
+Runs twice: Xet enabled, then HF_HUB_DISABLE_XET=1, to see if disabling
+changes the path or the peers.
+"""
+
+import json
+import os
+import re
+import shutil
+import socket
+import subprocess
+import sys
+import threading
+import time
+import urllib.request
+
+# Probe target; each value can be overridden through the environment.
+REPO = os.environ.get("PROBE_REPO", "Qwen/Qwen2.5-0.5B-Instruct")  # repo id
+FILE = os.environ.get("PROBE_FILE", "model.safetensors")  # file within the repo
+REV = os.environ.get("PROBE_REV", "main")  # revision to resolve
+
+import huggingface_hub.file_download as fd  # noqa: E402
+
+# Keep references to the real transfer functions before they are replaced.
+# xet_get is looked up defensively: it is None when file_download has no such
+# attribute.
+_orig_xet = fd.xet_get if hasattr(fd, "xet_get") else None
+_orig_http = fd.http_get
+
+# Per-run call counters, incremented by the spies and reset by run_once.
+calls = {"xet_get": 0, "http_get": 0}
+
+
+def spy_xet(*a, **k):
+    calls["xet_get"] += 1
+    print("  >>> xet_get() CALLED (native Xet CAS protocol)", flush=True)
+    return _orig_xet(*a, **k)
+
+
+def spy_http(*a, **k):
+    calls["http_get"] += 1
+    print("  >>> http_get() CALLED (plain HTTPS / bridge redirect)", flush=True)
+    return _orig_http(*a, **k)
+
+
+# Install the spies on the file_download module. xet_get is only replaced
+# when the module actually had one; http_get is always replaced.
+if _orig_xet is not None:
+    fd.xet_get = spy_xet
+fd.http_get = spy_http
+
+from huggingface_hub import hf_hub_download  # noqa: E402
+from huggingface_hub.utils._runtime import is_xet_available  # noqa: E402
+
+
+def api(path):
+    return json.load(urllib.request.urlopen(f"https://huggingface.co{path}"))
+
+
+def xet_hash_present():
+    tree = api(f"/api/models/{REPO}/tree/{REV}?recursive=true")
+    for e in tree:
+        if e.get("path") == FILE:
+            return e.get("lfs", {}).get("oid"), "xetHash" in e, e.get("size")
+    return None, False, None
+
+
+def snapshot_peers(pid):
+    try:
+        out = subprocess.check_output(
+            ["ss", "-tnp"], text=True, stderr=subprocess.DEVNULL
+        )
+    except subprocess.CalledProcessError:
+        return {}
+    peers = {}
+    for line in out.splitlines():
+        if "ESTAB" not in line or f"pid={pid}" not in line:
+            continue
+        m = re.search(r"([\d.]+):(\d+)\s+([\d.]+):(\d+)", line)
+        if not m:
+            continue
+        peer = m.group(3)
+        if peer.startswith("127."):
+            continue
+        if peer not in peers:
+            try:
+                peers[peer] = socket.gethostbyaddr(peer)[0]
+            except socket.herror:
+                peers[peer] = "?"
+    return peers
+
+
+def run_once(label, disable_xet):
+    outdir = f"/tmp/xetprobe-{label}"
+    shutil.rmtree(outdir, ignore_errors=True)
+    os.makedirs(outdir)
+    cache = f"/tmp/xetprobe-cache-{label}"
+    shutil.rmtree(cache, ignore_errors=True)
+    env = dict(os.environ)
+    env["HF_HOME"] = cache
+    if disable_xet:
+        env["HF_HUB_DISABLE_XET"] = "1"
+    for k in ("HF_HUB_DISABLE_XET",):
+        os.environ[k] = env.get(k, "")
+    import huggingface_hub.constants as constants
+
+    constants.HF_HUB_DISABLE_XET = bool(disable_xet)
+
+    calls["xet_get"] = 0
+    calls["http_get"] = 0
+    print(f"\n=== {label} ===", flush=True)
+    print(f"  HF_HUB_DISABLE_XET={constants.HF_HUB_DISABLE_XET}", flush=True)
+
+    pid = os.getpid()
+    peers = {}
+    done = threading.Event()
+
+    def poll():
+        while not done.is_set():
+            peers.update(snapshot_peers(pid))
+            time.sleep(0.02)
+
+    t = threading.Thread(target=poll, daemon=True)
+    t.start()
+    start = time.time()
+    path = hf_hub_download(REPO, FILE, revision=REV, local_dir=outdir)
+    done.set()
+    elapsed = time.time() - start
+    size = os.path.getsize(path)
+    mibs = size / (1 << 20) / elapsed
+    print(f"  size={size} time={elapsed:.2f}s {mibs:.0f} MiB/s", flush=True)
+    print(
+        f"  calls: xet_get={calls['xet_get']} http_get={calls['http_get']}", flush=True
+    )
+    print(f"  peers ({len(peers)}):", flush=True)
+    for ip, host in sorted(peers.items()):
+        tag = ""
+        if "xethub" in host and "bridge" not in host:
+            tag = "  <-- XET NATIVE CAS"
+        elif "xethub" in host:
+            tag = "  <-- XET BRIDGE"
+        elif "cloudfront" in host:
+            tag = "  <-- CDN/CloudFront"
+        print(f"    {ip} {host}{tag}", flush=True)
+    shutil.rmtree(outdir, ignore_errors=True)
+    shutil.rmtree(cache, ignore_errors=True)
+    return elapsed, mibs
+
+
+def main():
+    oid, has_xet, size = xet_hash_present()
+    print(f"repo={REPO} file={FILE} rev={REV}")
+    print(f"  lfs.oid={oid}")
+    print(f"  size={size}")
+    print(f"  xetHash present: {has_xet}")
+    print(f"  hf_xet importable: {is_xet_available()}")
+    print(f"  huggingface_hub: {__import__('huggingface_hub').__version__}")
+    run_once("xet-enabled", disable_xet=False)
+    run_once("xet-disabled", disable_xet=True)
+
+
+if __name__ == "__main__":
+    main()

From 220aa1302d5b44c9af2d3824fb15c388860a438b Mon Sep 17 00:00:00 2001
From: Daniel MS <danielmccannsayles@gmail.com>
Date: Sat, 20 Jun 2026 14:44:31 -0700
Subject: [PATCH 3/5] make bench simpler

---
 bench/README.md     |  6 ++---
 bench/naive/main.go | 57 ++++++++++++++-------------------------------
 bench/run.sh        |  3 +--
 3 files changed, 22 insertions(+), 44 deletions(-)

diff --git a/bench/README.md b/bench/README.md
index 5409367..0d5f2cd 100644
--- a/bench/README.md
+++ b/bench/README.md
@@ -17,7 +17,7 @@ methods measure raw network transfer (no Xet chunk dedup across runs).
 
 ```bash
 # on a box with Go + python3, downloads land on /mnt/large
-ITERATIONS=2 WORKERS=8 bash bench/run.sh
+ITERATIONS=2 bash bench/run.sh
 ```
 
 Results are written to `$OUT_BASE/results.tsv` (tab-separated):
@@ -25,8 +25,8 @@ Results are written to `$OUT_BASE/results.tsv` (tab-separated):
 
 ## Notes
 
-- **naive** uses one TCP connection per file with bounded concurrency over
-  files (default 8). It does not do byte-range chunking of individual large
+- **naive** fetches files sequentially, one TCP connection per file, no
+  concurrency. It does not do byte-range chunking of individual large
   shards, which is Xet's main throughput lever. If naive is close, the Xet
   stack isn't worth its supply-chain cost; if not, Go could add range
   requests without the Python dependency.
diff --git a/bench/naive/main.go b/bench/naive/main.go
index c3b397b..aa63649 100644
--- a/bench/naive/main.go
+++ b/bench/naive/main.go
@@ -1,10 +1,12 @@
 // Command naive downloads a Hugging Face model using only the Go standard
 // library: it lists the repo file tree via the Hub API and fetches each
-// file over plain HTTPS (following the resolve redirects to the CDN).
+// file over plain HTTPS (following the resolve redirects to the CDN), one
+// file at a time.
 //
-// It is the "no supply chain" baseline against the official hf CLI
-// (huggingface_hub + hf_xet), which modelwrap currently shells out to.
-// No Python, no huggingface_hub, no Xet plugin — just HTTP.
+// It is the simplest possible "no supply chain" baseline against the
+// official hf CLI (huggingface_hub + hf_xet), which modelwrap currently
+// shells out to. No Python, no huggingface_hub, no Xet plugin, no
+// concurrency — just HTTP, sequentially.
 package main
 
 import (
@@ -19,7 +21,6 @@ import (
 	"os"
 	"path/filepath"
 	"strings"
-	"sync"
 	"time"
 )
 
@@ -35,17 +36,16 @@ func main() {
 	repo := flag.String("repo", "", "Hugging Face repo id, e.g. Qwen/Qwen2.5-72B-Instruct")
 	rev := flag.String("revision", "main", "revision (branch or commit)")
 	out := flag.String("out", "", "output directory")
-	workers := flag.Int("workers", 8, "concurrent file downloads")
 	flag.Parse()
 
 	if *repo == "" || *out == "" {
-		log.Fatal("usage: naive --repo <org/name> --out <dir> [--revision main] [--workers 8]")
+		log.Fatal("usage: naive --repo <org/name> --out <dir> [--revision main]")
 	}
 
 	token := os.Getenv("HF_TOKEN")
 
 	start := time.Now()
-	total, n, err := run(context.Background(), *repo, *rev, *out, *workers, token)
+	total, n, err := run(context.Background(), *repo, *rev, *out, token)
 	if err != nil {
 		log.Fatalf("download failed after %d files: %v", n, err)
 	}
@@ -56,44 +56,23 @@ func main() {
 		n, total, gib, elapsed.Seconds(), float64(total)/elapsed.Seconds()/(1<<20))
 }
 
-func run(ctx context.Context, repo, rev, out string, workers int, token string) (int64, int, error) {
+func run(ctx context.Context, repo, rev, out, token string) (int64, int, error) {
 	files, err := listTree(ctx, repo, rev, token)
 	if err != nil {
 		return 0, 0, fmt.Errorf("list tree: %w", err)
 	}
 	log.Printf("listed %d files", len(files))
 
-	sem := make(chan struct{}, workers)
-	var wg sync.WaitGroup
-	var (
-		mu       sync.Mutex
-		total    int64
-		done     int
-		firstErr error
-	)
-
-	for _, f := range files {
-		wg.Add(1)
-		go func(f entry) {
-			defer wg.Done()
-			sem <- struct{}{}
-			defer func() { <-sem }()
-
-			n, err := fetchFile(ctx, repo, rev, f.Path, out, token)
-			mu.Lock()
-			total += n
-			done++
-			if err != nil && firstErr == nil {
-				firstErr = err
-			}
-			mu.Unlock()
-			if done%10 == 0 {
-				log.Printf("  %d/%d files done", done, len(files))
-			}
-		}(f)
+	var total int64
+	for i, f := range files {
+		log.Printf("[%d/%d] %s (%d bytes)", i+1, len(files), f.Path, f.Size)
+		n, err := fetchFile(ctx, repo, rev, f.Path, out, token)
+		total += n
+		if err != nil {
+			return total, i, fmt.Errorf("%s: %w", f.Path, err)
+		}
 	}
-	wg.Wait()
-	return total, done, firstErr
+	return total, len(files), nil
 }
 
 // listTree paginates the Hub tree API and returns leaf (non-directory) entries.
diff --git a/bench/run.sh b/bench/run.sh
index 3ce8209..de89905 100755
--- a/bench/run.sh
+++ b/bench/run.sh
@@ -12,7 +12,6 @@ set -euo pipefail
 MODEL="${MODEL:-Qwen/Qwen2.5-72B-Instruct}"
 REVISION="${REVISION:-main}"
 ITERATIONS="${ITERATIONS:-2}"
-WORKERS="${WORKERS:-8}"
 OUT_BASE="${OUT_BASE:-/mnt/large/modelwrap-bench}"
 HF_VENV="${HF_VENV:-$HOME/.hf-venv}"
 
@@ -61,7 +60,7 @@ for i in $(seq 1 "$ITERATIONS"); do
 	rm -rf "$out"
 	echo -e "\n=== naive Go iter $i ==="
 	s=$(now)
-	"$OUT_BASE/naive" --repo "$MODEL" --revision "$REVISION" --out "$out" --workers "$WORKERS"
+	"$OUT_BASE/naive" --repo "$MODEL" --revision "$REVISION" --out "$out"
 	e=$(now)
 	record naive "$i" "$s" "$e" "$out"
 	rm -rf "$out"

From 84f7e6b9df1c7d85f327d5613d5d2927ba24709f Mon Sep 17 00:00:00 2001
From: Daniel MS <danielmccannsayles@gmail.com>
Date: Sat, 20 Jun 2026 15:40:33 -0700
Subject: [PATCH 4/5] split commands apart

---
 bench/README.md         |  49 +++++-----
 bench/diskwrite/main.go | 149 +++++++++++++++++++++++++++++
 bench/diskwrite/run.sh  |  12 +++
 bench/naive/main.go     | 203 ++++++++++++++++++++++++++--------------
 bench/naive/run.sh      |  20 ++++
 bench/netread/main.go   | 187 ++++++++++++++++++++++++++++++++++++
 bench/netread/run.sh    |  13 +++
 bench/run.sh            |  70 --------------
 writeup.md              | 116 ++++-------------------
 9 files changed, 559 insertions(+), 260 deletions(-)
 create mode 100644 bench/diskwrite/main.go
 create mode 100755 bench/diskwrite/run.sh
 create mode 100755 bench/naive/run.sh
 create mode 100644 bench/netread/main.go
 create mode 100755 bench/netread/run.sh
 delete mode 100755 bench/run.sh

diff --git a/bench/README.md b/bench/README.md
index 0d5f2cd..a49779f 100644
--- a/bench/README.md
+++ b/bench/README.md
@@ -1,35 +1,34 @@
-# Download Benchmark
+# Download Bench
 
-Compares two ways to fetch a Hugging Face model, which is the question
-behind dropping the `hf` CLI from modelwrap's supply chain:
+Three small benchmarks for the question behind dropping the `hf` CLI from
+modelwrap: do we need the Xet stack for fast large-model downloads, or
+would a stdlib-only Go downloader do?
 
-1. **hf-cli** — `hf download` from `huggingface_hub[hf_xet]`. This is what
-   modelwrap does today (`wrap/wrap.go`). Pulls in Python + huggingface_hub
-   + the hf_xet plugin and their full transitive dependency tree.
-2. **naive** — `bench/naive/main.go`, a stdlib-only Go program. Lists the
-   repo tree via the Hub API and GETs each `resolve` URL over plain HTTPS.
-   Zero external dependencies, no Python.
+Each is a standalone Go program with its own run script, and each writes a
+TSV of results you can `rsync` off the bench host.
 
-Each iteration downloads to a fresh directory with a clean cache, so both
-methods measure raw network transfer (no Xet chunk dedup across runs).
+- `diskwrite/` — raw disk write throughput (no network). Writes arbitrary
+  data, fsyncs, reports write vs write+sync MiB/s.
+- `netread/` — raw network download throughput (no disk). Streams every file
+  in a Hugging Face repo to `io.Discard`, reports per-file and total MiB/s.
+- `naive/` — the real stdlib-only downloader, sequential, with per-file
+  network and disk timing separated. Compare against `netread` (no disk) and
+  `diskwrite` (no network) to see where time goes.
+
+All three are Go standard library only — no Python, no `huggingface_hub`,
+no `hf_xet`.
 
 ## Run
 
+On a box with Go (e.g. `inf8.tinfoil.sh`, downloads to `/mnt/large`):
+
 ```bash
-# on a box with Go + python3, downloads land on /mnt/large
-ITERATIONS=2 bash bench/run.sh
+OUT_BASE=/mnt/large/modelwrap-bench bash bench/diskwrite/run.sh
+OUT_BASE=/mnt/large/modelwrap-bench bash bench/netread/run.sh
+OUT_BASE=/mnt/large/modelwrap-bench bash bench/naive/run.sh
 ```
 
-Results are written to `$OUT_BASE/results.tsv` (tab-separated):
-`method iter seconds bytes gib mib_per_s`.
-
-## Notes
+Results land in `$OUT_BASE/{diskwrite,netread,naive}.tsv` (tab-separated).
 
-- **naive** fetches files sequentially, one TCP connection per file, no
-  concurrency. It does not do byte-range chunking of individual large
-  shards, which is Xet's main throughput lever. If naive is close, the Xet
-  stack isn't worth its supply-chain cost; if not, Go could add range
-  requests without the Python dependency.
-- **naive** does not verify SHA256 of LFS blobs (the hf CLI does). That is
-  less work, but also less safe — a tradeoff to call out.
-- Model: `Qwen/Qwen2.5-72B-Instruct` (~145 GiB, open, Xet-backed).
+`bench/xet_probe.py` is a separate one-off: it detects which download path
+`huggingface_hub` actually takes (native Xet CAS vs plain HTTPS) for a file.
diff --git a/bench/diskwrite/main.go b/bench/diskwrite/main.go
new file mode 100644
index 0000000..1af2761
--- /dev/null
+++ b/bench/diskwrite/main.go
@@ -0,0 +1,149 @@
+// Command diskwrite measures raw disk write throughput: it writes a file of
+// zero-filled blocks of a fixed size, then fsyncs it. It reports write-only
+// and write+sync throughput so disk speed can be compared against network
+// in isolation. No network, no HF, no Python — just the disk.
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// main parses flags, times the write and fsync phases, and reports throughput.
+func main() {
+	out := flag.String("out", "", "output file path")
+	sizeStr := flag.String("size", "10GiB", "total bytes to write (e.g. 10GiB, 512MiB)")
+	bsStr := flag.String("bs", "1MiB", "block size (e.g. 1MiB)")
+	results := flag.String("results", "", "append a TSV row to this path")
+	flag.Parse()
+	if *out == "" {
+		log.Fatal("usage: diskwrite --out <file> [--size 10GiB] [--bs 1MiB] [--results file]")
+	}
+	size, err := parseSize(*sizeStr)
+	if err != nil {
+		log.Fatalf("size: %v", err)
+	}
+	bs, err := parseSize(*bsStr)
+	if err != nil {
+		log.Fatalf("bs: %v", err)
+	}
+	if bs <= 0 || size <= 0 {
+		log.Fatal("size and bs must be > 0")
+	}
+
+	buf := make([]byte, bs) // zero-filled; the same block is written repeatedly
+	f, err := os.Create(*out)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer os.Remove(*out) // normal path: the data file is scratch, drop it on return
+	// log.Fatal exits via os.Exit and skips deferred calls, so failure paths go
+	// through fail, which removes the (possibly huge) partial file first.
+	fail := func(format string, args ...interface{}) {
+		f.Close()
+		os.Remove(*out)
+		log.Fatalf(format, args...)
+	}
+
+	written := int64(0)
+	writeStart := time.Now()
+	for written < size {
+		n := bs
+		if written+n > size {
+			n = size - written // final short block
+		}
+		if _, err := f.Write(buf[:n]); err != nil {
+			fail("write at %d: %v", written, err)
+		}
+		written += n
+	}
+	writeElapsed := time.Since(writeStart)
+
+	syncStart := time.Now()
+	if err := f.Sync(); err != nil {
+		fail("sync: %v", err)
+	}
+	syncElapsed := time.Since(syncStart)
+	if err := f.Close(); err != nil {
+		fail("%v", err)
+	}
+
+	total := writeElapsed + syncElapsed
+	fmt.Printf("diskwrite: %d bytes (%.2f GiB)\n", written, float64(written)/(1<<30))
+	fmt.Printf("  write: %.3fs  %.1f MiB/s\n", writeElapsed.Seconds(), mib(written, writeElapsed))
+	fmt.Printf("  sync:  %.3fs\n", syncElapsed.Seconds())
+	fmt.Printf("  total: %.3fs  %.1f MiB/s\n", total.Seconds(), mib(written, total))
+
+	if *results != "" {
+		if err := appendRow(*results, written, writeElapsed, syncElapsed, total); err != nil {
+			log.Printf("warning: write results: %v", err)
+		}
+	}
+}
+
+// mib converts b bytes over duration d to MiB/s; a non-positive d yields 0.
+func mib(b int64, d time.Duration) float64 {
+	if secs := d.Seconds(); secs > 0 {
+		return float64(b) / secs / (1 << 20)
+	}
+	return 0
+}
+
+// appendRow appends one TSV row to path, emitting the column header first if
+// the file does not exist yet; open, write and close errors are returned.
+func appendRow(path string, bytes int64, write, sync, total time.Duration) error {
+	line := fmt.Sprintf("%d\t%.2f\t%.3f\t%.3f\t%.3f\t%.1f\t%.1f\n",
+		bytes, float64(bytes)/(1<<30), write.Seconds(), sync.Seconds(), total.Seconds(),
+		mib(bytes, write), mib(bytes, total))
+	if _, err := os.Stat(path); os.IsNotExist(err) {
+		line = "bytes\tgib\twrite_s\tsync_s\ttotal_s\twrite_mib_s\ttotal_mib_s\n" + line
+	}
+	f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+	if err != nil {
+		return err
+	}
+	if _, err := f.WriteString(line); err != nil {
+		f.Close()
+		return err
+	}
+	return f.Close()
+}
+
+// parseSize parses a byte count such as "10GiB", "512MiB", "1000" or "4 kb".
+// The number must be a non-negative decimal integer; the optional unit is
+// case-insensitive, binary (KiB..TiB) or decimal (kB..TB). It returns an
+// error for a missing number, an unknown unit, or a value that overflows int64.
+func parseSize(s string) (int64, error) {
+	s = strings.TrimSpace(s)
+	digits := 0
+	for digits < len(s) && s[digits] >= '0' && s[digits] <= '9' {
+		digits++
+	}
+	if digits == 0 {
+		return 0, fmt.Errorf("invalid size %q", s)
+	}
+	n, err := strconv.ParseInt(s[:digits], 10, 64)
+	if err != nil {
+		return 0, fmt.Errorf("invalid size %q: %w", s, err)
+	}
+	units := map[string]int64{
+		"":    1, "b": 1,
+		"kib": 1 << 10, "mib": 1 << 20, "gib": 1 << 30, "tib": 1 << 40,
+		"kb":  1e3, "mb": 1e6, "gb": 1e9, "tb": 1e12,
+	}
+	mult, ok := units[strings.ToLower(strings.TrimSpace(s[digits:]))]
+	if !ok {
+		return 0, fmt.Errorf("unknown unit in %q", s)
+	}
+	// Digits only, so n >= 0. Reject products that would wrap int64 instead of
+	// silently returning a garbage (possibly still positive) size.
+	const maxInt64 = 1<<63 - 1
+	if n > maxInt64/mult {
+		return 0, fmt.Errorf("size %q overflows int64", s)
+	}
+	return n * mult, nil
+}
diff --git a/bench/diskwrite/run.sh b/bench/diskwrite/run.sh
new file mode 100755
index 0000000..d45c146
--- /dev/null
+++ b/bench/diskwrite/run.sh
@@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+# diskwrite: raw disk write throughput (no network). Writes arbitrary data,
+# fsyncs, reports write vs write+sync MiB/s. Results -> $OUT_BASE/diskwrite.tsv
+set -euo pipefail
+OUT_BASE="${OUT_BASE:-/mnt/large/modelwrap-bench}"
+SIZE="${SIZE:-10GiB}"
+BS="${BS:-1MiB}"
+mkdir -p "$OUT_BASE"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+( cd "$SCRIPT_DIR/.." && go build -o "$OUT_BASE/diskwrite" ./diskwrite )
+"$OUT_BASE/diskwrite" --out "$OUT_BASE/diskwrite.data" --size "$SIZE" --bs "$BS" --results "$OUT_BASE/diskwrite.tsv"
+echo "results: $OUT_BASE/diskwrite.tsv"
diff --git a/bench/naive/main.go b/bench/naive/main.go
index aa63649..93b094f 100644
--- a/bench/naive/main.go
+++ b/bench/naive/main.go
@@ -1,15 +1,14 @@
 // Command naive downloads a Hugging Face model using only the Go standard
-// library: it lists the repo file tree via the Hub API and fetches each
-// file over plain HTTPS (following the resolve redirects to the CDN), one
-// file at a time.
+// library, sequentially, and profiles each file: it separates the time to
+// read a file over the network (into memory) from the time to write it to
+// disk, so disk and network can be compared in isolation.
 //
-// It is the simplest possible "no supply chain" baseline against the
-// official hf CLI (huggingface_hub + hf_xet), which modelwrap currently
-// shells out to. No Python, no huggingface_hub, no Xet plugin, no
-// concurrency — just HTTP, sequentially.
+// It is the "no supply chain" baseline against the official hf CLI
+// (huggingface_hub + hf_xet). No Python, no Xet plugin, no concurrency.
 package main
 
 import (
+	"bytes"
 	"context"
 	"encoding/json"
 	"flag"
@@ -36,43 +35,150 @@ func main() {
 	repo := flag.String("repo", "", "Hugging Face repo id, e.g. Qwen/Qwen2.5-72B-Instruct")
 	rev := flag.String("revision", "main", "revision (branch or commit)")
 	out := flag.String("out", "", "output directory")
+	sync := flag.Bool("sync", false, "fsync each file after writing (measures real disk, not page cache)")
+	results := flag.String("results", "", "write per-file TSV results to this path")
 	flag.Parse()
 
 	if *repo == "" || *out == "" {
-		log.Fatal("usage: naive --repo <org/name> --out <dir> [--revision main]")
+		log.Fatal("usage: naive --repo <org/name> --out <dir> [--revision main] [--sync] [--results file]")
 	}
-
 	token := os.Getenv("HF_TOKEN")
 
-	start := time.Now()
-	total, n, err := run(context.Background(), *repo, *rev, *out, token)
+	ctx := context.Background()
+	files, err := listTree(ctx, *repo, *rev, token)
 	if err != nil {
-		log.Fatalf("download failed after %d files: %v", n, err)
+		log.Fatalf("list tree: %v", err)
+	}
+	log.Printf("listed %d files", len(files))
+
+	var rows []row
+	var totalBytes int64
+	var totalNet, totalDisk time.Duration
+	start := time.Now()
+
+	for i, f := range files {
+		n, netT, diskT, err := fetchFile(ctx, *repo, *rev, f.Path, *out, token, *sync)
+		if err != nil {
+			log.Fatalf("[%d/%d] %s: %v", i+1, len(files), f.Path, err)
+		}
+		totalBytes += n
+		totalNet += netT
+		totalDisk += diskT
+		log.Printf("[%d/%d] %s: %d bytes net=%.3fs (%.1f MiB/s) disk=%.3fs (%.1f MiB/s)",
+			i+1, len(files), f.Path, n, netT.Seconds(), mib(n, netT), diskT.Seconds(), mib(n, diskT))
+		rows = append(rows, row{f.Path, n, netT, diskT})
 	}
-	elapsed := time.Since(start)
 
-	gib := float64(total) / (1 << 30)
-	fmt.Printf("naive: files=%d bytes=%d (%.2f GiB) time=%.2fs throughput=%.1f MiB/s\n",
-		n, total, gib, elapsed.Seconds(), float64(total)/elapsed.Seconds()/(1<<20))
+	wall := time.Since(start)
+	fmt.Printf("naive: files=%d bytes=%d (%.2f GiB) net=%.3fs disk=%.3fs wall=%.3fs | net=%.1f MiB/s disk=%.1f MiB/s wall=%.1f MiB/s\n",
+		len(rows), totalBytes, float64(totalBytes)/(1<<30),
+		totalNet.Seconds(), totalDisk.Seconds(), wall.Seconds(),
+		mib(totalBytes, totalNet), mib(totalBytes, totalDisk), float64(totalBytes)/wall.Seconds()/(1<<20))
+
+	if *results != "" {
+		if err := writeResults(*results, rows, totalBytes, totalNet, totalDisk); err != nil {
+			log.Printf("warning: write results: %v", err)
+		}
+	}
+}
+
+type row struct {
+	path  string
+	bytes int64
+	net   time.Duration
+	disk  time.Duration
 }
 
-func run(ctx context.Context, repo, rev, out, token string) (int64, int, error) {
-	files, err := listTree(ctx, repo, rev, token)
+func mib(b int64, d time.Duration) float64 {
+	if d <= 0 {
+		return 0
+	}
+	return float64(b) / d.Seconds() / (1 << 20)
+}
+
+// fetchFile reads a file fully into memory (network time, isolated from
+// disk) then writes it to a .part file and renames (disk time, isolated
+// from network). The two phases are sequential by design: this measures the
+// components separately rather than overlapping them.
+func fetchFile(ctx context.Context, repo, rev, path, out, token string, doSync bool) (n int64, netT, diskT time.Duration, err error) {
+	dest := filepath.Join(out, path)
+	if err = os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
+		return
+	}
+	req, err := http.NewRequestWithContext(ctx, "GET",
+		fmt.Sprintf("%s/%s/resolve/%s/%s", hub, repo, rev, path), nil)
 	if err != nil {
-		return 0, 0, fmt.Errorf("list tree: %w", err)
+		return
+	}
+	if token != "" {
+		req.Header.Set("Authorization", "Bearer "+token)
 	}
-	log.Printf("listed %d files", len(files))
 
-	var total int64
-	for i, f := range files {
-		log.Printf("[%d/%d] %s (%d bytes)", i+1, len(files), f.Path, f.Size)
-		n, err := fetchFile(ctx, repo, rev, f.Path, out, token)
-		total += n
-		if err != nil {
-			return total, i, fmt.Errorf("%s: %w", f.Path, err)
-		}
+	netStart := time.Now()
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return
+	}
+	if resp.StatusCode != http.StatusOK {
+		resp.Body.Close()
+		err = fmt.Errorf("%s: %s", path, resp.Status)
+		return
+	}
+	buf := bytes.NewBuffer(make([]byte, 0, resp.ContentLength))
+	n, err = io.Copy(buf, resp.Body)
+	resp.Body.Close()
+	netT = time.Since(netStart)
+	if err != nil {
+		err = fmt.Errorf("%s: read: %w", path, err)
+		return
+	}
+
+	tmp := dest + ".part"
+	diskStart := time.Now()
+	f, err := os.Create(tmp)
+	if err != nil {
+		return
+	}
+	_, werr := buf.WriteTo(f)
+	if werr == nil && doSync {
+		werr = f.Sync()
+	}
+	cerr := f.Close()
+	diskT = time.Since(diskStart)
+	err = werr
+	if err == nil {
+		err = cerr
+	}
+	if err != nil {
+		os.Remove(tmp)
+		err = fmt.Errorf("%s: write: %w", path, err)
+		return
 	}
-	return total, len(files), nil
+	return n, netT, diskT, os.Rename(tmp, dest)
+}
+
+func writeResults(path string, rows []row, totalBytes int64, totalNet, totalDisk time.Duration) error {
+	header := false
+	if _, err := os.Stat(path); os.IsNotExist(err) {
+		header = true
+	}
+	f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+	if err != nil {
+		return err
+	}
+	defer f.Close()
+	if header {
+		fmt.Fprintln(f, "file\tbytes\tgib\tnet_s\tdisk_s\tnet_mib_s\tdisk_mib_s")
+	}
+	for _, r := range rows {
+		fmt.Fprintf(f, "%s\t%d\t%.2f\t%.3f\t%.3f\t%.1f\t%.1f\n",
+			r.path, r.bytes, float64(r.bytes)/(1<<30),
+			r.net.Seconds(), r.disk.Seconds(), mib(r.bytes, r.net), mib(r.bytes, r.disk))
+	}
+	fmt.Fprintf(f, "TOTAL\t%d\t%.2f\t%.3f\t%.3f\t%.1f\t%.1f\n",
+		totalBytes, float64(totalBytes)/(1<<30),
+		totalNet.Seconds(), totalDisk.Seconds(), mib(totalBytes, totalNet), mib(totalBytes, totalDisk))
+	return nil
 }
 
 // listTree paginates the Hub tree API and returns leaf (non-directory) entries.
@@ -117,7 +223,6 @@ func listTree(ctx context.Context, repo, rev, token string) ([]entry, error) {
 	return files, nil
 }
 
-// nextLink extracts the rel="next" URL from an RFC 8288 Link header.
 func nextLink(link string) string {
 	for _, part := range strings.Split(link, ",") {
 		if !strings.Contains(part, `rel="next"`) {
@@ -131,41 +236,3 @@ func nextLink(link string) string {
 	}
 	return ""
 }
-
-func fetchFile(ctx context.Context, repo, rev, path, out, token string) (int64, error) {
-	dest := filepath.Join(out, path)
-	if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
-		return 0, err
-	}
-	req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("%s/%s/resolve/%s/%s", hub, repo, rev, path), nil)
-	if err != nil {
-		return 0, err
-	}
-	if token != "" {
-		req.Header.Set("Authorization", "Bearer "+token)
-	}
-	resp, err := http.DefaultClient.Do(req)
-	if err != nil {
-		return 0, err
-	}
-	defer resp.Body.Close()
-	if resp.StatusCode != http.StatusOK {
-		return 0, fmt.Errorf("%s: %s", path, resp.Status)
-	}
-	tmp := dest + ".part"
-	f, err := os.Create(tmp)
-	if err != nil {
-		return 0, err
-	}
-	n, err := io.Copy(f, resp.Body)
-	if err != nil {
-		f.Close()
-		os.Remove(tmp)
-		return n, err
-	}
-	if err := f.Close(); err != nil {
-		os.Remove(tmp)
-		return n, err
-	}
-	return n, os.Rename(tmp, dest)
-}
diff --git a/bench/naive/run.sh b/bench/naive/run.sh
new file mode 100755
index 0000000..85f46b5
--- /dev/null
+++ b/bench/naive/run.sh
@@ -0,0 +1,20 @@
+#!/usr/bin/env bash
+# naive: sequential stdlib-only model download to disk, with per-file network
+# and disk timing separated. Compare against netread (no disk) and diskwrite
+# (no network) to see where time goes. Results -> $OUT_BASE/naive.tsv
+#
+# SYNC=1 adds an fsync per file (measures real disk, not page cache).
+set -euo pipefail
+OUT_BASE="${OUT_BASE:-/mnt/large/modelwrap-bench}"
+MODEL="${MODEL:-Qwen/Qwen2.5-72B-Instruct}"
+REVISION="${REVISION:-main}"
+SYNC="${SYNC:-0}"
+mkdir -p "$OUT_BASE"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+( cd "$SCRIPT_DIR/.." && go build -o "$OUT_BASE/naive" ./naive )
+OUT="$OUT_BASE/naive-out"
+rm -rf "$OUT"
+args=( --repo "$MODEL" --revision "$REVISION" --out "$OUT" --results "$OUT_BASE/naive.tsv" )
+if [ "${SYNC}" = "1" ]; then args+=( --sync ); fi
+"$OUT_BASE/naive" "${args[@]}"
+echo "results: $OUT_BASE/naive.tsv"
diff --git a/bench/netread/main.go b/bench/netread/main.go
new file mode 100644
index 0000000..08a0fcc
--- /dev/null
+++ b/bench/netread/main.go
@@ -0,0 +1,187 @@
+// Command netread measures raw network download throughput: it lists a
+// Hugging Face repo's file tree and streams every file to io.Discard — no
+// disk writes at all. It is the network-only counterpart of naive: run both
+// to see how much disk adds. No Python, no Xet, no concurrency.
+package main
+
+import (
+	"context"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"net/url"
+	"os"
+	"strings"
+	"time"
+)
+
+type entry struct {
+	Type string `json:"type"` // listTree drops entries typed "directory" or "tree"
+	Path string `json:"path"` // path as listed by the tree API; appended to the resolve URL
+	Size int64  `json:"size"` // size from the listing; byte totals use the bytes actually read
+}
+
+const hub = "https://huggingface.co"
+
+// main lists the repo tree, streams every file to io.Discard, and reports
+// per-file and total throughput (optionally also as TSV via --results).
+func main() {
+	repo := flag.String("repo", "", "Hugging Face repo id, e.g. Qwen/Qwen2.5-72B-Instruct")
+	rev := flag.String("revision", "main", "revision (branch or commit)")
+	results := flag.String("results", "", "write per-file TSV results to this path")
+	flag.Parse()
+	if *repo == "" {
+		log.Fatal("usage: netread --repo <org/name> [--revision main] [--results file]")
+	}
+	token := os.Getenv("HF_TOKEN")
+	ctx := context.Background()
+	files, err := listTree(ctx, *repo, *rev, token)
+	if err != nil {
+		log.Fatalf("list tree: %v", err)
+	}
+	log.Printf("listed %d files", len(files))
+
+	rows := make([]row, 0, len(files))
+	var totalBytes int64
+	var totalNet time.Duration
+	start := time.Now()
+
+	for i, f := range files {
+		n, elapsed, err := readToDiscard(ctx, *repo, *rev, f.Path, token)
+		if err != nil {
+			log.Fatalf("[%d/%d] %s: %v", i+1, len(files), f.Path, err)
+		}
+		totalBytes += n
+		totalNet += elapsed
+		log.Printf("[%d/%d] %s: %d bytes %.3fs %.1f MiB/s", i+1, len(files), f.Path, n, elapsed.Seconds(), mib(n, elapsed))
+		rows = append(rows, row{f.Path, n, elapsed})
+	}
+
+	wall := time.Since(start)
+	fmt.Printf("netread: files=%d bytes=%d (%.2f GiB) net=%.3fs wall=%.3fs | net=%.1f MiB/s wall=%.1f MiB/s\n",
+		len(rows), totalBytes, float64(totalBytes)/(1<<30),
+		totalNet.Seconds(), wall.Seconds(),
+		mib(totalBytes, totalNet), mib(totalBytes, wall))
+
+	if *results != "" {
+		if err := writeResults(*results, rows, totalBytes, totalNet); err != nil {
+			log.Printf("warning: write results: %v", err)
+		}
+	}
+}
+
+type row struct {
+	path    string        // file path as listed by the tree API
+	bytes   int64         // bytes copied from the response body
+	elapsed time.Duration // time readToDiscard reported for this file
+}
+
+func readToDiscard(ctx context.Context, repo, rev, path, token string) (int64, time.Duration, error) {
+	esc := (&url.URL{Path: path}).EscapedPath() // keeps '/', escapes '#', '?', '%' in file names
+	req, err := http.NewRequestWithContext(ctx, "GET", fmt.Sprintf("%s/%s/resolve/%s/%s", hub, repo, rev, esc), nil)
+	if err != nil {
+		return 0, 0, err
+	}
+	if token != "" {
+		req.Header.Set("Authorization", "Bearer "+token)
+	}
+	start := time.Now() // timed span: the whole request, through the last body byte
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return 0, 0, err
+	}
+	defer resp.Body.Close()
+	if resp.StatusCode != http.StatusOK {
+		return 0, 0, fmt.Errorf("%s: %s", path, resp.Status)
+	}
+	n, err := io.Copy(io.Discard, resp.Body) // stream and drop: no disk, no buffering
+	return n, time.Since(start), err
+}
+
+// writeResults appends one TSV row per file plus a TOTAL row to path (header
+// first if path is new) and returns any open, write, or close error.
+func writeResults(path string, rows []row, totalBytes int64, totalNet time.Duration) error {
+	var b strings.Builder
+	if _, err := os.Stat(path); os.IsNotExist(err) {
+		b.WriteString("file\tbytes\tgib\tnet_s\tnet_mib_s\n")
+	}
+	for _, r := range append(rows[:len(rows):len(rows)], row{"TOTAL", totalBytes, totalNet}) { // capped: append copies
+		fmt.Fprintf(&b, "%s\t%d\t%.2f\t%.3f\t%.1f\n",
+			r.path, r.bytes, float64(r.bytes)/(1<<30), r.elapsed.Seconds(), mib(r.bytes, r.elapsed))
+	}
+	f, err := os.OpenFile(path, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
+	if err != nil {
+		return err
+	}
+	if _, err := f.WriteString(b.String()); err != nil {
+		f.Close()
+		return err
+	}
+	return f.Close()
+}
+
+// mib converts b bytes over duration d to MiB/s; a non-positive d yields 0.
+func mib(b int64, d time.Duration) float64 {
+	if secs := d.Seconds(); secs > 0 {
+		return float64(b) / secs / (1 << 20)
+	}
+	return 0
+}
+
+// listTree follows the Hub tree API page by page and keeps the non-directory entries.
+func listTree(ctx context.Context, repo, rev, token string) ([]entry, error) {
+	escaped := strings.Split(repo, "/")
+	for idx := range escaped {
+		escaped[idx] = url.PathEscape(escaped[idx])
+	}
+
+	var leaves []entry
+	next := fmt.Sprintf("%s/api/models/%s/tree/%s?recursive=true", hub, strings.Join(escaped, "/"), rev)
+	for next != "" {
+		req, err := http.NewRequestWithContext(ctx, "GET", next, nil)
+		if err != nil {
+			return nil, err
+		}
+		if token != "" {
+			req.Header.Set("Authorization", "Bearer "+token)
+		}
+		resp, err := http.DefaultClient.Do(req)
+		if err != nil {
+			return nil, err
+		}
+		if resp.StatusCode != http.StatusOK {
+			msg, _ := io.ReadAll(resp.Body) // best effort: the body only decorates the error
+			resp.Body.Close()
+			return nil, fmt.Errorf("tree %s: %s: %s", next, resp.Status, msg)
+		}
+		var batch []entry
+		decodeErr := json.NewDecoder(resp.Body).Decode(&batch)
+		resp.Body.Close()
+		if decodeErr != nil {
+			return nil, decodeErr
+		}
+		for _, item := range batch {
+			if kind := item.Type; kind != "directory" && kind != "tree" {
+				leaves = append(leaves, item)
+			}
+		}
+		next = nextLink(resp.Header.Get("Link")) // "" when there is no further page
+	}
+	return leaves, nil
+}
+
+// nextLink scans the comma-separated elements of a Link header value and, for
+// the first rel="next" element containing '>', returns the text before it.
+func nextLink(link string) string {
+	for _, elem := range strings.Split(link, ",") {
+		if strings.Contains(elem, `rel="next"`) {
+			target := strings.TrimPrefix(strings.TrimSpace(elem), "<")
+			if end := strings.IndexByte(target, '>'); end >= 0 {
+				return target[:end]
+			}
+		}
+	}
+	return ""
+}
diff --git a/bench/netread/run.sh b/bench/netread/run.sh
new file mode 100755
index 0000000..30915a8
--- /dev/null
+++ b/bench/netread/run.sh
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+# netread: raw network download throughput (no disk). Streams every file in a
+# Hugging Face repo to io.Discard, reports per-file and total MiB/s.
+# Results -> $OUT_BASE/netread.tsv
+set -euo pipefail
+OUT_BASE="${OUT_BASE:-/mnt/large/modelwrap-bench}"
+MODEL="${MODEL:-Qwen/Qwen2.5-72B-Instruct}"
+REVISION="${REVISION:-main}"
+mkdir -p "$OUT_BASE"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+( cd "$SCRIPT_DIR/.." && go build -o "$OUT_BASE/netread" ./netread )
+"$OUT_BASE/netread" --repo "$MODEL" --revision "$REVISION" --results "$OUT_BASE/netread.tsv"
+echo "results: $OUT_BASE/netread.tsv"
diff --git a/bench/run.sh b/bench/run.sh
deleted file mode 100755
index de89905..0000000
--- a/bench/run.sh
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/env bash
-# Benchmark: HF CLI (Xet) vs naive Go HTTP download of a Hugging Face model.
-#
-# Measures raw download throughput for the two approaches modelwrap could use:
-#   1. hf download from huggingface_hub[hf_xet] — the current approach
-#   2. ./naive — a stdlib-only Go HTTP downloader (no Python, no Xet)
-#
-# Each iteration downloads to a fresh directory with a clean cache, so we
-# measure network transfer, not Xet dedup of already-present chunks.
-set -euo pipefail
-
-MODEL="${MODEL:-Qwen/Qwen2.5-72B-Instruct}"
-REVISION="${REVISION:-main}"
-ITERATIONS="${ITERATIONS:-2}"
-OUT_BASE="${OUT_BASE:-/mnt/large/modelwrap-bench}"
-HF_VENV="${HF_VENV:-$HOME/.hf-venv}"
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-
-mkdir -p "$OUT_BASE"
-RESULTS="$OUT_BASE/results.tsv"
-printf "method\titer\tseconds\tbytes\tgib\tmib_per_s\n" > "$RESULTS"
-
-now()   { date +%s.%N; }
-delta() { awk -v a="$1" -v b="$2" 'BEGIN{printf "%.3f", b-a}'; }
-
-record() { # method iter start end dir
-	local secs; secs=$(delta "$3" "$4")
-	local bytes; bytes=$(du -sb "$5" | cut -f1)
-	local gib; gib=$(awk -v b="$bytes" 'BEGIN{printf "%.2f", b/1073741824}')
-	local mibps; mibps=$(awk -v b="$bytes" -v s="$secs" 'BEGIN{printf "%.1f", b/1048576/s}')
-	printf "%s\t%s\t%s\t%s\t%s\t%s\n" "$1" "$2" "$secs" "$bytes" "$gib" "$mibps" | tee -a "$RESULTS"
-}
-
-echo "Building naive downloader..."
-( cd "$SCRIPT_DIR" && go build -o "$OUT_BASE/naive" ./naive )
-
-if [ ! -x "$HF_VENV/bin/hf" ]; then
-	echo "Creating HF venv at $HF_VENV ..."
-	python3 -m venv "$HF_VENV"
-	"$HF_VENV/bin/pip" install --upgrade pip
-	"$HF_VENV/bin/pip" install "huggingface_hub[hf_xet]"
-fi
-echo "HF CLI version: $("$HF_VENV/bin/hf" --version)"
-
-for i in $(seq 1 "$ITERATIONS"); do
-	out="$OUT_BASE/hf-$i"
-	cache="$OUT_BASE/hf-cache-$i"
-	rm -rf "$out" "$cache"
-	echo -e "\n=== hf download (Xet) iter $i ==="
-	s=$(now)
-	HF_HOME="$cache" "$HF_VENV/bin/hf" download "$MODEL" --revision "$REVISION" --local-dir "$out"
-	e=$(now)
-	record hf-cli "$i" "$s" "$e" "$out"
-	rm -rf "$out" "$cache"
-done
-
-for i in $(seq 1 "$ITERATIONS"); do
-	out="$OUT_BASE/naive-$i"
-	rm -rf "$out"
-	echo -e "\n=== naive Go iter $i ==="
-	s=$(now)
-	"$OUT_BASE/naive" --repo "$MODEL" --revision "$REVISION" --out "$out"
-	e=$(now)
-	record naive "$i" "$s" "$e" "$out"
-	rm -rf "$out"
-done
-
-echo -e "\n=== RESULTS ==="
-cat "$RESULTS"
diff --git a/writeup.md b/writeup.md
index 9a11232..b501509 100644
--- a/writeup.md
+++ b/writeup.md
@@ -1,110 +1,32 @@
-# HF CLI vs Naive Go Download Benchmark
+# HF CLI vs Naive Go Download
 
 ## Question
 
-modelwrap downloads models today by shelling out to `hf download` from
+modelwrap downloads models by shelling out to `hf download` from
 `huggingface_hub[hf_xet]` (`wrap/wrap.go`). That pulls Python,
-`huggingface_hub`, the `hf_xet` plugin, and their full transitive
-dependency tree into the packer container — a sizable supply-chain surface
-for a tool whose whole point is reproducibility and trust.
+`huggingface_hub`, the `hf_xet` plugin, and their full transitive deps into
+the packer container — a sizable supply-chain surface for a tool whose
+point is reproducibility and trust.
 
-The question: how much download throughput do we actually get from the Xet
-stack, and could a stdlib-only Go downloader replace it without giving up
-speed?
+Do we actually need the Xet stack for fast large-model downloads, or could
+a stdlib-only Go downloader replace it?
 
-## Setup
+## Benchmarks
 
-- **Host:** `inf8.tinfoil.sh` — 2.0 TiB RAM, no GPU, Go 1.24.4, downloads
-  written to `/mnt/large` (25 TB RAID, 3.2 TB free)
-- **Model:** `Qwen/Qwen2.5-72B-Instruct` (revision `main`) — open, Xet-backed,
-  47 files, **135.44 GiB** total. Large enough that Xet's chunking/dedup
-  has room to help.
-- **hf-cli:** `huggingface_hub[hf_xet]` v1.20.1 (`hf-xet` 1.5.1), installed
-  in an isolated venv. Xet confirmed active.
-- **naive:** `bench/naive/main.go` — Go standard library only (no external
-  dependencies, no Python). Lists the repo file tree via the Hub API, then
-  GETs each `resolve` URL concurrently (8 workers), one TCP connection per
-  file, no byte-range chunking.
+Three small benchmarks in `bench/`, each its own program + run script, each
+writing a TSV you can `rsync` off the bench host:
 
-## Methodology
+- `bench/diskwrite` — raw disk write throughput (no network).
+- `bench/netread` — raw network download throughput to `io.Discard` (no disk).
+- `bench/naive` — the real stdlib downloader, sequential, with per-file
+  network and disk timing separated.
 
-Each iteration downloaded the full model to a **fresh directory with a
-clean cache**, so both methods measured raw network transfer — not Xet
-chunk dedup of already-present data. After each iteration the download was
-deleted before the next run.
-
-- 2 iterations per method.
-- Throughput = total bytes downloaded / wall time, measured from process
-  start to completion.
-- Both methods ran unauthenticated (no `HF_TOKEN`), since the model is open.
-
-Harness: `bench/run.sh` (builds the naive binary, creates the HF venv on
-first run, loops iterations, records results to `results.tsv`).
-
-## Results
-
-| method | iter | seconds | GiB    | MiB/s  |
-| ------ | ---- | ------- | ------ | ------ |
-| hf-cli | 1    | 130.8   | 135.44 | 1060.7 |
-| hf-cli | 2    | 119.7   | 135.44 | 1158.4 |
-| naive  | 1    | 102.9   | 135.44 | 1347.3 |
-| naive  | 2    | 75.7    | 135.44 | 1832.1 |
-
-**Average throughput:**
-
-- hf-cli (Xet): **~1110 MiB/s**
-- naive Go: **~1590 MiB/s**
-
-The naive Go downloader was faster on **every iteration** — roughly 27–58%
-faster depending on the comparison, and ~43% faster on average.
-
-## Notes and caveats
-
-- **Xet was active.** `hf-xet` 1.5.1 was installed in the venv; the HF CLI
-  ran with its default Xet-backed transfer path. The naive path used plain
-  HTTPS `resolve` redirects to the CDN.
-- **Network variance.** The two naive runs (1347 vs 1832 MiB/s) and the two
-  hf-cli runs (1061 vs 1158 MiB/s) both show real variance, consistent with
-  shared-internet conditions. The ordering (naive > hf-cli) held across all
-  runs.
-- **naive does less work.** It does not verify SHA256 of LFS blobs (the
-  hf CLI does), and it does not do byte-range chunking of large shards.
-  Less work, but also less safe on integrity — a tradeoff to call out. If
-  integrity matters for the production path, Go could add SHA256 checks
-  cheaply without the Python stack.
-- **naive uses one connection per file** with bounded concurrency over
-  files (8). Xet's main throughput lever is parallel byte-range chunking of
-  individual large shards. Despite not doing that, naive still won — likely
-  because the CDN serves `resolve` URLs fast enough that per-file
-  parallelism saturates the link on a 2 TiB-RAM box with no other contention.
-- **Pagination was unused.** The naive downloader includes RFC-8288
-  `Link`-header pagination for the tree API, but Qwen 72B has 47 files
-  (under the ~1000-file page limit), so it ran a single page. Pagination
-  would matter for repos with thousands of files.
-
-## Conclusion
-
-For this model and host, the stdlib-only Go downloader was consistently
-faster than the Xet-backed HF CLI while carrying none of its supply-chain
-weight (no Python, no `huggingface_hub`, no `hf_xet`, no transitive deps).
-
-That inverts the usual assumption that you need Xet for fast large-model
-downloads: here, plain HTTPS `resolve` fetches with file-level
-concurrency saturated the available bandwidth more effectively than the
-Xet stack did. Dropping the `hf` CLI from modelwrap's packer would shrink
-the supply chain without costing throughput — the remaining work would be
-adding SHA256 verification (and possibly range requests for very large
-single shards) in Go.
-
-## Reproducing
+Run on `inf8.tinfoil.sh` (downloads to `/mnt/large`):
 
 ```bash
-# on inf8 (or any box with Go + python3 and a writable /mnt/large)
-ITERATIONS=2 WORKERS=8 bash bench/run.sh
+OUT_BASE=/mnt/large/modelwrap-bench bash bench/diskwrite/run.sh
+OUT_BASE=/mnt/large/modelwrap-bench bash bench/netread/run.sh
+OUT_BASE=/mnt/large/modelwrap-bench bash bench/naive/run.sh
 ```
 
-Code lives in `bench/`:
-
-- `bench/naive/main.go` — the naive stdlib-only downloader
-- `bench/run.sh` — the benchmark harness
-- `bench/README.md` — quick reference
+Results land in `$OUT_BASE/{diskwrite,netread,naive}.tsv`.

From a6654987c06576abecec5bfc8ff239b91e17921c Mon Sep 17 00:00:00 2001
From: Daniel MS <danielmccannsayles@gmail.com>
Date: Sat, 20 Jun 2026 18:02:32 -0700
Subject: [PATCH 5/5] benchmarks!

---
 bench/naive/main.go         |  5 +++-
 bench/results/diskwrite.tsv |  2 ++
 bench/results/naive.tsv     | 49 +++++++++++++++++++++++++++++++++++++
 bench/results/netread.tsv   | 49 +++++++++++++++++++++++++++++++++++++
 4 files changed, 104 insertions(+), 1 deletion(-)
 create mode 100644 bench/results/diskwrite.tsv
 create mode 100644 bench/results/naive.tsv
 create mode 100644 bench/results/netread.tsv

diff --git a/bench/naive/main.go b/bench/naive/main.go
index 93b094f..0a1852b 100644
--- a/bench/naive/main.go
+++ b/bench/naive/main.go
@@ -124,7 +124,10 @@ func fetchFile(ctx context.Context, repo, rev, path, out, token string, doSync b
 		err = fmt.Errorf("%s: %s", path, resp.Status)
 		return
 	}
-	buf := bytes.NewBuffer(make([]byte, 0, resp.ContentLength))
+	buf := &bytes.Buffer{}
+	if resp.ContentLength > 0 {
+		buf.Grow(int(resp.ContentLength))
+	}
 	n, err = io.Copy(buf, resp.Body)
 	resp.Body.Close()
 	netT = time.Since(netStart)
diff --git a/bench/results/diskwrite.tsv b/bench/results/diskwrite.tsv
new file mode 100644
index 0000000..5e4b941
--- /dev/null
+++ b/bench/results/diskwrite.tsv
@@ -0,0 +1,2 @@
+bytes	gib	write_s	sync_s	total_s	write_mib_s	total_mib_s
+10737418240	10.00	2.556	8.278	10.834	4007.0	945.2
diff --git a/bench/results/naive.tsv b/bench/results/naive.tsv
new file mode 100644
index 0000000..19586ff
--- /dev/null
+++ b/bench/results/naive.tsv
@@ -0,0 +1,49 @@
+file	bytes	gib	net_s	disk_s	net_mib_s	disk_mib_s
+.gitattributes	1519	0.00	0.055	0.000	0.0	30.4
+LICENSE	6962	0.00	0.049	0.000	0.1	239.1
+README.md	6259	0.00	0.050	0.000	0.1	152.1
+config.json	663	0.00	0.049	0.000	0.0	31.4
+generation_config.json	242	0.00	0.054	0.000	0.0	3.3
+merges.txt	1671839	0.00	0.070	0.001	22.6	2778.1
+model-00001-of-00037.safetensors	3762345336	3.50	9.660	1.490	371.4	2408.1
+model-00002-of-00037.safetensors	3995200440	3.72	12.201	1.516	312.3	2513.4
+model-00003-of-00037.safetensors	3812769392	3.55	11.746	1.442	309.6	2522.0
+model-00004-of-00037.safetensors	3995183944	3.72	11.722	1.369	325.0	2782.4
+model-00005-of-00037.safetensors	3995183944	3.72	10.174	1.780	374.5	2140.4
+model-00006-of-00037.safetensors	3995200456	3.72	11.618	1.491	328.0	2555.0
+model-00007-of-00037.safetensors	3812769424	3.55	10.043	1.437	362.0	2529.7
+model-00008-of-00037.safetensors	3995183968	3.72	11.134	1.795	342.2	2123.1
+model-00009-of-00037.safetensors	3995183968	3.72	8.783	1.177	433.8	3238.5
+model-00010-of-00037.safetensors	3995200464	3.72	10.017	1.184	380.4	3217.2
+model-00011-of-00037.safetensors	3812769424	3.55	10.793	1.433	336.9	2538.3
+model-00012-of-00037.safetensors	3995183968	3.72	11.402	1.498	334.1	2543.9
+model-00013-of-00037.safetensors	3995183968	3.72	8.812	1.167	432.4	3264.8
+model-00014-of-00037.safetensors	3995200464	3.72	11.351	1.345	335.7	2832.5
+model-00015-of-00037.safetensors	3812769424	3.55	10.628	1.710	342.1	2126.6
+model-00016-of-00037.safetensors	3995183968	3.72	10.991	1.494	346.7	2550.4
+model-00017-of-00037.safetensors	3995183968	3.72	9.048	1.491	421.1	2555.5
+model-00018-of-00037.safetensors	3995200464	3.72	11.361	2.854	335.4	1335.2
+model-00019-of-00037.safetensors	3812769424	3.55	9.965	1.617	364.9	2248.1
+model-00020-of-00037.safetensors	3995183968	3.72	38.757	1.505	98.3	2532.4
+model-00021-of-00037.safetensors	3995183968	3.72	9.741	1.537	391.1	2479.3
+model-00022-of-00037.safetensors	3995200464	3.72	9.919	1.490	384.1	2556.7
+model-00023-of-00037.safetensors	3812769424	3.55	7.871	1.441	462.0	2523.5
+model-00024-of-00037.safetensors	3995183968	3.72	9.773	1.497	389.8	2544.3
+model-00025-of-00037.safetensors	3995183968	3.72	9.940	1.492	383.3	2554.3
+model-00026-of-00037.safetensors	3995200464	3.72	10.196	1.174	373.7	3245.2
+model-00027-of-00037.safetensors	3812769424	3.55	7.749	1.335	469.3	2723.6
+model-00028-of-00037.safetensors	3995183968	3.72	9.940	1.138	383.3	3348.6
+model-00029-of-00037.safetensors	3995183968	3.72	10.419	1.627	365.7	2342.0
+model-00030-of-00037.safetensors	3995200464	3.72	10.123	1.484	376.4	2567.8
+model-00031-of-00037.safetensors	3812769424	3.55	8.347	1.409	435.6	2580.6
+model-00032-of-00037.safetensors	3995183968	3.72	9.513	1.498	400.5	2542.9
+model-00033-of-00037.safetensors	3995183968	3.72	9.012	1.477	422.8	2578.8
+model-00034-of-00037.safetensors	3995200464	3.72	10.911	1.183	349.2	3220.2
+model-00035-of-00037.safetensors	3812769424	3.55	9.072	1.530	400.8	2377.3
+model-00036-of-00037.safetensors	3995183968	3.72	10.212	1.153	373.1	3303.5
+model-00037-of-00037.safetensors	3460317640	3.22	10.150	1.293	325.1	2551.6
+model.safetensors.index.json	79025	0.00	0.082	0.000	0.9	768.9
+tokenizer.json	7031645	0.01	0.104	0.002	64.6	2977.2
+tokenizer_config.json	7305	0.00	0.049	0.000	0.1	202.2
+vocab.json	2776833	0.00	0.077	0.001	34.6	2821.2
+TOTAL	145424101604	135.44	403.734	54.557	343.5	2542.1
diff --git a/bench/results/netread.tsv b/bench/results/netread.tsv
new file mode 100644
index 0000000..50ab8b3
--- /dev/null
+++ b/bench/results/netread.tsv
@@ -0,0 +1,49 @@
+file	bytes	gib	net_s	net_mib_s
+.gitattributes	1519	0.00	0.054	0.0
+LICENSE	6962	0.00	0.052	0.1
+README.md	6259	0.00	0.052	0.1
+config.json	663	0.00	0.051	0.0
+generation_config.json	242	0.00	0.054	0.0
+merges.txt	1671839	0.00	0.067	23.8
+model-00001-of-00037.safetensors	3762345336	3.50	9.284	386.5
+model-00002-of-00037.safetensors	3995200440	3.72	8.611	442.5
+model-00003-of-00037.safetensors	3812769392	3.55	9.153	397.3
+model-00004-of-00037.safetensors	3995183944	3.72	9.344	407.7
+model-00005-of-00037.safetensors	3995183944	3.72	9.703	392.7
+model-00006-of-00037.safetensors	3995200456	3.72	8.694	438.2
+model-00007-of-00037.safetensors	3812769424	3.55	8.934	407.0
+model-00008-of-00037.safetensors	3995183968	3.72	9.010	422.9
+model-00009-of-00037.safetensors	3995183968	3.72	9.717	392.1
+model-00010-of-00037.safetensors	3995200464	3.72	8.204	464.4
+model-00011-of-00037.safetensors	3812769424	3.55	8.792	413.6
+model-00012-of-00037.safetensors	3995183968	3.72	9.383	406.1
+model-00013-of-00037.safetensors	3995183968	3.72	9.704	392.6
+model-00014-of-00037.safetensors	3995200464	3.72	9.138	417.0
+model-00015-of-00037.safetensors	3812769424	3.55	8.983	404.8
+model-00016-of-00037.safetensors	3995183968	3.72	37.839	100.7
+model-00017-of-00037.safetensors	3995183968	3.72	9.246	412.1
+model-00018-of-00037.safetensors	3995200464	3.72	11.037	345.2
+model-00019-of-00037.safetensors	3812769424	3.55	9.158	397.1
+model-00020-of-00037.safetensors	3995183968	3.72	64.033	59.5
+model-00021-of-00037.safetensors	3995183968	3.72	9.266	411.2
+model-00022-of-00037.safetensors	3995200464	3.72	9.361	407.0
+model-00023-of-00037.safetensors	3812769424	3.55	8.369	434.5
+model-00024-of-00037.safetensors	3995183968	3.72	9.082	419.5
+model-00025-of-00037.safetensors	3995183968	3.72	8.912	427.5
+model-00026-of-00037.safetensors	3995200464	3.72	8.468	449.9
+model-00027-of-00037.safetensors	3812769424	3.55	8.361	434.9
+model-00028-of-00037.safetensors	3995183968	3.72	8.180	465.8
+model-00029-of-00037.safetensors	3995183968	3.72	8.403	453.4
+model-00030-of-00037.safetensors	3995200464	3.72	8.496	448.5
+model-00031-of-00037.safetensors	3812769424	3.55	9.085	400.2
+model-00032-of-00037.safetensors	3995183968	3.72	9.307	409.4
+model-00033-of-00037.safetensors	3995183968	3.72	42.116	90.5
+model-00034-of-00037.safetensors	3995200464	3.72	9.877	385.8
+model-00035-of-00037.safetensors	3812769424	3.55	8.772	414.5
+model-00036-of-00037.safetensors	3995183968	3.72	8.523	447.0
+model-00037-of-00037.safetensors	3460317640	3.22	7.673	430.1
+model.safetensors.index.json	79025	0.00	0.050	1.5
+tokenizer.json	7031645	0.01	0.227	29.5
+tokenizer_config.json	7305	0.00	0.048	0.1
+vocab.json	2776833	0.00	0.073	36.2
+TOTAL	145424101604	135.44	450.948	307.5