|
| 1 | +/** |
| 2 | + * Cloudflare Worker: data.isamples.org |
| 3 | + * |
| 4 | + * Proxies the iSamples R2 bucket and adds cache-control headers so the |
| 5 | + * Cloudflare edge and the browser can cache immutable parquet versions |
| 6 | + * aggressively. |
| 7 | + * |
| 8 | + * Strategy: |
| 9 | + * - Filename-versioned parquets (isamples_YYYYMM_*.parquet) are immutable |
| 10 | + * by naming convention → cache one year + immutable. |
| 11 | + * - Anything else falls back to a short TTL. |
| 12 | + * |
| 13 | + * Uses the R2 bucket binding (env.BUCKET) rather than fetching the r2.dev |
| 14 | + * public URL — fewer hops, lower latency, no need to expose the bucket |
| 15 | + * publicly. |
| 16 | + * |
| 17 | + * Range requests are supported so DuckDB-WASM's HTTP range fetches keep |
| 18 | + * working. |
| 19 | + */ |
| 20 | + |
// Names of the form isamples_<6 digits>_<anything>.parquet are treated as
// immutable. The pattern is anchored at both ends, so it must match the whole
// string it is tested against.
const IMMUTABLE_PATTERN = /^isamples_\d{6}_.*\.parquet$/;
const IMMUTABLE_MAX_AGE = 60 * 60 * 24 * 365; // 1 year
const FALLBACK_MAX_AGE = 300; // 5 minutes

// CORS headers shared by every response. Frozen because the same object is
// reused across requests and must never be mutated by a handler.
const CORS_HEADERS = Object.freeze({
  'Access-Control-Allow-Origin': '*',
  'Access-Control-Allow-Methods': 'GET, HEAD, OPTIONS',
  'Access-Control-Allow-Headers': 'Range',
  'Access-Control-Expose-Headers': 'Content-Length, Content-Range, Accept-Ranges, ETag',
  // Let browsers cache the preflight result instead of repeating OPTIONS
  // before each cross-origin Range request (browsers cap this value).
  'Access-Control-Max-Age': '86400',
});
| 31 | + |
/**
 * Resolve a parsed range against the total object size.
 *
 * @param {{offset?: number, length?: number, suffix?: number}} range
 *   Parsed range in the shape passed to the bucket's get().
 * @param {number} size Total size of the object in bytes.
 * @returns {{start: number, end: number} | null} Inclusive byte positions
 *   clamped to the object, or null when the range cannot be satisfied.
 */
function resolveByteRange(range, size) {
  if (size <= 0) return null;
  if (range.suffix != null) {
    // Suffix form ("last N bytes"): a suffix longer than the object selects
    // the whole object.
    const count = Math.min(range.suffix, size);
    return { start: size - count, end: size - 1 };
  }
  const start = range.offset ?? 0;
  if (start >= size) return null;
  // An end position past EOF is clamped to the final byte.
  const end = range.length != null ? Math.min(start + range.length, size) - 1 : size - 1;
  return { start, end };
}

const worker = {
  /**
   * Serve an object from the bucket binding with CORS and Cache-Control
   * headers, honouring single-range GET requests.
   *
   * @param {Request} request Incoming HTTP request.
   * @param {{BUCKET: object}} env Worker bindings; BUCKET must provide
   *   head(key) and get(key, options).
   * @returns {Promise<Response>} 200/206 with the object, 204 for OPTIONS,
   *   400 for an undecodable path, 404 for a missing key, 405 for other
   *   methods, 416 for an unsatisfiable range.
   */
  async fetch(request, env) {
    const { method } = request;
    if (method === 'OPTIONS') {
      return new Response(null, { status: 204, headers: CORS_HEADERS });
    }

    const isHead = method === 'HEAD';
    if (method !== 'GET' && !isHead) {
      return new Response('Method not allowed', { status: 405, headers: CORS_HEADERS });
    }

    const { pathname } = new URL(request.url);
    let key;
    try {
      key = decodeURIComponent(pathname.replace(/^\/+/, ''));
    } catch (err) {
      // Malformed percent-encoding is a client error, not a server fault.
      if (!(err instanceof URIError)) throw err;
      return new Response('Bad request', { status: 400, headers: CORS_HEADERS });
    }

    if (!key) {
      // Simple root response — could be replaced with an index listing later.
      return new Response('data.isamples.org — R2 bucket proxy\n', {
        status: 200,
        headers: { 'content-type': 'text/plain; charset=utf-8', ...CORS_HEADERS },
      });
    }

    // An unparseable Range header yields undefined, which falls back to a
    // full-object response.
    const rangeHeader = request.headers.get('range');
    const range = rangeHeader ? parseRange(rangeHeader) : undefined;

    const object = isHead
      ? await env.BUCKET.head(key)
      : await env.BUCKET.get(key, range ? { range } : {});

    if (!object) {
      return new Response('Not found', { status: 404, headers: CORS_HEADERS });
    }

    const headers = new Headers();
    object.writeHttpMetadata(headers);
    headers.set('ETag', object.httpEtag);
    headers.set('Accept-Ranges', 'bytes');

    for (const [name, value] of Object.entries(CORS_HEADERS)) headers.set(name, value);

    // Cache-Control: this is the optimization.
    const maxAge = IMMUTABLE_PATTERN.test(key)
      ? `${IMMUTABLE_MAX_AGE}, immutable`
      : `${FALLBACK_MAX_AGE}`;
    headers.set('Cache-Control', `public, max-age=${maxAge}`);

    if (isHead) {
      headers.set('Content-Length', String(object.size));
      return new Response(null, { status: 200, headers });
    }

    if (!range) {
      return new Response(object.body, { status: 200, headers });
    }

    // Content-Range needs the total object size. object.size is presumably
    // always the full size (confirm against the R2 docs); the head() call is
    // kept as a defensive fallback in case it is missing.
    let fullSize = typeof object.size === 'number' ? object.size : null;
    if (fullSize === null) {
      const meta = await env.BUCKET.head(key);
      fullSize = typeof meta?.size === 'number' ? meta.size : null;
    }
    if (fullSize === null) {
      // Size unknown: return the partial body without range headers.
      return new Response(object.body, { status: 206, headers });
    }

    const span = resolveByteRange(range, fullSize);
    if (!span) {
      return new Response(null, {
        status: 416,
        headers: {
          ...CORS_HEADERS,
          'Accept-Ranges': 'bytes',
          'Content-Range': `bytes */${fullSize}`,
        },
      });
    }

    headers.set('Content-Range', `bytes ${span.start}-${span.end}/${fullSize}`);
    headers.set('Content-Length', String(span.end - span.start + 1));
    return new Response(object.body, { status: 206, headers });
  },
};

export default worker;
| 111 | + |
/**
 * Parse a single-range HTTP Range header into a plain range object.
 *
 * Supported forms: `bytes=START-END`, `bytes=START-` and `bytes=-SUFFIX`.
 * The `bytes` unit is matched case-insensitively, as HTTP range unit names
 * are case-insensitive.
 *
 * @param {string} header Raw Range header value.
 * @returns {{offset: number, length?: number} | {suffix: number} | undefined}
 *   The parsed range, or undefined for anything unsupported (multi-range,
 *   other units, END before START, zero-length suffix, or values too large
 *   to represent exactly).
 */
function parseRange(header) {
  const match = /^bytes=(\d*)-(\d*)$/i.exec(header.trim());
  if (!match) return undefined;

  const [, startStr, endStr] = match;
  if (startStr === '' && endStr === '') return undefined;

  if (startStr === '') {
    // Suffix form: the last N bytes of the object.
    const suffix = Number(endStr);
    // isSafeInteger rejects digit strings above 2^53, which would otherwise
    // be rounded and address the wrong bytes.
    if (!Number.isSafeInteger(suffix) || suffix <= 0) return undefined;
    return { suffix };
  }

  const offset = Number(startStr);
  if (!Number.isSafeInteger(offset)) return undefined;
  if (endStr === '') return { offset };

  const end = Number(endStr);
  if (!Number.isSafeInteger(end) || end < offset) return undefined;
  // END is inclusive in HTTP, hence the +1.
  return { offset, length: end - offset + 1 };
}
0 commit comments