@@ -12,7 +12,7 @@ Search and explore **6.7 million physical samples** from scientific collections
1212
1313::: {.callout-note}
1414### Serverless Architecture
15- This app queries a ~ 280 MB Parquet file directly in your browser using DuckDB-WASM. No server required!
15+ This app uses a **two-tier loading strategy**: a 2 KB pre-computed summary loads instantly for facet counts (source, material, context, specimen type), while the full ~280 MB Parquet file is only queried when drilling into records. All powered by DuckDB-WASM in your browser -- no server required!
1616:::
1717
1818## Setup
@@ -28,6 +28,9 @@ duckdbModule = import("https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@1.28.0/+
2828// Data source configuration: URL of the full wide-format samples Parquet file (described above as ~280 MB)
2929parquet_url = "https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202601_wide.parquet"
3030
31+ // Pre-computed facet summaries (2KB - loads instantly); read by the facetSummaries cell via read_parquet
32+ facet_summaries_url = "https://pub-a18234d962364c22a50c787b7ca09fa5.r2.dev/isamples_202601_facet_summaries.parquet"
33+
3134// Source color scheme (consistent with iSamples conventions)
3235SOURCE_COLORS = ({
3336 'SESAR': '#3366CC', // Blue
@@ -79,14 +82,18 @@ viewof searchInput = Inputs.text({
7982
8083### Filters
8184
85+ ``` {ojs}
86+ facetSummariesWarning
87+ ```
88+
8289**Source**
8390
8491``` {ojs}
8592//| code-fold: true
86- // Source checkboxes with counts
93+ // Source checkboxes with counts - uses pre-computed summaries for instant load
8794viewof sourceCheckboxes = {
88- // Get source counts based on current search
89- const counts = await sourceCounts ;
95+ // Use pre-computed facet summaries (instant) instead of scanning full parquet
96+ const counts = facetsByType.source ;
9097 const options = counts.map(r => r.value);
9198
9299 return Inputs.checkbox(options, {
@@ -104,6 +111,69 @@ viewof sourceCheckboxes = {
104111}
105112```
106113
114+ **Material**
115+
116+ ``` {ojs}
117+ //| code-fold: true
118+ // Material filter - loaded from pre-computed summaries
119+ viewof materialCheckboxes = {
120+ const counts = facetsByType.material;
121+ const options = counts.map(r => r.value);
122+ return Inputs.checkbox(options, {
123+ value: [],
124+ format: (x) => {
125+ const r = counts.find(s => s.value === x);
126+ const count = r ? Number(r.count).toLocaleString() : "0";
127+ return html`<span style="display: inline-flex; align-items: center; gap: 4px;">
128+ ${x} <span style="color: #888; font-size: 11px;">(${count})</span>
129+ </span>`;
130+ }
131+ });
132+ }
133+ ```
134+
135+ **Sampled Feature**
136+
137+ ``` {ojs}
138+ //| code-fold: true
139+ // Context filter - loaded from pre-computed summaries
140+ viewof contextCheckboxes = {
141+ const counts = facetsByType.context;
142+ const options = counts.map(r => r.value);
143+ return Inputs.checkbox(options, {
144+ value: [],
145+ format: (x) => {
146+ const r = counts.find(s => s.value === x);
147+ const count = r ? Number(r.count).toLocaleString() : "0";
148+ return html`<span style="display: inline-flex; align-items: center; gap: 4px;">
149+ ${x} <span style="color: #888; font-size: 11px;">(${count})</span>
150+ </span>`;
151+ }
152+ });
153+ }
154+ ```
155+
156+ **Specimen Type**
157+
158+ ``` {ojs}
159+ //| code-fold: true
160+ // Object type filter - loaded from pre-computed summaries
161+ viewof objectTypeCheckboxes = {
162+ const counts = facetsByType.object_type;
163+ const options = counts.map(r => r.value);
164+ return Inputs.checkbox(options, {
165+ value: [],
166+ format: (x) => {
167+ const r = counts.find(s => s.value === x);
168+ const count = r ? Number(r.count).toLocaleString() : "0";
169+ return html`<span style="display: inline-flex; align-items: center; gap: 4px;">
170+ ${x} <span style="color: #888; font-size: 11px;">(${count})</span>
171+ </span>`;
172+ }
173+ });
174+ }
175+ ```
176+
107177``` {ojs}
108178//| code-fold: true
109179html`<a href="?" style="font-size: 13px;">Clear All Filters</a>`
@@ -131,6 +201,9 @@ viewof maxSamples = Inputs.range([1000, 100000], {
131201 const params = new URLSearchParams();
132202 if (searchInput) params.set("q", searchInput);
133203 if (sourceCheckboxes?.length) params.set("sources", sourceCheckboxes.join(","));
204+ if (materialCheckboxes?.length) params.set("material", materialCheckboxes.join(","));
205+ if (contextCheckboxes?.length) params.set("context", contextCheckboxes.join(","));
206+ if (objectTypeCheckboxes?.length) params.set("object_type", objectTypeCheckboxes.join(","));
134207 if (viewMode !== "globe") params.set("view", viewMode);
135208
136209 const newUrl = params.toString() ? `?${params.toString()}` : window.location.pathname;
@@ -264,7 +337,50 @@ async function runQuery(sql) {
264337
265338``` {ojs}
266339//| code-fold: true
267- // Build WHERE clause from current filters
340+ // Tier 1: Load pre-computed facet summaries (2KB, instant)
341+ facetSummaries = {
342+ facetSummariesError = null;
343+ try {
344+ const rows = await runQuery(`SELECT * FROM read_parquet('${facet_summaries_url}')`);
345+ return rows;
346+ } catch (e) {
347+ console.error("Facet summaries load error:", e);
348+ facetSummariesError = e;
349+ return [];
350+ }
351+ }
352+
353+ ```
354+
355+ ``` {ojs}
356+ //| code-fold: true
357+ facetSummariesWarning = {
358+ if (!facetSummariesError) return null;
359+ return html`<div style="margin: 6px 0 10px; padding: 8px 10px; border: 1px solid #f0b429; background: #fff7e6; border-radius: 6px; color: #7a4b00; font-size: 12px;">
360+ Facet summaries failed to load. Filter counts may be missing. Try refreshing.
361+ </div>`;
362+ }
363+
364+ // Extract facet counts by type from pre-computed summaries
365+ facetsByType = {
366+ const grouped = { source: [], material: [], context: [], object_type: [] };
367+ for (const row of facetSummaries) {
368+ const ft = row.facet_type;
369+ if (grouped[ft]) {
370+ grouped[ft].push({ value: row.facet_value, count: Number(row.count), scheme: row.scheme });
371+ }
372+ }
373+ // Sort each by count descending
374+ for (const key of Object.keys(grouped)) {
375+ grouped[key].sort((a, b) => b.count - a.count);
376+ }
377+ return grouped;
378+ }
379+ ```
380+
381+ ``` {ojs}
382+ //| code-fold: true
383+ // Build WHERE clause from current filters (Tier 2: queries full parquet only when filtering)
268384whereClause = {
269385 const conditions = [
270386 "otype = 'MaterialSampleRecord'",
@@ -288,40 +404,36 @@ whereClause = {
288404 conditions.push(`n IN (${sourceList})`);
289405 }
290406
407+ // Material filter
408+ const materials = Array.from(materialCheckboxes || []);
409+ if (materials.length > 0) {
410+ const matList = materials.map(m => `'${m.replace(/'/g, "''")}'`).join(", ");
411+ conditions.push(`has_material_category IN (${matList})`);
412+ }
413+
414+ // Context (sampled feature) filter
415+ const contexts = Array.from(contextCheckboxes || []);
416+ if (contexts.length > 0) {
417+ const ctxList = contexts.map(c => `'${c.replace(/'/g, "''")}'`).join(", ");
418+ conditions.push(`has_context_category IN (${ctxList})`);
419+ }
420+
421+ // Object type (specimen type) filter
422+ const objectTypes = Array.from(objectTypeCheckboxes || []);
423+ if (objectTypes.length > 0) {
424+ const otList = objectTypes.map(o => `'${o.replace(/'/g, "''")}'`).join(", ");
425+ conditions.push(`has_specimen_category IN (${otList})`);
426+ }
427+
291428 return conditions.join(" AND ");
292429}
293430```
294431
295432``` {ojs}
296433//| code-fold: true
297- // Get source facet counts (respects text search but not source filter)
298- sourceCounts = {
299- let baseWhere = "otype = 'MaterialSampleRecord' AND latitude IS NOT NULL";
300-
301- if (searchInput?.trim()) {
302- const term = searchInput.trim().replace(/'/g, "''");
303- baseWhere += ` AND (
304- label ILIKE '%${term}%'
305- OR description ILIKE '%${term}%'
306- OR CAST(place_name AS VARCHAR) ILIKE '%${term}%'
307- )`;
308- }
309-
310- const query = `
311- SELECT n as value, COUNT(*) as count
312- FROM samples
313- WHERE ${baseWhere}
314- GROUP BY n
315- ORDER BY count DESC
316- `;
317-
318- try {
319- return await runQuery(query);
320- } catch (e) {
321- console.error("Facet query error:", e);
322- return [];
323- }
324- }
434+ // Source counts now come from pre-computed facet summaries (Tier 1), so they are static:
435+ // they no longer scan the full parquet file and, unlike the removed query, no longer change with the text search
436+ sourceCounts = facetsByType.source
325437```
326438
327439``` {ojs}
0 commit comments