Skip to content

Commit b9790f9

Browse files
rdhyee and claude authored
Add Explorer v2 behind ?v=2 flag (lite parquet, lazy description, no RANDOM, lazy Cesium) (#126)
Four architectural moves, each gated on ?v=2. v1 stays unchanged.

1. Primary read surface: samples_map_lite.parquet (60 MB) instead of wide.parquet (278 MB). The lite file has every column the Explorer needs for the list + globe view except description.
2. No ORDER BY RANDOM(). v1 uses RANDOM(), which forces a scan across row groups; v2 uses bare LIMIT, accepting row-order bias in exchange for ~20× query speedup on columnar parquet. (Trade-off acceptable for a viz sample; revisit if source clustering becomes visible.)
3. Lazy description fetch. v2 drops description from sampleData and adds a lazyDescription cell that queries wide.parquet for just the one pid when a sample is clicked. sampleCard falls back to lazyDescription when s.description is empty.
4. Lazy Cesium mount. v2 returns null from the viewer cell until viewMode === 'globe', so the viewer constructor (~500 ms) doesn't run for users who stay in list/table view. v1 mounts eagerly.

whereClause handles column-name drift (v1 uses `n`, v2 uses `source`) and skips the otype filter for v2 (lite is samples-only). Text search in v2 is limited to label + place_name (description isn't loaded eagerly). v1 keeps description search.

Next: measure v2 and compare against the PR #124 baseline.

Co-authored-by: Claude Opus 4.7 <noreply@anthropic.com>
1 parent cc5e571 commit b9790f9

1 file changed

Lines changed: 95 additions & 17 deletions

File tree

tutorials/isamples_explorer.qmd

Lines changed: 95 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -77,8 +77,15 @@ duckdbModule = import("https://cdn.jsdelivr.net/npm/@duckdb/duckdb-wasm@1.28.0/+
7777

7878
```{ojs}
7979
//| code-fold: true
80+
// Version gate. Append ?v=2 to the URL to opt into the lite-backed
81+
// rewrite (samples_map_lite.parquet instead of wide.parquet, lazy
82+
// description fetch on click, no ORDER BY RANDOM(), lazy Cesium mount).
83+
explorerVersion = new URLSearchParams(location.search).get('v') === '2' ? 'v2' : 'v1'
84+
8085
// Data source configuration
81-
parquet_url = "https://data.isamples.org/isamples_202601_wide.parquet"
86+
wide_url = "https://data.isamples.org/isamples_202601_wide.parquet"
87+
lite_url = "https://data.isamples.org/isamples_202601_samples_map_lite.parquet"
88+
parquet_url = explorerVersion === 'v2' ? lite_url : wide_url
8289
8390
// Pre-computed facet summaries (2KB - loads instantly)
8491
facet_summaries_url = "https://data.isamples.org/isamples_202601_facet_summaries.parquet"
@@ -356,9 +363,19 @@ db = {
356363
await instance.instantiate(bundle.mainModule, bundle.pthreadWorker);
357364
URL.revokeObjectURL(worker_url);
358365
359-
// Create views for convenience
366+
// Create views for convenience. v1 reads the full wide parquet directly;
367+
// v2 reads the 60 MB lite file (no description, no row_id, source is
368+
// already named 'source' not 'n').
360369
const conn = await instance.connect();
361-
await conn.query(`CREATE VIEW samples AS SELECT * FROM read_parquet('${parquet_url}')`);
370+
if (explorerVersion === 'v2') {
371+
await conn.query(`
372+
CREATE VIEW samples AS
373+
SELECT pid, label, source, latitude, longitude, place_name
374+
FROM read_parquet('${parquet_url}')
375+
`);
376+
} else {
377+
await conn.query(`CREATE VIEW samples AS SELECT * FROM read_parquet('${parquet_url}')`);
378+
}
362379
// Slim facets view with correct URI-string columns for cross-filtering
363380
await conn.query(`CREATE VIEW sample_facets AS SELECT * FROM read_parquet('${sample_facets_url}')`);
364381
await conn.close();
@@ -636,26 +653,38 @@ crossFilteredFacets = {
636653
// Material/context/object_type filters use the sample_facets view (URI strings)
637654
// via a subquery, since the wide parquet stores these as BIGINT foreign keys.
638655
whereClause = {
639-
const conditions = [
640-
"otype = 'MaterialSampleRecord'",
641-
"latitude IS NOT NULL"
642-
];
656+
const conditions = ["latitude IS NOT NULL"];
643657
644-
// Text search (against wide parquet — has label, description, place_name)
658+
// v1 reads the multi-entity-type wide parquet, so filter to sample records.
659+
// v2 reads lite which is already samples-only.
660+
if (explorerVersion !== 'v2') {
661+
conditions.unshift("otype = 'MaterialSampleRecord'");
662+
}
663+
664+
// Text search. v1 can search description (column exists in wide);
665+
// v2 can't (description is not in lite — lazy-fetched on sample click).
645666
if (searchInput?.trim()) {
646667
const term = searchInput.trim().replace(/'/g, "''");
647-
conditions.push(`(
648-
label ILIKE '%${term}%'
649-
OR description ILIKE '%${term}%'
650-
OR CAST(place_name AS VARCHAR) ILIKE '%${term}%'
651-
)`);
668+
if (explorerVersion === 'v2') {
669+
conditions.push(`(
670+
label ILIKE '%${term}%'
671+
OR CAST(place_name AS VARCHAR) ILIKE '%${term}%'
672+
)`);
673+
} else {
674+
conditions.push(`(
675+
label ILIKE '%${term}%'
676+
OR description ILIKE '%${term}%'
677+
OR CAST(place_name AS VARCHAR) ILIKE '%${term}%'
678+
)`);
679+
}
652680
}
653681
654-
// Source filter (n column exists in wide parquet)
682+
// Source filter. v1 uses the wide parquet's `n` column; v2 uses `source`.
655683
const sources = Array.from(sourceCheckboxes || []);
656684
if (sources.length > 0) {
657685
const sourceList = sources.map(s => `'${s}'`).join(", ");
658-
conditions.push(`n IN (${sourceList})`);
686+
const col = explorerVersion === 'v2' ? 'source' : 'n';
687+
conditions.push(`${col} IN (${sourceList})`);
659688
}
660689
661690
// Facet filters: build a subquery against sample_facets to get matching PIDs
@@ -720,7 +749,24 @@ sampleData = {
720749
721750
performance.mark('explorer-samples-start');
722751
try {
723-
const query = `
752+
// v2: read from lite (60 MB), no description (fetched lazily on click),
753+
// no row_id, no ORDER BY RANDOM(). LIMIT returns whatever rows the
754+
// scan encounters first — biased toward row order but ~20x faster
755+
// than RANDOM() on a columnar file.
756+
// v1: original query against the 278 MB wide file.
757+
const query = explorerVersion === 'v2' ? `
758+
SELECT
759+
pid,
760+
label,
761+
'' AS description,
762+
latitude,
763+
longitude,
764+
source,
765+
place_name
766+
FROM samples
767+
WHERE ${whereClause}
768+
LIMIT ${maxSamples}
769+
` : `
724770
SELECT
725771
row_id,
726772
pid,
@@ -778,6 +824,14 @@ mutable clickedPointIndex = null
778824
//| code-fold: true
779825
// Cesium viewer setup
780826
viewer = {
827+
// v2: defer Cesium construction until the user actually switches to
828+
// globe view. The cell re-evaluates when viewMode changes (reactive
829+
// dependency below), so toggling into globe will mount on demand.
830+
// v1 mounts eagerly to preserve original behavior.
831+
if (explorerVersion === 'v2' && viewMode !== 'globe') {
832+
return null;
833+
}
834+
781835
// Wait for Cesium to be available
782836
await new Promise(resolve => {
783837
if (typeof Cesium !== 'undefined') resolve();
@@ -886,6 +940,28 @@ selectedSample = {
886940
}
887941
```
888942

943+
```{ojs}
944+
//| code-fold: true
945+
// v2: lazy description fetch — only hit the 278 MB wide parquet when a sample
946+
// is actually clicked, rather than pulling description for every row eagerly.
947+
lazyDescription = {
948+
if (explorerVersion !== 'v2') return null;
949+
if (!selectedSample?.pid) return null;
950+
const pid = selectedSample.pid.replace(/'/g, "''");
951+
try {
952+
const rows = await runQuery(`
953+
SELECT description FROM read_parquet('${wide_url}')
954+
WHERE pid = '${pid}' AND otype = 'MaterialSampleRecord'
955+
LIMIT 1
956+
`);
957+
return rows[0]?.description || '';
958+
} catch (e) {
959+
console.warn('Lazy description fetch failed:', e);
960+
return '';
961+
}
962+
}
963+
```
964+
889965
```{ojs}
890966
//| code-fold: true
891967
// Render sample card
@@ -900,7 +976,9 @@ sampleCard = {
900976
const sourceColor = SOURCE_COLORS[s.source] || SOURCE_COLORS.default;
901977
902978
const label = s.label || 'No label';
903-
const description = s.description || '';
979+
// v2: prefer the lazily-fetched description (from wide parquet on click);
980+
// v1: the description is already in sampleData.
981+
const description = (s.description || lazyDescription || '').trim();
904982
const truncDesc = description.length > 200 ? description.substring(0, 200) + '...' : description;
905983
906984
let placeStr = '';

0 commit comments

Comments
 (0)