Skip to content

Commit 68ec7ee

Browse files
rdhyee and claude committed
Fix cross-filtering: use pre-computed cache + correct column mapping
- Add 6KB pre-computed cross-filter cache for instant single-filter lookups
- Add 21MB sample_facets view with URI-string columns for on-the-fly fallback
- Fix column name mismatch: wide parquet has p__* BIGINT[] columns, but facet values are URI strings — cross-filter now queries sample_facets
- Main whereClause uses pid subquery against sample_facets for facet filters
- Source filter still queries wide parquet directly (n column is correct)

Supplementary files on data.isamples.org:
- isamples_202601_facet_cross_filter.parquet (6 KB, 526 rows)
- isamples_202601_sample_facets_v2.parquet (21 MB, 6M rows)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent e04e380 commit 68ec7ee

1 file changed

Lines changed: 88 additions & 43 deletions

File tree

tutorials/isamples_explorer.qmd

Lines changed: 88 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ parquet_url = "https://data.isamples.org/isamples_202601_wide.parquet"
3131
// Pre-computed facet summaries (2KB - loads instantly)
3232
facet_summaries_url = "https://data.isamples.org/isamples_202601_facet_summaries.parquet"
3333
34+
// Pre-computed cross-filter cache (6KB - instant single-filter lookups)
35+
cross_filter_url = "https://data.isamples.org/isamples_202601_facet_cross_filter.parquet"
36+
37+
// Slim facets file for on-the-fly multi-filter queries (21MB - URI strings, not BIGINT FKs)
38+
sample_facets_url = "https://data.isamples.org/isamples_202601_sample_facets_v2.parquet"
39+
3440
// Source color scheme (consistent with iSamples conventions)
3541
SOURCE_COLORS = ({
3642
'SESAR': '#3366CC', // Blue
@@ -307,9 +313,11 @@ db = {
307313
await instance.instantiate(bundle.mainModule, bundle.pthreadWorker);
308314
URL.revokeObjectURL(worker_url);
309315
310-
// Create view for convenience
316+
// Create views for convenience
311317
const conn = await instance.connect();
312318
await conn.query(`CREATE VIEW samples AS SELECT * FROM read_parquet('${parquet_url}')`);
319+
// Slim facets view with correct URI-string columns for cross-filtering
320+
await conn.query(`CREATE VIEW sample_facets AS SELECT * FROM read_parquet('${sample_facets_url}')`);
313321
await conn.close();
314322
315323
return instance;
@@ -385,55 +393,43 @@ facetsByType = {
385393
```{ojs}
386394
//| code-fold: true
387395
// Cross-filter: build WHERE clause excluding one facet dimension
388-
// This lets each facet show counts reflecting all OTHER active filters
389-
function buildWhereClause(excludeFacet) {
390-
const conditions = [
391-
"otype = 'MaterialSampleRecord'",
392-
"latitude IS NOT NULL"
393-
];
394-
395-
if (searchInput?.trim()) {
396-
const term = searchInput.trim().replace(/'/g, "''");
397-
conditions.push(`(
398-
label ILIKE '%${term}%'
399-
OR description ILIKE '%${term}%'
400-
OR CAST(place_name AS VARCHAR) ILIKE '%${term}%'
401-
)`);
402-
}
396+
// Queries the sample_facets view (URI strings, correct column names)
397+
function buildCrossFilterWhere(excludeFacet) {
398+
const conditions = [];
403399
404400
if (excludeFacet !== 'source') {
405401
const sources = Array.from(sourceCheckboxes || []);
406402
if (sources.length > 0) {
407403
const sourceList = sources.map(s => `'${s}'`).join(", ");
408-
conditions.push(`n IN (${sourceList})`);
404+
conditions.push(`source IN (${sourceList})`);
409405
}
410406
}
411407
412408
if (excludeFacet !== 'material') {
413409
const materials = Array.from(materialCheckboxes || []);
414410
if (materials.length > 0) {
415411
const matList = materials.map(m => `'${m.replace(/'/g, "''")}'`).join(", ");
416-
conditions.push(`has_material_category IN (${matList})`);
412+
conditions.push(`material IN (${matList})`);
417413
}
418414
}
419415
420416
if (excludeFacet !== 'context') {
421417
const contexts = Array.from(contextCheckboxes || []);
422418
if (contexts.length > 0) {
423419
const ctxList = contexts.map(c => `'${c.replace(/'/g, "''")}'`).join(", ");
424-
conditions.push(`has_context_category IN (${ctxList})`);
420+
conditions.push(`context IN (${ctxList})`);
425421
}
426422
}
427423
428424
if (excludeFacet !== 'object_type') {
429425
const objectTypes = Array.from(objectTypeCheckboxes || []);
430426
if (objectTypes.length > 0) {
431427
const otList = objectTypes.map(o => `'${o.replace(/'/g, "''")}'`).join(", ");
432-
conditions.push(`has_specimen_category IN (${otList})`);
428+
conditions.push(`object_type IN (${otList})`);
433429
}
434430
}
435431
436-
return conditions.join(" AND ");
432+
return conditions.length > 0 ? conditions.join(" AND ") : "1=1";
437433
}
438434
```
439435

@@ -452,28 +448,73 @@ hasActiveFilters = {
452448

453449
```{ojs}
454450
//| code-fold: true
455-
// Cross-filtered facet counts: recompute when filters are active
456-
// Each facet uses a WHERE clause with all filters EXCEPT its own dimension,
457-
// so you see how many items exist for each value given other active filters
451+
// Cross-filtered facet counts: use pre-computed cache for single-filter,
452+
// fall back to on-the-fly queries against sample_facets for multi-filter
458453
crossFilteredFacets = {
459454
if (!hasActiveFilters) return null; // Use pre-computed summaries when no filters
460455
456+
// Count how many facets have active filters
457+
const activeSources = Array.from(sourceCheckboxes || []);
458+
const activeMaterials = Array.from(materialCheckboxes || []);
459+
const activeContexts = Array.from(contextCheckboxes || []);
460+
const activeObjectTypes = Array.from(objectTypeCheckboxes || []);
461+
const hasSearch = searchInput?.trim()?.length > 0;
462+
463+
const activeFilterCount = [activeSources, activeMaterials, activeContexts, activeObjectTypes]
464+
.filter(a => a.length > 0).length;
465+
466+
// Try pre-computed cache for single-filter (no text search)
467+
if (activeFilterCount === 1 && !hasSearch) {
468+
try {
469+
// Build filter conditions for the cache lookup
470+
const conditions = ["filter_source IS NULL", "filter_material IS NULL",
471+
"filter_context IS NULL", "filter_object_type IS NULL"];
472+
if (activeSources.length === 1)
473+
conditions[0] = `filter_source = '${activeSources[0]}'`;
474+
else if (activeMaterials.length === 1)
475+
conditions[1] = `filter_material = '${activeMaterials[0].replace(/'/g, "''")}'`;
476+
else if (activeContexts.length === 1)
477+
conditions[2] = `filter_context = '${activeContexts[0].replace(/'/g, "''")}'`;
478+
else if (activeObjectTypes.length === 1)
479+
conditions[3] = `filter_object_type = '${activeObjectTypes[0].replace(/'/g, "''")}'`;
480+
481+
const sql = `
482+
SELECT facet_type, facet_value AS value, count
483+
FROM read_parquet('${cross_filter_url}')
484+
WHERE ${conditions.join(" AND ")}
485+
`;
486+
const rows = await runQuery(sql);
487+
488+
if (rows.length > 0) {
489+
const results = { source: [], material: [], context: [], object_type: [] };
490+
for (const r of rows) {
491+
if (results[r.facet_type]) {
492+
results[r.facet_type].push({ value: r.value, count: Number(r.count) });
493+
}
494+
}
495+
return results;
496+
}
497+
} catch (e) {
498+
console.warn("Pre-computed cache miss, falling back to on-the-fly:", e);
499+
}
500+
}
501+
502+
// Fallback: on-the-fly queries against the slim sample_facets view
461503
const facetConfig = [
462-
{ key: 'source', column: 'n', exclude: 'source' },
463-
{ key: 'material', column: 'has_material_category', exclude: 'material' },
464-
{ key: 'context', column: 'has_context_category', exclude: 'context' },
465-
{ key: 'object_type', column: 'has_specimen_category', exclude: 'object_type' },
504+
{ key: 'source', column: 'source', exclude: 'source' },
505+
{ key: 'material', column: 'material', exclude: 'material' },
506+
{ key: 'context', column: 'context', exclude: 'context' },
507+
{ key: 'object_type', column: 'object_type', exclude: 'object_type' },
466508
];
467509
468510
const results = {};
469511
470-
// Run all 4 facet queries in parallel
471512
const queries = facetConfig.map(async ({ key, column, exclude }) => {
472-
const where = buildWhereClause(exclude);
513+
const where = buildCrossFilterWhere(exclude);
473514
const sql = `
474515
SELECT ${column} AS value, COUNT(*) AS count
475-
FROM samples
476-
WHERE ${where} AND ${column} IS NOT NULL
516+
FROM sample_facets
517+
WHERE ${where} AND ${column} IS NOT NULL AND ${column} != ''
477518
GROUP BY ${column}
478519
ORDER BY count DESC
479520
`;
@@ -482,7 +523,7 @@ crossFilteredFacets = {
482523
results[key] = rows.map(r => ({ value: r.value, count: r.count }));
483524
} catch (e) {
484525
console.warn(`Cross-filter query failed for ${key}:`, e);
485-
results[key] = null; // Fall back to pre-computed
526+
results[key] = null;
486527
}
487528
});
488529
@@ -534,13 +575,16 @@ function getDisplayCounts(facetKey) {
534575
```{ojs}
535576
//| code-fold: true
536577
// Build WHERE clause from current filters (Tier 2: queries full parquet only when filtering)
578+
// Source filter uses the wide parquet's `n` column directly.
579+
// Material/context/object_type filters use the sample_facets view (URI strings)
580+
// via a subquery, since the wide parquet stores these as BIGINT foreign keys.
537581
whereClause = {
538582
const conditions = [
539583
"otype = 'MaterialSampleRecord'",
540584
"latitude IS NOT NULL"
541585
];
542586
543-
// Text search
587+
// Text search (against wide parquet — has label, description, place_name)
544588
if (searchInput?.trim()) {
545589
const term = searchInput.trim().replace(/'/g, "''");
546590
conditions.push(`(
@@ -550,32 +594,33 @@ whereClause = {
550594
)`);
551595
}
552596
553-
// Source filter
597+
// Source filter (n column exists in wide parquet)
554598
const sources = Array.from(sourceCheckboxes || []);
555599
if (sources.length > 0) {
556600
const sourceList = sources.map(s => `'${s}'`).join(", ");
557601
conditions.push(`n IN (${sourceList})`);
558602
}
559603
560-
// Material filter
604+
// Facet filters: build a subquery against sample_facets to get matching PIDs
605+
const facetConditions = [];
561606
const materials = Array.from(materialCheckboxes || []);
562607
if (materials.length > 0) {
563608
const matList = materials.map(m => `'${m.replace(/'/g, "''")}'`).join(", ");
564-
conditions.push(`has_material_category IN (${matList})`);
609+
facetConditions.push(`material IN (${matList})`);
565610
}
566-
567-
// Context (sampled feature) filter
568611
const contexts = Array.from(contextCheckboxes || []);
569612
if (contexts.length > 0) {
570613
const ctxList = contexts.map(c => `'${c.replace(/'/g, "''")}'`).join(", ");
571-
conditions.push(`has_context_category IN (${ctxList})`);
614+
facetConditions.push(`context IN (${ctxList})`);
572615
}
573-
574-
// Object type (specimen type) filter
575616
const objectTypes = Array.from(objectTypeCheckboxes || []);
576617
if (objectTypes.length > 0) {
577618
const otList = objectTypes.map(o => `'${o.replace(/'/g, "''")}'`).join(", ");
578-
conditions.push(`has_specimen_category IN (${otList})`);
619+
facetConditions.push(`object_type IN (${otList})`);
620+
}
621+
622+
if (facetConditions.length > 0) {
623+
conditions.push(`pid IN (SELECT pid FROM sample_facets WHERE ${facetConditions.join(" AND ")})`);
579624
}
580625
581626
return conditions.join(" AND ");

0 commit comments

Comments
 (0)