Skip to content

Commit b4f925c

Browse files
rdhyeeclaude
andcommitted
Transitional progress: 2 paths calculation working proof of concept
- pid is hardcoded for testing - working towards setting pid as the clicked id - get_samples_1 and get_samples_2 functions implemented 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 740afad commit b4f925c

1 file changed

Lines changed: 84 additions & 159 deletions

File tree

tutorials/parquet_cesium.qmd

Lines changed: 84 additions & 159 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,12 @@
11
---
2-
title: Using Cesium for geospatial visualization of remote parquet data
2+
title: Using Cesium for display of remote parquet.
33
categories: [parquet, spatial, recipe]
44
---
55

6-
One key development of the iSamples project centers on the demonstration of low-cost, simplified, and more sustainable approaches to access, analyze and visualize scientific data. Rather than relying upon elaborate and costly server-side infrastructure, iSamples demonstrates how open source technologies like parquet and DuckDB-WASM can streamline cheaper and faster approaches to interacting with geospatial data.
6+
This page renders points from an iSamples parquet file on Cesium using point primitives.
77

8-
This page demonstrates how geospatial data can be dynamically accessed from a remote parquet file in cloud storage. The page uses Cesium for browser visualization of these spatial data on a 3D global map. The data in this demonstration comes from [Open Context's](https://opencontext.org/) export of specimen (archaeological artifact and ecofact) records for iSamples. However, this demonstration can also work with any other iSamples compliant parquet data source made publicly accessible on the Web.
9-
10-
11-
<script src="https://cesium.com/downloads/cesiumjs/releases/1.133/Build/Cesium/Cesium.js"></script>
12-
<link href="https://cesium.com/downloads/cesiumjs/releases/1.133/Build/Cesium/Widgets/widgets.css" rel="stylesheet"></link>
8+
<script src="https://cesium.com/downloads/cesiumjs/releases/1.127/Build/Cesium/Cesium.js"></script>
9+
<link href="https://cesium.com/downloads/cesiumjs/releases/1.127/Build/Cesium/Widgets/widgets.css" rel="stylesheet"></link>
1310
<style>
1411
div.cesium-topleft {
1512
display: block;
@@ -196,6 +193,62 @@ async function getGeoRecord(pid) {
196193
return result;
197194
}
198195
196+
/**
 * Path 1 ("direct_event_location"): look up sample information for a
 * geospatial location by following
 *   sample --produced_by--> event --sample_location--> location
 * through the `nodes` table, filtered to the location whose pid equals `pid`.
 *
 * @param {string|null} pid - pid of the location node to start from.
 * @returns {Promise<*>} The string "unset" when `pid` is null, "" or "unset";
 *   otherwise whatever `db.queryRow` resolves to for the query below.
 *
 * NOTE(review): `db.queryRow` presumably yields only the first matching row,
 * while the function name and SELECT DISTINCT suggest several samples may be
 * expected — confirm whether `db.query` is intended.
 */
async function get_samples_1(pid) {
197+
// Guard for a missing selection. An `undefined` pid is NOT caught by these
// comparisons and would be passed on to the query.
if (pid === null || pid ==="" || pid == "unset") {
198+
return "unset";
199+
}
200+
const q = `
201+
SELECT DISTINCT
202+
s.pid as sample_id,
203+
s.label as sample_label,
204+
s.name as sample_name,
205+
event.pid as event_id,
206+
event.label as event_label,
207+
'direct_event_location' as location_path
208+
FROM nodes s
209+
JOIN nodes e1 ON s.row_id = e1.s AND e1.p = 'produced_by'
210+
JOIN nodes event ON e1.o[1] = event.row_id
211+
JOIN nodes e2 ON event.row_id = e2.s AND e2.p = 'sample_location'
212+
JOIN nodes g ON e2.o[1] = g.row_id
213+
WHERE s.otype = 'MaterialSampleRecord'
214+
AND event.otype = 'SamplingEvent'
215+
AND g.otype = 'GeospatialCoordLocation'
216+
AND g.pid = ?
217+
`;
218+
// pid is bound to the single `?` placeholder rather than interpolated into the SQL text.
const result = await db.queryRow(q, [pid]);
219+
return result;
220+
}
221+
222+
/**
 * Path 2 ("via_site_location"): look up sample information for a geospatial
 * location by following
 *   sample --produced_by--> event --sampling_site--> site --site_location--> location
 * through the `nodes` table, filtered to the location whose pid equals `pid`.
 * Compared with get_samples_1 the result additionally carries `site_name`.
 *
 * @param {string|null} pid - pid of the location node to start from.
 * @returns {Promise<*>} The string "unset" when `pid` is null, "" or "unset";
 *   otherwise whatever `db.queryRow` resolves to for the query below.
 *
 * NOTE(review): `db.queryRow` presumably yields only the first matching row,
 * while the function name and SELECT DISTINCT suggest several samples may be
 * expected — confirm whether `db.query` is intended.
 */
async function get_samples_2(pid) {
223+
// Guard for a missing selection. An `undefined` pid is NOT caught by these
// comparisons and would be passed on to the query.
if (pid === null || pid ==="" || pid == "unset") {
224+
return "unset";
225+
}
226+
const q = `
227+
SELECT DISTINCT
228+
s.pid as sample_id,
229+
s.label as sample_label,
230+
s.name as sample_name,
231+
event.pid as event_id,
232+
event.label as event_label,
233+
site.label as site_name,
234+
'via_site_location' as location_path
235+
FROM nodes s
236+
JOIN nodes e1 ON s.row_id = e1.s AND e1.p = 'produced_by'
237+
JOIN nodes event ON e1.o[1] = event.row_id
238+
JOIN nodes e2 ON event.row_id = e2.s AND e2.p = 'sampling_site'
239+
JOIN nodes site ON e2.o[1] = site.row_id
240+
JOIN nodes e3 ON site.row_id = e3.s AND e3.p = 'site_location'
241+
JOIN nodes g ON e3.o[1] = g.row_id
242+
WHERE s.otype = 'MaterialSampleRecord'
243+
AND event.otype = 'SamplingEvent'
244+
AND site.otype = 'SamplingSite'
245+
AND g.otype = 'GeospatialCoordLocation'
246+
AND g.pid = ?
247+
`;
248+
// pid is bound to the single `?` placeholder rather than interpolated into the SQL text.
const result = await db.queryRow(q, [pid]);
249+
return result;
250+
}
251+
199252
async function locationUsedBy(rowid){
200253
if (rowid === undefined || rowid === null) {
201254
return [];
@@ -238,8 +291,6 @@ viewof pointdata = {
238291

239292
:::
240293

241-
The number of locations in the file is: ${pointdata.length}.
242-
243294
The click point ID is "${clickedPointId}".
244295

245296
```{ojs}
@@ -250,41 +301,7 @@ ${JSON.stringify(selectedGeoRecord, null, 2)}
250301
`
251302
```
252303

253-
## Table Structure Analysis
254-
255-
Understanding the structure and schema of the parquet file:
256-
257-
### Column Schema
258-
259-
```{ojs}
260-
//| code-fold: true
261-
tableSchema = {
262-
const query = `DESCRIBE nodes`;
263-
const data = await loadData(query, [], "loading_schema");
264-
return data;
265-
}
266-
```
267-
268-
<div id="loading_schema">Loading table schema...</div>
269-
270-
```{ojs}
271-
//| code-fold: true
272-
viewof schemaTable = {
273-
const data_table = Inputs.table(tableSchema, {
274-
header: {
275-
column_name: "Column Name",
276-
column_type: "Data Type",
277-
null: "Nullable",
278-
key: "Key",
279-
default: "Default",
280-
extra: "Extra"
281-
}
282-
});
283-
return data_table;
284-
}
285-
```
286-
287-
### Sample Data
304+
## Sample Data
288305

289306
First 10 rows of the dataset to understand the data structure:
290307

@@ -313,133 +330,41 @@ viewof sampleTable = {
313330
}
314331
```
315332

316-
### Sample Data by Object Type
317-
318-
Examples of records for each object type to understand the data semantics:
333+
## getGeoRecord (hardcoded)
319334

320335
```{ojs}
321336
//| code-fold: true
322-
sampleDataByOtype = {
323-
// First get the list of unique object types
324-
const otypeQuery = `SELECT DISTINCT otype FROM nodes ORDER BY otype`;
325-
const otypes = await loadData(otypeQuery, [], "loading_otype_samples");
326-
327-
const results = [];
328-
for (const otypeRow of otypes) {
329-
const otype = otypeRow.otype;
330-
// Get 3 sample records for each otype
331-
const sampleQuery = `SELECT * FROM nodes WHERE otype = ? LIMIT 3`;
332-
const samples = await db.query(sampleQuery, [otype]);
333-
334-
results.push({
335-
otype: otype,
336-
count: samples.length,
337-
samples: samples
338-
});
339-
}
340-
return results;
341-
}
342-
```
343-
344-
<div id="loading_otype_samples">Loading sample data by object type...</div>
345-
346-
```{ojs}
347-
//| code-fold: true
348-
viewof otypeSamplesDisplay = {
349-
const container = html`<div></div>`;
350-
351-
for (const otypeData of sampleDataByOtype) {
352-
const section = html`<div style="margin-bottom: 2rem;">
353-
<h4 style="color: #2563eb; margin-bottom: 0.5rem;">Object Type: ${otypeData.otype}</h4>
354-
<p style="margin: 0.5rem 0; font-style: italic;">Sample records (showing up to 3):</p>
355-
</div>`;
356-
357-
// Create a table for this otype's samples
358-
const table = Inputs.table(otypeData.samples, {
359-
layout: "auto",
360-
width: {
361-
pid: 150,
362-
otype: 120,
363-
latitude: 100,
364-
longitude: 100
365-
}
366-
});
367-
368-
section.appendChild(table);
369-
container.appendChild(section);
370-
}
371-
372-
return container;
373-
}
337+
pid = "geoloc_7ea562cce4c70e4b37f7915e8384880c86607729";
338+
testrecord = await getGeoRecord(pid);
374339
```
375340

376-
## Object Type Counts
377-
378-
The distribution of object types (`otype`) in the dataset:
379-
380341
```{ojs}
381-
//| code-fold: true
382-
otypeCounts = {
383-
const query = `SELECT otype, COUNT(*) as count FROM nodes GROUP BY otype ORDER BY count DESC`;
384-
const data = await loadData(query, [], "loading_otype");
385-
return data;
386-
}
342+
//| echo: false
343+
md`\`\`\`
344+
${JSON.stringify(testrecord, null, 2)}
345+
\`\`\`
346+
`
387347
```
388348

389-
<div id="loading_otype">Loading object type counts...</div>
349+
## Related Sample Path 1 (hardcoded)
390350

391351
```{ojs}
392-
//| code-fold: true
393-
viewof otypeTable = {
394-
const data_table = Inputs.table(otypeCounts, {
395-
header: {
396-
otype: "Object Type",
397-
count: "Count"
398-
},
399-
format: {
400-
count: d => d.toLocaleString()
401-
}
402-
});
403-
return data_table;
404-
}
352+
//| echo: false
353+
samples_1 = await get_samples_1(pid)
354+
md`\`\`\`
355+
${JSON.stringify(samples_1, null, 2)}
356+
\`\`\`
357+
`
405358
```
406359

407-
Total records by object type: ${otypeCounts.reduce((sum, row) => sum + row.count, 0).toLocaleString()}
408-
409-
## Property Distribution Analysis
410360

411-
Understanding the range of properties (predicates) in this graph database structure:
361+
## Related Sample Path 2 (hardcoded)
412362

413363
```{ojs}
414-
//| code-fold: true
415-
propertyDistribution = {
416-
const query = `SELECT p as property, COUNT(*) as count FROM nodes WHERE p IS NOT NULL GROUP BY p ORDER BY count DESC`;
417-
const data = await loadData(query, [], "loading_properties");
418-
return data;
419-
}
420-
```
421-
422-
<div id="loading_properties">Loading property distribution...</div>
423-
424-
```{ojs}
425-
//| code-fold: true
426-
viewof propertyTable = {
427-
const data_table = Inputs.table(propertyDistribution, {
428-
header: {
429-
property: "Property (Predicate)",
430-
count: "Count"
431-
},
432-
format: {
433-
count: d => d.toLocaleString()
434-
},
435-
layout: "auto"
436-
});
437-
return data_table;
438-
}
439-
```
440-
441-
Total records with properties: ${propertyDistribution.reduce((sum, row) => sum + row.count, 0).toLocaleString()}
442-
443-
Unique properties in the dataset: ${propertyDistribution.length.toLocaleString()}
444-
445-
364+
//| echo: false
365+
samples_2 = await get_samples_2(pid)
366+
md`\`\`\`
367+
${JSON.stringify(samples_2, null, 2)}
368+
\`\`\`
369+
`
370+
```

0 commit comments

Comments
 (0)