|
70 | 70 | "\n", |
71 | 71 | "# Restart runtime after installing ITK (required for ITK to load properly)\n", |
72 | 72 | "import sys\n", |
| 73 | + "\n", |
73 | 74 | "if \"google.colab\" in sys.modules:\n", |
74 | 75 | " try:\n", |
75 | 76 | " import itk # noqa: F401\n", |
76 | 77 | " except ImportError:\n", |
77 | 78 | " print(\"Restarting runtime to load ITK...\")\n", |
78 | 79 | " import os\n", |
| 80 | + "\n", |
79 | 81 | " os.kill(os.getpid(), 9)" |
80 | 82 | ] |
81 | 83 | }, |
|
170 | 172 | "\n", |
171 | 173 | "# Query the primary 'index' table to see the overall scale of IDC.\n", |
172 | 174 | "# 'collection_id' groups series by dataset; 'PatientID' is the DICOM patient identifier.\n", |
173 | | - "stats = client.sql_query(\"\"\"\n", |
| 175 | + "stats = client.sql_query(\n", |
| 176 | + " \"\"\"\n", |
174 | 177 | " SELECT COUNT(DISTINCT collection_id) as collections,\n", |
175 | 178 | " COUNT(DISTINCT PatientID) as patients,\n", |
176 | 179 | " COUNT(DISTINCT SeriesInstanceUID) as series,\n", |
177 | 180 | " SUM(series_size_MB) as size_mb,\n", |
178 | 181 | " ROUND(SUM(series_size_MB) / 1e6, 2) as size_tb\n", |
179 | 182 | " FROM index\n", |
180 | | - "\"\"\")\n", |
| 183 | + "\"\"\"\n", |
| 184 | + ")\n", |
181 | 185 | "row = stats.iloc[0]\n", |
182 | 186 | "print(f\"Collections: {row['collections']}, Patients: {row['patients']}, Total size: {row['size_tb']}TB\")" |
183 | 187 | ] |
|
215 | 219 | "\n", |
216 | 220 | "# Join collections_index with the per-series index to filter by both cancer type and modality.\n", |
217 | 221 | "# The 'index' table has Modality per series; collections_index has CancerTypes per collection.\n", |
218 | | - "lung_collections = client.sql_query(\"\"\"\n", |
| 222 | + "lung_collections = client.sql_query(\n", |
| 223 | + " \"\"\"\n", |
219 | 224 | " SELECT c.collection_id, c.Subjects, c.CancerTypes,\n", |
220 | 225 | " COUNT(DISTINCT CASE WHEN i.Modality = 'CT' THEN i.SeriesInstanceUID END) as ct_series\n", |
221 | 226 | " FROM collections_index c\n", |
|
225 | 230 | " HAVING ct_series > 0\n", |
226 | 231 | " ORDER BY c.Subjects DESC\n", |
227 | 232 | " LIMIT 5\n", |
228 | | - "\"\"\")\n", |
| 233 | + "\"\"\"\n", |
| 234 | + ")\n", |
229 | 235 | "print(\"Lung CT collections:\")\n", |
230 | 236 | "print(lung_collections.to_string(index=False))" |
231 | 237 | ] |
|
260 | 266 | "# Select a few small CT series that form well-formed 3D volumes.\n", |
261 | 267 | "# We join 'index' (series metadata) with 'volume_geometry_index' (geometry flags).\n", |
262 | 268 | "# ORDER BY series_size_MB is not here, but LIMIT 3 keeps the demo download small.\n", |
263 | | - "series_df = client.sql_query(\"\"\"\n", |
| 269 | + "series_df = client.sql_query(\n", |
| 270 | + " \"\"\"\n", |
264 | 271 | " SELECT index.SeriesInstanceUID, PatientID, Modality,\n", |
265 | 272 | " ROUND(series_size_MB, 2) as size_mb\n", |
266 | 273 | " FROM index\n", |
267 | 274 | " JOIN volume_geometry_index USING (SeriesInstanceUID)\n", |
268 | 275 | " WHERE regularly_spaced_3d_volume = TRUE AND Modality = 'CT'\n", |
269 | 276 | " LIMIT 3\n", |
270 | | - "\"\"\")\n", |
| 277 | + "\"\"\"\n", |
| 278 | + ")\n", |
271 | 279 | "print(f\"Found {len(series_df)} CT series\")" |
272 | 280 | ] |
273 | 281 | }, |
|
327 | 335 | "source": [ |
328 | 336 | "# Create a temporary directory to hold downloaded DICOM files.\n", |
329 | 337 | "data_dir = tempfile.mkdtemp(prefix=\"idc_monai_\")\n", |
330 | | - "series_uids = list(series_df['SeriesInstanceUID'])\n", |
| 338 | + "series_uids = list(series_df[\"SeriesInstanceUID\"])\n", |
331 | 339 | "\n", |
332 | 340 | "print(f\"Downloading {len(series_uids)} series...\")\n", |
333 | 341 | "# download_from_selection() accepts a list of SeriesInstanceUIDs and fetches\n", |
334 | 342 | "# all DICOM files for those series from IDC's GCS buckets.\n", |
335 | 343 | "# dirTemplate=\"%SeriesInstanceUID\" puts each series in its own subdirectory —\n", |
336 | 344 | "# required because MONAI's ITKReader reads a directory to reconstruct a 3D volume.\n", |
337 | | - "client.download_from_selection(\n", |
338 | | - " seriesInstanceUID=series_uids,\n", |
339 | | - " downloadDir=data_dir,\n", |
340 | | - " dirTemplate=\"%SeriesInstanceUID\"\n", |
341 | | - ")\n", |
| 345 | + "client.download_from_selection(seriesInstanceUID=series_uids, downloadDir=data_dir, dirTemplate=\"%SeriesInstanceUID\")\n", |
342 | 346 | "print(\"Done!\")" |
343 | 347 | ] |
344 | 348 | }, |
|
383 | 387 | "source": [ |
384 | 388 | "# Define transforms for CT preprocessing\n", |
385 | 389 | "# Use ITKReader explicitly to load DICOM series from directories\n", |
386 | | - "transforms = Compose([\n", |
387 | | - " LoadImaged(keys=[\"image\"], reader=ITKReader()),\n", |
388 | | - " EnsureChannelFirstd(keys=[\"image\"]),\n", |
389 | | - " Orientationd(keys=[\"image\"], axcodes=\"RAS\"),\n", |
390 | | - " Spacingd(keys=[\"image\"], pixdim=(1.5, 1.5, 2.0)),\n", |
391 | | - " ScaleIntensityRanged(keys=[\"image\"], a_min=-175, a_max=250,\n", |
392 | | - " b_min=0.0, b_max=1.0, clip=True),\n", |
393 | | - "])\n", |
| 390 | + "transforms = Compose(\n", |
| 391 | + " [\n", |
| 392 | + " LoadImaged(keys=[\"image\"], reader=ITKReader()),\n", |
| 393 | + " EnsureChannelFirstd(keys=[\"image\"]),\n", |
| 394 | + " Orientationd(keys=[\"image\"], axcodes=\"RAS\"),\n", |
| 395 | + " Spacingd(keys=[\"image\"], pixdim=(1.5, 1.5, 2.0)),\n", |
| 396 | + " ScaleIntensityRanged(keys=[\"image\"], a_min=-175, a_max=250, b_min=0.0, b_max=1.0, clip=True),\n", |
| 397 | + " ]\n", |
| 398 | + ")\n", |
394 | 399 | "\n", |
395 | 400 | "# Create dataset\n", |
396 | 401 | "data_dicts = [{\"image\": os.path.join(data_dir, uid)} for uid in series_uids]\n", |
|
435 | 440 | } |
436 | 441 | ], |
437 | 442 | "source": [ |
438 | | - "\n", |
439 | | - "image = sample['image'][0]\n", |
| 443 | + "image = sample[\"image\"][0]\n", |
440 | 444 | "z = image.shape[2] // 2\n", |
441 | 445 | "\n", |
442 | 446 | "plt.figure(figsize=(6, 6))\n", |
443 | | - "plt.imshow(image[:, :, z].T, cmap='gray', origin='lower')\n", |
444 | | - "plt.title(f'CT from IDC (slice {z})')\n", |
445 | | - "plt.axis('off')\n", |
446 | | - "plt.show()\n" |
| 447 | + "plt.imshow(image[:, :, z].T, cmap=\"gray\", origin=\"lower\")\n", |
| 448 | + "plt.title(f\"CT from IDC (slice {z})\")\n", |
| 449 | + "plt.axis(\"off\")\n", |
| 450 | + "plt.show()" |
447 | 451 | ] |
448 | 452 | }, |
449 | 453 | { |
|
504 | 508 | "# TotalSegmentator is an AI model that auto-segments 100+ anatomical structures.\n", |
505 | 509 | "# The JOIN links each segmentation back to its source CT via segmented_SeriesInstanceUID.\n", |
506 | 510 | "# We sort by image size (ASC) so the demo downloads the smallest available pair.\n", |
507 | | - "paired = client.sql_query(\"\"\"\n", |
| 511 | + "paired = client.sql_query(\n", |
| 512 | + " \"\"\"\n", |
508 | 513 | " SELECT src.SeriesInstanceUID as image_uid,\n", |
509 | 514 | " seg.SeriesInstanceUID as seg_uid,\n", |
510 | 515 | " src.collection_id, seg.total_segments,\n", |
|
515 | 520 | " AND seg.AlgorithmName LIKE '%TotalSegmentator%'\n", |
516 | 521 | " ORDER BY src.series_size_MB ASC\n", |
517 | 522 | " LIMIT 3\n", |
518 | | - "\"\"\")\n", |
| 523 | + "\"\"\"\n", |
| 524 | + ")\n", |
519 | 525 | "print(\"CT with TotalSegmentator segmentations:\")\n", |
520 | 526 | "print(paired.to_string(index=False))" |
521 | 527 | ] |
|
538 | 544 | "\n", |
539 | 545 | "print(\"Downloading image and segmentation pair...\")\n", |
540 | 546 | "client.download_from_selection(\n", |
541 | | - " seriesInstanceUID=[demo_pair['image_uid'], demo_pair['seg_uid']],\n", |
| 547 | + " seriesInstanceUID=[demo_pair[\"image_uid\"], demo_pair[\"seg_uid\"]],\n", |
542 | 548 | " downloadDir=seg_dir,\n", |
543 | | - " dirTemplate=\"%SeriesInstanceUID\"\n", |
| 549 | + " dirTemplate=\"%SeriesInstanceUID\",\n", |
544 | 550 | ")\n", |
545 | 551 | "print(\"Done!\")" |
546 | 552 | ] |
|
600 | 606 | " physical axis i. ITK affine formula:\n", |
601 | 607 | " world_lps = D @ diag(spacing) @ voxel + origin\n", |
602 | 608 | " \"\"\"\n", |
603 | | - " lps_to_ras = np.diag([-1., -1., 1.])\n", |
| 609 | + " lps_to_ras = np.diag([-1.0, -1.0, 1.0])\n", |
604 | 610 | " affine = np.eye(4)\n", |
605 | 611 | " affine[:3, :3] = lps_to_ras @ direction @ np.diag(spacing)\n", |
606 | 612 | " affine[:3, 3] = lps_to_ras @ origin\n", |
|
645 | 651 | "\n", |
646 | 652 | "\n", |
647 | 653 | "# Load CT with MONAI's ITKReader\n", |
648 | | - "ct_transforms = Compose([\n", |
649 | | - " LoadImaged(keys=[\"image\"], reader=ITKReader()),\n", |
650 | | - " EnsureChannelFirstd(keys=[\"image\"]),\n", |
651 | | - "])\n", |
| 654 | + "ct_transforms = Compose(\n", |
| 655 | + " [\n", |
| 656 | + " LoadImaged(keys=[\"image\"], reader=ITKReader()),\n", |
| 657 | + " EnsureChannelFirstd(keys=[\"image\"]),\n", |
| 658 | + " ]\n", |
| 659 | + ")\n", |
652 | 660 | "\n", |
653 | 661 | "# Load SEG with our custom LoadDicomSegd\n", |
654 | | - "seg_transforms = Compose([\n", |
655 | | - " LoadDicomSegd(keys=[\"label\"]),\n", |
656 | | - " EnsureChannelFirstd(keys=[\"label\"]),\n", |
657 | | - "])\n", |
| 662 | + "seg_transforms = Compose(\n", |
| 663 | + " [\n", |
| 664 | + " LoadDicomSegd(keys=[\"label\"]),\n", |
| 665 | + " EnsureChannelFirstd(keys=[\"label\"]),\n", |
| 666 | + " ]\n", |
| 667 | + ")\n", |
658 | 668 | "\n", |
659 | 669 | "# Load both\n", |
660 | | - "image_path = os.path.join(seg_dir, demo_pair['image_uid'])\n", |
661 | | - "seg_path = os.path.join(seg_dir, demo_pair['seg_uid'])\n", |
| 670 | + "image_path = os.path.join(seg_dir, demo_pair[\"image_uid\"])\n", |
| 671 | + "seg_path = os.path.join(seg_dir, demo_pair[\"seg_uid\"])\n", |
662 | 672 | "\n", |
663 | 673 | "ct_data = ct_transforms({\"image\": image_path})\n", |
664 | 674 | "seg_data = seg_transforms({\"label\": seg_path})\n", |
|
782 | 792 | " modifier_seq = seg.get(\"SegmentedPropertyTypeModifierCodeSequence\", {})\n", |
783 | 793 | " modifier = modifier_seq.get(\"CodeMeaning\", \"\") if modifier_seq else \"\"\n", |
784 | 794 | "\n", |
785 | | - " segments.append({\n", |
786 | | - " \"label_id\": seg.get(\"labelID\", 0),\n", |
787 | | - " \"name\": seg.get(\"SegmentLabel\", \"Unknown\"),\n", |
788 | | - " \"category\": category,\n", |
789 | | - " \"type\": seg_type,\n", |
790 | | - " \"type_code\": f\"{coding_scheme}:{type_code}\" if type_code else \"\",\n", |
791 | | - " \"modifier\": modifier,\n", |
792 | | - " \"color_rgb\": seg.get(\"recommendedDisplayRGBValue\", [128, 128, 128]),\n", |
793 | | - " \"algorithm\": seg.get(\"SegmentAlgorithmName\", \"\"),\n", |
794 | | - " })\n", |
| 795 | + " segments.append(\n", |
| 796 | + " {\n", |
| 797 | + " \"label_id\": seg.get(\"labelID\", 0),\n", |
| 798 | + " \"name\": seg.get(\"SegmentLabel\", \"Unknown\"),\n", |
| 799 | + " \"category\": category,\n", |
| 800 | + " \"type\": seg_type,\n", |
| 801 | + " \"type_code\": f\"{coding_scheme}:{type_code}\" if type_code else \"\",\n", |
| 802 | + " \"modifier\": modifier,\n", |
| 803 | + " \"color_rgb\": seg.get(\"recommendedDisplayRGBValue\", [128, 128, 128]),\n", |
| 804 | + " \"algorithm\": seg.get(\"SegmentAlgorithmName\", \"\"),\n", |
| 805 | + " }\n", |
| 806 | + " )\n", |
795 | 807 | "\n", |
796 | 808 | " return sorted(segments, key=lambda x: x[\"label_id\"])\n", |
797 | 809 | "\n", |
798 | 810 | "\n", |
799 | 811 | "# Extract segment information\n", |
800 | | - "overlay_info = seg_data.get('label_meta_dict', {}).get('overlay_info', {})\n", |
| 812 | + "overlay_info = seg_data.get(\"label_meta_dict\", {}).get(\"overlay_info\", {})\n", |
801 | 813 | "segments = get_segment_info(overlay_info)\n", |
802 | 814 | "\n", |
803 | 815 | "print(f\"Found {len(segments)} segments in DICOM SEG:\\n\")\n", |
|
849 | 861 | "# Find SEG z-slice at the midpoint of the labeled region\n", |
850 | 862 | "z_slices_with_labels = np.where(seg_np.sum(axis=(0, 1)) > 0)[0]\n", |
851 | 863 | "seg_mid_z = (\n", |
852 | | - " int(z_slices_with_labels[len(z_slices_with_labels) // 2])\n", |
853 | | - " if len(z_slices_with_labels) > 0\n", |
854 | | - " else seg_np.shape[2] // 2\n", |
| 864 | + " int(z_slices_with_labels[len(z_slices_with_labels) // 2]) if len(z_slices_with_labels) > 0 else seg_np.shape[2] // 2\n", |
855 | 865 | ")\n", |
856 | 866 | "\n", |
857 | 867 | "# Map SEG midpoint z to CT z via world coordinates\n", |
|
915 | 925 | " for seg in all_segments:\n", |
916 | 926 | " label_id = seg.get(\"labelID\", 0)\n", |
917 | 927 | " rgb = seg.get(\"recommendedDisplayRGBValue\", [128, 128, 128])\n", |
918 | | - " colors[label_id] = [rgb[0]/255, rgb[1]/255, rgb[2]/255, 1.0]\n", |
| 928 | + " colors[label_id] = [rgb[0] / 255, rgb[1] / 255, rgb[2] / 255, 1.0]\n", |
919 | 929 | "\n", |
920 | 930 | " return ListedColormap(colors)\n", |
921 | 931 | "\n", |
|
928 | 938 | "fig, axes = plt.subplots(1, 3, figsize=(15, 5))\n", |
929 | 939 | "\n", |
930 | 940 | "# CT image only\n", |
931 | | - "axes[0].imshow(ct_np[:, :, z_mid].T, cmap='gray', origin='lower', vmin=-1000, vmax=500)\n", |
932 | | - "axes[0].set_title(f'CT Image (RAS, z={z_mid})')\n", |
933 | | - "axes[0].axis('off')\n", |
| 941 | + "axes[0].imshow(ct_np[:, :, z_mid].T, cmap=\"gray\", origin=\"lower\", vmin=-1000, vmax=500)\n", |
| 942 | + "axes[0].set_title(f\"CT Image (RAS, z={z_mid})\")\n", |
| 943 | + "axes[0].axis(\"off\")\n", |
934 | 944 | "\n", |
935 | 945 | "# Segmentation only (using DICOM SEG colors)\n", |
936 | | - "axes[1].imshow(seg_np[:, :, seg_mid_z].T, cmap=seg_cmap, origin='lower',\n", |
937 | | - " vmin=0, vmax=len(seg_cmap.colors)-1, interpolation='nearest')\n", |
938 | | - "axes[1].set_title(f'Segmentation (RAS, z={seg_mid_z})\\n({int(seg_np.max())} labels, DICOM SEG colors)')\n", |
939 | | - "axes[1].axis('off')\n", |
| 946 | + "axes[1].imshow(\n", |
| 947 | + " seg_np[:, :, seg_mid_z].T,\n", |
| 948 | + " cmap=seg_cmap,\n", |
| 949 | + " origin=\"lower\",\n", |
| 950 | + " vmin=0,\n", |
| 951 | + " vmax=len(seg_cmap.colors) - 1,\n", |
| 952 | + " interpolation=\"nearest\",\n", |
| 953 | + ")\n", |
| 954 | + "axes[1].set_title(f\"Segmentation (RAS, z={seg_mid_z})\\n({int(seg_np.max())} labels, DICOM SEG colors)\")\n", |
| 955 | + "axes[1].axis(\"off\")\n", |
940 | 956 | "\n", |
941 | 957 | "# Overlay — both arrays are in RAS, z-slices matched via world coordinates\n", |
942 | | - "axes[2].imshow(ct_np[:, :, z_mid].T, cmap='gray', origin='lower', vmin=-1000, vmax=500)\n", |
| 958 | + "axes[2].imshow(ct_np[:, :, z_mid].T, cmap=\"gray\", origin=\"lower\", vmin=-1000, vmax=500)\n", |
943 | 959 | "seg_slice = seg_np[:, :, seg_mid_z]\n", |
944 | 960 | "mask = np.ma.masked_where(seg_slice == 0, seg_slice)\n", |
945 | | - "axes[2].imshow(mask.T, cmap=seg_cmap, alpha=0.6, origin='lower',\n", |
946 | | - " vmin=0, vmax=len(seg_cmap.colors)-1, interpolation='nearest')\n", |
947 | | - "axes[2].set_title('Overlay (RAS)\\n(DICOM SEG colors)')\n", |
948 | | - "axes[2].axis('off')\n", |
| 961 | + "axes[2].imshow(\n", |
| 962 | + " mask.T, cmap=seg_cmap, alpha=0.6, origin=\"lower\", vmin=0, vmax=len(seg_cmap.colors) - 1, interpolation=\"nearest\"\n", |
| 963 | + ")\n", |
| 964 | + "axes[2].set_title(\"Overlay (RAS)\\n(DICOM SEG colors)\")\n", |
| 965 | + "axes[2].axis(\"off\")\n", |
949 | 966 | "\n", |
950 | | - "plt.suptitle('CT + TotalSegmentator Segmentation', fontsize=14)\n", |
| 967 | + "plt.suptitle(\"CT + TotalSegmentator Segmentation\", fontsize=14)\n", |
951 | 968 | "plt.tight_layout()\n", |
952 | 969 | "plt.show()\n", |
953 | 970 | "\n", |
|
994 | 1011 | "source": [ |
995 | 1012 | "# Always check licenses before use\n", |
996 | 1013 | "uid_list = \", \".join(f\"'{uid}'\" for uid in series_uids)\n", |
997 | | - "licenses = client.sql_query(f\"\"\"\n", |
| 1014 | + "licenses = client.sql_query(\n", |
| 1015 | + " f\"\"\"\n", |
998 | 1016 | " SELECT license_short_name, COUNT(*) as count\n", |
999 | 1017 | " FROM index WHERE SeriesInstanceUID IN ({uid_list})\n", |
1000 | 1018 | " GROUP BY license_short_name\n", |
1001 | | - "\"\"\")\n", |
| 1019 | + "\"\"\"\n", |
| 1020 | + ")\n", |
1002 | 1021 | "print(\"Licenses:\")\n", |
1003 | 1022 | "print(licenses.to_string(index=False))" |
1004 | 1023 | ] |
|
0 commit comments