|
| 1 | +use anndata::{ |
| 2 | + backend::{AttributeOp, Backend, DataContainer, DatasetOp, GroupOp, ScalarType}, |
| 3 | + data::{ArrayData, SelectInfoElem}, |
| 4 | + ArrayElemOp, |
| 5 | +}; |
| 6 | +use nalgebra_sparse::{pattern::SparsityPattern, CsrMatrix}; |
| 7 | + |
| 8 | + |
| 9 | + |
| 10 | + |
| 11 | + |
| 12 | + |
| 13 | + |
| 14 | +pub fn load_csr_chunked<B: Backend>( |
| 15 | + container: &DataContainer<B>, |
| 16 | + config: &LoadingConfig, |
| 17 | +) -> anyhow::Result<ArrayData> { |
| 18 | + let group = container.as_group()?; |
| 19 | + let shape: Vec<u64> = group.get_attr("shape")?; |
| 20 | + let nrows = shape[0] as usize; |
| 21 | + let ncols = shape[1] as usize; |
| 22 | + |
| 23 | + let data_ds = group.open_dataset("data")?; |
| 24 | + let indices_ds = group.open_dataset("indices")?; |
| 25 | + let indptr_ds = group.open_dataset("indptr")?; |
| 26 | + |
| 27 | + // Use the helper function to read indptr |
| 28 | + let indptr = read_array_as_usize::<B>(&indptr_ds)?; |
| 29 | + |
| 30 | + let nnz = data_ds.shape()[0]; |
| 31 | + |
| 32 | + if config.show_progress && nnz > 10_000_000 { |
| 33 | + println!( |
| 34 | + "Loading CSR matrix: {} rows, {} cols, {} non-zeros", |
| 35 | + nrows, ncols, nnz |
| 36 | + ); |
| 37 | + } |
| 38 | + |
| 39 | + match data_ds.dtype()? { |
| 40 | + ScalarType::F64 => load_csr_typed::<B, f64>( |
| 41 | + nrows, ncols, nnz, indptr, &data_ds, &indices_ds, config, |
| 42 | + ), |
| 43 | + ScalarType::F32 => load_csr_typed::<B, f32>( |
| 44 | + nrows, ncols, nnz, indptr, &data_ds, &indices_ds, config, |
| 45 | + ), |
| 46 | + ScalarType::I64 => load_csr_typed::<B, i64>( |
| 47 | + nrows, ncols, nnz, indptr, &data_ds, &indices_ds, config, |
| 48 | + ), |
| 49 | + ScalarType::I32 => load_csr_typed::<B, i32>( |
| 50 | + nrows, ncols, nnz, indptr, &data_ds, &indices_ds, config, |
| 51 | + ), |
| 52 | + ScalarType::I16 => load_csr_typed::<B, i16>( |
| 53 | + nrows, ncols, nnz, indptr, &data_ds, &indices_ds, config, |
| 54 | + ), |
| 55 | + ScalarType::I8 => load_csr_typed::<B, i8>( |
| 56 | + nrows, ncols, nnz, indptr, &data_ds, &indices_ds, config, |
| 57 | + ), |
| 58 | + ScalarType::U64 => load_csr_typed::<B, u64>( |
| 59 | + nrows, ncols, nnz, indptr, &data_ds, &indices_ds, config, |
| 60 | + ), |
| 61 | + ScalarType::U32 => load_csr_typed::<B, u32>( |
| 62 | + nrows, ncols, nnz, indptr, &data_ds, &indices_ds, config, |
| 63 | + ), |
| 64 | + ScalarType::U16 => load_csr_typed::<B, u16>( |
| 65 | + nrows, ncols, nnz, indptr, &data_ds, &indices_ds, config, |
| 66 | + ), |
| 67 | + ScalarType::U8 => load_csr_typed::<B, u8>( |
| 68 | + nrows, ncols, nnz, indptr, &data_ds, &indices_ds, config, |
| 69 | + ), |
| 70 | + dt => anyhow::bail!("Unsupported data type for CSR matrix: {:?}", dt), |
| 71 | + } |
| 72 | +} |
| 73 | + |
| 74 | + |
| 75 | +fn load_csr_typed<B: Backend, T: anndata::backend::BackendData>( |
| 76 | + nrows: usize, |
| 77 | + ncols: usize, |
| 78 | + nnz: usize, |
| 79 | + indptr: Vec<usize>, |
| 80 | + data_ds: &B::Dataset, |
| 81 | + indices_ds: &B::Dataset, |
| 82 | + config: &LoadingConfig, |
| 83 | +) -> anyhow::Result<ArrayData> |
| 84 | +where |
| 85 | + anndata::ArrayData: std::convert::From<nalgebra_sparse::CsrMatrix<T>> |
| 86 | +{ |
| 87 | + let chunk_size = (config.chunk_size_mb * 1_048_576) / (std::mem::size_of::<T>() + 8); |
| 88 | + let chunk_size = chunk_size.max(1000); |
| 89 | + |
| 90 | + let mut data = Vec::with_capacity(nnz); |
| 91 | + let mut indices = Vec::with_capacity(nnz); |
| 92 | + |
| 93 | + let mut offset = 0; |
| 94 | + let mut last_progress = 0; |
| 95 | + |
| 96 | + while offset < nnz { |
| 97 | + let chunk_end = (offset + chunk_size).min(nnz); |
| 98 | + |
| 99 | + let data_array = data_ds.read_array_slice::<T, _, ndarray::Ix1>(&[SelectInfoElem::from(offset..chunk_end)])?; |
| 100 | + let data_chunk: Vec<T> = data_array.into_raw_vec(); |
| 101 | + |
| 102 | + |
| 103 | + match indices_ds.dtype()? { |
| 104 | + ScalarType::U64 => { |
| 105 | + let indices_array = indices_ds.read_array_slice::<u64, _, ndarray::Ix1>(&[SelectInfoElem::from(offset..chunk_end)])?; |
| 106 | + let (indices_u64, _) = indices_array.into_raw_vec_and_offset(); |
| 107 | + indices.extend(indices_u64.into_iter().map(|x| x as usize)); |
| 108 | + } |
| 109 | + ScalarType::U32 => { |
| 110 | + let indices_array = indices_ds.read_array_slice::<u32, _, ndarray::Ix1>(&[SelectInfoElem::from(offset..chunk_end)])?; |
| 111 | + let (indices_u32, _) = indices_array.into_raw_vec_and_offset(); |
| 112 | + indices.extend(indices_u32.into_iter().map(|x| x as usize)); |
| 113 | + } |
| 114 | + ScalarType::I64 => { |
| 115 | + let indices_array = indices_ds.read_array_slice::<i64, _, ndarray::Ix1>(&[SelectInfoElem::from(offset..chunk_end)])?; |
| 116 | + let (indices_i64, _) = indices_array.into_raw_vec_and_offset(); |
| 117 | + indices.extend(indices_i64.into_iter().map(|x| x as usize)); |
| 118 | + } |
| 119 | + ScalarType::I32 => { |
| 120 | + let indices_array = indices_ds.read_array_slice::<i32, _, ndarray::Ix1>(&[SelectInfoElem::from(offset..chunk_end)])?; |
| 121 | + let (indices_i32, _) = indices_array.into_raw_vec_and_offset(); |
| 122 | + indices.extend(indices_i32.into_iter().map(|x| x as usize)); |
| 123 | + } |
| 124 | + _ => anyhow::bail!("Unsupported index type for CSR matrix"), |
| 125 | + } |
| 126 | + |
| 127 | + data.extend(data_chunk); |
| 128 | + |
| 129 | + offset = chunk_end; |
| 130 | + |
| 131 | + if config.show_progress && nnz > 10_000_000 { |
| 132 | + let progress = (offset as f64 / nnz as f64 * 100.0) as usize; |
| 133 | + if progress >= last_progress + 10 { |
| 134 | + println!("Loading CSR matrix: {}%", progress); |
| 135 | + last_progress = progress; |
| 136 | + } |
| 137 | + } |
| 138 | + } |
| 139 | + |
| 140 | + if config.show_progress && nnz > 10_000_000 { |
| 141 | + println!("Constructing CSR matrix structure..."); |
| 142 | + } |
| 143 | + |
| 144 | + let pattern = unsafe { |
| 145 | + SparsityPattern::from_offset_and_indices_unchecked(nrows, ncols, indptr, indices) |
| 146 | + }; |
| 147 | + let csr = CsrMatrix::try_from_pattern_and_values(pattern, data).map_err(|e| anyhow::anyhow!("There was an error constructing the matrix {}", e))?; |
| 148 | + |
| 149 | + Ok(ArrayData::from(csr)) |
| 150 | +} |
0 commit comments