1+ use std:: path:: Path ;
2+
3+ use anndata:: { backend:: DataContainer , data:: DataFrameIndex , ArrayData , Backend , Data , Readable } ;
4+ use anndata_hdf5:: { H5File , H5 } ;
5+ use polars:: frame:: DataFrame ;
6+ use anndata:: backend:: AttributeOp ;
7+ use anndata:: backend:: GroupOp ;
8+
9+ use crate :: IMArrayElement ;
10+ use crate :: IMElement ;
11+ use crate :: LoadingStrategy ;
12+ use crate :: { chunked_loader:: load_csr_chunked, optimized_loader:: load_csr_optimized, utils:: { read_dataframe_index, should_use_chunked_loading} , IMAnnData , LoadingConfig } ;
13+
14+ pub fn load_h5ad ( h5_path : impl AsRef < Path > ) -> anyhow:: Result < IMAnnData > {
15+ load_h5ad_with_config ( h5_path, LoadingConfig :: default ( ) )
16+ }
17+
18+ pub fn load_h5ad_fast ( h5_path : impl AsRef < Path > ) -> anyhow:: Result < IMAnnData > {
19+ let config = LoadingConfig {
20+ loading_strategy : LoadingStrategy :: ForceComplete ,
21+ chunk_size_mb : 256 ,
22+ memory_threshold_mb : 4096 ,
23+ show_progress : true ,
24+ } ;
25+
26+ load_h5ad_with_config ( h5_path, config)
27+ }
28+
29+ pub fn load_h5ad_conservative ( h5_path : impl AsRef < Path > ) -> anyhow:: Result < IMAnnData > {
30+ let config = LoadingConfig {
31+ loading_strategy : LoadingStrategy :: ForceChunked ,
32+ chunk_size_mb : 64 ,
33+ memory_threshold_mb : 256 ,
34+ show_progress : true ,
35+ } ;
36+
37+ load_h5ad_with_config ( h5_path, config)
38+ }
39+
40+ pub fn load_h5ad_with_config (
41+ h5_path : impl AsRef < Path > ,
42+ config : LoadingConfig ,
43+ ) -> anyhow:: Result < IMAnnData > {
44+ let h5_file = H5 :: open ( h5_path) ?;
45+
46+ if config. show_progress {
47+ println ! ( "Loading H5AD file directly..." ) ;
48+ }
49+
50+ // Load core components
51+ let ( obs_df, obs_names) = load_obs ( & h5_file, & config) ?;
52+ let ( var_df, var_names) = load_var ( & h5_file, & config) ?;
53+ let x_data = load_x_matrix ( & h5_file, & config) ?;
54+
55+ // Create the main structure
56+ let imad = IMAnnData :: new_extended ( x_data, obs_names, var_names, obs_df, var_df) ?;
57+
58+ // Load optional components in parallel if beneficial
59+ load_axis_arrays ( & h5_file, "obsm" , imad. obsm ( ) , & config) ?;
60+ load_axis_arrays ( & h5_file, "obsp" , imad. obsp ( ) , & config) ?;
61+ load_axis_arrays ( & h5_file, "varm" , imad. varm ( ) , & config) ?;
62+ load_axis_arrays ( & h5_file, "varp" , imad. varp ( ) , & config) ?;
63+ load_axis_arrays ( & h5_file, "layers" , imad. layers ( ) , & config) ?;
64+ load_uns ( & h5_file, imad. uns ( ) , & config) ?;
65+
66+ if config. show_progress {
67+ println ! ( "H5AD file loaded successfully" ) ;
68+ }
69+
70+ Ok ( imad)
71+ }
72+
73+ fn load_obs (
74+ h5_file : & H5File ,
75+ config : & LoadingConfig
76+ ) -> anyhow:: Result < ( DataFrame , Vec < String > ) > {
77+ if config. show_progress {
78+ println ! ( "Loading observations..." ) ;
79+ }
80+
81+ if !h5_file. exists ( "obs" ) ? {
82+ return Ok ( ( DataFrame :: empty ( ) , vec ! [ ] ) ) ;
83+ }
84+
85+ let obs_container = DataContainer :: open ( h5_file, "obs" ) ?;
86+ let obs_df: DataFrame = ArrayData :: read ( & obs_container) ?. try_into ( ) ?;
87+
88+ let obs_index = read_dataframe_index ( & obs_container) ?;
89+ let obs_names = obs_index. into_vec ( ) ;
90+
91+ if config. show_progress {
92+ println ! ( " {} observations loaded" , obs_names. len( ) ) ;
93+ }
94+
95+ Ok ( ( obs_df, obs_names) )
96+ }
97+
98+ fn load_var (
99+ h5_file : & H5File ,
100+ config : & LoadingConfig
101+ ) -> anyhow:: Result < ( DataFrame , Vec < String > ) > {
102+ if config. show_progress {
103+ println ! ( "Loading variables..." ) ;
104+ }
105+
106+ if !h5_file. exists ( "var" ) ? {
107+ return Ok ( ( DataFrame :: empty ( ) , vec ! [ ] ) ) ;
108+ }
109+
110+ let var_container = DataContainer :: open ( h5_file, "var" ) ?;
111+ let var_df: DataFrame = ArrayData :: read ( & var_container) ?. try_into ( ) ?;
112+
113+ let var_index = read_dataframe_index ( & var_container) ?;
114+ let var_names = var_index. into_vec ( ) ;
115+
116+ if config. show_progress {
117+ println ! ( " {} variables loaded" , var_names. len( ) ) ;
118+ }
119+
120+ Ok ( ( var_df, var_names) )
121+ }
122+
123+ fn load_x_matrix (
124+ h5_file : & anndata_hdf5:: H5File ,
125+ config : & LoadingConfig
126+ ) -> anyhow:: Result < ArrayData > {
127+ if !h5_file. link_exists ( "X" ) {
128+ if config. show_progress {
129+ println ! ( "No X matrix found, using empty matrix" ) ;
130+ }
131+ return Ok ( ArrayData :: Array (
132+ anndata:: data:: DynArray :: from ( ndarray:: Array2 :: < f64 > :: zeros ( ( 0 , 0 ) ) )
133+ ) ) ;
134+ }
135+
136+ if config. show_progress {
137+ println ! ( "Loading X matrix..." ) ;
138+ }
139+
140+ let x_container = DataContainer :: open ( h5_file, "X" ) ?;
141+
142+ let matrix_type = x_container. encoding_type ( ) ?;
143+ if config. show_progress {
144+ match & matrix_type {
145+ anndata:: backend:: DataType :: CsrMatrix ( _) => {
146+ let group = x_container. as_group ( ) ?;
147+ let shape: Vec < u64 > = group. get_attr ( "shape" ) ?;
148+ let nnz = group. open_dataset ( "data" ) ?. shape ( ) [ 0 ] ;
149+ println ! ( " CSR matrix: {}×{} with {} non-zeros" , shape[ 0 ] , shape[ 1 ] , nnz) ;
150+ }
151+ anndata:: backend:: DataType :: Array ( _) => {
152+ let shape = x_container. as_dataset ( ) ?. shape ( ) ;
153+ println ! ( " Dense matrix: {:?}" , shape) ;
154+ }
155+ _ => {
156+ println ! ( " Matrix type: {:?}" , matrix_type) ;
157+ }
158+ }
159+ }
160+
161+ let result = match matrix_type {
162+ anndata:: backend:: DataType :: CsrMatrix ( _) => {
163+ if should_use_chunked_loading ( & x_container, config) ? {
164+ if config. show_progress {
165+ println ! ( " Using chunked loading for large matrix" ) ;
166+ }
167+ load_csr_chunked ( & x_container, config) ?
168+ } else {
169+ if config. show_progress {
170+ println ! ( " Using optimized CSR loading" ) ;
171+ }
172+ load_csr_optimized ( & x_container) ?
173+ }
174+ }
175+ _ => {
176+ if config. show_progress {
177+ println ! ( " Using standard loading" ) ;
178+ }
179+ ArrayData :: read ( & x_container) ?
180+ }
181+ } ;
182+
183+ if config. show_progress {
184+ println ! ( " X matrix loaded successfully" ) ;
185+ }
186+
187+ Ok ( result)
188+ }
189+
190+ fn load_axis_arrays (
191+ h5_file : & anndata_hdf5:: H5File ,
192+ group_name : & str ,
193+ target_arrays : crate :: IMAxisArrays ,
194+ config : & LoadingConfig ,
195+ ) -> anyhow:: Result < ( ) > {
196+ if !h5_file. link_exists ( group_name) {
197+ return Ok ( ( ) ) ;
198+ }
199+
200+ let group = h5_file. open_group ( group_name) ?;
201+ let array_names = group. list ( ) ?;
202+
203+ if array_names. is_empty ( ) {
204+ return Ok ( ( ) ) ;
205+ }
206+
207+ if config. show_progress {
208+ println ! ( "Loading {} ({} items)..." , group_name, array_names. len( ) ) ;
209+ }
210+ let arr_n_len = array_names. len ( ) ;
211+ for array_name in array_names {
212+ if config. show_progress && arr_n_len > 3 {
213+ println ! ( " Loading {}/{}" , group_name, array_name) ;
214+ }
215+
216+ let array_container = DataContainer :: open ( & group, & array_name) ?;
217+
218+ let array_data = if group_name == "layers" {
219+ match array_container. encoding_type ( ) ? {
220+ anndata:: backend:: DataType :: CsrMatrix ( _) => {
221+ if should_use_chunked_loading ( & array_container, config) ? {
222+ if config. show_progress {
223+ println ! ( " Using chunked loading for layer {}" , array_name) ;
224+ }
225+ load_csr_chunked ( & array_container, config) ?
226+ } else {
227+ load_csr_optimized ( & array_container) ?
228+ }
229+ }
230+ _ => ArrayData :: read ( & array_container) ?,
231+ }
232+ } else {
233+ ArrayData :: read ( & array_container) ?
234+ } ;
235+
236+ let im_array = IMArrayElement :: new ( array_data) ;
237+ target_arrays. add_array ( array_name, im_array) ?;
238+ }
239+
240+ if config. show_progress {
241+ println ! ( " {} loaded successfully" , group_name) ;
242+ }
243+
244+ Ok ( ( ) )
245+ }
246+
247+ fn load_uns (
248+ h5_file : & anndata_hdf5:: H5File ,
249+ target_uns : crate :: IMElementCollection ,
250+ config : & LoadingConfig ,
251+ ) -> anyhow:: Result < ( ) > {
252+ if !h5_file. link_exists ( "uns" ) {
253+ return Ok ( ( ) ) ;
254+ }
255+
256+ if config. show_progress {
257+ println ! ( "Loading unstructured annotations..." ) ;
258+ }
259+
260+ let uns_group = h5_file. open_group ( "uns" ) ?;
261+ let item_names = uns_group. list ( ) ?;
262+
263+ for item_name in item_names. iter ( ) {
264+ let item_container = DataContainer :: open ( & uns_group, & item_name) ?;
265+ let data = Data :: read ( & item_container) ?;
266+ let im_element = IMElement :: new ( data) ;
267+ target_uns. add_data ( item_name. clone ( ) , im_element) ?;
268+ }
269+
270+ if config. show_progress && !item_names. is_empty ( ) {
271+ println ! ( " {} items loaded in uns" , item_names. len( ) ) ;
272+ }
273+
274+ Ok ( ( ) )
275+ }
0 commit comments