1- """This code performs several tests for the dataset module.
1+ """This code performs several tests for the dataset module.
22"""
33from __future__ import annotations
44
77import os
88import zipfile
99from importlib .util import find_spec
10+ from pathlib import Path
11+ from unittest .mock import patch
1012
1113import pytest
1214
13- from sed .core .config import USER_CONFIG_PATH
1415from sed .dataset import dataset as ds
1516from sed .dataset import DatasetsManager as dm
1617
@@ -23,16 +24,48 @@ def zip_buffer():
2324 zip_buffer = io .BytesIO ()
2425 with zipfile .ZipFile (zip_buffer , "a" , zipfile .ZIP_DEFLATED ) as zip_file :
2526 zip_file .writestr ("test_file.txt" , "This is a test file inside the zip." )
26- # add a subdir
2727 zip_file .writestr ("subdir/test_subdir.txt" , "This is a test file inside the subdir." )
2828 return zip_buffer
2929
3030
@pytest.fixture
def zip_file(tmp_path, zip_buffer):
    """Write the in-memory test archive to ``<tmp_path>/datasets/Test/Test.zip``.

    Args:
        tmp_path: pytest's per-test temporary directory.
        zip_buffer: Fixture providing the zip archive as a ``BytesIO`` buffer.

    Returns:
        Path of the zip file that was written.
    """
    archive = tmp_path / "datasets" / "Test" / "Test.zip"
    # No exist_ok here: creating the directory fails loudly if it already exists.
    archive.parent.mkdir(parents=True)
    archive.write_bytes(zip_buffer.getvalue())
    return archive
38+
39+
@pytest.fixture
def mock_dataset_paths(tmp_path):
    """Point the dataset directory and dataset config files into ``tmp_path``.

    While the fixture is active the following are patched:

    * ``ds._dir`` -> ``<tmp_path>/datasets/Test``
    * ``sed.core.config.USER_CONFIG_PATH`` -> ``tmp_path``
    * ``dm.json_path`` -> user/folder JSON files inside ``tmp_path``; the
      "module" entry keeps the module-level ``json_path`` value.

    Yields:
        dict with the keys ``"user"`` and ``"folder"`` (paths of the temporary
        JSON files) and ``"tmp_path"`` (the temporary base directory).
    """
    base = Path(tmp_path)
    user_json = base / "user_datasets.json"
    folder_json = base / "folder_datasets.json"

    # NOTE(review): ``json_path`` is expected to be defined at module level in
    # this test file - confirm it is still present.
    redirected_json_paths = {
        "user": str(user_json),
        "module": json_path,
        "folder": str(folder_json),
    }

    with patch.object(ds, "_dir", str(base / "datasets" / "Test")):
        with patch("sed.core.config.USER_CONFIG_PATH", base):
            with patch.object(dm, "json_path", redirected_json_paths):
                yield {"user": user_json, "folder": folder_json, "tmp_path": base}
56+
57+
@pytest.fixture
def sample_dataset_config():
    """Return a minimal datasets configuration with two entries.

    ``"Test"`` declares one subdirectory (``"subdir"``); ``"TestSimple"`` has
    no ``"subdirs"`` key. Both start with empty ``"data_path"`` and ``"files"``.
    """
    with_subdirs = {
        "url": "http://test.com/files/file.zip",
        "subdirs": ["subdir"],
        "data_path": [],
        "files": [],
    }
    without_subdirs = {
        "url": "http://test.com/simple.zip",
        "data_path": [],
        "files": [],
    }
    return {"Test": with_subdirs, "TestSimple": without_subdirs}
3669
3770
3871def test_available_datasets ():
@@ -54,123 +87,217 @@ def test_check_dataset_availability():
5487 ds ._check_dataset_availability ()
5588
5689
57- def test_set_root_dir ():
58- # test with existing data path
59- ds .data_name = "Test"
60- ds ._state ["data_path" ] = ["test/data" ]
61- ds ._set_data_dir (root_dir = "test/data" , use_existing = True )
62- assert os .path .abspath ("test/data/" ) == ds ._dir
63-
64- # test without existing data path
65- ds ._state ["data_path" ] = []
66- ds ._set_data_dir (root_dir = "test/data" , use_existing = True )
67- assert os .path .abspath ("test/data/datasets/Test" ) == ds ._dir
68-
69- # test without data path and existing data path
70- ds ._set_data_dir (root_dir = None , use_existing = True )
71- assert os .path .abspath ("./datasets/Test" ) == ds ._dir
72-
73- # test with provided data path different from existing data path
74- ds ._state ["data_path" ] = ["test/data1" ]
75- ds ._set_data_dir (root_dir = "test/data" , use_existing = True )
76- assert os .path .abspath ("test/data1/" ) == ds ._dir
77- ds ._set_data_dir (root_dir = "test/data" , use_existing = False )
78- assert os .path .abspath ("test/data/datasets/Test" ) == ds ._dir
79-
80-
81- def test_get_file_list (fs ):
82- fs .create_file ("test/data/file.txt" )
83- fs .create_file ("test/data/subdir/file.txt" )
84- fs .create_file ("test/data/subdir/file.zip" )
85- fs .create_file ("test/data/file.zip" )
86- ds ._dir = "test/data"
87- assert ["file.txt" , "subdir/file.txt" ] == ds ._get_file_list ()
88-
89- assert ["file.txt" , "file.zip" , "subdir/file.txt" , "subdir/file.zip" ] == ds ._get_file_list (
90- ignore_zip = False ,
91- )
92-
def test_set_root_dir(mock_dataset_paths, sample_dataset_config):
    """Check which directory ``_set_data_dir`` stores in ``ds._dir``.

    Covers: a recorded data path equal to ``root_dir``, no recorded data path,
    ``root_dir=None`` with a patched working directory, and a recorded data
    path that differs from ``root_dir`` with ``use_existing`` on and off.
    """
    base = mock_dataset_paths["tmp_path"]

    # Persist the sample configuration in the temporary user-level JSON file.
    mock_dataset_paths["user"].write_text(json.dumps(sample_dataset_config))

    data_root = str(base / "test" / "data")
    first_root = str(base / "test" / "data1")
    second_root = str(base / "test" / "data2")

    with patch.object(ds, "_datasets", sample_dataset_config):
        # Recorded data path present and identical to root_dir.
        ds.data_name = "Test"
        ds._state["data_path"] = [data_root]
        ds._set_data_dir(root_dir=data_root, use_existing=True)
        assert ds._dir == os.path.abspath(data_root + "/")

        # No recorded data path: "datasets/<name>" is appended to root_dir.
        ds._state["data_path"] = []
        ds._set_data_dir(root_dir=str(base / "test" / "data"), use_existing=True)
        below_root = str(base / "test" / "data" / "datasets" / "Test")
        assert ds._dir == os.path.abspath(below_root)

        # No root_dir: the (patched) current working directory is the base.
        with patch("os.getcwd", return_value=str(base)):
            ds._set_data_dir(root_dir=None, use_existing=True)
            below_cwd = str(base / "datasets" / "Test")
            assert ds._dir == below_cwd

        # Recorded data path differs from root_dir: use_existing decides.
        ds._state["data_path"] = [first_root]
        ds._set_data_dir(root_dir=second_root, use_existing=True)
        assert ds._dir == os.path.abspath(first_root + "/")
        ds._set_data_dir(root_dir=second_root, use_existing=False)
        below_second = str(base / "test" / "data2" / "datasets" / "Test")
        assert ds._dir == os.path.abspath(below_second)
127+
128+
def test_get_file_list(tmp_path):
    """``_get_file_list`` lists files relative to ``ds._dir``.

    Zip files are left out by default and included with ``ignore_zip=False``.
    Results are sorted before comparison, so listing order is not tested.
    """
    data_dir = Path(tmp_path) / "test" / "data"
    nested_dir = data_dir / "subdir"
    # Creating the nested directory with parents=True also creates data_dir.
    nested_dir.mkdir(parents=True, exist_ok=True)

    for folder in (data_dir, nested_dir):
        (folder / "file.txt").write_text("content")
        (folder / "file.zip").write_text("zip content")

    ds._dir = str(data_dir)

    without_zip = sorted(ds._get_file_list())
    assert without_zip == ["file.txt", "subdir/file.txt"]

    with_zip = sorted(ds._get_file_list(ignore_zip=False))
    assert with_zip == [
        "file.txt",
        "file.zip",
        "subdir/file.txt",
        "subdir/file.zip",
    ]
151+
152+
def test_download_data(
    tmp_path,
    requests_mock,
    zip_buffer,
    mock_dataset_paths,
    sample_dataset_config,
):
    """``_download_data`` stores the mocked archive as ``datasets/Test/Test.zip``.

    The HTTP request is served by ``requests_mock`` from the in-memory zip
    buffer, so no network access takes place.
    """
    mock_dataset_paths["user"].write_text(json.dumps(sample_dataset_config))

    root = Path(tmp_path) / "test"
    root.mkdir(exist_ok=True)

    data_url = "http://test.com/files/file.zip"
    requests_mock.get(data_url, content=zip_buffer.getvalue())

    with patch.object(ds, "_datasets", sample_dataset_config):
        ds._data_name = "Test"
        ds._state = {"data_path": []}
        ds._set_data_dir(root_dir=str(root), use_existing=True)
        ds._download_data(data_url)

        downloaded = root / "datasets" / "Test" / "Test.zip"
        assert downloaded.exists()

    # assert not ds._download_data("data", "test/data/", data_url)  # already exists
105181
106182
107- def test_extract_data (zip_file ): # noqa: ARG001
108- ds ._data_name = "Test"
109- ds ._dir = "test/datasets/Test/"
110- ds ._extract_data ()
111- assert os .path .exists ("test/datasets/Test/test_file.txt" )
112- assert os .path .exists ("test/datasets/Test/subdir/test_subdir.txt" )
def test_extract_data(tmp_path, zip_buffer, mock_dataset_paths, sample_dataset_config):
    """``_extract_data`` unpacks ``Test.zip`` inside ``ds._dir``.

    After extraction both the top-level file and the file in ``subdir`` of the
    archive must exist below the dataset directory.
    """
    mock_dataset_paths["user"].write_text(json.dumps(sample_dataset_config))

    dataset_dir = Path(tmp_path) / "test" / "datasets" / "Test"
    dataset_dir.mkdir(parents=True, exist_ok=True)

    # Place the archive where the dataset object looks for "<data_name>.zip".
    (dataset_dir / "Test.zip").write_bytes(zip_buffer.getvalue())

    with patch.object(ds, "_datasets", sample_dataset_config):
        ds._data_name = "Test"
        ds._dir = str(dataset_dir)
        ds._extract_data()

        for relative in ("test_file.txt", "subdir/test_subdir.txt"):
            assert dataset_dir.joinpath(*relative.split("/")).exists()
114204
115- def test_rearrange_data (zip_file ): # noqa: ARG001
116- ds ._data_name = "Test"
117- ds ._dir = "test/datasets/Test/"
118- ds ._subdirs = ["subdir" ]
119- ds ._extract_data ()
120- ds ._rearrange_data ()
121- assert os .path .exists ("test/datasets/Test/test_file.txt" )
122- assert os .path .exists ("test/datasets/Test/test_subdir.txt" )
123- assert not os .path .exists ("test/datasets/Test/subdir" )
124205
125- with pytest .raises (FileNotFoundError ):
126- ds ._subdirs = ["non_existing_subdir" ]
def test_rearrange_data(tmp_path, zip_buffer, mock_dataset_paths, sample_dataset_config):
    """``_rearrange_data`` moves files out of the listed subdirs and removes them.

    A subdirectory name that does not exist must raise ``FileNotFoundError``.
    """
    mock_dataset_paths["user"].write_text(json.dumps(sample_dataset_config))

    dataset_dir = Path(tmp_path) / "test" / "datasets" / "Test"
    dataset_dir.mkdir(parents=True, exist_ok=True)
    (dataset_dir / "Test.zip").write_bytes(zip_buffer.getvalue())

    with patch.object(ds, "_datasets", sample_dataset_config):
        ds._data_name = "Test"
        ds._dir = str(dataset_dir)
        ds._subdirs = ["subdir"]
        ds._extract_data()
        ds._rearrange_data()

        # Both files now sit directly in the dataset directory ...
        assert (dataset_dir / "test_file.txt").exists()
        assert (dataset_dir / "test_subdir.txt").exists()
        # ... and the emptied subdirectory is gone.
        assert not (dataset_dir / "subdir").exists()

        with pytest.raises(FileNotFoundError):
            ds._subdirs = ["non_existing_subdir"]
            ds._rearrange_data()
134233
135- ds .remove (data_name )
234+
def test_get_remove_dataset(
    tmp_path,
    requests_mock,
    zip_buffer,
    mock_dataset_paths,
    sample_dataset_config,
):
    """Round-trip ``ds.get`` / ``ds.remove`` for the "Test" dataset in ``tmp_path``.

    Checks the resulting dataset directory, that ``subdir`` is gone after
    rearranging, that the user-level JSON records ``data_path`` and ``files``,
    and that ``remove`` deletes the dataset directory again.
    """
    base = Path(tmp_path)

    user_json = mock_dataset_paths["user"]
    user_json.write_text(json.dumps(sample_dataset_config))

    data_url = "http://test.com/files/file.zip"
    requests_mock.get(data_url, content=zip_buffer.getvalue())

    # Every ds.get call in this test uses the same location arguments.
    get_kwargs = {"root_dir": str(base), "use_existing": False}

    with patch.object(ds, "_datasets", sample_dataset_config):
        name = "Test"

        ds.remove(name)

        ds.get(name, **get_kwargs)

        dataset_dir = base / "datasets" / "Test"
        assert ds.dir == str(dataset_dir)

        # The configured subdir must have been flattened away.
        assert not (dataset_dir / "subdir").exists()

        # The user-level datasets file now lists data_path and files.
        recorded = json.loads(user_json.read_text())
        assert recorded[name]["data_path"]
        assert recorded[name]["files"]
        ds.remove(name)
        assert not dataset_dir.exists()

        # Fetching twice in a row must not fail.
        ds.get(name, **get_kwargs)
        ds.get(name, **get_kwargs)

        # NOTE(review): this fallback means the explicit-path removal is only
        # exercised when existing_data_paths is non-empty - confirm that the
        # silent else-branch is intended.
        known_paths = getattr(ds, "existing_data_paths", None)
        if known_paths:
            ds.remove(name, known_paths[0])
        else:
            ds.remove(name)
277+
278+
279+ def test_datasets_manager (mock_dataset_paths ):
280+ """Test dataset manager with proper isolation."""
281+ user_config = mock_dataset_paths ["user" ]
282+ folder_config = mock_dataset_paths ["folder" ]
158283
159- def test_datasets_manager ():
160284 dm .add (
161285 "Test_DM" ,
162286 {"url" : "http://test.com/files/file.zip" , "subdirs" : ["subdir" ]},
163287 levels = ["folder" , "user" ],
164288 )
165- datasets_json = json .load (open (dm .json_path ["folder" ]))
166- assert datasets_json ["Test_DM" ]
167- assert datasets_json ["Test_DM" ]["url" ] == "http://test.com/files/file.zip"
168- assert datasets_json ["Test_DM" ]["subdirs" ] == ["subdir" ]
169289
290+ # Check configurations were written to temporary files
291+ folder_data = json .loads (folder_config .read_text ())
292+ assert "Test_DM" in folder_data
293+
294+ user_data = json .loads (user_config .read_text ())
295+ assert "Test_DM" in user_data
296+
297+ # Test removal
170298 dm .remove ("Test_DM" , levels = ["folder" ])
171- datasets_json = json .load (open (dm .json_path ["folder" ]))
172- with pytest .raises (KeyError ):
173- datasets_json ["Test_DM" ]
299+ folder_data = json .loads (folder_config .read_text ())
300+ assert "Test_DM" not in folder_data
174301
175302 datasets_json = json .load (open (dm .json_path ["user" ]))
176303 assert datasets_json ["Test_DM" ]
0 commit comments