Skip to content

Commit 6982c0a

Browse files
committed
Merge remote-tracking branch 'origin/main' into update_pynxtools
2 parents dc5decd + 9c2c22f commit 6982c0a

1 file changed

Lines changed: 220 additions & 93 deletions

File tree

tests/test_dataset.py

Lines changed: 220 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""This code performs several tests for the dataset module.
1+
"""This code performs several tests for the dataset module.
22
"""
33
from __future__ import annotations
44

@@ -7,10 +7,11 @@
77
import os
88
import zipfile
99
from importlib.util import find_spec
10+
from pathlib import Path
11+
from unittest.mock import patch
1012

1113
import pytest
1214

13-
from sed.core.config import USER_CONFIG_PATH
1415
from sed.dataset import dataset as ds
1516
from sed.dataset import DatasetsManager as dm
1617

@@ -23,16 +24,48 @@ def zip_buffer():
2324
zip_buffer = io.BytesIO()
2425
with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED) as zip_file:
2526
zip_file.writestr("test_file.txt", "This is a test file inside the zip.")
26-
# add a subdir
2727
zip_file.writestr("subdir/test_subdir.txt", "This is a test file inside the subdir.")
2828
return zip_buffer
2929

3030

3131
@pytest.fixture
32-
def zip_file(fs, zip_buffer):
33-
fs.create_dir("test/datasets/Test")
34-
with open("test/datasets/Test/Test.zip", "wb") as f:
35-
f.write(zip_buffer.getvalue())
32+
def zip_file(tmp_path, zip_buffer):
33+
test_dir = tmp_path / "datasets" / "Test"
34+
test_dir.mkdir(parents=True)
35+
zip_path = test_dir / "Test.zip"
36+
zip_path.write_bytes(zip_buffer.getvalue())
37+
return zip_path
38+
39+
40+
@pytest.fixture
41+
def mock_dataset_paths(tmp_path):
42+
tmp_path = Path(tmp_path)
43+
44+
user_config = tmp_path / "user_datasets.json"
45+
folder_config = tmp_path / "folder_datasets.json"
46+
47+
with patch.object(ds, "_dir", str(tmp_path / "datasets" / "Test")), patch(
48+
"sed.core.config.USER_CONFIG_PATH",
49+
tmp_path,
50+
), patch.object(
51+
dm,
52+
"json_path",
53+
{"user": str(user_config), "module": json_path, "folder": str(folder_config)},
54+
):
55+
yield {"user": user_config, "folder": folder_config, "tmp_path": tmp_path}
56+
57+
58+
@pytest.fixture
59+
def sample_dataset_config():
60+
return {
61+
"Test": {
62+
"url": "http://test.com/files/file.zip",
63+
"subdirs": ["subdir"],
64+
"data_path": [],
65+
"files": [],
66+
},
67+
"TestSimple": {"url": "http://test.com/simple.zip", "data_path": [], "files": []},
68+
}
3669

3770

3871
def test_available_datasets():
@@ -54,123 +87,217 @@ def test_check_dataset_availability():
5487
ds._check_dataset_availability()
5588

5689

57-
def test_set_root_dir():
58-
# test with existing data path
59-
ds.data_name = "Test"
60-
ds._state["data_path"] = ["test/data"]
61-
ds._set_data_dir(root_dir="test/data", use_existing=True)
62-
assert os.path.abspath("test/data/") == ds._dir
63-
64-
# test without existing data path
65-
ds._state["data_path"] = []
66-
ds._set_data_dir(root_dir="test/data", use_existing=True)
67-
assert os.path.abspath("test/data/datasets/Test") == ds._dir
68-
69-
# test without data path and existing data path
70-
ds._set_data_dir(root_dir=None, use_existing=True)
71-
assert os.path.abspath("./datasets/Test") == ds._dir
72-
73-
# test with provided data path different from existing data path
74-
ds._state["data_path"] = ["test/data1"]
75-
ds._set_data_dir(root_dir="test/data", use_existing=True)
76-
assert os.path.abspath("test/data1/") == ds._dir
77-
ds._set_data_dir(root_dir="test/data", use_existing=False)
78-
assert os.path.abspath("test/data/datasets/Test") == ds._dir
79-
80-
81-
def test_get_file_list(fs):
82-
fs.create_file("test/data/file.txt")
83-
fs.create_file("test/data/subdir/file.txt")
84-
fs.create_file("test/data/subdir/file.zip")
85-
fs.create_file("test/data/file.zip")
86-
ds._dir = "test/data"
87-
assert ["file.txt", "subdir/file.txt"] == ds._get_file_list()
88-
89-
assert ["file.txt", "file.zip", "subdir/file.txt", "subdir/file.zip"] == ds._get_file_list(
90-
ignore_zip=False,
91-
)
92-
90+
def test_set_root_dir(mock_dataset_paths, sample_dataset_config):
91+
"""Test _set_data_dir with proper path mocking."""
92+
user_config = mock_dataset_paths["user"]
93+
tmp_path = mock_dataset_paths["tmp_path"]
94+
95+
# Write sample config to temporary file
96+
user_config.write_text(json.dumps(sample_dataset_config))
97+
98+
with patch.object(ds, "_datasets", sample_dataset_config):
99+
# test with existing data path
100+
ds.data_name = "Test"
101+
test_data_path = str(tmp_path / "test" / "data")
102+
ds._state["data_path"] = [test_data_path]
103+
ds._set_data_dir(root_dir=test_data_path, use_existing=True)
104+
assert os.path.abspath(test_data_path + "/") == ds._dir
105+
106+
# test without existing data path
107+
ds._state["data_path"] = []
108+
ds._set_data_dir(root_dir=str(tmp_path / "test" / "data"), use_existing=True)
109+
expected_dir = str(tmp_path / "test" / "data" / "datasets" / "Test")
110+
assert os.path.abspath(expected_dir) == ds._dir
111+
112+
# Additional tests using temporary paths
113+
with patch("os.getcwd", return_value=str(tmp_path)):
114+
ds._set_data_dir(root_dir=None, use_existing=True)
115+
expected_mock_dir = str(tmp_path / "datasets" / "Test")
116+
assert ds._dir == expected_mock_dir
117+
118+
# Test with different provided path vs existing path
119+
test_data_path1 = str(tmp_path / "test" / "data1")
120+
test_data_path2 = str(tmp_path / "test" / "data2")
121+
ds._state["data_path"] = [test_data_path1]
122+
ds._set_data_dir(root_dir=test_data_path2, use_existing=True)
123+
assert os.path.abspath(test_data_path1 + "/") == ds._dir
124+
ds._set_data_dir(root_dir=test_data_path2, use_existing=False)
125+
expected_dir = str(tmp_path / "test" / "data2" / "datasets" / "Test")
126+
assert os.path.abspath(expected_dir) == ds._dir
127+
128+
129+
def test_get_file_list(tmp_path):
130+
tmp_path = Path(tmp_path)
131+
132+
test_dir = tmp_path / "test" / "data"
133+
test_dir.mkdir(parents=True, exist_ok=True)
134+
135+
(test_dir / "file.txt").write_text("content")
136+
(test_dir / "file.zip").write_text("zip content")
137+
138+
subdir = test_dir / "subdir"
139+
subdir.mkdir(exist_ok=True)
140+
(subdir / "file.txt").write_text("content")
141+
(subdir / "file.zip").write_text("zip content")
142+
143+
ds._dir = str(test_dir)
144+
assert sorted(ds._get_file_list()) == ["file.txt", "subdir/file.txt"]
145+
assert sorted(ds._get_file_list(ignore_zip=False)) == [
146+
"file.txt",
147+
"file.zip",
148+
"subdir/file.txt",
149+
"subdir/file.zip",
150+
]
151+
152+
153+
def test_download_data(
154+
tmp_path,
155+
requests_mock,
156+
zip_buffer,
157+
mock_dataset_paths,
158+
sample_dataset_config,
159+
):
160+
tmp_path = Path(tmp_path)
161+
162+
user_config = mock_dataset_paths["user"]
163+
user_config.write_text(json.dumps(sample_dataset_config))
164+
165+
test_dir = tmp_path / "test"
166+
test_dir.mkdir(exist_ok=True)
93167

94-
def test_download_data(fs, requests_mock, zip_buffer):
95-
fs.create_dir("test")
96168
data_url = "http://test.com/files/file.zip"
97169
requests_mock.get(data_url, content=zip_buffer.getvalue())
98-
ds._data_name = "Test"
99-
ds._state = {"data_path": []}
100-
ds._set_data_dir(root_dir="test", use_existing=True)
101-
ds._download_data(data_url)
102-
assert os.path.exists("test/datasets/Test/Test.zip")
170+
171+
with patch.object(ds, "_datasets", sample_dataset_config):
172+
ds._data_name = "Test"
173+
ds._state = {"data_path": []}
174+
ds._set_data_dir(root_dir=str(test_dir), use_existing=True)
175+
ds._download_data(data_url)
176+
177+
expected_path = test_dir / "datasets" / "Test" / "Test.zip"
178+
assert expected_path.exists()
103179

104180
# assert not ds._download_data("data", "test/data/", data_url) # already exists
105181

106182

107-
def test_extract_data(zip_file): # noqa: ARG001
108-
ds._data_name = "Test"
109-
ds._dir = "test/datasets/Test/"
110-
ds._extract_data()
111-
assert os.path.exists("test/datasets/Test/test_file.txt")
112-
assert os.path.exists("test/datasets/Test/subdir/test_subdir.txt")
183+
def test_extract_data(tmp_path, zip_buffer, mock_dataset_paths, sample_dataset_config):
184+
"""Test extraction with proper isolation."""
185+
tmp_path = Path(tmp_path)
186+
187+
user_config = mock_dataset_paths["user"]
188+
user_config.write_text(json.dumps(sample_dataset_config))
189+
190+
test_dir = tmp_path / "test" / "datasets" / "Test"
191+
test_dir.mkdir(parents=True, exist_ok=True)
192+
193+
# Create zip file in test directory
194+
zip_path = test_dir / "Test.zip"
195+
zip_path.write_bytes(zip_buffer.getvalue())
196+
197+
with patch.object(ds, "_datasets", sample_dataset_config):
198+
ds._data_name = "Test"
199+
ds._dir = str(test_dir)
200+
ds._extract_data()
113201

202+
assert (test_dir / "test_file.txt").exists()
203+
assert (test_dir / "subdir" / "test_subdir.txt").exists()
114204

115-
def test_rearrange_data(zip_file): # noqa: ARG001
116-
ds._data_name = "Test"
117-
ds._dir = "test/datasets/Test/"
118-
ds._subdirs = ["subdir"]
119-
ds._extract_data()
120-
ds._rearrange_data()
121-
assert os.path.exists("test/datasets/Test/test_file.txt")
122-
assert os.path.exists("test/datasets/Test/test_subdir.txt")
123-
assert not os.path.exists("test/datasets/Test/subdir")
124205

125-
with pytest.raises(FileNotFoundError):
126-
ds._subdirs = ["non_existing_subdir"]
206+
def test_rearrange_data(tmp_path, zip_buffer, mock_dataset_paths, sample_dataset_config):
207+
"""Test rearrangement with proper isolation."""
208+
tmp_path = Path(tmp_path)
209+
210+
user_config = mock_dataset_paths["user"]
211+
user_config.write_text(json.dumps(sample_dataset_config))
212+
213+
test_dir = tmp_path / "test" / "datasets" / "Test"
214+
test_dir.mkdir(parents=True, exist_ok=True)
215+
216+
zip_path = test_dir / "Test.zip"
217+
zip_path.write_bytes(zip_buffer.getvalue())
218+
219+
with patch.object(ds, "_datasets", sample_dataset_config):
220+
ds._data_name = "Test"
221+
ds._dir = str(test_dir)
222+
ds._subdirs = ["subdir"]
223+
ds._extract_data()
127224
ds._rearrange_data()
128225

226+
assert (test_dir / "test_file.txt").exists()
227+
assert (test_dir / "test_subdir.txt").exists()
228+
assert not (test_dir / "subdir").exists()
129229

130-
def test_get_remove_dataset(requests_mock, zip_buffer):
131-
json_path_user = USER_CONFIG_PATH.joinpath("datasets.json")
132-
data_name = "Test"
133-
_ = dm.load_datasets_dict() # to ensure datasets.json is in user dir
230+
with pytest.raises(FileNotFoundError):
231+
ds._subdirs = ["non_existing_subdir"]
232+
ds._rearrange_data()
134233

135-
ds.remove(data_name)
234+
235+
def test_get_remove_dataset(
236+
tmp_path,
237+
requests_mock,
238+
zip_buffer,
239+
mock_dataset_paths,
240+
sample_dataset_config,
241+
):
242+
tmp_path = Path(tmp_path)
243+
244+
user_config = mock_dataset_paths["user"]
245+
user_config.write_text(json.dumps(sample_dataset_config))
136246

137247
data_url = "http://test.com/files/file.zip"
138248
requests_mock.get(data_url, content=zip_buffer.getvalue())
139249

140-
ds.get(data_name)
141-
assert ds.dir == os.path.abspath(os.path.join("./datasets", data_name))
250+
with patch.object(ds, "_datasets", sample_dataset_config):
251+
data_name = "Test"
252+
253+
ds.remove(data_name)
254+
255+
ds.get(data_name, root_dir=str(tmp_path), use_existing=False)
142256

143-
# check if subdir is removed after rearranging
144-
assert not os.path.exists("./datasets/Test/subdir")
257+
expected_dir = tmp_path / "datasets" / "Test"
258+
assert ds.dir == str(expected_dir)
145259

146-
# check datasets file to now have data_path listed
147-
datasets_json = json.load(open(json_path_user))
148-
assert datasets_json[data_name]["data_path"]
149-
assert datasets_json[data_name]["files"]
150-
ds.remove(data_name)
260+
# Check if subdir is removed after rearranging (if subdirs are configured)
261+
assert not (expected_dir / "subdir").exists()
151262

152-
assert not os.path.exists(os.path.join("./datasets", data_name))
263+
# Check datasets file to now have data_path listed
264+
datasets_json = json.loads(user_config.read_text())
265+
assert datasets_json[data_name]["data_path"]
266+
assert datasets_json[data_name]["files"]
267+
ds.remove(data_name)
268+
assert not expected_dir.exists()
153269

154-
ds.get(data_name)
155-
ds.get(data_name)
156-
ds.remove(data_name, ds.existing_data_paths[0])
270+
ds.get(data_name, root_dir=str(tmp_path), use_existing=False)
271+
ds.get(data_name, root_dir=str(tmp_path), use_existing=False)
157272

273+
if hasattr(ds, "existing_data_paths") and ds.existing_data_paths:
274+
ds.remove(data_name, ds.existing_data_paths[0])
275+
else:
276+
ds.remove(data_name)
277+
278+
279+
def test_datasets_manager(mock_dataset_paths):
280+
"""Test dataset manager with proper isolation."""
281+
user_config = mock_dataset_paths["user"]
282+
folder_config = mock_dataset_paths["folder"]
158283

159-
def test_datasets_manager():
160284
dm.add(
161285
"Test_DM",
162286
{"url": "http://test.com/files/file.zip", "subdirs": ["subdir"]},
163287
levels=["folder", "user"],
164288
)
165-
datasets_json = json.load(open(dm.json_path["folder"]))
166-
assert datasets_json["Test_DM"]
167-
assert datasets_json["Test_DM"]["url"] == "http://test.com/files/file.zip"
168-
assert datasets_json["Test_DM"]["subdirs"] == ["subdir"]
169289

290+
# Check configurations were written to temporary files
291+
folder_data = json.loads(folder_config.read_text())
292+
assert "Test_DM" in folder_data
293+
294+
user_data = json.loads(user_config.read_text())
295+
assert "Test_DM" in user_data
296+
297+
# Test removal
170298
dm.remove("Test_DM", levels=["folder"])
171-
datasets_json = json.load(open(dm.json_path["folder"]))
172-
with pytest.raises(KeyError):
173-
datasets_json["Test_DM"]
299+
folder_data = json.loads(folder_config.read_text())
300+
assert "Test_DM" not in folder_data
174301

175302
datasets_json = json.load(open(dm.json_path["user"]))
176303
assert datasets_json["Test_DM"]

0 commit comments

Comments (0)