Skip to content

Commit 0f3de40

Browse files
committed
update: update file paths to include 'notebooks' directory for interaction dataframes
1 parent c726de6 commit 0f3de40

4 files changed

Lines changed: 41 additions & 42 deletions

notebooks/astex_method_interaction_analysis_plotting_slurm.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,7 @@ def signal_handler(signum, frame):
200200
# ##### Analyze `Astex Diverse` set interactions as a baseline
201201

202202
# %%
203-
if not os.path.exists("astex_diverse_interaction_dataframes.h5"):
203+
if not os.path.exists(os.path.join("notebooks", "astex_diverse_interaction_dataframes.h5")):
204204
ad_protein_ligand_filepath_pairs = []
205205
for item in os.listdir(ad_set_dir):
206206
ligand_item_path = os.path.join(ad_set_dir, item)
@@ -235,7 +235,7 @@ def signal_handler(signum, frame):
235235
continue
236236

237237
# NOTE: we iteratively save the interaction dataframes to an HDF5 file
238-
with pd.HDFStore("astex_diverse_interaction_dataframes.h5") as store:
238+
with pd.HDFStore(os.path.join("notebooks", "astex_diverse_interaction_dataframes.h5")) as store:
239239
for i, df in enumerate(ad_protein_ligand_interaction_dfs):
240240
store.put(f"df_{i}", df)
241241

@@ -282,7 +282,7 @@ def signal_handler(signum, frame):
282282
method = method.split("_")[0]
283283

284284
if not os.path.exists(
285-
f"{method}{single_seq_suffix}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5"
285+
os.path.join("notebooks", f"{method}{single_seq_suffix}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5")
286286
):
287287
with open_dict(cfg):
288288
cfg.method = method
@@ -358,7 +358,7 @@ def signal_handler(signum, frame):
358358

359359
# NOTE: we iteratively save the interaction dataframes to an HDF5 file
360360
with pd.HDFStore(
361-
f"{method}{single_seq_suffix}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5"
361+
os.path.join("notebooks", f"{method}{single_seq_suffix}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5")
362362
) as store:
363363
for i, df in enumerate(astex_protein_ligand_interaction_dfs):
364364
store.put(f"df_{i}", df)
@@ -413,12 +413,12 @@ def process_method(file_path, category):
413413
for method in baseline_methods:
414414
for repeat_index in range(1, max_num_repeats_per_method + 1):
415415
method_title = method_mapping[method]
416-
file_path = f"{method}_astex_diverse_interaction_dataframes_{repeat_index}.h5"
416+
file_path = os.path.join("notebooks", f"{method}_astex_diverse_interaction_dataframes_{repeat_index}.h5")
417417
if os.path.exists(file_path):
418418
dfs.append(process_method(file_path, method_title))
419419

420-
if os.path.exists("astex_diverse_interaction_dataframes.h5"):
421-
dfs.append(process_method("astex_diverse_interaction_dataframes.h5", "Reference"))
420+
if os.path.exists(os.path.join("notebooks", "astex_diverse_interaction_dataframes.h5")):
421+
dfs.append(process_method(os.path.join("notebooks", "astex_diverse_interaction_dataframes.h5"), "Reference"))
422422

423423
# combine statistics
424424
assert len(dfs) > 0, "No interaction dataframes found."
@@ -539,14 +539,14 @@ def histogram_to_vector(histogram, bins):
539539
for method in baseline_methods:
540540
for repeat_index in range(1, max_num_repeats_per_method + 1):
541541
method_title = method_mapping[method]
542-
file_path = f"{method}_astex_diverse_interaction_dataframes_{repeat_index}.h5"
542+
file_path = os.path.join("notebooks", f"{method}_astex_diverse_interaction_dataframes_{repeat_index}.h5")
543543
if os.path.exists(file_path):
544544
dfs.append(bin_interactions(file_path, method_title))
545545

546546
assert os.path.exists(
547-
"astex_diverse_interaction_dataframes.h5"
547+
os.path.join("notebooks", "astex_diverse_interaction_dataframes.h5")
548548
), "No reference interaction dataframe found."
549-
reference_df = bin_interactions("astex_diverse_interaction_dataframes.h5", "Reference")
549+
reference_df = bin_interactions(os.path.join("notebooks", "astex_diverse_interaction_dataframes.h5"), "Reference")
550550

551551
# combine bins from all method dataframes
552552
assert len(dfs) > 0, "No interaction dataframes found."

notebooks/casp15_method_interaction_analysis_plotting_slurm.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ def signal_handler(signum, frame):
200200
# ##### Analyze `CASP15` set interactions as a baseline
201201

202202
# %%
203-
if not os.path.exists("casp15_interaction_dataframes.h5"):
203+
if not os.path.exists(os.path.join("notebooks", "casp15_interaction_dataframes.h5")):
204204
casp15_protein_ligand_complex_filepaths = []
205205
for item in os.listdir(casp15_set_dir):
206206
item_path = os.path.join(casp15_set_dir, item)
@@ -244,7 +244,7 @@ def signal_handler(signum, frame):
244244
continue
245245

246246
# NOTE: we iteratively save the interaction dataframes to an HDF5 file
247-
with pd.HDFStore("casp15_interaction_dataframes.h5") as store:
247+
with pd.HDFStore(os.path.join("notebooks", "casp15_interaction_dataframes.h5")) as store:
248248
for i, df in enumerate(casp15_protein_ligand_interaction_dfs):
249249
store.put(f"df_{i}", df)
250250

@@ -261,7 +261,7 @@ def signal_handler(signum, frame):
261261
for repeat_index in range(1, max_num_repeats_per_method + 1):
262262
method_title = method_mapping[method]
263263

264-
if not os.path.exists(f"{method}_{dataset}_interaction_dataframes_{repeat_index}.h5"):
264+
if not os.path.exists(os.path.join("notebooks", f"{method}_{dataset}_interaction_dataframes_{repeat_index}.h5")):
265265
method_casp15_set_dir = os.path.join(
266266
"data",
267267
"test_cases",
@@ -340,7 +340,7 @@ def signal_handler(signum, frame):
340340

341341
# NOTE: we iteratively save the interaction dataframes to an HDF5 file
342342
with pd.HDFStore(
343-
f"{method}_{dataset}_interaction_dataframes_{repeat_index}.h5"
343+
os.path.join("notebooks", f"{method}_{dataset}_interaction_dataframes_{repeat_index}.h5")
344344
) as store:
345345
for i, df in enumerate(casp15_protein_ligand_interaction_dfs):
346346
store.put(f"df_{i}", df)
@@ -395,12 +395,12 @@ def process_method(file_path, category):
395395
for method in baseline_methods:
396396
for repeat_index in range(1, max_num_repeats_per_method + 1):
397397
method_title = method_mapping[method]
398-
file_path = f"{method}_casp15_interaction_dataframes_{repeat_index}.h5"
398+
file_path = os.path.join("notebooks", f"{method}_casp15_interaction_dataframes_{repeat_index}.h5")
399399
if os.path.exists(file_path):
400400
dfs.append(process_method(file_path, method_title))
401401

402-
if os.path.exists("casp15_interaction_dataframes.h5"):
403-
dfs.append(process_method("casp15_interaction_dataframes.h5", "Reference"))
402+
if os.path.exists(os.path.join("notebooks", "casp15_interaction_dataframes.h5")):
403+
dfs.append(process_method(os.path.join("notebooks", "casp15_interaction_dataframes.h5"), "Reference"))
404404

405405
# combine statistics
406406
assert len(dfs) > 0, "No interaction dataframes found."
@@ -521,14 +521,14 @@ def histogram_to_vector(histogram, bins):
521521
for method in baseline_methods:
522522
for repeat_index in range(1, max_num_repeats_per_method + 1):
523523
method_title = method_mapping[method]
524-
file_path = f"{method}_casp15_interaction_dataframes_{repeat_index}.h5"
524+
file_path = os.path.join("notebooks", f"{method}_casp15_interaction_dataframes_{repeat_index}.h5")
525525
if os.path.exists(file_path):
526526
dfs.append(bin_interactions(file_path, method_title))
527527

528528
assert os.path.exists(
529-
"casp15_interaction_dataframes.h5"
529+
os.path.join("notebooks", "casp15_interaction_dataframes.h5")
530530
), "No reference interaction dataframe found."
531-
reference_df = bin_interactions("casp15_interaction_dataframes.h5", "Reference")
531+
reference_df = bin_interactions(os.path.join("notebooks", "casp15_interaction_dataframes.h5"), "Reference")
532532

533533
# combine bins from all method dataframes
534534
assert len(dfs) > 0, "No interaction dataframes found."

notebooks/dockgen_method_interaction_analysis_plotting_slurm.py

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ def signal_handler(signum, frame):
200200
# ##### Analyze `DockGen` set interactions as a baseline
201201

202202
# %%
203-
if not os.path.exists("dockgen_interaction_dataframes.h5"):
203+
if not os.path.exists(os.path.join("notebooks", "dockgen_interaction_dataframes.h5")):
204204
dockgen_test_ids_filepath = os.path.join(
205205
"data", "dockgen_set", "split_test.txt"
206206
) # NOTE: change as needed
@@ -248,7 +248,7 @@ def signal_handler(signum, frame):
248248
continue
249249

250250
# NOTE: we iteratively save the interaction dataframes to an HDF5 file
251-
with pd.HDFStore("dockgen_interaction_dataframes.h5") as store:
251+
with pd.HDFStore(os.path.join("notebooks", "dockgen_interaction_dataframes.h5")) as store:
252252
for i, df in enumerate(dg_protein_ligand_interaction_dfs):
253253
store.put(f"df_{i}", df)
254254

@@ -294,9 +294,7 @@ def signal_handler(signum, frame):
294294

295295
method = method.split("_")[0]
296296

297-
if not os.path.exists(
298-
f"{method}{single_seq_suffix}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5"
299-
):
297+
if not os.path.exists(os.path.join("notebooks", f"{method}{single_seq_suffix}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5")):
300298
with open_dict(cfg):
301299
cfg.method = method
302300
cfg.repeat_index = repeat_index
@@ -371,7 +369,7 @@ def signal_handler(signum, frame):
371369

372370
# NOTE: we iteratively save the interaction dataframes to an HDF5 file
373371
with pd.HDFStore(
374-
f"{method}{single_seq_suffix}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5"
372+
os.path.join("notebooks", f"{method}{single_seq_suffix}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5")
375373
) as store:
376374
for i, df in enumerate(dockgen_protein_ligand_interaction_dfs):
377375
store.put(f"df_{i}", df)
@@ -426,12 +424,12 @@ def process_method(file_path, category):
426424
for method in baseline_methods:
427425
for repeat_index in range(1, max_num_repeats_per_method + 1):
428426
method_title = method_mapping[method]
429-
file_path = f"{method}_dockgen_interaction_dataframes_{repeat_index}.h5"
427+
file_path = os.path.join("notebooks", f"{method}_dockgen_interaction_dataframes_{repeat_index}.h5")
430428
if os.path.exists(file_path):
431429
dfs.append(process_method(file_path, method_title))
432430

433-
if os.path.exists("dockgen_interaction_dataframes.h5"):
434-
dfs.append(process_method("dockgen_interaction_dataframes.h5", "Reference"))
431+
if os.path.exists(os.path.join("notebooks", "dockgen_interaction_dataframes.h5")):
432+
dfs.append(process_method(os.path.join("notebooks", "dockgen_interaction_dataframes.h5"), "Reference"))
435433

436434
# combine statistics
437435
assert len(dfs) > 0, "No interaction dataframes found."
@@ -552,14 +550,14 @@ def histogram_to_vector(histogram, bins):
552550
for method in baseline_methods:
553551
for repeat_index in range(1, max_num_repeats_per_method + 1):
554552
method_title = method_mapping[method]
555-
file_path = f"{method}_dockgen_interaction_dataframes_{repeat_index}.h5"
553+
file_path = os.path.join("notebooks", f"{method}_dockgen_interaction_dataframes_{repeat_index}.h5")
556554
if os.path.exists(file_path):
557555
dfs.append(bin_interactions(file_path, method_title))
558556

559557
assert os.path.exists(
560-
"dockgen_interaction_dataframes.h5"
558+
os.path.join("notebooks", "dockgen_interaction_dataframes.h5")
561559
), "No reference interaction dataframe found."
562-
reference_df = bin_interactions("dockgen_interaction_dataframes.h5", "Reference")
560+
reference_df = bin_interactions(os.path.join("notebooks", "dockgen_interaction_dataframes.h5"), "Reference")
563561

564562
# combine bins from all method dataframes
565563
assert len(dfs) > 0, "No interaction dataframes found."

notebooks/posebusters_method_interaction_analysis_plotting_slurm.py

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ def signal_handler(signum, frame):
219219
# ##### Analyze `PoseBusters Benchmark` set interactions as a baseline
220220

221221
# %%
222-
if not os.path.exists("posebusters_benchmark_interaction_dataframes.h5"):
222+
if not os.path.exists(os.path.join("notebooks", "posebusters_benchmark_interaction_dataframes.h5")):
223223
posebusters_ccd_ids_filepath = os.path.join(
224224
"data",
225225
"posebusters_pdb_ccd_ids.txt",
@@ -265,7 +265,7 @@ def signal_handler(signum, frame):
265265
continue
266266

267267
# NOTE: we iteratively save the interaction dataframes to an HDF5 file
268-
with pd.HDFStore("posebusters_benchmark_interaction_dataframes.h5") as store:
268+
with pd.HDFStore(os.path.join("notebooks", "posebusters_benchmark_interaction_dataframes.h5")) as store:
269269
for i, df in enumerate(pb_protein_ligand_interaction_dfs):
270270
store.put(f"df_{i}", df)
271271

@@ -312,7 +312,7 @@ def signal_handler(signum, frame):
312312
method = method.split("_")[0]
313313

314314
if not os.path.exists(
315-
f"{method}{single_seq_suffix}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5"
315+
os.path.join("notebooks", f"{method}{single_seq_suffix}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5")
316316
):
317317
with open_dict(cfg):
318318
cfg.method = method
@@ -390,7 +390,7 @@ def signal_handler(signum, frame):
390390

391391
# NOTE: we iteratively save the interaction dataframes to an HDF5 file
392392
with pd.HDFStore(
393-
f"{method}{single_seq_suffix}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5"
393+
os.path.join("notebooks", f"{method}{single_seq_suffix}{vina_suffix}_{dataset}_interaction_dataframes_{repeat_index}.h5")
394394
) as store:
395395
for i, df in enumerate(posebusters_protein_ligand_interaction_dfs):
396396
store.put(f"df_{i}", df)
@@ -452,12 +452,12 @@ def process_method(file_path, category):
452452
for method in baseline_methods:
453453
for repeat_index in range(1, max_num_repeats_per_method + 1):
454454
method_title = method_mapping[method]
455-
file_path = f"{method}_posebusters_benchmark_interaction_dataframes_{repeat_index}.h5"
455+
file_path = os.path.join("notebooks", f"{method}_posebusters_benchmark_interaction_dataframes_{repeat_index}.h5")
456456
if os.path.exists(file_path):
457457
dfs.append(process_method(file_path, method_title))
458458

459-
if os.path.exists("posebusters_benchmark_interaction_dataframes.h5"):
460-
dfs.append(process_method("posebusters_benchmark_interaction_dataframes.h5", "Reference"))
459+
if os.path.exists(os.path.join("notebooks", "posebusters_benchmark_interaction_dataframes.h5")):
460+
dfs.append(process_method(os.path.join("notebooks", "posebusters_benchmark_interaction_dataframes.h5"), "Reference"))
461461

462462
# combine statistics
463463
assert len(dfs) > 0, "No interaction dataframes found."
@@ -581,14 +581,15 @@ def histogram_to_vector(histogram, bins):
581581
for method in baseline_methods:
582582
for repeat_index in range(1, max_num_repeats_per_method + 1):
583583
method_title = method_mapping[method]
584-
file_path = f"{method}_posebusters_benchmark_interaction_dataframes_{repeat_index}.h5"
584+
file_path = os.path.join("notebooks", f"{method}_posebusters_benchmark_interaction_dataframes_{repeat_index}.h5")
585585
if os.path.exists(file_path):
586586
dfs.append(bin_interactions(file_path, method_title))
587587

588588
assert os.path.exists(
589-
"posebusters_benchmark_interaction_dataframes.h5"
589+
os.path.join("notebooks", "posebusters_benchmark_interaction_dataframes.h5")
590+
590591
), "No reference interaction dataframe found."
591-
reference_df = bin_interactions("posebusters_benchmark_interaction_dataframes.h5", "Reference")
592+
reference_df = bin_interactions(os.path.join("notebooks", "posebusters_benchmark_interaction_dataframes.h5"), "Reference")
592593

593594
# combine bins from all method dataframes
594595
assert len(dfs) > 0, "No interaction dataframes found."

0 commit comments

Comments
 (0)