Skip to content

Commit 886aa22

Browse files
committed
Add mmCIF to PDB conversion script
1 parent ef89b5b commit 886aa22

2 files changed

Lines changed: 66 additions & 0 deletions

File tree

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Directory whose entries are scanned for mmCIF files (mandatory: `???` must be overridden).
input_mmcif_dir: ???
# Directory the converted PDB files are written to (mandatory; created if it does not exist).
output_pdb_dir: ???
# Index N selecting `<entry>/<entry>_model_N.cif` inside each entry of the input directory.
model_index_to_select: 0
# Dataset name; only used in the progress-bar description and the final log message.
dataset: "N/A"
# If true, lowercase only the first two `_`-separated parts of each output ID;
# otherwise the whole output ID is uppercased.
lowercase_id: false
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# -------------------------------------------------------------------------------------------------------------------------------------
2+
# Following code curated for PoseBench: (https://github.com/BioinfoMachineLearning/PoseBench)
3+
# -------------------------------------------------------------------------------------------------------------------------------------
4+
5+
import logging
6+
import os
7+
8+
import hydra
9+
import rootutils
10+
from Bio import PDB
11+
from omegaconf import DictConfig
12+
from tqdm import tqdm
13+
14+
# Log records carry a timestamp plus the originating file name and line number.
logging.basicConfig(format="[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# NOTE(review): presumably locates the project root via the ".project-root" marker
# file and adds it to the Python path -- confirm against the rootutils documentation.
rootutils.setup_root(__file__, indicator=".project-root", pythonpath=True)
18+
19+
20+
def convert_mmcif_to_pdb(mmcif_file: str, pdb_file: str):
    """Parse a structure from an mmCIF file and save it in PDB format.

    :param mmcif_file: Path of the mmCIF file to read.
    :param pdb_file: Path the resulting PDB file is written to.
    """
    # The parsed structure is always registered under the fixed ID "structure".
    structure = PDB.MMCIFParser(QUIET=True).get_structure("structure", mmcif_file)

    writer = PDB.PDBIO()
    writer.set_structure(structure)
    writer.save(pdb_file)
28+
29+
30+
def _normalize_output_id(entry_name: str, lowercase_id: bool) -> str:
    """Derive the output PDB file stem from an input directory entry name."""
    stem = entry_name.replace("fold_", "")
    if not lowercase_id:
        return stem.upper()
    # Support the DockGen dataset's hybrid lowercase-uppercase pdb id-CCD ID format:
    # only the first two underscore-separated parts are lowercased.
    parts = stem.split("_")
    return "_".join([part.lower() for part in parts[:2]] + parts[2:])


@hydra.main(
    version_base="1.3",
    config_path="../../../configs/data/components",
    config_name="convert_mmcif_to_pdb.yaml",
)
def main(cfg: DictConfig):
    """Convert an input directory of mmCIF files to an output directory of PDB files.

    For every entry ``<entry>`` of ``cfg.input_mmcif_dir``, the file
    ``<entry>/<entry>_model_<cfg.model_index_to_select>.cif`` is converted and saved as
    ``<normalized entry>.pdb`` in ``cfg.output_pdb_dir``. Entries without such a file
    are skipped and reported in a single warning once the loop has finished.

    :param cfg: Configuration providing ``input_mmcif_dir``, ``output_pdb_dir``,
        ``model_index_to_select``, ``dataset`` and ``lowercase_id``.
    """
    os.makedirs(cfg.output_pdb_dir, exist_ok=True)

    skipped_entries = []
    for entry_name in tqdm(
        os.listdir(cfg.input_mmcif_dir),
        desc=f"Converting mmCIF to PDB for {cfg.dataset}",
    ):
        mmcif_filepath = os.path.join(
            cfg.input_mmcif_dir,
            entry_name,
            f"{entry_name}_model_{cfg.model_index_to_select}.cif",
        )
        if not os.path.isfile(mmcif_filepath):
            # Best-effort: keep going, but remember the entry so the skip is not silent.
            skipped_entries.append(entry_name)
            continue

        output_id = _normalize_output_id(entry_name, cfg.lowercase_id)
        pdb_filepath = os.path.join(cfg.output_pdb_dir, f"{output_id}.pdb")
        convert_mmcif_to_pdb(mmcif_filepath, pdb_filepath)

    if skipped_entries:
        logger.warning(
            "Skipped %d entries of %s without a model %s mmCIF file: %s",
            len(skipped_entries),
            cfg.input_mmcif_dir,
            cfg.model_index_to_select,
            ", ".join(skipped_entries),
        )

    logger.info(f"Converted mmCIF files to PDB files for {cfg.dataset} dataset.")
58+
59+
60+
# Run the Hydra-wrapped entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)