|
| 1 | +# ------------------------------------------------------------------------------------------------------------------------------------- |
| 2 | +# Following code curated for PoseBench: (https://github.com/BioinfoMachineLearning/PoseBench) |
| 3 | +# ------------------------------------------------------------------------------------------------------------------------------------- |
| 4 | + |
| 5 | +import logging |
| 6 | +import os |
| 7 | + |
| 8 | +import hydra |
| 9 | +import rootutils |
| 10 | +from Bio import PDB |
| 11 | +from omegaconf import DictConfig |
| 12 | +from tqdm import tqdm |
| 13 | + |
# Log line layout: timestamp, emitting file and line number, level, then the message.
_LOG_FORMAT = "[%(asctime)s] {%(filename)s:%(lineno)d} %(levelname)s - %(message)s"

logging.basicConfig(format=_LOG_FORMAT)
logger = logging.getLogger(__name__)

# Resolve the project root from this file using the ".project-root" indicator;
# `pythonpath=True` asks rootutils to make the root importable.
rootutils.setup_root(
    __file__,
    indicator=".project-root",
    pythonpath=True,
)
| 18 | + |
| 19 | + |
def convert_mmcif_to_pdb(mmcif_file: str, pdb_file: str):
    """Read a structure from an mmCIF file and write it back out in PDB format.

    :param mmcif_file: Path of the mmCIF file to parse.
    :param pdb_file: Path the resulting PDB file is saved to.
    """
    writer = PDB.PDBIO()

    # Parse with `QUIET=True`; the structure is registered under the fixed id "structure".
    parsed_structure = PDB.MMCIFParser(QUIET=True).get_structure("structure", mmcif_file)

    writer.set_structure(parsed_structure)
    writer.save(pdb_file)
| 28 | + |
| 29 | + |
def _normalize_prediction_id(prediction_id: str, lowercase_id: bool) -> str:
    """Derive the output PDB file stem from the name of a prediction entry.

    :param prediction_id: Name of an entry in the input mmCIF directory.
    :param lowercase_id: If truthy, lowercase only the first two "_"-separated
        parts and leave the rest untouched; otherwise uppercase the whole ID.
    :return: The normalized ID, with every "fold_" occurrence removed.
    """
    new_id = prediction_id.replace("fold_", "")
    if lowercase_id:
        # Support the DockGen dataset's hybrid lowercase-uppercase pdb id-CCD ID format
        new_id_parts = new_id.split("_")
        return "_".join([part.lower() for part in new_id_parts[:2]] + new_id_parts[2:])
    return new_id.upper()


@hydra.main(
    version_base="1.3",
    config_path="../../../configs/data/components",
    config_name="convert_mmcif_to_pdb.yaml",
)
def main(cfg: DictConfig):
    """Convert an input directory of mmCIF files to an output directory of PDB files.

    For every entry `<name>` in `cfg.input_mmcif_dir`, the file
    `<name>/<name>_model_<cfg.model_index_to_select>.cif` is converted and saved as
    `<normalized name>.pdb` in `cfg.output_pdb_dir`. Entries without that file are
    skipped with a warning rather than aborting the run.

    :param cfg: Hydra configuration providing `input_mmcif_dir`, `output_pdb_dir`,
        `dataset`, `lowercase_id` and `model_index_to_select`.
    """
    os.makedirs(cfg.output_pdb_dir, exist_ok=True)

    # Sort so that processing order (and the winner of any ID collision after
    # normalization) does not depend on the filesystem's listing order.
    for entry_name in tqdm(
        sorted(os.listdir(cfg.input_mmcif_dir)),
        desc=f"Converting mmCIF to PDB for {cfg.dataset}",
    ):
        new_id = _normalize_prediction_id(entry_name, cfg.lowercase_id)
        mmcif_filepath = os.path.join(
            cfg.input_mmcif_dir,
            entry_name,
            f"{entry_name}_model_{cfg.model_index_to_select}.cif",
        )
        pdb_filepath = os.path.join(cfg.output_pdb_dir, f"{new_id}.pdb")

        if not os.path.isfile(mmcif_filepath):
            # Make skipped entries visible instead of silently dropping them.
            logger.warning(f"Skipping {entry_name}: no mmCIF file found at {mmcif_filepath}.")
            continue

        convert_mmcif_to_pdb(mmcif_filepath, pdb_filepath)

    logger.info(f"Converted mmCIF files to PDB files for {cfg.dataset} dataset.")
| 58 | + |
| 59 | + |
# Run the conversion only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments