|
221 | 221 | " for protein_filepath, ligand_filepath in tqdm(\n", |
222 | 222 | " pdbbind_protein_ligand_filepath_pairs, desc=\"Processing PDBBind 2020 set\"\n", |
223 | 223 | " ):\n", |
224 | | - " temp_protein_filepath = create_temp_pdb_with_only_molecule_type_residues(\n", |
225 | | - " protein_filepath, molecule_type=\"protein\"\n", |
226 | | - " )\n", |
227 | | - " ligand_mol = None\n", |
228 | 224 | " try:\n", |
229 | | - " ligand_mol = Chem.MolFromMolFile(ligand_filepath)\n", |
230 | | - " except Exception as e:\n", |
231 | | - " ligand_mol = Chem.MolFromMolFile(ligand_filepath, sanitize=False)\n", |
232 | | - " if ligand_mol is None:\n", |
233 | | - " print(\n", |
234 | | - " f\"Using the `.mol2` file for PDBBind 2020 target {ligand_filepath} failed. We found a `.sdf` file instead and are trying to use that. Be aware that the `.sdf` files from PDBBind 2020 are missing chirality tags, although we will do our best to impute such information automatically using RDKit. Reference: https://www.blopig.com/blog/2021/09/watch-out-when-using-pdbbind.\"\n", |
| 225 | + " temp_protein_filepath = create_temp_pdb_with_only_molecule_type_residues(\n", |
| 226 | + " protein_filepath, molecule_type=\"protein\"\n", |
235 | 227 | " )\n", |
| 228 | + " ligand_mol = None\n", |
236 | 229 | " try:\n", |
237 | | - " ligand_mol = Chem.MolFromMolFile(ligand_filepath.replace(\".mol2\", \".sdf\"))\n", |
238 | | - " except Exception as e:\n", |
239 | | - " ligand_mol = Chem.MolFromMolFile(\n", |
240 | | - " ligand_filepath.replace(\".mol2\", \".sdf\"), sanitize=False\n", |
241 | | - " )\n", |
242 | | - " try:\n", |
243 | | - " Chem.rdmolops.AssignAtomChiralTagsFromStructure(ligand_mol)\n", |
| 230 | + " ligand_mol = Chem.MolFromMolFile(ligand_filepath)\n", |
244 | 231 | " except Exception as e:\n", |
| 232 | + " ligand_mol = Chem.MolFromMolFile(ligand_filepath, sanitize=False)\n", |
| 233 | + " if ligand_mol is None:\n", |
245 | 234 | " print(\n", |
246 | | - " f\"Could not assign chirality tags to the atoms in the PDBBind ligand molecule from {ligand_filepath}.\"\n", |
| 235 | + " f\"Using the `.mol2` file for PDBBind 2020 target {ligand_filepath} failed. We found a `.sdf` file instead and are trying to use that. Be aware that the `.sdf` files from PDBBind 2020 are missing chirality tags, although we will do our best to impute such information automatically using RDKit. Reference: https://www.blopig.com/blog/2021/09/watch-out-when-using-pdbbind.\"\n", |
247 | 236 | " )\n", |
248 | | - " if ligand_mol is None:\n", |
249 | | - " raise ValueError(f\"Could not load PDBBind 2020 ligand from {ligand_filepath}.\")\n", |
250 | | - " pc.load_protein_from_pdb(temp_protein_filepath)\n", |
251 | | - " pc.load_ligands_from_mols([ligand_mol])\n", |
252 | | - " pdbbind_protein_ligand_interaction_dfs.append(pc.calculate_interactions())\n", |
| 237 | + " try:\n", |
| 238 | + " ligand_mol = Chem.MolFromMolFile(ligand_filepath.replace(\".mol2\", \".sdf\"))\n", |
| 239 | + " except Exception as e:\n", |
| 240 | + " ligand_mol = Chem.MolFromMolFile(\n", |
| 241 | + " ligand_filepath.replace(\".mol2\", \".sdf\"), sanitize=False\n", |
| 242 | + " )\n", |
| 243 | + " try:\n", |
| 244 | + " Chem.rdmolops.AssignAtomChiralTagsFromStructure(ligand_mol)\n", |
| 245 | + " except Exception as e:\n", |
| 246 | + " print(\n", |
| 247 | + " f\"Could not assign chirality tags to the atoms in the PDBBind ligand molecule from {ligand_filepath}.\"\n", |
| 248 | + " )\n", |
| 249 | + " if ligand_mol is None:\n", |
| 250 | + " raise ValueError(f\"Could not load PDBBind 2020 ligand from {ligand_filepath}.\")\n", |
| 251 | + " pc.load_protein_from_pdb(temp_protein_filepath)\n", |
| 252 | + " pc.load_ligands_from_mols([ligand_mol])\n", |
| 253 | + " pdbbind_protein_ligand_interaction_dfs.append(pc.calculate_interactions())\n", |
| 254 | + " except Exception as e:\n", |
| 255 | + " print(\n", |
| 256 | + " f\"Error processing PDBBind filepaths {temp_protein_filepath} and {ligand_filepath} due to: {e}. Skipping...\"\n", |
| 257 | + " )\n", |
| 258 | + " continue\n", |
253 | 259 | "\n", |
254 | 260 | " # NOTE: we iteratively save the interaction dataframes to an HDF5 file\n", |
255 | 261 | " with pd.HDFStore(\n", |
|
397 | 403 | " dg_protein_ligand_interaction_dfs.append(pc.calculate_interactions())\n", |
398 | 404 | " except Exception as e:\n", |
399 | 405 | " print(\n", |
400 | | - " f\"Error processing Dockgen filepath pari {temp_protein_filepath} and {ligand_filepath} due to: {e}. Skipping...\"\n", |
| 406 | + " f\"Error processing Dockgen filepaths {temp_protein_filepath} and {ligand_filepath} due to: {e}. Skipping...\"\n", |
401 | 407 | " )\n", |
402 | 408 | " continue\n", |
403 | 409 | "\n", |
|
0 commit comments