Skip to content

Commit ef89b5b

Browse files
committed
Catch PDBBind analysis errors
1 parent 2e128f0 commit ef89b5b

1 file changed

Lines changed: 30 additions & 24 deletions

File tree

notebooks/dataset_interaction_analysis_plotting.ipynb

Lines changed: 30 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -221,35 +221,41 @@
221221
" for protein_filepath, ligand_filepath in tqdm(\n",
222222
" pdbbind_protein_ligand_filepath_pairs, desc=\"Processing PDBBind 2020 set\"\n",
223223
" ):\n",
224-
" temp_protein_filepath = create_temp_pdb_with_only_molecule_type_residues(\n",
225-
" protein_filepath, molecule_type=\"protein\"\n",
226-
" )\n",
227-
" ligand_mol = None\n",
228224
" try:\n",
229-
" ligand_mol = Chem.MolFromMolFile(ligand_filepath)\n",
230-
" except Exception as e:\n",
231-
" ligand_mol = Chem.MolFromMolFile(ligand_filepath, sanitize=False)\n",
232-
" if ligand_mol is None:\n",
233-
" print(\n",
234-
" f\"Using the `.mol2` file for PDBBind 2020 target {ligand_filepath} failed. We found a `.sdf` file instead and are trying to use that. Be aware that the `.sdf` files from PDBBind 2020 are missing chirality tags, although we will do our best to impute such information automatically using RDKit. Reference: https://www.blopig.com/blog/2021/09/watch-out-when-using-pdbbind.\"\n",
225+
" temp_protein_filepath = create_temp_pdb_with_only_molecule_type_residues(\n",
226+
" protein_filepath, molecule_type=\"protein\"\n",
235227
" )\n",
228+
" ligand_mol = None\n",
236229
" try:\n",
237-
" ligand_mol = Chem.MolFromMolFile(ligand_filepath.replace(\".mol2\", \".sdf\"))\n",
238-
" except Exception as e:\n",
239-
" ligand_mol = Chem.MolFromMolFile(\n",
240-
" ligand_filepath.replace(\".mol2\", \".sdf\"), sanitize=False\n",
241-
" )\n",
242-
" try:\n",
243-
" Chem.rdmolops.AssignAtomChiralTagsFromStructure(ligand_mol)\n",
230+
" ligand_mol = Chem.MolFromMolFile(ligand_filepath)\n",
244231
" except Exception as e:\n",
232+
" ligand_mol = Chem.MolFromMolFile(ligand_filepath, sanitize=False)\n",
233+
" if ligand_mol is None:\n",
245234
" print(\n",
246-
" f\"Could not assign chirality tags to the atoms in the PDBBind ligand molecule from {ligand_filepath}.\"\n",
235+
" f\"Using the `.mol2` file for PDBBind 2020 target {ligand_filepath} failed. We found a `.sdf` file instead and are trying to use that. Be aware that the `.sdf` files from PDBBind 2020 are missing chirality tags, although we will do our best to impute such information automatically using RDKit. Reference: https://www.blopig.com/blog/2021/09/watch-out-when-using-pdbbind.\"\n",
247236
" )\n",
248-
" if ligand_mol is None:\n",
249-
" raise ValueError(f\"Could not load PDBBind 2020 ligand from {ligand_filepath}.\")\n",
250-
" pc.load_protein_from_pdb(temp_protein_filepath)\n",
251-
" pc.load_ligands_from_mols([ligand_mol])\n",
252-
" pdbbind_protein_ligand_interaction_dfs.append(pc.calculate_interactions())\n",
237+
" try:\n",
238+
" ligand_mol = Chem.MolFromMolFile(ligand_filepath.replace(\".mol2\", \".sdf\"))\n",
239+
" except Exception as e:\n",
240+
" ligand_mol = Chem.MolFromMolFile(\n",
241+
" ligand_filepath.replace(\".mol2\", \".sdf\"), sanitize=False\n",
242+
" )\n",
243+
" try:\n",
244+
" Chem.rdmolops.AssignAtomChiralTagsFromStructure(ligand_mol)\n",
245+
" except Exception as e:\n",
246+
" print(\n",
247+
" f\"Could not assign chirality tags to the atoms in the PDBBind ligand molecule from {ligand_filepath}.\"\n",
248+
" )\n",
249+
" if ligand_mol is None:\n",
250+
" raise ValueError(f\"Could not load PDBBind 2020 ligand from {ligand_filepath}.\")\n",
251+
" pc.load_protein_from_pdb(temp_protein_filepath)\n",
252+
" pc.load_ligands_from_mols([ligand_mol])\n",
253+
" pdbbind_protein_ligand_interaction_dfs.append(pc.calculate_interactions())\n",
254+
" except Exception as e:\n",
255+
" print(\n",
256+
" f\"Error processing PDBBind filepaths {temp_protein_filepath} and {ligand_filepath} due to: {e}. Skipping...\"\n",
257+
" )\n",
258+
" continue\n",
253259
"\n",
254260
" # NOTE: we iteratively save the interaction dataframes to an HDF5 file\n",
255261
" with pd.HDFStore(\n",
@@ -397,7 +403,7 @@
397403
" dg_protein_ligand_interaction_dfs.append(pc.calculate_interactions())\n",
398404
" except Exception as e:\n",
399405
" print(\n",
400-
" f\"Error processing Dockgen filepath pari {temp_protein_filepath} and {ligand_filepath} due to: {e}. Skipping...\"\n",
406+
" f\"Error processing Dockgen filepaths {temp_protein_filepath} and {ligand_filepath} due to: {e}. Skipping...\"\n",
401407
" )\n",
402408
" continue\n",
403409
"\n",

0 commit comments

Comments
 (0)