BioinfoMachineLearning
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 1 deletion b/‎.gitignore‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎CHANGELOG.md‎
Lines changed: 2 additions & 2 deletions b/‎CHANGELOG.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎README.md‎
Lines changed: 4 additions & 4 deletions b/‎README.md‎
Lines changed: 4 additions & 4 deletions
diff --git a/‎notebooks/astex_diverse_inference_results_plotting.ipynb‎
Lines changed: 24 additions & 42 deletions b/‎notebooks/astex_diverse_inference_results_plotting.ipynb‎
Lines changed: 24 additions & 42 deletions
diff --git a/‎notebooks/astex_method_interaction_analysis_plotting.ipynb‎
Lines changed: 2 additions & 4 deletions b/‎notebooks/astex_method_interaction_analysis_plotting.ipynb‎
Lines changed: 2 additions & 4 deletions
diff --git a/‎notebooks/astex_method_interaction_analysis_plotting.py‎
Lines changed: 2 additions & 4 deletions b/‎notebooks/astex_method_interaction_analysis_plotting.py‎
Lines changed: 2 additions & 4 deletions
@@ -180,7 +180,7 @@ configs/local/default.yaml
 /forks/FlowDock/checkpoints/
 /forks/NeuralPLexer/NeuralPLexer/
 /forks/NeuralPLexer/**/neuralplexermodels*
-/forks/NeuralPLexer3/prediction_inputs/
+/forks/NeuralPLexer*/prediction_inputs/
 /forks/P2Rank/
 /forks/*/*inference*/
 /forks/RoseTTAFold-All-Atom/blast-2.2.26
 
@@ -1,13 +1,13 @@
 ### 0.6.0 - TBD
 
-- Added new baseline methods (AlphaFold 3, NeuralPLexer3, Chai-1 with multiple sequence alignments (MSAs))
+- Added new baseline methods (AlphaFold 3, Chai-1 with multiple sequence alignments (MSAs))
 - Added new binding site-focused implementation of `complex_alignment.py` based on PyMOL's `align` command, which in many cases yields 3x better docking evaluation scores for baseline methods
 - Added new script for analyzing baseline methods' protein conformational changes w.r.t. input (e.g., AlphaFold) protein structures and the corresponding reference (crystal) protein structures
 - Added the new centroid RMSD and PLIF-EMD/WM metrics
 - Added a failure mode analysis notebook
 - Introducing DockGen-E, a new version of the DockGen benchmark dataset featuring enhanced biomolecular context for docking and co-folding predictions - namely, now all DockGen complexes represent the first (biologically relevant) bioassembly of the corresponding PDB structure
 - For the single-ligand datasets (i.e., Astex Diverse, PoseBusters Benchmark, and DockGen), now providing each baseline method with primary *and cofactor* ligand SMILES strings for prediction, to enhance the biomolecular context of these methods' predicted structures - as a result, for these single-ligand datasets, now the predicted ligand *most similar* to the primary ligand (in terms of both Tanimoto and structural similarity) is selected for scoring
-- Updated Chai-1's inference code to commit `44375d5d4ea44c0b5b7204519e63f40b063e4a7c`, and ran it also with NeuralPLexer3's (paired) MSAs
+- Updated Chai-1's inference code to commit `44375d5d4ea44c0b5b7204519e63f40b063e4a7c`, and ran it also with standardized (paired) MSAs
 - Replaced all AlphaFold 3 server predictions of each dataset's protein structures with predictions from AlphaFold 3's local inference code
 - Pocket-only benchmarking has been deprecated
 - With all the above changes in place, simplified, re-ran, and re-analyzed all baseline methods for each benchmark dataset, and updated the baseline predictions and datasets (now containing standardized MSAs) hosted on Zenodo
 
@@ -201,6 +201,10 @@ rm diffdock_benchmark_method_predictions.tar.gz
 wget https://zenodo.org/records/14629652/files/dynamicbind_benchmark_method_predictions.tar.gz
 tar -xzf dynamicbind_benchmark_method_predictions.tar.gz
 rm dynamicbind_benchmark_method_predictions.tar.gz
+# NeuralPLexer predictions and results
+wget https://zenodo.org/records/14629652/files/neuralplexer_benchmark_method_predictions.tar.gz
+tar -xzf neuralplexer_benchmark_method_predictions.tar.gz
+rm neuralplexer_benchmark_method_predictions.tar.gz
 # RoseTTAFold-All-Atom predictions and results
 wget https://zenodo.org/records/14629652/files/rfaa_benchmark_method_predictions.tar.gz
 tar -xzf rfaa_benchmark_method_predictions.tar.gz
@@ -213,10 +217,6 @@ rm chai_benchmark_method_predictions.tar.gz
 wget https://zenodo.org/records/14629652/files/af3_benchmark_method_predictions.tar.gz
 tar -xzf af3_benchmark_method_predictions.tar.gz
 rm af3_benchmark_method_predictions.tar.gz
-# NeuralPLexer3 predictions and results
-wget https://zenodo.org/records/14629652/files/neuralplexer3_benchmark_method_predictions.tar.gz
-tar -xzf neuralplexer3_benchmark_method_predictions.tar.gz
-rm neuralplexer3_benchmark_method_predictions.tar.gz
 ```
 
 ### Downloading benchmark method interactions
 
@@ -64,13 +64,12 @@
     "    \"vina_p2rank\",\n",
     "    \"diffdock\",\n",
     "    \"dynamicbind\",\n",
+    "    \"neuralplexer\",\n",
     "    \"rfaa\",\n",
     "    \"chai-lab_ss\",\n",
     "    \"chai-lab\",\n",
     "    \"alphafold3_ss\",\n",
     "    \"alphafold3\",\n",
-    "    \"neuralplexer3_ss\",\n",
-    "    \"neuralplexer3\",\n",
     "]\n",
     "max_num_repeats_per_method = 3\n",
     "\n",
@@ -80,10 +79,10 @@
     "globals()[\"dynamicbind_output_dir\"] = os.path.join(\n",
     "    \"..\", \"forks\", \"DynamicBind\", \"inference\", \"outputs\", \"results\"\n",
     ")\n",
+    "globals()[\"neuralplexer_output_dir\"] = os.path.join(\"..\", \"forks\", \"NeuralPLexer\", \"inference\")\n",
     "globals()[\"rfaa_output_dir\"] = os.path.join(\"..\", \"forks\", \"RoseTTAFold-All-Atom\", \"inference\")\n",
     "globals()[\"chai-lab_output_dir\"] = os.path.join(\"..\", \"forks\", \"chai-lab\", \"inference\")\n",
     "globals()[\"alphafold3_output_dir\"] = os.path.join(\"..\", \"forks\", \"alphafold3\", \"inference\")\n",
-    "globals()[\"neuralplexer3_output_dir\"] = os.path.join(\"..\", \"forks\", \"NeuralPLexer3\", \"inference\")\n",
     "for repeat_index in range(1, max_num_repeats_per_method + 1):\n",
     "    # PLIF metrics\n",
     "    globals()[f\"astex_plif_metrics_csv_filepath_{repeat_index}\"] = \"astex_diverse_plif_metrics.csv\"\n",
@@ -128,6 +127,20 @@
     "        )\n",
     "    )\n",
     "\n",
+    "    # NeuralPLexer results\n",
+    "    globals()[f\"neuralplexer_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n",
+    "        globals()[\"neuralplexer_output_dir\"],\n",
+    "        f\"neuralplexer_astex_diverse_outputs_{repeat_index}\",\n",
+    "        \"bust_results.csv\",\n",
+    "    )\n",
+    "    globals()[f\"neuralplexer_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n",
+    "        os.path.join(\n",
+    "            globals()[\"neuralplexer_output_dir\"],\n",
+    "            f\"neuralplexer_astex_diverse_outputs_{repeat_index}_relaxed\",\n",
+    "            \"bust_results.csv\",\n",
+    "        )\n",
+    "    )\n",
+    "\n",
     "    # RoseTTAFold-All-Atom results\n",
     "    globals()[f\"rfaa_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n",
     "        globals()[\"rfaa_output_dir\"],\n",
@@ -192,59 +205,29 @@
     "        \"bust_results.csv\",\n",
     "    )\n",
     "\n",
-    "    # NeuralPLexer3 (Single-Seq) results\n",
-    "    globals()[f\"neuralplexer3_ss_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n",
-    "        globals()[\"neuralplexer3_output_dir\"],\n",
-    "        f\"neuralplexer3_ss_astex_diverse_outputs_{repeat_index}\",\n",
-    "        \"bust_results.csv\",\n",
-    "    )\n",
-    "    globals()[f\"neuralplexer3_ss_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n",
-    "        os.path.join(\n",
-    "            globals()[\"neuralplexer3_output_dir\"],\n",
-    "            f\"neuralplexer3_ss_astex_diverse_outputs_{repeat_index}_relaxed\",\n",
-    "            \"bust_results.csv\",\n",
-    "        )\n",
-    "    )\n",
-    "\n",
-    "    # NeuralPLexer3 results\n",
-    "    globals()[f\"neuralplexer3_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n",
-    "        globals()[\"neuralplexer3_output_dir\"],\n",
-    "        f\"neuralplexer3_astex_diverse_outputs_{repeat_index}\",\n",
-    "        \"bust_results.csv\",\n",
-    "    )\n",
-    "    globals()[f\"neuralplexer3_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n",
-    "        os.path.join(\n",
-    "            globals()[\"neuralplexer3_output_dir\"],\n",
-    "            f\"neuralplexer3_astex_diverse_outputs_{repeat_index}_relaxed\",\n",
-    "            \"bust_results.csv\",\n",
-    "        )\n",
-    "    )\n",
-    "\n",
     "# Mappings\n",
     "method_mapping = {\n",
     "    \"vina_p2rank\": \"P2Rank-Vina\",\n",
     "    \"diffdock\": \"DiffDock-L\",\n",
     "    \"dynamicbind\": \"DynamicBind\",\n",
+    "    \"neuralplexer\": \"NeuralPLexer\",\n",
     "    \"rfaa\": \"RoseTTAFold-AA\",\n",
     "    \"chai-lab_ss\": \"Chai-1-Single-Seq\",\n",
     "    \"chai-lab\": \"Chai-1\",\n",
     "    \"alphafold3_ss\": \"AF3-Single-Seq\",\n",
     "    \"alphafold3\": \"AF3\",\n",
-    "    \"neuralplexer3_ss\": \"NP3-Single-Seq\",\n",
-    "    \"neuralplexer3\": \"NP3\",\n",
     "}\n",
     "\n",
     "method_category_mapping = {\n",
     "    \"vina_p2rank\": \"Conventional blind\",\n",
     "    \"diffdock\": \"DL-based blind\",\n",
     "    \"dynamicbind\": \"DL-based blind\",\n",
+    "    \"neuralplexer\": \"DL-based blind\",\n",
     "    \"rfaa\": \"DL-based blind\",\n",
     "    \"chai-lab_ss\": \"DL-based blind\",\n",
     "    \"chai-lab\": \"DL-based blind\",\n",
     "    \"alphafold3_ss\": \"DL-based blind\",\n",
     "    \"alphafold3\": \"DL-based blind\",\n",
-    "    \"neuralplexer3_ss\": \"DL-based blind\",\n",
-    "    \"neuralplexer3\": \"DL-based blind\",\n",
     "}"
    ]
   },
@@ -522,7 +505,7 @@
     "colors = [\"#AB8042\", \"#FB8072\", \"#BEBADA\", \"#FCCDE5\"]\n",
     "\n",
     "bar_width = 0.5\n",
-    "r1 = [item - 0.5 for item in range(2, 22, 2)]\n",
+    "r1 = [item - 0.5 for item in range(2, 20, 2)]\n",
     "r2 = [x + bar_width for x in r1]\n",
     "r3 = [x + bar_width for x in r2]\n",
     "\n",
@@ -900,7 +883,7 @@
     "\n",
     "# add labels, titles, ticks, etc.\n",
     "axis.set_ylabel(\"Percentage of predictions\")\n",
-    "axis.set_xlim(1, 21 + 0.1)\n",
+    "axis.set_xlim(1, 19 + 0.1)\n",
     "axis.set_ylim(0, 150)\n",
     "\n",
     "assert all(\n",
@@ -1035,22 +1018,21 @@
     "axis.grid(axis=\"y\", color=\"#EAEFF8\")\n",
     "axis.set_axisbelow(True)\n",
     "\n",
-    "axis.set_xticks([2, 2 + 1e-3, 4, 6, 8, 10, 12, 12 + 1e-3, 14, 16, 18, 20])\n",
+    "axis.set_xticks([2, 2 + 1e-3, 4, 6, 8, 10, 11, 12, 14, 16, 18])\n",
     "axis.set_xticks([1 + 0.1], minor=True)\n",
     "axis.set_xticklabels(\n",
     "    [\n",
     "        \"P2Rank-Vina\",\n",
     "        \"Conventional blind\",\n",
     "        \"DiffDock-L\",\n",
     "        \"DynamicBind\",\n",
+    "        \"NeuralPLexer\",\n",
     "        \"RoseTTAFold-AA\",\n",
+    "        \"DL-based blind\",\n",
     "        \"Chai-1-Single-Seq\",\n",
     "        \"Chai-1\",\n",
-    "        \"DL-based blind\",\n",
     "        \"AF3-Single-Seq\",\n",
     "        \"AF3\",\n",
-    "        \"NP3-Single-Seq\",\n",
-    "        \"NP3\",\n",
     "    ]\n",
     ")\n",
     "\n",
@@ -1062,7 +1044,7 @@
     "axis.tick_params(axis=\"y\", which=\"major\", left=\"off\", right=\"on\", color=\"#EAEFF8\")\n",
     "\n",
     "# vertical alignment of xtick labels\n",
-    "vert_alignments = [0.0, -0.1, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0]\n",
+    "vert_alignments = [0.0, -0.1, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0]\n",
     "for tick, y in zip(axis.get_xticklabels(), vert_alignments):\n",
     "    tick.set_y(y)\n",
     "\n",
 
@@ -84,13 +84,12 @@
     "    \"vina_p2rank\",\n",
     "    \"diffdock\",\n",
     "    \"dynamicbind\",\n",
+    "    \"neuralplexer\",\n",
     "    \"rfaa\",\n",
     "    \"chai-lab_ss\",\n",
     "    \"chai-lab\",\n",
     "    \"alphafold3_ss\",\n",
     "    \"alphafold3\",\n",
-    "    \"neuralplexer3_ss\",\n",
-    "    \"neuralplexer3\",\n",
     "]\n",
     "max_num_repeats_per_method = (\n",
     "    1  # NOTE: Here, to simplify the analysis, we only consider the first run of each method\n",
@@ -110,13 +109,12 @@
     "    \"vina_p2rank\": \"P2Rank-Vina\",\n",
     "    \"diffdock\": \"DiffDock-L\",\n",
     "    \"dynamicbind\": \"DynamicBind\",\n",
+    "    \"neuralplexer\": \"NeuralPlexer\",\n",
     "    \"rfaa\": \"RoseTTAFold-AA\",\n",
     "    \"chai-lab_ss\": \"Chai-1-Single-Seq\",\n",
     "    \"chai-lab\": \"Chai-1\",\n",
     "    \"alphafold3_ss\": \"AF3-Single-Seq\",\n",
     "    \"alphafold3\": \"AF3\",\n",
-    "    \"neuralplexer3_ss\": \"NP3-Single-Seq\",\n",
-    "    \"neuralplexer3\": \"NP3\",\n",
     "}\n",
     "\n",
     "MAX_ASTEX_DIVERSE_ANALYSIS_PROTEIN_SEQUENCE_LENGTH = (\n",
 
@@ -49,13 +49,12 @@
     "vina_p2rank",
     "diffdock",
     "dynamicbind",
+    "neuralplexer",
     "rfaa",
     "chai-lab_ss",
     "chai-lab",
     "alphafold3_ss",
     "alphafold3",
-    "neuralplexer3_ss",
-    "neuralplexer3",
 ]
 max_num_repeats_per_method = (
     1  # NOTE: Here, to simplify the analysis, we only consider the first run of each method
@@ -75,13 +74,12 @@
     "vina_p2rank": "P2Rank-Vina",
     "diffdock": "DiffDock-L",
     "dynamicbind": "DynamicBind",
+    "neuralplexer": "NeuralPlexer",
     "rfaa": "RoseTTAFold-AA",
     "chai-lab_ss": "Chai-1-Single-Seq",
     "chai-lab": "Chai-1",
     "alphafold3_ss": "AF3-Single-Seq",
     "alphafold3": "AF3",
-    "neuralplexer3_ss": "NP3-Single-Seq",
-    "neuralplexer3": "NP3",
 }
 
 MAX_ASTEX_DIVERSE_ANALYSIS_PROTEIN_SEQUENCE_LENGTH = (