Skip to content

Commit be68709

Browse files
committed
Update methods
1 parent cba896f commit be68709

18 files changed

Lines changed: 145 additions & 268 deletions

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ configs/local/default.yaml
180180
/forks/FlowDock/checkpoints/
181181
/forks/NeuralPLexer/NeuralPLexer/
182182
/forks/NeuralPLexer/**/neuralplexermodels*
183-
/forks/NeuralPLexer3/prediction_inputs/
183+
/forks/NeuralPLexer*/prediction_inputs/
184184
/forks/P2Rank/
185185
/forks/*/*inference*/
186186
/forks/RoseTTAFold-All-Atom/blast-2.2.26

CHANGELOG.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
### 0.6.0 - TBD
22

3-
- Added new baseline methods (AlphaFold 3, NeuralPLexer3, Chai-1 with multiple sequence alignments (MSAs))
3+
- Added new baseline methods (AlphaFold 3, Chai-1 with multiple sequence alignments (MSAs))
44
- Added new binding site-focused implementation of `complex_alignment.py` based on PyMOL's `align` command, which in many cases yields 3x better docking evaluation scores for baseline methods
55
- Added new script for analyzing baseline methods' protein conformational changes w.r.t. input (e.g., AlphaFold) protein structures and the corresponding reference (crystal) protein structures
66
- Added the new centroid RMSD and PLIF-EMD/WM metrics
77
- Added a failure mode analysis notebook
88
- Introducing DockGen-E, a new version of the DockGen benchmark dataset featuring enhanced biomolecular context for docking and co-folding predictions - namely, now all DockGen complexes represent the first (biologically relevant) bioassembly of the corresponding PDB structure
99
- For the single-ligand datasets (i.e., Astex Diverse, PoseBusters Benchmark, and DockGen), now providing each baseline method with primary *and cofactor* ligand SMILES strings for prediction, to enhance the biomolecular context of these methods' predicted structures - as a result, for these single-ligand datasets, now the predicted ligand *most similar* to the primary ligand (in terms of both Tanimoto and structural similarity) is selected for scoring
10-
- Updated Chai-1's inference code to commit `44375d5d4ea44c0b5b7204519e63f40b063e4a7c`, and ran it also with NeuralPLexer3's (paired) MSAs
10+
- Updated Chai-1's inference code to commit `44375d5d4ea44c0b5b7204519e63f40b063e4a7c`, and ran it also with standardized (paired) MSAs
1111
- Replaced all AlphaFold 3 server predictions of each dataset's protein structures with predictions from AlphaFold 3's local inference code
1212
- Pocket-only benchmarking has been deprecated
1313
- With all the above changes in place, simplified, re-ran, and re-analyzed all baseline methods for each benchmark dataset, and updated the baseline predictions and datasets (now containing standardized MSAs) hosted on Zenodo

README.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,10 @@ rm diffdock_benchmark_method_predictions.tar.gz
201201
wget https://zenodo.org/records/14629652/files/dynamicbind_benchmark_method_predictions.tar.gz
202202
tar -xzf dynamicbind_benchmark_method_predictions.tar.gz
203203
rm dynamicbind_benchmark_method_predictions.tar.gz
204+
# NeuralPLexer predictions and results
205+
wget https://zenodo.org/records/14629652/files/neuralplexer_benchmark_method_predictions.tar.gz
206+
tar -xzf neuralplexer_benchmark_method_predictions.tar.gz
207+
rm neuralplexer_benchmark_method_predictions.tar.gz
204208
# RoseTTAFold-All-Atom predictions and results
205209
wget https://zenodo.org/records/14629652/files/rfaa_benchmark_method_predictions.tar.gz
206210
tar -xzf rfaa_benchmark_method_predictions.tar.gz
@@ -213,10 +217,6 @@ rm chai_benchmark_method_predictions.tar.gz
213217
wget https://zenodo.org/records/14629652/files/af3_benchmark_method_predictions.tar.gz
214218
tar -xzf af3_benchmark_method_predictions.tar.gz
215219
rm af3_benchmark_method_predictions.tar.gz
216-
# NeuralPLexer3 predictions and results
217-
wget https://zenodo.org/records/14629652/files/neuralplexer3_benchmark_method_predictions.tar.gz
218-
tar -xzf neuralplexer3_benchmark_method_predictions.tar.gz
219-
rm neuralplexer3_benchmark_method_predictions.tar.gz
220220
```
221221

222222
### Downloading benchmark method interactions

notebooks/astex_diverse_inference_results_plotting.ipynb

Lines changed: 24 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,12 @@
6464
" \"vina_p2rank\",\n",
6565
" \"diffdock\",\n",
6666
" \"dynamicbind\",\n",
67+
" \"neuralplexer\",\n",
6768
" \"rfaa\",\n",
6869
" \"chai-lab_ss\",\n",
6970
" \"chai-lab\",\n",
7071
" \"alphafold3_ss\",\n",
7172
" \"alphafold3\",\n",
72-
" \"neuralplexer3_ss\",\n",
73-
" \"neuralplexer3\",\n",
7473
"]\n",
7574
"max_num_repeats_per_method = 3\n",
7675
"\n",
@@ -80,10 +79,10 @@
8079
"globals()[\"dynamicbind_output_dir\"] = os.path.join(\n",
8180
" \"..\", \"forks\", \"DynamicBind\", \"inference\", \"outputs\", \"results\"\n",
8281
")\n",
82+
"globals()[\"neuralplexer_output_dir\"] = os.path.join(\"..\", \"forks\", \"NeuralPLexer\", \"inference\")\n",
8383
"globals()[\"rfaa_output_dir\"] = os.path.join(\"..\", \"forks\", \"RoseTTAFold-All-Atom\", \"inference\")\n",
8484
"globals()[\"chai-lab_output_dir\"] = os.path.join(\"..\", \"forks\", \"chai-lab\", \"inference\")\n",
8585
"globals()[\"alphafold3_output_dir\"] = os.path.join(\"..\", \"forks\", \"alphafold3\", \"inference\")\n",
86-
"globals()[\"neuralplexer3_output_dir\"] = os.path.join(\"..\", \"forks\", \"NeuralPLexer3\", \"inference\")\n",
8786
"for repeat_index in range(1, max_num_repeats_per_method + 1):\n",
8887
" # PLIF metrics\n",
8988
" globals()[f\"astex_plif_metrics_csv_filepath_{repeat_index}\"] = \"astex_diverse_plif_metrics.csv\"\n",
@@ -128,6 +127,20 @@
128127
" )\n",
129128
" )\n",
130129
"\n",
130+
" # NeuralPLexer results\n",
131+
" globals()[f\"neuralplexer_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n",
132+
" globals()[\"neuralplexer_output_dir\"],\n",
133+
" f\"neuralplexer_astex_diverse_outputs_{repeat_index}\",\n",
134+
" \"bust_results.csv\",\n",
135+
" )\n",
136+
" globals()[f\"neuralplexer_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n",
137+
" os.path.join(\n",
138+
" globals()[\"neuralplexer_output_dir\"],\n",
139+
" f\"neuralplexer_astex_diverse_outputs_{repeat_index}_relaxed\",\n",
140+
" \"bust_results.csv\",\n",
141+
" )\n",
142+
" )\n",
143+
"\n",
131144
" # RoseTTAFold-All-Atom results\n",
132145
" globals()[f\"rfaa_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n",
133146
" globals()[\"rfaa_output_dir\"],\n",
@@ -192,59 +205,29 @@
192205
" \"bust_results.csv\",\n",
193206
" )\n",
194207
"\n",
195-
" # NeuralPLexer3 (Single-Seq) results\n",
196-
" globals()[f\"neuralplexer3_ss_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n",
197-
" globals()[\"neuralplexer3_output_dir\"],\n",
198-
" f\"neuralplexer3_ss_astex_diverse_outputs_{repeat_index}\",\n",
199-
" \"bust_results.csv\",\n",
200-
" )\n",
201-
" globals()[f\"neuralplexer3_ss_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n",
202-
" os.path.join(\n",
203-
" globals()[\"neuralplexer3_output_dir\"],\n",
204-
" f\"neuralplexer3_ss_astex_diverse_outputs_{repeat_index}_relaxed\",\n",
205-
" \"bust_results.csv\",\n",
206-
" )\n",
207-
" )\n",
208-
"\n",
209-
" # NeuralPLexer3 results\n",
210-
" globals()[f\"neuralplexer3_astex_bust_results_csv_filepath_{repeat_index}\"] = os.path.join(\n",
211-
" globals()[\"neuralplexer3_output_dir\"],\n",
212-
" f\"neuralplexer3_astex_diverse_outputs_{repeat_index}\",\n",
213-
" \"bust_results.csv\",\n",
214-
" )\n",
215-
" globals()[f\"neuralplexer3_astex_relaxed_bust_results_csv_filepath_{repeat_index}\"] = (\n",
216-
" os.path.join(\n",
217-
" globals()[\"neuralplexer3_output_dir\"],\n",
218-
" f\"neuralplexer3_astex_diverse_outputs_{repeat_index}_relaxed\",\n",
219-
" \"bust_results.csv\",\n",
220-
" )\n",
221-
" )\n",
222-
"\n",
223208
"# Mappings\n",
224209
"method_mapping = {\n",
225210
" \"vina_p2rank\": \"P2Rank-Vina\",\n",
226211
" \"diffdock\": \"DiffDock-L\",\n",
227212
" \"dynamicbind\": \"DynamicBind\",\n",
213+
" \"neuralplexer\": \"NeuralPLexer\",\n",
228214
" \"rfaa\": \"RoseTTAFold-AA\",\n",
229215
" \"chai-lab_ss\": \"Chai-1-Single-Seq\",\n",
230216
" \"chai-lab\": \"Chai-1\",\n",
231217
" \"alphafold3_ss\": \"AF3-Single-Seq\",\n",
232218
" \"alphafold3\": \"AF3\",\n",
233-
" \"neuralplexer3_ss\": \"NP3-Single-Seq\",\n",
234-
" \"neuralplexer3\": \"NP3\",\n",
235219
"}\n",
236220
"\n",
237221
"method_category_mapping = {\n",
238222
" \"vina_p2rank\": \"Conventional blind\",\n",
239223
" \"diffdock\": \"DL-based blind\",\n",
240224
" \"dynamicbind\": \"DL-based blind\",\n",
225+
" \"neuralplexer\": \"DL-based blind\",\n",
241226
" \"rfaa\": \"DL-based blind\",\n",
242227
" \"chai-lab_ss\": \"DL-based blind\",\n",
243228
" \"chai-lab\": \"DL-based blind\",\n",
244229
" \"alphafold3_ss\": \"DL-based blind\",\n",
245230
" \"alphafold3\": \"DL-based blind\",\n",
246-
" \"neuralplexer3_ss\": \"DL-based blind\",\n",
247-
" \"neuralplexer3\": \"DL-based blind\",\n",
248231
"}"
249232
]
250233
},
@@ -522,7 +505,7 @@
522505
"colors = [\"#AB8042\", \"#FB8072\", \"#BEBADA\", \"#FCCDE5\"]\n",
523506
"\n",
524507
"bar_width = 0.5\n",
525-
"r1 = [item - 0.5 for item in range(2, 22, 2)]\n",
508+
"r1 = [item - 0.5 for item in range(2, 20, 2)]\n",
526509
"r2 = [x + bar_width for x in r1]\n",
527510
"r3 = [x + bar_width for x in r2]\n",
528511
"\n",
@@ -900,7 +883,7 @@
900883
"\n",
901884
"# add labels, titles, ticks, etc.\n",
902885
"axis.set_ylabel(\"Percentage of predictions\")\n",
903-
"axis.set_xlim(1, 21 + 0.1)\n",
886+
"axis.set_xlim(1, 19 + 0.1)\n",
904887
"axis.set_ylim(0, 150)\n",
905888
"\n",
906889
"assert all(\n",
@@ -1035,22 +1018,21 @@
10351018
"axis.grid(axis=\"y\", color=\"#EAEFF8\")\n",
10361019
"axis.set_axisbelow(True)\n",
10371020
"\n",
1038-
"axis.set_xticks([2, 2 + 1e-3, 4, 6, 8, 10, 12, 12 + 1e-3, 14, 16, 18, 20])\n",
1021+
"axis.set_xticks([2, 2 + 1e-3, 4, 6, 8, 10, 11, 12, 14, 16, 18])\n",
10391022
"axis.set_xticks([1 + 0.1], minor=True)\n",
10401023
"axis.set_xticklabels(\n",
10411024
" [\n",
10421025
" \"P2Rank-Vina\",\n",
10431026
" \"Conventional blind\",\n",
10441027
" \"DiffDock-L\",\n",
10451028
" \"DynamicBind\",\n",
1029+
" \"NeuralPLexer\",\n",
10461030
" \"RoseTTAFold-AA\",\n",
1031+
" \"DL-based blind\",\n",
10471032
" \"Chai-1-Single-Seq\",\n",
10481033
" \"Chai-1\",\n",
1049-
" \"DL-based blind\",\n",
10501034
" \"AF3-Single-Seq\",\n",
10511035
" \"AF3\",\n",
1052-
" \"NP3-Single-Seq\",\n",
1053-
" \"NP3\",\n",
10541036
" ]\n",
10551037
")\n",
10561038
"\n",
@@ -1062,7 +1044,7 @@
10621044
"axis.tick_params(axis=\"y\", which=\"major\", left=\"off\", right=\"on\", color=\"#EAEFF8\")\n",
10631045
"\n",
10641046
"# vertical alignment of xtick labels\n",
1065-
"vert_alignments = [0.0, -0.1, 0.0, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0]\n",
1047+
"vert_alignments = [0.0, -0.1, 0.0, 0.0, 0.0, 0.0, -0.1, 0.0, 0.0, 0.0, 0.0]\n",
10661048
"for tick, y in zip(axis.get_xticklabels(), vert_alignments):\n",
10671049
" tick.set_y(y)\n",
10681050
"\n",

notebooks/astex_method_interaction_analysis_plotting.ipynb

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,13 +84,12 @@
8484
" \"vina_p2rank\",\n",
8585
" \"diffdock\",\n",
8686
" \"dynamicbind\",\n",
87+
" \"neuralplexer\",\n",
8788
" \"rfaa\",\n",
8889
" \"chai-lab_ss\",\n",
8990
" \"chai-lab\",\n",
9091
" \"alphafold3_ss\",\n",
9192
" \"alphafold3\",\n",
92-
" \"neuralplexer3_ss\",\n",
93-
" \"neuralplexer3\",\n",
9493
"]\n",
9594
"max_num_repeats_per_method = (\n",
9695
" 1 # NOTE: Here, to simplify the analysis, we only consider the first run of each method\n",
@@ -110,13 +109,12 @@
110109
" \"vina_p2rank\": \"P2Rank-Vina\",\n",
111110
" \"diffdock\": \"DiffDock-L\",\n",
112111
" \"dynamicbind\": \"DynamicBind\",\n",
112+
" \"neuralplexer\": \"NeuralPlexer\",\n",
113113
" \"rfaa\": \"RoseTTAFold-AA\",\n",
114114
" \"chai-lab_ss\": \"Chai-1-Single-Seq\",\n",
115115
" \"chai-lab\": \"Chai-1\",\n",
116116
" \"alphafold3_ss\": \"AF3-Single-Seq\",\n",
117117
" \"alphafold3\": \"AF3\",\n",
118-
" \"neuralplexer3_ss\": \"NP3-Single-Seq\",\n",
119-
" \"neuralplexer3\": \"NP3\",\n",
120118
"}\n",
121119
"\n",
122120
"MAX_ASTEX_DIVERSE_ANALYSIS_PROTEIN_SEQUENCE_LENGTH = (\n",

notebooks/astex_method_interaction_analysis_plotting.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,12 @@
4949
"vina_p2rank",
5050
"diffdock",
5151
"dynamicbind",
52+
"neuralplexer",
5253
"rfaa",
5354
"chai-lab_ss",
5455
"chai-lab",
5556
"alphafold3_ss",
5657
"alphafold3",
57-
"neuralplexer3_ss",
58-
"neuralplexer3",
5958
]
6059
max_num_repeats_per_method = (
6160
1 # NOTE: Here, to simplify the analysis, we only consider the first run of each method
@@ -75,13 +74,12 @@
7574
"vina_p2rank": "P2Rank-Vina",
7675
"diffdock": "DiffDock-L",
7776
"dynamicbind": "DynamicBind",
77+
"neuralplexer": "NeuralPlexer",
7878
"rfaa": "RoseTTAFold-AA",
7979
"chai-lab_ss": "Chai-1-Single-Seq",
8080
"chai-lab": "Chai-1",
8181
"alphafold3_ss": "AF3-Single-Seq",
8282
"alphafold3": "AF3",
83-
"neuralplexer3_ss": "NP3-Single-Seq",
84-
"neuralplexer3": "NP3",
8583
}
8684

8785
MAX_ASTEX_DIVERSE_ANALYSIS_PROTEIN_SEQUENCE_LENGTH = (

0 commit comments

Comments
 (0)