BioinfoMachineLearning
diff --git a/‎.gitignore‎
Lines changed: 6 additions & 1 deletion b/‎.gitignore‎
Lines changed: 6 additions & 1 deletion
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 16 additions & 16 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 16 additions & 16 deletions
diff --git a/‎CHANGELOG.md‎
Lines changed: 26 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 26 additions & 0 deletions
@@ -147,6 +147,7 @@ dmypy.json
 # PoseBench
 configs/local/default.yaml
 /*cache_dir/
+/*alignment_viz/
 /casp15_ligand_scoring/
 /data/
 /ensemble_generation_scripts/
@@ -163,6 +164,7 @@ configs/local/default.yaml
 
 # Forks
 /workdir/
+/forks/alphafold3/*prediction_outputs/
 /forks/chai-lab/chai-lab/
 /forks/chai-lab/prediction_inputs/
 /forks/chai-lab/prediction_outputs/
@@ -174,10 +176,13 @@ configs/local/default.yaml
 /forks/DynamicBind/workdir/
 /forks/FABind/ckpt/best_model.bin
 /forks/FABind/FABind/
+/forks/FlowDock/FlowDock/
+/forks/FlowDock/checkpoints/
 /forks/NeuralPLexer/NeuralPLexer/
 /forks/NeuralPLexer/**/neuralplexermodels*
+/forks/NeuralPLexer*/prediction_inputs/
 /forks/P2Rank/
-/forks/*/inference*/
+/forks/*/*inference*/
 /forks/RoseTTAFold-All-Atom/blast-2.2.26
 /forks/RoseTTAFold-All-Atom/rf2aa/config/inference/*_rfaa_inference.yaml
 /forks/RoseTTAFold-All-Atom/csblast-2.2.3
 
@@ -5,7 +5,7 @@ exclude: "^forks/"
 
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v5.0.0
     hooks:
       # list of supported hooks: https://pre-commit.com/hooks.html
       - id: trailing-whitespace
@@ -18,32 +18,32 @@ repos:
       - id: check-toml
       - id: check-case-conflict
       - id: check-added-large-files
-        args: ["--maxkb=15000"]
+        args: ["--maxkb=40000"]
 
   # python code formatting
   - repo: https://github.com/psf/black
-    rev: 23.1.0
+    rev: 24.10.0
     hooks:
       - id: black
         args: [--line-length, "99"]
 
   # python import sorting
   - repo: https://github.com/PyCQA/isort
-    rev: 5.12.0
+    rev: 5.13.2
     hooks:
       - id: isort
         args: ["--profile", "black", "--filter-files"]
 
   # python upgrading syntax to newer version
   - repo: https://github.com/asottile/pyupgrade
-    rev: v3.3.1
+    rev: v3.19.1
     hooks:
       - id: pyupgrade
         args: [--py38-plus]
 
   # python docstring formatting
   - repo: https://github.com/myint/docformatter
-    rev: v1.7.4
+    rev: v1.7.5
     hooks:
       - id: docformatter
         args:
@@ -57,7 +57,7 @@ repos:
 
   # python docstring coverage checking
   - repo: https://github.com/econchick/interrogate
-    rev: 1.5.0 # or master if you're bold
+    rev: 1.7.0 # or master if you're bold
     hooks:
       - id: interrogate
         args:
@@ -74,7 +74,7 @@ repos:
 
   # python check (PEP8), programming errors and code complexity
   - repo: https://github.com/PyCQA/flake8
-    rev: 6.0.0
+    rev: 7.1.1
     hooks:
       - id: flake8
         args:
@@ -88,28 +88,28 @@ repos:
 
   # python security linter
   - repo: https://github.com/PyCQA/bandit
-    rev: "1.7.5"
+    rev: "1.8.0"
     hooks:
       - id: bandit
         args: ["-s", "B101"]
 
   # yaml formatting
   - repo: https://github.com/pre-commit/mirrors-prettier
-    rev: v3.0.0-alpha.6
+    rev: v4.0.0-alpha.8
     hooks:
       - id: prettier
         types: [yaml]
         exclude: "environment.yaml"
 
   # shell scripts linter
   - repo: https://github.com/shellcheck-py/shellcheck-py
-    rev: v0.9.0.2
+    rev: v0.10.0.1
     hooks:
       - id: shellcheck
 
   # md formatting
   - repo: https://github.com/executablebooks/mdformat
-    rev: 0.7.16
+    rev: 0.7.21
     hooks:
       - id: mdformat
         args: ["--number"]
@@ -122,22 +122,22 @@ repos:
 
   # word spelling linter
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.4
+    rev: v2.3.0
     hooks:
       - id: codespell
         args:
-          - --skip=logs/**,data/**,*.ipynb,posebench/utils/data_utils.py,posebench/utils/residue_utils.py,posebench/data/components/protein_fasta_preparation.py,posebench/models/minimize_energy.py,posebench/data/components/create_casp15_ensemble_input_csv.py,posebench/analysis/casp15_ligand_scoring/casp_parser.py
+          - --skip=logs/**,data/**,*.ipynb,posebench/utils/data_utils.py,posebench/utils/residue_utils.py,posebench/data/components/fasta_preparation.py,posebench/models/minimize_energy.py,posebench/data/components/create_casp15_ensemble_input_csv.py,posebench/analysis/casp15_ligand_scoring/casp_parser.py,*Components-smiles-stereo-oe.smi,notebooks/pdb_reports/transferase/*
           # - --ignore-words-list=abc,def
 
   # jupyter notebook cell output clearing
   - repo: https://github.com/kynan/nbstripout
-    rev: 0.6.1
+    rev: 0.8.1
     hooks:
       - id: nbstripout
 
   # jupyter notebook linting
   - repo: https://github.com/nbQA-dev/nbQA
-    rev: 1.6.3
+    rev: 1.9.1
     hooks:
       - id: nbqa-black
         args: ["--line-length=99"]
 
@@ -1,3 +1,29 @@
+### 0.6.0 - 02/09/2025
+
+**Additions**:
+
+- Added new baseline methods (AlphaFold 3, Chai-1 with multiple sequence alignments (MSAs))
+- Added new binding site-focused implementation of `complex_alignment.py` based on PyMOL's `align` command, which in many cases yields 3x better docking evaluation scores for baseline methods
+- Added new script for analyzing baseline methods' protein conformational changes w.r.t. input (e.g., AlphaFold) protein structures and the corresponding reference (crystal) protein structures
+- Added the new centroid RMSD and **PLIF-EMD/WM** metrics (n.b., see new arXiv preprint for more details)
+- Added a failure mode analysis notebook (n.b., see new arXiv preprint for more details)
+
+**Changes**:
+
+- Introducing **DockGen-E**, a new version of the DockGen benchmark dataset featuring enhanced biomolecular context for docking and co-folding predictions - namely, now all DockGen complexes represent the first (biologically relevant) bioassembly of the corresponding PDB structure
+- For the single-ligand datasets (i.e., Astex Diverse, PoseBusters Benchmark, and DockGen), now providing each baseline method with primary *and cofactor* ligand SMILES strings for prediction, to enhance the biomolecular context of these methods' predicted structures - as a result, for these single-ligand datasets, now the predicted ligand *most similar* to the primary ligand (in terms of both Tanimoto and structural similarity) is selected for scoring (which adds a further layer of difficulty for baseline methods)
+- Updated Chai-1's inference code to commit `44375d5d4ea44c0b5b7204519e63f40b063e4a7c`, and ran it also with standardized (paired) MSAs
+- Replaced all AlphaFold 3 server predictions of each dataset's protein structures with predictions from AlphaFold 3's local inference code
+
+**Deprecations**:
+
+- Pocket-only benchmarking has been deprecated
+
+**Results**:
+
+- With all the above changes in place, simplified, re-ran, and re-analyzed all baseline methods for each benchmark dataset, and updated the baseline predictions and datasets (now containing standardized MSAs) hosted on Zenodo
+- **NOTE**: The updated arXiv preprint should be publicly available by 02/12/2025
+
 ### 0.5.0 - 09/30/2024
 
 - Added results with AlphaFold 3 predicted structures (now the default)