Skip to content

Commit ed2337b

Browse files
committed
Analyze RMSD plots further
1 parent 886aa22 commit ed2337b

1 file changed

Lines changed: 15 additions & 0 deletions

File tree

src/data/components/plot_dataset_rmsd.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,8 @@ def plot_dataset_rmsd(
105105
filtered_ids_to_keep_file: Optional[str] = None,
106106
filtered_ids_to_skip: Optional[Set[str]] = None,
107107
is_casp_dataset: bool = False,
108+
accurate_rmsd_threshold: float = 4.0,
109+
accurate_tm_score_threshold: float = 0.7,
108110
):
109111
"""Plot the RMSD between predicted and reference protein structures in a given dataset.
110112
@@ -117,6 +119,8 @@ def plot_dataset_rmsd(
117119
:param filtered_ids_to_keep_file: File containing IDs of sequences to keep.
118120
:param filtered_ids_to_skip: Set of IDs of sequences to skip.
119121
:param is_casp_dataset: Whether the dataset is a CASP dataset.
122+
:param accurate_rmsd_threshold: RMSD threshold for accurate predictions.
123+
:param accurate_tm_score_threshold: TM-score threshold for accurate predictions.
120124
"""
121125

122126
# Filter out sequences that are not in the filtered_ids_file
@@ -181,6 +185,17 @@ def plot_dataset_rmsd(
181185

182186
# Plot the RMSD values
183187

188+
accurate_predictions_percent = (
189+
dataset_df[
190+
(dataset_df["RMSD"] < accurate_rmsd_threshold)
191+
& (dataset_df["TM-score"] > accurate_tm_score_threshold)
192+
].shape[0]
193+
/ dataset_df.shape[0]
194+
)
195+
logging.info(
196+
f"For the {dataset_name} dataset, {accurate_predictions_percent * 100:.2f}% of the predictions have RMSD < {accurate_rmsd_threshold} and TM-score > {accurate_tm_score_threshold}."
197+
)
198+
184199
plot_dir = Path(output_dir) / ("public_plots" if is_casp_dataset else "plots")
185200
plot_dir.mkdir(exist_ok=True)
186201

0 commit comments

Comments
 (0)