Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ coverage.xml
htmlcov/
pytest-*.xml
*.DS_Store
localdata/

# Project-generated local data/artifacts
*.egg-info/
Expand Down
6 changes: 3 additions & 3 deletions scripts/analysis/pullevaldata.ps1
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
$Remote = "<USER>@monsoon.hpc.nau.edu"
$RemoteBase = "/scratch/<USER>/evals/cocci_eval/seeded_runs"
$LocalBase = "localdata/evals/cocci_eval/seeded_runs"
$Remote = "jmh2338@monsoon.hpc.nau.edu"
$RemoteBase = "/scratch/jmh2338/evals/bkp_eval/seeded_runs"
$LocalBase = "data/evals/bkp_eval/seeded_runs"

$Models = @("flagship1", "flagship2")
$Sets = 1..10 | ForEach-Object { "set_{0:D2}" -f $_ }
Expand Down
12 changes: 7 additions & 5 deletions scripts/hpc/evalffnnsweep.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
usage() {
echo "Usage: $0 <flagship1_manifest> <flagship2_manifest>"
echo ""
echo "Submits seeded Cocci evaluation jobs for two flagship ensemble manifests."
echo "Submits seeded evaluation jobs for two flagship ensemble manifests."
echo "Each set uses shared prepared/embedding/label artifacts and runs:"
echo " predict + eval + peptide compare"
echo ""
Expand All @@ -14,7 +14,8 @@ usage() {
echo " HPC_DIR default: /home/\$USER/test"
echo " SHARED default: /scratch/\$USER/evals/cocci_eval/combined"
echo " OUT_BASE default: /scratch/\$USER/evals/cocci_eval/seeded_runs"
echo " DATA_DIR default: /scratch/\$USER/data/CWP"
echo " DATA_DIR default: /scratch/\$USER/psp_data/CWP"
echo " EVAL_SCRIPT default: cocci_eval_pipeline.py (set bkp_eval_pipeline.py for BKP)"
echo " SET_START default: 1"
echo " SET_END default: 10"
echo " DRY_RUN default: 0 (1 = print sbatch commands only)"
Expand All @@ -36,7 +37,8 @@ MODEL2="$2"
HPC_DIR="${HPC_DIR:-/home/$USER/test}"
SHARED="${SHARED:-/scratch/$USER/evals/cocci_eval/combined}"
OUT_BASE="${OUT_BASE:-/scratch/$USER/evals/cocci_eval/seeded_runs}"
DATA_DIR="${DATA_DIR:-/scratch/$USER/data/CWP}"
DATA_DIR="${DATA_DIR:-/scratch/$USER/psp_data/CWP}"
EVAL_SCRIPT="${EVAL_SCRIPT:-cocci_eval_pipeline.py}"
SET_START="${SET_START:-1}"
SET_END="${SET_END:-10}"
DRY_RUN="${DRY_RUN:-0}"
Expand Down Expand Up @@ -69,13 +71,13 @@ submit_one() {

mkdir -p "${run_root}/prepared" "${run_root}/embeddings/artifacts" "${run_root}/labels"

# Reuse existing evaluation inputs from the shared combined run.
# Reuse existing evaluation inputs from the shared combined run root.
ln -sfn "${SHARED}/prepared/eval_metadata.tsv" "${run_root}/prepared/eval_metadata.tsv"
ln -sfn "${SHARED}/prepared/eval_proteins.fasta" "${run_root}/prepared/eval_proteins.fasta"
ln -sfn "${SHARED}/embeddings/artifacts/pts" "${run_root}/embeddings/artifacts/pts"
ln -sfn "${SHARED}/labels/labels_eval.pt" "${run_root}/labels/labels_eval.pt"

local exports="ALL,DATA_DIR=${DATA_DIR},EVAL_MODE=combined,ENSEMBLE_SET_INDEX=${set_index},EXPECTED_SET_INDEX=${set_index},SKIP_IF_EXISTS=0,RUN_PREP=0,RUN_EMBED=0,RUN_LABELS=0,RUN_PREDICT=1,RUN_EVAL=1,RUN_COMPARE=1,EMIT_FOLD_METRICS=1,INCLUDE_CURVES=1,PLOT_DIR=${run_root}/evaluation/plots"
local exports="ALL,DATA_DIR=${DATA_DIR},EVAL_SCRIPT=${EVAL_SCRIPT},EVAL_MODE=combined,ENSEMBLE_SET_INDEX=${set_index},EXPECTED_SET_INDEX=${set_index},SKIP_IF_EXISTS=0,RUN_PREP=0,RUN_EMBED=0,RUN_LABELS=0,RUN_PREDICT=1,RUN_EVAL=1,RUN_COMPARE=1,EMIT_FOLD_METRICS=1,INCLUDE_CURVES=1,PLOT_DIR=${run_root}/evaluation/plots"

local cmd=(
sbatch
Expand Down
23 changes: 18 additions & 5 deletions scripts/hpc/evaluateffnn.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ usage() {
echo ""
echo "Optional environment variables:"
echo " USE_SRUN default: 1 (set 0 to run without srun)"
echo " DATA_DIR default: /scratch/\$USER/data/CWP"
echo " DATA_DIR default: /scratch/\$USER/psp_data/CWP"
echo " EVAL_MODE default: combined (reactive|nonreactive|combined)"
echo " RUN_ROOT default: /scratch/\$USER/evals/\${SLURM_JOB_NAME}/\${EVAL_MODE}"
echo " SKIP_IF_EXISTS default: 1 (skip stage when output sentinel already exists)"
Expand All @@ -38,6 +38,7 @@ usage() {
echo " RUN_PREDICT default: 1"
echo " RUN_EVAL default: 1"
echo " RUN_COMPARE default: 1"
echo " EVAL_SCRIPT default: cocci_eval_pipeline.py (resolved from HPC working directory)"
echo ""
echo "Embedding/prediction/eval knobs:"
echo " MODEL_NAME default: esm2_t33_650M_UR50D"
Expand Down Expand Up @@ -85,7 +86,7 @@ fi

MODEL_ARTIFACT="$1"

DATA_DIR="${DATA_DIR:-/scratch/$USER/data/CWP}"
DATA_DIR="${DATA_DIR:-/scratch/$USER/psp_data/CWP}"
EVAL_MODE="${EVAL_MODE:-combined}"
RUN_ROOT_DEFAULT="/scratch/$USER/evals/${SLURM_JOB_NAME:-evaluate_ffnn}/${EVAL_MODE}"
RUN_ROOT="${2:-${RUN_ROOT:-$RUN_ROOT_DEFAULT}}"
Expand All @@ -97,6 +98,7 @@ RUN_LABELS="${RUN_LABELS:-1}"
RUN_PREDICT="${RUN_PREDICT:-1}"
RUN_EVAL="${RUN_EVAL:-1}"
RUN_COMPARE="${RUN_COMPARE:-1}"
EVAL_SCRIPT="${EVAL_SCRIPT:-cocci_eval_pipeline.py}"

MODEL_NAME="${MODEL_NAME:-esm2_t33_650M_UR50D}"
MAX_TOKENS="${MAX_TOKENS:-1022}"
Expand Down Expand Up @@ -173,11 +175,22 @@ run_launcher() {
fi
}

if [[ "${EVAL_SCRIPT}" = /* ]]; then
EVAL_SCRIPT_PATH="${EVAL_SCRIPT}"
else
EVAL_SCRIPT_PATH="${PWD}/${EVAL_SCRIPT}"
fi
if [ ! -f "${EVAL_SCRIPT_PATH}" ]; then
echo "[evaluateffnn] ERROR: EVAL_SCRIPT not found in HPC_DIR (${PWD}): ${EVAL_SCRIPT}"
exit 1
fi

echo "[evaluateffnn] model_artifact=${MODEL_ARTIFACT}"
echo "[evaluateffnn] data_dir=${DATA_DIR}"
echo "[evaluateffnn] eval_mode=${EVAL_MODE}"
echo "[evaluateffnn] run_root=${RUN_ROOT}"
echo "[evaluateffnn] ensemble_set_index=${ENSEMBLE_SET_INDEX}"
echo "[evaluateffnn] eval_script=${EVAL_SCRIPT_PATH}"

if [ -n "${EXPECTED_SET_INDEX}" ] && [ "${EXPECTED_SET_INDEX}" != "${ENSEMBLE_SET_INDEX}" ]; then
echo "[evaluateffnn] ERROR: EXPECTED_SET_INDEX=${EXPECTED_SET_INDEX} does not match ENSEMBLE_SET_INDEX=${ENSEMBLE_SET_INDEX}"
Expand All @@ -188,8 +201,8 @@ if [ "${RUN_PREP}" -eq 1 ]; then
if [ "${SKIP_IF_EXISTS}" -eq 1 ] && [ -s "${PREP_SUMMARY_JSON}" ]; then
echo "[prepare] skip existing: ${PREP_SUMMARY_JSON}"
else
echo "[prepare] building reduced Cocci inputs"
python -u cocci_eval_pipeline.py prepare \
echo "[prepare] building reduced evaluation inputs"
python -u "${EVAL_SCRIPT_PATH}" prepare \
--data-dir "${DATA_DIR}" \
--output-dir "${PREP_DIR}" \
--mode "${EVAL_MODE}"
Expand Down Expand Up @@ -334,7 +347,7 @@ if [ "${RUN_COMPARE}" -eq 1 ]; then
echo "[compare] skip existing: ${COMPARE_JSON}"
else
echo "[compare] running peptide-level predicted-ones comparison"
python -u cocci_eval_pipeline.py compare \
python -u "${EVAL_SCRIPT_PATH}" compare \
--prediction-fasta "${PRED_FASTA}" \
--metadata-tsv "${PREP_META}" \
--label-shard "${LABEL_SHARD}" \
Expand Down
Loading
Loading