Skip to content

Commit bf89858

Browse files
authored
Merge pull request #229 from VectorInstitute/bugfix/misc
Update server address write logic
2 parents c1aef46 + f8a7f2c commit bf89858

1 file changed

Lines changed: 18 additions & 8 deletions

File tree

vec_inf/client/_slurm_templates.py

Lines changed: 18 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -186,10 +186,15 @@ class SlurmScriptTemplate(TypedDict):
186186
],
187187
"write_to_json": [
188188
'\njson_path="{log_dir}/{model_name}.$SLURM_JOB_ID/{model_name}.$SLURM_JOB_ID.json"',
189-
'jq --arg server_addr "$server_address" \\',
190-
" '. + {{\"server_address\": $server_addr}}' \\",
191-
' "$json_path" > temp.json \\',
192-
' && mv temp.json "$json_path"',
189+
'tmp_json="${{json_path}}.tmp.$$"',
190+
"for _attempt in 1 2 3 4 5; do",
191+
' jq --arg server_addr "$server_address" \\',
192+
" '. + {{\"server_address\": $server_addr}}' \\",
193+
' "$json_path" > "$tmp_json" \\',
194+
' && mv "$tmp_json" "$json_path" \\',
195+
" && break",
196+
" sleep 2",
197+
"done",
193198
],
194199
"launch_cmd": {
195200
"vllm": [
@@ -303,10 +308,15 @@ class BatchModelLaunchScriptTemplate(TypedDict):
303308
"write_to_json": [
304309
"het_job_id=$(($SLURM_JOB_ID+{het_group_id}))",
305310
'json_path="{log_dir}/{slurm_job_name}.$het_job_id/{model_name}.$het_job_id.json"',
306-
'jq --arg server_addr "$server_address" \\',
307-
" '. + {{\"server_address\": $server_addr}}' \\",
308-
' "$json_path" > temp_{model_name}.json \\',
309-
' && mv temp_{model_name}.json "$json_path"\n',
311+
'tmp_json="${{json_path}}.tmp.$$"',
312+
"for _attempt in 1 2 3 4 5; do",
313+
' jq --arg server_addr "$server_address" \\',
314+
" '. + {{\"server_address\": $server_addr}}' \\",
315+
' "$json_path" > "$tmp_json" \\',
316+
' && mv "$tmp_json" "$json_path" \\',
317+
" && break",
318+
" sleep 2",
319+
"done\n",
310320
],
311321
"container_command": f"{CONTAINER_MODULE_NAME} exec --nv --containall {{image_path}} \\",
312322
"launch_cmd": {

0 commit comments

Comments
 (0)