diff --git a/.github/configs/amd-master.yaml b/.github/configs/amd-master.yaml index c6d06c2e1..98c0d4bc2 100644 --- a/.github/configs/amd-master.yaml +++ b/.github/configs/amd-master.yaml @@ -1861,8 +1861,8 @@ dsr1-fp4-mi355x-sglang-disagg: - "DECODE_NODES=1" - "DECODE_MTP_SIZE=0" -dsr1-fp4-mi355x-sglang-disagg-mtp: - image: lmsysorg/sglang-rocm:v0.5.12-rocm720-mi35x-20260519 +dsr1-fp4-mi355x-sglang-disagg-1k1k-mtp: + image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260529 model: amd/DeepSeek-R1-0528-MXFP4-v2 model-prefix: dsr1 runner: mi355x-disagg @@ -1970,7 +1970,19 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: additional-settings: - "DECODE_NODES=1" - "DECODE_MTP_SIZE=1" + +dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp: + image: lmsysorg/sglang-rocm:v0.5.12.post1-rocm720-mi35x-20260529 + model: amd/DeepSeek-R1-0528-MXFP4-v2 + model-prefix: dsr1 + runner: mi355x-disagg + precision: fp4 + framework: sglang-disagg + multinode: true + disagg: true + scenarios: + fixed-seq-len: - isl: 8192 osl: 1024 search-space: @@ -2030,11 +2042,11 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: dp-attn: false additional-settings: - "DECODE_NODES=2" - - "DECODE_MTP_SIZE=2" + - "DECODE_MTP_SIZE=3" # 1*DEP8 + 1*DEP8 - spec-decoding: "mtp" - conc-list: [ 128, 512 ] + conc-list: [ 640, 512 ] prefill: num-worker: 1 tp: 8 @@ -2049,11 +2061,11 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: dp-attn: true additional-settings: - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=1" + - "DECODE_MTP_SIZE=3" # 1*DEP8 + 1*DEP8 - spec-decoding: "mtp" - conc-list: [ 64, 256 ] + conc-list: [ 288, 256 ] prefill: num-worker: 1 tp: 8 @@ -2068,7 +2080,46 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: dp-attn: true additional-settings: - "DECODE_NODES=1" - - "DECODE_MTP_SIZE=1" + - "DECODE_MTP_SIZE=3" + + + # 1*DEP8 + 1*DEP8 + - spec-decoding: "mtp" + conc-list: [ 144, 128 ] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + 
additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=3" + + # 1*DEP8 + 1*DEP8 + - spec-decoding: "mtp" + conc-list: [ 72, 64 ] + prefill: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "PREFILL_NODES=1" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + additional-settings: + - "DECODE_NODES=1" + - "DECODE_MTP_SIZE=3" # 2*DEP8 + 1*DEP8 - spec-decoding: "mtp" @@ -2088,7 +2139,7 @@ dsr1-fp4-mi355x-sglang-disagg-mtp: additional-settings: - "DECODE_NODES=1" - "DECODE_MTP_SIZE=1" - + # DSv4-Pro FP4 on MI355X via SGLang. Uses a rocm720 mi35x image built off the # amd/deepseek_v4 branch in sgl-project/sglang; the SHA is encoded in the diff --git a/benchmarks/multi_node/amd_utils/env.sh b/benchmarks/multi_node/amd_utils/env.sh index 8a141eaeb..71d2653bd 100755 --- a/benchmarks/multi_node/amd_utils/env.sh +++ b/benchmarks/multi_node/amd_utils/env.sh @@ -124,9 +124,11 @@ else # ========================================================================= export SGLANG_USE_AITER=1 + export AITER_LOG_LEVEL=ERROR export SGLANG_MORI_DISPATCH_DTYPE=auto - export SGLANG_MORI_FP8_COMB=true + export MORI_COMBINE_DTYPE_PREFILL=fp8_direct_cast + export MORI_COMBINE_DTYPE_DECODE=fp8 export SGLANG_MORI_QP_PER_TRANSFER=4 export SGLANG_MORI_NUM_WORKERS=4 export MORI_IO_SQ_BACKOFF_TIMEOUT_US=50000 diff --git a/perf-changelog.yaml b/perf-changelog.yaml index c6ddcbe0d..5b7d56cd1 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -3201,6 +3201,13 @@ - "MoRI conn.py overlay (48e459bd) via job.slurm; launcher qwen3.5_fp4_mi355x_sglang-disagg.sh" pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1579 +- config-keys: + - dsr1-fp4-mi355x-sglang-disagg-8k1k-mtp + description: + - "Bump the image to May 29 (v0.5.12.post1-rocm720-mi35x-20260529)" + - "Add conc 128/256 new sweep point" + pr-link: https://github.com/SemiAnalysisAI/InferenceX/pull/1584 + - config-keys: - glm5-fp8-gb300-dynamo-sglang description: