批量跑EvalScope推理性能压测
#!/bin/bash
# Batch EvalScope inference load test.
#
# Runs `evalscope perf` once per concurrency level in PARALLEL_LIST, with
# the request count set to 5x the concurrency. Each run's stdout and stderr
# go to qwen3-32b_p<parallel>_n<number>.log in the current directory.
#
# A failed run does not stop the batch: it is reported on stderr, the
# remaining levels still run, and the script exits with status 1 at the end
# if any run failed.

# Concurrency levels to test (edit as needed).
PARALLEL_LIST=(131 132 133 134)

# Fixed parameters.
URL="http://10.63.114.139:8080/v1/chat/completions"
MODEL="/data/Qwen3-32B"
TOKENIZER="/home/tzc/quanzhongwenjian/Qwen3-32B"

# Number of runs whose evalscope invocation returned a non-zero status.
FAILED=0

for PARALLEL in "${PARALLEL_LIST[@]}"
do
  NUMBER=$((PARALLEL * 5))
  LOG_FILE="qwen3-32b_p${PARALLEL}_n${NUMBER}.log"

  echo "========================================="
  echo "Running test: parallel=${PARALLEL}, number=${NUMBER}"
  echo "Log: ${LOG_FILE}"
  echo "========================================="

  # Prompt and output lengths are pinned: min and max are both 2048.
  # All expansions are quoted so that paths containing spaces stay intact.
  if evalscope perf \
    --api openai \
    --url "${URL}" \
    --model "${MODEL}" \
    --tokenizer-path "${TOKENIZER}" \
    --dataset random \
    --number "${NUMBER}" \
    --parallel "${PARALLEL}" \
    --rate 10 \
    --min-prompt-length 2048 \
    --max-prompt-length 2048 \
    --min-tokens 2048 \
    --max-tokens 2048 \
    --extra-args '{"ignore_eos":true, "temperature":0.0, "top_p":1.0, "skip_special_tokens": true}' \
    --stream \
    --no-test-connection \
    --prefix-length 0 \
    > "${LOG_FILE}" 2>&1
  then
    echo "Finished: parallel=${PARALLEL}"
  else
    # $? here is still the exit status of the evalscope command above.
    echo "FAILED: parallel=${PARALLEL} (exit status $?), see ${LOG_FILE}" >&2
    FAILED=$((FAILED + 1))
  fi
done

echo "All tests done."

if (( FAILED > 0 )); then
  echo "${FAILED} of ${#PARALLEL_LIST[@]} runs failed." >&2
  exit 1
fi
执行脚本(将上面的脚本保存为 run_perf.sh,后台运行,全部输出写入 run_all.log):
# Start run_perf.sh in the background under nohup; its stdout and stderr
# are both redirected to run_all.log.
nohup bash run_perf.sh > run_all.log 2>&1 &
结果整理脚本
root@# cat parse_perf_txt1.sh
#!/bin/bash
# Summarize EvalScope perf logs into an aligned, space-separated table.
#
# Reads every qwen3-32b_p<parallel>_n<number>.log in the current directory
# and writes a header plus one row per finished, fully parsed log to
# result.txt.
#
# Environment:
#   GPU_COUNT  number of cards the throughput totals are divided by to get
#              the per-card columns (default: 4).

OUTPUT_FILE="result.txt"

# Print one table row from exactly seven fields. Columns are separated by
# alignment padding, so the fields themselves must not contain spaces.
print_row() {
  printf "%-10s %-10s %-12s %-10s %-15s %-30s %-30s\n" "$@"
}

# Print the last decimal number found on the last line of file $2 that
# contains the literal text $1. Prints nothing when there is no such line
# or the line has no decimal number.
# Taking a single match guarantees the result is one token even if the
# line carries more than one number.
extract_metric() {
  local label=$1 file=$2
  grep -F -- "$label" "$file" \
    | tail -n 1 \
    | grep -oE '[0-9]+\.[0-9]+' \
    | tail -n 1
}

# Build the result table. Returns 1 if GPU_COUNT is not a positive integer,
# 0 otherwise (logs that are unfinished or unparsable are skipped, not fatal).
main() {
  local gpu_count=${GPU_COUNT:-4}
  local name_re='_p([0-9]+)_n([0-9]+)'
  local file parallel number
  local ttft tpot req total_tps output_tps
  local ttft_ms tpot_ms single_total single_output

  if ! [[ "$gpu_count" =~ ^[1-9][0-9]*$ ]]; then
    echo "GPU_COUNT must be a positive integer, got '${gpu_count}'" >&2
    return 1
  fi

  # Header (aligned, space-separated).
  print_row "parallel" "number" "TTFT(ms)" "TPOT(ms)" "请求频率(req/s)" "单卡整体吞吐(tok/s)" "单卡输出吞吐(tok/s)" > "$OUTPUT_FILE"

  for file in qwen3-32b_p*_n*.log; do
    # When nothing matches, the glob stays as the literal pattern; skip it
    # instead of feeding a non-existent file name to grep.
    [[ -f "$file" ]] || continue

    echo "Processing $file..."

    # Only handle logs of runs that reached the final summary line.
    if ! grep -q "Save the summary to:" "$file"; then
      echo "Skip $file (not finished)"
      continue
    fi

    # Recover the concurrency and request count from the file name.
    if [[ "$file" =~ $name_re ]]; then
      parallel=${BASH_REMATCH[1]}
      number=${BASH_REMATCH[2]}
    else
      echo "Skip $file (parse failed)"
      continue
    fi

    # Extract the numeric values only (avoids table symbols such as |).
    ttft=$(extract_metric "Average time to first token" "$file")
    tpot=$(extract_metric "Average time per output token" "$file")
    req=$(extract_metric "Request throughput" "$file")
    total_tps=$(extract_metric "Total token throughput" "$file")
    output_tps=$(extract_metric "Output token throughput" "$file")

    # Every metric is required: a missing throughput would otherwise turn
    # into a misleading 0.00 in the per-card columns.
    if [[ -z "$ttft" || -z "$tpot" || -z "$req" \
          || -z "$total_tps" || -z "$output_tps" ]]; then
      echo "Skip $file (parse failed)"
      continue
    fi

    # Unit conversion: the two latencies are multiplied by 1000 for the
    # (ms) columns; the throughputs are divided by the card count.
    ttft_ms=$(awk -v x="$ttft" 'BEGIN{printf "%.2f", x*1000}')
    tpot_ms=$(awk -v x="$tpot" 'BEGIN{printf "%.2f", x*1000}')
    single_total=$(awk -v x="$total_tps" -v n="$gpu_count" 'BEGIN{printf "%.2f", x/n}')
    single_output=$(awk -v x="$output_tps" -v n="$gpu_count" 'BEGIN{printf "%.2f", x/n}')

    # Row: per-card value followed by the formula it was derived from.
    print_row "$parallel" "$number" "$ttft_ms" "$tpot_ms" "$req" \
      "${single_total}(=${total_tps}/${gpu_count})" \
      "${single_output}(=${output_tps}/${gpu_count})" >> "$OUTPUT_FILE"
  done

  echo "Done! Output -> $OUTPUT_FILE"
}

main "$@"
在上一个脚本(run_perf.sh)的执行目录(即 qwen3-32b_p*_n*.log 日志所在目录)下执行:
root@# bash parse_perf_txt1.sh
Processing qwen3-32b_p131_n655.log...
Processing qwen3-32b_p132_n660.log...
Processing qwen3-32b_p133_n665.log...
Processing qwen3-32b_p134_n670.log...
Done! Output -> result.txt
root@# cat result.txt
parallel number TTFT(ms) TPOT(ms) 请求频率(req/s) 单卡整体吞吐(tok/s) 单卡输出吞吐(tok/s)
131 655 3827.90 51.30 1.1122 1172.03(=4688.11/4) 569.46(=2277.85/4)
132 660 4428.90 51.20 1.1134 1173.24(=4692.98/4) 570.05(=2280.21/4)
133 665 5395.30 51.20 1.1136 1173.34(=4693.35/4) 570.15(=2280.61/4)
134 670 6239.60 51.00 1.1194 1179.44(=4717.77/4) 573.14(=2292.56/4)