xc-llm-ascend/examples/run_dp_attention_etp16_benmark.sh

#!/bin/bash
# Sweep parameters: every concurrency level below is combined with every
# request rate below by the benchmark loop.
# Values passed to the benchmark's --max-concurrency flag.
concurrency_array=(48)
# Values passed to the benchmark's --request-rate flag
# (the author marked this one as the "best rate").
rate_array=(0.7)

# Result file: (re)create it with a two-line banner; any previous content
# is discarded.
result_file="benchmark_results.txt"
printf '%s\n' "Benchmark Results" "===================" > "$result_file"

# Loop through all combinations of concurrency and request rate.
# For each pair: append a section header to the result file, run the
# serving benchmark with its combined stdout/stderr mirrored to the
# terminal and appended to the result file, then pause before the next run.
for concurrency in "${concurrency_array[@]}"; do
    for rate in "${rate_array[@]}"; do
        echo "Testing with concurrency=$concurrency, rate=$rate"

        # Section header identifying this run inside the result file.
        {
            echo ""
            echo "Concurrency: $concurrency, Request Rate: $rate"
            echo "-------------------"
        } >> "$result_file"

        # Run benchmark test against the server at localhost:8006.
        # Per the flag names: random dataset, 4096-token inputs,
        # 1536-token outputs, 400 prompts, 90th-percentile metrics.
        python /mnt/deepseek/vllm/benchmarks/benchmark_serving.py \
            --backend vllm \
            --trust-remote-code \
            --model auto \
            --tokenizer /mnt/deepseek/DeepSeek-R1-W8A8-VLLM \
            --dataset-name random \
            --random-input-len 4096 \
            --random-output-len 1536 \
            --ignore-eos \
            --num-prompts 400 \
            --max-concurrency "$concurrency" \
            --request-rate "$rate" \
            --metric-percentiles 90 \
            --base-url http://localhost:8006 2>&1 | tee -a "$result_file"
        # The pipeline's own status is tee's; PIPESTATUS[0] is python's.
        # Must be read immediately, before any other command overwrites it.
        bench_status=${PIPESTATUS[0]}
        if (( bench_status != 0 )); then
            # Make a failed run visible instead of leaving a silently
            # empty section in the results.
            echo "WARNING: benchmark exited with status $bench_status (concurrency=$concurrency, rate=$rate)" \
                | tee -a "$result_file" >&2
        fi

        # Wait for system cool down
        sleep 30
    done
done

# Analyze results

#######################################
# Print one summary line for every occurrence of a metric in a benchmark
# results file, labelled with the concurrency and request rate taken from
# the most recent "Concurrency: <c>, Request Rate: <r>" section header
# that precedes it.
# Arguments:
#   $1 - text identifying the metric line (e.g. "Mean TPOT")
#   $2 - unit suffix to print after the value (e.g. "ms")
#   $3 - path of the results file to scan
# Outputs:
#   "Concurrency <c>, Rate <r>: <value> <unit>" lines on stdout
# Returns:
#   1 if the results file is not readable, otherwise awk's exit status
#######################################
extract_metric() {
    if [[ ! -r "$3" ]]; then
        printf 'warning: cannot read results file: %s\n' "$3" >&2
        return 1
    fi
    awk -v metric="$1" -v unit="$2" '
        function trim(s) { gsub(/^[ \t\r]+|[ \t\r]+$/, "", s); return s }
        # Placeholders in case a metric shows up before any section header.
        BEGIN { conc = "?"; rate = "?" }
        # Section header: remember which run the following metrics belong to.
        /^Concurrency: .*, Request Rate: / {
            line = $0
            sub(/^Concurrency: /, "", line)
            sep = index(line, ", Request Rate: ")
            conc = trim(substr(line, 1, sep - 1))
            rate = trim(substr(line, sep + length(", Request Rate: ")))
            next
        }
        # Metric line looks like "<label>:   <value>"; the value is whatever
        # follows the last colon.
        index($0, metric) > 0 {
            n = split($0, kv, ":")
            printf "Concurrency %s, Rate %s: %s %s\n", conc, rate, trim(kv[n]), unit
        }
    ' "$3"
}

# Fall back to the default results file name if result_file is not set.
results_path="${result_file:-benchmark_results.txt}"

{
    echo "Analysis Results"
    echo "================="

    # Extract and analyze TPOT data
    echo "TPOT Analysis:"
    extract_metric "Mean TPOT" "ms" "$results_path"

    # Extract and analyze throughput data
    printf '\nThroughput Analysis:\n'
    extract_metric "Output token throughput" "tokens/s" "$results_path"
} > analysis_results.txt

echo "Testing completed. Results saved in $results_path and analysis in analysis_results.txt"