sglangv0.5.2 & support Qwen3-Next-80B-A3B-Instruct

This commit is contained in:
maxiao1
2025-09-13 17:00:20 +08:00
commit 118f1fc726
2037 changed files with 515371 additions and 0 deletions

View File

@@ -0,0 +1,7 @@
tasks:
- name: sglang-benchmark
server_cmd: python3 -m sglang.launch_server --model-path meta-llama/Llama-3.1-8B-Instruct --disable-radix-cache
client_cmd: python3 -m sglang.bench_serving --backend sglang --dataset-name sharegpt --request-rate 16
- name: vllm-benchmark
server_cmd: python3 -m vllm.entrypoints.openai.api_server --model meta-llama/Llama-3.1-8B-Instruct --disable-log-requests
client_cmd: python3 -m sglang.bench_serving --backend vllm --dataset-name sharegpt --request-rate 16