From 866ce7168cc2df2b35d16de21261b9b2a317447b Mon Sep 17 00:00:00 2001 From: Li Wang Date: Thu, 24 Apr 2025 14:48:24 +0800 Subject: [PATCH] [Benchmark] Download model from modelscope (#634) ### What this PR does / why we need it? - Running the benchmark scripts will download the model from ModelScope Signed-off-by: wangli --- benchmarks/scripts/run-performance-benchmarks.sh | 4 ++++ benchmarks/tests/latency-tests.json | 2 +- benchmarks/tests/serving-tests.json | 4 ++-- benchmarks/tests/throughput-tests.json | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/benchmarks/scripts/run-performance-benchmarks.sh b/benchmarks/scripts/run-performance-benchmarks.sh index 489c59e..b0e3121 100644 --- a/benchmarks/scripts/run-performance-benchmarks.sh +++ b/benchmarks/scripts/run-performance-benchmarks.sh @@ -264,6 +264,10 @@ main() { # turn of the reporting of the status of each request, to clean up the terminal output export VLLM_LOG_LEVEL="WARNING" + # set env + export VLLM_USE_MODELSCOPE="True" + export HF_ENDPOINT="https://hf-mirror.com" + # prepare for benchmarking cd benchmarks || exit 1 get_benchmarks_scripts diff --git a/benchmarks/tests/latency-tests.json b/benchmarks/tests/latency-tests.json index 0033bf5..a9b951f 100644 --- a/benchmarks/tests/latency-tests.json +++ b/benchmarks/tests/latency-tests.json @@ -2,7 +2,7 @@ { "test_name": "latency_llama8B_tp1", "parameters": { - "model": "meta-llama/Llama-3.1-8B-Instruct", + "model": "LLM-Research/Meta-Llama-3.1-8B-Instruct", "tensor_parallel_size": 1, "load_format": "dummy", "num_iters_warmup": 5, diff --git a/benchmarks/tests/serving-tests.json b/benchmarks/tests/serving-tests.json index 5eb9ac0..fe200b4 100644 --- a/benchmarks/tests/serving-tests.json +++ b/benchmarks/tests/serving-tests.json @@ -8,7 +8,7 @@ "inf" ], "server_parameters": { - "model": "meta-llama/Llama-3.1-8B-Instruct", + "model": "LLM-Research/Meta-Llama-3.1-8B-Instruct", "tensor_parallel_size": 1, "swap_space": 16, "disable_log_stats": "", @@ 
-16,7 +16,7 @@ "load_format": "dummy" }, "client_parameters": { - "model": "meta-llama/Llama-3.1-8B-Instruct", + "model": "LLM-Research/Meta-Llama-3.1-8B-Instruct", "backend": "vllm", "dataset_name": "sharegpt", "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json", diff --git a/benchmarks/tests/throughput-tests.json b/benchmarks/tests/throughput-tests.json index 16a8cd1..41f8ab2 100644 --- a/benchmarks/tests/throughput-tests.json +++ b/benchmarks/tests/throughput-tests.json @@ -2,7 +2,7 @@ { "test_name": "throughput_llama8B_tp1", "parameters": { - "model": "meta-llama/Llama-3.1-8B-Instruct", + "model": "LLM-Research/Meta-Llama-3.1-8B-Instruct", "tensor_parallel_size": 1, "load_format": "dummy", "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",