[Benchmark] Download model from modelscope (#634)

### What this PR does / why we need it? - Running the benchmark scripts now downloads the model from ModelScope. Signed-off-by: wangli <wangli858794774@gmail.com>
2025-04-24 14:48:24 +08:00
parent 05bdcbeae4
commit 866ce7168c
4 changed files with 8 additions and 4 deletions
--- a/benchmarks/scripts/run-performance-benchmarks.sh
+++ b/benchmarks/scripts/run-performance-benchmarks.sh
@@ -264,6 +264,10 @@ main() {
  # turn of the reporting of the status of each request, to clean up the terminal output
  export VLLM_LOG_LEVEL="WARNING"

+  # set env
+  export VLLM_USE_MODELSCOPE="True"
+  export HF_ENDPOINT="https://hf-mirror.com"
+
  # prepare for benchmarking
  cd benchmarks || exit 1
  get_benchmarks_scripts
--- a/benchmarks/tests/latency-tests.json
+++ b/benchmarks/tests/latency-tests.json
@@ -2,7 +2,7 @@
  {
    "test_name": "latency_llama8B_tp1",
    "parameters": {
-      "model": "meta-llama/Llama-3.1-8B-Instruct",
+      "model": "LLM-Research/Meta-Llama-3.1-8B-Instruct",
      "tensor_parallel_size": 1,
      "load_format": "dummy",
      "num_iters_warmup": 5,
--- a/benchmarks/tests/serving-tests.json
+++ b/benchmarks/tests/serving-tests.json
@@ -8,7 +8,7 @@
      "inf"
    ],
    "server_parameters": {
-      "model": "meta-llama/Llama-3.1-8B-Instruct",
+      "model": "LLM-Research/Meta-Llama-3.1-8B-Instruct",
      "tensor_parallel_size": 1,
      "swap_space": 16,
      "disable_log_stats": "",
@@ -16,7 +16,7 @@
      "load_format": "dummy"
    },
    "client_parameters": {
-      "model": "meta-llama/Llama-3.1-8B-Instruct",
+      "model": "LLM-Research/Meta-Llama-3.1-8B-Instruct",
      "backend": "vllm",
      "dataset_name": "sharegpt",
      "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
--- a/benchmarks/tests/throughput-tests.json
+++ b/benchmarks/tests/throughput-tests.json
@@ -2,7 +2,7 @@
  {
    "test_name": "throughput_llama8B_tp1",
    "parameters": {
-      "model": "meta-llama/Llama-3.1-8B-Instruct",
+      "model": "LLM-Research/Meta-Llama-3.1-8B-Instruct",
      "tensor_parallel_size": 1,
      "load_format": "dummy",
      "dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",