[Benchmark] Download model from modelscope (#634)
### What this PR does / why we need it? - Run benchmark scripts will Download model from modelscope Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -264,6 +264,10 @@ main() {
|
||||
# turn of the reporting of the status of each request, to clean up the terminal output
|
||||
export VLLM_LOG_LEVEL="WARNING"
|
||||
|
||||
# set env
|
||||
export VLLM_USE_MODELSCOPE="True"
|
||||
export HF_ENDPOINT="https://hf-mirror.com"
|
||||
|
||||
# prepare for benchmarking
|
||||
cd benchmarks || exit 1
|
||||
get_benchmarks_scripts
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
{
|
||||
"test_name": "latency_llama8B_tp1",
|
||||
"parameters": {
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"model": "LLM-Research/Meta-Llama-3.1-8B-Instruct",
|
||||
"tensor_parallel_size": 1,
|
||||
"load_format": "dummy",
|
||||
"num_iters_warmup": 5,
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
"inf"
|
||||
],
|
||||
"server_parameters": {
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"model": "LLM-Research/Meta-Llama-3.1-8B-Instruct",
|
||||
"tensor_parallel_size": 1,
|
||||
"swap_space": 16,
|
||||
"disable_log_stats": "",
|
||||
@@ -16,7 +16,7 @@
|
||||
"load_format": "dummy"
|
||||
},
|
||||
"client_parameters": {
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"model": "LLM-Research/Meta-Llama-3.1-8B-Instruct",
|
||||
"backend": "vllm",
|
||||
"dataset_name": "sharegpt",
|
||||
"dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
{
|
||||
"test_name": "throughput_llama8B_tp1",
|
||||
"parameters": {
|
||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
||||
"model": "LLM-Research/Meta-Llama-3.1-8B-Instruct",
|
||||
"tensor_parallel_size": 1,
|
||||
"load_format": "dummy",
|
||||
"dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
|
||||
|
||||
Reference in New Issue
Block a user