[Benchmark] Download model from modelscope (#634)
### What this PR does / why we need it? - Run benchmark scripts will Download model from modelscope Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -264,6 +264,10 @@ main() {
|
|||||||
# turn of the reporting of the status of each request, to clean up the terminal output
|
# turn of the reporting of the status of each request, to clean up the terminal output
|
||||||
export VLLM_LOG_LEVEL="WARNING"
|
export VLLM_LOG_LEVEL="WARNING"
|
||||||
|
|
||||||
|
# set env
|
||||||
|
export VLLM_USE_MODELSCOPE="True"
|
||||||
|
export HF_ENDPOINT="https://hf-mirror.com"
|
||||||
|
|
||||||
# prepare for benchmarking
|
# prepare for benchmarking
|
||||||
cd benchmarks || exit 1
|
cd benchmarks || exit 1
|
||||||
get_benchmarks_scripts
|
get_benchmarks_scripts
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
{
|
{
|
||||||
"test_name": "latency_llama8B_tp1",
|
"test_name": "latency_llama8B_tp1",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "LLM-Research/Meta-Llama-3.1-8B-Instruct",
|
||||||
"tensor_parallel_size": 1,
|
"tensor_parallel_size": 1,
|
||||||
"load_format": "dummy",
|
"load_format": "dummy",
|
||||||
"num_iters_warmup": 5,
|
"num_iters_warmup": 5,
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
"inf"
|
"inf"
|
||||||
],
|
],
|
||||||
"server_parameters": {
|
"server_parameters": {
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "LLM-Research/Meta-Llama-3.1-8B-Instruct",
|
||||||
"tensor_parallel_size": 1,
|
"tensor_parallel_size": 1,
|
||||||
"swap_space": 16,
|
"swap_space": 16,
|
||||||
"disable_log_stats": "",
|
"disable_log_stats": "",
|
||||||
@@ -16,7 +16,7 @@
|
|||||||
"load_format": "dummy"
|
"load_format": "dummy"
|
||||||
},
|
},
|
||||||
"client_parameters": {
|
"client_parameters": {
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "LLM-Research/Meta-Llama-3.1-8B-Instruct",
|
||||||
"backend": "vllm",
|
"backend": "vllm",
|
||||||
"dataset_name": "sharegpt",
|
"dataset_name": "sharegpt",
|
||||||
"dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
|
"dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
{
|
{
|
||||||
"test_name": "throughput_llama8B_tp1",
|
"test_name": "throughput_llama8B_tp1",
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"model": "meta-llama/Llama-3.1-8B-Instruct",
|
"model": "LLM-Research/Meta-Llama-3.1-8B-Instruct",
|
||||||
"tensor_parallel_size": 1,
|
"tensor_parallel_size": 1,
|
||||||
"load_format": "dummy",
|
"load_format": "dummy",
|
||||||
"dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
|
"dataset_path": "./ShareGPT_V3_unfiltered_cleaned_split.json",
|
||||||
|
|||||||
Reference in New Issue
Block a user