forked from EngineX-Cambricon/enginex-mlu370-vllm
add qwen3
This commit is contained in:
59
vllm-v0.6.2/tests/benchmark/test_benchmark_latency.py
Normal file
59
vllm-v0.6.2/tests/benchmark/test_benchmark_latency.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import numpy as np
|
||||
from vllm import LLM, SamplingParams
|
||||
import os
|
||||
import pandas as pd
|
||||
|
||||
def test_generating_csv():
    '''
    Check that a latency run writes ``output.csv`` with the expected fields.

    The flow mirrors benchmark_latency.py: build an LLM, run one batch of
    random prompt token ids through ``generate``, then call ``get_metrics``
    and verify the single CSV row matches the run configuration.

    The CSV is removed both before the run (so a stale file cannot satisfy
    the assertions) and afterwards, even when an assertion fails.
    '''
    # contents of this test is brought from benchmark_latency.py

    csv_file = "output.csv"
    # Start from a clean slate so the existence check below is meaningful.
    if os.path.isfile(csv_file):
        os.remove(csv_file)
    assert not os.path.isfile(csv_file)

    try:
        # NOTE(review): presumably this switch is what enables metric
        # collection / CSV dumping in the engine -- confirm against the
        # engine code. It is intentionally left set, as in the original test.
        os.environ['VLLM_LATENCY_DEBUG'] = "1"

        model_path = "/data/vllm/sq_per_tensor_per_channel/Llama-2-7b-hf"
        tp = 1
        batch_size = 4
        input_len = 128
        output_len = 5
        quantization = "smoothquant"

        llm = LLM(model=model_path,
                  tokenizer=model_path,
                  quantization=quantization,
                  tensor_parallel_size=tp,
                  trust_remote_code=True,
                  enforce_eager=True)

        # ignore_eos + max_tokens pin the number of generated tokens to
        # exactly output_len for every sequence.
        sampling_params = SamplingParams(
            n=1,
            temperature=1.0,
            top_p=1.0,
            ignore_eos=True,
            max_tokens=output_len,
        )

        # Random token ids in [0, 10000) shaped (batch_size, input_len);
        # the content is irrelevant, only the shape matters for this test.
        dummy_prompt_token_ids = np.random.randint(10000,
                                                   size=(batch_size,
                                                         input_len))
        dummy_prompt_token_ids = dummy_prompt_token_ids.tolist()

        llm.generate(prompt_token_ids=dummy_prompt_token_ids,
                     sampling_params=sampling_params,
                     use_tqdm=False)

        # Positional arguments follow the call made in benchmark_latency.py.
        llm.get_metrics(0,  # args.num_iters_warmup
                        False,  # args.only_average
                        input_len,  # args.input_len
                        output_len,  # args.output_len
                        tp,  # args.tensor_parallel_size
                        quantization,  # args.quantization
                        llm.dump_info)

        assert os.path.isfile(csv_file)

        # .item() also enforces that each column holds exactly one value.
        df = pd.read_csv(csv_file)
        assert df['batch size'].item() == batch_size
        assert df['model'].item() == model_path
        assert df['input len'].item() == input_len
        assert df['output len'].item() == output_len
        assert df['tp'].item() == tp
        assert df['weight dtype'].item() == "SmoothQuant-int8"
    finally:
        # Always clean up so a failed run does not leave a stale CSV behind;
        # the guard avoids masking the real failure with FileNotFoundError.
        if os.path.isfile(csv_file):
            os.remove(csv_file)
|
||||
Reference in New Issue
Block a user