add qwen3

2026-02-04 17:22:39 +08:00
parent d1c0f68ab4
commit 8511fe8530
1932 changed files with 300426 additions and 0 deletions
--- a/vllm-v0.6.2/tests/benchmark/test_benchmark_latency.py
+++ b/vllm-v0.6.2/tests/benchmark/test_benchmark_latency.py
@@ -0,0 +1,59 @@
+import numpy as np
+from vllm import LLM, SamplingParams
+import os
+import pandas as pd
+
+def test_generating_csv():
+    '''
+    Check that a latency-debug run leaves a correct ``output.csv`` behind.
+
+    The flow is taken from benchmark_latency.py: enable VLLM_LATENCY_DEBUG,
+    build an LLM with smoothquant quantization, run one generate() call on
+    random prompt token ids, call ``llm.get_metrics`` and then validate the
+    single row expected in ``output.csv`` in the current working directory.
+
+    The test needs the vLLM build under test and the checkpoint stored at
+    ``model_path``; it cannot run without them.
+    '''
+    csv_file = "output.csv"
+    env_key = 'VLLM_LATENCY_DEBUG'
+
+    # Start from a clean slate so a stale file cannot satisfy the checks below.
+    if os.path.isfile(csv_file):
+        os.remove(csv_file)
+    assert not os.path.isfile(csv_file)
+
+    # Remember the previous value so the flag does not leak into other tests
+    # running in the same process.
+    previous_flag = os.environ.get(env_key)
+    os.environ[env_key] = "1"
+    try:
+        model_path = "/data/vllm/sq_per_tensor_per_channel/Llama-2-7b-hf"
+        tp = 1
+        batch_size = 4
+        input_len = 128
+        output_len = 5
+        quantization = "smoothquant"
+        llm = LLM(model=model_path,
+                  tokenizer=model_path,
+                  quantization=quantization,
+                  tensor_parallel_size=tp,
+                  trust_remote_code=True,
+                  enforce_eager=True)
+        sampling_params = SamplingParams(
+            n=1,
+            temperature=1.0,
+            top_p=1.0,
+            ignore_eos=True,  # always produce exactly output_len tokens
+            max_tokens=output_len,
+        )
+        # The token values are irrelevant here; only the batch shape matters.
+        dummy_prompt_token_ids = np.random.randint(10000,
+                                                   size=(batch_size,
+                                                         input_len))
+        dummy_prompt_token_ids = dummy_prompt_token_ids.tolist()
+        llm.generate(prompt_token_ids=dummy_prompt_token_ids,
+                     sampling_params=sampling_params,
+                     use_tqdm=False)
+        # Positional arguments follow the call made in benchmark_latency.py;
+        # the trailing comments name the CLI option each one stands in for.
+        llm.get_metrics(0,  # args.num_iters_warmup
+                        False,  # args.only_average
+                        input_len,  # args.input_len
+                        output_len,  # args.output_len
+                        tp,  # args.tensor_parallel_size
+                        quantization,  # args.quantization
+                        llm.dump_info)
+
+        assert os.path.isfile(csv_file)
+        df = pd.read_csv(csv_file)
+        # .item() raises unless the column holds exactly one value, so these
+        # checks also pin the CSV to a single data row.
+        assert df['batch size'].item() == batch_size
+        assert df['model'].item() == model_path
+        assert df['input len'].item() == input_len
+        assert df['output len'].item() == output_len
+        assert df['tp'].item() == tp
+        assert df['weight dtype'].item() == "SmoothQuant-int8"
+    finally:
+        # Undo the side effects even when an assertion or the model run fails.
+        if previous_flag is None:
+            os.environ.pop(env_key, None)
+        else:
+            os.environ[env_key] = previous_flag
+        if os.path.isfile(csv_file):
+            os.remove(csv_file)