[MISC] set default model to qwen in example (#87)

- Set default model to Qwen2.5-0.5B-Instruct in example
- Remove Ultravox 0.3 because it is not currently tested

Signed-off-by: MengqingCao <cmq0113@163.com>
Author: Mengqing Cao
Date: 2025-02-18 17:09:59 +08:00
Committed by: GitHub
Commit: c18fb09b55 (parent: 8ea8523744)

3 changed files with 4 additions and 31 deletions


@@ -30,9 +30,9 @@ prompts = [
 sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
 # Create an LLM.
 llm = LLM(
-    model="facebook/opt-125m",
+    model="Qwen/Qwen2.5-0.5B-Instruct",
     tensor_parallel_size=2,
-    distributed_executor_backend="ray",
+    distributed_executor_backend="mp",
     trust_remote_code=True,
 )
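
Switching distributed_executor_backend from "ray" to "mp" means the example no longer needs a Ray cluster: vLLM's multiprocessing backend spawns the tensor-parallel workers locally. A minimal sketch of the script after this change (the prompt list here is a placeholder; the real file defines its own):

from vllm import LLM, SamplingParams

prompts = ["Hello, my name is"]  # placeholder; the example file defines its own prompts
sampling_params = SamplingParams(max_tokens=100, temperature=0.0)

# Shard the model across two devices using local multiprocessing
# workers ("mp") instead of a Ray cluster ("ray").
llm = LLM(
    model="Qwen/Qwen2.5-0.5B-Instruct",
    tensor_parallel_size=2,
    distributed_executor_backend="mp",
    trust_remote_code=True,
)
outputs = llm.generate(prompts, sampling_params)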


@@ -24,7 +24,6 @@ For most models, the prompt format should follow corresponding examples
 on HuggingFace model repository.
 """
-from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
 from vllm.assets.audio import AudioAsset
 from vllm.utils import FlexibleArgumentParser
@@ -41,28 +40,6 @@ question_per_audio_count = {
 # Unless specified, these settings have been tested to work on a single L4.
 
 
-# Ultravox 0.3
-def run_ultravox(question: str, audio_count: int):
-    model_name = "fixie-ai/ultravox-v0_3"
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    messages = [{
-        'role': 'user',
-        'content': "<|audio|>\n" * audio_count + question
-    }]
-    prompt = tokenizer.apply_chat_template(messages,
-                                           tokenize=False,
-                                           add_generation_prompt=True)
-    llm = LLM(model=model_name,
-              max_model_len=4096,
-              max_num_seqs=5,
-              trust_remote_code=True,
-              limit_mm_per_prompt={"audio": audio_count})
-    stop_token_ids = None
-    return llm, prompt, stop_token_ids
-
-
 # Qwen2-Audio
 def run_qwen2_audio(question: str, audio_count: int):
     model_name = "Qwen/Qwen2-Audio-7B-Instruct"
@@ -85,11 +62,7 @@ def run_qwen2_audio(question: str, audio_count: int):
     return llm, prompt, stop_token_ids
 
 
-# TODO (cmq): test ultravox
-model_example_map = {
-    # "ultravox": run_ultravox,
-    "qwen2_audio": run_qwen2_audio
-}
+model_example_map = {"qwen2_audio": run_qwen2_audio}
 
 
 def main(args):
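
With run_ultravox gone, only the Qwen2-Audio runner is reachable through the map. For context, main() consumes it roughly like this (a simplified sketch, not the file's exact code; args.model_type and args.num_audios are assumed from the upstream vLLM audio example):

def main(args):
    # Look up the runner for the requested model type; after this
    # commit only "qwen2_audio" is available.
    if args.model_type not in model_example_map:
        raise ValueError(f"Model type {args.model_type} is not supported.")
    llm, prompt, stop_token_ids = model_example_map[args.model_type](
        question_per_audio_count[args.num_audios], args.num_audios)
    sampling_params = SamplingParams(max_tokens=100, temperature=0.0,
                                     stop_token_ids=stop_token_ids)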


@@ -29,7 +29,7 @@ prompts = [
 # Create a sampling params object.
 sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
 # Create an LLM.
-llm = LLM(model="facebook/opt-125m")
+llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")
 # Generate texts from the prompts.
 outputs = llm.generate(prompts, sampling_params)
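
After the generate call, vLLM's offline examples conventionally print each prompt with its completion; a sketch of that standard loop (not necessarily this file's exact lines):

# Print each prompt alongside the text generated for it.
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")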