[MISC] set default model to qwen in example (#87)

- Set default model to Qwen2.5-0.5B-Instruct in the examples
- Remove Ultravox 0.3 because it is currently untested

Signed-off-by: MengqingCao <cmq0113@163.com>
2025-02-18 17:09:59 +08:00
parent 8ea8523744
commit c18fb09b55
3 changed files with 4 additions and 31 deletions
--- a/examples/offline_distributed_inference_npu.py
+++ b/examples/offline_distributed_inference_npu.py
@@ -30,9 +30,9 @@ prompts = [
 sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
 # Create an LLM.
 llm = LLM(
-    model="facebook/opt-125m",
+    model="Qwen/Qwen2.5-0.5B-Instruct",
    tensor_parallel_size=2,
-    distributed_executor_backend="ray",
+    distributed_executor_backend="mp",
    trust_remote_code=True,
 )
--- a/examples/offline_inference_audio_language.py
+++ b/examples/offline_inference_audio_language.py
@@ -24,7 +24,6 @@ For most models, the prompt format should follow corresponding examples
 on HuggingFace model repository.
 """
 from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
 from vllm.assets.audio import AudioAsset
 from vllm.utils import FlexibleArgumentParser
@@ -41,28 +40,6 @@ question_per_audio_count = {
 # Unless specified, these settings have been tested to work on a single L4.
 # Ultravox 0.3
 def run_ultravox(question: str, audio_count: int):
    model_name = "fixie-ai/ultravox-v0_3"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    messages = [{
        'role': 'user',
        'content': "<|audio|>\n" * audio_count + question
    }]
    prompt = tokenizer.apply_chat_template(messages,
                                           tokenize=False,
                                           add_generation_prompt=True)
    llm = LLM(model=model_name,
              max_model_len=4096,
              max_num_seqs=5,
              trust_remote_code=True,
              limit_mm_per_prompt={"audio": audio_count})
    stop_token_ids = None
    return llm, prompt, stop_token_ids
 # Qwen2-Audio
 def run_qwen2_audio(question: str, audio_count: int):
    model_name = "Qwen/Qwen2-Audio-7B-Instruct"
@@ -85,11 +62,7 @@ def run_qwen2_audio(question: str, audio_count: int):
    return llm, prompt, stop_token_ids
-# TODO (cmq): test ultravox
+model_example_map = {"qwen2_audio": run_qwen2_audio}
 model_example_map = {
    # "ultravox": run_ultravox,
    "qwen2_audio": run_qwen2_audio
 }
 def main(args):
--- a/examples/offline_inference_npu.py
+++ b/examples/offline_inference_npu.py
@@ -29,7 +29,7 @@ prompts = [
 # Create a sampling params object.
 sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
 # Create an LLM.
-llm = LLM(model="facebook/opt-125m")
+llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")
 # Generate texts from the prompts.
 outputs = llm.generate(prompts, sampling_params)