[MISC] set default model to qwen in example (#87)

- Set default model to Qwen2.5-0.5B-Instruct in example
- Remove Ultravox 0.3 because it is not currently tested

Signed-off-by: MengqingCao <cmq0113@163.com>
Author: Mengqing Cao
Date: 2025-02-18 17:09:59 +08:00
Committed by: GitHub
Commit: c18fb09b55 (parent: 8ea8523744)

3 changed files with 4 additions and 31 deletions


@@ -30,9 +30,9 @@ prompts = [
 sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
 # Create an LLM.
 llm = LLM(
-    model="facebook/opt-125m",
+    model="Qwen/Qwen2.5-0.5B-Instruct",
     tensor_parallel_size=2,
-    distributed_executor_backend="ray",
+    distributed_executor_backend="mp",
     trust_remote_code=True,
 )
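
Switching distributed_executor_backend from "ray" to "mp" means the example no longer needs a Ray cluster: vLLM's multiprocessing backend spawns the tensor-parallel workers locally. A minimal sketch of the script after this change (the prompt list here is a placeholder; the real file defines its own):

from vllm import LLM, SamplingParams

prompts = ["Hello, my name is"]  # placeholder; the example file defines its own prompts
sampling_params = SamplingParams(max_tokens=100, temperature=0.0)

# Shard the model across two devices using local multiprocessing
# workers ("mp") instead of a Ray cluster ("ray").
llm = LLM(
    model="Qwen/Qwen2.5-0.5B-Instruct",
    tensor_parallel_size=2,
    distributed_executor_backend="mp",
    trust_remote_code=True,
)
outputs = llm.generate(prompts, sampling_params)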


@@ -24,7 +24,6 @@ For most models, the prompt format should follow corresponding examples
 on HuggingFace model repository.
 """
-from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
 from vllm.assets.audio import AudioAsset
 from vllm.utils import FlexibleArgumentParser
@@ -41,28 +40,6 @@ question_per_audio_count = {
 # Unless specified, these settings have been tested to work on a single L4.
 
 
-# Ultravox 0.3
-def run_ultravox(question: str, audio_count: int):
-    model_name = "fixie-ai/ultravox-v0_3"
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    messages = [{
-        'role': 'user',
-        'content': "<|audio|>\n" * audio_count + question
-    }]
-    prompt = tokenizer.apply_chat_template(messages,
-                                           tokenize=False,
-                                           add_generation_prompt=True)
-    llm = LLM(model=model_name,
-              max_model_len=4096,
-              max_num_seqs=5,
-              trust_remote_code=True,
-              limit_mm_per_prompt={"audio": audio_count})
-    stop_token_ids = None
-    return llm, prompt, stop_token_ids
-
-
 # Qwen2-Audio
 def run_qwen2_audio(question: str, audio_count: int):
     model_name = "Qwen/Qwen2-Audio-7B-Instruct"
@@ -85,11 +62,7 @@ def run_qwen2_audio(question: str, audio_count: int):
     return llm, prompt, stop_token_ids
 
 
-# TODO (cmq): test ultravox
-model_example_map = {
-    # "ultravox": run_ultravox,
-    "qwen2_audio": run_qwen2_audio
-}
+model_example_map = {"qwen2_audio": run_qwen2_audio}
 
 
 def main(args):
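
With run_ultravox gone, only the Qwen2-Audio runner is reachable through the map. For context, main() consumes it roughly like this (a simplified sketch, not the file's exact code; args.model_type and args.num_audios are assumed from the upstream vLLM audio example):

def main(args):
    # Look up the runner for the requested model type; after this
    # commit only "qwen2_audio" is available.
    if args.model_type not in model_example_map:
        raise ValueError(f"Model type {args.model_type} is not supported.")
    llm, prompt, stop_token_ids = model_example_map[args.model_type](
        question_per_audio_count[args.num_audios], args.num_audios)
    sampling_params = SamplingParams(max_tokens=100, temperature=0.0,
                                     stop_token_ids=stop_token_ids)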


@@ -29,7 +29,7 @@ prompts = [
 # Create a sampling params object.
 sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
 # Create an LLM.
-llm = LLM(model="facebook/opt-125m")
+llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")
 # Generate texts from the prompts.
 outputs = llm.generate(prompts, sampling_params)
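
After the generate call, vLLM's offline examples conventionally print each prompt with its completion; a sketch of that standard loop (not necessarily this file's exact lines):

# Print each prompt alongside the text generated for it.
for output in outputs:
    prompt = output.prompt
    generated_text = output.outputs[0].text
    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")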