[MISC] set default model to qwen in example (#87)
- Set the default model to Qwen2.5-0.5B-Instruct in the example.
- Remove Ultravox 0.3 because it is not currently tested.

Signed-off-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
@@ -30,9 +30,9 @@ prompts = [
|
||||
sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
|
||||
# Create an LLM.
|
||||
llm = LLM(
|
||||
model="facebook/opt-125m",
|
||||
model="Qwen/Qwen2.5-0.5B-Instruct",
|
||||
tensor_parallel_size=2,
|
||||
distributed_executor_backend="ray",
|
||||
distributed_executor_backend="mp",
|
||||
trust_remote_code=True,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,7 +24,6 @@ For most models, the prompt format should follow corresponding examples
|
||||
on HuggingFace model repository.
|
||||
"""
|
||||
|
||||
from transformers import AutoTokenizer
|
||||
from vllm import LLM, SamplingParams
|
||||
from vllm.assets.audio import AudioAsset
|
||||
from vllm.utils import FlexibleArgumentParser
|
||||
@@ -41,28 +40,6 @@ question_per_audio_count = {
|
||||
# Unless specified, these settings have been tested to work on a single L4.
|
||||
|
||||
|
||||
# Ultravox 0.3
|
||||
def run_ultravox(question: str, audio_count: int):
|
||||
model_name = "fixie-ai/ultravox-v0_3"
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||
messages = [{
|
||||
'role': 'user',
|
||||
'content': "<|audio|>\n" * audio_count + question
|
||||
}]
|
||||
prompt = tokenizer.apply_chat_template(messages,
|
||||
tokenize=False,
|
||||
add_generation_prompt=True)
|
||||
|
||||
llm = LLM(model=model_name,
|
||||
max_model_len=4096,
|
||||
max_num_seqs=5,
|
||||
trust_remote_code=True,
|
||||
limit_mm_per_prompt={"audio": audio_count})
|
||||
stop_token_ids = None
|
||||
return llm, prompt, stop_token_ids
|
||||
|
||||
|
||||
# Qwen2-Audio
|
||||
def run_qwen2_audio(question: str, audio_count: int):
|
||||
model_name = "Qwen/Qwen2-Audio-7B-Instruct"
|
||||
@@ -85,11 +62,7 @@ def run_qwen2_audio(question: str, audio_count: int):
|
||||
return llm, prompt, stop_token_ids
|
||||
|
||||
|
||||
# TODO (cmq): test ultravox
|
||||
model_example_map = {
|
||||
# "ultravox": run_ultravox,
|
||||
"qwen2_audio": run_qwen2_audio
|
||||
}
|
||||
model_example_map = {"qwen2_audio": run_qwen2_audio}
|
||||
|
||||
|
||||
def main(args):
|
||||
|
||||
@@ -29,7 +29,7 @@ prompts = [
|
||||
# Create a sampling params object.
|
||||
sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
|
||||
# Create an LLM.
|
||||
llm = LLM(model="facebook/opt-125m")
|
||||
llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")
|
||||
|
||||
# Generate texts from the prompts.
|
||||
outputs = llm.generate(prompts, sampling_params)
|
||||
|
||||
Reference in New Issue
Block a user