[MISC] set default model to qwen in example (#87)
- Set the default model to Qwen2.5-0.5B-Instruct in the examples.
- Remove Ultravox 0.3 because it is not currently tested.

Signed-off-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
@@ -30,9 +30,9 @@ prompts = [
|
|||||||
sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
|
sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
|
||||||
# Create an LLM.
|
# Create an LLM.
|
||||||
llm = LLM(
|
llm = LLM(
|
||||||
model="facebook/opt-125m",
|
model="Qwen/Qwen2.5-0.5B-Instruct",
|
||||||
tensor_parallel_size=2,
|
tensor_parallel_size=2,
|
||||||
distributed_executor_backend="ray",
|
distributed_executor_backend="mp",
|
||||||
trust_remote_code=True,
|
trust_remote_code=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -24,7 +24,6 @@ For most models, the prompt format should follow corresponding examples
|
|||||||
on HuggingFace model repository.
|
on HuggingFace model repository.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from transformers import AutoTokenizer
|
|
||||||
from vllm import LLM, SamplingParams
|
from vllm import LLM, SamplingParams
|
||||||
from vllm.assets.audio import AudioAsset
|
from vllm.assets.audio import AudioAsset
|
||||||
from vllm.utils import FlexibleArgumentParser
|
from vllm.utils import FlexibleArgumentParser
|
||||||
@@ -41,28 +40,6 @@ question_per_audio_count = {
|
|||||||
# Unless specified, these settings have been tested to work on a single L4.
|
# Unless specified, these settings have been tested to work on a single L4.
|
||||||
|
|
||||||
|
|
||||||
# Ultravox 0.3
|
|
||||||
def run_ultravox(question: str, audio_count: int):
|
|
||||||
model_name = "fixie-ai/ultravox-v0_3"
|
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
||||||
messages = [{
|
|
||||||
'role': 'user',
|
|
||||||
'content': "<|audio|>\n" * audio_count + question
|
|
||||||
}]
|
|
||||||
prompt = tokenizer.apply_chat_template(messages,
|
|
||||||
tokenize=False,
|
|
||||||
add_generation_prompt=True)
|
|
||||||
|
|
||||||
llm = LLM(model=model_name,
|
|
||||||
max_model_len=4096,
|
|
||||||
max_num_seqs=5,
|
|
||||||
trust_remote_code=True,
|
|
||||||
limit_mm_per_prompt={"audio": audio_count})
|
|
||||||
stop_token_ids = None
|
|
||||||
return llm, prompt, stop_token_ids
|
|
||||||
|
|
||||||
|
|
||||||
# Qwen2-Audio
|
# Qwen2-Audio
|
||||||
def run_qwen2_audio(question: str, audio_count: int):
|
def run_qwen2_audio(question: str, audio_count: int):
|
||||||
model_name = "Qwen/Qwen2-Audio-7B-Instruct"
|
model_name = "Qwen/Qwen2-Audio-7B-Instruct"
|
||||||
@@ -85,11 +62,7 @@ def run_qwen2_audio(question: str, audio_count: int):
|
|||||||
return llm, prompt, stop_token_ids
|
return llm, prompt, stop_token_ids
|
||||||
|
|
||||||
|
|
||||||
# TODO (cmq): test ultravox
|
model_example_map = {"qwen2_audio": run_qwen2_audio}
|
||||||
model_example_map = {
|
|
||||||
# "ultravox": run_ultravox,
|
|
||||||
"qwen2_audio": run_qwen2_audio
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def main(args):
|
def main(args):
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ prompts = [
|
|||||||
# Create a sampling params object.
|
# Create a sampling params object.
|
||||||
sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
|
sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
|
||||||
# Create an LLM.
|
# Create an LLM.
|
||||||
llm = LLM(model="facebook/opt-125m")
|
llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")
|
||||||
|
|
||||||
# Generate texts from the prompts.
|
# Generate texts from the prompts.
|
||||||
outputs = llm.generate(prompts, sampling_params)
|
outputs = llm.generate(prompts, sampling_params)
|
||||||
|
|||||||
Reference in New Issue
Block a user