diff --git a/examples/offline_distributed_inference_npu.py b/examples/offline_distributed_inference_npu.py
index 8e503ad..8853378 100644
--- a/examples/offline_distributed_inference_npu.py
+++ b/examples/offline_distributed_inference_npu.py
@@ -30,9 +30,9 @@ prompts = [
 sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
 # Create an LLM.
 llm = LLM(
-    model="facebook/opt-125m",
+    model="Qwen/Qwen2.5-0.5B-Instruct",
     tensor_parallel_size=2,
-    distributed_executor_backend="ray",
+    distributed_executor_backend="mp",
     trust_remote_code=True,
 )
 
diff --git a/examples/offline_inference_audio_language.py b/examples/offline_inference_audio_language.py
index 785492c..25fabfd 100644
--- a/examples/offline_inference_audio_language.py
+++ b/examples/offline_inference_audio_language.py
@@ -24,7 +24,6 @@
 For most models, the prompt format should follow
 corresponding examples on HuggingFace model repository.
 """
-from transformers import AutoTokenizer
 from vllm import LLM, SamplingParams
 from vllm.assets.audio import AudioAsset
 from vllm.utils import FlexibleArgumentParser
@@ -41,28 +40,6 @@ question_per_audio_count = {
 
 # Unless specified, these settings have been tested to work on a single L4.
 
-# Ultravox 0.3
-def run_ultravox(question: str, audio_count: int):
-    model_name = "fixie-ai/ultravox-v0_3"
-
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    messages = [{
-        'role': 'user',
-        'content': "<|audio|>\n" * audio_count + question
-    }]
-    prompt = tokenizer.apply_chat_template(messages,
-                                           tokenize=False,
-                                           add_generation_prompt=True)
-
-    llm = LLM(model=model_name,
-              max_model_len=4096,
-              max_num_seqs=5,
-              trust_remote_code=True,
-              limit_mm_per_prompt={"audio": audio_count})
-    stop_token_ids = None
-    return llm, prompt, stop_token_ids
-
-
 # Qwen2-Audio
 def run_qwen2_audio(question: str, audio_count: int):
     model_name = "Qwen/Qwen2-Audio-7B-Instruct"
@@ -85,11 +62,7 @@ def run_qwen2_audio(question: str, audio_count: int):
     return llm, prompt, stop_token_ids
 
 
-# TODO (cmq): test ultravox
-model_example_map = {
-    # "ultravox": run_ultravox,
-    "qwen2_audio": run_qwen2_audio
-}
+model_example_map = {"qwen2_audio": run_qwen2_audio}
 
 
 def main(args):
diff --git a/examples/offline_inference_npu.py b/examples/offline_inference_npu.py
index 10c2c6e..cb39639 100644
--- a/examples/offline_inference_npu.py
+++ b/examples/offline_inference_npu.py
@@ -29,7 +29,7 @@ prompts = [
 # Create a sampling params object.
 sampling_params = SamplingParams(max_tokens=100, temperature=0.0)
 # Create an LLM.
-llm = LLM(model="facebook/opt-125m")
+llm = LLM(model="Qwen/Qwen2.5-0.5B-Instruct")
 
 # Generate texts from the prompts.
 outputs = llm.generate(prompts, sampling_params)