diff --git a/docs/source/tutorials/Qwen3_embedding.md b/docs/source/tutorials/Qwen3_embedding.md
index 5aca58e8..667c1de6 100644
--- a/docs/source/tutorials/Qwen3_embedding.md
+++ b/docs/source/tutorials/Qwen3_embedding.md
@@ -30,7 +30,7 @@ Using the Qwen3-Embedding-8B model as an example, first run the docker container
 ### Online Inference
 
 ```bash
-vllm serve Qwen/Qwen3-Embedding-8B --task embed --host 127.0.0.1 --port 8888
+vllm serve Qwen/Qwen3-Embedding-8B --runner pooling --host 127.0.0.1 --port 8888
 ```
 
 Once your server is started, you can query the model with input prompts.
@@ -71,7 +71,6 @@ if __name__=="__main__":
     input_texts = queries + documents
 
     model = LLM(model="Qwen/Qwen3-Embedding-8B",
-                task="embed",
                 distributed_executor_backend="mp")
 
     outputs = model.embed(input_texts)
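
As a quick smoke test of the updated serve command, the running server can be queried over vLLM's OpenAI-compatible embedding endpoint. This is a minimal sketch, not part of the patch: the `/v1/embeddings` route and request schema follow vLLM's OpenAI-compatible API, the host and port match the command above, and the input sentence is an illustrative placeholder.

```bash
# Assumes the server was started with the updated command from the diff:
#   vllm serve Qwen/Qwen3-Embedding-8B --runner pooling --host 127.0.0.1 --port 8888
# The input string below is a placeholder query for illustration.
curl http://127.0.0.1:8888/v1/embeddings \
  -H "Content-Type: application/json" \
  -d '{
        "model": "Qwen/Qwen3-Embedding-8B",
        "input": "What is the capital of China?"
      }'
```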