init v0.11.0rc0

2025-10-14 10:38:28 +08:00
parent 67afd0ea78
commit 66dc16f966
278 changed files with 28130 additions and 11708 deletions
--- a/benchmarks/ops/ben_vocabparallelembedding.py
+++ b/benchmarks/ops/ben_vocabparallelembedding.py
@@ -112,7 +112,7 @@ def test_get_masked_input_and_mask(

    # Define custom function
    def custom_fn():
-        return torch.ops._C.get_masked_input_and_mask(
+        return torch.ops._C_ascend.get_masked_input_and_mask(
            input_tensor,
            test_case["org_start"],
            test_case["org_end"],
--- a/benchmarks/scripts/run-performance-benchmarks.sh
+++ b/benchmarks/scripts/run-performance-benchmarks.sh
@@ -78,7 +78,9 @@ kill_npu_processes() {
  ps -aux
  lsof -t -i:8000 | xargs -r kill -9
  pgrep python3 | xargs -r kill -9
-  
+  # vLLM names its processes with a VLLM prefix since https://github.com/vllm-project/vllm/pull/21445, so kill those too
+  pgrep VLLM | xargs -r kill -9
+
  sleep 4
  rm -rf ~/.config/vllm

--- a/benchmarks/tests/serving-tests.json
+++ b/benchmarks/tests/serving-tests.json
@@ -23,7 +23,8 @@
      "hf_split": "train",
      "endpoint": "/v1/chat/completions",
      "dataset_path": "lmarena-ai/vision-arena-bench-v0.1",
-      "num_prompts": 200
+      "num_prompts": 200,
+      "no_stream": ""
    }
  },
  {