Add e2e test related to weight updates in RL scenarios. (#2954)

### What this PR does / why we need it? Add e2e test related to weight updates in RL scenarios. Due to CI issues, the newly added Python test files cannot locate the correct path. As a temporary solution, use absolute paths to add test cases. - vLLM version: v0.10.2 - vLLM main: 52d0cb8458 Signed-off-by: wangxiaoxin-sherie <wangxiaoxin7@huawei.com> Co-authored-by: wangxiaoxin-sherie <wangxiaoxin7@huawei.com> Co-authored-by: Shangwei-Li <lishangwei2@huawei.com>
2025-09-26 11:07:10 +08:00
parent d8a9cb8458
commit 8406aafaff
2 changed files with 514 additions and 0 deletions
--- a/tests/e2e/multicard/test_weight_loader.py
+++ b/tests/e2e/multicard/test_weight_loader.py
@@ -0,0 +1,188 @@
+#
+# Copyright (c) 2025 Huawei Technologies Co., Ltd. All Rights Reserved.
+# Copyright 2023 The vLLM team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""
+Compare the outputs of vLLM with and without aclgraph.
+
+Run `pytest tests/multicard/test_external_launcher.py`.
+"""
+
+import os
+import subprocess
+import sys
+
+import pytest
+import torch_npu
+
+MOE_MODELS = ["Qwen/Qwen3-30B-A3B"]
+MODELS = ["Qwen/Qwen3-8B"]
+DEVICE_NAME = torch_npu.npu.get_device_name(0)[:10]
+
+
+@pytest.mark.parametrize("model", MOE_MODELS)
+def test_external_launcher_eager(model):
+    script = script = "/usr/local/python3.11.13/bin/python3.11/__w/vllm-ascend/tests/examples/test_weight_loader.py"
+    env = os.environ.copy()
+    # TODO: Change to 2 when ci machine has 4 cards
+    cmd = [
+        sys.executable,
+        str(script),
+        "--model",
+        model,
+        "--tp-size",
+        "2",
+        "--proc-per-node",
+        "2",
+        "--trust-remote-code",
+        "--enforce-eager",
+        "--enable-expert-parallel",
+        "--enable-sleep-mode",
+        "--model-weight-gib",
+        "20",
+    ]
+
+    print(f"Running subprocess: {' '.join(cmd)}")
+    proc = subprocess.run(
+        cmd,
+        env=env,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        timeout=600,
+    )
+    output = proc.stdout.decode()
+
+    print(output)
+
+    assert "TP RANKS: [0]" in output
+    assert "TP RANKS: [1]" in output
+    assert "Generated text:" in output
+    assert proc.returncode == 0
+
+
+@pytest.mark.parametrize("model", MOE_MODELS)
+def test_external_launcher_aclgraph(model):
+    script = "/usr/local/python3.11.13/bin/python3.11/__w/vllm-ascend/tests/examples/test_weight_loader.py"
+    env = os.environ.copy()
+    # TODO: Change to 2 when ci machine has 4 cards
+    cmd = [
+        sys.executable,
+        str(script),
+        "--model",
+        model,
+        "--tp-size",
+        "2",
+        "--proc-per-node",
+        "2",
+        "--trust-remote-code",
+        "--enable-expert-parallel",
+        "--enable-sleep-mode",
+        "--model-weight-gib",
+        "20",
+    ]
+
+    print(f"Running subprocess: {' '.join(cmd)}")
+    proc = subprocess.run(
+        cmd,
+        env=env,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        timeout=600,
+    )
+    output = proc.stdout.decode()
+
+    print(output)
+
+    assert "TP RANKS: [0]" in output
+    assert "TP RANKS: [1]" in output
+    assert "Generated text:" in output
+    assert proc.returncode == 0
+
+
+@pytest.mark.parametrize("model", MODELS)
+def test_external_launcher_dense(model):
+    script = "/usr/local/python3.11.13/bin/python3.11/__w/vllm-ascend/tests/examples/test_weight_loader.py"
+    env = os.environ.copy()
+    # TODO: Change to 2 when ci machine has 4 cards
+    cmd = [
+        sys.executable,
+        str(script),
+        "--model",
+        model,
+        "--tp-size",
+        "2",
+        "--proc-per-node",
+        "2",
+        "--trust-remote-code",
+        "--enable-sleep-mode",
+        "--model-weight-gib",
+        "20",
+    ]
+
+    print(f"Running subprocess: {' '.join(cmd)}")
+    proc = subprocess.run(
+        cmd,
+        env=env,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        timeout=600,
+    )
+    output = proc.stdout.decode()
+
+    print(output)
+
+    assert "TP RANKS: [0]" in output
+    assert "TP RANKS: [1]" in output
+    assert "Generated text:" in output
+    assert proc.returncode == 0
+
+
+@pytest.mark.parametrize("model", MODELS)
+def test_external_launcher_dense_eager(model):
+    script = "/usr/local/python3.11.13/bin/python3.11/__w/vllm-ascend/tests/examples/test_weight_loader.py"
+    env = os.environ.copy()
+    # TODO: Change to 2 when ci machine has 4 cards
+    cmd = [
+        sys.executable,
+        str(script),
+        "--model",
+        model,
+        "--tp-size",
+        "2",
+        "--proc-per-node",
+        "2",
+        "--trust-remote-code",
+        "--enforce-eager",
+        "--enable-sleep-mode",
+        "--model-weight-gib",
+        "20",
+    ]
+
+    print(f"Running subprocess: {' '.join(cmd)}")
+    proc = subprocess.run(
+        cmd,
+        env=env,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.STDOUT,
+        timeout=600,
+    )
+    output = proc.stdout.decode()
+
+    print(output)
+
+    assert "TP RANKS: [0]" in output
+    assert "TP RANKS: [1]" in output
+    assert "Generated text:" in output
+    assert proc.returncode == 0