[Feature] model_runner refactor (#4764)

### What this PR does / why we need it? Refactor npu_modelrunner so that its structure stays close to gpu_modelrunner. ### Does this PR introduce _any_ user-facing change? NO - vLLM version: v0.12.0 - vLLM main: ad32e3e19c --------- Signed-off-by: zhenwenqi2024 <zhenwenqi_2022@qq.com> Signed-off-by: zhenwenqi2024 <155598497+zhenwenqi2024@users.noreply.github.com>
2025-12-12 17:27:09 +08:00
parent 5b12c068f9
commit f708d919f8
10 changed files with 676 additions and 1815 deletions
--- a/tests/ut/worker/test_input_batch.py
+++ b/tests/ut/worker/test_input_batch.py
@@ -24,6 +24,7 @@ from vllm.utils.torch_utils import make_tensor_with_pad
 from vllm.v1.pool.metadata import PoolingMetadata
 from vllm.v1.sample.logits_processor import LogitsProcessors
 from vllm.v1.sample.metadata import SamplingMetadata
+from vllm.v1.utils import CpuGpuBuffer

 from vllm_ascend.worker.block_table import BlockTable, MultiGroupBlockTable
 from vllm_ascend.worker.npu_input_batch import CachedRequestState, InputBatch
@@ -67,6 +68,8 @@ def _compare_objs(obj1,
            is_same = True  # if we make it here must be same
        elif a == b:
            is_same = True
+        elif isinstance(a, CpuGpuBuffer):
+            is_same = np.allclose(a.np, b.np) and torch.allclose(a.gpu, b.gpu)
        assert is_same, f"Attribute {attr_name} is different"\
            f" in {obj1} and {obj2}: {a} != {b}"