implement model runner v2 basic framework (#5051)

### What this PR does / why we need it? This PR aims to implement the basic framework of model runner v2 in vllm-ascend; end-to-end functionality is not guaranteed by this PR. ### Does this PR introduce _any_ user-facing change? Use envs.VLLM_USE_V2_MODEL_RUNNER to decide whether to use model_runner_v2. ### How was this patch tested? - vLLM version: v0.12.0 - vLLM main: ad32e3e19c --------- Signed-off-by: Ronald1995 <ronaldautomobile@163.com>
2025-12-18 15:51:54 +08:00
parent 1c8c23de58
commit b69b04d3a9
16 changed files with 843 additions and 98 deletions
--- a/vllm_ascend/xlite/xlite_worker.py
+++ b/vllm_ascend/xlite/xlite_worker.py
@@ -13,7 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from vllm_ascend.worker.worker_v1 import NPUWorker
+from vllm_ascend.worker.worker import NPUWorker
 from vllm_ascend.xlite.xlite_model_runner import XliteModelRunner


@@ -23,4 +23,4 @@ class XliteWorker(NPUWorker):
    def init_device(self):
        """Override init_device to init xlite model runner"""
        self.device = self._init_device()
-        self.model_runner = XliteModelRunner(self.vllm_config, self.device)
+        self.model_runner = XliteModelRunner(self.vllm_config, self.device)