[Model] Support DeepSeek-V4

This commit is contained in:
chenxb002
2026-04-24 09:50:34 +08:00
commit b9925203b8
172 changed files with 44780 additions and 0 deletions

View File

@@ -0,0 +1,25 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM-MLU project
from vllm.v1.worker.gpu_input_batch import InputBatch
from vllm_mlu.mlu_hijack_utils import MluHijackObject
def split_decodes_and_prefills(self):
    """Count the decode and prefill requests currently in this batch.

    A request whose computed-token count is still below its prompt length
    is in the prefill phase; once it has consumed the whole prompt it is
    decoding.

    Returns:
        tuple[int, int]: ``(decodes, prefills)`` — the number of requests
        in each phase. Both are 0 for an empty batch.

    Raises:
        KeyError: if a request id in ``self.req_ids`` is missing from
            ``self.req_id_to_index`` (a batch-consistency bug).
    """
    decodes = 0
    prefills = 0
    for req_id in self.req_ids:
        # Use direct indexing rather than .get(): a missing id should fail
        # fast with KeyError. .get() returning None would silently add an
        # axis when indexing a numpy array (arr[None] == arr[np.newaxis]),
        # hiding the inconsistency instead of surfacing it.
        req_index = self.req_id_to_index[req_id]
        num_prompt_tokens = self.num_prompt_tokens[req_index]
        num_computed_tokens = self.num_computed_tokens_cpu[req_index]
        if num_computed_tokens < num_prompt_tokens:
            prefills += 1
        else:
            decodes += 1
    return decodes, prefills
# Monkey-patch: register split_decodes_and_prefills as a method named
# "split_decodes_and_prefills" on vLLM's InputBatch via the MLU hijack
# utility, so the MLU backend can call it on batch instances.
# NOTE(review): apply_hijack's exact semantics (e.g. whether it preserves
# an original attribute for later restore) are defined in
# vllm_mlu.mlu_hijack_utils — confirm there.
MluHijackObject.apply_hijack(InputBatch,
"split_decodes_and_prefills",
split_decodes_and_prefills)