[main][bugfix] bugfix for minicpm models (#3527)
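### What this PR does / why we need it?

Bugfix for minicpm-2b and minicpm3-4b. This change re-enables the `patch_minicpm` import (removing the temporary triton-import TODO), stops excluding `test_patch_minicpm.py` from the unit-test run, and builds `attn_state` in `NPUModelRunner` before it is passed to `_make_attention_mask`.

- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

Signed-off-by: Wang Kunpeng <1289706727@qq.com>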
This commit is contained in:
1
.github/workflows/vllm_ascend_test.yaml
vendored
1
.github/workflows/vllm_ascend_test.yaml
vendored
@@ -121,7 +121,6 @@ jobs:
|
|||||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
|
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib
|
||||||
pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut \
|
pytest -sv --cov --cov-report=xml:unittests-coverage.xml tests/ut \
|
||||||
--ignore=tests/ut/test_platform.py \
|
--ignore=tests/ut/test_platform.py \
|
||||||
--ignore=tests/ut/patch/worker/patch_common/test_patch_minicpm.py \
|
|
||||||
--ignore=tests/ut/core/test_scheduler.py \
|
--ignore=tests/ut/core/test_scheduler.py \
|
||||||
--ignore=tests/ut/kv_connector/test_llmdatadist_connector.py \
|
--ignore=tests/ut/kv_connector/test_llmdatadist_connector.py \
|
||||||
--ignore=tests/ut/kv_connector/test_mooncake_connector.py \
|
--ignore=tests/ut/kv_connector/test_mooncake_connector.py \
|
||||||
|
|||||||
@@ -26,6 +26,4 @@ import vllm_ascend.patch.worker.patch_common.patch_logits # noqa
|
|||||||
import vllm_ascend.patch.worker.patch_common.patch_roberta # noqa
|
import vllm_ascend.patch.worker.patch_common.patch_roberta # noqa
|
||||||
import vllm_ascend.patch.worker.patch_common.patch_weight_loader # noqa
|
import vllm_ascend.patch.worker.patch_common.patch_weight_loader # noqa
|
||||||
import vllm_ascend.patch.worker.patch_common.patch_multimodal_merge # noqa
|
import vllm_ascend.patch.worker.patch_common.patch_multimodal_merge # noqa
|
||||||
|
import vllm_ascend.patch.worker.patch_common.patch_minicpm # noqa
|
||||||
# TODO: revert me when triton import is fixed
|
|
||||||
# import vllm_ascend.patch.worker.patch_common.patch_minicpm # noqa
|
|
||||||
|
|||||||
@@ -1346,6 +1346,8 @@ class NPUModelRunner(LoRAModelRunnerMixin):
|
|||||||
positions_cpu = self.positions_cpu[:num_input_tokens]
|
positions_cpu = self.positions_cpu[:num_input_tokens]
|
||||||
positions = self.positions[:num_input_tokens]
|
positions = self.positions[:num_input_tokens]
|
||||||
seq_lens_cpu = self.seq_lens_cpu[:num_reqs]
|
seq_lens_cpu = self.seq_lens_cpu[:num_reqs]
|
||||||
|
attn_state = self._build_attn_state(num_reqs, num_scheduled_tokens,
|
||||||
|
num_valid_tokens)
|
||||||
self.attn_mask = self._make_attention_mask(seq_lens=seq_lens_cpu,
|
self.attn_mask = self._make_attention_mask(seq_lens=seq_lens_cpu,
|
||||||
position=positions_cpu,
|
position=positions_cpu,
|
||||||
attn_state=attn_state)
|
attn_state=attn_state)
|
||||||
|
|||||||
Reference in New Issue
Block a user