### What this PR does / why we need it?
Adapt the vllm-ascend main branch to vLLM releases/v0.11.1.
Fix the `forward context not set` error in test_vlm.py caused by:
https://github.com/vllm-project/vllm/pull/23207
Fix the failing import of `cdiv`/`round` caused by:
https://github.com/vllm-project/vllm/pull/27188
Fix the failing import of `init_cached_hf_modules` caused by:
https://github.com/vllm-project/vllm/pull/27567
Adapt the Triton kernel `fused_recurrent_gated_delta_rule_fwd_kernel` to the
changes in: https://github.com/vllm-project/vllm/pull/27654
- remove unused code in sigmoid_gating.py
- `class FusedRecurrentFunction` , `fused_recurrent_gated_delta_rule`,
`fused_recurrent_gated_delta_rule_fwd`
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
CI
- vLLM version: v0.11.0
- vLLM main:
83f478bb19
Signed-off-by: 22dimensions <waitingwind@foxmail.com>
22 lines
1.3 KiB
Python
22 lines
1.3 KiB
Python
"""Monkey-patch vLLM's FLA/Mamba ops with Ascend NPU implementations.

Importing this module rebinds selected symbols inside the upstream vLLM
modules so that downstream code transparently picks up the NPU-compatible
versions provided by vllm_ascend.
"""

import vllm.model_executor.layers.fla.ops.chunk
import vllm.model_executor.layers.fla.ops.fused_recurrent
import vllm.model_executor.layers.fla.ops.layernorm_guard
import vllm.model_executor.layers.mamba.ops.causal_conv1d

# NOTE: "casual_conv1d" (sic) is the actual module name in vllm_ascend.
from vllm_ascend.ops.casual_conv1d import (causal_conv1d_fn,
                                           causal_conv1d_update_npu)
from vllm_ascend.ops.fla import LayerNormFn, torch_chunk_gated_delta_rule
from vllm_ascend.ops.sigmoid_gating import (
    fused_recurrent_gated_delta_rule_fwd_kernel,
    fused_recurrent_gated_delta_rule_fwd_kernel_0_11_0)
from vllm_ascend.utils import vllm_version_is

# Short aliases for the upstream modules being patched.
_fla_ops = vllm.model_executor.layers.fla.ops
_mamba_conv1d = vllm.model_executor.layers.mamba.ops.causal_conv1d

# Causal conv1d: route both the incremental-update path and the
# full-sequence path through the NPU implementations.
_mamba_conv1d.causal_conv1d_update = causal_conv1d_update_npu
_mamba_conv1d.causal_conv1d_fn = causal_conv1d_fn

# The fused recurrent gated-delta-rule kernel's interface changed after
# vLLM v0.11.0 (see vllm-project/vllm#27654), so install the variant that
# matches the installed vLLM version.
if vllm_version_is('0.11.0'):
    _fla_ops.fused_recurrent.fused_recurrent_gated_delta_rule_fwd_kernel = (
        fused_recurrent_gated_delta_rule_fwd_kernel_0_11_0)
else:
    _fla_ops.fused_recurrent.fused_recurrent_gated_delta_rule_fwd_kernel = (
        fused_recurrent_gated_delta_rule_fwd_kernel)

# Remaining FLA ops: NPU layer-norm autograd function and a torch-based
# chunked gated-delta-rule replacement.
_fla_ops.layernorm_guard.LayerNormFn = LayerNormFn
_fla_ops.chunk.chunk_gated_delta_rule = torch_chunk_gated_delta_rule

# Keep the module namespace clean of patching scaffolding.
del _fla_ops, _mamba_conv1d