[main] flashcomm_v1 optim in Qwen Dense Models (#2802)
### What this PR does / why we need it?
Applies the Flashcomm_v1 optimization to Qwen dense models.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
CI passed with newly added and existing tests.
- vLLM version: v0.10.1.1
- vLLM main: 5e537f45b4
Co-authored-by: 1024daniel <xxltju324@gmail.com>
This commit is contained in:
@@ -493,6 +493,7 @@ def register_ascend_customop():
|
||||
from vllm_ascend.ops.activation import AscendQuickGELU, AscendSiluAndMul
|
||||
from vllm_ascend.ops.linear import (AscendColumnParallelLinear,
|
||||
AscendMergedColumnParallelLinear,
|
||||
AscendQKVParallelLinear,
|
||||
AscendRowParallelLinear)
|
||||
from vllm_ascend.ops.rotary_embedding import (
|
||||
AscendDeepseekScalingRotaryEmbedding, AscendRotaryEmbedding)
|
||||
@@ -510,6 +511,8 @@ def register_ascend_customop():
|
||||
name="RowParallelLinear")
|
||||
CustomOp.register_oot(_decorated_op_cls=AscendMergedColumnParallelLinear,
|
||||
name="MergedColumnParallelLinear")
|
||||
CustomOp.register_oot(_decorated_op_cls=AscendQKVParallelLinear,
|
||||
name="QKVParallelLinear")
|
||||
CustomOp.register_oot(
|
||||
_decorated_op_cls=AscendDeepseekScalingRotaryEmbedding,
|
||||
name="DeepseekScalingRotaryEmbedding")
|
||||
@@ -572,3 +575,7 @@ def mlp_tp_enable() -> bool:
|
||||
|
||||
def matmul_allreduce_enable() -> bool:
    """Report the matmul-allreduce switch.

    Returns:
        The current value of
        ``envs_ascend.VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE``, passed through
        unchanged.
    """
    # NOTE(review): the flag is read on every call; whether envs_ascend
    # caches or re-reads the environment is not visible here -- confirm.
    flag = envs_ascend.VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE
    return flag
|
||||
|
||||
|
||||
def dense_optim_enable() -> bool:
    """Report the dense-model optimization switch.

    Returns:
        The current value of
        ``envs_ascend.VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE``, passed through
        unchanged.
    """
    # NOTE(review): the flag is read on every call; whether envs_ascend
    # caches or re-reads the environment is not visible here -- confirm.
    flag = envs_ascend.VLLM_ASCEND_ENABLE_DENSE_OPTIMIZE
    return flag
|
||||
|
||||
Reference in New Issue
Block a user