forked from EngineX-Ascend/enginex-ascend-910-vllm
init v0.11.0rc0
This commit is contained in:
@@ -20,6 +20,7 @@ import torch
|
||||
import vllm_ascend.ops.common_fused_moe # noqa
|
||||
import vllm_ascend.ops.fused_moe # noqa
|
||||
import vllm_ascend.ops.layernorm # noqa
|
||||
import vllm_ascend.ops.register_custom_ops # noqa
|
||||
import vllm_ascend.ops.vocab_parallel_embedding # noqa
|
||||
from vllm_ascend.ops.activation import AscendQuickGELU, AscendSiluAndMul
|
||||
from vllm_ascend.ops.rotary_embedding import (
|
||||
@@ -34,19 +35,20 @@ class dummyFusionOp:
|
||||
|
||||
|
||||
def register_dummy_fusion_op() -> None:
|
||||
torch.ops._C.rms_norm = dummyFusionOp(name="rms_norm")
|
||||
torch.ops._C.fused_add_rms_norm = dummyFusionOp(name="fused_add_rms_norm")
|
||||
torch.ops._C.static_scaled_fp8_quant = dummyFusionOp(
|
||||
torch.ops._C_ascend.rms_norm = dummyFusionOp(name="rms_norm")
|
||||
torch.ops._C_ascend.fused_add_rms_norm = dummyFusionOp(
|
||||
name="fused_add_rms_norm")
|
||||
torch.ops._C_ascend.static_scaled_fp8_quant = dummyFusionOp(
|
||||
name="static_scaled_fp8_quant")
|
||||
torch.ops._C.dynamic_scaled_fp8_quant = dummyFusionOp(
|
||||
torch.ops._C_ascend.dynamic_scaled_fp8_quant = dummyFusionOp(
|
||||
name="dynamic_scaled_fp8_quant")
|
||||
torch.ops._C.dynamic_per_token_scaled_fp8_quant = dummyFusionOp(
|
||||
torch.ops._C_ascend.dynamic_per_token_scaled_fp8_quant = dummyFusionOp(
|
||||
name="dynamic_per_token_scaled_fp8_quant")
|
||||
torch.ops._C.rms_norm_static_fp8_quant = dummyFusionOp(
|
||||
torch.ops._C_ascend.rms_norm_static_fp8_quant = dummyFusionOp(
|
||||
name="rms_norm_static_fp8_quant")
|
||||
torch.ops._C.fused_add_rms_norm_static_fp8_quant = dummyFusionOp(
|
||||
torch.ops._C_ascend.fused_add_rms_norm_static_fp8_quant = dummyFusionOp(
|
||||
name="fused_add_rms_norm_static_fp8_quant")
|
||||
torch.ops._C.rms_norm_dynamic_per_token_quant = dummyFusionOp(
|
||||
torch.ops._C_ascend.rms_norm_dynamic_per_token_quant = dummyFusionOp(
|
||||
name="rms_norm_dynamic_per_token_quant")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user