@@ -371,6 +371,8 @@ class BlockInt8MoEMethod:
|
||||
custom_routing_function: Optional[Callable] = None,
|
||||
correction_bias: Optional[torch.Tensor] = None,
|
||||
activation: str = "silu",
|
||||
inplace: bool = True,
|
||||
no_combine: bool = False,
|
||||
) -> torch.Tensor:
|
||||
from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_experts
|
||||
from sglang.srt.layers.moe.topk import select_experts
|
||||
@@ -395,7 +397,7 @@ class BlockInt8MoEMethod:
|
||||
layer.w2_weight,
|
||||
topk_weights=topk_weights,
|
||||
topk_ids=topk_ids,
|
||||
inplace=True,
|
||||
inplace=inplace,
|
||||
activation=activation,
|
||||
use_int8_w8a8=True,
|
||||
w1_scale=(layer.w13_weight_scale_inv),
|
||||
@@ -403,4 +405,5 @@ class BlockInt8MoEMethod:
|
||||
a1_scale=layer.w13_input_scale,
|
||||
a2_scale=layer.w2_input_scale,
|
||||
block_shape=self.quant_config.weight_block_size,
|
||||
no_combine=no_combine,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user