Fix the global scale fix not supporting EPLB, and improve its enabling condition (#10369)
This commit is contained in:
@@ -999,12 +999,14 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
|
||||
data=torch.empty(layer.num_experts, 2, dtype=torch.float32),
|
||||
weight_loader=weight_loader,
|
||||
)
|
||||
w13_input_scale._sglang_require_global_experts = True
|
||||
layer.register_parameter("w13_input_scale", w13_input_scale)
|
||||
|
||||
w2_input_scale = PerTensorScaleParameter(
|
||||
data=torch.empty(layer.num_experts, dtype=torch.float32),
|
||||
weight_loader=weight_loader,
|
||||
)
|
||||
w2_input_scale._sglang_require_global_experts = True
|
||||
layer.register_parameter("w2_input_scale", w2_input_scale)
|
||||
|
||||
def swizzle_blockscale(self, scale: torch.Tensor):
|
||||
|
||||
Reference in New Issue
Block a user