This reverts commit 71e9b379c8, which breaks the vllm-ascend/Qwen3-30B-A3B-W8A8 test.
@@ -108,13 +108,13 @@ class AscendRMSNorm(RMSNorm):
         residual: Optional[torch.Tensor] = None,
     ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
         import torch_npu
 
         if residual is not None:
             residual = torch.ops.vllm.maybe_chunk_residual(x, residual)
             assert x.size(0) == residual.size(0)
-            next_need_quant_fusion_linear = getattr(
-                self, 'next_need_quant_fusion_linear', None)
             x, residual = _addrmsnorm_forward_oot(
-                self, x, residual, next_need_quant_fusion_linear, self.bias)
+                self, x, residual, self.next_need_quant_fusion_linear,
+                self.bias)
             return x, residual
         x, residual = torch_npu.npu_rms_norm(x, self.weight,
                                              self.variance_epsilon)
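The functional difference restored by this hunk is between a defensive getattr read and a direct attribute read. A minimal sketch of the two behaviors, using an illustrative Norm class rather than the real AscendRMSNorm (the class and its wiring here are assumptions for demonstration only):

class Norm:
    """Illustrative stand-in for a layer that may carry a fusion hint."""

    def __init__(self, fusion_linear=None):
        if fusion_linear is not None:
            # The attribute exists only when a fusion target was attached.
            self.next_need_quant_fusion_linear = fusion_linear


norm = Norm()

# Pre-revert behavior: a missing attribute is silently coerced to None,
# so the quant-fusion path is skipped without any signal.
hint = getattr(norm, 'next_need_quant_fusion_linear', None)
assert hint is None

# Post-revert behavior: a direct read raises AttributeError when the
# attribute was never set, surfacing the missing wiring immediately.
try:
    hint = norm.next_need_quant_fusion_linear
except AttributeError:
    print("next_need_quant_fusion_linear was never attached")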
@@ -173,9 +173,7 @@ def _maybe_prefetch_mlp_down_proj_impl(x_dependency: torch.Tensor) -> None:
     except AssertionError:
         return
 
-    prefetch_mlp_enabled = getattr(forward_context, 'prefetch_mlp_enabled',
-                                   False)
-    if not prefetch_mlp_enabled:
+    if not forward_context.prefetch_mlp_enabled:
         return
     forward_context.prefetch_mlp_down_proj = True
     model_instance = forward_context.model_instance
@@ -204,9 +202,7 @@ def _maybe_wait_prefetch_done_impl(x: torch.Tensor) -> None:
     except AssertionError:
         return
 
-    prefetch_mlp_enabled = getattr(forward_context, 'prefetch_mlp_enabled',
-                                   False)
-    if not prefetch_mlp_enabled:
+    if not forward_context.prefetch_mlp_enabled:
         return
     if forward_context.prefetch_mlp_gate_up_proj or \
         forward_context.prefetch_mlp_down_proj:
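Both prefetch hunks revert the same defensive pattern: getattr(forward_context, 'prefetch_mlp_enabled', False) treats a missing flag as "disabled", while the restored direct read requires the forward context to define the flag. A minimal sketch of the difference, using a SimpleNamespace as a hypothetical stand-in for vllm's forward context:

from types import SimpleNamespace

# Hypothetical stand-in for the forward context; the real vllm object
# carries many more fields.
forward_context = SimpleNamespace()  # prefetch_mlp_enabled never set

# Pre-revert behavior: an unpopulated context quietly reads as
# "prefetch disabled" and the helper returns early.
if not getattr(forward_context, 'prefetch_mlp_enabled', False):
    print("prefetch skipped: missing flag treated as False")

# Post-revert behavior: the same read fails loudly, so a context that
# was never initialized cannot be mistaken for an explicit opt-out.
try:
    if not forward_context.prefetch_mlp_enabled:
        pass
except AttributeError:
    print("forward_context lacks prefetch_mlp_enabled entirely")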