diff --git a/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py b/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py index ceba1c4..5bd622e 100644 --- a/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py +++ b/vllm_ascend/torchair/quantization/torchair_w8a8_dynamic.py @@ -459,7 +459,7 @@ def torchair_fused_experts_with_all2all( token_counts_combined = token_counts_combined.view( 2, ep_group.world_size, -1).sum(dim=2) token_counts_combined_cpu = token_counts_combined.to( - torch.device("cpu"), non_blocking=True).numpy() + torch.device("cpu"), non_blocking=False).numpy() all_tokens = gather_sizes.sum() gathered_tokens = quantized_tokens.new_empty(all_tokens.item(),