[main] remove the redundant log prints in register_custom_ops.py (#3094)
### What this PR does / why we need it?
This PR removes the redundant log prints in register_custom_ops.py, in
order to make the output clearer.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
CI passed with newly added and existing tests.
- vLLM version: v0.10.2
- vLLM main:
9607d5eb44
Signed-off-by: rjg-lyh <1318825571@qq.com>
This commit is contained in:
@@ -7,7 +7,6 @@ from vllm.distributed import (get_tensor_model_parallel_rank,
|
|||||||
tensor_model_parallel_all_reduce,
|
tensor_model_parallel_all_reduce,
|
||||||
tensor_model_parallel_reduce_scatter)
|
tensor_model_parallel_reduce_scatter)
|
||||||
from vllm.forward_context import get_forward_context
|
from vllm.forward_context import get_forward_context
|
||||||
from vllm.logger import logger
|
|
||||||
from vllm.utils import direct_register_custom_op
|
from vllm.utils import direct_register_custom_op
|
||||||
|
|
||||||
import vllm_ascend.envs as envs_ascend
|
import vllm_ascend.envs as envs_ascend
|
||||||
@@ -18,7 +17,6 @@ def _maybe_chunk_residual_impl(x: torch.Tensor,
|
|||||||
try:
|
try:
|
||||||
forward_context = get_forward_context()
|
forward_context = get_forward_context()
|
||||||
except AssertionError:
|
except AssertionError:
|
||||||
logger.info("Forward context is None, skipping the operation.")
|
|
||||||
return residual
|
return residual
|
||||||
|
|
||||||
if x.size(0) != residual.size(0):
|
if x.size(0) != residual.size(0):
|
||||||
@@ -41,7 +39,6 @@ def _maybe_all_gather_and_maybe_unpad_impl(x: torch.Tensor,
|
|||||||
try:
|
try:
|
||||||
forward_context = get_forward_context()
|
forward_context = get_forward_context()
|
||||||
except AssertionError:
|
except AssertionError:
|
||||||
logger.info("Forward context is None, skipping the operation.")
|
|
||||||
return x
|
return x
|
||||||
|
|
||||||
flashcomm_v1_enabled = forward_context.flashcomm_v1_enabled
|
flashcomm_v1_enabled = forward_context.flashcomm_v1_enabled
|
||||||
@@ -57,7 +54,6 @@ def _maybe_pad_and_reduce_impl(x: torch.Tensor) -> torch.Tensor:
|
|||||||
try:
|
try:
|
||||||
forward_context = get_forward_context()
|
forward_context = get_forward_context()
|
||||||
except AssertionError:
|
except AssertionError:
|
||||||
logger.info("Forward context is None, skipping the operation.")
|
|
||||||
return tensor_model_parallel_all_reduce(x)
|
return tensor_model_parallel_all_reduce(x)
|
||||||
|
|
||||||
flashcomm_v1_enabled = forward_context.flashcomm_v1_enabled
|
flashcomm_v1_enabled = forward_context.flashcomm_v1_enabled
|
||||||
@@ -75,7 +71,6 @@ def _maybe_prefetch_mlp_gate_up_proj_impl(x_dependency: torch.Tensor,
|
|||||||
try:
|
try:
|
||||||
forward_context = get_forward_context()
|
forward_context = get_forward_context()
|
||||||
except AssertionError:
|
except AssertionError:
|
||||||
logger.info("Forward context is None, skipping the operation.")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
if not forward_context.prefetch_mlp_enabled:
|
if not forward_context.prefetch_mlp_enabled:
|
||||||
@@ -106,7 +101,6 @@ def _maybe_prefetch_mlp_down_proj_impl(x_dependency: torch.Tensor) -> None:
|
|||||||
try:
|
try:
|
||||||
forward_context = get_forward_context()
|
forward_context = get_forward_context()
|
||||||
except AssertionError:
|
except AssertionError:
|
||||||
logger.info("Forward context is None, skipping the operation.")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
if not forward_context.prefetch_mlp_enabled:
|
if not forward_context.prefetch_mlp_enabled:
|
||||||
@@ -136,7 +130,6 @@ def _maybe_wait_prefetch_done_impl(x: torch.Tensor) -> None:
|
|||||||
try:
|
try:
|
||||||
forward_context = get_forward_context()
|
forward_context = get_forward_context()
|
||||||
except AssertionError:
|
except AssertionError:
|
||||||
logger.info("Forward context is None, skipping the operation.")
|
|
||||||
return
|
return
|
||||||
|
|
||||||
if not forward_context.prefetch_mlp_enabled:
|
if not forward_context.prefetch_mlp_enabled:
|
||||||
|
|||||||
Reference in New Issue
Block a user