[Misc] Cleanup useless print and logger (#5220)
1. Remove useless print statements
2. Use the vLLM logger instead of the stdlib logging module
3. Demote noisy INFO messages to DEBUG level
- vLLM version: release/v0.13.0
- vLLM main: ad32e3e19c
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
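For context, the pattern this commit applies throughout the diff below, as a minimal standalone sketch (the dtype value here is only a placeholder):

    import logging

    import torch

    from vllm.logger import logger

    dtype = torch.float32  # placeholder for an unsupported dtype

    # Before: logging.info() goes through the stdlib root logger,
    # outside vLLM's logging configuration.
    logging.info("Quant fusion not enabled: unsupported dtype %s", dtype)

    # After: vLLM's configured logger at DEBUG level, so the message
    # is suppressed unless verbose logging is enabled.
    logger.debug("Quant fusion not enabled: unsupported dtype %s", dtype)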
@@ -15,13 +15,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import logging
-
 import torch
 import torch._inductor.pattern_matcher as pm
 from torch._inductor.pattern_matcher import PatternMatcherPass
 from vllm.compilation.vllm_inductor_pass import VllmInductorPass
 from vllm.config import VllmConfig
+from vllm.logger import logger
 
 
 class AddRMSNormQuantPattern:
@@ -288,7 +287,7 @@ class AddRMSNormQuantFusionPass(VllmInductorPass):
 
         dtype = vllm_config.model_config.dtype
         if dtype not in (torch.bfloat16, torch.float16):
-            logging.info("Quant fusion not enabled: unsupported dtype %s",
+            logger.debug("Quant fusion not enabled: unsupported dtype %s",
                          dtype)
             return
@@ -306,7 +305,7 @@ class AddRMSNormQuantFusionPass(VllmInductorPass):
     def __call__(self, graph: torch.fx.Graph):
         self.begin()
         self.matched_count = self.pattern_match_passes.apply(graph)
-        logging.debug("Replaced %s patterns", self.matched_count)
+        logger.debug("Replaced %s patterns", self.matched_count)
         self.end_and_log()
 
     def is_applicable(self, runtime_shape: int | None = None) -> bool:
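A side note before the second file (a stdlib-logging behavior, not something this commit changes): all of these calls pass %-style arguments instead of pre-formatted strings, so interpolation is deferred until the level check passes:

    from vllm.logger import logger

    matched_count = 3  # hypothetical value for illustration

    # Deferred %-formatting: the string is only built if DEBUG is
    # enabled, making the call nearly free at the default log level.
    logger.debug("Replaced %s patterns", matched_count)

    # An f-string would pay the formatting cost on every call, even
    # when the message is ultimately discarded:
    logger.debug(f"Replaced {matched_count} patterns")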
@@ -15,8 +15,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import logging
-
 import torch
 import torch._inductor.pattern_matcher as pm
 from torch._inductor.pattern_matcher import (PatternMatcherPass,
@@ -24,6 +22,7 @@ from torch._inductor.pattern_matcher import (PatternMatcherPass,
 from vllm.attention.layer import Attention
 from vllm.compilation.vllm_inductor_pass import VllmInductorPass
 from vllm.config import VllmConfig, get_layers_from_vllm_config
+from vllm.logger import logger
 
 
 class QKNormRopeFusionPattern:
@@ -237,7 +236,7 @@ class QKNormRopeFusionPass(VllmInductorPass):
 
         dtype = vllm_config.model_config.dtype
         if dtype not in (torch.bfloat16, torch.float16):
-            logging.info(
+            logger.debug(
                 "QKNorm and Rope fusion not enabled: unsupported dtype %s",
                 dtype)
             return
@@ -246,14 +245,14 @@ class QKNormRopeFusionPass(VllmInductorPass):
         attn_layers: dict[str, Attention] = get_layers_from_vllm_config(
             vllm_config, Attention)
         if len(attn_layers) == 0:
-            logging.info(
+            logger.debug(
                 "QKNorm and Rope fusion enabled, but no Attention layers were discovered."
             )
             return
         layer = next(iter(attn_layers.values()))
         for epsilon in [1e-6, 1e-5]:
             if layer.head_size != 128:
-                logging.debug(
+                logger.debug(
                     "QKNorm and Rope fusion not enabled: head_dim %d is not equal of 128",
                     layer.head_size)
                 continue
@@ -274,13 +273,13 @@ class QKNormRopeFusionPass(VllmInductorPass):
     def __call__(self, graph: torch.fx.Graph):
         self.begin()
         self.matched_count = self.pattern_match_passes.apply(graph)
-        logging.debug("Fused %s QKNorm and Rope patterns", self.matched_count)
-        logging.debug("Patterns registered for replacement:")
+        logger.debug("Fused %s QKNorm and Rope patterns", self.matched_count)
+        logger.debug("Patterns registered for replacement:")
         pattern_idx = 0
         for pattern_entry in self.pattern_match_passes.patterns.values():
             for p in pattern_entry:
                 p_str = PatternPrettyPrinter.run(p.pattern)
-                logging.debug("Pattern %d: %s", pattern_idx, p_str)
+                logger.debug("Pattern %d: %s", pattern_idx, p_str)
                 pattern_idx += 1
         self.end_and_log()
 
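Since the demoted messages are now DEBUG-level, they are hidden under vLLM's default log level. A minimal sketch of one way to surface them again, assuming the VLLM_LOGGING_LEVEL environment variable honored by vllm.logger:

    import os

    # Must be set before vllm (and thus vllm.logger) is first imported,
    # because the level is read when the logger is configured.
    os.environ["VLLM_LOGGING_LEVEL"] = "DEBUG"

    from vllm.logger import logger

    logger.debug("now visible at DEBUG level")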