refactor: add a check before layer_sharding logging (#7186)
### What this PR does / why we need it?
We should only display this log message when `layer_sharding` is enabled.
- vLLM version: v0.16.0
- vLLM main: 4034c3d32e
Signed-off-by: QiuChunshuo <qiuchunshuo@huawei.com>
@@ -52,11 +52,12 @@ class AscendConfig:
         self.dump_config_path = additional_config.get("dump_config_path", None)
         self._construct_weight_prefetch_config(additional_config)
         self.layer_sharding = additional_config.get("layer_sharding", None)
-        logger.info_once(
-            f"Linear layer sharding enabled with config: {self.layer_sharding}. "
-            "Note: This feature works optimally with FLASHCOMM2 and DSA-CP enabled; "
-            "using it without these features may result in significant performance degradation."
-        )
+        if self.layer_sharding:
+            logger.info_once(
+                f"Linear layer sharding enabled with config: {self.layer_sharding}. "
+                "Note: This feature works optimally with FLASHCOMM2 and DSA-CP enabled; "
+                "using it without these features may result in significant performance degradation."
+            )
 
         self.enable_shared_expert_dp = (
             additional_config.get("enable_shared_expert_dp", False)
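For reference, a minimal standalone sketch of the guarded-logging behaviour this PR introduces. It is not the vllm-ascend source: it uses a plain dict in place of `AscendConfig` and the standard `logging` module in place of vLLM's `logger.info_once`, and the helper name `construct_config` is illustrative only.

```python
# Minimal sketch (assumptions noted above): the layer-sharding log is emitted
# only when the option is actually set, mirroring the guard added in this PR.
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("ascend_config_demo")


def construct_config(additional_config: dict) -> dict:
    # layer_sharding defaults to None, so nothing is logged unless the user
    # explicitly enables it in additional_config.
    layer_sharding = additional_config.get("layer_sharding", None)
    if layer_sharding:
        logger.info(
            "Linear layer sharding enabled with config: %s. "
            "Note: This feature works optimally with FLASHCOMM2 and DSA-CP enabled; "
            "using it without these features may result in significant "
            "performance degradation.",
            layer_sharding,
        )
    return {"layer_sharding": layer_sharding}


if __name__ == "__main__":
    construct_config({})                        # no log emitted
    construct_config({"layer_sharding": True})  # log emitted
```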