Super tiny enable sole usage of expert distribution metrics and update doc (#6680)
This commit is contained in:
@@ -27,7 +27,8 @@ class EPLBManager:
|
||||
<= self._server_args.expert_distribution_recorder_buffer_size
|
||||
), "eplb_rebalance_num_iterations must be less than expert_distribution_recorder_buffer_size"
|
||||
|
||||
get_global_expert_distribution_recorder().start_record()
|
||||
if not get_global_expert_distribution_recorder().recording:
|
||||
get_global_expert_distribution_recorder().start_record()
|
||||
|
||||
logger.info(
|
||||
f"[EPLBManager] system started, will rebalance per {self._server_args.eplb_rebalance_num_iterations} iterations."
|
||||
|
||||
@@ -91,6 +91,10 @@ class ExpertDistributionRecorder(ABC):
|
||||
def dump_record(self, output_mode: _OutputMode = "file"):
|
||||
self._on_not_implemented()
|
||||
|
||||
@property
|
||||
def recording(self):
|
||||
return False
|
||||
|
||||
def _on_not_implemented(self):
|
||||
raise Exception(
|
||||
"Please set ServerArgs.expert_distribution_recorder_mode to use ExpertDistributionRecorder."
|
||||
@@ -123,6 +127,12 @@ class _ExpertDistributionRecorderReal(ExpertDistributionRecorder):
|
||||
for k in self._accumulator.get_single_pass_gatherer_keys()
|
||||
}
|
||||
|
||||
if server_args.enable_expert_distribution_metrics:
|
||||
logger.info(
|
||||
"ExpertDistributionRecorder auto start record since enable_expert_distribution_metrics"
|
||||
)
|
||||
self.start_record()
|
||||
|
||||
def with_current_layer(self, layer_idx):
|
||||
return self._current_layer_idx.with_value(layer_idx)
|
||||
|
||||
@@ -221,6 +231,10 @@ class _ExpertDistributionRecorderReal(ExpertDistributionRecorder):
|
||||
self._reset()
|
||||
return output
|
||||
|
||||
@property
|
||||
def recording(self):
|
||||
return self._recording
|
||||
|
||||
|
||||
_global_expert_distribution_recorder: Optional[ExpertDistributionRecorder] = (
|
||||
_ExpertDistributionRecorderNoop()
|
||||
|
||||
@@ -1355,7 +1355,7 @@ class ServerArgs:
|
||||
"--deepep-config",
|
||||
type=str,
|
||||
default=ServerArgs.deepep_config,
|
||||
help="Tuned DeepEP config suitable for your own cluster.",
|
||||
help="Tuned DeepEP config suitable for your own cluster. It can be either a string with JSON content or a file path.",
|
||||
)
|
||||
|
||||
parser.add_argument(
|
||||
|
||||
Reference in New Issue
Block a user