[Feature] improve TBO: two chunk overlap (#8144)

This commit is contained in:
HouseWest
2025-08-06 12:11:01 +08:00
committed by GitHub
parent d26ca84f39
commit ca47e24f5d
6 changed files with 218 additions and 29 deletions

View File

@@ -229,6 +229,7 @@ class ServerArgs:
enable_dp_attention: bool = False
enable_dp_lm_head: bool = False
enable_two_batch_overlap: bool = False
tbo_token_distribution_threshold: float = 0.48
enable_torch_compile: bool = False
torch_compile_max_bs: int = 32
torchao_config: str = ""
@@ -1689,6 +1690,12 @@ class ServerArgs:
action="store_true",
help="Enabling two micro batches to overlap.",
)
parser.add_argument(
"--tbo-token-distribution-threshold",
type=float,
default=ServerArgs.tbo_token_distribution_threshold,
help="The threshold of token distribution between the two batches in micro-batch-overlap; it determines whether to use two-batch-overlap or two-chunk-overlap. Set to 0 to disable two-chunk-overlap.",
)
parser.add_argument(
"--enable-torch-compile",
action="store_true",