[Feature] improve TBO: two chunk overlap (#8144)
This commit is contained in:
@@ -229,6 +229,7 @@ class ServerArgs:
|
||||
enable_dp_attention: bool = False
|
||||
enable_dp_lm_head: bool = False
|
||||
enable_two_batch_overlap: bool = False
|
||||
tbo_token_distribution_threshold: float = 0.48
|
||||
enable_torch_compile: bool = False
|
||||
torch_compile_max_bs: int = 32
|
||||
torchao_config: str = ""
|
||||
@@ -1689,6 +1690,12 @@ class ServerArgs:
|
||||
action="store_true",
|
||||
help="Enabling two micro batches to overlap.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--tbo-token-distribution-threshold",
|
||||
type=float,
|
||||
default=ServerArgs.tbo_token_distribution_threshold,
|
||||
help="The threshold of token distribution between two batches in micro-batch-overlap, determines whether to two-batch-overlap or two-chunk-overlap. Set to 0 denote disable two-chunk-overlap.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable-torch-compile",
|
||||
action="store_true",
|
||||
|
||||
Reference in New Issue
Block a user