Integrate triton moe kernel (#7689)
Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>
This commit is contained in:
@@ -222,6 +222,7 @@ class ServerArgs:
|
||||
disable_chunked_prefix_cache: bool = False
|
||||
disable_fast_image_processor: bool = False
|
||||
enable_return_hidden_states: bool = False
|
||||
enable_triton_kernel_moe: bool = False
|
||||
warmups: Optional[str] = None
|
||||
|
||||
# Debug tensor dumps
|
||||
@@ -1554,6 +1555,11 @@ class ServerArgs:
|
||||
action="store_true",
|
||||
help="Enable returning hidden states with responses.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--enable-triton-kernel-moe",
|
||||
action="store_true",
|
||||
help="Use triton moe grouped gemm kernel.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--warmups",
|
||||
type=str,
|
||||
|
||||
Reference in New Issue
Block a user