feat: support flashinfer mla with prefix cache (#3643)
This commit is contained in:
@@ -177,6 +177,7 @@ class ModelRunner:
|
||||
"enable_ep_moe": server_args.enable_ep_moe,
|
||||
"device": server_args.device,
|
||||
"enable_flashinfer_mla": server_args.enable_flashinfer_mla,
|
||||
"disable_radix_cache": server_args.disable_radix_cache,
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user