Add PDL support for quant kernel and rope kernel (#9106)
This commit is contained in:
@@ -550,7 +550,6 @@ class ServerArgs:
assert (
self.quantization == "modelopt_fp4"
), "modelopt_fp4 quantization is required for Flashinfer MOE"
os.environ["TRTLLM_ENABLE_PDL"] = "1"
assert self.ep_size in [
1,
self.tp_size,
Reference in New Issue
Block a user