Fix nightly ci Gsm8k & Fix flashinfer backend kvcache quant (#4147)
This commit is contained in:
@@ -904,6 +904,7 @@ class FlashInferIndicesUpdaterPrefill:
|
||||
self.head_dim,
|
||||
1,
|
||||
q_data_type=self.q_data_type,
|
||||
kv_data_type=self.data_type,
|
||||
custom_mask=custom_mask,
|
||||
non_blocking=True,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user