fix: specify q_data_type when calling begin_forward (aka plan) (#2404)

This commit is contained in:
Yineng Zhang
2024-12-08 20:07:30 +08:00
committed by GitHub
parent a2486eb58f
commit 6128f7cff5

View File

@@ -678,6 +678,7 @@ class FlashInferIndicesUpdaterPrefill:
self.num_qo_heads,
self.num_kv_heads,
self.head_dim,
q_data_type=self.q_data_type,
)
# cached part
@@ -691,6 +692,7 @@ class FlashInferIndicesUpdaterPrefill:
self.num_kv_heads,
self.head_dim,
1,
q_data_type=self.q_data_type,
)