PrefillAdder abstraction (#968)

This commit is contained in:
Liangsheng Yin
2024-08-07 13:47:28 -07:00
committed by GitHub
parent 6db27f7b3b
commit f724f1f1e9
5 changed files with 151 additions and 135 deletions

View File

@@ -130,7 +130,7 @@ class ModelRunner:
server_args.max_total_tokens,
)
self.init_cublas()
-self.init_flash_infer()
+self.init_flashinfer()
# Capture cuda graphs
self.init_cuda_graphs()
@@ -287,7 +287,7 @@ class ModelRunner:
c = a @ b
return c
-def init_flash_infer(self):
+def init_flashinfer(self):
if self.server_args.disable_flashinfer:
self.flashinfer_prefill_wrapper_ragged = None
self.flashinfer_prefill_wrapper_paged = None