Fix prefill size (#711)

This commit is contained in:
Ying Sheng
2024-07-24 03:41:15 -07:00
committed by GitHub
parent 00e4baa728
commit 4367f4bb8d
2 changed files with 5 additions and 0 deletions

View File

@@ -11,6 +11,7 @@ class ReqToTokenPool:
"""A memory pool that maps a request to its token locations."""
def __init__(self, size: int, max_context_len: int):
self.size = size
self.mem_state = torch.ones((size,), dtype=torch.bool, device="cuda")
self.req_to_token = torch.empty(
(size, max_context_len), dtype=torch.int32, device="cuda"