Adjust default mem fraction to avoid OOM (#823)

This commit is contained in:
Ying Sheng
2024-07-30 01:58:31 -07:00
committed by GitHub
parent ae5c0fc442
commit e7487b08bc
4 changed files with 22 additions and 17 deletions

View File

@@ -103,7 +103,7 @@ class RadixAttention(nn.Module):
return o
def extend_forward_flashinfer(self, q, k, v, input_metadata: InputMetadata):
-if not input_metadata.use_ragged:
+if not input_metadata.flashinfer_use_ragged:
self.store_kv_cache(k, v, input_metadata)
o = input_metadata.flashinfer_prefill_wrapper_paged.forward(