@@ -506,6 +506,7 @@ class AiterIndicesUpdaterPrefill:
|
||||
spec_info.generate_attn_arg_prefill(
|
||||
req_pool_indices,
|
||||
paged_kernel_lens,
|
||||
None,
|
||||
self.req_to_token,
|
||||
)
|
||||
)
|
||||
|
||||
@@ -412,6 +412,10 @@ class ModelRunner:
|
||||
if not server_args.disable_chunked_prefix_cache:
|
||||
logger.info("Chunked prefix cache is turned on.")
|
||||
|
||||
if server_args.attention_backend == "aiter":
|
||||
if self.model_config.context_len > 8192:
|
||||
self.mem_fraction_static *= 0.85
|
||||
|
||||
def init_torch_distributed(self):
|
||||
logger.info("Init torch distributed begin.")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user