make radix cache deterministic (#10721)

Signed-off-by: Alex Chi Z <iskyzh@gmail.com>
This commit is contained in:
Alex Chi Z
2025-10-14 15:01:52 +02:00
committed by GitHub
parent 817e46f412
commit dc965db0e0
5 changed files with 81 additions and 17 deletions

View File

@@ -3441,3 +3441,16 @@ def cached_triton_kernel(key_fn=None):
return CachedKernel(fn, key_fn)
return decorator
# Single default size reused by every entry of the per-backend table below.
# NOTE(review): the unit of this value is not visible here — confirm at the
# call sites.
DEFAULT_DETERMINISTIC_INFERENCE_BACKEND_SIZE = 4096

# Maps a backend name to a ``(setting_name, default_size)`` tuple.
# NOTE(review): the first tuple element is presumably the name of an
# environment variable that overrides the default — verify against the
# consumer of this table.
DEFAULT_DETERMINISTIC_INFERENCE_BACKEND_SIZE_CONFIG = {
    backend: (setting_name, DEFAULT_DETERMINISTIC_INFERENCE_BACKEND_SIZE)
    for backend, setting_name in (
        ("flashinfer", "SGLANG_FLASHINFER_PREFILL_SPLIT_TILE_SIZE"),
        ("triton", "SGLANG_TRITON_PREFILL_TRUNCATION_ALIGN_SIZE"),
    )
}