make radix cache deterministic (#10721)

Signed-off-by: Alex Chi Z <iskyzh@gmail.com>
This commit is contained in:
Alex Chi Z
2025-10-14 15:01:52 +02:00
committed by GitHub
parent 817e46f412
commit dc965db0e0
5 changed files with 81 additions and 17 deletions

View File

@@ -163,6 +163,7 @@ from sglang.srt.tracing.trace import (
)
from sglang.srt.two_batch_overlap import TboDPAttentionPreparer
from sglang.srt.utils import (
+DEFAULT_DETERMINISTIC_INFERENCE_BACKEND_SIZE_CONFIG,
DynamicGradMode,
broadcast_pyobj,
configure_gc_logger,
@@ -705,11 +706,7 @@ class Scheduler(
self.truncation_align_size = None
return
-backend_sizes = {
-"flashinfer": ("SGLANG_FLASHINFER_PREFILL_SPLIT_TILE_SIZE", 4096),
-"triton": ("SGLANG_TRITON_PREFILL_TRUNCATION_ALIGN_SIZE", 4096),
-}
-env_var, default_size = backend_sizes.get(
+env_var, default_size = DEFAULT_DETERMINISTIC_INFERENCE_BACKEND_SIZE_CONFIG.get(
self.server_args.attention_backend, (None, None)
)
self.truncation_align_size = (
@@ -849,6 +846,7 @@ class Scheduler(
disable=server_args.disable_radix_cache,
enable_kv_cache_events=self.enable_kv_cache_events,
eviction_policy=server_args.radix_eviction_policy,
+enable_deterministic_inference=server_args.enable_deterministic_inference,
is_eagle=self.spec_algorithm.is_eagle(),
)