Revert "Fix: Dynamic RoPE Cache Expansion to Prevent Position-ID Out-of-Bounds in EAGLE + Long-Sequence Workloads" (#11827)

This commit is contained in:
Liangsheng Yin
2025-10-19 19:44:06 +08:00
committed by GitHub
parent 12eb02e982
commit 57e25de756
4 changed files with 0 additions and 103 deletions

View File

@@ -140,7 +140,6 @@ from sglang.srt.utils import (
log_info_on_rank0,
monkey_patch_p2p_access_check,
monkey_patch_vllm_gguf_config,
reserve_rope_cache_for_long_sequences,
set_cuda_arch,
slow_rank_detector,
)
@@ -899,15 +898,6 @@ class ModelRunner:
f"mem usage={self.weight_load_mem_usage:.2f} GB."
)
# Pre-expand RoPE cache before CUDA Graph capture
reserve_rope_cache_for_long_sequences(
self.model,
self.server_args,
self.model_config,
self.req_to_token_pool,
logger,
)
if self.server_args.elastic_ep_backend == "mooncake":
# Mooncake does not support `monitored_barrier`
dist.barrier(group=get_tp_group().cpu_group)