From f949ad5794c17d34fe1fb90d34143764e42cc86a Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Tue, 16 Sep 2025 17:06:43 -0700 Subject: [PATCH] [Auto Sync] Update activation.py, chunk_cache.py, utils.py (20250917) (#10538) Co-authored-by: github-actions[bot] --- python/sglang/srt/layers/activation.py | 13 +++++++------ python/sglang/srt/mem_cache/chunk_cache.py | 9 ++++++++- python/sglang/srt/utils.py | 2 -- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/python/sglang/srt/layers/activation.py b/python/sglang/srt/layers/activation.py index 67bae9b52..5dc48821a 100644 --- a/python/sglang/srt/layers/activation.py +++ b/python/sglang/srt/layers/activation.py @@ -224,12 +224,13 @@ class XIELU(CustomOp): self._xielu_cuda_fn = self._xielu_cuda logger.warning_once(msg) except Exception as err: - logger.warning_once( - "CUDA-fused xIELU not available (%s) –" - " falling back to a Python version.\n" - "For CUDA xIELU (experimental), `pip install git+https://github.com/nickjbrowning/XIELU`", - str(err), - ) + pass + # logger.warning_once( + # "CUDA-fused xIELU not available (%s) –" + # " falling back to a Python version.\n" + # "For CUDA xIELU (experimental), `pip install git+https://github.com/nickjbrowning/XIELU`", + # str(err), + # ) def _xielu_python(self, x: torch.Tensor) -> torch.Tensor: alpha_p = nn.functional.softplus(self.alpha_p) diff --git a/python/sglang/srt/mem_cache/chunk_cache.py b/python/sglang/srt/mem_cache/chunk_cache.py index 1a576bfa2..6ca8d9995 100644 --- a/python/sglang/srt/mem_cache/chunk_cache.py +++ b/python/sglang/srt/mem_cache/chunk_cache.py @@ -28,6 +28,13 @@ class ChunkCache(BasePrefixCache): self.token_to_kv_pool_allocator = token_to_kv_pool_allocator self.page_size = page_size + # NOTE (csy): this is to determine if a cache has prefix matching feature. + # Chunk cache always returns True to indicate no prefix matching.
+ # TODO (csy): Using a prefix cache trait to replace this + @property + def disable(self): + return True + def reset(self): pass @@ -38,7 +45,7 @@ class ChunkCache(BasePrefixCache): last_host_node=None, ) - def cache_finished_req(self, req: Req): + def cache_finished_req(self, req: Req, insert: bool = True): kv_indices = self.req_to_token_pool.req_to_token[ req.req_pool_idx, # For decode server: if req.output_ids is empty, we want to free all req.origin_input_ids diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py index e38400e3f..1c9de7b7b 100644 --- a/python/sglang/srt/utils.py +++ b/python/sglang/srt/utils.py @@ -82,11 +82,9 @@ from packaging import version as pkg_version from PIL import Image from starlette.routing import Mount from torch import nn -from torch.func import functional_call from torch.library import Library from torch.profiler import ProfilerActivity, profile, record_function from torch.utils._contextlib import _DecoratorContextManager -from triton.runtime.cache import FileCacheManager from typing_extensions import Literal from sglang.srt.metrics.func_timer import enable_func_timer