diff --git a/benchmark/latency_throughput/bench_serving.py b/benchmark/latency_throughput/bench_serving.py
index d46b84579..cbe63a55b 100644
--- a/benchmark/latency_throughput/bench_serving.py
+++ b/benchmark/latency_throughput/bench_serving.py
@@ -38,7 +38,6 @@ def sample_requests(
     num_requests: int,
     tokenizer: AutoTokenizer,
 ) -> List[Tuple[str, int, int]]:
-
     def load_dataset():
         with open(dataset_path, encoding="utf-8") as f:
             dataset = json.load(f)
diff --git a/benchmark/line_retrieval/gen_data.py b/benchmark/line_retrieval/gen_data.py
index c88ecba49..5763e6615 100644
--- a/benchmark/line_retrieval/gen_data.py
+++ b/benchmark/line_retrieval/gen_data.py
@@ -48,9 +48,9 @@ def generate_lines(random_words, num_lines, redirect_ratio):
         )
         for i in redirect_indices:
             target_idx = np.random.choice(min(i * 2 + 100, num_lines))
-            lines[i] = (
-                f"Line {indices[i]}: The REGISTER_CONTENT is the same as Line {indices[target_idx]}."
-            )
+            lines[
+                i
+            ] = f"Line {indices[i]}: The REGISTER_CONTENT is the same as Line {indices[target_idx]}."
             redirects[i] = target_idx
 
     # Build links and find sources
diff --git a/python/sglang/backend/litellm.py b/python/sglang/backend/litellm.py
index eef6b0cda..d9b4023ca 100644
--- a/python/sglang/backend/litellm.py
+++ b/python/sglang/backend/litellm.py
@@ -13,7 +13,6 @@ except ImportError as e:
 
 
 class LiteLLM(BaseBackend):
-
     def __init__(
         self,
         model_name,
diff --git a/python/sglang/lang/compiler.py b/python/sglang/lang/compiler.py
index 2c071e407..36287cd39 100644
--- a/python/sglang/lang/compiler.py
+++ b/python/sglang/lang/compiler.py
@@ -4,7 +4,7 @@ from queue import Queue
 from typing import List, Union
 
 from sglang.global_config import global_config
-from sglang.lang.interpreter import ProgramState, StreamExecutor, pin_program
+from sglang.lang.interpreter import ProgramState, StreamExecutor, cache_program
 from sglang.lang.ir import (
     SglArgument,
     SglConstantText,
@@ -184,7 +184,7 @@ class CompiledFunction:
 
         # Extract prefix by tracing and cache it
         if len(batch_kwargs) > 1:
-            pin_program(self.function, backend)
+            cache_program(self.function, backend)
 
         # Run all programs
         if num_threads == "auto":
diff --git a/python/sglang/launch_server_llavavid.py b/python/sglang/launch_server_llavavid.py
index a048c3dec..b71d8701d 100644
--- a/python/sglang/launch_server_llavavid.py
+++ b/python/sglang/launch_server_llavavid.py
@@ -6,7 +6,6 @@ import multiprocessing as mp
 from sglang.srt.server import ServerArgs, launch_server
 
 if __name__ == "__main__":
-
     model_overide_args = {}
 
     model_overide_args["mm_spatial_pool_stride"] = 2
diff --git a/python/sglang/srt/managers/controller/infer_batch.py b/python/sglang/srt/managers/controller/infer_batch.py
index 653225d68..773d6a500 100644
--- a/python/sglang/srt/managers/controller/infer_batch.py
+++ b/python/sglang/srt/managers/controller/infer_batch.py
@@ -498,9 +498,10 @@ class Batch:
                         req.output_ids = cur_output_ids
                         continue
 
-                    jump_forward_str, next_state = (
-                        req.jump_forward_map.jump_forward_symbol(cur_state)
-                    )
+                    (
+                        jump_forward_str,
+                        next_state,
+                    ) = req.jump_forward_map.jump_forward_symbol(cur_state)
 
                     # Make the incrementally decoded text part of jump_forward_str
                     # so that the UTF-8 will not corrupt
diff --git a/python/sglang/srt/managers/controller/tp_worker.py b/python/sglang/srt/managers/controller/tp_worker.py
index ba19142da..7ee1e5079 100644
--- a/python/sglang/srt/managers/controller/tp_worker.py
+++ b/python/sglang/srt/managers/controller/tp_worker.py
@@ -283,13 +283,14 @@ class ModelTpServer:
                 (recv_req.image_hash >> 64) % self.model_config.vocab_size,
             ]
             req.image_size = recv_req.image_size
-            req.origin_input_ids, req.image_offset = (
-                self.model_runner.model.pad_input_ids(
-                    req.origin_input_ids_unpadded,
-                    req.pad_value,
-                    req.pixel_values.shape,
-                    req.image_size,
-                )
+            (
+                req.origin_input_ids,
+                req.image_offset,
+            ) = self.model_runner.model.pad_input_ids(
+                req.origin_input_ids_unpadded,
+                req.pad_value,
+                req.pixel_values.shape,
+                req.image_size,
             )
         req.sampling_params = recv_req.sampling_params
         req.return_logprob = recv_req.return_logprob
diff --git a/python/sglang/srt/managers/io_struct.py b/python/sglang/srt/managers/io_struct.py
index 20590bc24..e661edfaf 100644
--- a/python/sglang/srt/managers/io_struct.py
+++ b/python/sglang/srt/managers/io_struct.py
@@ -35,7 +35,6 @@ class GenerateReqInput:
     stream: bool = False
 
     def post_init(self):
-
         if (self.text is None and self.input_ids is None) or (
             self.text is not None and self.input_ids is not None
         ):
diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py
index 42f970370..0d137eb8a 100644
--- a/python/sglang/srt/managers/tokenizer_manager.py
+++ b/python/sglang/srt/managers/tokenizer_manager.py
@@ -334,15 +334,15 @@ class TokenizerManager:
                 ret["meta_info"]["decode_token_logprobs"], return_text_in_logprobs
             )
         if top_logprobs_num > 0:
-            ret["meta_info"]["prefill_top_logprobs"] = (
-                self.detokenize_top_logprobs_tokens(
-                    ret["meta_info"]["prefill_top_logprobs"], return_text_in_logprobs
-                )
+            ret["meta_info"][
+                "prefill_top_logprobs"
+            ] = self.detokenize_top_logprobs_tokens(
+                ret["meta_info"]["prefill_top_logprobs"], return_text_in_logprobs
             )
-            ret["meta_info"]["decode_top_logprobs"] = (
-                self.detokenize_top_logprobs_tokens(
-                    ret["meta_info"]["decode_top_logprobs"], return_text_in_logprobs
-                )
+            ret["meta_info"][
+                "decode_top_logprobs"
+            ] = self.detokenize_top_logprobs_tokens(
+                ret["meta_info"]["decode_top_logprobs"], return_text_in_logprobs
             )
         return ret
 
diff --git a/python/sglang/srt/models/chatglm.py b/python/sglang/srt/models/chatglm.py
index a15cc3d4c..e9ec3e2d2 100644
--- a/python/sglang/srt/models/chatglm.py
+++ b/python/sglang/srt/models/chatglm.py
@@ -36,7 +36,6 @@ LoraConfig = None
 
 
 class GLMAttention(nn.Module):
-
     def __init__(
         self,
         config,
@@ -294,7 +293,6 @@ class GLMTransformer(nn.Module):
 
 
 class ChatGLMModel(nn.Module):
-
     def __init__(
         self,
         config,
diff --git a/python/sglang/srt/models/grok.py b/python/sglang/srt/models/grok.py
index 9cae0b105..cbf29055c 100644
--- a/python/sglang/srt/models/grok.py
+++ b/python/sglang/srt/models/grok.py
@@ -521,7 +521,6 @@ class Grok1DecoderLayer(nn.Module):
         hidden_states: torch.Tensor,
         input_metadata: InputMetadata,
     ) -> torch.Tensor:
-
         hidden_states = (
             self.post_attn_norm(
                 self.self_attn(
diff --git a/python/sglang/srt/models/llama2.py b/python/sglang/srt/models/llama2.py
index 051036525..e60b036bd 100644
--- a/python/sglang/srt/models/llama2.py
+++ b/python/sglang/srt/models/llama2.py
@@ -160,9 +160,9 @@ class LlamaDecoderLayer(nn.Module):
         if rope_scaling is not None and getattr(
             config, "original_max_position_embeddings", None
         ):
-            rope_scaling["original_max_position_embeddings"] = (
-                config.original_max_position_embeddings
-            )
+            rope_scaling[
+                "original_max_position_embeddings"
+            ] = config.original_max_position_embeddings
         max_position_embeddings = getattr(config, "max_position_embeddings", 8192)
         self.self_attn = LlamaAttention(
             hidden_size=self.hidden_size,