Support Kimi K2 (#7940)

This commit is contained in:
Atream
2025-07-11 15:02:21 +08:00
committed by GitHub
parent 49a5915f53
commit 615553079d
7 changed files with 480 additions and 3 deletions

View File

@@ -14,6 +14,7 @@
"""Utilities for Huggingface Transformers."""
import contextlib
import logging
import os
import warnings
from pathlib import Path
@@ -25,6 +26,7 @@ from transformers import (
AutoConfig,
AutoProcessor,
AutoTokenizer,
GenerationConfig,
PretrainedConfig,
PreTrainedTokenizer,
PreTrainedTokenizerBase,
@@ -153,6 +155,22 @@ def get_config(
return config
@lru_cache_frozenset(maxsize=32)
def get_generation_config(
    model: str,
    trust_remote_code: bool,
    revision: Optional[str] = None,
    **kwargs,
):
    """Load a model's ``generation_config.json`` via HF Transformers.

    Args:
        model: Model name on the hub or a local path, passed straight to
            ``GenerationConfig.from_pretrained``.
        trust_remote_code: Whether to allow custom code from the model repo.
        revision: Optional git revision (branch / tag / commit) to load from.
        **kwargs: Extra keyword arguments forwarded to
            ``GenerationConfig.from_pretrained``.

    Returns:
        The loaded ``GenerationConfig``, or ``None`` when the model does not
        ship a ``generation_config.json``.
    """
    try:
        return GenerationConfig.from_pretrained(
            model, trust_remote_code=trust_remote_code, revision=revision, **kwargs
        )
    except OSError:
        # A missing generation_config.json is common and expected; treat it
        # as a soft failure instead of propagating.  (The exception object
        # was previously bound as `e` but never used.)
        logging.info("model doesn't have generation_config.json")
        return None
# Models don't use the same configuration key for determining the maximum
# context length. Store them here so we can sanely check them.
# NOTE: The ordering here is important. Some models have two of these and we