Support Kimi K2 (#7940)

Atream
2025-07-11 15:02:21 +08:00
committed by GitHub
parent 49a5915f53
commit 615553079d
7 changed files with 480 additions and 3 deletions


@@ -25,6 +25,7 @@ from transformers import PretrainedConfig
 from sglang.srt.hf_transformers_utils import (
     get_config,
     get_context_length,
+    get_generation_config,
     get_hf_text_config,
 )
 from sglang.srt.layers.quantization import QUANTIZATION_METHODS
@@ -83,6 +84,13 @@ class ModelConfig:
             **kwargs,
         )
 
+        self.hf_generation_config = get_generation_config(
+            self.model_path,
+            trust_remote_code=trust_remote_code,
+            revision=revision,
+            **kwargs,
+        )
+
         self.hf_text_config = get_hf_text_config(self.hf_config)
         self.attention_chunk_size = getattr(
             self.hf_text_config, "attention_chunk_size", None
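The body of get_generation_config is not part of this diff. Below is a minimal sketch of what it plausibly does, assuming it wraps transformers' GenerationConfig.from_pretrained and returns None for checkpoints that ship no generation_config.json; the error handling and the decision not to forward trust_remote_code are assumptions, not the repo's actual code.

# Hypothetical sketch of the helper imported from sglang.srt.hf_transformers_utils;
# the real implementation is not shown in this diff.
from typing import Optional

from transformers import GenerationConfig


def get_generation_config(
    model: str,
    trust_remote_code: bool,
    revision: Optional[str] = None,
    **kwargs,
) -> Optional[GenerationConfig]:
    try:
        # generation_config.json is plain JSON, so trust_remote_code is accepted
        # here only for call-site symmetry and is not forwarded.
        return GenerationConfig.from_pretrained(model, revision=revision)
    except OSError:
        # Many checkpoints ship no generation_config.json; treat that as absent.
        return None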
@@ -467,6 +475,19 @@ class ModelConfig:
         if eos_ids:
             # it can be either int or list of int
             eos_ids = {eos_ids} if isinstance(eos_ids, int) else set(eos_ids)
+        if eos_ids is None:
+            eos_ids = set()
+        if self.hf_generation_config:
+            generation_eos_ids = getattr(
+                self.hf_generation_config, "eos_token_id", None
+            )
+            if generation_eos_ids:
+                generation_eos_ids = (
+                    {generation_eos_ids}
+                    if isinstance(generation_eos_ids, int)
+                    else set(generation_eos_ids)
+                )
+                eos_ids = eos_ids | generation_eos_ids
         return eos_ids
 
     def maybe_pull_model_tokenizer_from_remote(self) -> None:
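Taken together, the method now unions EOS ids from config.json with those from generation_config.json, presumably because Kimi K2-style checkpoints declare extra stop tokens only in the latter. Below is a standalone restatement of the merged logic from the hunk above, runnable outside sglang; SimpleNamespace stands in for the HF config objects and the token ids are made up.

# Illustrative only: SimpleNamespace is a stand-in for the HF config objects.
from types import SimpleNamespace


def merge_eos_ids(hf_config, hf_generation_config):
    # Mirrors the logic added in this commit.
    eos_ids = getattr(hf_config, "eos_token_id", None)
    if eos_ids:
        # it can be either int or list of int
        eos_ids = {eos_ids} if isinstance(eos_ids, int) else set(eos_ids)
    if eos_ids is None:
        eos_ids = set()
    if hf_generation_config:
        generation_eos_ids = getattr(hf_generation_config, "eos_token_id", None)
        if generation_eos_ids:
            generation_eos_ids = (
                {generation_eos_ids}
                if isinstance(generation_eos_ids, int)
                else set(generation_eos_ids)
            )
            eos_ids = eos_ids | generation_eos_ids
    return eos_ids


hf_config = SimpleNamespace(eos_token_id=2)
generation_config = SimpleNamespace(eos_token_id=[2, 163586])
print(merge_eos_ids(hf_config, generation_config))  # {2, 163586}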