chore: upgrade transformers to 4.52.3 (#6575)

Co-authored-by: Mick <mickjagger19@icloud.com>
This commit is contained in:
Yineng Zhang
2025-05-25 22:49:58 -07:00
committed by GitHub
parent 84147254c9
commit 7eb9d8e594
5 changed files with 152 additions and 125 deletions

View File

@@ -41,7 +41,7 @@ runtime_common = [
"soundfile==0.13.1",
"scipy",
"torchao==0.9.0",
"transformers==4.51.1",
"transformers==4.52.3",
"uvicorn",
"uvloop",
"xgrammar==0.1.19",

View File

@@ -7,11 +7,8 @@ import sentencepiece as spm
from transformers import (
TOKENIZER_MAPPING,
LlamaConfig,
Phi3Config,
PretrainedConfig,
PreTrainedTokenizer,
PreTrainedTokenizerFast,
Qwen2Config,
)
from sglang.utils import logger
@@ -302,24 +299,23 @@ class InternVLChatConfig(PretrainedConfig):
)
if llm_config is None:
# TODO: There might still be a bug in transformers version 4.44 and above.
llm_config = {"architectures": [""]}
llm_config = {"architectures": ["InternLM2ForCausalLM"]}
logger.info(
"llm_config is None. Initializing the LlamaConfig config with default values (`LlamaConfig`)."
)
self.vision_config = InternVisionConfig(**vision_config)
if llm_config["architectures"][0] == "LlamaForCausalLM":
if llm_config.get("architectures")[0] == "LlamaForCausalLM":
self.llm_config = LlamaConfig(**llm_config)
elif llm_config["architectures"][0] == "InternLM2ForCausalLM":
elif llm_config.get("architectures")[0] == "InternLM2ForCausalLM":
self.llm_config = InternLM2Config(**llm_config)
elif llm_config["architectures"][0] == "Phi3ForCausalLM":
self.llm_config = Phi3Config(**llm_config)
elif llm_config["architectures"][0] == "Qwen2ForCausalLM":
self.llm_config = Qwen2Config(**llm_config)
else:
raise ValueError(
"Unsupported architecture: {}".format(llm_config["architectures"][0])
"Unsupported architecture: {}".format(
llm_config.get("architectures")[0]
)
)
self.use_backbone_lora = use_backbone_lora
self.use_llm_lora = use_llm_lora
self.pad2square = pad2square

View File

@@ -196,6 +196,21 @@ class ModelConfig:
self.v_head_dim = self.hf_text_config.v_head_dim
self.qk_nope_head_dim = self.hf_text_config.qk_nope_head_dim
else:
if (
"MistralModel" in self.hf_config.architectures
or "MixtralForCausalLM" in self.hf_config.architectures
):
if getattr(self, "head_dim", None) is None:
self.head_dim = (
self.hf_config.hidden_size // self.hf_config.num_attention_heads
)
# In transformers==4.52.3, the head_dim is null in MistralConfig
if (
not hasattr(self.hf_text_config, "head_dim")
or self.hf_text_config.head_dim is None
):
setattr(self.hf_text_config, "head_dim", self.head_dim)
self.attention_arch = AttentionArch.MHA
self.num_attention_heads = self.hf_text_config.num_attention_heads

View File

@@ -26,6 +26,7 @@ from transformers import (
AutoModelForCausalLM,
AutoModelForVision2Seq,
AutoProcessor,
GenerationConfig,
)
from sglang.srt.entrypoints.engine import Engine
@@ -382,13 +383,17 @@ class HFRunner:
model = base_model
outputs = model.generate(
input_ids,
do_sample=False,
temperature=None,
top_p=None,
max_new_tokens=max_new_tokens,
return_dict_in_generate=True,
output_scores=(not output_str_only),
input_ids=input_ids,
generation_config=GenerationConfig(
do_sample=False,
temperature=None,
top_p=None,
max_new_tokens=max_new_tokens,
return_dict_in_generate=True,
output_scores=(not output_str_only),
# make sure to disable compile
disable_compile=True,
),
)
text = tokenizer.decode(