chore: upgrade transformers to 4.52.3 (#6575)
Co-authored-by: Mick <mickjagger19@icloud.com>
This commit is contained in:
@@ -41,7 +41,7 @@ runtime_common = [
|
||||
"soundfile==0.13.1",
|
||||
"scipy",
|
||||
"torchao==0.9.0",
|
||||
"transformers==4.51.1",
|
||||
"transformers==4.52.3",
|
||||
"uvicorn",
|
||||
"uvloop",
|
||||
"xgrammar==0.1.19",
|
||||
|
||||
@@ -7,11 +7,8 @@ import sentencepiece as spm
|
||||
from transformers import (
|
||||
TOKENIZER_MAPPING,
|
||||
LlamaConfig,
|
||||
Phi3Config,
|
||||
PretrainedConfig,
|
||||
PreTrainedTokenizer,
|
||||
PreTrainedTokenizerFast,
|
||||
Qwen2Config,
|
||||
)
|
||||
|
||||
from sglang.utils import logger
|
||||
@@ -302,24 +299,23 @@ class InternVLChatConfig(PretrainedConfig):
|
||||
)
|
||||
|
||||
if llm_config is None:
|
||||
# TODO: There might still be a bug in transformers version 4.44 and above.
|
||||
llm_config = {"architectures": [""]}
|
||||
llm_config = {"architectures": ["InternLM2ForCausalLM"]}
|
||||
logger.info(
|
||||
"llm_config is None. Initializing the LlamaConfig config with default values (`LlamaConfig`)."
|
||||
)
|
||||
|
||||
self.vision_config = InternVisionConfig(**vision_config)
|
||||
if llm_config["architectures"][0] == "LlamaForCausalLM":
|
||||
if llm_config.get("architectures")[0] == "LlamaForCausalLM":
|
||||
self.llm_config = LlamaConfig(**llm_config)
|
||||
elif llm_config["architectures"][0] == "InternLM2ForCausalLM":
|
||||
elif llm_config.get("architectures")[0] == "InternLM2ForCausalLM":
|
||||
self.llm_config = InternLM2Config(**llm_config)
|
||||
elif llm_config["architectures"][0] == "Phi3ForCausalLM":
|
||||
self.llm_config = Phi3Config(**llm_config)
|
||||
elif llm_config["architectures"][0] == "Qwen2ForCausalLM":
|
||||
self.llm_config = Qwen2Config(**llm_config)
|
||||
else:
|
||||
raise ValueError(
|
||||
"Unsupported architecture: {}".format(llm_config["architectures"][0])
|
||||
"Unsupported architecture: {}".format(
|
||||
llm_config.get("architectures")[0]
|
||||
)
|
||||
)
|
||||
|
||||
self.use_backbone_lora = use_backbone_lora
|
||||
self.use_llm_lora = use_llm_lora
|
||||
self.pad2square = pad2square
|
||||
|
||||
@@ -196,6 +196,21 @@ class ModelConfig:
|
||||
self.v_head_dim = self.hf_text_config.v_head_dim
|
||||
self.qk_nope_head_dim = self.hf_text_config.qk_nope_head_dim
|
||||
else:
|
||||
if (
|
||||
"MistralModel" in self.hf_config.architectures
|
||||
or "MixtralForCausalLM" in self.hf_config.architectures
|
||||
):
|
||||
if getattr(self, "head_dim", None) is None:
|
||||
self.head_dim = (
|
||||
self.hf_config.hidden_size // self.hf_config.num_attention_heads
|
||||
)
|
||||
# In transformers==4.52.3, head_dim can be None in MistralConfig, so backfill it from self.head_dim
|
||||
if (
|
||||
not hasattr(self.hf_text_config, "head_dim")
|
||||
or self.hf_text_config.head_dim is None
|
||||
):
|
||||
setattr(self.hf_text_config, "head_dim", self.head_dim)
|
||||
|
||||
self.attention_arch = AttentionArch.MHA
|
||||
|
||||
self.num_attention_heads = self.hf_text_config.num_attention_heads
|
||||
|
||||
@@ -26,6 +26,7 @@ from transformers import (
|
||||
AutoModelForCausalLM,
|
||||
AutoModelForVision2Seq,
|
||||
AutoProcessor,
|
||||
GenerationConfig,
|
||||
)
|
||||
|
||||
from sglang.srt.entrypoints.engine import Engine
|
||||
@@ -382,13 +383,17 @@ class HFRunner:
|
||||
model = base_model
|
||||
|
||||
outputs = model.generate(
|
||||
input_ids,
|
||||
do_sample=False,
|
||||
temperature=None,
|
||||
top_p=None,
|
||||
max_new_tokens=max_new_tokens,
|
||||
return_dict_in_generate=True,
|
||||
output_scores=(not output_str_only),
|
||||
input_ids=input_ids,
|
||||
generation_config=GenerationConfig(
|
||||
do_sample=False,
|
||||
temperature=None,
|
||||
top_p=None,
|
||||
max_new_tokens=max_new_tokens,
|
||||
return_dict_in_generate=True,
|
||||
output_scores=(not output_str_only),
|
||||
# make sure to disable compile
|
||||
disable_compile=True,
|
||||
),
|
||||
)
|
||||
|
||||
text = tokenizer.decode(
|
||||
|
||||
Reference in New Issue
Block a user