chore: bump transformer to 4.54.0 (#8416)
Co-authored-by: Binyao Jiang <byjiang1996@gmail.com> Co-authored-by: Lifu Huang <lifu.hlf@gmail.com>
This commit is contained in:
2
.github/workflows/vllm-dependency-test.yml
vendored
2
.github/workflows/vllm-dependency-test.yml
vendored
@@ -30,7 +30,7 @@ jobs:
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
bash scripts/ci_install_dependency.sh
|
||||
pip install "vllm==0.9.0.1"
|
||||
pip install "vllm==0.10.0"
|
||||
pip install "bitsandbytes>=0.44.0"
|
||||
|
||||
- name: Run VLLM dependency tests
|
||||
|
||||
@@ -45,7 +45,7 @@ runtime_common = [
|
||||
"soundfile==0.13.1",
|
||||
"scipy",
|
||||
"torchao==0.9.0",
|
||||
"transformers==4.53.2",
|
||||
"transformers==4.54.0",
|
||||
"timm==1.0.16",
|
||||
"uvicorn",
|
||||
"uvloop",
|
||||
|
||||
@@ -656,11 +656,15 @@ class LlavaForConditionalGeneration(LlavaBaseForCausalLM):
|
||||
self, auto_model_type: Type[AutoModel]
|
||||
) -> Dict[str, str]:
|
||||
mapping = {}
|
||||
for config_cls, archs in auto_model_type._model_mapping.items():
|
||||
if isinstance(archs, tuple):
|
||||
mapping[config_cls.__name__] = tuple(arch.__name__ for arch in archs)
|
||||
else:
|
||||
mapping[config_cls.__name__] = archs.__name__
|
||||
for config_cls in auto_model_type._model_mapping.keys():
|
||||
archs = auto_model_type._model_mapping.get(config_cls, None)
|
||||
if archs is not None:
|
||||
if isinstance(archs, tuple):
|
||||
mapping[config_cls.__name__] = tuple(
|
||||
arch.__name__ for arch in archs
|
||||
)
|
||||
else:
|
||||
mapping[config_cls.__name__] = archs.__name__
|
||||
return mapping
|
||||
|
||||
def __init__(
|
||||
|
||||
@@ -1134,7 +1134,10 @@ class MiniCPMWhisperEncoderLayer(nn.Module):
|
||||
"""
|
||||
residual = hidden_states
|
||||
hidden_states = self.self_attn_layer_norm(hidden_states)
|
||||
hidden_states, attn_weights, past_key_values = self.self_attn(
|
||||
# TODO (lifuhuang): confirmed with Mick that the logic for past_key_values is copied from minicpmo official code,
|
||||
# currently we are not using past_key_values at all. We need to redesign the caching logic when we support streaming
|
||||
# in the future.
|
||||
hidden_states, attn_weights = self.self_attn(
|
||||
hidden_states=hidden_states,
|
||||
attention_mask=attention_mask,
|
||||
layer_head_mask=layer_head_mask,
|
||||
|
||||
@@ -51,7 +51,8 @@ class ModelCase:
|
||||
# Popular models that run on the CI
|
||||
CI_MODELS = [
|
||||
ModelCase("meta-llama/Llama-3.1-8B-Instruct"),
|
||||
ModelCase("google/gemma-2-2b"),
|
||||
# TODO: Gemma is broken by the bug introduced in the latest transformers version, we should restore once its fixed: https://github.com/huggingface/transformers/issues/39711
|
||||
# ModelCase("google/gemma-2-2b"),
|
||||
]
|
||||
|
||||
# the complete set of models to test sglang's generation model
|
||||
|
||||
@@ -172,28 +172,29 @@ class TestGemma3nServer(TestOpenAIVisionServer):
|
||||
cls.base_url += "/v1"
|
||||
|
||||
|
||||
class TestKimiVLServer(TestOpenAIVisionServer):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.model = "moonshotai/Kimi-VL-A3B-Instruct"
|
||||
cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
cls.api_key = "sk-123456"
|
||||
cls.process = popen_launch_server(
|
||||
cls.model,
|
||||
cls.base_url,
|
||||
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
other_args=[
|
||||
"--trust-remote-code",
|
||||
"--context-length",
|
||||
"4096",
|
||||
"--dtype",
|
||||
"bfloat16",
|
||||
],
|
||||
)
|
||||
cls.base_url += "/v1"
|
||||
# commented out before https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct/discussions/27 get fixed
|
||||
# class TestKimiVLServer(TestOpenAIVisionServer):
|
||||
# @classmethod
|
||||
# def setUpClass(cls):
|
||||
# cls.model = "moonshotai/Kimi-VL-A3B-Instruct"
|
||||
# cls.base_url = DEFAULT_URL_FOR_TEST
|
||||
# cls.api_key = "sk-123456"
|
||||
# cls.process = popen_launch_server(
|
||||
# cls.model,
|
||||
# cls.base_url,
|
||||
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
# other_args=[
|
||||
# "--trust-remote-code",
|
||||
# "--context-length",
|
||||
# "4096",
|
||||
# "--dtype",
|
||||
# "bfloat16",
|
||||
# ],
|
||||
# )
|
||||
# cls.base_url += "/v1"
|
||||
|
||||
def test_video_images_chat_completion(self):
|
||||
pass
|
||||
# def test_video_images_chat_completion(self):
|
||||
# pass
|
||||
|
||||
|
||||
class TestPhi4MMServer(TestOpenAIVisionServer):
|
||||
|
||||
@@ -189,31 +189,32 @@ class TestGemmaUnderstandsImage(VLMInputTestBase, unittest.IsolatedAsyncioTestCa
|
||||
)
|
||||
|
||||
|
||||
class TestKimiVLImageUnderstandsImage(
|
||||
VLMInputTestBase, unittest.IsolatedAsyncioTestCase
|
||||
):
|
||||
model_path = "moonshotai/Kimi-VL-A3B-Instruct"
|
||||
chat_template = "kimi-vl"
|
||||
# commented out before https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct/discussions/27 get fixed
|
||||
# class TestKimiVLImageUnderstandsImage(
|
||||
# VLMInputTestBase, unittest.IsolatedAsyncioTestCase
|
||||
# ):
|
||||
# model_path = "moonshotai/Kimi-VL-A3B-Instruct"
|
||||
# chat_template = "kimi-vl"
|
||||
|
||||
@classmethod
|
||||
def _init_visual(cls):
|
||||
model = AutoModel.from_pretrained(cls.model_path, trust_remote_code=True)
|
||||
cls.vision_tower = model.vision_tower.eval().to(cls.device)
|
||||
cls.mm_projector = model.multi_modal_projector.eval().to(cls.device)
|
||||
# @classmethod
|
||||
# def _init_visual(cls):
|
||||
# model = AutoModel.from_pretrained(cls.model_path, trust_remote_code=True)
|
||||
# cls.vision_tower = model.vision_tower.eval().to(cls.device)
|
||||
# cls.mm_projector = model.multi_modal_projector.eval().to(cls.device)
|
||||
|
||||
cls.visual = lambda tokenizer_output: cls.mm_projector(
|
||||
cls.vision_tower(
|
||||
pixel_values=tokenizer_output["pixel_values"],
|
||||
grid_hws=tokenizer_output["image_grid_hws"],
|
||||
)
|
||||
)
|
||||
# cls.visual = lambda tokenizer_output: cls.mm_projector(
|
||||
# cls.vision_tower(
|
||||
# pixel_values=tokenizer_output["pixel_values"],
|
||||
# grid_hws=tokenizer_output["image_grid_hws"],
|
||||
# )
|
||||
# )
|
||||
|
||||
def _pixel_values_image_data(self, processor_output):
|
||||
return dict(
|
||||
modality="IMAGE",
|
||||
pixel_values=processor_output["pixel_values"],
|
||||
image_grid_hws=processor_output["image_grid_hws"],
|
||||
)
|
||||
# def _pixel_values_image_data(self, processor_output):
|
||||
# return dict(
|
||||
# modality="IMAGE",
|
||||
# pixel_values=processor_output["pixel_values"],
|
||||
# image_grid_hws=processor_output["image_grid_hws"],
|
||||
# )
|
||||
|
||||
|
||||
# not for CI: too large
|
||||
|
||||
Reference in New Issue
Block a user