example: add vlm to token in & out example (#3941)
Co-authored-by: zhaochenyang20 <zhaochen20@outlook.com>
This commit is contained in:
@@ -40,7 +40,7 @@ class ModelConfig:
|
||||
trust_remote_code: bool = True,
|
||||
revision: Optional[str] = None,
|
||||
context_length: Optional[int] = None,
|
||||
model_override_args: Optional[dict] = None,
|
||||
model_override_args: Optional[str] = None,
|
||||
is_embedding: Optional[bool] = None,
|
||||
dtype: str = "auto",
|
||||
quantization: Optional[str] = None,
|
||||
|
||||
@@ -42,7 +42,6 @@ from sglang.srt.layers.logits_processor import LogitsProcessorOutput
|
||||
from sglang.srt.managers.io_struct import (
|
||||
AbortReq,
|
||||
BatchEmbeddingOut,
|
||||
BatchMultimodalDecodeReq,
|
||||
BatchTokenIDOut,
|
||||
CloseSessionReqInput,
|
||||
FlushCacheReq,
|
||||
@@ -104,7 +103,6 @@ from sglang.srt.utils import (
|
||||
crash_on_warnings,
|
||||
get_bool_env_var,
|
||||
get_zmq_socket,
|
||||
kill_itself_when_parent_died,
|
||||
pyspy_dump_schedulers,
|
||||
set_gpu_proc_affinity,
|
||||
set_random_seed,
|
||||
@@ -1199,7 +1197,6 @@ class Scheduler:
|
||||
self.spec_num_total_forward_ct += batch.batch_size()
|
||||
self.num_generated_tokens += num_accepted_tokens
|
||||
batch.output_ids = next_token_ids
|
||||
|
||||
# These 2 values are needed for processing the output, but the values can be
|
||||
# modified by overlap schedule. So we have to copy them here so that
|
||||
# we can use the correct values in output processing.
|
||||
@@ -1480,7 +1477,6 @@ class Scheduler:
|
||||
batch.next_batch_sampling_info.update_regex_vocab_mask()
|
||||
self.current_stream.synchronize()
|
||||
batch.next_batch_sampling_info.sampling_info_done.set()
|
||||
|
||||
self.stream_output(batch.reqs, batch.return_logprob)
|
||||
|
||||
self.token_to_kv_pool.free_group_end()
|
||||
@@ -1580,11 +1576,11 @@ class Scheduler:
|
||||
if req.top_logprobs_num > 0:
|
||||
req.input_top_logprobs_val = [None]
|
||||
req.input_top_logprobs_idx = [None]
|
||||
|
||||
assert len(req.temp_input_token_ids_logprobs_val) == len(
|
||||
req.temp_input_token_ids_logprobs_idx
|
||||
)
|
||||
for val, idx in zip(
|
||||
req.temp_input_top_logprobs_val,
|
||||
req.temp_input_top_logprobs_idx,
|
||||
strict=True,
|
||||
req.temp_input_top_logprobs_val, req.temp_input_top_logprobs_idx
|
||||
):
|
||||
req.input_top_logprobs_val.extend(val)
|
||||
req.input_top_logprobs_idx.extend(idx)
|
||||
@@ -1779,7 +1775,6 @@ class Scheduler:
|
||||
if rids:
|
||||
if self.model_config.is_multimodal_gen:
|
||||
raise NotImplementedError()
|
||||
|
||||
self.send_to_detokenizer.send_pyobj(
|
||||
BatchTokenIDOut(
|
||||
rids,
|
||||
|
||||
@@ -11,7 +11,7 @@ import math
|
||||
import os
|
||||
from abc import ABC, abstractmethod
|
||||
from contextlib import contextmanager
|
||||
from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple, Type, cast
|
||||
from typing import Any, Dict, Generator, Iterable, List, Optional, Tuple, cast
|
||||
|
||||
import gguf
|
||||
import huggingface_hub
|
||||
@@ -19,7 +19,7 @@ import numpy as np
|
||||
import torch
|
||||
from huggingface_hub import HfApi, hf_hub_download
|
||||
from torch import nn
|
||||
from transformers import AutoModelForCausalLM, PretrainedConfig
|
||||
from transformers import AutoModelForCausalLM
|
||||
from transformers.utils import SAFE_WEIGHTS_INDEX_NAME
|
||||
|
||||
from sglang.srt.configs.device_config import DeviceConfig
|
||||
@@ -197,7 +197,7 @@ class DefaultModelLoader(BaseModelLoader):
|
||||
|
||||
Returns the path to the downloaded model, or None if the model is not
|
||||
downloaded from ModelScope."""
|
||||
if "SGLANG_USE_MODELSCOPE" in os.environ:
|
||||
if os.environ.get("SGLANG_USE_MODELSCOPE", None) == "True":
|
||||
# download model from ModelScope hub,
|
||||
# lazy import so that modelscope is not required for normal use.
|
||||
# pylint: disable=C.
|
||||
|
||||
@@ -43,10 +43,15 @@ DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP1 = "neuralmagic/Meta-Llama-3.1-8B-Ins
|
||||
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = "neuralmagic/Meta-Llama-3.1-70B-Instruct-FP8,neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8,neuralmagic/Qwen2-72B-Instruct-FP8,neuralmagic/Qwen2-57B-A14B-Instruct-FP8,neuralmagic/DeepSeek-Coder-V2-Lite-Instruct-FP8"
|
||||
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1 = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4,hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
|
||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN = "Qwen/Qwen2.5-1.5B-Instruct"
|
||||
DEFAULT_SMALL_VLM_MODEL_NAME = "Qwen/Qwen2-VL-2B"
|
||||
|
||||
|
||||
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST = "meta-llama/Llama-2-7b-chat-hf"
|
||||
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST = "lmsys/sglang-EAGLE-llama2-chat-7B"
|
||||
|
||||
DEFAULT_IMAGE_URL = "https://github.com/sgl-project/sglang/blob/main/test/lang/example_image.png?raw=true"
|
||||
DEFAULT_VIDEO_URL = "https://raw.githubusercontent.com/EvolvingLMMs-Lab/sglang/dev/onevision_local/assets/jobs.mp4"
|
||||
|
||||
|
||||
def is_in_ci():
|
||||
"""Return whether it is in CI runner."""
|
||||
|
||||
Reference in New Issue
Block a user