diff --git a/examples/quick_start/srt_example_llava.py b/examples/quick_start/srt_example_llava.py index b6d0907f5..a781bede3 100644 --- a/examples/quick_start/srt_example_llava.py +++ b/examples/quick_start/srt_example_llava.py @@ -7,10 +7,8 @@ def image_qa(s, image_path, question): s += sgl.assistant(sgl.gen("answer")) -# runtime = sgl.Runtime(model_path="liuhaotian/llava-v1.5-7b", -# tokenizer_path="llava-hf/llava-1.5-7b-hf") -runtime = sgl.Runtime(model_path="llava-internal/llava-v1.6-7b-hd-224px_3x2-preview-20230103", - tokenizer_path="llava-internal/llava-v1.6-7b-hd-224px_3x2-preview-20230103-tokenizer") +runtime = sgl.Runtime(model_path="liuhaotian/llava-v1.5-7b", + tokenizer_path="llava-hf/llava-1.5-7b-hf") sgl.set_default_backend(runtime) diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index bab2fc158..af27f6460 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -1,6 +1,7 @@ import asyncio import concurrent.futures import dataclasses +import multiprocessing as mp import os from typing import List @@ -101,7 +102,9 @@ class TokenizerManager: self.tokenizer = self.processor.tokenizer os.environ["TOKENIZERS_PARALLELISM"] = "false" self.executor = concurrent.futures.ProcessPoolExecutor( - initializer=init_global_processor, initargs=(server_args,) + initializer=init_global_processor, + mp_context=mp.get_context("fork"), + initargs=(server_args,), ) else: self.tokenizer = get_tokenizer( diff --git a/python/sglang/srt/models/llava.py b/python/sglang/srt/models/llava.py index 97a26322d..72107e0eb 100644 --- a/python/sglang/srt/models/llava.py +++ b/python/sglang/srt/models/llava.py @@ -34,9 +34,10 @@ class LlavaLlamaForCausalLM(nn.Module): self.config.text_config.hidden_size = config.hidden_size self.multi_modal_projector = LlavaMultiModalProjector(config) self.language_model = LlamaForCausalLM(config, linear_method) - if "unpad" in getattr(config, "mm_patch_merge_type"): + if "unpad" in getattr(config, "mm_patch_merge_type", ""): self.language_model.model.image_newline = nn.Parameter( - torch.empty(config.text_config.hidden_size, dtype=torch.float16)) + torch.empty(config.text_config.hidden_size, dtype=torch.float16) + ) def pad_input_ids(self, input_ids, pad_value, pt_shape=None, image_size=None): new_image_feature_len = self.image_feature_len diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index ce47b541d..0c9d4da21 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -425,8 +425,6 @@ class Runtime: random_seed=random_seed, log_level=log_level, ) - import torch - torch.multiprocessing.set_start_method("spawn", force=True) self.url = self.server_args.url() self.generate_url = (