From 94aead9e8d9340764a2ef92fe6e079ec2475fb0b Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Wed, 12 Jun 2024 13:17:35 -0700 Subject: [PATCH] Fix dependency (#538) --- python/pyproject.toml | 2 +- python/sglang/srt/constrained/jump_forward.py | 2 +- python/sglang/srt/models/chatglm.py | 5 +++-- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index 95ec66102..b4923f763 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ [project.optional-dependencies] srt = ["aiohttp", "fastapi", "psutil", "rpyc", "torch", "uvloop", "uvicorn", - "zmq", "vllm==0.4.3", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.34"] + "zmq", "vllm==0.4.3", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.41"] openai = ["openai>=1.0", "tiktoken"] anthropic = ["anthropic>=0.20.0"] litellm = ["litellm>=1.0.0"] diff --git a/python/sglang/srt/constrained/jump_forward.py b/python/sglang/srt/constrained/jump_forward.py index f71123cf2..39356a71a 100644 --- a/python/sglang/srt/constrained/jump_forward.py +++ b/python/sglang/srt/constrained/jump_forward.py @@ -3,10 +3,10 @@ Faster constrained decoding. Reference: https://lmsys.org/blog/2024-02-05-compressed-fsm/ """ -import interegular import dataclasses from collections import defaultdict +import interegular import outlines.caching from sglang.srt.constrained import ( FSMInfo, diff --git a/python/sglang/srt/models/chatglm.py b/python/sglang/srt/models/chatglm.py index 83c0ef750..415542dce 100644 --- a/python/sglang/srt/models/chatglm.py +++ b/python/sglang/srt/models/chatglm.py @@ -5,7 +5,6 @@ from typing import Iterable, List, Optional, Tuple import torch -from peft import LoraConfig from sglang.srt.layers.radix_attention import RadixAttention from sglang.srt.managers.controller.model_runner import InputMetadata from sglang.srt.layers.logits_processor import LogitsProcessor @@ -31,6 +30,8 @@ from vllm.sequence import SamplerOutput from vllm.transformers_utils.configs import ChatGLMConfig +LoraConfig = None + class GLMAttention(nn.Module): @@ -387,4 +388,4 @@ class ChatGLMForCausalLM(nn.Module): EntryClass = ChatGLMForCausalLM # compat: glm model.config class == ChatGLMModel -EntryClassRemapping = [("ChatGLMModel", ChatGLMForCausalLM)] \ No newline at end of file +EntryClassRemapping = [("ChatGLMModel", ChatGLMForCausalLM)]