From 94aead9e8d9340764a2ef92fe6e079ec2475fb0b Mon Sep 17 00:00:00 2001
From: Lianmin Zheng <lianminzheng@gmail.com>
Date: Wed, 12 Jun 2024 13:17:35 -0700
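Subject: [PATCH] Fix dependency (#538)

- pyproject.toml: raise the minimum `outlines` version in the `srt`
  extra from 0.0.34 to 0.0.41.
- srt/models/chatglm.py: drop the module-level `from peft import
  LoraConfig` and define `LoraConfig = None` as a placeholder, so the
  module no longer imports `peft`, which the `srt` extra does not list.
  Also add the missing newline at end of file.
- srt/constrained/jump_forward.py: move `import interegular` below the
  standard-library imports, next to the other third-party imports.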
---
 python/pyproject.toml                         | 2 +-
 python/sglang/srt/constrained/jump_forward.py | 2 +-
 python/sglang/srt/models/chatglm.py           | 5 +++--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/python/pyproject.toml b/python/pyproject.toml
index 95ec66102..b4923f763 100644
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -21,7 +21,7 @@ dependencies = [
 
 [project.optional-dependencies]
 srt = ["aiohttp", "fastapi", "psutil", "rpyc", "torch", "uvloop", "uvicorn",
-       "zmq", "vllm==0.4.3", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.34"]
+       "zmq", "vllm==0.4.3", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.41"]
 openai = ["openai>=1.0", "tiktoken"]
 anthropic = ["anthropic>=0.20.0"]
 litellm = ["litellm>=1.0.0"]
diff --git a/python/sglang/srt/constrained/jump_forward.py b/python/sglang/srt/constrained/jump_forward.py
index f71123cf2..39356a71a 100644
--- a/python/sglang/srt/constrained/jump_forward.py
+++ b/python/sglang/srt/constrained/jump_forward.py
@@ -3,10 +3,10 @@ Faster constrained decoding.
 Reference: https://lmsys.org/blog/2024-02-05-compressed-fsm/
 """
 
-import interegular
 import dataclasses
 from collections import defaultdict
 
+import interegular
 import outlines.caching
 from sglang.srt.constrained import (
     FSMInfo,
diff --git a/python/sglang/srt/models/chatglm.py b/python/sglang/srt/models/chatglm.py
index 83c0ef750..415542dce 100644
--- a/python/sglang/srt/models/chatglm.py
+++ b/python/sglang/srt/models/chatglm.py
@@ -5,7 +5,6 @@
 from typing import Iterable, List, Optional, Tuple
 
 import torch
-from peft import LoraConfig
 from sglang.srt.layers.radix_attention import RadixAttention
 from sglang.srt.managers.controller.model_runner import InputMetadata
 from sglang.srt.layers.logits_processor import LogitsProcessor
@@ -31,6 +30,8 @@ from vllm.sequence import SamplerOutput
 from vllm.transformers_utils.configs import ChatGLMConfig
 
 
+LoraConfig = None
+
 
 class GLMAttention(nn.Module):
 
@@ -387,4 +388,4 @@ class ChatGLMForCausalLM(nn.Module):
 
 EntryClass = ChatGLMForCausalLM
 # compat: glm model.config class == ChatGLMModel
-EntryClassRemapping = [("ChatGLMModel", ChatGLMForCausalLM)]
\ No newline at end of file
+EntryClassRemapping = [("ChatGLMModel", ChatGLMForCausalLM)]