Fix dependency (#538)

This commit is contained in:
Lianmin Zheng
2024-06-12 13:17:35 -07:00
committed by GitHub
parent 9c902b1954
commit 94aead9e8d
3 changed files with 5 additions and 4 deletions

View File

@@ -21,7 +21,7 @@ dependencies = [
[project.optional-dependencies]
srt = ["aiohttp", "fastapi", "psutil", "rpyc", "torch", "uvloop", "uvicorn",
-"zmq", "vllm==0.4.3", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.34"]
+"zmq", "vllm==0.4.3", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.41"]
openai = ["openai>=1.0", "tiktoken"]
anthropic = ["anthropic>=0.20.0"]
litellm = ["litellm>=1.0.0"]

View File

@@ -3,10 +3,10 @@ Faster constrained decoding.
Reference: https://lmsys.org/blog/2024-02-05-compressed-fsm/
"""
-import interegular
import dataclasses
from collections import defaultdict
+import interegular
import outlines.caching
from sglang.srt.constrained import (
FSMInfo,

View File

@@ -5,7 +5,6 @@
from typing import Iterable, List, Optional, Tuple
import torch
-from peft import LoraConfig
from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.managers.controller.model_runner import InputMetadata
from sglang.srt.layers.logits_processor import LogitsProcessor
@@ -31,6 +30,8 @@ from vllm.sequence import SamplerOutput
from vllm.transformers_utils.configs import ChatGLMConfig
+LoraConfig = None
class GLMAttention(nn.Module):
@@ -387,4 +388,4 @@ class ChatGLMForCausalLM(nn.Module):
EntryClass = ChatGLMForCausalLM
# compat: glm model.config class == ChatGLMModel
-EntryClassRemapping = [("ChatGLMModel", ChatGLMForCausalLM)]
+EntryClassRemapping = [("ChatGLMModel", ChatGLMForCausalLM)]