Fix dependency (#538)
This commit is contained in:
@@ -21,7 +21,7 @@ dependencies = [
|
||||
|
||||
[project.optional-dependencies]
|
||||
srt = ["aiohttp", "fastapi", "psutil", "rpyc", "torch", "uvloop", "uvicorn",
|
||||
"zmq", "vllm==0.4.3", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.34"]
|
||||
"zmq", "vllm==0.4.3", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.41"]
|
||||
openai = ["openai>=1.0", "tiktoken"]
|
||||
anthropic = ["anthropic>=0.20.0"]
|
||||
litellm = ["litellm>=1.0.0"]
|
||||
|
||||
@@ -3,10 +3,10 @@ Faster constrained decoding.
|
||||
Reference: https://lmsys.org/blog/2024-02-05-compressed-fsm/
|
||||
"""
|
||||
|
||||
import interegular
|
||||
import dataclasses
|
||||
from collections import defaultdict
|
||||
|
||||
import interegular
|
||||
import outlines.caching
|
||||
from sglang.srt.constrained import (
|
||||
FSMInfo,
|
||||
|
||||
@@ -5,7 +5,6 @@
|
||||
from typing import Iterable, List, Optional, Tuple
|
||||
|
||||
import torch
|
||||
from peft import LoraConfig
|
||||
from sglang.srt.layers.radix_attention import RadixAttention
|
||||
from sglang.srt.managers.controller.model_runner import InputMetadata
|
||||
from sglang.srt.layers.logits_processor import LogitsProcessor
|
||||
@@ -31,6 +30,8 @@ from vllm.sequence import SamplerOutput
|
||||
from vllm.transformers_utils.configs import ChatGLMConfig
|
||||
|
||||
|
||||
LoraConfig = None
|
||||
|
||||
|
||||
class GLMAttention(nn.Module):
|
||||
|
||||
@@ -387,4 +388,4 @@ class ChatGLMForCausalLM(nn.Module):
|
||||
|
||||
EntryClass = ChatGLMForCausalLM
|
||||
# compat: glm model.config class == ChatGLMModel
|
||||
EntryClassRemapping = [("ChatGLMModel", ChatGLMForCausalLM)]
|
||||
EntryClassRemapping = [("ChatGLMModel", ChatGLMForCausalLM)]
|
||||
|
||||
Reference in New Issue
Block a user