Fix dependency (#538)
This commit is contained in:
@@ -21,7 +21,7 @@ dependencies = [
|
|||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
srt = ["aiohttp", "fastapi", "psutil", "rpyc", "torch", "uvloop", "uvicorn",
|
srt = ["aiohttp", "fastapi", "psutil", "rpyc", "torch", "uvloop", "uvicorn",
|
||||||
"zmq", "vllm==0.4.3", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.34"]
|
"zmq", "vllm==0.4.3", "interegular", "pydantic", "pillow", "packaging", "huggingface_hub", "hf_transfer", "outlines>=0.0.41"]
|
||||||
openai = ["openai>=1.0", "tiktoken"]
|
openai = ["openai>=1.0", "tiktoken"]
|
||||||
anthropic = ["anthropic>=0.20.0"]
|
anthropic = ["anthropic>=0.20.0"]
|
||||||
litellm = ["litellm>=1.0.0"]
|
litellm = ["litellm>=1.0.0"]
|
||||||
|
|||||||
@@ -3,10 +3,10 @@ Faster constrained decoding.
|
|||||||
Reference: https://lmsys.org/blog/2024-02-05-compressed-fsm/
|
Reference: https://lmsys.org/blog/2024-02-05-compressed-fsm/
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import interegular
|
|
||||||
import dataclasses
|
import dataclasses
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
|
|
||||||
|
import interegular
|
||||||
import outlines.caching
|
import outlines.caching
|
||||||
from sglang.srt.constrained import (
|
from sglang.srt.constrained import (
|
||||||
FSMInfo,
|
FSMInfo,
|
||||||
|
|||||||
@@ -5,7 +5,6 @@
|
|||||||
from typing import Iterable, List, Optional, Tuple
|
from typing import Iterable, List, Optional, Tuple
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
from peft import LoraConfig
|
|
||||||
from sglang.srt.layers.radix_attention import RadixAttention
|
from sglang.srt.layers.radix_attention import RadixAttention
|
||||||
from sglang.srt.managers.controller.model_runner import InputMetadata
|
from sglang.srt.managers.controller.model_runner import InputMetadata
|
||||||
from sglang.srt.layers.logits_processor import LogitsProcessor
|
from sglang.srt.layers.logits_processor import LogitsProcessor
|
||||||
@@ -31,6 +30,8 @@ from vllm.sequence import SamplerOutput
|
|||||||
from vllm.transformers_utils.configs import ChatGLMConfig
|
from vllm.transformers_utils.configs import ChatGLMConfig
|
||||||
|
|
||||||
|
|
||||||
|
LoraConfig = None
|
||||||
|
|
||||||
|
|
||||||
class GLMAttention(nn.Module):
|
class GLMAttention(nn.Module):
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user