Fix dependency (#538)

This commit is contained in:
Lianmin Zheng
2024-06-12 13:17:35 -07:00
committed by GitHub
parent 9c902b1954
commit 94aead9e8d
3 changed files with 5 additions and 4 deletions

View File

@@ -3,10 +3,10 @@ Faster constrained decoding.
Reference: https://lmsys.org/blog/2024-02-05-compressed-fsm/
"""
import interegular
import dataclasses
from collections import defaultdict
import interegular
import outlines.caching
from sglang.srt.constrained import (
FSMInfo,

View File

@@ -5,7 +5,6 @@
from typing import Iterable, List, Optional, Tuple
import torch
from peft import LoraConfig
from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.managers.controller.model_runner import InputMetadata
from sglang.srt.layers.logits_processor import LogitsProcessor
@@ -31,6 +30,8 @@ from vllm.sequence import SamplerOutput
from vllm.transformers_utils.configs import ChatGLMConfig
LoraConfig = None
class GLMAttention(nn.Module):
@@ -387,4 +388,4 @@ class ChatGLMForCausalLM(nn.Module):
EntryClass = ChatGLMForCausalLM
# compat: glm model.config class == ChatGLMModel
EntryClassRemapping = [("ChatGLMModel", ChatGLMForCausalLM)]
EntryClassRemapping = [("ChatGLMModel", ChatGLMForCausalLM)]