Fix grammar backend for tensor parallelism (#2020)
This commit is contained in:
@@ -37,6 +37,7 @@ import torch
 from sglang.global_config import global_config
 from sglang.srt.configs.model_config import ModelConfig
+from sglang.srt.constrained.base_grammar_backend import BaseGrammarObject
 from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache
 from sglang.srt.mem_cache.chunk_cache import ChunkCache
 from sglang.srt.mem_cache.memory_pool import BaseTokenToKVPool, ReqToTokenPool
@@ -248,7 +249,7 @@ class Req:
         self.embedding = None

         # Constrained decoding
-        self.grammar = None
+        self.grammar: Optional[BaseGrammarObject] = None

         # The number of cached tokens, that were already cached in the KV cache
         self.cached_tokens = 0
Reference in New Issue
Block a user