Fix grammar backend for tensor parallelism (#2020)

This commit is contained in:
Lianmin Zheng
2024-11-13 01:49:45 -08:00
committed by GitHub
parent ba069a24d3
commit 54479d6f30
7 changed files with 250 additions and 328 deletions

View File

@@ -37,6 +37,7 @@ import torch
from sglang.global_config import global_config
from sglang.srt.configs.model_config import ModelConfig
from sglang.srt.constrained.base_grammar_backend import BaseGrammarObject
from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache
from sglang.srt.mem_cache.chunk_cache import ChunkCache
from sglang.srt.mem_cache.memory_pool import BaseTokenToKVPool, ReqToTokenPool
@@ -248,7 +249,7 @@ class Req:
self.embedding = None
# Constrained decoding
self.grammar = None
self.grammar: Optional[BaseGrammarObject] = None
# The number of tokens that were already cached in the KV cache
self.cached_tokens = 0