Fix grammar backend for tensor parallelism (#2020)

2024-11-13 01:49:45 -08:00
parent ba069a24d3
commit 54479d6f30
7 changed files with 250 additions and 328 deletions
--- a/python/sglang/srt/managers/schedule_batch.py
+++ b/python/sglang/srt/managers/schedule_batch.py
@@ -37,6 +37,7 @@ import torch

 from sglang.global_config import global_config
 from sglang.srt.configs.model_config import ModelConfig
+from sglang.srt.constrained.base_grammar_backend import BaseGrammarObject
 from sglang.srt.mem_cache.base_prefix_cache import BasePrefixCache
 from sglang.srt.mem_cache.chunk_cache import ChunkCache
 from sglang.srt.mem_cache.memory_pool import BaseTokenToKVPool, ReqToTokenPool
@@ -248,7 +249,7 @@ class Req:
        self.embedding = None

        # Constrained decoding
-        self.grammar = None
+        self.grammar: Optional[BaseGrammarObject] = None

        # The number of cached tokens, that were already cached in the KV cache
        self.cached_tokens = 0