Higher priority for user input of max_prefill_tokens & format (#540)

2024-06-12 21:48:40 -07:00
parent 1374334d38
commit fb9296f0ed
50 changed files with 817 additions and 569 deletions
--- a/python/sglang/srt/constrained/__init__.py
+++ b/python/sglang/srt/constrained/__init__.py
@@ -4,7 +4,7 @@ from typing import Dict, Optional, Union
 from outlines.caching import cache as disk_cache
 from outlines.caching import disable_cache
 from outlines.fsm.guide import RegexGuide
-from outlines.fsm.regex import FSMInfo, make_deterministic_fsm, make_byte_level_fsm
+from outlines.fsm.regex import FSMInfo, make_byte_level_fsm, make_deterministic_fsm
 from outlines.models.transformers import TransformerTokenizer
 from pydantic import BaseModel

--- a/python/sglang/srt/constrained/fsm_cache.py
+++ b/python/sglang/srt/constrained/fsm_cache.py
@@ -1,4 +1,5 @@
 """Cache for the compressed finite state machine."""
+
 from sglang.srt.constrained import RegexGuide, TransformerTokenizer
 from sglang.srt.constrained.base_cache import BaseCache

--- a/python/sglang/srt/constrained/jump_forward.py
+++ b/python/sglang/srt/constrained/jump_forward.py
@@ -8,11 +8,12 @@ from collections import defaultdict

 import interegular
 import outlines.caching
+
 from sglang.srt.constrained import (
    FSMInfo,
    disk_cache,
-    make_deterministic_fsm,
    make_byte_level_fsm,
+    make_deterministic_fsm,
 )
 from sglang.srt.constrained.base_cache import BaseCache