Increase the thread limit for tp worker managers. (#567)

This commit is contained in:
Lianmin Zheng
2024-06-26 09:33:45 -07:00
committed by GitHub
parent a385ee27bd
commit 2e6e62e156
9 changed files with 148 additions and 84 deletions

View File

@@ -8,7 +8,8 @@ class FSMCache(BaseCache):
def __init__(self, tokenizer_path, tokenizer_args_dict, enable=True):
super().__init__(enable=enable)
if tokenizer_path.endswith(".json"):
if tokenizer_path.endswith(".json") or tokenizer_path.endswith(".model"):
# Do not support TiktokenTokenizer or SentencePieceTokenizer
return
from importlib.metadata import version