From 8b6a4486ecbf83c915c6a9d3c727d188a22f455e Mon Sep 17 00:00:00 2001 From: giorgiopiatti-dfinity Date: Sun, 19 Jan 2025 20:36:07 +0100 Subject: [PATCH] fix missing revision arg when loading tokenizer (#2982) --- python/sglang/srt/managers/detokenizer_manager.py | 1 + python/sglang/srt/managers/scheduler.py | 2 ++ python/sglang/srt/managers/tokenizer_manager.py | 2 ++ python/sglang/srt/managers/tp_worker.py | 2 ++ python/sglang/srt/server.py | 1 + 5 files changed, 8 insertions(+) diff --git a/python/sglang/srt/managers/detokenizer_manager.py b/python/sglang/srt/managers/detokenizer_manager.py index f0605ee1f..a8dc14f01 100644 --- a/python/sglang/srt/managers/detokenizer_manager.py +++ b/python/sglang/srt/managers/detokenizer_manager.py @@ -71,6 +71,7 @@ class DetokenizerManager: server_args.tokenizer_path, tokenizer_mode=server_args.tokenizer_mode, trust_remote_code=server_args.trust_remote_code, + revision=server_args.revision, ) self.decode_status = LimitedCapacityDict() diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index d859a30a0..5df9c24ce 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -206,6 +206,7 @@ class Scheduler: server_args.tokenizer_path, tokenizer_mode=server_args.tokenizer_mode, trust_remote_code=server_args.trust_remote_code, + revision=server_args.revision, ) self.tokenizer = self.processor.tokenizer else: @@ -213,6 +214,7 @@ class Scheduler: server_args.tokenizer_path, tokenizer_mode=server_args.tokenizer_mode, trust_remote_code=server_args.trust_remote_code, + revision=server_args.revision, ) # Check whether overlap can be enabled diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py index 033a660df..9cf6d9cc5 100644 --- a/python/sglang/srt/managers/tokenizer_manager.py +++ b/python/sglang/srt/managers/tokenizer_manager.py @@ -158,6 +158,7 @@ class TokenizerManager: server_args.tokenizer_path, tokenizer_mode=server_args.tokenizer_mode, trust_remote_code=server_args.trust_remote_code, + revision=server_args.revision, ) self.tokenizer = self.processor.tokenizer os.environ["TOKENIZERS_PARALLELISM"] = "false" @@ -171,6 +172,7 @@ class TokenizerManager: server_args.tokenizer_path, tokenizer_mode=server_args.tokenizer_mode, trust_remote_code=server_args.trust_remote_code, + revision=server_args.revision, ) # Store states diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py index 47e3eea40..fd4dbae99 100644 --- a/python/sglang/srt/managers/tp_worker.py +++ b/python/sglang/srt/managers/tp_worker.py @@ -83,6 +83,7 @@ class TpModelWorker: server_args.tokenizer_path, tokenizer_mode=server_args.tokenizer_mode, trust_remote_code=server_args.trust_remote_code, + revision=server_args.revision, ) self.tokenizer = self.processor.tokenizer else: @@ -90,6 +91,7 @@ class TpModelWorker: server_args.tokenizer_path, tokenizer_mode=server_args.tokenizer_mode, trust_remote_code=server_args.trust_remote_code, + revision=server_args.revision, ) self.device = self.model_runner.device diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index b3526520c..a2c1cb375 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -1027,6 +1027,7 @@ class Runtime: self.server_args.tokenizer_path, tokenizer_mode=self.server_args.tokenizer_mode, trust_remote_code=self.server_args.trust_remote_code, + revision=self.server_args.revision, ) async def async_generate(