"""Detect which tokenizer file layout a model directory contains."""
# Origin: git patch 0498c437e5ca2686d14ac3ce0ff995f2575958f1
# (author i-peixingyu, Tue, 19 May 2026 18:35:48 +0800), which adds
# K100-vLLM-Patched-v2.0/detect_tokenizer.py.

import json
import os


def _read_tokenizer_class(model_dir):
    """Return the ``tokenizer_class`` string from ``tokenizer_config.json``.

    Returns ``""`` when the config file is absent, when its top-level JSON
    value is not an object, or when ``tokenizer_class`` is missing or is not
    a string (for example JSON ``null``).

    Raises:
        json.JSONDecodeError: If the config file exists but is not valid JSON.
        OSError: If the file exists but cannot be read.
    """
    cfg_path = os.path.join(model_dir, "tokenizer_config.json")
    try:
        # Read as UTF-8 explicitly so decoding does not depend on the
        # platform's locale default.
        with open(cfg_path, encoding="utf-8") as f:
            cfg = json.load(f)
    except FileNotFoundError:
        return ""

    if not isinstance(cfg, dict):
        return ""
    tokenizer_class = cfg.get("tokenizer_class")
    # Keep the second element of detect()'s result a str in every case.
    return tokenizer_class if isinstance(tokenizer_class, str) else ""


def detect(model_dir):
    """Classify the tokenizer files found directly inside ``model_dir``.

    The checks run in priority order and the first match wins:

    1. ``tokenizer.json``                -> ``"fast"``
    2. ``tokenizer.model``               -> ``"sentencepiece"``
    3. ``vocab.json`` and ``merges.txt`` -> ``"bpe"``
    4. none of the above                 -> ``"unknown"``

    Args:
        model_dir: Path of the directory to inspect.

    Returns:
        A ``(kind, tokenizer_class)`` tuple. ``kind`` is one of the four
        strings above; ``tokenizer_class`` is the value read from
        ``tokenizer_config.json`` or ``""`` when unavailable.

    Raises:
        OSError: If ``model_dir`` cannot be listed (e.g. it does not exist
            or is not a directory).
        json.JSONDecodeError: If ``tokenizer_config.json`` is malformed.
    """
    tokenizer_class = _read_tokenizer_class(model_dir)

    # Build the set once; each membership test below is then O(1).
    files = set(os.listdir(model_dir))

    if "tokenizer.json" in files:
        return "fast", tokenizer_class

    if "tokenizer.model" in files:
        return "sentencepiece", tokenizer_class

    if "vocab.json" in files and "merges.txt" in files:
        return "bpe", tokenizer_class

    return "unknown", tokenizer_class