From 0498c437e5ca2686d14ac3ce0ff995f2575958f1 Mon Sep 17 00:00:00 2001
From: i-peixingyu <i-peixingyu@4paradigm.com>
Date: Tue, 19 May 2026 18:35:48 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=20K100-vLLM-Patched-v2.0/det?=
 =?UTF-8?q?ect=5Ftokenizer.py?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 K100-vLLM-Patched-v2.0/detect_tokenizer.py | 25 ++++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 K100-vLLM-Patched-v2.0/detect_tokenizer.py

diff --git a/K100-vLLM-Patched-v2.0/detect_tokenizer.py b/K100-vLLM-Patched-v2.0/detect_tokenizer.py
new file mode 100644
index 0000000..03396c2
--- /dev/null
+++ b/K100-vLLM-Patched-v2.0/detect_tokenizer.py
@@ -0,0 +1,25 @@
+import os 
+import json
+
+def detect(model_dir):
+    """Return ``(kind, tokenizer_class)`` for the tokenizer files in *model_dir*.
+
+    *kind* is "fast" (tokenizer.json), "sentencepiece" (tokenizer.model),
+    "bpe" (vocab.json + merges.txt) or "unknown", checked in that order.
+    *tokenizer_class* is the "tokenizer_class" value from
+    tokenizer_config.json, or "" if the file or key is missing or null.
+    """
+    cls = ""
+    cfg_path = os.path.join(model_dir, "tokenizer_config.json")
+    if os.path.exists(cfg_path):
+        # JSON is UTF-8 by spec; do not depend on the locale's default codec.
+        with open(cfg_path, encoding="utf-8") as f:
+            cls = json.load(f).get("tokenizer_class") or ""
+    files = set(os.listdir(model_dir))
+    if "tokenizer.json" in files:
+        return "fast", cls
+    if "tokenizer.model" in files:
+        return "sentencepiece", cls
+    if "vocab.json" in files and "merges.txt" in files:
+        return "bpe", cls
+    return "unknown", cls
\ No newline at end of file