From c701ca89d3a718fa8dbbb8370182bb1f513cbde6 Mon Sep 17 00:00:00 2001
From: i-peixingyu <i-peixingyu@4paradigm.com>
Date: Mon, 27 Apr 2026 13:12:10 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=20NV=20A100=20Patched=20?=
 =?UTF-8?q?=E9=95=9C=E5=83=8F=E5=90=88=E5=B9=B6/detect=5Ftokenizer.py?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 NV A100 Patched 镜像合并/detect_tokenizer.py | 25 ++++++++++++++++++++
 1 file changed, 25 insertions(+)
 create mode 100644 NV A100 Patched 镜像合并/detect_tokenizer.py

diff --git a/NV A100 Patched 镜像合并/detect_tokenizer.py b/NV A100 Patched 镜像合并/detect_tokenizer.py
new file mode 100644
index 0000000..03396c2
--- /dev/null
+++ b/NV A100 Patched 镜像合并/detect_tokenizer.py	
@@ -0,0 +1,25 @@
+import os 
+import json
+
+def detect(model_dir):
+    """Classify the tokenizer files found in ``model_dir``.
+
+    Returns ``(kind, tokenizer_class)``. kind is the first match of "fast",
+    "sentencepiece", "bpe", else "unknown"; tokenizer_class defaults to "".
+    """
+    cfg_path = os.path.join(model_dir, "tokenizer_config.json")
+    cls = ""
+    if os.path.exists(cfg_path):
+        # JSON is UTF-8 by spec; do not depend on the locale's default codec.
+        with open(cfg_path, encoding="utf-8") as f:
+            cfg = json.load(f)
+        cls = cfg.get("tokenizer_class") or ""  # null counts as missing
+
+    files = os.listdir(model_dir)
+    if "tokenizer.json" in files:
+        return "fast", cls
+    if "tokenizer.model" in files:
+        return "sentencepiece", cls
+    if "vocab.json" in files and "merges.txt" in files:
+        return "bpe", cls
+    return "unknown", cls
\ No newline at end of file