添加 NV A100 Patched 镜像合并/detect_tokenizer.py

This commit is contained in:
2026-04-27 13:12:10 +08:00
parent c586d9a1f4
commit c701ca89d3

View File

@@ -0,0 +1,25 @@
import os
import json
def detect(model_dir):
    """Guess which tokenizer format a model directory contains.

    Reads ``tokenizer_class`` from ``tokenizer_config.json`` when that file
    exists, then classifies the directory by the tokenizer files present.

    Args:
        model_dir: Path of the directory to inspect.

    Returns:
        A ``(kind, cls)`` tuple. ``kind`` is the first match of:
        ``"fast"`` (``tokenizer.json`` present), ``"sentencepiece"``
        (``tokenizer.model`` present), ``"bpe"`` (both ``vocab.json`` and
        ``merges.txt`` present), otherwise ``"unknown"``. ``cls`` is the
        ``tokenizer_class`` value from the config, or ``""`` when the config
        file is absent or the key is missing, null, or empty.

    Raises:
        OSError: If ``model_dir`` cannot be listed or the config file cannot
            be read.
        json.JSONDecodeError: If ``tokenizer_config.json`` is not valid JSON.
    """
    cfg_path = os.path.join(model_dir, "tokenizer_config.json")
    cls = ""
    if os.path.exists(cfg_path):
        # Decode as UTF-8 explicitly so the result does not depend on the
        # platform's locale encoding (configs may hold non-ASCII tokens).
        with open(cfg_path, encoding="utf-8") as f:
            cfg = json.load(f)
        # An explicit JSON null would otherwise leak out as None; keep the
        # "no class known" value uniformly "".
        cls = cfg.get("tokenizer_class") or ""

    files = set(os.listdir(model_dir))
    # Order matters: the first matching format wins.
    if "tokenizer.json" in files:
        return "fast", cls
    if "tokenizer.model" in files:
        return "sentencepiece", cls
    if {"vocab.json", "merges.txt"} <= files:
        return "bpe", cls
    return "unknown", cls