import os
import json


def detect(model_dir):
    """Classify the tokenizer files present in *model_dir*.

    Reads the optional ``tokenizer_config.json`` for its ``tokenizer_class``
    entry, then inspects the directory listing to decide which tokenizer
    file layout is present.

    Args:
        model_dir: Path of the directory to inspect.

    Returns:
        A ``(kind, cls)`` tuple. ``kind`` is the first match of:

        * ``"fast"`` - ``tokenizer.json`` is present,
        * ``"sentencepiece"`` - ``tokenizer.model`` is present,
        * ``"bpe"`` - both ``vocab.json`` and ``merges.txt`` are present,
        * ``"unknown"`` - none of the above.

        ``cls`` is the value stored under ``tokenizer_class`` in
        ``tokenizer_config.json``, or ``""`` when the file or the key is
        missing.

    Raises:
        OSError: If *model_dir* cannot be listed, or the config file exists
            but cannot be read.
        json.JSONDecodeError: If the config file is not valid JSON.
    """
    cfg_path = os.path.join(model_dir, "tokenizer_config.json")
    try:
        # JSON is UTF-8 by specification; do not depend on the locale's
        # default encoding, which differs between platforms.
        with open(cfg_path, encoding="utf-8") as f:
            cfg = json.load(f)
    except FileNotFoundError:
        # The config file is optional: no file means no declared class.
        cls = ""
    else:
        cls = cfg.get("tokenizer_class", "")

    # A set gives O(1) membership tests for the probes below.
    files = set(os.listdir(model_dir))

    # Order matters: the first matching layout wins.
    if "tokenizer.json" in files:
        return "fast", cls
    if "tokenizer.model" in files:
        return "sentencepiece", cls
    if "vocab.json" in files and "merges.txt" in files:
        return "bpe", cls
    return "unknown", cls