Files
K100-vllm/NV A100 Patched 镜像合并/detect_tokenizer.py

25 lines
560 B
Python

import os
import json
def detect(model_dir):
    """Classify the tokenizer stored in *model_dir* by the files present.

    Args:
        model_dir: Path of the directory to inspect.

    Returns:
        A ``(kind, tokenizer_class)`` tuple. ``kind`` is the first match of:
        ``"fast"`` if ``tokenizer.json`` exists, ``"sentencepiece"`` if
        ``tokenizer.model`` exists, ``"bpe"`` if both ``vocab.json`` and
        ``merges.txt`` exist, otherwise ``"unknown"``. ``tokenizer_class`` is
        the ``"tokenizer_class"`` value read from ``tokenizer_config.json``,
        or ``""`` when that file or key is absent.

    Raises:
        OSError: If *model_dir* cannot be listed or the config cannot be read.
        json.JSONDecodeError: If ``tokenizer_config.json`` is not valid JSON.
    """
    cfg_path = os.path.join(model_dir, "tokenizer_config.json")
    if os.path.exists(cfg_path):
        # Decode explicitly as UTF-8: the default for open() is the locale
        # encoding, which breaks on non-ASCII token strings on non-UTF-8
        # platforms.
        with open(cfg_path, encoding="utf-8") as f:
            cfg = json.load(f)
        cls = cfg.get("tokenizer_class", "")
    else:
        cls = ""

    files = os.listdir(model_dir)
    # Order matters: a directory may ship several formats; the first match
    # wins.
    if "tokenizer.json" in files:
        return "fast", cls
    if "tokenizer.model" in files:
        return "sentencepiece", cls
    if "vocab.json" in files and "merges.txt" in files:
        return "bpe", cls
    return "unknown", cls