# K100-vllm/NV A100 Patched 镜像合并/detect_tokenizer.py

import os
import json

def detect(model_dir):
    """Guess the tokenizer format of a model directory from its files.

    Args:
        model_dir: Path to the directory to inspect.

    Returns:
        A ``(kind, tokenizer_class)`` tuple. ``kind`` is the first match of:

        * ``"fast"`` - ``tokenizer.json`` is present;
        * ``"sentencepiece"`` - ``tokenizer.model`` is present;
        * ``"bpe"`` - both ``vocab.json`` and ``merges.txt`` are present;
        * ``"unknown"`` - none of the above.

        ``tokenizer_class`` is the ``"tokenizer_class"`` value read from
        ``tokenizer_config.json``, or ``""`` when the file is missing, the
        key is absent or null, or the JSON top level is not an object.

    Raises:
        OSError: If ``model_dir`` cannot be listed, or the config file exists
            but cannot be read.
        json.JSONDecodeError: If ``tokenizer_config.json`` is not valid JSON.
    """
    cfg_path = os.path.join(model_dir, "tokenizer_config.json")

    cls = ""
    try:
        # Read explicitly as UTF-8 so the result does not depend on the
        # platform's locale encoding.
        with open(cfg_path, encoding="utf-8") as f:
            cfg = json.load(f)
    except FileNotFoundError:
        # A missing config is expected; fall back to the empty class name.
        pass
    else:
        if isinstance(cfg, dict):
            # `or ""` normalizes an explicit JSON null to the same empty
            # string used when the key is absent.
            cls = cfg.get("tokenizer_class") or ""

    files = set(os.listdir(model_dir))

    # Order matters: the first matching layout wins.
    if "tokenizer.json" in files:
        return "fast", cls

    if "tokenizer.model" in files:
        return "sentencepiece", cls

    if "vocab.json" in files and "merges.txt" in files:
        return "bpe", cls

    return "unknown", cls