添加 NV A100 Patched 镜像合并/detect_tokenizer.py
This commit is contained in:
25
NV A100 Patched 镜像合并/detect_tokenizer.py
Normal file
25
NV A100 Patched 镜像合并/detect_tokenizer.py
Normal file
@@ -0,0 +1,25 @@
|
||||
import os
|
||||
import json
|
||||
|
||||
def detect(model_dir):
    """Classify the tokenizer files found in a model directory.

    Args:
        model_dir: Path of the directory to inspect.

    Returns:
        A ``(kind, cls)`` tuple. ``kind`` is chosen from the file names
        present directly in ``model_dir``, checked in this order:

        * ``"fast"`` if ``tokenizer.json`` exists,
        * ``"sentencepiece"`` if ``tokenizer.model`` exists,
        * ``"bpe"`` if both ``vocab.json`` and ``merges.txt`` exist,
        * ``"unknown"`` otherwise.

        ``cls`` is the ``tokenizer_class`` value read from
        ``tokenizer_config.json``, or ``""`` when that file is absent or
        does not contain the key.

    Raises:
        OSError: If ``model_dir`` cannot be listed, or the config file
            exists but cannot be read.
        json.JSONDecodeError: If ``tokenizer_config.json`` is not valid JSON.
    """
    cfg_path = os.path.join(model_dir, "tokenizer_config.json")

    # Open directly instead of checking existence first: this avoids a
    # check-then-open race. The encoding is pinned to UTF-8 because JSON
    # files are UTF-8 and the platform default codec may differ.
    try:
        with open(cfg_path, encoding="utf-8") as f:
            cfg = json.load(f)
    except FileNotFoundError:
        cls = ""
    else:
        cls = cfg.get("tokenizer_class", "")

    # A set gives constant-time membership tests for the checks below.
    files = set(os.listdir(model_dir))

    if "tokenizer.json" in files:
        return "fast", cls

    if "tokenizer.model" in files:
        return "sentencepiece", cls

    if "vocab.json" in files and "merges.txt" in files:
        return "bpe", cls

    return "unknown", cls
|
||||
Reference in New Issue
Block a user