添加 NV A100 Patched 镜像合并/detect_tokenizer.py
This commit is contained in:
25
NV A100 Patched 镜像合并/detect_tokenizer.py
Normal file
25
NV A100 Patched 镜像合并/detect_tokenizer.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
import os
|
||||||
|
import json
|
||||||
|
|
||||||
|
def detect(model_dir):
    """Classify the tokenizer files found in a model directory.

    The tokenizer class name is read from ``tokenizer_config.json`` when that
    file exists; the tokenizer kind is then inferred from which well-known
    files are present directly inside ``model_dir``.

    Args:
        model_dir: Path of the directory to inspect.

    Returns:
        A ``(kind, cls)`` tuple. ``kind`` is the first match of:

        * ``"fast"`` -- ``tokenizer.json`` is present;
        * ``"sentencepiece"`` -- ``tokenizer.model`` is present;
        * ``"bpe"`` -- both ``vocab.json`` and ``merges.txt`` are present;
        * ``"unknown"`` -- none of the above.

        ``cls`` is the ``tokenizer_class`` value from the config, or ``""``
        when the config file is missing or does not provide one.

    Raises:
        OSError: If ``model_dir`` cannot be listed (e.g. it does not exist).
        json.JSONDecodeError: If ``tokenizer_config.json`` is not valid JSON.
    """
    cfg_path = os.path.join(model_dir, "tokenizer_config.json")

    cls = ""
    try:
        # JSON is UTF-8 by specification; do not depend on the locale's
        # default encoding, which breaks on configs with non-ASCII tokens.
        with open(cfg_path, encoding="utf-8") as f:
            cfg = json.load(f)
    except FileNotFoundError:
        # A missing config is expected; the class name simply stays empty.
        pass
    else:
        if isinstance(cfg, dict):
            # `or ""` also covers an explicit `"tokenizer_class": null`.
            cls = cfg.get("tokenizer_class") or ""

    files = set(os.listdir(model_dir))

    # Order matters: a directory may ship several formats at once, and the
    # first matching check wins.
    if "tokenizer.json" in files:
        return "fast", cls

    if "tokenizer.model" in files:
        return "sentencepiece", cls

    if "vocab.json" in files and "merges.txt" in files:
        return "bpe", cls

    return "unknown", cls
|
||||||
Reference in New Issue
Block a user