[Fix] Fix clean_up_tokenization_spaces in tokenizer (#1510)

This commit is contained in:
Lianmin Zheng
2024-09-24 21:37:33 -07:00
committed by GitHub
parent 067d8e16fc
commit fb2d0680e0
4 changed files with 12 additions and 9 deletions

View File

@@ -26,12 +26,14 @@ I'm going to the
import argparse
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import AutoModelForCausalLM
from sglang.srt.hf_transformers_utils import get_tokenizer
@torch.inference_mode()
def normal_text(args):
t = AutoTokenizer.from_pretrained(args.model_path, trust_remote_code=True)
t = get_tokenizer(args.model_path, trust_remote_code=True)
m = AutoModelForCausalLM.from_pretrained(
args.model_path,
torch_dtype=torch.float16,