[Fix] Fix clean_up_tokenization_spaces in tokenizer (#1510)

2024-09-24 21:37:33 -07:00
parent 067d8e16fc
commit fb2d0680e0
4 changed files with 12 additions and 9 deletions
--- a/scripts/playground/reference_hf.py
+++ b/scripts/playground/reference_hf.py
@@ -26,12 +26,14 @@ I'm going to the
 import argparse

 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM
+
+from sglang.srt.hf_transformers_utils import get_tokenizer


@torch.inference_mode()
 def normal_text(args):
-    t = AutoTokenizer.from_pretrained(args.model_path, trust_remote_code=True)
+    t = get_tokenizer(args.model_path, trust_remote_code=True)
    m = AutoModelForCausalLM.from_pretrained(
        args.model_path,
        torch_dtype=torch.float16,