Increase the thread-count limit for tp worker managers. (#567)

This commit is contained in:
Lianmin Zheng
2024-06-26 09:33:45 -07:00
committed by GitHub
parent a385ee27bd
commit 2e6e62e156
9 changed files with 148 additions and 84 deletions

View File

@@ -1,8 +1,13 @@
import transformers
import argparse
import code
from sglang.srt.hf_transformers_utils import get_tokenizer

# Tokenizer playground: fetch a tokenizer by model name and open an
# interactive console with it bound to `t`.
#
# NOTE(review): this span was reconstructed from a diff view whose +/- markers
# and indentation were stripped. The earlier hard-coded
# `transformers.AutoTokenizer.from_pretrained(name)` path duplicated the logic
# below and ran at import time, so only the CLI-driven path is kept.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--name",
        type=str,
        default="meta-llama/Meta-Llama-3-8B-Instruct",
        help="Model name passed to get_tokenizer "
        "(e.g. meta-llama/Llama-2-7b-chat-hf).",
    )
    args = parser.parse_args()

    # get_tokenizer is defined in sglang.srt.hf_transformers_utils (not visible
    # here); presumably it returns a Hugging Face tokenizer -- confirm there.
    t = get_tokenizer(args.name)

    # At module level locals() is the global namespace, so `t`, `args`,
    # `parser` and the imported modules are all reachable from the console.
    code.interact(local=locals())