From 05774d31bb13cde306f3282e9592b7da90e026a2 Mon Sep 17 00:00:00 2001 From: Taeyoung Lee Date: Sun, 4 Aug 2024 03:17:31 +0000 Subject: [PATCH] Update README.md --- README.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/README.md b/README.md index a9f2972..6c2f337 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,21 @@ print(outputs[0]["generated_text"]) ## 💻 Usage for VLLM +Use with vLLM +You can run conversational inference with vLLM using the gen() function defined below. +Make sure to update your vLLM installation via ```pip install --upgrade vllm```. + ```python +from vllm import LLM, SamplingParams +from transformers import AutoTokenizer, pipeline + +BASE_MODEL = "sh2orc/Llama-3.1-Korean-8B-Instruct" + +llm = LLM(model=BASE_MODEL) + +tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL) +tokenizer.pad_token = tokenizer.eos_token +tokenizer.padding_side = 'right' def gen(instruction): messages = [