初始化项目，由ModelHub XC社区提供模型

Model: ngxson/Vistral-7B-ChatML Source: Original Platform
2026-04-12 11:22:56 +08:00
commit 35213c84a8
25 changed files with 111222 additions and 0 deletions
--- a/run.py
+++ b/run.py
@@ -0,0 +1,64 @@
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline, logging, TextStreamer
+from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training, get_peft_model
+import os, torch, wandb, platform, warnings
+from datasets import load_dataset
+from trl import SFTTrainer
+
+# Hugging Face access token used to download the base model. It is read from
+# the HF_TOKEN environment variable so that no secret is stored in this file;
+# when unset, None is passed and transformers uses its own credential lookup.
+hf_token = os.environ.get("HF_TOKEN") or None
+
+# Directory (or hub id) of the fine-tuned PEFT adapter that is applied on top of
+# the base model. It must be supplied by the caller; checking it up front fails
+# fast, before the expensive base-model load.
+CHECKPOINT_PATH = os.environ.get("CHECKPOINT_PATH")
+if not CHECKPOINT_PATH:
+  raise SystemExit("Set the CHECKPOINT_PATH environment variable to the PEFT adapter checkpoint to load.")
+
+# Tokenizer is loaded from a local directory next to this script.
+tokenizer = AutoTokenizer.from_pretrained('./vistral-tokenizer')
+
+# 4-bit NF4 quantization with double quantization; matmuls are computed in bfloat16.
+bnb_config = BitsAndBytesConfig(
+  load_in_4bit=True,
+  bnb_4bit_quant_type="nf4",
+  bnb_4bit_compute_dtype=torch.bfloat16,
+  bnb_4bit_use_double_quant=True,
+)
+
+# Quantized base model; device_map="auto" lets the library place the weights.
+model = AutoModelForCausalLM.from_pretrained(
+  'Viet-Mistral/Vistral-7B-Chat',
+  device_map="auto",
+  token=hf_token,
+  quantization_config=bnb_config,
+)
+
+# Base model wrapped with the fine-tuned adapter; this is the model used for chat.
+ft_model = PeftModel.from_pretrained(model, CHECKPOINT_PATH)
+
+# Optional switches for PyTorch's scaled-dot-product attention backends, left disabled.
+#torch.backends.cuda.enable_mem_efficient_sdp(False)
+#torch.backends.cuda.enable_flash_sdp(False)
+
+# System message that opens every conversation (Vietnamese; roughly: "You are an
+# enthusiastic and honest Vietnamese assistant. Always answer as helpfully as
+# possible while staying safe.").
+system_prompt = "Bạn là một trợ lí Tiếng Việt nhiệt tình và trung thực. Hãy luôn trả lời một cách hữu ích nhất có thể, đồng thời giữ an toàn."
+
+# Generation stops on the tokenizer's EOS id or on the last id obtained by
+# tokenizing '<|im_end|>'.
+# NOTE(review): this presumes '<|im_end|>' is a single token in this tokenizer — confirm.
+stop_tokens = [tokenizer.eos_token_id, tokenizer('<|im_end|>')['input_ids'].pop()]
+
+def chat_test():
+  """Run an interactive chat session with the fine-tuned model on stdin/stdout.
+
+  Each non-empty line typed at the "Human: " prompt is appended to the
+  conversation as a user turn; the model's reply is printed and appended as
+  an assistant turn, so later turns see the full history.
+
+  Commands (case-insensitive, surrounding whitespace ignored):
+    reset -- drop the history and start again from the system prompt.
+    exit  -- leave the loop.
+
+  End-of-input or Ctrl-C at the prompt also ends the session.
+  """
+  def new_conversation():
+    # Every conversation starts with only the system message.
+    return [{"role": "system", "content": system_prompt }]
+
+  conversation = new_conversation()
+  while True:
+    try:
+      human = input("Human: ").strip()
+    except (EOFError, KeyboardInterrupt):
+      # Closed stdin or Ctrl-C: end the session without a traceback.
+      print()
+      break
+
+    command = human.lower()
+    if command == "reset":
+      conversation = new_conversation()
+      print("The chat history has been cleared!")
+      continue
+
+    if command == "exit":
+      break
+
+    if not human:
+      # Nothing was typed; do not send an empty user turn to the model.
+      continue
+
+    conversation.append({"role": "user", "content": human })
+    # Render the history with the tokenizer's chat template, then open the
+    # assistant turn by hand so the model continues from there.
+    # NOTE(review): if the template already emits a BOS token, tokenizing the
+    # rendered string again may add a second one — confirm against the tokenizer.
+    formatted = tokenizer.apply_chat_template(conversation, tokenize=False) + "<|im_start|>assistant"
+    tok = tokenizer(formatted, return_tensors="pt").to(ft_model.device)
+    input_ids = tok['input_ids']
+
+    out_ids = ft_model.generate(
+      input_ids=input_ids,
+      attention_mask=tok['attention_mask'],
+      eos_token_id=stop_tokens,
+      max_new_tokens=50,
+      do_sample=True,
+      top_p=0.95,
+      top_k=40,
+      temperature=0.1,
+      repetition_penalty=1.05,
+    )
+    # The output contains the prompt followed by the reply; keep only the
+    # newly generated tokens before decoding.
+    new_tokens = out_ids[:, input_ids.size(1): ]
+    assistant = tokenizer.batch_decode(new_tokens, skip_special_tokens=True)[0].strip()
+    print("Assistant: ", assistant)
+    conversation.append({"role": "assistant", "content": assistant })
+
+# Start the interactive session only when this file is executed as a script,
+# so that importing it does not block on the "Human: " prompt.
+if __name__ == "__main__":
+  chat_test()