"""Install a Socratic default system prompt into a tokenizer's chat template.

Run as a script: loads the tokenizer for ``model_id`` from the Hub, replaces
its chat template with one that injects ``socratic_system_prompt`` whenever
the first message of a conversation is not a system message, and saves the
tokenizer files into ``model_dir``.
"""

# Hub repository the tokenizer is loaded from.
model_id = "Qwen/Qwen2-7B-Instruct"

# This is the key change: we're targeting the cloned repo for saving.
# NOTE(review): the directory name says "qwen2.5" while the tokenizer above is
# loaded from a Qwen2 checkpoint -- confirm this pairing is intended.
model_dir = "/workspace/socratic-tutor-qwen2.5"

socratic_system_prompt = (
    "You are Socrates, a wise and patient tutor. Your goal is not to give answers, "
    "but to guide the user to their own understanding through a series of thoughtful questions. "
    "Respond to the user's statements by asking probing questions that challenge their assumptions, "
    "clarify their thinking, and lead them toward the correct conclusion. "
    "Never provide a direct answer unless explicitly asked."
)


def _jinja_single_quoted(text: str) -> str:
    """Return *text* as a single-quoted Jinja string literal.

    Backslashes, single quotes and line breaks are backslash-escaped so the
    literal cannot be terminated early by characters inside *text*.
    """
    escaped = (
        text.replace("\\", "\\\\")  # must come first so later escapes survive
        .replace("'", "\\'")
        .replace("\r", "\\r")
        .replace("\n", "\\n")
    )
    return "'" + escaped + "'"


def build_chat_template(system_prompt: str) -> str:
    """Build a ChatML-style Jinja chat template with a default system prompt.

    The template emits ``system_prompt`` as a system turn only when the first
    message is not already a system message, then every message wrapped in
    ``<|im_start|>``/``<|im_end|>`` markers, and finally an open assistant
    turn when ``add_generation_prompt`` is set.

    Args:
        system_prompt: Text of the default system turn. It may contain quotes
            or backslashes; it is escaped before being embedded.

    Returns:
        The template source as a single string.
    """
    # Jinja has no triple-quoted strings: wrapping the prompt in '''...'''
    # yields an invalid template as soon as the prompt contains an apostrophe
    # (e.g. "user's"), so the prompt is embedded as one escaped literal.
    prompt_literal = _jinja_single_quoted(system_prompt)
    return (
        "{% for message in messages %}"
        "{% if loop.first and message['role'] != 'system' %}"
        "{{ '<|im_start|>system\\n' + " + prompt_literal + " + '<|im_end|>\\n' }}"
        "{% endif %}"
        "{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>\\n' }}"
        "{% endfor %}"
        "{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}"
    )


new_chat_template = build_chat_template(socratic_system_prompt)


def main() -> None:
    """Load the tokenizer, install the Socratic chat template and save it."""
    # Imported here so the template helpers above stay importable (and
    # testable) on machines where transformers is not installed.
    from transformers import AutoTokenizer

    # Load the tokenizer from the Hub
    print(f"Loading tokenizer for '{model_id}' from the Hub...")
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    tokenizer.chat_template = new_chat_template
    tokenizer.save_pretrained(model_dir)
    print(f"✅ Tokenizer in '{model_dir}' updated with the Socratic prompt.")


if __name__ == "__main__":
    main()