from transformers import AutoTokenizer

# Hub repository ID of the model whose tokenizer is fetched below.
model_id = "Qwen/Qwen2-7B-Instruct"
print(f"Loading tokenizer for '{model_id}' from the Hub...")
# The keyword form makes it explicit that model_id is the name-or-path argument.
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_id,
)

# Target directory for saving: the locally cloned model repository.
# NOTE(review): the directory name says "qwen2.5" while the tokenizer is
# loaded from a Qwen2 checkpoint -- confirm this pairing is intended.
model_dir = "/workspace/socratic-tutor-qwen2.5"

# Default system message injected when a conversation does not start with its
# own system message.
socratic_system_prompt = (
    "You are Socrates, a wise and patient tutor. Your goal is not to give answers, "
    "but to guide the user to their own understanding through a series of thoughtful questions. "
    "Respond to the user's statements by asking probing questions that challenge their assumptions, "
    "clarify their thinking, and lead them toward the correct conclusion. "
    "Never provide a direct answer unless explicitly asked."
)


def _jinja_string_literal(text):
    """Return *text* as a single-quoted Jinja string literal.

    Jinja has no triple-quoted strings: ``'''`` is read as an empty string
    followed by the start of a new one, so any apostrophe in the text would
    end the literal early and make the template fail to compile. Escaping
    backslashes and single quotes (and spelling line breaks as escape
    sequences) keeps arbitrary text inside one well-formed literal.

    Args:
        text: The raw string to embed in a Jinja expression.

    Returns:
        The escaped text wrapped in single quotes.
    """
    escaped = (
        text.replace("\\", "\\\\")  # must run first so later escapes are not doubled
        .replace("'", "\\'")
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )
    return "'" + escaped + "'"


# ChatML-style template: every message is wrapped in <|im_start|>/<|im_end|>
# markers, and the Socratic system prompt is emitted before the first message
# unless that message already has the 'system' role.
new_chat_template = "".join(
    (
        "{% for message in messages %}",
        "{% if loop.first and message['role'] != 'system' %}",
        "{{ '<|im_start|>system\\n' + ",
        # The prompt contains an apostrophe ("user's"), so it must be escaped
        # before being placed inside the Jinja expression.
        _jinja_string_literal(socratic_system_prompt),
        " + '<|im_end|>\\n' }}",
        "{% endif %}",
        "{{ '<|im_start|>' + message['role'] + '\\n' + message['content'] + '<|im_end|>\\n' }}",
        "{% endfor %}",
        "{% if add_generation_prompt %}{{ '<|im_start|>assistant\\n' }}{% endif %}",
    )
)

# Install the custom template on the tokenizer, write the tokenizer files
# into model_dir, then report completion.
tokenizer.chat_template = new_chat_template
tokenizer.save_pretrained(save_directory=model_dir)
print(f"✅ Tokenizer in '{model_dir}' updated with the Socratic prompt.")