# Load a GRPO-trained math-reasoning checkpoint with Hugging Face Transformers
# and generate an answer for a single prompt.
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_ID = "jaygala24/Qwen2.5-3B-GRPO-KL-math-reasoning"
REVISION = "step-0200"  # or another branch name, e.g. "step-0400", "step-0600"

model = AutoModelForCausalLM.from_pretrained(MODEL_ID, revision=REVISION)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, revision=REVISION)

# NOTE(review): the original literal was "\\boxed{{}}" — a leftover str.format
# double-brace escape. The prompt the model should see contains "\boxed{}",
# matching the vLLM example below.
prompt = (
    "Please reason step by step, and put your final answer within \\boxed{}.\n\n"
    "What is the sum of 123 and 456?"
)
inputs = tokenizer(prompt, return_tensors="pt")
# do_sample=True is required for temperature to take effect; without it,
# generate() uses greedy decoding and warns that temperature is ignored.
outputs = model.generate(
    **inputs,
    max_new_tokens=4096,
    temperature=0.7,
    do_sample=True,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
### vLLM
# Run the same GRPO-trained checkpoint with vLLM for fast batched inference.
from vllm import LLM, SamplingParams

llm = LLM(
    model="jaygala24/Qwen2.5-3B-GRPO-KL-math-reasoning",
    revision="step-0200",  # or another branch name, e.g. "step-0400", "step-0600"
)
sampling_params = SamplingParams(temperature=0.7, max_tokens=4096)

prompt = (
    "Please reason step by step, and put your final answer within \\boxed{}.\n\n"
    "What is the sum of 123 and 456?"
)
# llm.generate takes a list of prompts; each result holds one or more
# completions under .outputs — we print the first completion's text.
outputs = llm.generate([prompt], sampling_params)
print(outputs[0].outputs[0].text)