# DPO fine-tuning of a 4-bit quantised causal LM with LoRA adapters.
#
# This section expects the following names to already be defined earlier in
# the file: `model_name`, `new_model`, `dataset`, `tokenizer`, plus the
# imports for `torch`, `LoraConfig`, `AutoModelForCausalLM`,
# `TrainingArguments` and `DPOTrainer`.

# LoRA configuration
peft_config = LoraConfig(
    r=16,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    # Adapters are attached to these projection modules (attention q/k/v/o
    # and the MLP gate/up/down projections, going by their names).
    target_modules=[
        "k_proj",
        "gate_proj",
        "v_proj",
        "up_proj",
        "q_proj",
        "o_proj",
        "down_proj",
    ],
)

# Model to fine-tune
# NOTE(review): weights are loaded with torch.float16 while the training
# arguments below request bf16=True -- confirm this dtype mix is intended.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    load_in_4bit=True,
)
# The KV cache is switched off for training; gradient checkpointing is
# enabled in the training arguments below.
model.config.use_cache = False

# Training arguments
training_args = TrainingArguments(
    # 4 samples per device x 4 accumulation steps per optimizer update.
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    learning_rate=5e-5,
    lr_scheduler_type="cosine",
    max_steps=120,
    # No intermediate checkpoints are written during the run.
    save_strategy="no",
    logging_steps=1,
    output_dir=new_model,
    optim="paged_adamw_32bit",
    # 50 of the 120 total steps are spent on learning-rate warmup.
    warmup_steps=50,
    bf16=True,
    report_to="wandb",
)

# Create DPO trainer
# No explicit reference model is passed here; only the policy model and the
# LoRA config are supplied.
dpo_trainer = DPOTrainer(
    model,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    peft_config=peft_config,
    beta=0.1,
    max_prompt_length=1024,
    max_length=1536,
)

# Fine-tune model with DPO
dpo_trainer.train()