Model: kmseong/llama2_7b_chat-SSFT-AGNEWS-FT-safety-mix-0.1-lr5e-5 Source: Original Platform
24 lines
747 B
JSON
{
  "base_model": "kmseong/llama2_7b-chat-Safety-FT-lr5e-5",
  "fine_tuning_type": "Full Parameter Fine-tuning",
  "dataset": "agnews",
  "num_train_samples": 8080,
  "num_eval_samples": 0,
  "batch_size": 4,
  "grad_accum": 4,
  "learning_rate": 3e-05,
  "weight_decay": 0.01,
  "warmup_ratio": 0.1,
  "epochs": 3,
  "max_length": 1024,
  "max_grad_norm": 1.0,
  "lr_scheduler_type": "cosine",
  "optimizer": "AdamW (torch)",
  "gradient_checkpointing": false,
  "dtype": "bf16",
  "trainer_type": "Trainer",
  "safety_mix_ratio": 0.01,
  "safety_data_path": "/home/yonsei_jong/Safety-WaRP-LLM/data/circuit_breakers_train.json",
  "agnews_train_path": "/home/yonsei_jong/Safety-WaRP-LLM/data/agnews_train_8000.jsonl",
  "agnews_eval_path": null
}