58 lines
1.3 KiB
Markdown
58 lines
1.3 KiB
Markdown
---
|
|
license: apache-2.0
|
|
---
|
|
|
|
wandb: https://wandb.ai/open-assistant/supervised-finetuning/runs/t2adm3wu
|
|
|
|
checkpoint: 11000 steps (2 epochs)
|
|
|
|
datasets:
|
|
```yaml
pretrain:
  weight_decay: 0.01
  use_custom_sampler: true
  sort_by_length: false
  datasets:
    - joke
    - webgpt:
        val_split: 0.1
    - gpt4all:
        val_split: 0.01
    - alpaca:
        val_split: 0.025
    - code_alpaca:
        val_split: 0.05
    - minimath
    - humaneval_mbpp_codegen_qa
    - humaneval_mbpp_testgen_qa
    - grade_school_math_instructions
    - recipes
    - cmu_wiki_qa
    - oa_wiki_qa_bart_10000row
    - prosocial_dialogue:
        fraction: 0.1
    - explain_prosocial:
        fraction: 0.05
```
|
|
|
|
|
|
pythia:
|
|
```yaml
pythia-1.4b-pretrain:
  dtype: fp16
  learning_rate: 6e-6
  model_name: EleutherAI/pythia-1.4b-deduped
  deepspeed_config: configs/zero_config_pretrain.json
  weight_decay: 0.0
  max_length: 2048
  use_flash_attention: true
  warmup_steps: 50
  gradient_checkpointing: false
  gradient_accumulation_steps: 1
  per_device_train_batch_size: 16
  per_device_eval_batch_size: 16
  num_train_epochs: 2
  save_total_limit: 2
```
|
|
|
|
command: `deepspeed trainer_sft.py --configs defaults pretrain pythia-1.4b-pretrain --cache_dir .cache/ --output_dir .saved_models/pythia-1.4b-pre --residual_dropout 0.0 --deepspeed`