初始化项目,由ModelHub XC社区提供模型
Model: CorticalStack/gemma-7b-ultrachat-sft Source: Original Platform
This commit is contained in:
28
.ipynb_checkpoints/README-checkpoint.md
Normal file
28
.ipynb_checkpoints/README-checkpoint.md
Normal file
@@ -0,0 +1,28 @@
---
license: apache-2.0
---

# gemma-7b-ultrachat-sft

gemma-7b-ultrachat-sft is an SFT fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) using the [stingning/ultrachat](https://huggingface.co/datasets/stingning/ultrachat) dataset.

## Fine-tuning configuration

### LoRA

- r: 8
- LoRA alpha: 16
- LoRA dropout: 0.1

### Training arguments

- Epochs: 1
- Batch size: 4
- Gradient accumulation steps: 6
- Optimizer: paged_adamw_32bit
- Max steps: 100
- Learning rate: 0.0002
- Weight decay: 0.001
- Learning rate scheduler type: constant
- Max seq length: 2048

Trained with [Unsloth](https://github.com/unslothai/unsloth) and Huggingface's TRL library.

[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
28
.ipynb_checkpoints/config-checkpoint.json
Normal file
28
.ipynb_checkpoints/config-checkpoint.json
Normal file
@@ -0,0 +1,28 @@
{
  "_name_or_path": "google/gemma-7b",
  "architectures": [
    "GemmaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 2,
  "eos_token_id": 1,
  "head_dim": 256,
  "hidden_act": "gelu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 24576,
  "max_position_embeddings": 8192,
  "model_type": "gemma",
  "num_attention_heads": 16,
  "num_hidden_layers": 28,
  "num_key_value_heads": 16,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "torch_dtype": "float16",
  "transformers_version": "4.38.0",
  "use_cache": true,
  "vocab_size": 256000
}
Reference in New Issue
Block a user