初始化项目,由ModelHub XC社区提供模型
Model: KnutJaegersberg/webMistral-7B Source: Original Platform
This commit is contained in:
23
adapter/adapter_config.json
Normal file
23
adapter/adapter_config.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"auto_mapping": null,
|
||||
"base_model_name_or_path": "/run/media/knut/HD2/Yarn-Mistral-7b-128k/",
|
||||
"bias": "none",
|
||||
"fan_in_fan_out": false,
|
||||
"inference_mode": true,
|
||||
"init_lora_weights": true,
|
||||
"layers_pattern": null,
|
||||
"layers_to_transform": null,
|
||||
"lora_alpha": 32,
|
||||
"lora_dropout": 0.05,
|
||||
"modules_to_save": null,
|
||||
"peft_type": "LORA",
|
||||
"r": 16,
|
||||
"revision": null,
|
||||
"target_modules": [
|
||||
"q_proj",
|
||||
"k_proj",
|
||||
"v_proj",
|
||||
"o_proj"
|
||||
],
|
||||
"task_type": "CAUSAL_LM"
|
||||
}
|
||||
3
adapter/adapter_model.safetensors
Normal file
3
adapter/adapter_model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:266ecfaf03faf1094a6672be7d82008834bfcc1f6a4597bc12dcb18cd2811127
|
||||
size 54560368
|
||||
34
adapter/checkpoint-38370/README.md
Normal file
34
adapter/checkpoint-38370/README.md
Normal file
@@ -0,0 +1,34 @@
|
||||
---
|
||||
library_name: peft
|
||||
---
|
||||
## Training procedure
|
||||
|
||||
|
||||
The following `bitsandbytes` quantization config was used during training:
|
||||
- quant_method: bitsandbytes
|
||||
- load_in_8bit: False
|
||||
- load_in_4bit: True
|
||||
- llm_int8_threshold: 6.0
|
||||
- llm_int8_skip_modules: None
|
||||
- llm_int8_enable_fp32_cpu_offload: False
|
||||
- llm_int8_has_fp16_weight: False
|
||||
- bnb_4bit_quant_type: nf4
|
||||
- bnb_4bit_use_double_quant: False
|
||||
- bnb_4bit_compute_dtype: float16
|
||||
|
||||
The following `bitsandbytes` quantization config was used during training:
|
||||
- quant_method: bitsandbytes
|
||||
- load_in_8bit: False
|
||||
- load_in_4bit: True
|
||||
- llm_int8_threshold: 6.0
|
||||
- llm_int8_skip_modules: None
|
||||
- llm_int8_enable_fp32_cpu_offload: False
|
||||
- llm_int8_has_fp16_weight: False
|
||||
- bnb_4bit_quant_type: nf4
|
||||
- bnb_4bit_use_double_quant: False
|
||||
- bnb_4bit_compute_dtype: float16
|
||||
### Framework versions
|
||||
|
||||
- PEFT 0.5.0
|
||||
|
||||
- PEFT 0.5.0
|
||||
23
adapter/checkpoint-38370/adapter_config.json
Normal file
23
adapter/checkpoint-38370/adapter_config.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"auto_mapping": null,
|
||||
"base_model_name_or_path": "/run/media/knut/HD2/Yarn-Mistral-7b-128k/",
|
||||
"bias": "none",
|
||||
"fan_in_fan_out": false,
|
||||
"inference_mode": true,
|
||||
"init_lora_weights": true,
|
||||
"layers_pattern": null,
|
||||
"layers_to_transform": null,
|
||||
"lora_alpha": 32,
|
||||
"lora_dropout": 0.05,
|
||||
"modules_to_save": null,
|
||||
"peft_type": "LORA",
|
||||
"r": 16,
|
||||
"revision": null,
|
||||
"target_modules": [
|
||||
"q_proj",
|
||||
"k_proj",
|
||||
"v_proj",
|
||||
"o_proj"
|
||||
],
|
||||
"task_type": "CAUSAL_LM"
|
||||
}
|
||||
3
adapter/checkpoint-38370/adapter_model.bin
Normal file
3
adapter/checkpoint-38370/adapter_model.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:37bc033e91923f97b589f7ad0921f784d59607aff049111a9dbb4767897988c3
|
||||
size 54618762
|
||||
3
adapter/checkpoint-38370/adapter_model.safetensors
Normal file
3
adapter/checkpoint-38370/adapter_model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:266ecfaf03faf1094a6672be7d82008834bfcc1f6a4597bc12dcb18cd2811127
|
||||
size 54560368
|
||||
3
adapter/checkpoint-38370/optimizer.pt
Normal file
3
adapter/checkpoint-38370/optimizer.pt
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:87962e750449e597cd9a0137313ea17bd7ea5493c73b2ea30627abea915c284c
|
||||
size 109267450
|
||||
3
adapter/checkpoint-38370/rng_state.pth
Normal file
3
adapter/checkpoint-38370/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d2e02efbe945ec1715460932f146f749fc3f3f1b1d090be07f9f7be968023437
|
||||
size 14244
|
||||
3
adapter/checkpoint-38370/scheduler.pt
Normal file
3
adapter/checkpoint-38370/scheduler.pt
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:761bebbcaf851203bd35a78f6ee5c02c12f5f63b8918137036fb751ea2b87606
|
||||
size 1064
|
||||
29
adapter/checkpoint-38370/special_tokens_map.json
Normal file
29
adapter/checkpoint-38370/special_tokens_map.json
Normal file
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<unk>",
|
||||
"<s>",
|
||||
"</s>"
|
||||
],
|
||||
"bos_token": {
|
||||
"content": "<s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": "</s>",
|
||||
"unk_token": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
91122
adapter/checkpoint-38370/tokenizer.json
Normal file
91122
adapter/checkpoint-38370/tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
BIN
adapter/checkpoint-38370/tokenizer.model
(Stored with Git LFS)
Normal file
BIN
adapter/checkpoint-38370/tokenizer.model
(Stored with Git LFS)
Normal file
Binary file not shown.
44
adapter/checkpoint-38370/tokenizer_config.json
Normal file
44
adapter/checkpoint-38370/tokenizer_config.json
Normal file
@@ -0,0 +1,44 @@
|
||||
{
|
||||
"added_tokens_decoder": {
|
||||
"0": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"1": {
|
||||
"content": "<s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"2": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<unk>",
|
||||
"<s>",
|
||||
"</s>"
|
||||
],
|
||||
"bos_token": "<s>",
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "</s>",
|
||||
"legacy": true,
|
||||
"model_max_length": 1024,
|
||||
"pad_token": "</s>",
|
||||
"sp_model_kwargs": {},
|
||||
"spaces_between_special_tokens": false,
|
||||
"tokenizer_class": "LlamaTokenizer",
|
||||
"unk_token": "<unk>",
|
||||
"use_default_system_prompt": true
|
||||
}
|
||||
67
adapter/checkpoint-38370/trainer_state.json
Normal file
67
adapter/checkpoint-38370/trainer_state.json
Normal file
@@ -0,0 +1,67 @@
|
||||
{
|
||||
"best_metric": null,
|
||||
"best_model_checkpoint": null,
|
||||
"epoch": 2.5869664983937586,
|
||||
"eval_steps": 500,
|
||||
"global_step": 38370,
|
||||
"is_hyper_param_search": false,
|
||||
"is_local_process_zero": true,
|
||||
"is_world_process_zero": true,
|
||||
"log_history": [
|
||||
{
|
||||
"epoch": 0.2,
|
||||
"learning_rate": 1.997705369435521e-05,
|
||||
"loss": 1.6603,
|
||||
"step": 4357
|
||||
},
|
||||
{
|
||||
"epoch": 0.4,
|
||||
"learning_rate": 2.889296823211463e-05,
|
||||
"loss": 1.6193,
|
||||
"step": 8714
|
||||
},
|
||||
{
|
||||
"epoch": 1.01,
|
||||
"learning_rate": 2.6671765845698842e-05,
|
||||
"loss": 1.6126,
|
||||
"step": 13071
|
||||
},
|
||||
{
|
||||
"epoch": 1.21,
|
||||
"learning_rate": 2.4450053541379837e-05,
|
||||
"loss": 1.6028,
|
||||
"step": 17428
|
||||
},
|
||||
{
|
||||
"epoch": 1.41,
|
||||
"learning_rate": 2.222936107286727e-05,
|
||||
"loss": 1.5803,
|
||||
"step": 21785
|
||||
},
|
||||
{
|
||||
"epoch": 2.03,
|
||||
"learning_rate": 2.000815868645148e-05,
|
||||
"loss": 1.5767,
|
||||
"step": 26142
|
||||
},
|
||||
{
|
||||
"epoch": 2.23,
|
||||
"learning_rate": 1.778746621793891e-05,
|
||||
"loss": 1.5682,
|
||||
"step": 30499
|
||||
},
|
||||
{
|
||||
"epoch": 2.43,
|
||||
"learning_rate": 1.556728366732956e-05,
|
||||
"loss": 1.5451,
|
||||
"step": 34856
|
||||
}
|
||||
],
|
||||
"logging_steps": 4357,
|
||||
"max_steps": 65370,
|
||||
"num_train_epochs": 3,
|
||||
"save_steps": 500,
|
||||
"total_flos": 3.359036228543447e+18,
|
||||
"trial_name": null,
|
||||
"trial_params": null
|
||||
}
|
||||
3
adapter/checkpoint-38370/training_args.bin
Normal file
3
adapter/checkpoint-38370/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7b1d0570ad569f6ae4477ba29527d49d16b3a13035f2623f3fda9d851806436f
|
||||
size 4600
|
||||
3
adapter/training_args.bin
Normal file
3
adapter/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7b1d0570ad569f6ae4477ba29527d49d16b3a13035f2623f3fda9d851806436f
|
||||
size 4600
|
||||
47
adapter/training_params.json
Normal file
47
adapter/training_params.json
Normal file
@@ -0,0 +1,47 @@
|
||||
{
|
||||
"model": "/run/media/knut/HD2/Yarn-Mistral-7b-128k/",
|
||||
"data_path": ".",
|
||||
"project_name": "webMistral",
|
||||
"train_split": "train",
|
||||
"valid_split": null,
|
||||
"text_column": "text",
|
||||
"rejected_text_column": "rejected",
|
||||
"token": null,
|
||||
"lr": 3e-05,
|
||||
"epochs": 3,
|
||||
"batch_size": 2,
|
||||
"warmup_ratio": 0.1,
|
||||
"gradient_accumulation": 1,
|
||||
"optimizer": "adamw_torch",
|
||||
"scheduler": "linear",
|
||||
"weight_decay": 0.0,
|
||||
"max_grad_norm": 1.0,
|
||||
"seed": 42,
|
||||
"add_eos_token": false,
|
||||
"block_size": -1,
|
||||
"use_peft": true,
|
||||
"lora_r": 16,
|
||||
"lora_alpha": 32,
|
||||
"lora_dropout": 0.05,
|
||||
"logging_steps": -1,
|
||||
"evaluation_strategy": "epoch",
|
||||
"save_total_limit": 1,
|
||||
"save_strategy": "epoch",
|
||||
"auto_find_batch_size": false,
|
||||
"fp16": false,
|
||||
"push_to_hub": false,
|
||||
"use_int8": false,
|
||||
"model_max_length": 1024,
|
||||
"repo_id": null,
|
||||
"use_int4": true,
|
||||
"trainer": "sft",
|
||||
"target_modules": "q_proj,k_proj,v_proj,o_proj",
|
||||
"merge_adapter": true,
|
||||
"username": null,
|
||||
"use_flash_attention_2": true,
|
||||
"log": "none",
|
||||
"disable_gradient_checkpointing": false,
|
||||
"model_ref": null,
|
||||
"dpo_beta": 0.1,
|
||||
"prompt_text_column": "prompt"
|
||||
}
|
||||
Reference in New Issue
Block a user