初始化项目,由ModelHub XC社区提供模型

Model: KnutJaegersberg/webMistral-7B
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-13 21:52:10 +08:00
commit 79e1c26b3c
31 changed files with 184688 additions and 0 deletions

View File

@@ -0,0 +1,23 @@
{
"auto_mapping": null,
"base_model_name_or_path": "/run/media/knut/HD2/Yarn-Mistral-7b-128k/",
"bias": "none",
"fan_in_fan_out": false,
"inference_mode": true,
"init_lora_weights": true,
"layers_pattern": null,
"layers_to_transform": null,
"lora_alpha": 32,
"lora_dropout": 0.05,
"modules_to_save": null,
"peft_type": "LORA",
"r": 16,
"revision": null,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj"
],
"task_type": "CAUSAL_LM"
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:266ecfaf03faf1094a6672be7d82008834bfcc1f6a4597bc12dcb18cd2811127
size 54560368

View File

@@ -0,0 +1,34 @@
---
library_name: peft
---
## Training procedure

The following `bitsandbytes` quantization config was used during training:

- quant_method: bitsandbytes
- load_in_8bit: False
- load_in_4bit: True
- llm_int8_threshold: 6.0
- llm_int8_skip_modules: None
- llm_int8_enable_fp32_cpu_offload: False
- llm_int8_has_fp16_weight: False
- bnb_4bit_quant_type: nf4
- bnb_4bit_use_double_quant: False
- bnb_4bit_compute_dtype: float16
### Framework versions

- PEFT 0.5.0

View File

@@ -0,0 +1,23 @@
{
"auto_mapping": null,
"base_model_name_or_path": "/run/media/knut/HD2/Yarn-Mistral-7b-128k/",
"bias": "none",
"fan_in_fan_out": false,
"inference_mode": true,
"init_lora_weights": true,
"layers_pattern": null,
"layers_to_transform": null,
"lora_alpha": 32,
"lora_dropout": 0.05,
"modules_to_save": null,
"peft_type": "LORA",
"r": 16,
"revision": null,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj"
],
"task_type": "CAUSAL_LM"
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:37bc033e91923f97b589f7ad0921f784d59607aff049111a9dbb4767897988c3
size 54618762

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:266ecfaf03faf1094a6672be7d82008834bfcc1f6a4597bc12dcb18cd2811127
size 54560368

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:87962e750449e597cd9a0137313ea17bd7ea5493c73b2ea30627abea915c284c
size 109267450

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d2e02efbe945ec1715460932f146f749fc3f3f1b1d090be07f9f7be968023437
size 14244

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:761bebbcaf851203bd35a78f6ee5c02c12f5f63b8918137036fb751ea2b87606
size 1064

View File

@@ -0,0 +1,29 @@
{
"additional_special_tokens": [
"<unk>",
"<s>",
"</s>"
],
"bos_token": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": "</s>",
"unk_token": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

File diff suppressed because it is too large Load Diff

BIN
adapter/checkpoint-38370/tokenizer.model (Stored with Git LFS) Normal file

Binary file not shown.

View File

@@ -0,0 +1,44 @@
{
"added_tokens_decoder": {
"0": {
"content": "<unk>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "</s>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<unk>",
"<s>",
"</s>"
],
"bos_token": "<s>",
"clean_up_tokenization_spaces": false,
"eos_token": "</s>",
"legacy": true,
"model_max_length": 1024,
"pad_token": "</s>",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
"unk_token": "<unk>",
"use_default_system_prompt": true
}

View File

@@ -0,0 +1,67 @@
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.5869664983937586,
"eval_steps": 500,
"global_step": 38370,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2,
"learning_rate": 1.997705369435521e-05,
"loss": 1.6603,
"step": 4357
},
{
"epoch": 0.4,
"learning_rate": 2.889296823211463e-05,
"loss": 1.6193,
"step": 8714
},
{
"epoch": 1.01,
"learning_rate": 2.6671765845698842e-05,
"loss": 1.6126,
"step": 13071
},
{
"epoch": 1.21,
"learning_rate": 2.4450053541379837e-05,
"loss": 1.6028,
"step": 17428
},
{
"epoch": 1.41,
"learning_rate": 2.222936107286727e-05,
"loss": 1.5803,
"step": 21785
},
{
"epoch": 2.03,
"learning_rate": 2.000815868645148e-05,
"loss": 1.5767,
"step": 26142
},
{
"epoch": 2.23,
"learning_rate": 1.778746621793891e-05,
"loss": 1.5682,
"step": 30499
},
{
"epoch": 2.43,
"learning_rate": 1.556728366732956e-05,
"loss": 1.5451,
"step": 34856
}
],
"logging_steps": 4357,
"max_steps": 65370,
"num_train_epochs": 3,
"save_steps": 500,
"total_flos": 3.359036228543447e+18,
"trial_name": null,
"trial_params": null
}

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7b1d0570ad569f6ae4477ba29527d49d16b3a13035f2623f3fda9d851806436f
size 4600

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7b1d0570ad569f6ae4477ba29527d49d16b3a13035f2623f3fda9d851806436f
size 4600

View File

@@ -0,0 +1,47 @@
{
"model": "/run/media/knut/HD2/Yarn-Mistral-7b-128k/",
"data_path": ".",
"project_name": "webMistral",
"train_split": "train",
"valid_split": null,
"text_column": "text",
"rejected_text_column": "rejected",
"token": null,
"lr": 3e-05,
"epochs": 3,
"batch_size": 2,
"warmup_ratio": 0.1,
"gradient_accumulation": 1,
"optimizer": "adamw_torch",
"scheduler": "linear",
"weight_decay": 0.0,
"max_grad_norm": 1.0,
"seed": 42,
"add_eos_token": false,
"block_size": -1,
"use_peft": true,
"lora_r": 16,
"lora_alpha": 32,
"lora_dropout": 0.05,
"logging_steps": -1,
"evaluation_strategy": "epoch",
"save_total_limit": 1,
"save_strategy": "epoch",
"auto_find_batch_size": false,
"fp16": false,
"push_to_hub": false,
"use_int8": false,
"model_max_length": 1024,
"repo_id": null,
"use_int4": true,
"trainer": "sft",
"target_modules": "q_proj,k_proj,v_proj,o_proj",
"merge_adapter": true,
"username": null,
"use_flash_attention_2": true,
"log": "none",
"disable_gradient_checkpointing": false,
"model_ref": null,
"dpo_beta": 0.1,
"prompt_text_column": "prompt"
}