初始化项目,由ModelHub XC社区提供模型

Model: SummerSigh/Pythia410m-V0-Instruct
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-11 15:49:40 +08:00
commit 61149ee9c2
10 changed files with 100699 additions and 0 deletions

35
.gitattributes vendored Normal file
View File

@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

26
README.md Normal file
View File

@@ -0,0 +1,26 @@
---
license: apache-2.0
---
# Model info
This is EleutherAI/pythia-410m finetuned on OpenAssistant/oasst_top1_2023-08-25.
# Why
Plain and simple. I'm experimenting with making instruction LLMs under 1B params. I think we can still squeeze better performance out of these models.
# Random Notes
- Only using OpenAssistant data gives fantastic results because of its high quality. I like the top1 dataset because of its lack of prompt refusals.
- Prompt refusals have been shown to damage the performance of instruction LLMs. My theory is that the model "spends" parameters learning how to refuse prompts rather than learning actually useful information. Adding to this, I think that unlike other tasks, learning prompt refusals most likely has no other value in terms of transfer learning.
# Usage
```python
from transformers import pipeline
pipe = pipeline("text-generation", model="SummerSigh/Pythia410m-V0-Instruct")
out= pipe("<|im_start|>user\nWhat's the meaning of life?<|im_end|>\n<|im_start|>assistant\n",max_length = 500,repetition_penalty = 1.2, temperature = 0.5, do_sample = True)
print(out[0]["generated_text"])
```
# Contact
If you want to contact me and work with me on making good under 1B param models, you can reach me on Discord at summer_ai.

29
config.json Normal file
View File

@@ -0,0 +1,29 @@
{
"_name_or_path": "EleutherAI/pythia-410m",
"architectures": [
"GPTNeoXForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 0,
"classifier_dropout": 0.1,
"eos_token_id": 0,
"hidden_act": "gelu",
"hidden_dropout": 0.0,
"hidden_size": 1024,
"initializer_range": 0.02,
"intermediate_size": 4096,
"layer_norm_eps": 1e-05,
"max_position_embeddings": 2048,
"model_type": "gpt_neox",
"num_attention_heads": 16,
"num_hidden_layers": 24,
"rope_scaling": null,
"rotary_emb_base": 10000,
"rotary_pct": 0.25,
"tie_word_embeddings": false,
"torch_dtype": "float32",
"transformers_version": "4.33.0",
"use_cache": true,
"use_parallel_residual": true,
"vocab_size": 50304
}

6
generation_config.json Normal file
View File

@@ -0,0 +1,6 @@
{
"_from_model_config": true,
"bos_token_id": 0,
"eos_token_id": 0,
"transformers_version": "4.33.0"
}

3
model.safetensors Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bcfb42005e6cce61bea2e04c4a3f1f9a2f58ba36052241ebe5d1f11045e878b4
size 1621373792

3
pytorch_model.bin Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:21f56606d8842c943f26f4498d3594f54f9778cbb3e752999cdd812bd28018e0
size 1621443005

6
special_tokens_map.json Normal file
View File

@@ -0,0 +1,6 @@
{
"bos_token": "<|endoftext|>",
"eos_token": "<|endoftext|>",
"pad_token": "[PAD]",
"unk_token": "<|endoftext|>"
}

100579
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

9
tokenizer_config.json Normal file
View File

@@ -0,0 +1,9 @@
{
"add_prefix_space": false,
"bos_token": "<|endoftext|>",
"clean_up_tokenization_spaces": true,
"eos_token": "<|endoftext|>",
"model_max_length": 300,
"tokenizer_class": "GPTNeoXTokenizer",
"unk_token": "<|endoftext|>"
}

3
training_args.bin Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1bb5923d2bc4b750d60d957e0a3aae661912a26b16d7cc224d171d9b0f5522d7
size 4155