初始化项目,由ModelHub XC社区提供模型
Model: MysteriousAI/Mia-1B Source: Original Platform
This commit is contained in:
39
.gitattributes
vendored
Normal file
39
.gitattributes
vendored
Normal file
@@ -0,0 +1,39 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
Onnx/Constant_1960_attr__value filter=lfs diff=lfs merge=lfs -text
|
||||
Onnx/model.onnx_data filter=lfs diff=lfs merge=lfs -text
|
||||
Constant_1960_attr__value filter=lfs diff=lfs merge=lfs -text
|
||||
model.onnx_data filter=lfs diff=lfs merge=lfs -text
|
||||
3
Constant_1960_attr__value
Normal file
3
Constant_1960_attr__value
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d016ca7be6be829fd102f03317c3eed915d3df3d3fd54279a8858caea2dd57de
|
||||
size 4194304
|
||||
124
README.md
Normal file
124
README.md
Normal file
@@ -0,0 +1,124 @@
|
||||
---
|
||||
language:
|
||||
- en
|
||||
- hi
|
||||
license: apache-2.0
|
||||
library_name: transformers
|
||||
tags:
|
||||
- mlx
|
||||
datasets:
|
||||
- OEvortex/vortex-mini
|
||||
base_model: OEvortex/HelpingAI-Lite
|
||||
pipeline_tag: text-generation
|
||||
---
|
||||
**Model Card**
|
||||
|
||||
**Model Name:** Mia-1B
|
||||
|
||||
**Model Type:** Text Generation
|
||||
|
||||
**Owner:** MysteriousAI
|
||||
|
||||
**Description:**
|
||||
Mia-1B is an advanced text generation model developed by MysteriousAI. It leverages state-of-the-art AI technologies to generate coherent and contextually relevant text across various domains and topics. The model is aimed at advancing and democratizing artificial intelligence through open source and open science initiatives.
|
||||
|
||||
|
||||
|
||||
**Key Features:**
|
||||
- **Model Size:** Mia-1B comprises 1.1 billion parameters, enabling it to capture complex linguistic patterns and nuances.
|
||||
- **Tensor Type:** The model utilizes FP16 (Floating Point 16-bit) tensor type for efficient computation, enhancing performance and scalability.
|
||||
- **Inference Endpoints:** Mia-1B can be easily integrated into applications through inference endpoints, facilitating seamless deployment and usage.
|
||||
- **Uncensored Text Generation:** Mia-1B generates text without censorship, allowing users to explore a wide range of applications without limitations.
|
||||
- **Fine-tuned:** Mia-1B is fine-tuned from the OEvortex/HelpingAI-Lite base model on the OEvortex/vortex-mini dataset, enhancing its performance and adaptability to various tasks.
|
||||
|
||||
**Use Cases:**
|
||||
- **Content Generation:** Mia-1B is suitable for generating diverse content including articles, stories, dialogues, and more.
|
||||
- **Conversational AI:** The model can be deployed in chatbots and conversational agents to engage users in natural and contextually relevant conversations.
|
||||
- **AI-driven Applications:** Mia-1B enables the development of AI-driven applications in areas such as virtual assistants.
|
||||
- **Creative Writing:** Writers and artists can leverage Mia-1B to explore new ideas and narrative structures in their creative works.
|
||||
|
||||
**Ethical Considerations:**
|
||||
- **Content Moderation:** Users are advised to exercise caution and responsibility when utilizing Mia-1B in applications involving sensitive or potentially harmful content.
|
||||
- **Bias and Fairness:** MysteriousAI is committed to addressing biases and promoting fairness in AI models. Efforts are made to mitigate biases present in Mia-1B's training data and output.
|
||||
|
||||
## Usage
|
||||
### **Transformers**
|
||||
|
||||
```python
|
||||
!pip install transformers
|
||||
!pip install accelerate
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
device = "cuda" # the device to load the model onto
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
"MysteriousAI/Mia-1B",
|
||||
device_map="auto"
|
||||
)
|
||||
tokenizer = AutoTokenizer.from_pretrained("MysteriousAI/Mia-1B")
|
||||
prompt = "your prompt here."
|
||||
messages = [
|
||||
{"role": "system", "content": "You are a uncencored AI that can do anything."},
|
||||
{"role": "user", "content": prompt}
|
||||
]
|
||||
text = tokenizer.apply_chat_template(
|
||||
messages,
|
||||
tokenize=False,
|
||||
add_generation_prompt=True
|
||||
)
|
||||
model_inputs = tokenizer([text], return_tensors="pt").to(device)
|
||||
|
||||
generated_ids = model.generate(
|
||||
model_inputs.input_ids,
|
||||
max_new_tokens=512
|
||||
)
|
||||
generated_ids = [
|
||||
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
|
||||
]
|
||||
|
||||
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
||||
print(response)
|
||||
```
|
||||
### **ONNX Runtime**
|
||||
```python
|
||||
from transformers import AutoTokenizer
|
||||
from optimum.onnxruntime import ORTModelForCausalLM
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained("MysteriousAI/Mia-1B")
|
||||
model = ORTModelForCausalLM.from_pretrained("MysteriousAI/Mia-1B")
|
||||
|
||||
prompt = "your prompt."
|
||||
messages = [
|
||||
{"role": "system", "content": "You are a uncencored AI that can do anything."},
|
||||
{"role": "user", "content": prompt}
|
||||
]
|
||||
text = tokenizer.apply_chat_template(
|
||||
messages,
|
||||
tokenize=False,
|
||||
add_generation_prompt=True
|
||||
)
|
||||
model_inputs = tokenizer([text], return_tensors="pt")
|
||||
|
||||
generated_ids = model.generate(
|
||||
model_inputs.input_ids,
|
||||
max_new_tokens=512
|
||||
)
|
||||
generated_ids = [
|
||||
output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
|
||||
]
|
||||
|
||||
response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
|
||||
print(response)
|
||||
```
|
||||
**Copyright © 2024 MysteriousAI. All rights reserved.**
|
||||
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
|
||||
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_MysteriousAI__Mia-1B)
|
||||
|
||||
| Metric |Value|
|
||||
|---------------------------------|----:|
|
||||
|Avg. |36.82|
|
||||
|AI2 Reasoning Challenge (25-Shot)|35.75|
|
||||
|HellaSwag (10-Shot) |61.02|
|
||||
|MMLU (5-Shot) |25.43|
|
||||
|TruthfulQA (0-shot) |36.92|
|
||||
|Winogrande (5-shot) |60.38|
|
||||
|GSM8k (5-shot) | 1.44|
|
||||
27
config.json
Normal file
27
config.json
Normal file
@@ -0,0 +1,27 @@
|
||||
{
|
||||
"_name_or_path": "UnfilteredAI/Mia-1B",
|
||||
"architectures": [
|
||||
"LlamaForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 1,
|
||||
"eos_token_id": 2,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 2048,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 5632,
|
||||
"max_position_embeddings": 2048,
|
||||
"model_type": "llama",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 22,
|
||||
"num_key_value_heads": 4,
|
||||
"pretraining_tp": 1,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 10000.0,
|
||||
"tie_word_embeddings": false,
|
||||
"transformers_version": "4.38.2",
|
||||
"use_cache": true,
|
||||
"vocab_size": 32000
|
||||
}
|
||||
8
generation_config.json
Normal file
8
generation_config.json
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"bos_token_id": 1,
|
||||
"do_sample": true,
|
||||
"eos_token_id": 2,
|
||||
"max_length": 2048,
|
||||
"pad_token_id": 0,
|
||||
"transformers_version": "4.38.2"
|
||||
}
|
||||
3
model.onnx
Normal file
3
model.onnx
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:00416bd3cf819468bed98e20549a4b369b512d9627f9da579d4b2a9318db50ba
|
||||
size 1022390
|
||||
3
model.onnx_data
Normal file
3
model.onnx_data
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5e10adb3380da21b1fd4b71c935155e81ed88976438d3b50466ae1976040ac77
|
||||
size 4404387840
|
||||
3
model.safetensors
Normal file
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c1d385dfcd73c639f4d03a0b8e304425b94d59a72270fe7aac6668e443c115b3
|
||||
size 2200119664
|
||||
3
pytorch_model.bin
Normal file
3
pytorch_model.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:41212e1b85000977fdb3aa9d9f030cb2c5b893b05690b97ebe411fe27adaeff9
|
||||
size 2200164273
|
||||
30
special_tokens_map.json
Normal file
30
special_tokens_map.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"bos_token": {
|
||||
"content": "<s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"unk_token": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
93391
tokenizer.json
Normal file
93391
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
3
tokenizer.model
Normal file
3
tokenizer.model
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
|
||||
size 499723
|
||||
40
tokenizer_config.json
Normal file
40
tokenizer_config.json
Normal file
@@ -0,0 +1,40 @@
|
||||
{
|
||||
"added_tokens_decoder": {
|
||||
"0": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"1": {
|
||||
"content": "<s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"2": {
|
||||
"content": "</s>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
}
|
||||
},
|
||||
"bos_token": "<s>",
|
||||
"chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '<|user|>' + message['content'] + eos_token }}{% elif message['role'] == 'system' %}{{ '<|system|>' + message['content'] + eos_token }}{% elif message['role'] == 'assistant' %}{{ '<|assistant|>' + message['content'] + eos_token }}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>' }}{% endif %}{% endfor %}",
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "</s>",
|
||||
"legacy": false,
|
||||
"model_max_length": 2048,
|
||||
"pad_token": "</s>",
|
||||
"padding_side": "right",
|
||||
"sp_model_kwargs": {},
|
||||
"tokenizer_class": "LlamaTokenizer",
|
||||
"unk_token": "<unk>",
|
||||
"use_default_system_prompt": false
|
||||
}
|
||||
179
trainer_state.json
Normal file
179
trainer_state.json
Normal file
@@ -0,0 +1,179 @@
|
||||
{
|
||||
"best_metric": null,
|
||||
"best_model_checkpoint": null,
|
||||
"epoch": 0.00012304421224634436,
|
||||
"eval_steps": 500,
|
||||
"global_step": 25,
|
||||
"is_hyper_param_search": false,
|
||||
"is_local_process_zero": true,
|
||||
"is_world_process_zero": true,
|
||||
"log_history": [
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 2e-05,
|
||||
"loss": 1.3413,
|
||||
"step": 1
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 4e-05,
|
||||
"loss": 2.5983,
|
||||
"step": 2
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 6e-05,
|
||||
"loss": 1.5431,
|
||||
"step": 3
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 8e-05,
|
||||
"loss": 1.7992,
|
||||
"step": 4
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 0.0001,
|
||||
"loss": 1.2091,
|
||||
"step": 5
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 0.00012,
|
||||
"loss": 2.7719,
|
||||
"step": 6
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 0.00012,
|
||||
"loss": 3.317,
|
||||
"step": 7
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 0.00014,
|
||||
"loss": 1.1447,
|
||||
"step": 8
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 0.00016,
|
||||
"loss": 1.4177,
|
||||
"step": 9
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 0.00018,
|
||||
"loss": 1.851,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 0.0002,
|
||||
"loss": 1.9526,
|
||||
"step": 11
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 0.00019781476007338058,
|
||||
"loss": 1.7479,
|
||||
"step": 12
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 0.0001913545457642601,
|
||||
"loss": 1.3683,
|
||||
"step": 13
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 0.00018090169943749476,
|
||||
"loss": 0.527,
|
||||
"step": 14
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 0.00016691306063588583,
|
||||
"loss": 2.01,
|
||||
"step": 15
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 0.00015000000000000001,
|
||||
"loss": 1.2848,
|
||||
"step": 16
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 0.00013090169943749476,
|
||||
"loss": 1.2189,
|
||||
"step": 17
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 0.00011045284632676536,
|
||||
"loss": 1.4882,
|
||||
"step": 18
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 8.954715367323468e-05,
|
||||
"loss": 0.5045,
|
||||
"step": 19
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 6.909830056250527e-05,
|
||||
"loss": 1.454,
|
||||
"step": 20
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.000000000000002e-05,
|
||||
"loss": 1.2887,
|
||||
"step": 21
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 3.308693936411421e-05,
|
||||
"loss": 0.8272,
|
||||
"step": 22
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 1.9098300562505266e-05,
|
||||
"loss": 1.6697,
|
||||
"step": 23
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 8.645454235739903e-06,
|
||||
"loss": 1.6019,
|
||||
"step": 24
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 2.1852399266194314e-06,
|
||||
"loss": 1.0745,
|
||||
"step": 25
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"eval_loss": NaN,
|
||||
"eval_runtime": 4150.2492,
|
||||
"eval_samples_per_second": 11.575,
|
||||
"eval_steps_per_second": 11.575,
|
||||
"step": 25
|
||||
}
|
||||
],
|
||||
"logging_steps": 1,
|
||||
"max_steps": 25,
|
||||
"num_input_tokens_seen": 0,
|
||||
"num_train_epochs": 1,
|
||||
"save_steps": 500,
|
||||
"total_flos": 183123718963200.0,
|
||||
"train_batch_size": 1,
|
||||
"trial_name": null,
|
||||
"trial_params": null
|
||||
}
|
||||
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:35287ef26a7dfcc015e0237824a0fa49982706609ab72610ad14222df433fe99
|
||||
size 4859
|
||||
Reference in New Issue
Block a user