初始化项目,由ModelHub XC社区提供模型
Model: Felladrin/Llama-68M-Chat-v1 Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
209
README.md
Normal file
209
README.md
Normal file
@@ -0,0 +1,209 @@
|
|||||||
|
---
|
||||||
|
language:
|
||||||
|
- en
|
||||||
|
license: apache-2.0
|
||||||
|
tags:
|
||||||
|
- text-generation
|
||||||
|
datasets:
|
||||||
|
- THUDM/webglm-qa
|
||||||
|
- databricks/databricks-dolly-15k
|
||||||
|
- cognitivecomputations/wizard_vicuna_70k_unfiltered
|
||||||
|
- totally-not-an-llm/EverythingLM-data-V3
|
||||||
|
- Amod/mental_health_counseling_conversations
|
||||||
|
- sablo/oasst2_curated
|
||||||
|
- starfishmedical/webGPT_x_dolly
|
||||||
|
- Open-Orca/OpenOrca
|
||||||
|
- mlabonne/chatml_dpo_pairs
|
||||||
|
base_model: JackFram/llama-68m
|
||||||
|
widget:
|
||||||
|
- messages:
|
||||||
|
- role: system
|
||||||
|
content: You are a career counselor. The user will provide you with an individual
|
||||||
|
looking for guidance in their professional life, and your task is to assist
|
||||||
|
them in determining what careers they are most suited for based on their skills,
|
||||||
|
interests, and experience. You should also conduct research into the various
|
||||||
|
options available, explain the job market trends in different industries, and
|
||||||
|
advise on which qualifications would be beneficial for pursuing particular fields.
|
||||||
|
- role: user
|
||||||
|
content: Heya!
|
||||||
|
- role: assistant
|
||||||
|
content: Hi! How may I help you?
|
||||||
|
- role: user
|
||||||
|
content: I am interested in developing a career in software engineering. What
|
||||||
|
would you recommend me to do?
|
||||||
|
- messages:
|
||||||
|
- role: system
|
||||||
|
content: You are a knowledgeable assistant. Help the user as much as you can.
|
||||||
|
- role: user
|
||||||
|
content: How to become healthier?
|
||||||
|
- messages:
|
||||||
|
- role: system
|
||||||
|
content: You are a helpful assistant who provides concise responses.
|
||||||
|
- role: user
|
||||||
|
content: Hi!
|
||||||
|
- role: assistant
|
||||||
|
content: Hello there! How may I help you?
|
||||||
|
- role: user
|
||||||
|
content: I need to build a simple website. Where should I start learning about web development?
|
||||||
|
- messages:
|
||||||
|
- role: system
|
||||||
|
content: You are a very creative assistant. User will give you a task, which you should complete with all your knowledge.
|
||||||
|
- role: user
|
||||||
|
content: Write the background story of an RPG game about wizards and dragons in a sci-fi world.
|
||||||
|
inference:
|
||||||
|
parameters:
|
||||||
|
max_new_tokens: 64
|
||||||
|
penalty_alpha: 0.5
|
||||||
|
top_k: 4
|
||||||
|
model-index:
|
||||||
|
- name: Llama-68M-Chat-v1
|
||||||
|
results:
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
name: Text Generation
|
||||||
|
dataset:
|
||||||
|
name: AI2 Reasoning Challenge (25-Shot)
|
||||||
|
type: ai2_arc
|
||||||
|
config: ARC-Challenge
|
||||||
|
split: test
|
||||||
|
args:
|
||||||
|
num_few_shot: 25
|
||||||
|
metrics:
|
||||||
|
- type: acc_norm
|
||||||
|
value: 23.29
|
||||||
|
name: normalized accuracy
|
||||||
|
source:
|
||||||
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/Llama-68M-Chat-v1
|
||||||
|
name: Open LLM Leaderboard
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
name: Text Generation
|
||||||
|
dataset:
|
||||||
|
name: HellaSwag (10-Shot)
|
||||||
|
type: hellaswag
|
||||||
|
split: validation
|
||||||
|
args:
|
||||||
|
num_few_shot: 10
|
||||||
|
metrics:
|
||||||
|
- type: acc_norm
|
||||||
|
value: 28.27
|
||||||
|
name: normalized accuracy
|
||||||
|
source:
|
||||||
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/Llama-68M-Chat-v1
|
||||||
|
name: Open LLM Leaderboard
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
name: Text Generation
|
||||||
|
dataset:
|
||||||
|
name: MMLU (5-Shot)
|
||||||
|
type: cais/mmlu
|
||||||
|
config: all
|
||||||
|
split: test
|
||||||
|
args:
|
||||||
|
num_few_shot: 5
|
||||||
|
metrics:
|
||||||
|
- type: acc
|
||||||
|
value: 25.18
|
||||||
|
name: accuracy
|
||||||
|
source:
|
||||||
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/Llama-68M-Chat-v1
|
||||||
|
name: Open LLM Leaderboard
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
name: Text Generation
|
||||||
|
dataset:
|
||||||
|
name: TruthfulQA (0-shot)
|
||||||
|
type: truthful_qa
|
||||||
|
config: multiple_choice
|
||||||
|
split: validation
|
||||||
|
args:
|
||||||
|
num_few_shot: 0
|
||||||
|
metrics:
|
||||||
|
- type: mc2
|
||||||
|
value: 47.27
|
||||||
|
source:
|
||||||
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/Llama-68M-Chat-v1
|
||||||
|
name: Open LLM Leaderboard
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
name: Text Generation
|
||||||
|
dataset:
|
||||||
|
name: Winogrande (5-shot)
|
||||||
|
type: winogrande
|
||||||
|
config: winogrande_xl
|
||||||
|
split: validation
|
||||||
|
args:
|
||||||
|
num_few_shot: 5
|
||||||
|
metrics:
|
||||||
|
- type: acc
|
||||||
|
value: 54.3
|
||||||
|
name: accuracy
|
||||||
|
source:
|
||||||
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/Llama-68M-Chat-v1
|
||||||
|
name: Open LLM Leaderboard
|
||||||
|
- task:
|
||||||
|
type: text-generation
|
||||||
|
name: Text Generation
|
||||||
|
dataset:
|
||||||
|
name: GSM8k (5-shot)
|
||||||
|
type: gsm8k
|
||||||
|
config: main
|
||||||
|
split: test
|
||||||
|
args:
|
||||||
|
num_few_shot: 5
|
||||||
|
metrics:
|
||||||
|
- type: acc
|
||||||
|
value: 0.0
|
||||||
|
name: accuracy
|
||||||
|
source:
|
||||||
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=Felladrin/Llama-68M-Chat-v1
|
||||||
|
name: Open LLM Leaderboard
|
||||||
|
---
|
||||||
|
|
||||||
|
# A Llama Chat Model of 68M Parameters
|
||||||
|
|
||||||
|
- Base model: [JackFram/llama-68m](https://huggingface.co/JackFram/llama-68m)
|
||||||
|
- Datasets:
|
||||||
|
- [THUDM/webglm-qa](https://huggingface.co/datasets/THUDM/webglm-qa)
|
||||||
|
- [databricks/databricks-dolly-15k](https://huggingface.co/datasets/databricks/databricks-dolly-15k)
|
||||||
|
- [cognitivecomputations/wizard_vicuna_70k_unfiltered](https://huggingface.co/datasets/cognitivecomputations/wizard_vicuna_70k_unfiltered)
|
||||||
|
- [totally-not-an-llm/EverythingLM-data-V3](https://huggingface.co/datasets/totally-not-an-llm/EverythingLM-data-V3)
|
||||||
|
- [Amod/mental_health_counseling_conversations](https://huggingface.co/datasets/Amod/mental_health_counseling_conversations)
|
||||||
|
- [sablo/oasst2_curated](https://huggingface.co/datasets/sablo/oasst2_curated)
|
||||||
|
- [starfishmedical/webGPT_x_dolly](https://huggingface.co/datasets/starfishmedical/webGPT_x_dolly)
|
||||||
|
- [Open-Orca/OpenOrca](https://huggingface.co/datasets/Open-Orca/OpenOrca)
|
||||||
|
- [mlabonne/chatml_dpo_pairs](https://huggingface.co/datasets/mlabonne/chatml_dpo_pairs)
|
||||||
|
- Availability in other ML formats:
|
||||||
|
- GGUF: [afrideva/Llama-68M-Chat-v1-GGUF](https://huggingface.co/afrideva/Llama-68M-Chat-v1-GGUF)
|
||||||
|
- ONNX: [Felladrin/onnx-Llama-68M-Chat-v1](https://huggingface.co/Felladrin/onnx-Llama-68M-Chat-v1)
|
||||||
|
|
||||||
|
## Recommended Prompt Format
|
||||||
|
|
||||||
|
```
|
||||||
|
<|im_start|>system
|
||||||
|
{system_message}<|im_end|>
|
||||||
|
<|im_start|>user
|
||||||
|
{user_message}<|im_end|>
|
||||||
|
<|im_start|>assistant
|
||||||
|
```
|
||||||
|
|
||||||
|
## Recommended Inference Parameters
|
||||||
|
|
||||||
|
```yml
|
||||||
|
penalty_alpha: 0.5
|
||||||
|
top_k: 4
|
||||||
|
```
|
||||||
|
|
||||||
|
## [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
|
||||||
|
|
||||||
|
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_Felladrin__Llama-68M-Chat-v1).
|
||||||
|
|
||||||
|
| Metric |Value|
|
||||||
|
|---------------------------------|----:|
|
||||||
|
|Avg. |29.72|
|
||||||
|
|AI2 Reasoning Challenge (25-Shot)|23.29|
|
||||||
|
|HellaSwag (10-Shot) |28.27|
|
||||||
|
|MMLU (5-Shot) |25.18|
|
||||||
|
|TruthfulQA (0-shot) |47.27|
|
||||||
|
|Winogrande (5-shot) |54.30|
|
||||||
|
|GSM8k (5-shot) | 0.00|
|
||||||
24
config.json
Normal file
24
config.json
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
{
|
||||||
|
"architectures": ["LlamaForCausalLM"],
|
||||||
|
"attention_bias": false,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 768,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 3072,
|
||||||
|
"max_position_embeddings": 2048,
|
||||||
|
"model_type": "llama",
|
||||||
|
"num_attention_heads": 12,
|
||||||
|
"num_hidden_layers": 2,
|
||||||
|
"num_key_value_heads": 12,
|
||||||
|
"pretraining_tp": 1,
|
||||||
|
"rms_norm_eps": 1e-6,
|
||||||
|
"rope_scaling": null,
|
||||||
|
"rope_theta": 10000.0,
|
||||||
|
"tie_word_embeddings": false,
|
||||||
|
"torch_dtype": "float32",
|
||||||
|
"transformers_version": "4.35.2",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 32000
|
||||||
|
}
|
||||||
3
model.safetensors
Normal file
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:a0bf4d8d47e9d27cd68543726e6e6192fbc7a2ed061f9d1c3a6cc2a5c452ffbc
|
||||||
|
size 272123144
|
||||||
30
special_tokens_map.json
Normal file
30
special_tokens_map.json
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
{
|
||||||
|
"bos_token": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"eos_token": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"pad_token": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"unk_token": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
}
|
||||||
93391
tokenizer.json
Normal file
93391
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
BIN
tokenizer.model
(Stored with Git LFS)
Normal file
BIN
tokenizer.model
(Stored with Git LFS)
Normal file
Binary file not shown.
41
tokenizer_config.json
Normal file
41
tokenizer_config.json
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
{
|
||||||
|
"added_tokens_decoder": {
|
||||||
|
"0": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"1": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"2": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"bos_token": "<s>",
|
||||||
|
"chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "</s>",
|
||||||
|
"legacy": false,
|
||||||
|
"model_max_length": 1000000000000000019884624838656,
|
||||||
|
"pad_token": "</s>",
|
||||||
|
"padding_side": "right",
|
||||||
|
"sp_model_kwargs": {},
|
||||||
|
"spaces_between_special_tokens": false,
|
||||||
|
"tokenizer_class": "LlamaTokenizer",
|
||||||
|
"unk_token": "<unk>",
|
||||||
|
"use_default_system_prompt": false
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user