初始化项目,由ModelHub XC社区提供模型
Model: dphn/dolphin-2.2-mistral-7b Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
13
README.md
Normal file
13
README.md
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
---
|
||||||
|
license: apache-2.0
|
||||||
|
base_model: mistralai/Mistral-7B-v0.1
|
||||||
|
datasets:
|
||||||
|
- ehartford/dolphin
|
||||||
|
- jondurbin/airoboros-2.2.1
|
||||||
|
language:
|
||||||
|
- en
|
||||||
|
---
|
||||||
|
|
||||||
|
# dolphin-2.2-mistral-7b
|
||||||
|
|
||||||
|
This model was overfit and has been re-released as [dolphin-2.2.1-mistral-7b](https://huggingface.co/ehartford/dolphin-2.2.1-mistral-7b). Please use that model instead.
|
||||||
4
added_tokens.json
Normal file
4
added_tokens.json
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
{
|
||||||
|
"<|im_end|>": 32000,
|
||||||
|
"<|im_start|>": 32001
|
||||||
|
}
|
||||||
25
config.json
Normal file
25
config.json
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"_name_or_path": "mistralai/Mistral-7B-v0.1",
|
||||||
|
"architectures": [
|
||||||
|
"MistralForCausalLM"
|
||||||
|
],
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 4096,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 14336,
|
||||||
|
"max_position_embeddings": 32768,
|
||||||
|
"model_type": "mistral",
|
||||||
|
"num_attention_heads": 32,
|
||||||
|
"num_hidden_layers": 32,
|
||||||
|
"num_key_value_heads": 8,
|
||||||
|
"rms_norm_eps": 1e-05,
|
||||||
|
"rope_theta": 10000.0,
|
||||||
|
"sliding_window": 4096,
|
||||||
|
"tie_word_embeddings": false,
|
||||||
|
"torch_dtype": "bfloat16",
|
||||||
|
"transformers_version": "4.34.1",
|
||||||
|
"use_cache": false,
|
||||||
|
"vocab_size": 32002
|
||||||
|
}
|
||||||
71
configs/dolphin-mistral-7b.yml
Normal file
71
configs/dolphin-mistral-7b.yml
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
base_model: mistralai/Mistral-7B-v0.1
|
||||||
|
model_type: MistralForCausalLM
|
||||||
|
tokenizer_type: LlamaTokenizer
|
||||||
|
is_mistral_derived_model: true
|
||||||
|
|
||||||
|
load_in_8bit: false
|
||||||
|
load_in_4bit: false
|
||||||
|
strict: false
|
||||||
|
|
||||||
|
datasets:
|
||||||
|
- path: /workspace/datasets/dolphin/dolphin201.jsonl
|
||||||
|
type: alpaca_w_system.load_open_orca_chatml
|
||||||
|
- path: /workspace/datasets/WizardLM_evol_instruct_cleaned.jsonl
|
||||||
|
type: sharegpt
|
||||||
|
conversation: chatml
|
||||||
|
- path: /workspace/datasets/not_samantha_norefusals.jsonl
|
||||||
|
type: sharegpt
|
||||||
|
conversation: chatml
|
||||||
|
dataset_prepared_path: last_run_prepared
|
||||||
|
val_set_size: 0
|
||||||
|
output_dir: /workspace/dolphin-2.2-mistral-7b
|
||||||
|
|
||||||
|
sequence_len: 8192
|
||||||
|
sample_packing: true
|
||||||
|
pad_to_sequence_len: true
|
||||||
|
|
||||||
|
wandb_project: dolphin
|
||||||
|
wandb_entity:
|
||||||
|
wandb_watch:
|
||||||
|
wandb_run_id:
|
||||||
|
wandb_log_model:
|
||||||
|
|
||||||
|
gradient_accumulation_steps: 4
|
||||||
|
micro_batch_size: 5
|
||||||
|
num_epochs: 4
|
||||||
|
adam_beta2: 0.95
|
||||||
|
adam_epsilon: 0.00001
|
||||||
|
max_grad_norm: 1.0
|
||||||
|
lr_scheduler: cosine
|
||||||
|
learning_rate: 0.000006
|
||||||
|
|
||||||
|
train_on_inputs: false
|
||||||
|
group_by_length: false
|
||||||
|
bf16: true
|
||||||
|
fp16: false
|
||||||
|
tf32: false
|
||||||
|
|
||||||
|
gradient_checkpointing: true
|
||||||
|
early_stopping_patience:
|
||||||
|
resume_from_checkpoint:
|
||||||
|
local_rank:
|
||||||
|
logging_steps: 1
|
||||||
|
xformers_attention:
|
||||||
|
flash_attention: true
|
||||||
|
|
||||||
|
warmup_steps: 100
|
||||||
|
eval_steps:
|
||||||
|
eval_table_size:
|
||||||
|
eval_table_max_new_tokens:
|
||||||
|
eval_sample_packing: false
|
||||||
|
save_steps: 0.25
|
||||||
|
debug:
|
||||||
|
deepspeed: deepspeed/zero2.json
|
||||||
|
weight_decay: 0.1
|
||||||
|
fsdp:
|
||||||
|
fsdp_config:
|
||||||
|
special_tokens:
|
||||||
|
eos_token: "<|im_end|>"
|
||||||
|
tokens:
|
||||||
|
- "<|im_start|>"
|
||||||
|
- "<|im_end|>"
|
||||||
1
configuration.json
Normal file
1
configuration.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
|
||||||
41
eval.sh
Normal file
41
eval.sh
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Model identifier handed to the evaluator, and the short name used to build
# result paths and the upload target.
MODEL_PATH="cognitivecomputations/dolphin-2.2-mistral-7b"
MODEL_NAME="dolphin-2.2-mistral-7b"

# Directory that receives one result file per task; created up front.
RESULTS_PATH="/workspace/results/$MODEL_NAME"
mkdir -p "$RESULTS_PATH"

# Comma-separated settings that end up in lm_eval's --model_args value.
PRETRAINED_ARGS="$MODEL_PATH,tensor_parallel_size=4,dtype=auto,trust_remote_code=True,gpu_memory_utilization=0.8"
MODEL_ARGS="pretrained=$PRETRAINED_ARGS"

# Benchmarks to run, in execution order.
tasks=("mmlu" "truthfulqa" "gsm8k" "hellaswag" "arc_challenge" "winogrande")
|
||||||
|
|
||||||
|
# Print the few-shot example count for the task name given as $1.
# Task names without an explicit entry (including an empty/missing name) print 0.
get_num_fewshot() {
    local task_name="$1"
    local shots=0

    case "$task_name" in
        "arc_challenge")                shots=25 ;;
        "hellaswag")                    shots=10 ;;
        "mmlu" | "gsm8k" | "winogrande") shots=5 ;;
        # "truthfulqa" and anything unrecognized keep the default of 0.
    esac

    echo "$shots"
}
|
||||||
|
|
||||||
|
# Run every benchmark in turn. The exact command line is printed first so the
# log shows what was invoked, then the same command is executed.
for TASK in "${tasks[@]}"; do
    eval_cmd=(
        lm_eval
        --model vllm
        --model_args "$MODEL_ARGS"
        --task="$TASK"
        --num_fewshot "$(get_num_fewshot "$TASK")"
        --batch_size 8
        --output_path "$RESULTS_PATH/$TASK.json"
    )
    echo "${eval_cmd[@]}"
    "${eval_cmd[@]}"
done
|
||||||
|
|
||||||
|
|
||||||
|
# Merge the per-task result files into a single JSON array, then upload the
# merged file and this script to the model repository.
#
# The merged file is written into the same directory as its inputs, so it is
# filtered out of the input list: the output redirection truncates it before jq
# runs, and on a re-run the plain "*.json" glob would otherwise feed that
# truncated file back in as an input.
COMBINED_RESULTS="$RESULTS_PATH/eval_results.json"

result_files=()
for result_file in "$RESULTS_PATH"/*.json; do
    # -f also rejects the literal, unexpanded pattern left when nothing matches.
    if [[ -f "$result_file" && "$result_file" != "$COMBINED_RESULTS" ]]; then
        result_files+=("$result_file")
    fi
done

# With no input files jq would block reading stdin; stop with a clear message instead.
if (( ${#result_files[@]} == 0 )); then
    echo "No per-task result files found in $RESULTS_PATH" >&2
    exit 1
fi

jq -s '[.[]]' "${result_files[@]}" > "$COMBINED_RESULTS"

# Quoted so that paths containing spaces or glob characters are passed intact.
huggingface-cli upload "cognitivecomputations/$MODEL_NAME" "$COMBINED_RESULTS"
huggingface-cli upload "cognitivecomputations/$MODEL_NAME" eval.sh
|
||||||
3210
eval_results.json
Normal file
3210
eval_results.json
Normal file
File diff suppressed because it is too large
Load Diff
6
generation_config.json
Normal file
6
generation_config.json
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"transformers_version": "4.34.1"
|
||||||
|
}
|
||||||
3
pytorch_model.bin
Normal file
3
pytorch_model.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:8d005de299890e20f27ab2e96bea212ba451236e3810d6b243a8f39cbcf23aec
|
||||||
|
size 14483537189
|
||||||
24
special_tokens_map.json
Normal file
24
special_tokens_map.json
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
{
|
||||||
|
"bos_token": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"eos_token": {
|
||||||
|
"content": "<|im_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"pad_token": "</s>",
|
||||||
|
"unk_token": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
}
|
||||||
BIN
tokenizer.model
(Stored with Git LFS)
Normal file
BIN
tokenizer.model
(Stored with Git LFS)
Normal file
Binary file not shown.
61
tokenizer_config.json
Normal file
61
tokenizer_config.json
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
{
|
||||||
|
"add_bos_token": true,
|
||||||
|
"add_eos_token": false,
|
||||||
|
"added_tokens_decoder": {
|
||||||
|
"0": {
|
||||||
|
"content": "<unk>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"1": {
|
||||||
|
"content": "<s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"2": {
|
||||||
|
"content": "</s>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"32000": {
|
||||||
|
"content": "<|im_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": false
|
||||||
|
},
|
||||||
|
"32001": {
|
||||||
|
"content": "<|im_start|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additional_special_tokens": [],
|
||||||
|
"bos_token": "<s>",
|
||||||
|
"chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "<|im_end|>",
|
||||||
|
"legacy": true,
|
||||||
|
"model_max_length": 1000000000000000019884624838656,
|
||||||
|
"pad_token": "</s>",
|
||||||
|
"sp_model_kwargs": {},
|
||||||
|
"spaces_between_special_tokens": false,
|
||||||
|
"tokenizer_class": "LlamaTokenizer",
|
||||||
|
"trust_remote_code": false,
|
||||||
|
"unk_token": "<unk>",
|
||||||
|
"use_default_system_prompt": true,
|
||||||
|
"use_fast": true
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user