初始化项目,由ModelHub XC社区提供模型
Model: DevQuasar/analytical_reasoning_r16a32_unsloth-Llama-3.2-3B-Instruct-bnb-4bit Source: Original Platform
This commit is contained in:
49
.gitattributes
vendored
Normal file
49
.gitattributes
vendored
Normal file
@@ -0,0 +1,49 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
||||
*.tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
*.db* filter=lfs diff=lfs merge=lfs -text
|
||||
*.ark* filter=lfs diff=lfs merge=lfs -text
|
||||
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
|
||||
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
|
||||
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.gguf* filter=lfs diff=lfs merge=lfs -text
|
||||
*.ggml filter=lfs diff=lfs merge=lfs -text
|
||||
*.llamafile* filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
121
README.md
Normal file
121
README.md
Normal file
@@ -0,0 +1,121 @@
|
||||
---
|
||||
base_model: unsloth/Llama-3.2-3B-Instruct-bnb-4bit
|
||||
datasets:
|
||||
- microsoft/orca-agentinstruct-1M-v1
|
||||
pipeline_tag: text-generation
|
||||
library_name: transformers
|
||||
license: llama3.2
|
||||
tags:
|
||||
- unsloth
|
||||
- transformers
|
||||
model-index:
|
||||
- name: analytical_reasoning_r16a32_unsloth-Llama-3.2-3B-Instruct-bnb-4bit
|
||||
results:
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: lm-evaluation-harness
|
||||
name: bbh
|
||||
metrics:
|
||||
- name: acc_norm
|
||||
type: acc_norm
|
||||
value: 0.4168
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: lm-evaluation-harness
|
||||
name: gpqa
|
||||
metrics:
|
||||
- name: acc_norm
|
||||
type: acc_norm
|
||||
value: 0.2691
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: lm-evaluation-harness
|
||||
name: math
|
||||
metrics:
|
||||
- name: exact_match
|
||||
type: exact_match
|
||||
value: 0.0867
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: lm-evaluation-harness
|
||||
name: mmlu
|
||||
metrics:
|
||||
- name: acc_norm
|
||||
type: acc_norm
|
||||
value: 0.2822
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: lm-evaluation-harness
|
||||
name: musr
|
||||
metrics:
|
||||
- name: acc_norm
|
||||
type: acc_norm
|
||||
value: 0.3648
|
||||
verified: false
|
||||
- task:
|
||||
type: text-generation
|
||||
dataset:
|
||||
type: lm-evaluation-harness
|
||||
name: hellaswag
|
||||
metrics:
|
||||
- name: acc
|
||||
type: acc
|
||||
value: 0.5141
|
||||
verified: false
|
||||
- name: acc_norm
|
||||
type: acc_norm
|
||||
value: 0.6793
|
||||
verified: false
|
||||
|
||||
---
|
||||
|
||||

|
||||
|
||||
### Eval
|
||||
|
||||
The fine-tuned model (DevQuasar/analytical_reasoning_r16a32_unsloth-Llama-3.2-3B-Instruct-bnb-4bit)
|
||||
has gained performance over the base model (unsloth/Llama-3.2-3B-Instruct-bnb-4bit)
|
||||
in the following tasks.
|
||||
|
||||
| Test | Base Model | Fine-Tuned Model | Performance Gain |
|
||||
|---|---|---|---|
|
||||
| leaderboard_bbh_logical_deduction_seven_objects | 0.2520 | 0.4360 | 0.1840 |
|
||||
| leaderboard_bbh_logical_deduction_five_objects | 0.3560 | 0.4560 | 0.1000 |
|
||||
| leaderboard_musr_team_allocation | 0.2200 | 0.3200 | 0.1000 |
|
||||
| leaderboard_bbh_disambiguation_qa | 0.3040 | 0.3760 | 0.0720 |
|
||||
| leaderboard_gpqa_diamond | 0.2222 | 0.2727 | 0.0505 |
|
||||
| leaderboard_bbh_movie_recommendation | 0.5960 | 0.6360 | 0.0400 |
|
||||
| leaderboard_bbh_formal_fallacies | 0.5080 | 0.5400 | 0.0320 |
|
||||
| leaderboard_bbh_tracking_shuffled_objects_three_objects | 0.3160 | 0.3440 | 0.0280 |
|
||||
| leaderboard_bbh_causal_judgement | 0.5455 | 0.5668 | 0.0214 |
|
||||
| leaderboard_bbh_web_of_lies | 0.4960 | 0.5160 | 0.0200 |
|
||||
| leaderboard_math_geometry_hard | 0.0455 | 0.0606 | 0.0152 |
|
||||
| leaderboard_math_num_theory_hard | 0.0519 | 0.0649 | 0.0130 |
|
||||
| leaderboard_musr_murder_mysteries | 0.5280 | 0.5400 | 0.0120 |
|
||||
| leaderboard_gpqa_extended | 0.2711 | 0.2802 | 0.0092 |
|
||||
| leaderboard_bbh_sports_understanding | 0.5960 | 0.6040 | 0.0080 |
|
||||
| leaderboard_math_intermediate_algebra_hard | 0.0107 | 0.0143 | 0.0036 |
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- unsloth 2024.11.5
|
||||
- trl 0.12.0
|
||||
|
||||
### Training HW
|
||||
- V100
|
||||
|
||||
I'm doing this to 'Make knowledge free for everyone', using my personal time and resources.
|
||||
|
||||
If you want to support my efforts, please visit my ko-fi page: https://ko-fi.com/devquasar
|
||||
|
||||
Also feel free to visit my website https://devquasar.com/
|
||||
42
config.json
Normal file
42
config.json
Normal file
@@ -0,0 +1,42 @@
|
||||
{
|
||||
"_name_or_path": "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
|
||||
"architectures": [
|
||||
"LlamaForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 128000,
|
||||
"eos_token_id": [
|
||||
128001,
|
||||
128008,
|
||||
128009
|
||||
],
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 3072,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 8192,
|
||||
"max_position_embeddings": 131072,
|
||||
"mlp_bias": false,
|
||||
"model_type": "llama",
|
||||
"num_attention_heads": 24,
|
||||
"num_hidden_layers": 28,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 128004,
|
||||
"pretraining_tp": 1,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_scaling": {
|
||||
"factor": 32.0,
|
||||
"high_freq_factor": 4.0,
|
||||
"low_freq_factor": 1.0,
|
||||
"original_max_position_embeddings": 8192,
|
||||
"rope_type": "llama3"
|
||||
},
|
||||
"rope_theta": 500000.0,
|
||||
"tie_word_embeddings": true,
|
||||
"torch_dtype": "float16",
|
||||
"transformers_version": "4.46.2",
|
||||
"unsloth_version": "2024.11.5",
|
||||
"use_cache": true,
|
||||
"vocab_size": 128256
|
||||
}
|
||||
1
configuration.json
Normal file
1
configuration.json
Normal file
@@ -0,0 +1 @@
|
||||
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
14
generation_config.json
Normal file
14
generation_config.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"bos_token_id": 128000,
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
128001,
|
||||
128008,
|
||||
128009
|
||||
],
|
||||
"max_length": 131072,
|
||||
"pad_token_id": 128004,
|
||||
"temperature": 0.6,
|
||||
"top_p": 0.9,
|
||||
"transformers_version": "4.46.2"
|
||||
}
|
||||
3
model-00001-of-00002.safetensors
Normal file
3
model-00001-of-00002.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:eae1ad11e4c8866a56d00420f4f461de9fa475fd7f2a4e8ff9291028c666b50b
|
||||
size 4965798912
|
||||
3
model-00002-of-00002.safetensors
Normal file
3
model-00002-of-00002.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2758257df3bc66300d332687e4cebfe730762210dbd954c07bdc0fd18e47f8b7
|
||||
size 1459729880
|
||||
261
model.safetensors.index.json
Normal file
261
model.safetensors.index.json
Normal file
@@ -0,0 +1,261 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_size": 6425499648
|
||||
},
|
||||
"weight_map": {
|
||||
"model.embed_tokens.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
|
||||
"model.norm.weight": "model-00002-of-00002.safetensors"
|
||||
}
|
||||
}
|
||||
200
results_2024-11-30T09-11-17.191351.json
Normal file
200
results_2024-11-30T09-11-17.191351.json
Normal file
@@ -0,0 +1,200 @@
|
||||
{
|
||||
"results": {
|
||||
"leaderboard_musr": {
|
||||
" ": " ",
|
||||
"alias": "leaderboard_musr"
|
||||
},
|
||||
"leaderboard_musr_murder_mysteries": {
|
||||
"alias": " - leaderboard_musr_murder_mysteries",
|
||||
"acc_norm,none": 0.54,
|
||||
"acc_norm_stderr,none": 0.03158465389149902
|
||||
},
|
||||
"leaderboard_musr_object_placements": {
|
||||
"alias": " - leaderboard_musr_object_placements",
|
||||
"acc_norm,none": 0.234375,
|
||||
"acc_norm_stderr,none": 0.02652733398834892
|
||||
},
|
||||
"leaderboard_musr_team_allocation": {
|
||||
"alias": " - leaderboard_musr_team_allocation",
|
||||
"acc_norm,none": 0.32,
|
||||
"acc_norm_stderr,none": 0.029561724955241033
|
||||
}
|
||||
},
|
||||
"group_subtasks": {
|
||||
"leaderboard_musr": [
|
||||
"leaderboard_musr_murder_mysteries",
|
||||
"leaderboard_musr_object_placements",
|
||||
"leaderboard_musr_team_allocation"
|
||||
]
|
||||
},
|
||||
"configs": {
|
||||
"leaderboard_musr_murder_mysteries": {
|
||||
"task": "leaderboard_musr_murder_mysteries",
|
||||
"dataset_path": "TAUR-Lab/MuSR",
|
||||
"test_split": "murder_mysteries",
|
||||
"doc_to_text": "def doc_to_text(doc):\n \"\"\"\n Convert a doc to text.\n \"\"\"\n choices = \"\"\n for i, choice in enumerate(ast.literal_eval(doc[\"choices\"])):\n choices += f\"{i+1} - {choice}\\n\"\n\n text = DOC_TO_TEXT.format(\n narrative=doc[\"narrative\"], question=doc[\"question\"], choices=choices\n )\n\n return text\n",
|
||||
"doc_to_target": "{{answer_choice}}",
|
||||
"doc_to_choice": "{{choices}}",
|
||||
"description": "",
|
||||
"target_delimiter": " ",
|
||||
"fewshot_delimiter": "\n\n",
|
||||
"num_fewshot": 0,
|
||||
"metric_list": [
|
||||
{
|
||||
"metric": "acc_norm",
|
||||
"aggregation": "mean",
|
||||
"higher_is_better": true
|
||||
}
|
||||
],
|
||||
"output_type": "multiple_choice",
|
||||
"repeats": 1,
|
||||
"should_decontaminate": false,
|
||||
"metadata": {
|
||||
"version": 1.0
|
||||
}
|
||||
},
|
||||
"leaderboard_musr_object_placements": {
|
||||
"task": "leaderboard_musr_object_placements",
|
||||
"dataset_path": "TAUR-Lab/MuSR",
|
||||
"test_split": "object_placements",
|
||||
"doc_to_text": "def doc_to_text(doc):\n \"\"\"\n Convert a doc to text.\n \"\"\"\n choices = \"\"\n for i, choice in enumerate(ast.literal_eval(doc[\"choices\"])):\n choices += f\"{i+1} - {choice}\\n\"\n\n text = DOC_TO_TEXT.format(\n narrative=doc[\"narrative\"], question=doc[\"question\"], choices=choices\n )\n\n return text\n",
|
||||
"doc_to_target": "{{answer_choice}}",
|
||||
"doc_to_choice": "{{choices}}",
|
||||
"description": "",
|
||||
"target_delimiter": " ",
|
||||
"fewshot_delimiter": "\n\n",
|
||||
"num_fewshot": 0,
|
||||
"metric_list": [
|
||||
{
|
||||
"metric": "acc_norm",
|
||||
"aggregation": "mean",
|
||||
"higher_is_better": true
|
||||
}
|
||||
],
|
||||
"output_type": "multiple_choice",
|
||||
"repeats": 1,
|
||||
"should_decontaminate": false,
|
||||
"metadata": {
|
||||
"version": 1.0
|
||||
}
|
||||
},
|
||||
"leaderboard_musr_team_allocation": {
|
||||
"task": "leaderboard_musr_team_allocation",
|
||||
"dataset_path": "TAUR-Lab/MuSR",
|
||||
"test_split": "team_allocation",
|
||||
"doc_to_text": "def doc_to_text(doc):\n \"\"\"\n Convert a doc to text.\n \"\"\"\n choices = \"\"\n for i, choice in enumerate(ast.literal_eval(doc[\"choices\"])):\n choices += f\"{i+1} - {choice}\\n\"\n\n text = DOC_TO_TEXT.format(\n narrative=doc[\"narrative\"], question=doc[\"question\"], choices=choices\n )\n\n return text\n",
|
||||
"doc_to_target": "{{answer_choice}}",
|
||||
"doc_to_choice": "{{choices}}",
|
||||
"description": "",
|
||||
"target_delimiter": " ",
|
||||
"fewshot_delimiter": "\n\n",
|
||||
"num_fewshot": 0,
|
||||
"metric_list": [
|
||||
{
|
||||
"metric": "acc_norm",
|
||||
"aggregation": "mean",
|
||||
"higher_is_better": true
|
||||
}
|
||||
],
|
||||
"output_type": "multiple_choice",
|
||||
"repeats": 1,
|
||||
"should_decontaminate": false,
|
||||
"metadata": {
|
||||
"version": 1.0
|
||||
}
|
||||
}
|
||||
},
|
||||
"versions": {
|
||||
"leaderboard_musr_murder_mysteries": 1.0,
|
||||
"leaderboard_musr_object_placements": 1.0,
|
||||
"leaderboard_musr_team_allocation": 1.0
|
||||
},
|
||||
"n-shot": {
|
||||
"leaderboard_musr_murder_mysteries": 0,
|
||||
"leaderboard_musr_object_placements": 0,
|
||||
"leaderboard_musr_team_allocation": 0
|
||||
},
|
||||
"higher_is_better": {
|
||||
"leaderboard_musr": {
|
||||
"acc_norm": true
|
||||
},
|
||||
"leaderboard_musr_murder_mysteries": {
|
||||
"acc_norm": true
|
||||
},
|
||||
"leaderboard_musr_object_placements": {
|
||||
"acc_norm": true
|
||||
},
|
||||
"leaderboard_musr_team_allocation": {
|
||||
"acc_norm": true
|
||||
}
|
||||
},
|
||||
"n-samples": {
|
||||
"leaderboard_musr_murder_mysteries": {
|
||||
"original": 250,
|
||||
"effective": 250
|
||||
},
|
||||
"leaderboard_musr_object_placements": {
|
||||
"original": 256,
|
||||
"effective": 256
|
||||
},
|
||||
"leaderboard_musr_team_allocation": {
|
||||
"original": 250,
|
||||
"effective": 250
|
||||
}
|
||||
},
|
||||
"config": {
|
||||
"model": "hf",
|
||||
"model_args": "pretrained=DevQuasar/analytical_reasoning_r16a32_unsloth-Llama-3.2-3B-Instruct-bnb-4bit",
|
||||
"batch_size": "auto:4",
|
||||
"batch_sizes": [
|
||||
16,
|
||||
16,
|
||||
16,
|
||||
32
|
||||
],
|
||||
"device": null,
|
||||
"use_cache": "eval_cache",
|
||||
"limit": null,
|
||||
"bootstrap_iters": 100000,
|
||||
"gen_kwargs": null,
|
||||
"random_seed": 0,
|
||||
"numpy_seed": 1234,
|
||||
"torch_seed": 1234,
|
||||
"fewshot_seed": 1234
|
||||
},
|
||||
"git_hash": "0230356",
|
||||
"date": 1732986471.4917576,
|
||||
"pretty_env_info": "PyTorch version: 2.5.1+cu124\nIs debug build: False\nCUDA used to build PyTorch: 12.4\nROCM used to build PyTorch: N/A\n\nOS: Debian GNU/Linux 12 (bookworm) (x86_64)\nGCC version: (Debian 12.2.0-14) 12.2.0\nClang version: Could not collect\nCMake version: Could not collect\nLibc version: glibc-2.36\n\nPython version: 3.11.10 (main, Oct 3 2024, 07:29:13) [GCC 11.2.0] (64-bit runtime)\nPython platform: Linux-6.1.0-26-amd64-x86_64-with-glibc2.36\nIs CUDA available: True\nCUDA runtime version: Could not collect\nCUDA_MODULE_LOADING set to: LAZY\nGPU models and configuration: \nGPU 0: NVIDIA GeForce GTX 1050 Ti\nGPU 1: Tesla P40\nGPU 2: Tesla V100-PCIE-32GB\nGPU 3: Tesla V100-PCIE-32GB\n\nNvidia driver version: 535.183.01\ncuDNN version: Could not collect\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 43 bits physical, 48 bits virtual\nByte Order: Little Endian\nCPU(s): 32\nOn-line CPU(s) list: 0-31\nVendor ID: AuthenticAMD\nModel name: AMD Ryzen Threadripper 1950X 16-Core Processor\nCPU family: 23\nModel: 1\nThread(s) per core: 2\nCore(s) per socket: 16\nSocket(s): 1\nStepping: 1\nFrequency boost: enabled\nCPU(s) scaling MHz: 66%\nCPU max MHz: 3400.0000\nCPU min MHz: 2200.0000\nBogoMIPS: 6786.43\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid amd_dcm aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb hw_pstate ssbd ibpb vmmcall fsgsbase bmi1 avx2 smep bmi2 rdseed adx smap clflushopt sha_ni xsaveopt xsavec xgetbv1 clzero irperf xsaveerptr arat npt lbrv svm_lock 
nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif overflow_recov succor smca sev\nVirtualization: AMD-V\nL1d cache: 512 KiB (16 instances)\nL1i cache: 1 MiB (16 instances)\nL2 cache: 8 MiB (16 instances)\nL3 cache: 32 MiB (4 instances)\nNUMA node(s): 1\nNUMA node0 CPU(s): 0-31\nVulnerability Gather data sampling: Not affected\nVulnerability Itlb multihit: Not affected\nVulnerability L1tf: Not affected\nVulnerability Mds: Not affected\nVulnerability Meltdown: Not affected\nVulnerability Mmio stale data: Not affected\nVulnerability Reg file data sampling: Not affected\nVulnerability Retbleed: Mitigation; untrained return thunk; SMT vulnerable\nVulnerability Spec rstack overflow: Mitigation; safe RET\nVulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl\nVulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization\nVulnerability Spectre v2: Mitigation; Retpolines; IBPB conditional; STIBP disabled; RSB filling; PBRSB-eIBRS Not affected; BHI Not affected\nVulnerability Srbds: Not affected\nVulnerability Tsx async abort: Not affected\n\nVersions of relevant libraries:\n[pip3] numpy==2.1.3\n[pip3] torch==2.5.1\n[pip3] triton==3.1.0\n[conda] numpy 2.1.3 pypi_0 pypi\n[conda] torch 2.5.1 pypi_0 pypi\n[conda] triton 3.1.0 pypi_0 pypi",
|
||||
"transformers_version": "4.46.3",
|
||||
"upper_git_hash": null,
|
||||
"tokenizer_pad_token": [
|
||||
"<|finetune_right_pad_id|>",
|
||||
"128004"
|
||||
],
|
||||
"tokenizer_eos_token": [
|
||||
"<|eot_id|>",
|
||||
"128009"
|
||||
],
|
||||
"tokenizer_bos_token": [
|
||||
"<|begin_of_text|>",
|
||||
"128000"
|
||||
],
|
||||
"eot_token_id": 128009,
|
||||
"max_length": 131072,
|
||||
"task_hashes": {
|
||||
"leaderboard_musr_murder_mysteries": "a696259562ea5c5c09a2613e30526fae1de29f55da9e28e8d7e8a53027e6d330",
|
||||
"leaderboard_musr_object_placements": "3aa8c5e5bc59cd6ba2326269b9f0bf3cee8cba1b4e9e1d1330cf5f1f59ea0dce",
|
||||
"leaderboard_musr_team_allocation": "5a75f135c145ee861a1cf31b63346709ef41b9d542be6a61c5818c210a3797a5"
|
||||
},
|
||||
"model_source": "hf",
|
||||
"model_name": "DevQuasar/analytical_reasoning_r16a32_unsloth-Llama-3.2-3B-Instruct-bnb-4bit",
|
||||
"model_name_sanitized": "DevQuasar__analytical_reasoning_r16a32_unsloth-Llama-3.2-3B-Instruct-bnb-4bit",
|
||||
"system_instruction": null,
|
||||
"system_instruction_sha": null,
|
||||
"fewshot_as_multiturn": false,
|
||||
"chat_template": null,
|
||||
"chat_template_sha": null,
|
||||
"start_time": 52195.45405349,
|
||||
"end_time": 52407.302247922,
|
||||
"total_evaluation_time_seconds": "211.84819443200104"
|
||||
}
|
||||
2547
results_2024-11-30T13-44-48.543919.json
Normal file
2547
results_2024-11-30T13-44-48.543919.json
Normal file
File diff suppressed because it is too large
Load Diff
23
special_tokens_map.json
Normal file
23
special_tokens_map.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"bos_token": {
|
||||
"content": "<|begin_of_text|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "<|eot_id|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|finetune_right_pad_id|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
BIN
tokenizer.json
(Stored with Git LFS)
Normal file
BIN
tokenizer.json
(Stored with Git LFS)
Normal file
Binary file not shown.
2064
tokenizer_config.json
Normal file
2064
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user