初始化项目,由ModelHub XC社区提供模型

Model: MathMindsAGI/Test_context_pretrain
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-04-11 11:04:57 +08:00
commit 200675bd2d
23 changed files with 28409 additions and 0 deletions

37
.gitattributes vendored Normal file
View File

@@ -0,0 +1,37 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
results/eval_difficulty/checkpoint-18779_id_generations.jsonl filter=lfs diff=lfs merge=lfs -text
results/eval_difficulty/checkpoint-18779_ood_generations.jsonl filter=lfs diff=lfs merge=lfs -text

60
README.md Normal file
View File

@@ -0,0 +1,60 @@
---
library_name: transformers
license: other
base_model: model_configs/qwen2_100M
tags:
- llama-factory
- full
- generated_from_trainer
model-index:
- name: pt
results: []
---
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->
# pt
This model is a fine-tuned version of `model_configs/qwen2_100M` (a local model configuration, not a Hugging Face Hub repository) on the composition dataset.
## Model description
More information needed
## Intended uses & limitations
More information needed
## Training and evaluation data
More information needed
## Training procedure
### Training hyperparameters
The following hyperparameters were used during training:
- learning_rate: 0.0001
- train_batch_size: 64
- eval_batch_size: 8
- seed: 42
- distributed_type: multi-GPU
- num_devices: 4
- total_train_batch_size: 256
- total_eval_batch_size: 32
- optimizer: adamw_torch (betas=(0.9, 0.999), epsilon=1e-08; no additional optimizer arguments)
- lr_scheduler_type: cosine_with_min_lr
- lr_scheduler_warmup_ratio: 0.05
- num_epochs: 1.0
### Training results
### Framework versions
- Transformers 4.52.4
- Pytorch 2.7.0+cu126
- Datasets 3.6.0
- Tokenizers 0.21.1

View File

@@ -0,0 +1,8 @@
{
"epoch": 1.0,
"total_flos": 5.994863411375112e+18,
"train_loss": 0.031714059101823636,
"train_runtime": 3465.0177,
"train_samples_per_second": 1387.365,
"train_steps_per_second": 5.42
}

View File

@@ -0,0 +1,24 @@
#!/usr/bin/env bash
#SBATCH --ntasks=1
#SBATCH --nodes=1
#SBATCH --partition=short-unkillable
#SBATCH --gres=gpu:h100:4
#SBATCH -c 24
#SBATCH --mem=64G
#SBATCH -t 2:59:0
# SLURM launcher: points all Hugging Face caches and TMPDIR at scratch
# storage, then hands off to the pretraining wrapper script.
set -euo pipefail

# Scratch location; each env var below is only defaulted, so callers may
# override any of them before submitting the job.
SCRATCH_ROOT="${SCRATCH_ROOT:-/network/scratch/k/kamran.chitsaz}"
export HF_HOME="${HF_HOME:-${SCRATCH_ROOT}/.cache/huggingface}"
export HF_DATASETS_CACHE="${HF_DATASETS_CACHE:-${HF_HOME}/datasets}"
# NOTE(review): TRANSFORMERS_CACHE is deprecated in recent transformers
# releases in favor of HF_HOME, but it is kept exported for compatibility.
export TRANSFORMERS_CACHE="${TRANSFORMERS_CACHE:-${HF_HOME}/transformers}"
export TMPDIR="${TMPDIR:-${SCRATCH_ROOT}/tmp}"

# Make sure every cache/tmp directory exists before training starts.
for cache_dir in "${HF_DATASETS_CACHE}" "${TRANSFORMERS_CACHE}" "${TMPDIR}"; do
  mkdir -p "${cache_dir}"
done

# Worker counts consumed by the downstream training script.
export PREPROCESSING_NUM_WORKERS=6
export DATALOADER_NUM_WORKERS=6

bash scripts/composition/op-difficulty-10B/script_pt/run_pretrain_id2-10_0.25easy_0.25medium_0.5hard.sh

View File

@@ -0,0 +1,62 @@
#!/usr/bin/env bash
# Wrapper around meta_run.sh for the id2-10 (0.25 easy / 0.25 medium / 0.5
# hard) pretraining config. Validates the environment, injects default
# worker counts into LLAMA_EXTRA_ARGS, and delegates to ./scripts/meta_run.sh.
set -euo pipefail

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
repo_root="$(cd "${script_dir}/../../../.." && pwd)"
cd "${repo_root}"

# llamafactory-cli launches distributed training via `torchrun`, so the venv
# bin dir must be on PATH even when the CLI itself is invoked by absolute path.
export PATH="${repo_root}/.venv/bin:${PATH}"

# Pick a CPU-worker default: SLURM allocation first, then nproc, then 1.
detect_cpu_workers() {
  if [[ -n "${SLURM_CPUS_PER_TASK:-}" ]]; then
    printf '%s\n' "${SLURM_CPUS_PER_TASK}"
  elif command -v nproc >/dev/null 2>&1; then
    nproc
  else
    printf '1\n'
  fi
}

cpu_workers="$(detect_cpu_workers)"
preprocessing_workers="${PREPROCESSING_NUM_WORKERS:-${cpu_workers}}"
dataloader_workers="${DATALOADER_NUM_WORKERS:-${cpu_workers}}"

# Refuse to guess GPU placement; the caller must pin devices explicitly.
if [[ -z "${CUDA_VISIBLE_DEVICES:-}" ]]; then
  echo "CUDA_VISIBLE_DEVICES must be set before running this script" >&2
  exit 1
fi

llama_cli="${repo_root}/.venv/bin/llamafactory-cli"
DATASET_DIR_ROOT="${DATASET_DIR_ROOT:-data}"

if [[ ! -x "${llama_cli}" ]]; then
  echo "Missing ${llama_cli}. Run scripts/setup/install_local_llamafactory.sh first." >&2
  exit 1
fi

# Both dataset splits must already be prepared on disk.
for split in train test; do
  if [[ ! -d "${repo_root}/${DATASET_DIR_ROOT}/composition/${split}" ]]; then
    echo "Missing ${DATASET_DIR_ROOT}/composition/${split}. Run scripts/composition/prepare_hf_composition_data.sh first." >&2
    exit 1
  fi
done

export WANDB_PROJECT="${WANDB_PROJECT:-Interplay-LM-Reasoning}"
export WANDB_ENTITY="${WANDB_ENTITY:-kmchiti}"

# Prepend the worker defaults to any caller-supplied LLAMA_EXTRA_ARGS.
worker_args=(
  "preprocessing_num_workers=${preprocessing_workers}"
  "dataloader_num_workers=${dataloader_workers}"
)
export LLAMA_EXTRA_ARGS="${worker_args[*]}${LLAMA_EXTRA_ARGS:+ ${LLAMA_EXTRA_ARGS}}"

EVAL_DATA_ROOT="${EVAL_DATA_ROOT:-${DATASET_DIR_ROOT}/composition/test}" \
LLAMA_BIN="${LLAMA_BIN:-${llama_cli}}" \
LLAMA_CONFIG="scripts/composition/op-difficulty-10B/pt-diff2_10-tok10B-lr1e-4-bs512k-schedcos-minlr3e-5/id2-10_0.25easy_0.25medium_0.5hard.yaml" \
./scripts/meta_run.sh --skip-rl "$@"

View File

@@ -0,0 +1,8 @@
{
"epoch": 1.0,
"total_flos": 5.994863411375112e+18,
"train_loss": 0.031714059101823636,
"train_runtime": 3465.0177,
"train_samples_per_second": 1387.365,
"train_steps_per_second": 5.42
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:513e25aff4b26f6d4627ba5293f2d27ea5ae535068c635b6f731c100fe640f23
size 6353

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

View File

@@ -0,0 +1,114 @@
{
"repo_id": "MathMindsAGI/Test_context_pretrain",
"requested_repo_id": "MathMindsAGI/Test_context_pretrain",
"model_export_dir": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/saves/composition-10B/op_level/id2-10_0.25easy_0.25medium_0.5hard/pt",
"training_run_dir": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/saves/composition-10B/op_level/id2-10_0.25easy_0.25medium_0.5hard/pt",
"results_dir": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/results/eval_difficulty",
"uploaded_files": [
{
"path_in_repo": "README.md",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/saves/composition-10B/op_level/id2-10_0.25easy_0.25medium_0.5hard/pt/README.md",
"size_bytes": 1318
},
{
"path_in_repo": "config.json",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/saves/composition-10B/op_level/id2-10_0.25easy_0.25medium_0.5hard/pt/config.json",
"size_bytes": 691
},
{
"path_in_repo": "generation_config.json",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/saves/composition-10B/op_level/id2-10_0.25easy_0.25medium_0.5hard/pt/generation_config.json",
"size_bytes": 133
},
{
"path_in_repo": "model.safetensors",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/saves/composition-10B/op_level/id2-10_0.25easy_0.25medium_0.5hard/pt/model.safetensors",
"size_bytes": 412699256
},
{
"path_in_repo": "special_tokens_map.json",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/saves/composition-10B/op_level/id2-10_0.25easy_0.25medium_0.5hard/pt/special_tokens_map.json",
"size_bytes": 699
},
{
"path_in_repo": "tokenizer.json",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/saves/composition-10B/op_level/id2-10_0.25easy_0.25medium_0.5hard/pt/tokenizer.json",
"size_bytes": 132794
},
{
"path_in_repo": "tokenizer_config.json",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/saves/composition-10B/op_level/id2-10_0.25easy_0.25medium_0.5hard/pt/tokenizer_config.json",
"size_bytes": 3143
},
{
"path_in_repo": "chat_template.jinja",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/saves/composition-10B/op_level/id2-10_0.25easy_0.25medium_0.5hard/pt/chat_template.jinja",
"size_bytes": 443
},
{
"path_in_repo": "artifacts/training/all_results.json",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/saves/composition-10B/op_level/id2-10_0.25easy_0.25medium_0.5hard/pt/all_results.json",
"size_bytes": 211
},
{
"path_in_repo": "artifacts/training/train_results.json",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/saves/composition-10B/op_level/id2-10_0.25easy_0.25medium_0.5hard/pt/train_results.json",
"size_bytes": 211
},
{
"path_in_repo": "artifacts/training/trainer_log.jsonl",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/saves/composition-10B/op_level/id2-10_0.25easy_0.25medium_0.5hard/pt/trainer_log.jsonl",
"size_bytes": 385767
},
{
"path_in_repo": "artifacts/training/trainer_state.json",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/saves/composition-10B/op_level/id2-10_0.25easy_0.25medium_0.5hard/pt/trainer_state.json",
"size_bytes": 330574
},
{
"path_in_repo": "artifacts/training/training_args.bin",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/saves/composition-10B/op_level/id2-10_0.25easy_0.25medium_0.5hard/pt/training_args.bin",
"size_bytes": 6353
},
{
"path_in_repo": "artifacts/training/training_loss.png",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/saves/composition-10B/op_level/id2-10_0.25easy_0.25medium_0.5hard/pt/training_loss.png",
"size_bytes": 28679
},
{
"path_in_repo": "artifacts/training/scripts/run_pretrain_id2-10_0.25easy_0.25medium_0.5hard.sh",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/scripts/composition/op-difficulty-10B/script_pt/run_pretrain_id2-10_0.25easy_0.25medium_0.5hard.sh",
"size_bytes": 2320
},
{
"path_in_repo": "artifacts/training/scripts/run.sh",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/run.sh",
"size_bytes": 754
},
{
"path_in_repo": "results/eval_difficulty/checkpoint-18779_id_generations.jsonl",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/results/eval_difficulty/checkpoint-18779_id_generations.jsonl",
"size_bytes": 1790663092
},
{
"path_in_repo": "results/eval_difficulty/checkpoint-18779_metrics.json",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/results/eval_difficulty/checkpoint-18779_metrics.json",
"size_bytes": 66147
},
{
"path_in_repo": "results/eval_difficulty/checkpoint-18779_ood_generations.jsonl",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/results/eval_difficulty/checkpoint-18779_ood_generations.jsonl",
"size_bytes": 2445405195
},
{
"path_in_repo": "results/eval_difficulty/summary.csv",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/results/eval_difficulty/summary.csv",
"size_bytes": 322
},
{
"path_in_repo": "results/eval_difficulty/summary.json",
"source_path": "/network/scratch/k/kamran.chitsaz/Interplay-LM-Reasoning/results/eval_difficulty/summary.json",
"size_bytes": 22961
}
]
}

1
chat_template.jinja Normal file
View File

@@ -0,0 +1 @@
{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ system_message }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ content }}{% elif message['role'] == 'assistant' %}{{ content }}{% endif %}{% endfor %}

29
config.json Normal file
View File

@@ -0,0 +1,29 @@
{
"architectures": [
"Qwen2ForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 2,
"eos_token_id": 3,
"hidden_act": "silu",
"hidden_size": 768,
"initializer_range": 0.02,
"intermediate_size": 3072,
"max_position_embeddings": 2048,
"max_window_layers": 24,
"model_type": "qwen2",
"num_attention_heads": 12,
"num_hidden_layers": 12,
"num_key_value_heads": 2,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 1000000.0,
"sliding_window": 2048,
"tie_word_embeddings": true,
"torch_dtype": "float32",
"transformers_version": "4.52.4",
"use_cache": false,
"use_mrope": false,
"use_sliding_window": false,
"vocab_size": 2200
}

7
generation_config.json Normal file
View File

@@ -0,0 +1,7 @@
{
"_from_model_config": true,
"bos_token_id": 2,
"eos_token_id": 3,
"transformers_version": "4.52.4",
"use_cache": false
}

3
model.safetensors Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9e368dc0425800ba5c9c096b5ec663716d772c1a574a36c5ef4154b32edaa256
size 412699256

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:429575de766ebb33c1be6f82b91f879487774f5e1fd41f3ccb8fc45b86528934
size 1790663092

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3d4a966841cd19dec1e7886c21e4af184eda37cf81bc10ce458df13a7e15eea2
size 2445405195

View File

@@ -0,0 +1,2 @@
checkpoint,id_acc,ood_acc,total_acc,id_avg_loss,ood_avg_loss,total_avg_loss,id_avg_resp_len,ood_avg_resp_len,count_id,count_ood,count_total
checkpoint-18779,0.9630347222222222,0.25220703125,0.588914884868421,6.54602900314331,5.825457085227966,6.166780625293129,201.21272916666666,284.85050390625,1152000,1280000,2432000
1 checkpoint id_acc ood_acc total_acc id_avg_loss ood_avg_loss total_avg_loss id_avg_resp_len ood_avg_resp_len count_id count_ood count_total
2 checkpoint-18779 0.9630347222222222 0.25220703125 0.588914884868421 6.54602900314331 5.825457085227966 6.166780625293129 201.21272916666666 284.85050390625 1152000 1280000 2432000

View File

@@ -0,0 +1,645 @@
[
{
"checkpoint": "checkpoint-18779",
"id_acc": 0.9630347222222222,
"ood_acc": 0.25220703125,
"total_acc": 0.588914884868421,
"id_avg_loss": 6.54602900314331,
"ood_avg_loss": 5.825457085227966,
"total_avg_loss": 6.166780625293129,
"id_avg_resp_len": 201.21272916666666,
"ood_avg_resp_len": 284.85050390625,
"count_id": 1152000,
"count_ood": 1280000,
"count_total": 2432000,
"template_metrics": {
"crazy_zootopia": {
"count": 809728,
"correct": 477128,
"answer_accuracy": 0.5892447834334492,
"avg_response_len": 235.475637498024,
"resp_tokens_sum": 190671217,
"pass_at_k": {
"pass@1": 0.5892447834334492,
"pass@2": 0.6242494417489666,
"pass@4": 0.6517067209785661,
"pass@8": 0.6752650274111465,
"pass@16": 0.6966098480320178,
"pass@32": 0.7161446042348242,
"pass@64": 0.7340453705919688,
"pass@128": 0.7510275055327221
},
"per_op_pass_at_k": {
"10": {
"pass@1": 0.8994976032448377,
"pass@2": 0.9366747090795067,
"pass@4": 0.954640098361598,
"pass@8": 0.9671935395338649,
"pass@16": 0.9780409476989385,
"pass@32": 0.9873672705205916,
"pass@64": 0.9931124421272916,
"pass@128": 0.9941002949852508
},
"2": {
"pass@1": 0.9997322819314641,
"pass@2": 0.9999915679348491,
"pass@4": 0.9999999897793149,
"pass@8": 1.0,
"pass@16": 1.0,
"pass@32": 1.0,
"pass@64": 1.0,
"pass@128": 1.0
},
"3": {
"pass@1": 0.9875525611620795,
"pass@2": 0.9937483445303281,
"pass@4": 0.9960841566249173,
"pass@8": 0.9973945614382809,
"pass@16": 0.9984052318072489,
"pass@32": 0.9992931754002767,
"pass@64": 0.9999118688143109,
"pass@128": 1.0
},
"4": {
"pass@1": 0.996337890625,
"pass@2": 0.9982602577509845,
"pass@4": 0.9992694307742784,
"pass@8": 0.9998455780548049,
"pass@16": 0.9999940908757206,
"pass@32": 0.9999999967265157,
"pass@64": 1.0,
"pass@128": 1.0
},
"5": {
"pass@1": 0.9481150793650793,
"pass@2": 0.9641533089613797,
"pass@4": 0.9757381440415184,
"pass@8": 0.9837977641761237,
"pass@16": 0.9891567790393069,
"pass@32": 0.9924331098404072,
"pass@64": 0.9948057827917357,
"pass@128": 0.9968253968253968
},
"6": {
"pass@1": 0.9665746631736527,
"pass@2": 0.9810815131547932,
"pass@4": 0.9889092237347574,
"pass@8": 0.992817054998713,
"pass@16": 0.9942118482404367,
"pass@32": 0.9947566055669427,
"pass@64": 0.9955089818359227,
"pass@128": 0.9970059880239521
},
"7": {
"pass@1": 0.9575397559171598,
"pass@2": 0.9706686361412663,
"pass@4": 0.9781720887884576,
"pass@8": 0.9836457175352227,
"pass@16": 0.9877211930993773,
"pass@32": 0.991030454111008,
"pass@64": 0.9934560055493304,
"pass@128": 0.9940828402366864
},
"8": {
"pass@1": 0.9534755608974359,
"pass@2": 0.9758759684786995,
"pass@4": 0.9847553208493165,
"pass@8": 0.9897007291187125,
"pass@16": 0.9935694000922215,
"pass@32": 0.9965151373391712,
"pass@64": 0.998300039303945,
"pass@128": 1.0
},
"9": {
"pass@1": 0.9408450704225352,
"pass@2": 0.9764160751913055,
"pass@4": 0.9897490211610868,
"pass@8": 0.9954928597779902,
"pass@16": 0.9986146363237696,
"pass@32": 0.9998472651017836,
"pass@64": 0.9999990904114461,
"pass@128": 1.0
},
"11": {
"pass@1": 0.7521689093484419,
"pass@2": 0.8267172826838576,
"pass@4": 0.8749963240253604,
"pass@8": 0.9072539747181511,
"pass@16": 0.9305891455160197,
"pass@32": 0.9482171914771189,
"pass@64": 0.9598296757598728,
"pass@128": 0.9660056657223796
},
"12": {
"pass@1": 0.4268626412429379,
"pass@2": 0.5226548250589441,
"pass@4": 0.5947451512946471,
"pass@8": 0.6499540700619184,
"pass@16": 0.697671407935311,
"pass@32": 0.7391764349175001,
"pass@64": 0.7728700105439386,
"pass@128": 0.8022598870056498
},
"13": {
"pass@1": 0.226048197492163,
"pass@2": 0.2745025516500875,
"pass@4": 0.32170087379359735,
"pass@8": 0.36726090847720233,
"pass@16": 0.4118837384913241,
"pass@32": 0.4577964268716136,
"pass@64": 0.5060004900301981,
"pass@128": 0.554858934169279
},
"14": {
"pass@1": 0.1970404984423676,
"pass@2": 0.2447659871955257,
"pass@4": 0.2905214605183697,
"pass@8": 0.33432116638118264,
"pass@16": 0.37689919457954874,
"pass@32": 0.41849874775311363,
"pass@64": 0.46201871752633594,
"pass@128": 0.5109034267912772
},
"15": {
"pass@1": 0.190774024566474,
"pass@2": 0.24494681910245306,
"pass@4": 0.29680882700356104,
"pass@8": 0.3440605324830605,
"pass@16": 0.38846742937996726,
"pass@32": 0.43008799604881653,
"pass@64": 0.4654618905817254,
"pass@128": 0.4913294797687861
},
"16": {
"pass@1": 0.1630796370967742,
"pass@2": 0.1964297053594107,
"pass@4": 0.23033826426938572,
"pass@8": 0.2660781043146322,
"pass@16": 0.3019958633560044,
"pass@32": 0.33569879011745574,
"pass@64": 0.3686862876847773,
"pass@128": 0.4064516129032258
},
"17": {
"pass@1": 0.1676300578034682,
"pass@2": 0.20699551397296437,
"pass@4": 0.2408987041359715,
"pass@8": 0.271017564290959,
"pass@16": 0.29876652985892266,
"pass@32": 0.3264747152113181,
"pass@64": 0.3549548508714773,
"pass@128": 0.38439306358381503
},
"18": {
"pass@1": 0.1616517857142857,
"pass@2": 0.19595648200224966,
"pass@4": 0.23106420322459695,
"pass@8": 0.26888622832851616,
"pass@16": 0.3071732341417159,
"pass@32": 0.3420263056744111,
"pass@64": 0.3714714335108666,
"pass@128": 0.39714285714285713
},
"19": {
"pass@1": 0.15040822072072071,
"pass@2": 0.1915057281691139,
"pass@4": 0.2273034600967671,
"pass@8": 0.2598221642368353,
"pass@16": 0.2905735616301428,
"pass@32": 0.32042698971733125,
"pass@64": 0.35124146512270843,
"pass@128": 0.3843843843843844
},
"20": {
"pass@1": 0.15052552552552553,
"pass@2": 0.18885223806483653,
"pass@4": 0.22942468621602477,
"pass@8": 0.26979999128432536,
"pass@16": 0.30689533475643643,
"pass@32": 0.3397581402934893,
"pass@64": 0.370991289408613,
"pass@128": 0.4024024024024024
}
}
},
"teachers_in_school": {
"count": 821120,
"correct": 483803,
"answer_accuracy": 0.5891988990646921,
"avg_response_len": 242.341766124318,
"resp_tokens_sum": 198991671,
"pass_at_k": {
"pass@1": 0.5891988990646921,
"pass@2": 0.6227068003142299,
"pass@4": 0.650107249088997,
"pass@8": 0.6741994105674554,
"pass@16": 0.6956359816626693,
"pass@32": 0.7149534992963424,
"pass@64": 0.7325456550204769,
"pass@128": 0.7480904130943102
},
"per_op_pass_at_k": {
"10": {
"pass@1": 0.8878930214723927,
"pass@2": 0.9240016333751996,
"pass@4": 0.9427751138086263,
"pass@8": 0.9559837013558444,
"pass@16": 0.9669589357176731,
"pass@32": 0.9757400698048759,
"pass@64": 0.9820426961144421,
"pass@128": 0.9877300613496932
},
"2": {
"pass@1": 0.9987177051671733,
"pass@2": 0.9996395064978579,
"pass@4": 0.9999613177152248,
"pass@8": 0.9999996232076699,
"pass@16": 0.9999999999864305,
"pass@32": 1.0,
"pass@64": 1.0,
"pass@128": 1.0
},
"3": {
"pass@1": 0.9907670454545454,
"pass@2": 0.9960163892865667,
"pass@4": 0.9985778993534897,
"pass@8": 0.9996915957072215,
"pass@16": 0.9999815253902985,
"pass@32": 0.99999995938087,
"pass@64": 0.9999999999999987,
"pass@128": 1.0
},
"4": {
"pass@1": 0.9982664571005917,
"pass@2": 0.9998132687648512,
"pass@4": 0.999995557138198,
"pass@8": 0.9999999967656203,
"pass@16": 0.9999999999999998,
"pass@32": 1.0,
"pass@64": 1.0,
"pass@128": 1.0
},
"5": {
"pass@1": 0.9600317028985508,
"pass@2": 0.9691975493552438,
"pass@4": 0.9763000869456536,
"pass@8": 0.9828931121195829,
"pass@16": 0.989036439654314,
"pass@32": 0.9944838684919953,
"pass@64": 0.9981653755234783,
"pass@128": 1.0
},
"6": {
"pass@1": 0.9823379297994269,
"pass@2": 0.9910194058389551,
"pass@4": 0.9946701218221643,
"pass@8": 0.9965445299840145,
"pass@16": 0.9976712739922713,
"pass@32": 0.9983904225804993,
"pass@64": 0.9992893080034889,
"pass@128": 1.0
},
"7": {
"pass@1": 0.9681855130057804,
"pass@2": 0.9832421146056164,
"pass@4": 0.9914642009163593,
"pass@8": 0.9964758591421767,
"pass@16": 0.9990374180123244,
"pass@32": 0.9999126056145744,
"pass@64": 0.9999997855847184,
"pass@128": 1.0
},
"8": {
"pass@1": 0.9455765845070423,
"pass@2": 0.9712158284351782,
"pass@4": 0.9835400283591312,
"pass@8": 0.9910390348371692,
"pass@16": 0.9955726997113149,
"pass@32": 0.9980539040490879,
"pass@64": 0.9995741614459047,
"pass@128": 1.0
},
"9": {
"pass@1": 0.9390437874251497,
"pass@2": 0.9670264539818006,
"pass@4": 0.9783135702722786,
"pass@8": 0.9840906736702117,
"pass@16": 0.9879928605789021,
"pass@32": 0.9908513468886352,
"pass@64": 0.9930496212972972,
"pass@128": 0.9940119760479041
},
"11": {
"pass@1": 0.7309864457831325,
"pass@2": 0.811214839673655,
"pass@4": 0.8645121111555325,
"pass@8": 0.9001346785530842,
"pass@16": 0.9235781504819824,
"pass@32": 0.939017807747773,
"pass@64": 0.9485292552001391,
"pass@128": 0.9548192771084337
},
"12": {
"pass@1": 0.439042907523511,
"pass@2": 0.5317378835188704,
"pass@4": 0.6075961422729685,
"pass@8": 0.6683062360710545,
"pass@16": 0.7177272091161964,
"pass@32": 0.7598458853246168,
"pass@64": 0.7942021269976,
"pass@128": 0.8213166144200627
},
"13": {
"pass@1": 0.19227065826330533,
"pass@2": 0.23474754074858276,
"pass@4": 0.27791213361985206,
"pass@8": 0.32123267772057174,
"pass@16": 0.36547236780648024,
"pass@32": 0.412591392141311,
"pass@64": 0.46083134462740805,
"pass@128": 0.5042016806722689
},
"14": {
"pass@1": 0.20837902046783627,
"pass@2": 0.2522699670764838,
"pass@4": 0.2964683594923442,
"pass@8": 0.3416618793696011,
"pass@16": 0.3841336779597966,
"pass@32": 0.42375806001618826,
"pass@64": 0.4632207639904677,
"pass@128": 0.5029239766081871
},
"15": {
"pass@1": 0.19093276515151514,
"pass@2": 0.24199400501073715,
"pass@4": 0.2891956014020975,
"pass@8": 0.3326345282383242,
"pass@16": 0.37366558323041027,
"pass@32": 0.4128573661812689,
"pass@64": 0.4483166035224402,
"pass@128": 0.4727272727272727
},
"16": {
"pass@1": 0.15642806267806267,
"pass@2": 0.1904278691926329,
"pass@4": 0.22132350069489173,
"pass@8": 0.251268169779741,
"pass@16": 0.28180434335829546,
"pass@32": 0.3122157586188942,
"pass@64": 0.34211985927431315,
"pass@128": 0.3732193732193732
},
"17": {
"pass@1": 0.16779891304347827,
"pass@2": 0.21377104526336374,
"pass@4": 0.2616579632708955,
"pass@8": 0.3084358591590647,
"pass@16": 0.3485515761094292,
"pass@32": 0.38192734451252297,
"pass@64": 0.4116145438455866,
"pass@128": 0.43788819875776397
},
"18": {
"pass@1": 0.1518612132352941,
"pass@2": 0.1958187384205651,
"pass@4": 0.23828893419572547,
"pass@8": 0.27971765090438966,
"pass@16": 0.31909231139592203,
"pass@32": 0.3541919542192856,
"pass@64": 0.3879892602530647,
"pass@128": 0.4235294117647059
},
"19": {
"pass@1": 0.14004371279761904,
"pass@2": 0.17627703763592056,
"pass@4": 0.21033196631671033,
"pass@8": 0.24241868591666452,
"pass@16": 0.2725452956766527,
"pass@32": 0.3023936104609465,
"pass@64": 0.331830773125219,
"pass@128": 0.3601190476190476
},
"20": {
"pass@1": 0.1367421407185629,
"pass@2": 0.17771825934744678,
"pass@4": 0.21898993408009634,
"pass@8": 0.260066157867715,
"pass@16": 0.2989195527821917,
"pass@32": 0.3334463816599912,
"pass@64": 0.3631967204725436,
"pass@128": 0.38622754491017963
}
}
},
"movie_festival_awards": {
"count": 801152,
"correct": 471310,
"answer_accuracy": 0.5882903618788944,
"avg_response_len": 258.0569242790382,
"resp_tokens_sum": 206742821,
"pass_at_k": {
"pass@1": 0.5882903618788944,
"pass@2": 0.6212685622467475,
"pass@4": 0.6478714293112718,
"pass@8": 0.6709216329453926,
"pass@16": 0.6916683710373397,
"pass@32": 0.7106315691905148,
"pass@64": 0.728740000525103,
"pass@128": 0.7466048889598977
},
"per_op_pass_at_k": {
"10": {
"pass@1": 0.8983908582089553,
"pass@2": 0.9340132506757551,
"pass@4": 0.9530650669599133,
"pass@8": 0.9652801856862316,
"pass@16": 0.9745693164759084,
"pass@32": 0.9818461284408381,
"pass@64": 0.9871328155545905,
"pass@128": 0.991044776119403
},
"2": {
"pass@1": 0.9998660714285714,
"pass@2": 0.9999992969628797,
"pass@4": 1.0,
"pass@8": 1.0,
"pass@16": 1.0,
"pass@32": 1.0,
"pass@64": 1.0,
"pass@128": 1.0
},
"3": {
"pass@1": 0.9907069970845481,
"pass@2": 0.9970669721769471,
"pass@4": 0.9993563997831172,
"pass@8": 0.9999514691641971,
"pass@16": 0.9999996499221584,
"pass@32": 0.9999999999946803,
"pass@64": 1.0,
"pass@128": 1.0
},
"4": {
"pass@1": 0.9958196271929824,
"pass@2": 0.9977559342911083,
"pass@4": 0.9990211264710332,
"pass@8": 0.9997663197662907,
"pass@16": 0.9999856176620865,
"pass@32": 0.9999999740185194,
"pass@64": 0.9999999999999997,
"pass@128": 1.0
},
"5": {
"pass@1": 0.9548943014705882,
"pass@2": 0.9685079174386292,
"pass@4": 0.9784535587463331,
"pass@8": 0.9869293632462349,
"pass@16": 0.9934826994856965,
"pass@32": 0.9970310861536675,
"pass@64": 0.9985066302193607,
"pass@128": 1.0
},
"6": {
"pass@1": 0.9775483044164038,
"pass@2": 0.9891293716932861,
"pass@4": 0.9959075131381444,
"pass@8": 0.9991092920224423,
"pass@16": 0.9999352040645176,
"pass@32": 0.9999996662295679,
"pass@64": 0.9999999999997393,
"pass@128": 1.0
},
"7": {
"pass@1": 0.9744115901898734,
"pass@2": 0.9867456923402768,
"pass@4": 0.9912063912303684,
"pass@8": 0.9935151685224703,
"pass@16": 0.9954041601504686,
"pass@32": 0.9972167179953726,
"pass@64": 0.9990265956175474,
"pass@128": 1.0
},
"8": {
"pass@1": 0.9515531156156156,
"pass@2": 0.9743795222387744,
"pass@4": 0.9852795146102226,
"pass@8": 0.9899431408996546,
"pass@16": 0.9925003924523234,
"pass@32": 0.9950902478963195,
"pass@64": 0.9976578190444356,
"pass@128": 1.0
},
"9": {
"pass@1": 0.9379521704180064,
"pass@2": 0.9720591912043948,
"pass@4": 0.9858236963747689,
"pass@8": 0.9932814976290268,
"pass@16": 0.9968830356374474,
"pass@32": 0.9981725276413653,
"pass@64": 0.9992024635603814,
"pass@128": 1.0
},
"11": {
"pass@1": 0.7650545634920635,
"pass@2": 0.8431473409573809,
"pass@4": 0.8877359086066622,
"pass@8": 0.9145899228176874,
"pass@16": 0.9334761048558039,
"pass@32": 0.9489928847114996,
"pass@64": 0.9642010673849086,
"pass@128": 0.9809523809523809
},
"12": {
"pass@1": 0.4054615825688073,
"pass@2": 0.49957183534397626,
"pass@4": 0.5768995727598271,
"pass@8": 0.639355428961972,
"pass@16": 0.6924379381519293,
"pass@32": 0.7389465717283092,
"pass@64": 0.7833812207530858,
"pass@128": 0.8256880733944955
},
"13": {
"pass@1": 0.21805073302469136,
"pass@2": 0.2614506628511712,
"pass@4": 0.30509700407819385,
"pass@8": 0.34843945585161107,
"pass@16": 0.3892841325907048,
"pass@32": 0.42707158587605587,
"pass@64": 0.46350207303509766,
"pass@128": 0.5
},
"14": {
"pass@1": 0.19482566765578635,
"pass@2": 0.24658432440010283,
"pass@4": 0.2977982871762691,
"pass@8": 0.34710213397424194,
"pass@16": 0.39305571585018145,
"pass@32": 0.4354334888531387,
"pass@64": 0.4760201367236571,
"pass@128": 0.516320474777448
},
"15": {
"pass@1": 0.15263310185185186,
"pass@2": 0.20657633724603863,
"pass@4": 0.26040626837154607,
"pass@8": 0.31066200412971595,
"pass@16": 0.3575054534309268,
"pass@32": 0.40181191645690056,
"pass@64": 0.4460592807353767,
"pass@128": 0.49074074074074076
},
"16": {
"pass@1": 0.15613477138643067,
"pass@2": 0.1863313822497852,
"pass@4": 0.21273005393131156,
"pass@8": 0.23865739981671225,
"pass@16": 0.2676959517806339,
"pass@32": 0.3011310763692745,
"pass@64": 0.3362407178410369,
"pass@128": 0.37168141592920356
},
"17": {
"pass@1": 0.1378012048192771,
"pass@2": 0.17401619272365054,
"pass@4": 0.21124320340981484,
"pass@8": 0.24873698272180494,
"pass@16": 0.28468715470923933,
"pass@32": 0.31684364503562207,
"pass@64": 0.3460631347877581,
"pass@128": 0.37650602409638556
},
"18": {
"pass@1": 0.16940524193548387,
"pass@2": 0.20793830962661927,
"pass@4": 0.24442847708552565,
"pass@8": 0.27696590133200394,
"pass@16": 0.3047539165997947,
"pass@32": 0.329140512075412,
"pass@64": 0.3525837977178592,
"pass@128": 0.3774193548387097
},
"19": {
"pass@1": 0.13812311178247735,
"pass@2": 0.16759305790137258,
"pass@4": 0.19758813267676884,
"pass@8": 0.2284911534750015,
"pass@16": 0.2603002938970115,
"pass@32": 0.29249731296624343,
"pass@64": 0.3260332407783438,
"pass@128": 0.36253776435045315
},
"20": {
"pass@1": 0.13072447447447447,
"pass@2": 0.16936079780567978,
"pass@4": 0.2094049935762534,
"pass@8": 0.25098284669882537,
"pass@16": 0.291417867530838,
"pass@32": 0.32739810777438544,
"pass@64": 0.35806392636422313,
"pass@128": 0.3813813813813814
}
}
}
}
}
]

38
special_tokens_map.json Normal file
View File

@@ -0,0 +1,38 @@
{
"additional_special_tokens": [
"<question>",
"</question>",
"<solution>",
"</solution>",
"<answer>",
"</answer>"
],
"bos_token": {
"content": "[BOS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "[EOS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "[PAD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "[UNK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

10104
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

143
tokenizer_config.json Normal file
View File

@@ -0,0 +1,143 @@
{
"add_prefix_space": false,
"added_tokens_decoder": {
"0": {
"content": "[UNK]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "[PAD]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "[BOS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"3": {
"content": "[EOS]",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"4": {
"content": "<question>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"5": {
"content": "</question>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"6": {
"content": "<solution>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"7": {
"content": "</solution>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"8": {
"content": "<answer>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"9": {
"content": "</answer>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2196": {
"content": "<special_token_0>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2197": {
"content": "<special_token_1>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2198": {
"content": "<special_token_2>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2199": {
"content": "<special_token_3>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2200": {
"content": "<special_token_4>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": [
"<question>",
"</question>",
"<solution>",
"</solution>",
"<answer>",
"</answer>"
],
"bos_token": "[BOS]",
"clean_up_tokenization_spaces": false,
"eos_token": "[EOS]",
"extra_special_tokens": {},
"model_max_length": 1000000000000000019884624838656,
"pad_token": "[PAD]",
"padding_side": "right",
"split_special_tokens": false,
"tokenizer_class": "PreTrainedTokenizer",
"unk_token": "[UNK]"
}