初始化项目,由ModelHub XC社区提供模型

Model: hadadxyz/OpenSonnet-Lite-MAX
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-24 05:50:15 +08:00
commit b0bf055b3e
12 changed files with 937 additions and 0 deletions

160
.eval_results/gsm8k.json Normal file
View File

@@ -0,0 +1,160 @@
{
"results": {
"gsm8k": {
"alias": "gsm8k",
"exact_match,strict-match": 0.8521607278241091,
"exact_match_stderr,strict-match": 0.00977682767914391,
"exact_match,flexible-extract": 0.8529188779378317,
"exact_match_stderr,flexible-extract": 0.009756063660359896
}
},
"group_subtasks": {
"gsm8k": []
},
"configs": {
"gsm8k": {
"task": "gsm8k",
"tag": [
"math_word_problems"
],
"dataset_path": "openai/gsm8k",
"dataset_name": "main",
"training_split": "train",
"test_split": "test",
"fewshot_split": "train",
"doc_to_text": "Question: {{question}}\nAnswer:",
"doc_to_target": "{{answer}}",
"unsafe_code": false,
"description": "",
"target_delimiter": " ",
"fewshot_delimiter": "\n\n",
"fewshot_config": {
"sampler": "default",
"split": "train",
"process_docs": null,
"fewshot_indices": null,
"samples": null,
"doc_to_text": "Question: {{question}}\nAnswer:",
"doc_to_choice": null,
"doc_to_target": "{{answer}}",
"gen_prefix": null,
"fewshot_delimiter": "\n\n",
"target_delimiter": " "
},
"num_fewshot": 8,
"metric_list": [
{
"metric": "exact_match",
"aggregation": "mean",
"higher_is_better": true,
"ignore_case": true,
"ignore_punctuation": false,
"regexes_to_ignore": [
",",
"\\$",
"(?s).*#### ",
"\\.$"
]
}
],
"output_type": "generate_until",
"generation_kwargs": {
"until": [
"Question:",
"</s>",
"<|im_end|>"
],
"do_sample": false,
"temperature": 0.6,
"max_gen_toks": 131072,
"max_tokens": 131072,
"top_p": 0.95,
"top_k": 20,
"min_p": 0,
"repeat_penalty": 1.0,
"presence_penalty": 1.0,
"num_ctx": 262144
},
"repeats": 1,
"filter_list": [
{
"name": "strict-match",
"filter": [
{
"function": "regex",
"regex_pattern": "#### (\\-?[0-9\\.\\,]+)"
},
{
"function": "take_first"
}
]
},
{
"name": "flexible-extract",
"filter": [
{
"function": "regex",
"group_select": -1,
"regex_pattern": "(-?[$0-9.,]{2,})|(-?[0-9]+)"
},
{
"function": "take_first"
}
]
}
],
"should_decontaminate": false,
"metadata": {
"version": 3.0
}
}
},
"versions": {
"gsm8k": 3.0
},
"n-shot": {
"gsm8k": 8
},
"higher_is_better": {
"gsm8k": {
"exact_match": true
}
},
"n-samples": {
"gsm8k": {
"original": 1319,
"effective": 1319
}
},
"config": {
"model": "LocalChatCompletion",
"model_args": null,
"batch_size": 1,
"batch_sizes": [],
"device": null,
"use_cache": null,
"limit": null,
"bootstrap_iters": 100000,
"gen_kwargs": {
"max_gen_toks": 131072,
"max_tokens": 131072,
"temperature": 0.6,
"top_p": 0.95,
"top_k": 20,
"min_p": 0,
"repeat_penalty": 1.0,
"presence_penalty": 1.0,
"num_ctx": 262144
},
"random_seed": 0,
"numpy_seed": 1234,
"torch_seed": 1234,
"fewshot_seed": 1234
},
"git_hash": null,
"date": 1778390867.1640942,
"pretty_env_info": "PyTorch version: 2.6.0+cpu\nIs debug build: False\nCUDA used to build PyTorch: Could not collect\nROCM used to build PyTorch: N/A\n\nOS: Debian GNU/Linux 12 (bookworm) (x86_64)\nGCC version: (Debian 12.2.0-14+deb12u1) 12.2.0\nClang version: Could not collect\nCMake version: version 3.25.1\nLibc version: glibc-2.36\n\nPython version: 3.11.5 (main, Aug 26 2023, 07:22:50) [Clang 16.0.3 ] (64-bit runtime)\nPython platform: Linux-4.4.0-x86_64-with-glibc2.36\nIs CUDA available: False\nCUDA runtime version: 12.8.93\nCUDA_MODULE_LOADING set to: N/A\nGPU models and configuration: GPU 0: NVIDIA B200\nNvidia driver version: 580.95.05\ncuDNN version: Probably one of the following:\n/usr/lib/x86_64-linux-gnu/libcudnn.so.9.22.0\n/usr/lib/x86_64-linux-gnu/libcudnn_adv.so.9.22.0\n/usr/lib/x86_64-linux-gnu/libcudnn_cnn.so.9.22.0\n/usr/lib/x86_64-linux-gnu/libcudnn_engines_precompiled.so.9.22.0\n/usr/lib/x86_64-linux-gnu/libcudnn_engines_runtime_compiled.so.9.22.0\n/usr/lib/x86_64-linux-gnu/libcudnn_engines_tensor_ir.so.9.22.0\n/usr/lib/x86_64-linux-gnu/libcudnn_ext.so.9.22.0\n/usr/lib/x86_64-linux-gnu/libcudnn_graph.so.9.22.0\n/usr/lib/x86_64-linux-gnu/libcudnn_heuristic.so.9.22.0\n/usr/lib/x86_64-linux-gnu/libcudnn_ops.so.9.22.0\nHIP runtime version: N/A\nMIOpen runtime version: N/A\nIs XNNPACK available: True\n\nCPU:\nArchitecture: x86_64\nCPU op-mode(s): 32-bit, 64-bit\nAddress sizes: 46 bits physical, 48 bits virtual\nByte Order: Little Endian\nCPU(s): 17\nOn-line CPU(s) list: 0-16\nVendor ID: AuthenticAMD\nModel name: unknown\nCPU family: 191\nModel: 2\nThread(s) per core: 1\nCore(s) per socket: 17\nSocket(s): 1\nStepping: unknown\nBogoMIPS: 5035.66\nFlags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a 
misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid cqm rdt_a avx512f avx512dq rdseed adx smap clwb avx512cd sha_ni avx512bw avx512vl xsaveopt xsavec xgetbv1 xsaves avx512vbmi umip pku ospke avx512_vbmi2 gfni vaes vpclmulqdq avx512_vnni avx512_bitalg avx512_vpopcntdq la57 rdpid movdiri movdir64b fsrm avx512_vp2intersect flush_l1d\nVirtualization: AMD-V\n\nVersions of relevant libraries:\n[pip3] numpy==1.26.4\n[pip3] torch==2.6.0+cpu\n[conda] Could not collect",
"transformers_version": "5.5.1",
"lm_eval_version": "0.4.11",
"upper_git_hash": null
}

148
.eval_results/gsm8k.md Normal file
View File

@@ -0,0 +1,148 @@
# Instruction
````md
You are OpenSonnet, a large language model trained by the Open Source community. You are based on the Qwen3 architecture.
You are a highly careful mathematical reasoning assistant.
Your task is to solve grade-school math word problems with maximum accuracy.
---
# Goals
* Read the problem carefully.
* Identify all relevant numbers, entities, and relationships.
* Solve the problem step by step.
* Avoid mistakes caused by missing details, distraction, or rushed arithmetic.
* Give the final answer in the exact required format.
---
# Core Reasoning Rules
* Always parse the question before calculating.
* Determine what the question is asking for.
* Translate words into mathematical operations:
  - 'total', 'altogether', 'in all' --> addition
  - 'left', 'remain', 'difference' --> subtraction
  - 'each', 'per', 'every' --> multiplication or division depending on context
  - 'half as many' --> divide by 2
  - 'twice as many' --> multiply by 2
  - 'shared equally' --> division
- Track each entity separately when multiple people, objects, or time periods are involved.
- Ignore irrelevant information.
- Use exact arithmetic whenever possible.
- Re-check every intermediate result before producing the final answer.
- If the problem involves multiple steps, solve them in a logical order.
- If a number is expressed in words, convert it correctly.
- If a fraction, decimal, or ratio appears, handle it carefully.
- If the result should be a whole number, verify that the interpretation is consistent.
- If there are multiple possible interpretations, choose the one most directly supported by the wording.
---
# Robustness Rules
* Do not guess.
* Do not skip steps.
* Do not let earlier mistakes propagate.
* Recompute suspicious calculations.
* Sanity-check the answer against the question.
* Be especially careful with:
  - multi-step arithmetic
  - rates and ratios
  - fractions and percentages
  - time-based changes
  - repeated operations
  - comparisons like 'more than', 'less than', and 'how many left'
---
# Internal Reasoning Policy
* Think step by step internally.
* Keep intermediate values consistent.
* Verify the final result before answering.
---
# Output Format
* Return only the final answer in exactly this format:
```
#### {final_answer}
```
---
# Output Constraints
* Do not include any explanation.
* Do not include any extra text before or after the final answer.
* Do not use bullets, numbering, or markdown outside the required final-answer format.
* The final answer should usually be a single integer or number.
* Preserve the exact required format strictly.
---
# Example Behavior
* Question:
```
Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?
```
* Your internal reasoning/thinking step-by-step (example only):
```
Natalia sold 48/2 = <<48/2=24>>24 clips in May.
Natalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.
```
* Output:
```
#### 72
```
````

24
.eval_results/gsm8k.yaml Normal file
View File

@@ -0,0 +1,24 @@
- dataset:
    id: openai/gsm8k
    task_id: gsm8k
  value: 85.22
  date: "2026-05-10"
  source:
    url: https://huggingface.co/hadadxyz/OpenSonnet-Lite-MAX
    name: Model Card
    user: hadadxyz
  notes: |
    framework: lm-evaluation-harness
    n-shot: 8
    batch_size: 1
    generation_kwargs:
      num_ctx: 262144
      max_tokens: 131072
      temperature: 0.6
      top_p: 0.95
      top_k: 20
      min_p: 0
      repeat_penalty: 1.0
      presence_penalty: 1.0
    metric: exact_match
    match_type: strict-match