commit 2feb2eab289b7ccab2ecb9b2995215ccb12d52a6 Author: ModelHub XC Date: Wed May 6 13:43:43 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: g023/qwen3-tiny-v2-finetuned Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..ae756a3 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/Modelfile b/Modelfile new file mode 100644 index 0000000..72f4514 --- /dev/null +++ b/Modelfile @@ -0,0 +1,78 @@ +FROM ./Qwen3-g023-tiny-v2-FT-Q8_0.gguf +TEMPLATE """ +{{- $lastUserIdx := -1 -}} +{{- range $idx, $msg := .Messages -}} +{{- if eq $msg.Role "user" }}{{ $lastUserIdx = $idx }}{{ end -}} +{{- end }} +{{- if or .System .Tools }}<|im_start|>system +{{ if .System }} +{{ .System }} +{{- end }} +{{- if .Tools }} + +# Tools + +You may call one or more functions to assist with the user query. + +You are provided with function signatures within <tools></tools> XML tags: +<tools> +{{- range .Tools }} +{"type": "function", "function": {{ .Function }}} +{{- end }} +</tools> + +For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags: +<tool_call> +{"name": <function-name>, "arguments": <args-json-object>} +</tool_call> +{{- end -}} +<|im_end|> +{{ end }} +{{- range $i, $_ := .Messages }} +{{- $last := eq (len (slice $.Messages $i)) 1 -}} +{{- if eq .Role "user" }}<|im_start|>user +{{ .Content }} +{{- if and $.IsThinkSet (eq $i $lastUserIdx) }} + {{- if $.Think -}} + {{- " "}}/think + {{- else -}} + {{- " "}}/no_think + {{- end -}} +{{- end }}<|im_end|> +{{ else if eq .Role "assistant" }}<|im_start|>assistant +{{ if (and $.IsThinkSet (and .Thinking (or $last (gt $i $lastUserIdx)))) -}} +<think>{{ .Thinking }}</think> +{{ end -}} +{{ if .Content }}{{ .Content }} +{{- else if .ToolCalls }}<tool_call> +{{ range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}} +{{ end }}</tool_call> +{{- end }}{{ if not $last }}<|im_end|> +{{ end }} +{{- else if eq .Role "tool" }}<|im_start|>user +<tool_response> +{{ .Content }} +</tool_response><|im_end|> +{{ end }} +{{- if and (ne .Role 
"assistant") $last }}<|im_start|>assistant +{{ if and $.IsThinkSet (not $.Think) -}} +<think> + +</think> + +{{ end -}} +{{ end }} +{{- end }} +""" +PARAMETER num_ctx 40000 +PARAMETER repeat_last_n 16384 +PARAMETER stop <|im_start|> +PARAMETER stop <|im_end|> +PARAMETER temperature 0.65 +PARAMETER top_p 0.9 +PARAMETER top_k 20 +PARAMETER min_p 0.0 +PARAMETER repeat_penalty 1.05 +PARAMETER presence_penalty 0.1 +PARAMETER frequency_penalty 0.1 +SYSTEM "You are a helpful assistant." diff --git a/Qwen3-g023-tiny-v2-FT-Q8_0.gguf b/Qwen3-g023-tiny-v2-FT-Q8_0.gguf new file mode 100644 index 0000000..2dccb5f --- /dev/null +++ b/Qwen3-g023-tiny-v2-FT-Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0ed606874dbfaf528ea1bef31edc70d63f4ba91707162871edb3690ea2ed0b9 +size 1941413632 diff --git a/README.md b/README.md new file mode 100644 index 0000000..5f4edcc --- /dev/null +++ b/README.md @@ -0,0 +1,68 @@ +--- +license: apache-2.0 +language: + - en +base_model: g023/qwen3-tiny-v2 +tags: + - qwen3 + - gguf + - q8_0 + - finetuned + - grpo + - lora-merged + - text-generation +pipeline_tag: text-generation +library_name: llama.cpp +quantized_by: g023 +--- + +# Qwen3-g023-tiny-v2-FT-Q8_0 - GRPO Finetuned Q8_0 GGUF Export + +https://huggingface.co/g023/qwen3-tiny-v2-finetuned/ + +Q8_0 GGUF export of a GRPO finetuned Qwen3 model to achieve improved reasoning and reduced repetition. 
+Original source model: https://huggingface.co/g023/qwen3-tiny-v2 + +*THIS IS A WIP (WORK IN PROGRESS)* + +## Files + +- `Qwen3-g023-tiny-v2-FT-Q8_0.gguf`: Q8_0 GGUF model (~1.94 GB / 1.81 GiB) +- `Modelfile`: Ollama template + tested default sampling settings +- `params_best.json`: Best sampling parameters from the automated sweep +- `sweep_results.json`: Full sweep results and per-test outcomes + +## Tested Best Parameters (Default in Modelfile) + +- `temperature`: 0.65 +- `top_p`: 0.9 +- `top_k`: 20 +- `min_p`: 0.0 +- `repeat_penalty`: 1.05 +- `presence_penalty`: 0.1 +- `frequency_penalty`: 0.1 +- `num_ctx`: 40000 + +## Usage (Ollama) + +```bash +ollama create qwen3-g023-tiny-v2-FT-Q8_0 -f Modelfile +ollama run qwen3-g023-tiny-v2-FT-Q8_0 + +# thinking on +ollama run qwen3-g023-tiny-v2-FT-Q8_0 --think "Explain why the sky is blue" + +# thinking off +ollama run qwen3-g023-tiny-v2-FT-Q8_0 --think=false "Explain why the sky is blue" +``` + +### Or pull directly from Hugging Face with Ollama: + +```bash +ollama run hf.co/g023/qwen3-tiny-v2-finetuned:Q8_0 +``` + +## Notes + +- The template is the Qwen3-compatible chat template with think/no_think handling. +- If you want stricter non-thinking behavior, compare the alternatives in `sweep_results.json`. 
diff --git a/params_best.json b/params_best.json new file mode 100644 index 0000000..a14ec88 --- /dev/null +++ b/params_best.json @@ -0,0 +1,10 @@ +{ + "label": "balanced_01", + "temperature": 0.65, + "top_p": 0.9, + "top_k": 20, + "min_p": 0.0, + "repeat_penalty": 1.05, + "presence_penalty": 0.1, + "frequency_penalty": 0.1 +} \ No newline at end of file diff --git a/sweep_results.json b/sweep_results.json new file mode 100644 index 0000000..d80720a --- /dev/null +++ b/sweep_results.json @@ -0,0 +1,1272 @@ +{ + "model": "qwen3-best-p2-finetuned-grpo-q8", + "best": { + "params": { + "label": "balanced_01", + "temperature": 0.65, + "top_p": 0.9, + "top_k": 20, + "min_p": 0.0, + "repeat_penalty": 1.05, + "presence_penalty": 0.1, + "frequency_penalty": 0.1 + }, + "pass_count": 8, + "total_tests": 8, + "pass_rate": 1.0, + "score_sum": 18.5, + "score_avg": 2.3125, + "tests": [ + { + "test": "factual_france", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "The capital of France is Paris.", + "think": true + }, + { + "test": "factual_japan", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 5, + "thinking_len": 0 + }, + "response_preview": "Tokyo", + "think": false + }, + { + "test": "math_multiply", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 0.9375, + "unique_ratio": 0.8889, + "content_len": 32, + "thinking_len": 0 + }, + "response_preview": "17 * 19 = 323 The answer is: 323", + "think": true + }, + { + "test": "math_sqrt", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 2, + "thinking_len": 0 + }, + "response_preview": "12", + "think": false + }, + { + "test": "reasoning", + "passed": true, + "score": 2.75, + "issues": [], + "metrics": { + 
"readable_ratio": 1.0, + "unique_ratio": 0.5909, + "content_len": 124, + "thinking_len": 0 + }, + "response_preview": "Yes, because if all bloops are razzies and all razzies are lazzies, then all bloops must be lazzies. All bloops are lazzies.", + "think": true + }, + { + "test": "coding", + "passed": true, + "score": 2.5, + "issues": [], + "metrics": { + "readable_ratio": 0.9677, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "def add(a, b): return a + b", + "think": true + }, + { + "test": "short_planets", + "passed": true, + "score": 3.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 61, + "thinking_len": 0 + }, + "response_preview": "Three planets in a single sentence: Earth, Mars, and Jupiter.", + "think": false + }, + { + "test": "coherent_explanation", + "passed": true, + "score": 2.25, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 0.9286, + "content_len": 173, + "thinking_len": 0 + }, + "response_preview": "Photosynthesis is the process by which plants convert light energy into chemical energy, storing it in the form of glucose. 
This process also releases oxygen as a byproduct.", + "think": false + } + ] + }, + "all_candidates": [ + { + "params": { + "label": "qwen_think_default", + "temperature": 0.6, + "top_p": 0.95, + "top_k": 20, + "min_p": 0.0, + "repeat_penalty": 1.0, + "presence_penalty": 0.0, + "frequency_penalty": 0.0 + }, + "pass_count": 8, + "total_tests": 8, + "pass_rate": 1.0, + "score_sum": 18.5, + "score_avg": 2.3125, + "tests": [ + { + "test": "factual_france", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "The capital of France is Paris.", + "think": true + }, + { + "test": "factual_japan", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 5, + "thinking_len": 0 + }, + "response_preview": "Tokyo", + "think": false + }, + { + "test": "math_multiply", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 0.9375, + "unique_ratio": 0.8889, + "content_len": 32, + "thinking_len": 0 + }, + "response_preview": "17 * 19 = 323 The answer is: 323", + "think": true + }, + { + "test": "math_sqrt", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 2, + "thinking_len": 0 + }, + "response_preview": "12", + "think": false + }, + { + "test": "reasoning", + "passed": true, + "score": 2.75, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 0.5909, + "content_len": 124, + "thinking_len": 0 + }, + "response_preview": "Yes, because if all bloops are razzies and all razzies are lazzies, then all bloops must be lazzies. 
All bloops are lazzies.", + "think": true + }, + { + "test": "coding", + "passed": true, + "score": 2.5, + "issues": [], + "metrics": { + "readable_ratio": 0.9677, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "def add(a, b): return a + b", + "think": true + }, + { + "test": "short_planets", + "passed": true, + "score": 3.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 61, + "thinking_len": 0 + }, + "response_preview": "Three planets in a single sentence: Earth, Mars, and Jupiter.", + "think": false + }, + { + "test": "coherent_explanation", + "passed": true, + "score": 2.25, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 0.9286, + "content_len": 173, + "thinking_len": 0 + }, + "response_preview": "Photosynthesis is the process by which plants convert light energy into chemical energy, storing it in the form of glucose. This process also releases oxygen as a byproduct.", + "think": false + } + ] + }, + { + "params": { + "label": "qwen_nonthink_default", + "temperature": 0.7, + "top_p": 0.8, + "top_k": 20, + "min_p": 0.0, + "repeat_penalty": 1.0, + "presence_penalty": 0.0, + "frequency_penalty": 0.0 + }, + "pass_count": 7, + "total_tests": 8, + "pass_rate": 0.875, + "score_sum": 17.5, + "score_avg": 2.1875, + "tests": [ + { + "test": "factual_france", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "The capital of France is Paris.", + "think": true + }, + { + "test": "factual_japan", + "passed": false, + "score": 1.0, + "issues": [ + "missing expected token 'Tokyo'" + ], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 5, + "thinking_len": 0 + }, + "response_preview": "Japan", + "think": false + }, + { + "test": "math_multiply", + "passed": true, + "score": 2.0, + "issues": [], + 
"metrics": { + "readable_ratio": 0.9375, + "unique_ratio": 0.8889, + "content_len": 32, + "thinking_len": 0 + }, + "response_preview": "17 * 19 = 323 The answer is: 323", + "think": true + }, + { + "test": "math_sqrt", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 2, + "thinking_len": 0 + }, + "response_preview": "12", + "think": false + }, + { + "test": "reasoning", + "passed": true, + "score": 2.75, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 0.6667, + "content_len": 132, + "thinking_len": 0 + }, + "response_preview": "Yes, because if all bloops are razzies and all razzies are lazzies, then all bloops must be lazzies as well. All bloops are lazzies.", + "think": true + }, + { + "test": "coding", + "passed": true, + "score": 2.5, + "issues": [], + "metrics": { + "readable_ratio": 0.9677, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "def add(a, b): return a + b", + "think": true + }, + { + "test": "short_planets", + "passed": true, + "score": 3.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 59, + "thinking_len": 0 + }, + "response_preview": "The three planets in question are Earth, Mars, and Jupiter.", + "think": false + }, + { + "test": "coherent_explanation", + "passed": true, + "score": 2.25, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 0.9286, + "content_len": 173, + "thinking_len": 0 + }, + "response_preview": "Photosynthesis is the process by which plants convert light energy into chemical energy, storing it in the form of glucose. 
This process also releases oxygen as a byproduct.", + "think": false + } + ] + }, + { + "params": { + "label": "balanced_01", + "temperature": 0.65, + "top_p": 0.9, + "top_k": 20, + "min_p": 0.0, + "repeat_penalty": 1.05, + "presence_penalty": 0.1, + "frequency_penalty": 0.1 + }, + "pass_count": 8, + "total_tests": 8, + "pass_rate": 1.0, + "score_sum": 18.5, + "score_avg": 2.3125, + "tests": [ + { + "test": "factual_france", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "The capital of France is Paris.", + "think": true + }, + { + "test": "factual_japan", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 5, + "thinking_len": 0 + }, + "response_preview": "Tokyo", + "think": false + }, + { + "test": "math_multiply", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 0.9375, + "unique_ratio": 0.8889, + "content_len": 32, + "thinking_len": 0 + }, + "response_preview": "17 * 19 = 323 The answer is: 323", + "think": true + }, + { + "test": "math_sqrt", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 2, + "thinking_len": 0 + }, + "response_preview": "12", + "think": false + }, + { + "test": "reasoning", + "passed": true, + "score": 2.75, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 0.5909, + "content_len": 124, + "thinking_len": 0 + }, + "response_preview": "Yes, because if all bloops are razzies and all razzies are lazzies, then all bloops must be lazzies. 
All bloops are lazzies.", + "think": true + }, + { + "test": "coding", + "passed": true, + "score": 2.5, + "issues": [], + "metrics": { + "readable_ratio": 0.9677, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "def add(a, b): return a + b", + "think": true + }, + { + "test": "short_planets", + "passed": true, + "score": 3.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 61, + "thinking_len": 0 + }, + "response_preview": "Three planets in a single sentence: Earth, Mars, and Jupiter.", + "think": false + }, + { + "test": "coherent_explanation", + "passed": true, + "score": 2.25, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 0.9286, + "content_len": 173, + "thinking_len": 0 + }, + "response_preview": "Photosynthesis is the process by which plants convert light energy into chemical energy, storing it in the form of glucose. This process also releases oxygen as a byproduct.", + "think": false + } + ] + }, + { + "params": { + "label": "balanced_02", + "temperature": 0.65, + "top_p": 0.9, + "top_k": 30, + "min_p": 0.0, + "repeat_penalty": 1.1, + "presence_penalty": 0.2, + "frequency_penalty": 0.2 + }, + "pass_count": 8, + "total_tests": 8, + "pass_rate": 1.0, + "score_sum": 18.5, + "score_avg": 2.3125, + "tests": [ + { + "test": "factual_france", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "The capital of France is Paris.", + "think": true + }, + { + "test": "factual_japan", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 5, + "thinking_len": 0 + }, + "response_preview": "Tokyo", + "think": false + }, + { + "test": "math_multiply", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 0.9375, + 
"unique_ratio": 0.8889, + "content_len": 32, + "thinking_len": 0 + }, + "response_preview": "17 * 19 = 323 The answer is: 323", + "think": true + }, + { + "test": "math_sqrt", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 2, + "thinking_len": 0 + }, + "response_preview": "12", + "think": false + }, + { + "test": "reasoning", + "passed": true, + "score": 2.75, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 0.5909, + "content_len": 124, + "thinking_len": 0 + }, + "response_preview": "Yes, because if all bloops are razzies and all razzies are lazzies, then all bloops must be lazzies. All bloops are lazzies.", + "think": true + }, + { + "test": "coding", + "passed": true, + "score": 2.5, + "issues": [], + "metrics": { + "readable_ratio": 0.9677, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "def add(a, b): return a + b", + "think": true + }, + { + "test": "short_planets", + "passed": true, + "score": 3.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 59, + "thinking_len": 0 + }, + "response_preview": "The three planets in question are Earth, Mars, and Jupiter.", + "think": false + }, + { + "test": "coherent_explanation", + "passed": true, + "score": 2.25, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 0.6818, + "content_len": 421, + "thinking_len": 0 + }, + "response_preview": "Photosynthesis is the process by which plants, algae, and some bacteria convert light energy into chemical energy stored in glucose. 
This process involves the absorption of sunlight, the splitting of water molecules, and", + "think": false + } + ] + }, + { + "params": { + "label": "anti_repeat_01", + "temperature": 0.6, + "top_p": 0.95, + "top_k": 20, + "min_p": 0.0, + "repeat_penalty": 1.1, + "presence_penalty": 0.4, + "frequency_penalty": 0.4 + }, + "pass_count": 8, + "total_tests": 8, + "pass_rate": 1.0, + "score_sum": 18.5, + "score_avg": 2.3125, + "tests": [ + { + "test": "factual_france", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "The capital of France is Paris.", + "think": true + }, + { + "test": "factual_japan", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 5, + "thinking_len": 0 + }, + "response_preview": "Tokyo", + "think": false + }, + { + "test": "math_multiply", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 0.9375, + "unique_ratio": 0.8889, + "content_len": 32, + "thinking_len": 0 + }, + "response_preview": "17 * 19 = 323 The answer is: 323", + "think": true + }, + { + "test": "math_sqrt", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 2, + "thinking_len": 0 + }, + "response_preview": "12", + "think": false + }, + { + "test": "reasoning", + "passed": true, + "score": 2.75, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 0.75, + "content_len": 108, + "thinking_len": 0 + }, + "response_preview": "Yes, because if all bloops are razzies and all razzies are lazzies, then all bloops must be lazzies as well.", + "think": true + }, + { + "test": "coding", + "passed": true, + "score": 2.5, + "issues": [], + "metrics": { + "readable_ratio": 0.9677, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + 
"response_preview": "def add(a, b): return a + b", + "think": true + }, + { + "test": "short_planets", + "passed": true, + "score": 3.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 61, + "thinking_len": 0 + }, + "response_preview": "Three planets in a single sentence: Earth, Mars, and Jupiter.", + "think": false + }, + { + "test": "coherent_explanation", + "passed": true, + "score": 2.25, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 0.9286, + "content_len": 173, + "thinking_len": 0 + }, + "response_preview": "Photosynthesis is the process by which plants convert light energy into chemical energy, storing it in the form of glucose. This process also releases oxygen as a byproduct.", + "think": false + } + ] + }, + { + "params": { + "label": "anti_repeat_02", + "temperature": 0.7, + "top_p": 0.9, + "top_k": 40, + "min_p": 0.05, + "repeat_penalty": 1.15, + "presence_penalty": 0.5, + "frequency_penalty": 0.4 + }, + "pass_count": 7, + "total_tests": 8, + "pass_rate": 0.875, + "score_sum": 17.5, + "score_avg": 2.1875, + "tests": [ + { + "test": "factual_france", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "The capital of France is Paris.", + "think": true + }, + { + "test": "factual_japan", + "passed": false, + "score": 1.0, + "issues": [ + "missing expected token 'Tokyo'" + ], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 5, + "thinking_len": 0 + }, + "response_preview": "Japan", + "think": false + }, + { + "test": "math_multiply", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 0.9375, + "unique_ratio": 0.8889, + "content_len": 32, + "thinking_len": 0 + }, + "response_preview": "17 * 19 = 323 The answer is: 323", + "think": true + }, + { + "test": "math_sqrt", + "passed": true, + 
"score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 2, + "thinking_len": 0 + }, + "response_preview": "12", + "think": false + }, + { + "test": "reasoning", + "passed": true, + "score": 2.75, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 0.5909, + "content_len": 124, + "thinking_len": 0 + }, + "response_preview": "Yes, because if all bloops are razzies and all razzies are lazzies, then all bloops must be lazzies. All bloops are lazzies.", + "think": true + }, + { + "test": "coding", + "passed": true, + "score": 2.5, + "issues": [], + "metrics": { + "readable_ratio": 0.9677, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "def add(a, b): return a + b", + "think": true + }, + { + "test": "short_planets", + "passed": true, + "score": 3.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 61, + "thinking_len": 0 + }, + "response_preview": "Three planets in a single sentence: Earth, Mars, and Jupiter.", + "think": false + }, + { + "test": "coherent_explanation", + "passed": true, + "score": 2.25, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 0.9286, + "content_len": 173, + "thinking_len": 0 + }, + "response_preview": "Photosynthesis is the process by which plants convert light energy into chemical energy, storing it in the form of glucose. 
This process also releases oxygen as a byproduct.", + "think": false + } + ] + }, + { + "params": { + "label": "creative", + "temperature": 0.8, + "top_p": 0.92, + "top_k": 40, + "min_p": 0.05, + "repeat_penalty": 1.08, + "presence_penalty": 0.2, + "frequency_penalty": 0.2 + }, + "pass_count": 8, + "total_tests": 8, + "pass_rate": 1.0, + "score_sum": 18.5, + "score_avg": 2.3125, + "tests": [ + { + "test": "factual_france", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "The capital of France is Paris.", + "think": true + }, + { + "test": "factual_japan", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 5, + "thinking_len": 0 + }, + "response_preview": "Tokyo", + "think": false + }, + { + "test": "math_multiply", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 0.9375, + "unique_ratio": 0.8889, + "content_len": 32, + "thinking_len": 0 + }, + "response_preview": "17 * 19 = 323 The answer is: 323", + "think": true + }, + { + "test": "math_sqrt", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 2, + "thinking_len": 0 + }, + "response_preview": "12", + "think": false + }, + { + "test": "reasoning", + "passed": true, + "score": 2.75, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 0.75, + "content_len": 108, + "thinking_len": 0 + }, + "response_preview": "Yes, because if all bloops are razzies and all razzies are lazzies, then all bloops must be lazzies as well.", + "think": true + }, + { + "test": "coding", + "passed": true, + "score": 2.5, + "issues": [], + "metrics": { + "readable_ratio": 0.9677, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "def add(a, b): return a + b", 
+ "think": true + }, + { + "test": "short_planets", + "passed": true, + "score": 3.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 61, + "thinking_len": 0 + }, + "response_preview": "Three planets in a single sentence: Earth, Mars, and Jupiter.", + "think": false + }, + { + "test": "coherent_explanation", + "passed": true, + "score": 2.25, + "issues": [], + "metrics": { + "readable_ratio": 0.9886, + "unique_ratio": 0.75, + "content_len": 351, + "thinking_len": 0 + }, + "response_preview": "Photosynthesis is the process by which plants, including algae and certain fungi, convert light energy into chemical energy through the absorption of sunlight. This process involves the use of carbon dioxide and water to", + "think": false + } + ] + }, + { + "params": { + "label": "low_temp_precise", + "temperature": 0.55, + "top_p": 0.95, + "top_k": 20, + "min_p": 0.0, + "repeat_penalty": 1.05, + "presence_penalty": 0.0, + "frequency_penalty": 0.0 + }, + "pass_count": 8, + "total_tests": 8, + "pass_rate": 1.0, + "score_sum": 18.5, + "score_avg": 2.3125, + "tests": [ + { + "test": "factual_france", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "The capital of France is Paris.", + "think": true + }, + { + "test": "factual_japan", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 5, + "thinking_len": 0 + }, + "response_preview": "Tokyo", + "think": false + }, + { + "test": "math_multiply", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + "readable_ratio": 0.9375, + "unique_ratio": 0.8889, + "content_len": 32, + "thinking_len": 0 + }, + "response_preview": "17 * 19 = 323 The answer is: 323", + "think": true + }, + { + "test": "math_sqrt", + "passed": true, + "score": 2.0, + "issues": [], + "metrics": { + 
"readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 2, + "thinking_len": 0 + }, + "response_preview": "12", + "think": false + }, + { + "test": "reasoning", + "passed": true, + "score": 2.75, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 0.5909, + "content_len": 124, + "thinking_len": 0 + }, + "response_preview": "Yes, because if all bloops are razzies and all razzies are lazzies, then all bloops must be lazzies. All bloops are lazzies.", + "think": true + }, + { + "test": "coding", + "passed": true, + "score": 2.5, + "issues": [], + "metrics": { + "readable_ratio": 0.9677, + "unique_ratio": 1.0, + "content_len": 31, + "thinking_len": 0 + }, + "response_preview": "def add(a, b): return a + b", + "think": true + }, + { + "test": "short_planets", + "passed": true, + "score": 3.0, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 1.0, + "content_len": 61, + "thinking_len": 0 + }, + "response_preview": "Three planets in a single sentence: Earth, Mars, and Jupiter.", + "think": false + }, + { + "test": "coherent_explanation", + "passed": true, + "score": 2.25, + "issues": [], + "metrics": { + "readable_ratio": 1.0, + "unique_ratio": 0.7273, + "content_len": 414, + "thinking_len": 0 + }, + "response_preview": "Photosynthesis is the process by which plants, including trees and shrubs, convert light energy into chemical energy stored in glucose. This process involves the absorption of carbon dioxide and the release of oxygen gas", + "think": false + } + ] + } + ], + "tests": [ + { + "name": "factual_france", + "prompt": "What is the capital of France? Answer in one short sentence.", + "think": true, + "max_tokens": 120, + "check_contains": [ + "Paris" + ] + }, + { + "name": "factual_japan", + "prompt": "Capital of Japan? Answer with one word.", + "think": false, + "max_tokens": 64, + "check_contains": [ + "Tokyo" + ] + }, + { + "name": "math_multiply", + "prompt": "What is 17 * 19? 
Give only the number.", + "think": true, + "max_tokens": 64, + "check_contains": [ + "323" + ] + }, + { + "name": "math_sqrt", + "prompt": "Square root of 144? Give only the number.", + "think": false, + "max_tokens": 64, + "check_contains": [ + "12" + ] + }, + { + "name": "reasoning", + "prompt": "If all bloops are razzies and all razzies are lazzies, are all bloops lazzies? Answer yes or no then one sentence.", + "think": true, + "max_tokens": 180, + "check_contains": [ + "yes" + ], + "check_min_words": 6 + }, + { + "name": "coding", + "prompt": "Write a Python function add(a, b) that returns a + b. Output only code.", + "think": true, + "max_tokens": 200, + "check_any_contains": [ + "def add", + "return a + b", + "return a+b" + ], + "check_no_garbage": true + }, + { + "name": "short_planets", + "prompt": "Name three planets in a single sentence.", + "think": false, + "max_tokens": 120, + "check_any_contains": [ + "Mercury", + "Venus", + "Earth", + "Mars", + "Jupiter", + "Saturn" + ], + "check_min_words": 4, + "check_no_garbage": true + }, + { + "name": "coherent_explanation", + "prompt": "Explain photosynthesis in 2 short sentences.", + "think": false, + "max_tokens": 220, + "check_min_words": 12, + "check_no_garbage": true + } + ] +} \ No newline at end of file