Initialize project; model provided by the ModelHub XC community

Model: osmosis-ai/Osmosis-Apply-1.7B
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-04-11 20:28:57 +08:00
commit f853297876
27 changed files with 152393 additions and 0 deletions

49
.gitattributes vendored Normal file
View File

@@ -0,0 +1,49 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bin.* filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zstandard filter=lfs diff=lfs merge=lfs -text
*.tfevents* filter=lfs diff=lfs merge=lfs -text
*.db* filter=lfs diff=lfs merge=lfs -text
*.ark* filter=lfs diff=lfs merge=lfs -text
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.gguf* filter=lfs diff=lfs merge=lfs -text
*.ggml filter=lfs diff=lfs merge=lfs -text
*.llamafile* filter=lfs diff=lfs merge=lfs -text
*.pt2 filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
tokenizer.json filter=lfs diff=lfs merge=lfs -text

52
Modelfile Normal file
View File

@@ -0,0 +1,52 @@
# Osmosis-Apply-1.7B Modelfile for Ollama
# A specialized language model for applying code edits
FROM ./osmosis-apply-1.7b-bf16.gguf
# Alternative quantized weights (an Ollama Modelfile takes exactly one FROM line;
# swap in one of the variants below to trade quality for size):
# FROM ./osmosis-mcp-1.7b.Q8_0.gguf
# FROM ./osmosis-mcp-1.7b.Q6_K.gguf
# FROM ./osmosis-mcp-1.7b.Q5_K_M.gguf
# FROM ./osmosis-mcp-1.7b.Q5_K_S.gguf
# FROM ./osmosis-mcp-1.7b.Q4_K_M.gguf
# FROM ./osmosis-mcp-1.7b.Q4_K_S.gguf
# FROM ./osmosis-mcp-1.7b.IQ4_XS.gguf
# FROM ./osmosis-mcp-1.7b.Q3_K_L.gguf
# FROM ./osmosis-mcp-1.7b.Q3_K_M.gguf
# FROM ./osmosis-mcp-1.7b.Q3_K_S.gguf
# FROM ./osmosis-mcp-1.7b.Q2_K.gguf
# Model parameters from generation_config.json
PARAMETER temperature 0.6
PARAMETER top_k 20
PARAMETER top_p 0.95
PARAMETER stop "<|endoftext|>"
PARAMETER stop "<|im_end|>"
PARAMETER stop "</code>"
# System prompt for code editing functionality
SYSTEM """You are a helpful assistant for a code editor that applies an edit to code to merge them together. That is, you will be given code wrapper in <code> tags and an edit wrapped in <edit> tags, and you will apply the edit to the code.
For example:
<code>
CODE_SNIPPET
</code>
<edit>
EDIT_SNIPPET
</edit>
The code is any type of code and the edit is in the form of:
// ... existing code ...
FIRST_EDIT
// ... existing code ...
SECOND_EDIT
// ... existing code ...
THIRD_EDIT
// ... existing code ...
The merged code must be exact with no room for any errors. Make sure all whitespaces are preserved correctly. A small typo in code will cause it to fail to compile or error out, leading to poor user experience.
Output the code wrapped in <code> tags."""
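Once the bf16 GGUF has been produced (see `quantize_models.sh` below), the Modelfile above can be used with Ollama roughly as follows; the model name `osmosis-apply` is an arbitrary choice:

```shell
# Register the model from this Modelfile (run in the repo directory)
ollama create osmosis-apply -f Modelfile

# Send a merge request; input follows the <code>/<edit> convention
ollama run osmosis-apply "<code>
def f():
    return 1
</code>

<edit>
// ... existing code ...
def f():
    return 2
// ... existing code ...
</edit>"
```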

438
README.md Normal file
View File

@@ -0,0 +1,438 @@
---
license: apache-2.0
library_name: transformers
---
# Osmosis-Apply-1.7B
`Osmosis-Apply-1.7B` is a specialized language model, finetuned from `Qwen3-1.7B`, designed to perform code merges, similar to the apply feature of modern AI code editors. Given an original code snippet and an edit snippet, the model applies the edit to the original snippet, producing the updated code.
Here's an example. Let's say we prompt an LLM to fill out the body of this binary search function.
```python
def binary_search(arr, x):
    left = 0
    right = len(arr)
    # TODO: fill out the body of this
    return -1
arr = [1,2,3,4,5,6,7,8,9]
assert binary_search(arr, 0) == -1
assert binary_search(arr, 1) == 0
assert binary_search(arr, 2) == 1
assert binary_search(arr, 3) == 2
assert binary_search(arr, 8) == 7
assert binary_search(arr, 9) == 8
assert binary_search(arr, 10) == -1
```
With a custom prompt, the LLM produces an edit snippet that includes the binary search code and some surrounding context.
```python
// ... existing code ...
    left = 0
    right = len(arr)
    while(left < right):
        mid = left + (right - left) // 2
        if(arr[mid] == x):
            return mid
        elif(arr[mid] < x):
            left = mid + 1
        else:
            right = mid
    return -1
arr = [1,2,3,4,5,6,7,8,9]
// ... existing code ...
```
`Osmosis-Apply-1.7B` can apply this edit snippet to the original code, producing the updated, final code.
```python
def binary_search(arr, x):
    left = 0
    right = len(arr)
    while(left < right):
        mid = left + (right - left) // 2
        if(arr[mid] == x):
            return mid
        elif(arr[mid] < x):
            left = mid + 1
        else:
            right = mid
    return -1
arr = [1,2,3,4,5,6,7,8,9]
assert binary_search(arr, 0) == -1
assert binary_search(arr, 1) == 0
assert binary_search(arr, 2) == 1
assert binary_search(arr, 3) == 2
assert binary_search(arr, 8) == 7
assert binary_search(arr, 9) == 8
assert binary_search(arr, 10) == -1
```
## Benchmarks
We benchmarked our model against several large language models using 10,000 random samples from commitpackft. Rewards are calculated according to our reward function (see the Reward function section below).
<div align="center">
| Model | Average reward |
|-------|:-------------:|
| Osmosis-Apply-1.7B | 0.98046 |
| Claude 4 Sonnet | 0.93284 |
| OpenAI o3 | 0.86394 |
| Gemini-2.5-Flash | 0.77452 |
<em>Table 1: Performance on 10k samples from commitpackft.</em>
</div>
## Methodology
`Osmosis-Apply-1.7B` was trained on about 100k randomly sampled commits from the [commitpackft dataset](https://huggingface.co/datasets/bigcode/commitpackft), less than 15% of the full dataset. For each commit, a unified diff was generated between `old_contents` and `new_contents`, then parsed into a natural-language diff similar to those produced by LLMs.
```python
import difflib

# old_code and new_code are lists of lines (e.g. from splitlines(keepends=True))
unified_diff = difflib.unified_diff(old_code, new_code)
# generate_from_unified_diff is an internal helper that rewrites the unified
# diff into the natural-language edit format described below
natural_language_diff = generate_from_unified_diff(unified_diff)
```
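`generate_from_unified_diff` is the authors' unreleased internal helper. A minimal, hypothetical reconstruction of the idea, built directly on `difflib.SequenceMatcher` rather than on the unified-diff text, might look like:

```python
import difflib

def edit_snippet_from_lines(old_lines, new_lines, marker="// ... existing code ..."):
    # Walk matched/changed regions: unchanged runs collapse to the marker,
    # changed runs emit the new side of the hunk.
    parts = []
    for tag, i1, i2, j1, j2 in difflib.SequenceMatcher(a=old_lines, b=new_lines).get_opcodes():
        if tag == "equal":
            parts.append(marker)
        else:  # "replace", "insert", or "delete"
            parts.extend(new_lines[j1:j2])
    return "\n".join(parts)

old = ["def f(x):", "    return x", "print(f(1))"]
new = ["def f(x):", "    return x + 1", "print(f(1))"]
print(edit_snippet_from_lines(old, new))
```

A production version would also keep a few unchanged context lines around each hunk, as the prompt in the Usage section requests; this sketch collapses every unchanged run.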
The original code + edit were provided as input to the model along with a custom system prompt.
```xml
<code>
{ORIGINAL CODE}
</code>
<edit>
{EDIT SNIPPET}
</edit>
```
### Infrastructure
We used [verl](https://github.com/volcengine/verl) as the framework to train our model and [SGLang](https://github.com/sgl-project/sglang) as the rollout backend.
### Model system prompt
Below is the system prompt we trained our model with.
```python
SYSTEM_PROMPT = \
'''
You are a helpful assistant for a code editor that applies an edit to code to merge them together. That is, you will be given code wrapped in <code> tags and an edit wrapped in <edit> tags, and you will apply the edit to the code.
For example:
<code>
CODE_SNIPPET
</code>
<edit>
EDIT_SNIPPET
</edit>
The code is any type of code and the edit is in the form of:
// ... existing code ...
FIRST_EDIT
// ... existing code ...
SECOND_EDIT
// ... existing code ...
THIRD_EDIT
// ... existing code ...
The merged code must be exact with no room for any errors. Make sure all whitespaces are preserved correctly. A small typo in code will cause it to fail to compile or error out, leading to poor user experience.
Output the code wrapped in <code> tags.
'''
```
### Edit format
The edit format is designed to be mostly natural language, with `// ... existing code ...` condensing original code that remains unchanged between edits. When prompting the LLM, it is important to also instruct it to provide some additional context (unchanged lines from the original code surrounding the edit), so that `Osmosis-Apply-1.7B` can locate where to insert the edit.
```
// ... existing code ...
FIRST_EDIT
// ... existing code ...
SECOND_EDIT
// ... existing code ...
THIRD_EDIT
// ... existing code ...
```
We find that the simple, sequential nature of this edit format makes it easier for smaller models to work with and for larger models to output, at some cost in parsability and exactness.
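Applying such an edit is what the model learns to do semantically. For intuition only, a purely mechanical merge can be sketched under the strong assumption that every chunk between markers begins and ends with an exact context line from the original (the prompt in the Usage section asks the editing LLM to include such context):

```python
def apply_edit(code: str, edit: str, marker: str = "// ... existing code ...") -> str:
    # Split the edit into chunks separated by marker lines.
    chunks, current = [], []
    for line in edit.splitlines():
        if line.strip() == marker:
            chunks.append(current)
            current = []
        else:
            current.append(line)
    chunks.append(current)

    code_lines = code.splitlines()
    out, pos = [], 0
    for chunk in chunks:
        if not chunk:
            continue  # leading, trailing, or adjacent markers
        # Anchor on the chunk's first context line, then skip the original
        # region up to the chunk's last context line.
        start = code_lines.index(chunk[0], pos)
        out.extend(code_lines[pos:start])  # lines the marker stood for
        out.extend(chunk)                  # edited region (with its context)
        pos = code_lines.index(chunk[-1], start) + 1
    out.extend(code_lines[pos:])           # code after the final marker
    return "\n".join(out)
```

A real apply step must also tolerate near-miss context (whitespace drift, repeated lines), which is exactly why a trained model outperforms this kind of literal string matching.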
### Reward function
We use a simple reward function that looks for exactness in the model outputs.
**TL;DR**:
1. If the new code is exactly correct including whitespaces, then give a large reward (1.0).
2. If the new code is correct when excluding empty lines, then give a small reward (0.2).
3. Otherwise, give no reward (0.0).
Below is the entire reward function.
```python
import re

def extract_solution(solution_str):
    matches = list(re.finditer(r'<code>(.*?)</code>', solution_str, re.DOTALL))
    # If nonempty matches and exactly one <code> block exists
    if(matches and len(matches) == 1):
        return matches[0].group(1).strip()
    return None

def filter_empty_lines(lines):
    return list(filter(lambda line : line.strip() != "", lines))

def calc_score(answer, ground_truth):
    answer = answer.strip()
    ground_truth = ground_truth.strip()
    if(answer == ground_truth):
        return 1.0
    else:
        answer_lines = filter_empty_lines(answer.splitlines(True))
        ground_truth_lines = filter_empty_lines(ground_truth.splitlines(True))
        # Give small positive reward if lines are almost correct
        if(answer_lines == ground_truth_lines):
            return 0.2
        return 0

def compute_score(data_source, solution_str, ground_truth, extra_info=None, format_score=0.0, score=1.0):
    answer = extract_solution(solution_str=solution_str)
    if answer is None:
        return 0
    else:
        return calc_score(answer, ground_truth)
```
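To make the three tiers concrete, here is a small self-contained check (restating `calc_score` and `filter_empty_lines` from the listing above):

```python
def filter_empty_lines(lines):
    return [line for line in lines if line.strip() != ""]

def calc_score(answer, ground_truth):
    answer, ground_truth = answer.strip(), ground_truth.strip()
    if answer == ground_truth:
        return 1.0   # tier 1: exact match, whitespace included
    if filter_empty_lines(answer.splitlines(True)) == \
       filter_empty_lines(ground_truth.splitlines(True)):
        return 0.2   # tier 2: correct up to empty lines
    return 0.0       # tier 3: anything else

assert calc_score("x = 1\ny = 2", "x = 1\ny = 2") == 1.0   # exact
assert calc_score("x = 1\n\ny = 2", "x = 1\ny = 2") == 0.2  # extra blank line
assert calc_score("x = 1\ny = 3", "x = 1\ny = 2") == 0.0   # wrong content
```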
## Usage
### LLM prompt
Since edits should be generated in a specific format, we provide an example prompt to give to a coding LLM. This prompt is by no means perfect and can be tweaked to get better results.
````
You are an AI coding assistant that takes in original code and responds with an edit snippet to the user.
```
<edit>
// ... existing code ...
FIRST_EDIT
// ... existing code ...
SECOND_EDIT
// ... existing code ...
THIRD_EDIT
// ... existing code ...
</edit>
```
Your response must strictly follow this format.
Guidelines for creating the edit snippet:
1. Regardless of programming language, collapse unchanged lines of code with this exact literal (ignoring backticks): `// ... existing code ...`
2. Provide 2-3 lines of context above and below your changes in the edit to help indicate where it is in the file. If the change is at the start or end of the file, just provide what you can.
3. You do not need to begin or end with `// ... existing code ...` for edits that include the beginning or end of file.
4. Make sure whitespaces, indentation, and formatting matches the original code.
5. You may make as many edits as you would like, but condense edits so that there are not too many, similar to a unified diff.
6. Wrap your final output in <edit> tags.
Here is an example.
Original code:
```
def binary_search(arr, x):
    left = 0
    right = len(arr)
    # TODO: fill out the body of this
    return -1
arr = [1,2,3,4,5,6,7,8,9]
assert binary_search(arr, 0) == -1
assert binary_search(arr, 1) == 0
assert binary_search(arr, 2) == 1
assert binary_search(arr, 3) == 2
assert binary_search(arr, 8) == 7
assert binary_search(arr, 9) == 8
assert binary_search(arr, 10) == -1
```
Generated edit:
```
<edit>
// ... existing code ...
    left = 0
    right = len(arr)
    while(left < right):
        mid = left + (right - left) // 2
        if(arr[mid] == x):
            return mid
        elif(arr[mid] < x):
            left = mid + 1
        else:
            right = mid
    return -1
arr = [1,2,3,4,5,6,7,8,9]
// ... existing code ...
</edit>
```
````
### Serving
During development, we used SGLang to serve the model, though it should be straightforward to do something similar with Ollama.
Below is an example using SGLang.
`python3 -m sglang.launch_server --model-path osmosis-ai/Osmosis-Apply-1.7B --host 0.0.0.0 --api-key osmosis`
```python
from openai import OpenAI
import re
def create_query(old_code, edit):
    return f"<code>\n{old_code}\n</code>\n\n<edit>\n{edit}\n</edit>"

def extract_solution(solution_str):
    matches = list(re.finditer(r'<code>(.*?)</code>', solution_str, re.DOTALL))
    # If nonempty matches and exactly one <code> block exists
    if(matches and len(matches) == 1):
        return matches[0].group(1).strip()
    return None
SYSTEM_PROMPT = \
'''
You are a helpful assistant for a code editor that applies an edit to code to merge them together. That is, you will be given code wrapped in <code> tags and an edit wrapped in <edit> tags, and you will apply the edit to the code.
For example:
<code>
CODE_SNIPPET
</code>
<edit>
EDIT_SNIPPET
</edit>
The code is any type of code and the edit is in the form of:
// ... existing code ...
FIRST_EDIT
// ... existing code ...
SECOND_EDIT
// ... existing code ...
THIRD_EDIT
// ... existing code ...
The merged code must be exact with no room for any errors. Make sure all whitespaces are preserved correctly. A small typo in code will cause it to fail to compile or error out, leading to poor user experience.
Output the code wrapped in <code> tags.
'''
api_key = "osmosis"
api_base_url = "http://0.0.0.0:30000/v1"
client = OpenAI(
    api_key=api_key,
    base_url=api_base_url,
)
def generate_completion(query: str, system_prompt: str) -> str:
    # The system message comes first so the chat template renders it
    # ahead of the user turn, matching how the model was trained.
    messages = [
        {
            "role": "system",
            "content": system_prompt,
        },
        {
            "role": "user",
            "content": query,
        },
    ]
    response = client.chat.completions.create(
        model="",
        messages=messages,
        temperature=0,
        max_tokens=3072,
    )
    completion = response.choices[0].message.content
    return completion
original_code = \
'''
def binary_search(arr, x):
    left = 0
    right = len(arr)
    # TODO: fill out the body of this
    return -1
arr = [1,2,3,4,5,6,7,8,9]
assert binary_search(arr, 0) == -1
assert binary_search(arr, 1) == 0
assert binary_search(arr, 2) == 1
assert binary_search(arr, 3) == 2
assert binary_search(arr, 8) == 7
assert binary_search(arr, 9) == 8
assert binary_search(arr, 10) == -1
'''
edit = \
'''
// ... existing code ...
    left = 0
    right = len(arr)
    while(left < right):
        mid = left + (right - left) // 2
        if(arr[mid] == x):
            return mid
        elif(arr[mid] < x):
            left = mid + 1
        else:
            right = mid
    return -1
arr = [1,2,3,4,5,6,7,8,9]
// ... existing code ...
'''
completion = generate_completion(create_query(original_code, edit), SYSTEM_PROMPT)
updated_code = extract_solution(completion)
print(updated_code)
```

28
added_tokens.json Normal file
View File

@@ -0,0 +1,28 @@
{
"</think>": 151668,
"</tool_call>": 151658,
"</tool_response>": 151666,
"<think>": 151667,
"<tool_call>": 151657,
"<tool_response>": 151665,
"<|box_end|>": 151649,
"<|box_start|>": 151648,
"<|endoftext|>": 151643,
"<|file_sep|>": 151664,
"<|fim_middle|>": 151660,
"<|fim_pad|>": 151662,
"<|fim_prefix|>": 151659,
"<|fim_suffix|>": 151661,
"<|im_end|>": 151645,
"<|im_start|>": 151644,
"<|image_pad|>": 151655,
"<|object_ref_end|>": 151647,
"<|object_ref_start|>": 151646,
"<|quad_end|>": 151651,
"<|quad_start|>": 151650,
"<|repo_name|>": 151663,
"<|video_pad|>": 151656,
"<|vision_end|>": 151653,
"<|vision_pad|>": 151654,
"<|vision_start|>": 151652
}

30
config.json Normal file
View File

@@ -0,0 +1,30 @@
{
"architectures": [
"Qwen3ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"eos_token_id": 151645,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.02,
"intermediate_size": 6144,
"max_position_embeddings": 40960,
"max_window_layers": 28,
"model_type": "qwen3",
"num_attention_heads": 16,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"pad_token_id": 151643,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 1000000,
"sliding_window": null,
"tie_word_embeddings": true,
"torch_dtype": "bfloat16",
"transformers_version": "4.51.1",
"use_cache": true,
"use_sliding_window": false,
"vocab_size": 151936
}

1
configuration.json Normal file
View File

@@ -0,0 +1 @@
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}

13
generation_config.json Normal file
View File

@@ -0,0 +1,13 @@
{
"bos_token_id": 151643,
"do_sample": true,
"eos_token_id": [
151645,
151643
],
"pad_token_id": 151643,
"temperature": 0.6,
"top_k": 20,
"top_p": 0.95,
"transformers_version": "4.51.1"
}

151388
merges.txt Normal file

File diff suppressed because it is too large Load Diff

2
mise.toml Normal file
View File

@@ -0,0 +1,2 @@
[tools]
python = "3.8"

3
model.safetensors Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:04fc2801cfeaafd13b70ba6c5c10c3624c4cc647fbd1970ed41d43f2f0e3d9b1
size 4063515640

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:878a0807397919b13ce01ed69f29b0b766d2ff035b8c698dea771dff70c2b64d
size 4069678784

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7f69035f8224e21294999345036e57efaf956ab3ca9ced39dfc9bf9879b7bf84
size 1181587136

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:97ee60b4752994044f5daf0d2b045c8637a336d4182557d3dcaacaf939e14646
size 879896256

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d3f7d5013adc0391622a5cdd7b5263c0eeba45862600b4a7b94bcdedcfdb5772
size 1137204928

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ce63b47498d70d9d1dcb94c4467b9735efca8f0fa0d1ad334596f3ec888245d1
size 1073241792

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:684144d7aa44ee732cd8939171b72eb67e3a2308ad9ce84dff919b2212e2dc3e
size 1000955584

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:85d16ed10da4ea851da357377c419c1b0cdd5f6875c5cb0e006207a35943dc3f
size 1282438848

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6215d0f7756a1005cca262f34fa1c3b363010c9a986116adcde13192478f2f95
size 1235220160

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e49b5d2e20b9bb7568127e5ad8b876a060bbfdfa5ca253ca83f32a2ce090c435
size 1471805120

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4b7222b24599b8c99ceb2d4e1bd89dc425b970fdd3c0140e8070f8bd89cdd667
size 1444509376

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:349fff3d655b3e739b92751248c34dd4702fc32c39f58a2ffdb301ccfef829d9
size 1673006784

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:948d1077ba710f12e1a9501124f05398bfb285b95ef447b145f19dd1988bc676
size 2165038784

78
quantize_models.sh Normal file
View File

@@ -0,0 +1,78 @@
#!/bin/bash
# Input model file (corrected filename)
INPUT_MODEL="osmosis-apply-1.7b-bf16.gguf"
# Define quantization formats to generate
QUANT_FORMATS=(
    "Q4_K_S"
    "Q5_K_M"
    "Q5_K_S"
    "Q6_K"
    "IQ4_XS"
    "Q8_0"
    "Q2_K"
    "Q3_K_L"
    "Q3_K_M"
    "Q3_K_S"
    "Q4_K_M"
)
# Generate bf16 model if it doesn't exist
if [ ! -f "$INPUT_MODEL" ]; then
    echo "bf16 model not found. Generating $INPUT_MODEL..."
    # Run the conversion
    cd llama.cpp && python3 convert_hf_to_gguf.py ../ --outfile ../osmosis-apply-1.7b-bf16.gguf
    cd ..
    # Check if bf16 generation was successful
    if [ ! -f "$INPUT_MODEL" ]; then
        echo "Error: Failed to generate bf16 model $INPUT_MODEL"
        exit 1
    fi
    echo "Successfully generated $INPUT_MODEL"
fi
# Path to llama-quantize tool (corrected path)
QUANTIZE_TOOL="llama.cpp/build/bin/llama-quantize"
# Check if quantize tool exists
if [ ! -f "$QUANTIZE_TOOL" ]; then
    echo "Error: Quantize tool not found at $QUANTIZE_TOOL"
    echo "Please build it first by running: cd llama.cpp && mkdir -p build && cd build && cmake .. && make llama-quantize"
    exit 1
fi
# Process each quantization format
for format in "${QUANT_FORMATS[@]}"; do
    echo "------------------------------------------------------"
    echo "Starting quantization: $format"
    echo "------------------------------------------------------"

    # Define output filename with the exact format requested
    OUTPUT_MODEL="osmosis-mcp-1.7b.${format}.gguf"

    # Check if output model already exists
    if [ -f "$OUTPUT_MODEL" ]; then
        echo "Model $OUTPUT_MODEL already exists. Skipping..."
        continue
    fi

    # Run quantization
    echo "Quantizing to $format..."
    "$QUANTIZE_TOOL" "$INPUT_MODEL" "$OUTPUT_MODEL" "$format"

    # Check if quantization was successful
    if [ $? -eq 0 ]; then
        echo "Successfully created $OUTPUT_MODEL"
    else
        echo "Failed to create $OUTPUT_MODEL"
    fi
    echo ""
done
echo "All quantizations completed!"
echo "Generated models:"
ls -lah osmosis-mcp-1.7b.*.gguf

31
special_tokens_map.json Normal file
View File

@@ -0,0 +1,31 @@
{
"additional_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"eos_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

3
tokenizer.json Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
size 11422654

240
tokenizer_config.json Normal file
View File

@@ -0,0 +1,240 @@
{
"add_bos_token": false,
"add_prefix_space": false,
"added_tokens_decoder": {
"151643": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151644": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151645": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151646": {
"content": "<|object_ref_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151647": {
"content": "<|object_ref_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151648": {
"content": "<|box_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151649": {
"content": "<|box_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151650": {
"content": "<|quad_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151651": {
"content": "<|quad_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151652": {
"content": "<|vision_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151653": {
"content": "<|vision_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151654": {
"content": "<|vision_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151655": {
"content": "<|image_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151656": {
"content": "<|video_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151657": {
"content": "<tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151658": {
"content": "</tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151659": {
"content": "<|fim_prefix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151660": {
"content": "<|fim_middle|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151661": {
"content": "<|fim_suffix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151662": {
"content": "<|fim_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151663": {
"content": "<|repo_name|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151664": {
"content": "<|file_sep|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151665": {
"content": "<tool_response>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151666": {
"content": "</tool_response>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151667": {
"content": "<think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"151668": {
"content": "</think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
}
},
"additional_special_tokens": [
"<|im_start|>",
"<|im_end|>",
"<|object_ref_start|>",
"<|object_ref_end|>",
"<|box_start|>",
"<|box_end|>",
"<|quad_start|>",
"<|quad_end|>",
"<|vision_start|>",
"<|vision_end|>",
"<|vision_pad|>",
"<|image_pad|>",
"<|video_pad|>"
],
"bos_token": null,
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {{- messages[0].content + '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n {%- set ns.multi_step_tool = false %}\n {%- set ns.last_query_index = index %}\n {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = 
content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if loop.last or (not loop.last and reasoning_content) %}\n {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content }}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n {%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n\\n' }}\n {%- endif %}\n{%- endif %}",
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"errors": "replace",
"extra_special_tokens": {},
"model_max_length": 131072,
"pad_token": "<|endoftext|>",
"split_special_tokens": false,
"tokenizer_class": "Qwen2Tokenizer",
"unk_token": null
}

1
vocab.json Normal file

File diff suppressed because one or more lines are too long