Initialize the project; model provided by the ModelHub XC community
Model: osmosis-ai/Osmosis-Apply-1.7B Source: Original Platform
This commit is contained in:
49
.gitattributes
vendored
Normal file
@@ -0,0 +1,49 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bin.* filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zstandard filter=lfs diff=lfs merge=lfs -text
*.tfevents* filter=lfs diff=lfs merge=lfs -text
*.db* filter=lfs diff=lfs merge=lfs -text
*.ark* filter=lfs diff=lfs merge=lfs -text
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.gguf* filter=lfs diff=lfs merge=lfs -text
*.ggml filter=lfs diff=lfs merge=lfs -text
*.llamafile* filter=lfs diff=lfs merge=lfs -text
*.pt2 filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text

tokenizer.json filter=lfs diff=lfs merge=lfs -text
52
Modelfile
Normal file
@@ -0,0 +1,52 @@
# Osmosis-Apply-1.7B Modelfile for Ollama
# A specialized language model for applying code edits

FROM ./osmosis-apply-1.7b-bf16.gguf
# An Ollama Modelfile uses a single FROM; to serve a quantized variant instead,
# comment out the line above and uncomment exactly one of the following:
# FROM ./osmosis-mcp-1.7b.Q3_K_M.gguf
# FROM ./osmosis-mcp-1.7b.Q5_K_M.gguf
# FROM ./osmosis-mcp-1.7b.IQ4_XS.gguf
# FROM ./osmosis-mcp-1.7b.Q3_K_S.gguf
# FROM ./osmosis-mcp-1.7b.Q5_K_S.gguf
# FROM ./osmosis-mcp-1.7b.Q2_K.gguf
# FROM ./osmosis-mcp-1.7b.Q4_K_M.gguf
# FROM ./osmosis-mcp-1.7b.Q6_K.gguf
# FROM ./osmosis-mcp-1.7b.Q3_K_L.gguf
# FROM ./osmosis-mcp-1.7b.Q4_K_S.gguf
# FROM ./osmosis-mcp-1.7b.Q8_0.gguf

# Model parameters from generation_config.json
PARAMETER temperature 0.6
PARAMETER top_k 20
PARAMETER top_p 0.95
PARAMETER stop "<|endoftext|>"
PARAMETER stop "<|im_end|>"
PARAMETER stop "<//code>"
PARAMETER stop "</code>"

# System prompt for code editing functionality
SYSTEM """You are a helpful assistant for a code editor that applies an edit to code to merge them together. That is, you will be given code wrapped in <code> tags and an edit wrapped in <edit> tags, and you will apply the edit to the code.

For example:

<code>
CODE_SNIPPET
</code>

<edit>
EDIT_SNIPPET
</edit>

The code is any type of code and the edit is in the form of:

// ... existing code ...
FIRST_EDIT
// ... existing code ...
SECOND_EDIT
// ... existing code ...
THIRD_EDIT
// ... existing code ...

The merged code must be exact with no room for any errors. Make sure all whitespaces are preserved correctly. A small typo in code will cause it to fail to compile or error out, leading to poor user experience.

Output the code wrapped in <code> tags."""
438
README.md
Normal file
@@ -0,0 +1,438 @@
---
license: apache-2.0
library_name: transformers
---

# Osmosis-Apply-1.7B

`Osmosis-Apply-1.7B` is a specialized language model, finetuned from `Qwen3-1.7B`, designed to perform code merges, similar to the apply feature of modern AI code editors. Given an original code snippet and an edit snippet, the model applies the edit to the original code, producing the updated snippet.

Here's an example. Let's say we prompt an LLM to fill out the body of this binary search function.

```python
def binary_search(arr, x):
    left = 0
    right = len(arr)

    # TODO: fill out the body of this
    return -1

arr = [1,2,3,4,5,6,7,8,9]

assert binary_search(arr, 0) == -1
assert binary_search(arr, 1) == 0
assert binary_search(arr, 2) == 1
assert binary_search(arr, 3) == 2
assert binary_search(arr, 8) == 7
assert binary_search(arr, 9) == 8
assert binary_search(arr, 10) == -1
```

With a custom prompt, the LLM produces an edit snippet that includes the binary search code and some surrounding context.

```python
// ... existing code ...
    left = 0
    right = len(arr)

    while(left < right):
        mid = left + (right - left) // 2

        if(arr[mid] == x):
            return mid
        elif(arr[mid] < x):
            left = mid + 1
        else:
            right = mid
    return -1

arr = [1,2,3,4,5,6,7,8,9]
// ... existing code ...
```

`Osmosis-Apply-1.7B` can apply this edit snippet to the original code, producing the updated, final code.

```python
def binary_search(arr, x):
    left = 0
    right = len(arr)

    while(left < right):
        mid = left + (right - left) // 2

        if(arr[mid] == x):
            return mid
        elif(arr[mid] < x):
            left = mid + 1
        else:
            right = mid
    return -1

arr = [1,2,3,4,5,6,7,8,9]

assert binary_search(arr, 0) == -1
assert binary_search(arr, 1) == 0
assert binary_search(arr, 2) == 1
assert binary_search(arr, 3) == 2
assert binary_search(arr, 8) == 7
assert binary_search(arr, 9) == 8
assert binary_search(arr, 10) == -1
```

## Benchmarks

We benchmarked our model against several large language models on 10,000 random samples from commitpackft. Rewards are calculated according to our reward function (see the Reward function section).

<div align="center">

| Model | Average reward |
|-------|:-------------:|
| Osmosis-Apply-1.7B | 0.98046 |
| Claude 4 Sonnet | 0.93284 |
| OpenAI o3 | 0.86394 |
| Gemini-2.5-Flash | 0.77452 |

<em>Table 1: Performance on 10k samples from commitpackft.</em>

</div>

## Methodology

`Osmosis-Apply-1.7B` was trained on about 100k randomly sampled commits from the [commitpackft dataset](https://huggingface.co/datasets/bigcode/commitpackft), which is less than 15% of the entire dataset. A unified diff was generated between `old_contents` and `new_contents`, and the unified diff was then parsed to create a natural-language diff, similar to those output by LLMs.

```python
import difflib

unified_diff = difflib.unified_diff(old_code, new_code)
natural_language_diff = generate_from_unified_diff(unified_diff)
```

The original code and the edit were provided as input to the model, along with a custom system prompt.

```xml
<code>
{ORIGINAL CODE}
</code>

<edit>
{EDIT SNIPPET}
</edit>
```

### Infrastructure

We used [verl](https://github.com/volcengine/verl) as the framework to train our model and [SGLang](https://github.com/sgl-project/sglang) as the rollout backend.

### Model system prompt

Below is the system prompt we trained our model with.

```python
SYSTEM_PROMPT = \
'''
You are a helpful assistant for a code editor that applies an edit to code to merge them together. That is, you will be given code wrapped in <code> tags and an edit wrapped in <edit> tags, and you will apply the edit to the code.

For example:

<code>
CODE_SNIPPET
</code>

<edit>
EDIT_SNIPPET
</edit>

The code is any type of code and the edit is in the form of:

// ... existing code ...
FIRST_EDIT
// ... existing code ...
SECOND_EDIT
// ... existing code ...
THIRD_EDIT
// ... existing code ...

The merged code must be exact with no room for any errors. Make sure all whitespaces are preserved correctly. A small typo in code will cause it to fail to compile or error out, leading to poor user experience.

Output the code wrapped in <code> tags.
'''
```

### Edit format

The edit format is designed to be mostly natural language, with `// ... existing code ...` condensing the regions of the original code that remain unchanged between edits. When prompting the LLM, it is important to also instruct it to provide some additional context (unchanged lines from the original code surrounding each edit), so that `Osmosis-Apply-1.7B` can locate where to insert the edit.

```
// ... existing code ...
FIRST_EDIT
// ... existing code ...
SECOND_EDIT
// ... existing code ...
THIRD_EDIT
// ... existing code ...
```
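The model performs the merge itself, but the format is simple enough to manipulate mechanically. As an illustration (our own helper, not part of the released code), splitting an edit snippet into its hunks is a one-pass scan over the marker lines:

```python
MARKER = "// ... existing code ..."

def split_edit(edit: str) -> list[str]:
    """Split a marker-delimited edit snippet into its hunks."""
    hunks = []
    current = []
    for line in edit.splitlines():
        if line.strip() == MARKER:
            if current:                     # close the hunk in progress
                hunks.append("\n".join(current))
                current = []
        else:
            current.append(line)
    if current:                             # edit ended without a trailing marker
        hunks.append("\n".join(current))
    return hunks
```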

We find that the simple, sequential nature of this edit format makes it easy for smaller models to work with and for larger models to output, at the cost of some parsability and exactness.

### Reward function

We use a simple reward function that checks the model outputs for exactness.

**TL;DR**:

1. If the new code is exactly correct, including whitespace, give a large reward (1.0).
2. If the new code is correct when excluding empty lines, give a small reward (0.2).
3. Otherwise, give no reward (0.0).

Below is the entire reward function.

```python
import re

def extract_solution(solution_str):
    matches = list(re.finditer(r'<code>(.*?)</code>', solution_str, re.DOTALL))

    # If nonempty matches and exactly one <code> block exists
    if(matches and len(matches) == 1):
        return matches[0].group(1).strip()
    return None

def filter_empty_lines(lines):
    return list(filter(lambda line : line.strip() != "", lines))

def calc_score(answer, ground_truth):
    answer = answer.strip()
    ground_truth = ground_truth.strip()

    if(answer == ground_truth):
        return 1.0
    else:
        answer_lines = filter_empty_lines(answer.splitlines(True))
        ground_truth_lines = filter_empty_lines(ground_truth.splitlines(True))

        # Give small positive reward if lines are almost correct
        if(answer_lines == ground_truth_lines):
            return 0.2

    return 0

def compute_score(data_source, solution_str, ground_truth, extra_info=None, format_score=0.0, score=1.0):
    answer = extract_solution(solution_str=solution_str)
    if answer is None:
        return 0
    else:
        return calc_score(answer, ground_truth)
```
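To make the three reward tiers concrete, here is a condensed restatement of the function above with invented example strings. It is slightly simplified — line endings are normalized, unlike `splitlines(True)` in the original — and is for illustration only:

```python
import re

# Condensed restatement of the reward function above, for illustration.
def reward(solution_str, ground_truth):
    matches = list(re.finditer(r'<code>(.*?)</code>', solution_str, re.DOTALL))
    if not (matches and len(matches) == 1):
        return 0.0                          # no (or ambiguous) <code> block
    answer = matches[0].group(1).strip()
    truth = ground_truth.strip()
    if answer == truth:
        return 1.0                          # exact match, whitespace included
    strip_empty = lambda s: [l for l in s.splitlines() if l.strip()]
    if strip_empty(answer) == strip_empty(truth):
        return 0.2                          # correct modulo empty lines
    return 0.0

truth = "def f():\n    return 1"
assert reward("<code>def f():\n    return 1</code>", truth) == 1.0   # tier 1
assert reward("<code>def f():\n\n    return 1</code>", truth) == 0.2  # tier 2
assert reward("def f():\n    return 1", truth) == 0.0                 # tier 3
```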

## Usage

### LLM prompt

Since edits should be generated in a specific format, we have provided an example prompt to give to a coding LLM. This prompt is by no means perfect and can be tweaked to get better results.

````
You are an AI coding assistant that takes in original code and responds with an edit snippet to the user.

```
<edit>
// ... existing code ...
FIRST_EDIT
// ... existing code ...
SECOND_EDIT
// ... existing code ...
THIRD_EDIT
// ... existing code ...
</edit>
```

Your response must strictly follow this format.

Guidelines for creating the edit snippet:

1. Regardless of programming language, collapse unchanged lines of code with this exact literal (ignoring backticks): `// ... existing code ...`
2. Provide 2-3 lines of context above and below your changes in the edit to help indicate where it is in the file. If the change is at the start or end of the file, just provide what you can.
3. You do not need to begin or end with `// ... existing code ...` for edits that include the beginning or end of the file.
4. Make sure whitespace, indentation, and formatting match the original code.
5. You may make as many edits as you would like, but condense them so that there are not too many, similar to a unified diff.
6. Wrap your final output in <edit> tags.

Here is an example.

Original code:

```
def binary_search(arr, x):
    left = 0
    right = len(arr)

    # TODO: fill out the body of this
    return -1

arr = [1,2,3,4,5,6,7,8,9]

assert binary_search(arr, 0) == -1
assert binary_search(arr, 1) == 0
assert binary_search(arr, 2) == 1
assert binary_search(arr, 3) == 2
assert binary_search(arr, 8) == 7
assert binary_search(arr, 9) == 8
assert binary_search(arr, 10) == -1
```

Generated edit:

```
<edit>
// ... existing code ...
    left = 0
    right = len(arr)

    while(left < right):
        mid = left + (right - left) // 2

        if(arr[mid] == x):
            return mid
        elif(arr[mid] < x):
            left = mid + 1
        else:
            right = mid
    return -1

arr = [1,2,3,4,5,6,7,8,9]
// ... existing code ...
</edit>
```
````

### Serving

During development, we used SGLang to serve the model, though it should be straightforward to do something similar with Ollama.

Below is an example using SGLang.

`python3 -m sglang.launch_server --model-path osmosis-ai/Osmosis-Apply-1.7B --host 0.0.0.0 --api-key osmosis`

```python
from openai import OpenAI
import re

def create_query(old_code, edit):
    return f"<code>\n{old_code}\n</code>\n\n<edit>\n{edit}\n</edit>"

def extract_solution(solution_str):
    matches = list(re.finditer(r'<code>(.*?)</code>', solution_str, re.DOTALL))

    # If nonempty matches and exactly one <code> block exists
    if(matches and len(matches) == 1):
        return matches[0].group(1).strip()
    return None

SYSTEM_PROMPT = \
'''
You are a helpful assistant for a code editor that applies an edit to code to merge them together. That is, you will be given code wrapped in <code> tags and an edit wrapped in <edit> tags, and you will apply the edit to the code.

For example:

<code>
CODE_SNIPPET
</code>

<edit>
EDIT_SNIPPET
</edit>

The code is any type of code and the edit is in the form of:

// ... existing code ...
FIRST_EDIT
// ... existing code ...
SECOND_EDIT
// ... existing code ...
THIRD_EDIT
// ... existing code ...

The merged code must be exact with no room for any errors. Make sure all whitespaces are preserved correctly. A small typo in code will cause it to fail to compile or error out, leading to poor user experience.

Output the code wrapped in <code> tags.
'''

api_key = "osmosis"
api_base_url = "http://0.0.0.0:30000/v1"
client = OpenAI(
    api_key=api_key,
    base_url=api_base_url,
)

def generate_completion(query: str, system_prompt: str) -> str:
    # The system prompt goes first so the chat template sees it before the user turn.
    messages = [
        {
            "role": "system",
            "content": system_prompt,
        },
        {
            "role": "user",
            "content": query,
        },
    ]

    response = client.chat.completions.create(
        model="",
        messages=messages,
        temperature=0,
        max_tokens=3072,
    )

    completion = response.choices[0].message.content
    return completion

original_code = \
'''
def binary_search(arr, x):
    left = 0
    right = len(arr)

    # TODO: fill out the body of this
    return -1

arr = [1,2,3,4,5,6,7,8,9]

assert binary_search(arr, 0) == -1
assert binary_search(arr, 1) == 0
assert binary_search(arr, 2) == 1
assert binary_search(arr, 3) == 2
assert binary_search(arr, 8) == 7
assert binary_search(arr, 9) == 8
assert binary_search(arr, 10) == -1
'''

edit = \
'''
// ... existing code ...
    left = 0
    right = len(arr)

    while(left < right):
        mid = left + (right - left) // 2

        if(arr[mid] == x):
            return mid
        elif(arr[mid] < x):
            left = mid + 1
        else:
            right = mid
    return -1

arr = [1,2,3,4,5,6,7,8,9]
// ... existing code ...
'''

completion = generate_completion(create_query(original_code, edit), SYSTEM_PROMPT)
updated_code = extract_solution(completion)
print(updated_code)
```
28
added_tokens.json
Normal file
@@ -0,0 +1,28 @@
{
  "</think>": 151668,
  "</tool_call>": 151658,
  "</tool_response>": 151666,
  "<think>": 151667,
  "<tool_call>": 151657,
  "<tool_response>": 151665,
  "<|box_end|>": 151649,
  "<|box_start|>": 151648,
  "<|endoftext|>": 151643,
  "<|file_sep|>": 151664,
  "<|fim_middle|>": 151660,
  "<|fim_pad|>": 151662,
  "<|fim_prefix|>": 151659,
  "<|fim_suffix|>": 151661,
  "<|im_end|>": 151645,
  "<|im_start|>": 151644,
  "<|image_pad|>": 151655,
  "<|object_ref_end|>": 151647,
  "<|object_ref_start|>": 151646,
  "<|quad_end|>": 151651,
  "<|quad_start|>": 151650,
  "<|repo_name|>": 151663,
  "<|video_pad|>": 151656,
  "<|vision_end|>": 151653,
  "<|vision_pad|>": 151654,
  "<|vision_start|>": 151652
}
30
config.json
Normal file
@@ -0,0 +1,30 @@
{
  "architectures": [
    "Qwen3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "eos_token_id": 151645,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 6144,
  "max_position_embeddings": 40960,
  "max_window_layers": 28,
  "model_type": "qwen3",
  "num_attention_heads": 16,
  "num_hidden_layers": 28,
  "num_key_value_heads": 8,
  "pad_token_id": 151643,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000,
  "sliding_window": null,
  "tie_word_embeddings": true,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.1",
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 151936
}
1
configuration.json
Normal file
@@ -0,0 +1 @@
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
13
generation_config.json
Normal file
@@ -0,0 +1,13 @@
{
  "bos_token_id": 151643,
  "do_sample": true,
  "eos_token_id": [
    151645,
    151643
  ],
  "pad_token_id": 151643,
  "temperature": 0.6,
  "top_k": 20,
  "top_p": 0.95,
  "transformers_version": "4.51.1"
}
151388
merges.txt
Normal file
File diff suppressed because it is too large
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:04fc2801cfeaafd13b70ba6c5c10c3624c4cc647fbd1970ed41d43f2f0e3d9b1
size 4063515640
3
osmosis-apply-1.7b-bf16.gguf
Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:878a0807397919b13ce01ed69f29b0b766d2ff035b8c698dea771dff70c2b64d
size 4069678784
3
osmosis-mcp-1.7b.IQ4_XS.gguf
Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7f69035f8224e21294999345036e57efaf956ab3ca9ced39dfc9bf9879b7bf84
size 1181587136
3
osmosis-mcp-1.7b.Q2_K.gguf
Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:97ee60b4752994044f5daf0d2b045c8637a336d4182557d3dcaacaf939e14646
size 879896256
3
osmosis-mcp-1.7b.Q3_K_L.gguf
Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d3f7d5013adc0391622a5cdd7b5263c0eeba45862600b4a7b94bcdedcfdb5772
size 1137204928
3
osmosis-mcp-1.7b.Q3_K_M.gguf
Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:ce63b47498d70d9d1dcb94c4467b9735efca8f0fa0d1ad334596f3ec888245d1
size 1073241792
3
osmosis-mcp-1.7b.Q3_K_S.gguf
Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:684144d7aa44ee732cd8939171b72eb67e3a2308ad9ce84dff919b2212e2dc3e
size 1000955584
3
osmosis-mcp-1.7b.Q4_K_M.gguf
Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:85d16ed10da4ea851da357377c419c1b0cdd5f6875c5cb0e006207a35943dc3f
size 1282438848
3
osmosis-mcp-1.7b.Q4_K_S.gguf
Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6215d0f7756a1005cca262f34fa1c3b363010c9a986116adcde13192478f2f95
size 1235220160
3
osmosis-mcp-1.7b.Q5_K_M.gguf
Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e49b5d2e20b9bb7568127e5ad8b876a060bbfdfa5ca253ca83f32a2ce090c435
size 1471805120
3
osmosis-mcp-1.7b.Q5_K_S.gguf
Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4b7222b24599b8c99ceb2d4e1bd89dc425b970fdd3c0140e8070f8bd89cdd667
size 1444509376
3
osmosis-mcp-1.7b.Q6_K.gguf
Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:349fff3d655b3e739b92751248c34dd4702fc32c39f58a2ffdb301ccfef829d9
size 1673006784
3
osmosis-mcp-1.7b.Q8_0.gguf
Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:948d1077ba710f12e1a9501124f05398bfb285b95ef447b145f19dd1988bc676
size 2165038784
78
quantize_models.sh
Normal file
@@ -0,0 +1,78 @@
#!/bin/bash

# Input model file (corrected filename)
INPUT_MODEL="osmosis-apply-1.7b-bf16.gguf"

# Define quantization formats to generate
QUANT_FORMATS=(
    "Q4_K_S"
    "Q5_K_M"
    "Q5_K_S"
    "Q6_K"
    "IQ4_XS"
    "Q8_0"
    "Q2_K"
    "Q3_K_L"
    "Q3_K_M"
    "Q3_K_S"
    "Q4_K_M"
)

# Generate bf16 model if it doesn't exist
if [ ! -f "$INPUT_MODEL" ]; then
    echo "bf16 model not found. Generating $INPUT_MODEL..."

    # Run the conversion
    cd llama.cpp && python3 convert_hf_to_gguf.py ../ --outfile ../osmosis-apply-1.7b-bf16.gguf
    cd ..

    # Check if bf16 generation was successful
    if [ ! -f "$INPUT_MODEL" ]; then
        echo "Error: Failed to generate bf16 model $INPUT_MODEL"
        exit 1
    fi
    echo "Successfully generated $INPUT_MODEL"
fi

# Path to llama-quantize tool (corrected path)
QUANTIZE_TOOL="llama.cpp/build/bin/llama-quantize"

# Check if quantize tool exists
if [ ! -f "$QUANTIZE_TOOL" ]; then
    echo "Error: Quantize tool not found at $QUANTIZE_TOOL"
    echo "Please build it first by running: cd llama.cpp && mkdir -p build && cd build && cmake .. && make llama-quantize"
    exit 1
fi

# Process each quantization format
for format in "${QUANT_FORMATS[@]}"; do
    echo "------------------------------------------------------"
    echo "Starting quantization: $format"
    echo "------------------------------------------------------"

    # Define output filename with the exact format requested
    OUTPUT_MODEL="osmosis-mcp-1.7b.${format}.gguf"

    # Check if output model already exists
    if [ -f "$OUTPUT_MODEL" ]; then
        echo "Model $OUTPUT_MODEL already exists. Skipping..."
        continue
    fi

    # Run quantization
    echo "Quantizing to $format..."
    "$QUANTIZE_TOOL" "$INPUT_MODEL" "$OUTPUT_MODEL" "$format"

    # Check if quantization was successful
    if [ $? -eq 0 ]; then
        echo "Successfully created $OUTPUT_MODEL"
    else
        echo "Failed to create $OUTPUT_MODEL"
    fi

    echo ""
done

echo "All quantizations completed!"
echo "Generated models:"
ls -lah osmosis-mcp-1.7b.*.gguf
31
special_tokens_map.json
Normal file
@@ -0,0 +1,31 @@
{
  "additional_special_tokens": [
    "<|im_start|>",
    "<|im_end|>",
    "<|object_ref_start|>",
    "<|object_ref_end|>",
    "<|box_start|>",
    "<|box_end|>",
    "<|quad_start|>",
    "<|quad_end|>",
    "<|vision_start|>",
    "<|vision_end|>",
    "<|vision_pad|>",
    "<|image_pad|>",
    "<|video_pad|>"
  ],
  "eos_token": {
    "content": "<|im_end|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
size 11422654
240
tokenizer_config.json
Normal file
240
tokenizer_config.json
Normal file
@@ -0,0 +1,240 @@
{
  "add_bos_token": false,
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "151643": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151644": {
      "content": "<|im_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151645": {
      "content": "<|im_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151646": {
      "content": "<|object_ref_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151647": {
      "content": "<|object_ref_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151648": {
      "content": "<|box_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151649": {
      "content": "<|box_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151650": {
      "content": "<|quad_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151651": {
      "content": "<|quad_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151652": {
      "content": "<|vision_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151653": {
      "content": "<|vision_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151654": {
      "content": "<|vision_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151655": {
      "content": "<|image_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151656": {
      "content": "<|video_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151657": {
      "content": "<tool_call>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151658": {
      "content": "</tool_call>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151659": {
      "content": "<|fim_prefix|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151660": {
      "content": "<|fim_middle|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151661": {
      "content": "<|fim_suffix|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151662": {
      "content": "<|fim_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151663": {
      "content": "<|repo_name|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151664": {
      "content": "<|file_sep|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151665": {
      "content": "<tool_response>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151666": {
      "content": "</tool_response>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151667": {
      "content": "<think>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151668": {
      "content": "</think>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    }
  },
  "additional_special_tokens": [
    "<|im_start|>",
    "<|im_end|>",
    "<|object_ref_start|>",
    "<|object_ref_end|>",
    "<|box_start|>",
    "<|box_end|>",
    "<|quad_start|>",
    "<|quad_end|>",
    "<|vision_start|>",
    "<|vision_end|>",
    "<|vision_pad|>",
    "<|image_pad|>",
    "<|video_pad|>"
  ],
  "bos_token": null,
  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0].role == 'system' %}\n        {{- messages[0].content + '\\n\\n' }}\n    {%- endif %}\n    {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0].role == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0].content + '<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n    {%- set index = (messages|length - 1) - loop.index0 %}\n    {%- if ns.multi_step_tool and message.role == \"user\" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}\n        {%- set ns.multi_step_tool = false %}\n        {%- set ns.last_query_index = index %}\n    {%- endif %}\n{%- endfor %}\n{%- for message in messages %}\n    {%- if message.content is string %}\n        {%- set content = message.content %}\n    {%- else %}\n        {%- set content = '' %}\n    {%- endif %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) %}\n        {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {%- set reasoning_content = '' %}\n        {%- if message.reasoning_content is string %}\n            {%- set reasoning_content = message.reasoning_content %}\n        {%- else %}\n            {%- if '</think>' in content %}\n                {%- set reasoning_content = content.split('</think>')[0].rstrip('\\n').split('<think>')[-1].lstrip('\\n') %}\n                {%- set content = content.split('</think>')[-1].lstrip('\\n') %}\n            {%- endif %}\n        {%- endif %}\n        {%- if loop.index0 > ns.last_query_index %}\n            {%- if loop.last or (not loop.last and reasoning_content) %}\n                {{- '<|im_start|>' + message.role + '\\n<think>\\n' + reasoning_content.strip('\\n') + '\\n</think>\\n\\n' + content.lstrip('\\n') }}\n            {%- else %}\n                {{- '<|im_start|>' + message.role + '\\n' + content }}\n            {%- endif %}\n        {%- else %}\n            {{- '<|im_start|>' + message.role + '\\n' + content }}\n        {%- endif %}\n        {%- if message.tool_calls %}\n            {%- for tool_call in message.tool_calls %}\n                {%- if (loop.first and content) or (not loop.first) %}\n                    {{- '\\n' }}\n                {%- endif %}\n                {%- if tool_call.function %}\n                    {%- set tool_call = tool_call.function %}\n                {%- endif %}\n                {{- '<tool_call>\\n{\"name\": \"' }}\n                {{- tool_call.name }}\n                {{- '\", \"arguments\": ' }}\n                {%- if tool_call.arguments is string %}\n                    {{- tool_call.arguments }}\n                {%- else %}\n                    {{- tool_call.arguments | tojson }}\n                {%- endif %}\n                {{- '}\\n</tool_call>' }}\n            {%- endfor %}\n        {%- endif %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n    {%- if enable_thinking is defined and enable_thinking is false %}\n        {{- '<think>\\n\\n</think>\\n\\n' }}\n    {%- endif %}\n{%- endif %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "errors": "replace",
  "extra_special_tokens": {},
  "model_max_length": 131072,
  "pad_token": "<|endoftext|>",
  "split_special_tokens": false,
  "tokenizer_class": "Qwen2Tokenizer",
  "unk_token": null
}
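The `chat_template` in `tokenizer_config.json` is a full Jinja template with tool-call and `<think>` handling. Stripped of those branches, the core ChatML framing it produces can be sketched in a few lines of Python (a simplified approximation for illustration, not the template itself):

```python
def render_chatml(messages, add_generation_prompt=True):
    """Approximate the basic ChatML framing the chat_template produces:
    each message becomes <|im_start|>role\\ncontent<|im_end|>\\n, and an
    open assistant turn is appended when generation is requested."""
    out = []
    for m in messages:
        out.append(f"<|im_start|>{m['role']}\n{m['content']}<|im_end|>\n")
    if add_generation_prompt:
        out.append("<|im_start|>assistant\n")
    return "".join(out)
```

For real use, `transformers`' `tokenizer.apply_chat_template(...)` renders the actual template, including the tool-call and reasoning-content branches this sketch omits.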
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long