From 4f81b2c935eb1615037230113f7748d06f95b7ef Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Tue, 21 Apr 2026 17:55:38 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: duyntnet/Octopus-v2-imatrix-GGUF Source: Original Platform --- .gitattributes | 60 +++++++++++++++++++++++++++++++++++++++++ Octopus-v2-IQ1_M.gguf | 3 +++ Octopus-v2-IQ1_S.gguf | 3 +++ Octopus-v2-IQ2_M.gguf | 3 +++ Octopus-v2-IQ2_S.gguf | 3 +++ Octopus-v2-IQ2_XS.gguf | 3 +++ Octopus-v2-IQ2_XXS.gguf | 3 +++ Octopus-v2-IQ3_M.gguf | 3 +++ Octopus-v2-IQ3_S.gguf | 3 +++ Octopus-v2-IQ3_XS.gguf | 3 +++ Octopus-v2-IQ3_XXS.gguf | 3 +++ Octopus-v2-IQ4_NL.gguf | 3 +++ Octopus-v2-IQ4_XS.gguf | 3 +++ Octopus-v2-Q2_K.gguf | 3 +++ Octopus-v2-Q2_K_S.gguf | 3 +++ Octopus-v2-Q3_K_L.gguf | 3 +++ Octopus-v2-Q3_K_M.gguf | 3 +++ Octopus-v2-Q3_K_S.gguf | 3 +++ Octopus-v2-Q4_0.gguf | 3 +++ Octopus-v2-Q4_K_M.gguf | 3 +++ Octopus-v2-Q4_K_S.gguf | 3 +++ Octopus-v2-Q5_0.gguf | 3 +++ Octopus-v2-Q5_K_M.gguf | 3 +++ Octopus-v2-Q5_K_S.gguf | 3 +++ Octopus-v2-Q6_K.gguf | 3 +++ Octopus-v2-Q8_0.gguf | 3 +++ README.md | 51 +++++++++++++++++++++++++++++++++++ 27 files changed, 186 insertions(+) create mode 100644 .gitattributes create mode 100644 Octopus-v2-IQ1_M.gguf create mode 100644 Octopus-v2-IQ1_S.gguf create mode 100644 Octopus-v2-IQ2_M.gguf create mode 100644 Octopus-v2-IQ2_S.gguf create mode 100644 Octopus-v2-IQ2_XS.gguf create mode 100644 Octopus-v2-IQ2_XXS.gguf create mode 100644 Octopus-v2-IQ3_M.gguf create mode 100644 Octopus-v2-IQ3_S.gguf create mode 100644 Octopus-v2-IQ3_XS.gguf create mode 100644 Octopus-v2-IQ3_XXS.gguf create mode 100644 Octopus-v2-IQ4_NL.gguf create mode 100644 Octopus-v2-IQ4_XS.gguf create mode 100644 Octopus-v2-Q2_K.gguf create mode 100644 Octopus-v2-Q2_K_S.gguf create mode 100644 Octopus-v2-Q3_K_L.gguf create mode 100644 Octopus-v2-Q3_K_M.gguf create mode 100644 Octopus-v2-Q3_K_S.gguf create mode 100644 Octopus-v2-Q4_0.gguf create mode 100644 Octopus-v2-Q4_K_M.gguf create mode 100644 Octopus-v2-Q4_K_S.gguf create mode 100644 Octopus-v2-Q5_0.gguf create mode 100644 Octopus-v2-Q5_K_M.gguf create mode 100644 Octopus-v2-Q5_K_S.gguf create mode 100644 Octopus-v2-Q6_K.gguf create mode 100644 Octopus-v2-Q8_0.gguf create mode 100644 README.md diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..7a4e6cb --- /dev/null +++ b/.gitattributes @@ -0,0 +1,60 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +Octopus-v2-IQ1_M.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-IQ1_S.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-IQ2_M.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-IQ2_S.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-IQ2_XS.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-IQ2_XXS.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-Q2_K_S.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-Q2_K.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-IQ3_M.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-IQ3_S.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-IQ3_XS.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-IQ3_XXS.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-IQ4_NL.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-Q5_0.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +Octopus-v2-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/Octopus-v2-IQ1_M.gguf b/Octopus-v2-IQ1_M.gguf new file mode 100644 index 0000000..63c23e8 --- /dev/null +++ b/Octopus-v2-IQ1_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0787afdf956074323b1ae140507766061d552516df61a11000a52272a1f5b4ce +size 813875904 diff --git a/Octopus-v2-IQ1_S.gguf b/Octopus-v2-IQ1_S.gguf new file mode 100644 index 0000000..76b1581 --- /dev/null +++ b/Octopus-v2-IQ1_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f9be4e4b7e038f26b891dbb4dbc89fd69f0dd0a7a82e6516716521c70492fd8a +size 770990784 diff --git a/Octopus-v2-IQ2_M.gguf b/Octopus-v2-IQ2_M.gguf new file mode 100644 index 0000000..b82df6f --- /dev/null +++ b/Octopus-v2-IQ2_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ba80a3b5d53247ed3c24c3198fd3c24b15ef6f7b3b4ea2b9c010e8a49beb188 +size 1019503296 diff --git a/Octopus-v2-IQ2_S.gguf b/Octopus-v2-IQ2_S.gguf new file mode 100644 index 0000000..b115cd9 --- /dev/null +++ b/Octopus-v2-IQ2_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f9a9569bc07de00478cb86b4469f656fec8dfb9db7666ac16c8c10e54d9fd43 +size 962323136 diff --git a/Octopus-v2-IQ2_XS.gguf b/Octopus-v2-IQ2_XS.gguf new file mode 100644 index 0000000..4bc82d9 --- /dev/null +++ b/Octopus-v2-IQ2_XS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4d1dc2a307284e0ecf11d66e60b9a99e9782f60be55dc2597f37c04af02b5b0 +size 944890560 diff --git a/Octopus-v2-IQ2_XXS.gguf b/Octopus-v2-IQ2_XXS.gguf new file mode 100644 index 0000000..32f6cb3 --- /dev/null +++ b/Octopus-v2-IQ2_XXS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92f17030b801d7afa5f8fb73318f0e9e18194e4650377713c0d8d3c1e18d3122 +size 885351104 diff --git a/Octopus-v2-IQ3_M.gguf b/Octopus-v2-IQ3_M.gguf new file mode 100644 index 0000000..916d16b --- /dev/null +++ b/Octopus-v2-IQ3_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0037f33dcac14ec7df1ca764f36a21ba4a3bbcb6d7d545152812d8e696418c7a +size 1308211744 diff --git a/Octopus-v2-IQ3_S.gguf b/Octopus-v2-IQ3_S.gguf new file mode 100644 index 0000000..95794cf --- /dev/null +++ b/Octopus-v2-IQ3_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb33fbea0ea0889d5d98da37fce2cc24f71c37902e52b1b3756f615ca6ce221f +size 1289271840 diff --git a/Octopus-v2-IQ3_XS.gguf b/Octopus-v2-IQ3_XS.gguf new file mode 100644 index 0000000..100fed7 --- /dev/null +++ b/Octopus-v2-IQ3_XS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4db9f2702a48f159f1c8b13c625aa396b3cd57ee85c0562a34147bdd0007591 +size 1244396064 diff --git a/Octopus-v2-IQ3_XXS.gguf b/Octopus-v2-IQ3_XXS.gguf new file mode 100644 index 0000000..2b42b3c --- /dev/null +++ b/Octopus-v2-IQ3_XXS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32018fb23754c4c7061d7a3397165d4d514cbb07c908319b7e3402679d648273 +size 1125409472 diff --git a/Octopus-v2-IQ4_NL.gguf b/Octopus-v2-IQ4_NL.gguf new file mode 100644 index 0000000..1e44ccb --- /dev/null +++ b/Octopus-v2-IQ4_NL.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ade63584f7c89759b95e177fca1c7bf626372744120d060ce78b15c52c7e3c9 +size 1552407072 diff --git a/Octopus-v2-IQ4_XS.gguf b/Octopus-v2-IQ4_XS.gguf new file mode 100644 index 0000000..03a7eea --- /dev/null +++ b/Octopus-v2-IQ4_XS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f30d1cb9f948b857dfbcae2f74025955e08f42d8fe0ce77455e671c0bd88e5f +size 1490770464 diff --git a/Octopus-v2-Q2_K.gguf b/Octopus-v2-Q2_K.gguf new file mode 100644 index 0000000..d3e16d0 --- /dev/null +++ b/Octopus-v2-Q2_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26ae8d964016ebb2b9fbc7b5f2282ef56c330cc6fe32868b96f58ee5a953e6bc +size 1157962272 diff --git a/Octopus-v2-Q2_K_S.gguf b/Octopus-v2-Q2_K_S.gguf new file mode 100644 index 0000000..87a254e --- /dev/null +++ b/Octopus-v2-Q2_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e3b3704bd118ab5594e5ff9b73cfc74d512dd019a3353146f2dbb465365d8a1 +size 1104681504 diff --git a/Octopus-v2-Q3_K_L.gguf b/Octopus-v2-Q3_K_L.gguf new file mode 100644 index 0000000..ed238c9 --- /dev/null +++ b/Octopus-v2-Q3_K_L.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:81a912ea284a7aa832f31b27b57ac25dceb41c36397a17f5711008147a1ca92f +size 1465629216 diff --git a/Octopus-v2-Q3_K_M.gguf b/Octopus-v2-Q3_K_M.gguf new file mode 100644 index 0000000..8a17990 --- /dev/null +++ b/Octopus-v2-Q3_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ba5b3305e7823340df87e6142d4c9b8a78bdcb3db1d488d6ce8e3eafec64679 +size 1383840288 diff --git a/Octopus-v2-Q3_K_S.gguf b/Octopus-v2-Q3_K_S.gguf new file mode 100644 index 0000000..a60a5b3 --- /dev/null +++ b/Octopus-v2-Q3_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:608eb75a25c2c7a19f99140abf6242e481a46f9e46efea056ec32421b0c06e67 +size 1288018464 diff --git a/Octopus-v2-Q4_0.gguf b/Octopus-v2-Q4_0.gguf new file mode 100644 index 0000000..3979444 --- /dev/null +++ b/Octopus-v2-Q4_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5777de5f00b917225480b47b42b8029c3653b585094e6b9331a3604bdb594ef +size 1555421728 diff --git a/Octopus-v2-Q4_K_M.gguf b/Octopus-v2-Q4_K_M.gguf new file mode 100644 index 0000000..c6113d2 --- /dev/null +++ b/Octopus-v2-Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b05b07e26e90c65f6017190462e7565264f2798b4c02c28dafd59b34a567653d +size 1630300704 diff --git a/Octopus-v2-Q4_K_S.gguf b/Octopus-v2-Q4_K_S.gguf new file mode 100644 index 0000000..04166f6 --- /dev/null +++ b/Octopus-v2-Q4_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c37adb68ead471f7fcb3018f9ee9e58b3e21236e42fc28d817459a925cc42233 +size 1559878176 diff --git a/Octopus-v2-Q5_0.gguf b/Octopus-v2-Q5_0.gguf new file mode 100644 index 0000000..d0f4423 --- /dev/null +++ b/Octopus-v2-Q5_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deef29d8236ea0f8d66739e2f711bc85fa27268947b1680fc1df526db54948cd +size 1803147808 diff --git a/Octopus-v2-Q5_K_M.gguf b/Octopus-v2-Q5_K_M.gguf new file mode 100644 index 0000000..b731f2c --- /dev/null +++ b/Octopus-v2-Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78141788a904b22e08f535195e887ab6dee19f396df2cf057429b7626e742160 +size 1839688224 diff --git a/Octopus-v2-Q5_K_S.gguf b/Octopus-v2-Q5_K_S.gguf new file mode 100644 index 0000000..2ba7170 --- /dev/null +++ b/Octopus-v2-Q5_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce9e388322c4a3e439fd81682c00d8bb05960a06879bb8b167b82550479681c8 +size 1798953504 diff --git a/Octopus-v2-Q6_K.gguf b/Octopus-v2-Q6_K.gguf new file mode 100644 index 0000000..359a12d --- /dev/null +++ b/Octopus-v2-Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:646720dcc3f76995e096b1ebb9c33e15b4fd2067d822e4048dba87ef6c91e630 +size 2062162464 diff --git a/Octopus-v2-Q8_0.gguf b/Octopus-v2-Q8_0.gguf new file mode 100644 index 0000000..93509b4 --- /dev/null +++ b/Octopus-v2-Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba0083af608fc5710030aa21d84d72652cc354aa12c0b94f5fd6221bed1f2a1e +size 2669118656 diff --git a/README.md b/README.md new file mode 100644 index 0000000..0a2388b --- /dev/null +++ b/README.md @@ -0,0 +1,51 @@ +--- +license: other +inference: false +language: +- en +pipeline_tag: text-generation +tags: +- transformers +- gguf +- imatrix +- NexaAIDev +- Octopus-v2 +--- +Quantizations of https://huggingface.co/NexaAIDev/Octopus-v2 + +# From original readme + +## Example Use Cases + + +You can run the model on a GPU using the following code. +```python +from transformers import AutoTokenizer, GemmaForCausalLM +import torch +import time + +def inference(input_text): + start_time = time.time() + input_ids = tokenizer(input_text, return_tensors="pt").to(model.device) + input_length = input_ids["input_ids"].shape[1] + outputs = model.generate( + input_ids=input_ids["input_ids"], + max_length=1024, + do_sample=False) + generated_sequence = outputs[:, input_length:].tolist() + res = tokenizer.decode(generated_sequence[0]) + end_time = time.time() + return {"output": res, "latency": end_time - start_time} + +model_id = "NexaAIDev/Octopus-v2" +tokenizer = AutoTokenizer.from_pretrained(model_id) +model = GemmaForCausalLM.from_pretrained( + model_id, torch_dtype=torch.bfloat16, device_map="auto" +) + +input_text = "Take a selfie for me with front camera" +nexa_query = f"Below is the query from the users, please call the correct function and generate the parameters to call the function.\n\nQuery: {input_text} \n\nResponse:" +start_time = time.time() +print("nexa model result:\n", inference(nexa_query)) +print("latency:", time.time() - start_time," s") +```