:: Quantization batch script for llama.cpp (Windows cmd).
:: Converts F16 GGUF models to mixed-precision "Q8_K_XL"-style quants:
:: Q8_0 for the middle transformer blocks and output head, F16 elsewhere.
:: Quantize Ancient-Awakening-12B (F16 -> mixed Q8_0/F16).
:: Layout: token embeddings and the outermost blocks (0-2, 38-39) are kept at
:: F16 for quality; middle blocks (3-37) and output.weight go to Q8_0.
:: The trailing "Q8_0" is the fallback type for any tensor not matched above.
:: NOTE(review): the "." before "weight" in each regex is unescaped — it matches
:: any character; harmless here, but "\.weight" would be stricter.
C:\Quanter\llama.cpp\llama-quantize ^
  --tensor-type output.weight=Q8_0 ^
  --tensor-type token_embd.weight=F16 ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.attn_k.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.attn_k.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.attn_output.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.attn_output.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.attn_q.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.attn_q.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.attn_v.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.attn_v.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.ffn_down.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.ffn_down.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.ffn_gate.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.ffn_gate.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.ffn_up.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.ffn_up.weight=F16" ^
  B:\12B\Naphula--Ancient-Awakening-12B\Ancient-Awakening-12B-f16.gguf ^
  B:\12B\Naphula--Ancient-Awakening-12B\Ancient-Awakening-12B-Q8_K_XL.gguf ^
  Q8_0
:: Quantize Ancient-Awakening-12B-MPOA (F16 -> mixed Q8_0/F16).
:: Same precision layout as the other models in this script: embeddings and
:: outer blocks (0-2, 38-39) stay F16; middle blocks (3-37) and output.weight
:: are Q8_0; the final "Q8_0" argument is the fallback for unmatched tensors.
C:\Quanter\llama.cpp\llama-quantize ^
  --tensor-type output.weight=Q8_0 ^
  --tensor-type token_embd.weight=F16 ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.attn_k.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.attn_k.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.attn_output.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.attn_output.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.attn_q.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.attn_q.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.attn_v.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.attn_v.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.ffn_down.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.ffn_down.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.ffn_gate.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.ffn_gate.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.ffn_up.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.ffn_up.weight=F16" ^
  B:\12B\Naphula--Ancient-Awakening-12B-MPOA\Ancient-Awakening-12B-MPOA-f16.gguf ^
  B:\12B\Naphula--Ancient-Awakening-12B-MPOA\Ancient-Awakening-12B-MPOA-Q8_K_XL.gguf ^
  Q8_0
:: Quantize KrakenSakura-Maelstrom-12B-v1 (F16 -> mixed Q8_0/F16).
:: Same layout as the other commands: embeddings and outer blocks (0-2, 38-39)
:: stay F16; middle blocks (3-37) and output.weight are Q8_0; trailing "Q8_0"
:: is the fallback type for tensors not matched by any --tensor-type rule.
:: FIX: removed the stray trailing "|" from the original line — an empty pipe
:: is a cmd.exe syntax error and would have aborted this command.
C:\Quanter\llama.cpp\llama-quantize ^
  --tensor-type output.weight=Q8_0 ^
  --tensor-type token_embd.weight=F16 ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.attn_k.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.attn_k.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.attn_output.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.attn_output.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.attn_q.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.attn_q.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.attn_v.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.attn_v.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.ffn_down.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.ffn_down.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.ffn_gate.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.ffn_gate.weight=F16" ^
  --tensor-type "blk\.(3|4|5|6|7|8|9|10|11|12|13|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37)\.ffn_up.weight=Q8_0" ^
  --tensor-type "blk\.(0|1|2|38|39)\.ffn_up.weight=F16" ^
  B:\KrakenSakura-Maelstrom-12B-v1.f16.gguf ^
  B:\KrakenSakura-Maelstrom-12B-v1-Q8_K_XL.gguf ^
  Q8_0