model: add support for qwen3vl series (#16780)
* support qwen3vl series. Co-authored-by: Thireus ☠ <Thireus@users.noreply.github.com> Co-authored-by: yairpatch <yairpatch@users.noreply.github.com> Co-authored-by: LETS-BEE <LETS-BEE@users.noreply.github.com> * bugfix: fix the arch check for qwen3vl-moe. * use build_ffn * optimize deepstack structure * optimize deepstack feature saving * Revert "optimize deepstack feature saving" for temporal fix This reverts commit f321b9fdf13e59527408152e73b1071e19a87e71. * code clean * use fused qkv in clip * clean up / rm is_deepstack_layers for simplification * add test model * move test model to "big" section * fix imrope check * remove trailing whitespace * fix rope fail * metal : add imrope support * add imrope support for sycl * vulkan: add imrope w/o check * fix vulkan * webgpu: add imrope w/o check * Update gguf-py/gguf/tensor_mapping.py Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> * fix tensor mapping --------- Co-authored-by: Thireus ☠ <Thireus@users.noreply.github.com> Co-authored-by: yairpatch <yairpatch@users.noreply.github.com> Co-authored-by: LETS-BEE <LETS-BEE@users.noreply.github.com> Co-authored-by: Xuan Son Nguyen <son@huggingface.co> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
@@ -7076,7 +7076,12 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_rope(type, {128, 28, 2, 1}, 128, GGML_ROPE_TYPE_MROPE, 512, fs, ef, af, ff, v, fw)); // rope_multi,m-rope (qwen2vl 7B)
|
||||
test_cases.emplace_back(new test_rope(type, {128, 12, 2, 1}, 20, GGML_ROPE_TYPE_MROPE, 512, fs, ef, af, ff, v, fw));
|
||||
test_cases.emplace_back(new test_rope(type, {128, 28, 2, 1}, 32, GGML_ROPE_TYPE_MROPE, 512, fs, ef, af, ff, v, fw));
|
||||
test_cases.emplace_back(new test_rope(type, {128, 12, 2, 1}, 128, GGML_ROPE_TYPE_IMROPE, 512, fs, ef, af, ff, v, fw)); // rope_multi,imrope (qwen3vl 2B)
|
||||
test_cases.emplace_back(new test_rope(type, {128, 28, 2, 1}, 128, GGML_ROPE_TYPE_IMROPE, 512, fs, ef, af, ff, v, fw)); // rope_multi,imrope (qwen3vl 7B)
|
||||
test_cases.emplace_back(new test_rope(type, {128, 12, 2, 1}, 20, GGML_ROPE_TYPE_IMROPE, 512, fs, ef, af, ff, v, fw));
|
||||
test_cases.emplace_back(new test_rope(type, {128, 28, 2, 1}, 32, GGML_ROPE_TYPE_IMROPE, 512, fs, ef, af, ff, v, fw));
|
||||
test_cases.emplace_back(new test_rope(type, { 80, 16, 2, 1}, 80, GGML_ROPE_TYPE_VISION, 512, fs, ef, af, ff, v, fw)); // rope_multi,m-rope (qwen2vl ViT)
|
||||
test_cases.emplace_back(new test_rope(type, {128, 16, 2, 1}, 128, GGML_ROPE_TYPE_IMROPE, 512, fs, ef, af, ff, v, fw)); // rope_multi,m-rope (qwen3vl)
|
||||
}
|
||||
|
||||
test_cases.emplace_back(new test_rope(type, { 64, 128, 2, 1}, 64, GGML_ROPE_TYPE_NEOX, 512, fs, ef, af, ff, v, fw)); // neox (falcon 40B)
|
||||
@@ -7092,7 +7097,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
|
||||
// single inplace test per type/mode/ff
|
||||
for (ggml_type type : {GGML_TYPE_F32, GGML_TYPE_F16}) {
|
||||
for (int mode : {GGML_ROPE_TYPE_NORMAL, GGML_ROPE_TYPE_NEOX, GGML_ROPE_TYPE_MROPE, GGML_ROPE_TYPE_VISION}) {
|
||||
for (int mode : {GGML_ROPE_TYPE_NORMAL, GGML_ROPE_TYPE_NEOX, GGML_ROPE_TYPE_MROPE, GGML_ROPE_TYPE_IMROPE, GGML_ROPE_TYPE_VISION}) {
|
||||
for (bool ff : {false, true}) {
|
||||
test_cases.emplace_back(new test_rope(type, {128, 32, 2, 1}, 128, mode, 512, 1.4245f, 0.7465f, 1.4245f, ff, 0, true, true));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user