Files
enginex-c_series-vllm/vllm/attention/backends/configs/tp8_merge.json

986 lines
16 KiB
JSON

[
{
"BS": 1,
"L": 2,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 1,
"L": 4,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 1,
"L": 8,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 1,
"L": 16,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 32,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 64,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 128,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 256,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 512,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 1024,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 2048,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 4096,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 8192,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 16384,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 32768,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 65536,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 2,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 2,
"L": 4,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 2,
"L": 8,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 2,
"L": 16,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 2,
"L": 32,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 64,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 128,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 2,
"L": 256,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 512,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 1024,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 2048,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 4096,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 8192,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 16384,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 32768,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 65536,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 2,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 4,
"L": 4,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 4,
"L": 8,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 16,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 4,
"L": 32,
"num_kv_splits": 13,
"num_stages": 2
},
{
"BS": 4,
"L": 64,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 4,
"L": 128,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 4,
"L": 256,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 512,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 1024,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 2048,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 4096,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 8192,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 16384,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 32768,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 8,
"L": 2,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 8,
"L": 4,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 8,
"L": 8,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 8,
"L": 16,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 8,
"L": 32,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 8,
"L": 64,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 8,
"L": 128,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 8,
"L": 256,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 8,
"L": 512,
"num_kv_splits": 13,
"num_stages": 2
},
{
"BS": 8,
"L": 1024,
"num_kv_splits": 13,
"num_stages": 2
},
{
"BS": 8,
"L": 2048,
"num_kv_splits": 13,
"num_stages": 2
},
{
"BS": 8,
"L": 4096,
"num_kv_splits": 13,
"num_stages": 2
},
{
"BS": 8,
"L": 8192,
"num_kv_splits": 13,
"num_stages": 2
},
{
"BS": 8,
"L": 16384,
"num_kv_splits": 13,
"num_stages": 2
},
{
"BS": 16,
"L": 2,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 16,
"L": 4,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 16,
"L": 8,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 16,
"L": 16,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 16,
"L": 32,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 16,
"L": 64,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 16,
"L": 128,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 16,
"L": 256,
"num_kv_splits": 6,
"num_stages": 2
},
{
"BS": 16,
"L": 512,
"num_kv_splits": 13,
"num_stages": 1
},
{
"BS": 16,
"L": 1024,
"num_kv_splits": 13,
"num_stages": 1
},
{
"BS": 16,
"L": 2048,
"num_kv_splits": 13,
"num_stages": 1
},
{
"BS": 16,
"L": 4096,
"num_kv_splits": 13,
"num_stages": 1
},
{
"BS": 16,
"L": 8192,
"num_kv_splits": 13,
"num_stages": 1
},
{
"BS": 32,
"L": 2,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 32,
"L": 4,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 32,
"L": 8,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 32,
"L": 16,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 32,
"L": 32,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 32,
"L": 64,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 32,
"L": 128,
"num_kv_splits": 3,
"num_stages": 2
},
{
"BS": 32,
"L": 256,
"num_kv_splits": 6,
"num_stages": 1
},
{
"BS": 32,
"L": 512,
"num_kv_splits": 6,
"num_stages": 1
},
{
"BS": 32,
"L": 1024,
"num_kv_splits": 6,
"num_stages": 1
},
{
"BS": 32,
"L": 2048,
"num_kv_splits": 6,
"num_stages": 1
},
{
"BS": 32,
"L": 4096,
"num_kv_splits": 13,
"num_stages": 1
},
{
"BS": 64,
"L": 2,
"num_kv_splits": 4,
"num_stages": 1
},
{
"BS": 64,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 64,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 64,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 64,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 64,
"L": 64,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 64,
"L": 128,
"num_kv_splits": 3,
"num_stages": 1
},
{
"BS": 64,
"L": 256,
"num_kv_splits": 3,
"num_stages": 1
},
{
"BS": 64,
"L": 512,
"num_kv_splits": 3,
"num_stages": 1
},
{
"BS": 64,
"L": 1024,
"num_kv_splits": 3,
"num_stages": 1
},
{
"BS": 64,
"L": 2048,
"num_kv_splits": 3,
"num_stages": 1
},
{
"BS": 64,
"L": 2048,
"num_kv_splits": 8,
"num_stages": 1
},
{
"BS": 96,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 64,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 128,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 256,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 512,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 1024,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 128,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 128,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 128,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 128,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 128,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 128,
"L": 64,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 128,
"L": 128,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 128,
"L": 256,
"num_kv_splits": 3,
"num_stages": 1
},
{
"BS": 128,
"L": 512,
"num_kv_splits": 3,
"num_stages": 1
},
{
"BS": 128,
"L": 1024,
"num_kv_splits": 3,
"num_stages": 1
},
{
"BS": 256,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 256,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 256,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 256,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 256,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 256,
"L": 64,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 256,
"L": 128,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 256,
"L": 256,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 256,
"L": 512,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 512,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 512,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 512,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 512,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 512,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 512,
"L": 64,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 512,
"L": 128,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 512,
"L": 256,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1024,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1024,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1024,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1024,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1024,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1024,
"L": 64,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1024,
"L": 128,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1536,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1536,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1536,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1536,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1536,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1536,
"L": 64,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 2048,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 2048,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 2048,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 2048,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 2048,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 2048,
"L": 64,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 3072,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 3072,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 3072,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 3072,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 3072,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 4096,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 4096,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 4096,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 4096,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 4096,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
}
]