[gpt-oss] Add gpt-oss bf16 support

This commit is contained in:
2025-08-13 21:25:57 +08:00
parent 5d2e7edf78
commit 17ea2ec6aa
1232 changed files with 777 additions and 36 deletions

View File

@@ -0,0 +1,986 @@
[
{
"BS": 1,
"L": 2,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 1,
"L": 4,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 1,
"L": 8,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 1,
"L": 16,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 32,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 64,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 128,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 256,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 512,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 1024,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 2048,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 4096,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 8192,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 16384,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 32768,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 1,
"L": 65536,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 2,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 2,
"L": 4,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 2,
"L": 8,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 2,
"L": 16,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 2,
"L": 32,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 64,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 128,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 2,
"L": 256,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 512,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 1024,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 2048,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 4096,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 8192,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 16384,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 32768,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 2,
"L": 65536,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 2,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 4,
"L": 4,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 4,
"L": 8,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 16,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 4,
"L": 32,
"num_kv_splits": 13,
"num_stages": 2
},
{
"BS": 4,
"L": 64,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 4,
"L": 128,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 4,
"L": 256,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 512,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 1024,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 2048,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 4096,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 8192,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 16384,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 4,
"L": 32768,
"num_kv_splits": 16,
"num_stages": 2
},
{
"BS": 8,
"L": 2,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 8,
"L": 4,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 8,
"L": 8,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 8,
"L": 16,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 8,
"L": 32,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 8,
"L": 64,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 8,
"L": 128,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 8,
"L": 256,
"num_kv_splits": 8,
"num_stages": 2
},
{
"BS": 8,
"L": 512,
"num_kv_splits": 13,
"num_stages": 2
},
{
"BS": 8,
"L": 1024,
"num_kv_splits": 13,
"num_stages": 2
},
{
"BS": 8,
"L": 2048,
"num_kv_splits": 13,
"num_stages": 2
},
{
"BS": 8,
"L": 4096,
"num_kv_splits": 13,
"num_stages": 2
},
{
"BS": 8,
"L": 8192,
"num_kv_splits": 13,
"num_stages": 2
},
{
"BS": 8,
"L": 16384,
"num_kv_splits": 13,
"num_stages": 2
},
{
"BS": 16,
"L": 2,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 16,
"L": 4,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 16,
"L": 8,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 16,
"L": 16,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 16,
"L": 32,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 16,
"L": 64,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 16,
"L": 128,
"num_kv_splits": 4,
"num_stages": 2
},
{
"BS": 16,
"L": 256,
"num_kv_splits": 6,
"num_stages": 2
},
{
"BS": 16,
"L": 512,
"num_kv_splits": 13,
"num_stages": 1
},
{
"BS": 16,
"L": 1024,
"num_kv_splits": 13,
"num_stages": 1
},
{
"BS": 16,
"L": 2048,
"num_kv_splits": 13,
"num_stages": 1
},
{
"BS": 16,
"L": 4096,
"num_kv_splits": 13,
"num_stages": 1
},
{
"BS": 16,
"L": 8192,
"num_kv_splits": 13,
"num_stages": 1
},
{
"BS": 32,
"L": 2,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 32,
"L": 4,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 32,
"L": 8,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 32,
"L": 16,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 32,
"L": 32,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 32,
"L": 64,
"num_kv_splits": 2,
"num_stages": 2
},
{
"BS": 32,
"L": 128,
"num_kv_splits": 3,
"num_stages": 2
},
{
"BS": 32,
"L": 256,
"num_kv_splits": 6,
"num_stages": 1
},
{
"BS": 32,
"L": 512,
"num_kv_splits": 6,
"num_stages": 1
},
{
"BS": 32,
"L": 1024,
"num_kv_splits": 6,
"num_stages": 1
},
{
"BS": 32,
"L": 2048,
"num_kv_splits": 6,
"num_stages": 1
},
{
"BS": 32,
"L": 4096,
"num_kv_splits": 13,
"num_stages": 1
},
{
"BS": 64,
"L": 2,
"num_kv_splits": 4,
"num_stages": 1
},
{
"BS": 64,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 64,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 64,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 64,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 64,
"L": 64,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 64,
"L": 128,
"num_kv_splits": 3,
"num_stages": 1
},
{
"BS": 64,
"L": 256,
"num_kv_splits": 3,
"num_stages": 1
},
{
"BS": 64,
"L": 512,
"num_kv_splits": 3,
"num_stages": 1
},
{
"BS": 64,
"L": 1024,
"num_kv_splits": 3,
"num_stages": 1
},
{
"BS": 64,
"L": 2048,
"num_kv_splits": 3,
"num_stages": 1
},
{
"BS": 64,
"L": 2048,
"num_kv_splits": 8,
"num_stages": 1
},
{
"BS": 96,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 64,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 128,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 256,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 512,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 96,
"L": 1024,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 128,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 128,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 128,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 128,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 128,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 128,
"L": 64,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 128,
"L": 128,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 128,
"L": 256,
"num_kv_splits": 3,
"num_stages": 1
},
{
"BS": 128,
"L": 512,
"num_kv_splits": 3,
"num_stages": 1
},
{
"BS": 128,
"L": 1024,
"num_kv_splits": 3,
"num_stages": 1
},
{
"BS": 256,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 256,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 256,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 256,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 256,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 256,
"L": 64,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 256,
"L": 128,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 256,
"L": 256,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 256,
"L": 512,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 512,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 512,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 512,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 512,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 512,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 512,
"L": 64,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 512,
"L": 128,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 512,
"L": 256,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1024,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1024,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1024,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1024,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1024,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1024,
"L": 64,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1024,
"L": 128,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1536,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1536,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1536,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1536,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1536,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 1536,
"L": 64,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 2048,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 2048,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 2048,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 2048,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 2048,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 2048,
"L": 64,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 3072,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 3072,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 3072,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 3072,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 3072,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 4096,
"L": 2,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 4096,
"L": 4,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 4096,
"L": 8,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 4096,
"L": 16,
"num_kv_splits": 2,
"num_stages": 1
},
{
"BS": 4096,
"L": 32,
"num_kv_splits": 2,
"num_stages": 1
}
]