support e4m3 kvcache in qwen2 & add kv scaling facotr json (#2894)

Co-authored-by: bjmsong <bjmsong@126.com>
This commit is contained in:
bjmsong
2025-01-18 11:43:22 +08:00
committed by GitHub
parent 13387e6b7a
commit d3024f4fc8
8 changed files with 227 additions and 9 deletions

View File

@@ -0,0 +1,38 @@
{
"model_type": "qwen",
"kv_cache": {
"dtype": "float8_e4m3fn",
"scaling_factor": {
"0": {
"0": 0.9846,
"1": 0.0645,
"2": 0.0731,
"3": 0.0800,
"4": 0.0748,
"5": 0.0780,
"6": 0.0702,
"7": 0.0894,
"8": 0.0410,
"9": 0.0758,
"10": 0.0556,
"11": 0.0731,
"12": 0.0899,
"13": 0.0780,
"14": 0.1441,
"15": 0.0914,
"16": 0.5614,
"17": 0.1067,
"18": 0.0537,
"19": 0.0658,
"20": 0.0523,
"21": 0.0533,
"22": 0.0699,
"23": 0.0635,
"24": 0.0588,
"25": 0.0884,
"26": 0.0947,
"27": 0.1032
}
}
}
}