support e4m3 kvcache in qwen2 & add kv scaling factor json (#2894)
Co-authored-by: bjmsong <bjmsong@126.com>
This commit is contained in:
38
test/srt/kv_cache_scales_qwen2_1_5b.json
Normal file
38
test/srt/kv_cache_scales_qwen2_1_5b.json
Normal file
@@ -0,0 +1,38 @@
|
||||
{
|
||||
"model_type": "qwen",
|
||||
"kv_cache": {
|
||||
"dtype": "float8_e4m3fn",
|
||||
"scaling_factor": {
|
||||
"0": {
|
||||
"0": 0.9846,
|
||||
"1": 0.0645,
|
||||
"2": 0.0731,
|
||||
"3": 0.0800,
|
||||
"4": 0.0748,
|
||||
"5": 0.0780,
|
||||
"6": 0.0702,
|
||||
"7": 0.0894,
|
||||
"8": 0.0410,
|
||||
"9": 0.0758,
|
||||
"10": 0.0556,
|
||||
"11": 0.0731,
|
||||
"12": 0.0899,
|
||||
"13": 0.0780,
|
||||
"14": 0.1441,
|
||||
"15": 0.0914,
|
||||
"16": 0.5614,
|
||||
"17": 0.1067,
|
||||
"18": 0.0537,
|
||||
"19": 0.0658,
|
||||
"20": 0.0523,
|
||||
"21": 0.0533,
|
||||
"22": 0.0699,
|
||||
"23": 0.0635,
|
||||
"24": 0.0588,
|
||||
"25": 0.0884,
|
||||
"26": 0.0947,
|
||||
"27": 0.1032
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user