support e4m3 kvcache in qwen2 & add kv scaling factor json (#2894)

Co-authored-by: bjmsong <bjmsong@126.com>
This commit is contained in:
bjmsong
2025-01-18 11:43:22 +08:00
committed by GitHub
parent 13387e6b7a
commit d3024f4fc8
8 changed files with 227 additions and 9 deletions

View File

@@ -52,6 +52,7 @@ suites = {
"test_vision_openai_server.py",
"test_w8a8_quantization.py",
"test_session_control.py",
"test_fp8_kvcache.py",
],
"nightly": [
"test_nightly_gsm8k_eval.py",