[QUANT] Add GPTQModel Dynamic Quantization + lm_head Quantization (#3790)

Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
Co-authored-by: ZX-ModelCloud <zx@modelcloud.ai>
This commit is contained in:
Qubitium-ModelCloud
2025-03-05 17:11:00 +08:00
committed by GitHub
parent 583d6af71b
commit 56a724eba3
56 changed files with 1988 additions and 282 deletions

View File

@@ -12,6 +12,7 @@ suites = {
"models/test_generation_models.py",
"models/test_qwen_models.py",
"models/test_reward_models.py",
"test_gptqmodel_dynamic.py",
"test_abort.py",
"test_chunked_prefill.py",
"test_custom_allreduce.py",