[QUANT] Add GPTQModel Dynamic Quantization + lm_head Quantization (#3790)

Signed-off-by: ZX-ModelCloud <zx@modelcloud.ai>
Co-authored-by: ZX-ModelCloud <zx@modelcloud.ai>
2025-03-05 17:11:00 +08:00
parent 583d6af71b
commit 56a724eba3
56 changed files with 1988 additions and 282 deletions
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -12,6 +12,7 @@ suites = {
        "models/test_generation_models.py",
        "models/test_qwen_models.py",
        "models/test_reward_models.py",
+        "test_gptqmodel_dynamic.py",
        "test_abort.py",
        "test_chunked_prefill.py",
        "test_custom_allreduce.py",