Support compressed tensors fp8w8a8 (#4743)

This commit is contained in:
Xiaoyu Zhang
2025-03-27 04:21:25 +08:00
committed by GitHub
parent 45fdf1f7f3
commit 04e3ff6975
30 changed files with 2386 additions and 113 deletions

View File

@@ -23,16 +23,12 @@ suites = {
TestFile("models/test_reward_models.py", 83),
TestFile("models/test_gme_qwen_models.py", 45),
TestFile("test_abort.py", 51),
TestFile("test_awq.py"),
TestFile("test_block_int8.py", 22),
TestFile("test_chunked_prefill.py", 336),
TestFile("test_eagle_infer.py", 447),
TestFile("test_ebnf_constrained.py"),
TestFile("test_fp8_kernel.py", 2),
TestFile("test_embedding_openai_server.py", 36),
TestFile("test_expert_distribution.py", 31),
TestFile("test_gguf.py", 78),
TestFile("test_gptqmodel_dynamic.py", 72),
TestFile("test_hidden_states.py", 55),
TestFile("test_int8_kernel.py", 1),
TestFile("test_input_embeddings.py", 38),
@@ -82,6 +78,12 @@ suites = {
"nightly": [
TestFile("test_nightly_gsm8k_eval.py"),
],
"vllm_dependency_test": [
TestFile("test_vllm_dependency.py"),
TestFile("test_awq.py"),
TestFile("test_gguf.py", 78),
TestFile("test_gptqmodel_dynamic.py", 72),
],
}