[1/2] Speed up trtllm_mla attention backend (>10% e2e) (#10473)

2025-09-16 02:53:21 +08:00
parent 5c08d7d21d
commit 3b25dc127a
6 changed files with 119 additions and 3 deletions
--- a/test/srt/models/test_generation_models.py
+++ b/test/srt/models/test_generation_models.py
@@ -67,11 +67,8 @@ ALL_MODELS = [
    ModelCase("openai-community/gpt2"),
    ModelCase("microsoft/phi-1_5", trust_remote_code=True),
    ModelCase("adept/persimmon-8b-chat"),
-
    ModelCase("upstage/SOLAR-10.7B-Instruct-v1.0"),
-
    ModelCase("inclusionAI/Ling-lite", trust_remote_code=True),
-
    ModelCase("microsoft/Phi-3-small-8k-instruct", trust_remote_code=True),
    ModelCase("allenai/OLMo-2-1124-7B-Instruct", skip_long_prompt=True),
    ModelCase("ibm-granite/granite-3.0-2b-instruct", skip_long_prompt=True),