AITER backend extension and workload optimizations (#6838)

Co-authored-by: wunhuang <wunhuang@amd.com>
Co-authored-by: Hubert Lu <Hubert.Lu@amd.com>
This commit is contained in:
HAI
2025-06-05 23:00:18 -07:00
committed by GitHub
parent 562f279a2d
commit b819381fec
12 changed files with 583 additions and 164 deletions

View File

@@ -171,7 +171,7 @@ class TestNightlyGsm8KEval(unittest.TestCase):
os.environ["HF_HUB_DISABLE_XET"] = (
"1" if model in DISABLE_HF_XET_MODELS else "0"
)
-os.environ["SGLANG_AITER_MOE"] = (
+os.environ["SGLANG_USE_AITER"] = (
"0" if model in TRITON_MOE_MODELS else "1"
)