[Model] Adding Qwen3 and Qwen3MoE (#4693)

This commit is contained in:
yhyang201
2025-04-19 00:51:29 +08:00
committed by GitHub
parent bfa3922451
commit 4db463b1ad
5 changed files with 780 additions and 14 deletions

View File

@@ -100,8 +100,11 @@ class FlashInferAttnBackend(AttentionBackend):
         self.num_wrappers = 1
         self.dispatch_reason = None
-        # Qwen2 models require higher flashinfer workspace size
-        if "Qwen2ForCausalLM" in model_runner.model_config.hf_config.architectures:
+        # Qwen2/Qwen3 models require higher flashinfer workspace size
+        if (
+            "Qwen2ForCausalLM" in model_runner.model_config.hf_config.architectures
+            or "Qwen3ForCausalLM" in model_runner.model_config.hf_config.architectures
+        ):
             global_config.flashinfer_workspace_size = 512 * 1024 * 1024
         # Allocate buffers