add qwen3

Chranos
2026-02-04 17:22:39 +08:00
parent d1c0f68ab4
commit 8511fe8530
1932 changed files with 300426 additions and 0 deletions

@@ -0,0 +1,76 @@
import torch
import ray
import gc
import contextlib
import numpy as np
import os

# Enable expert parallelism; set before importing vllm, matching the order used here.
os.environ['EXPERT_PARALLEL_EN'] = "True"
from vllm import LLM, SamplingParams
from vllm.platforms import current_platform


def string_list_to_float(text_list: list, pad_len=None):
    '''Convert a list of strings into a float32 array of character codes,
    right-padding every string with spaces to a common length.'''
    if pad_len is None:
        pad_len = max(len(s) for s in text_list)
    txt_char = np.array([[ord(char) for char in s.ljust(pad_len)] for s in text_list])
    return txt_char.astype('float32')


def compute_diff_text(baseline_text: list, compare_text: list):
    '''Compute the relative L1 (diff1) and L2 (diff2) errors between two output lists.'''
    # Pad both sides to one shared length so the arrays have matching shapes.
    pad_len = max(max(len(s) for s in baseline_text),
                  max(len(s) for s in compare_text))
    baseline = string_list_to_float(baseline_text, pad_len)
    compare = string_list_to_float(compare_text, pad_len)
    error = np.abs(baseline - compare)
    diff1 = np.sum(error) / np.sum(np.abs(baseline))
    diff2 = np.sqrt(np.sum(error**2) / np.sum(baseline**2))
    return diff1, diff2
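
# Sanity check of the metrics (a hypothetical example, not part of the original
# test): identical lists yield zero for both metrics.
#   >>> compute_diff_text(["Hello"], ["Hello"])
#   (0.0, 0.0)
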
def cleanup():
    '''Release occupied resources and reset parallel_state'''
    from examples.cambricon_custom_func.vllm.mlu_hijack.distributed.parallel_state import destroy_model_parallel
    destroy_model_parallel()
    from vllm.distributed import destroy_distributed_environment
    destroy_distributed_environment()
    with contextlib.suppress(AssertionError):
        torch.distributed.destroy_process_group()
    gc.collect()
    if not current_platform.is_cpu():
        torch.cuda.empty_cache()
    if ray.is_initialized():
        ray.shutdown()


def run_vllm(prompts, sampling_params, tp, mtp=-1, mep=-1,
             model_dir="/data/AE/llm/models/Qwen1.5-MoE-A2.7B/"):
    '''Run the LLM once; mtp and mep are forwarded as moe_tp_size and moe_ep_size.'''
    llm = LLM(model=model_dir,
              enforce_eager=True,
              tensor_parallel_size=tp,
              moe_tp_size=mtp,
              moe_ep_size=mep)
    outputs = llm.generate(prompts, sampling_params)
    return outputs
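
# Hypothetical standalone call (values are illustrative only, not from the test):
#   outputs = run_vllm(["Hello, my name is"], SamplingParams(max_tokens=8),
#                      tp=2, mep=2)
#   print(outputs[0].outputs[0].text)
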

def test_expert_parallel():
    """Compare the outputs of tp=2 with moe_tp_size=1 against tp=2 with moe_tp_size=2."""
    qwen2_moe_model_dir = "/data/AE/llm/models/Qwen1.5-MoE-A2.7B"
    eps = 1e-6
    prompts = [
        "Hello, my name is",
    ]
    sampling_params = SamplingParams(temperature=0.8, max_tokens=1)
    outputs_1 = run_vllm(prompts, sampling_params, tp=2, mtp=1, model_dir=qwen2_moe_model_dir)
    cleanup()
    outputs_2 = run_vllm(prompts, sampling_params, tp=2, mtp=2, model_dir=qwen2_moe_model_dir)
    cleanup()
    generated_text_1 = [output.outputs[0].text for output in outputs_1]
    generated_text_2 = [output.outputs[0].text for output in outputs_2]
    diff1, diff2 = compute_diff_text(generated_text_1, generated_text_2)
    assert diff1 <= eps and diff2 <= eps, (
        f"qwen2_moe generated_1 ({generated_text_1}) and generated_2 "
        f"({generated_text_2}) differ: diff1={diff1}, diff2={diff2}")