enginex-mlu370-vllm

EngineX-Cambricon/enginex-mlu370-vllm

Fork 1

Files

History

Chranos 79dfc69789 add ops

2026-02-04 17:39:32 +08:00

benchmark_active.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_apply_rotary.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_attention_project.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_batch_matmul.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_dequant_from_linear_cache.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_dequant_from_paged_cache.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_ffn.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_flash_attn.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_fused_layer_norm.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_fused_moe.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_fused_norm_attention_project.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_fused_norm_residual_ffn.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_fused_rms_norm.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_fused_rope.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_group_gemm.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_matmul.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_moe_active.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_moe_cast_gating.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_moe_combine_result.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_moe_expand_input.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_moe_gen_idx.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_moe_quantize.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_moe_softmax_topk.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_offline_quant_to_linear_cache.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_offline_quant_to_paged_cache.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_per_token_smooth_quantize.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_preload.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_quant_to_linear_cache.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_quantize.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_reshape_linear_cache.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_reshape_paged_cache.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_single_query_cached_kv_attn.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_single_query_mixed_cached_kv_attn.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_smooth_quant_matmul.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_test.sh

add ops

2026-02-04 17:39:32 +08:00

benchmark_update_out_and_lse.py

add ops

2026-02-04 17:39:32 +08:00

benchmark_weight_only_quant_matmul.py

add ops

2026-02-04 17:39:32 +08:00

common.py

add ops

2026-02-04 17:39:32 +08:00

README.md

add ops

2026-02-04 17:39:32 +08:00

README.md

benchmark测试脚本使用方式

Torch-MLU-Ops 的 benchmark 测试脚本为用户提供了算子性能测试的便捷入口。用户可通过以下命令查看各个参数的含义。

# 测试命令帮助
python3 benchmark_xxx.py --help

各个参数含义如下：

options:

  -h, --help                   show this help message and exit
  --repeat_times REPEAT_TIMES  repeat times for testing
  --csv                        write the report data to csv
  -o O                         specify the output folder name under --csv mode

# 测试命令示例如下
python3 benchmark_active.py --repeat_times 10 --csv -o './active/'

支持如下算子：

op_name
active
apply_rotary
attention_project
ffn
flash_attn
fused_layer_norm
fused_moe
fused_norm_attention_project
fused_norm_residual_ffn
fused_rms_norm
group_gemm
matmul
offline_quant_to_linear_cache
per_token_smooth_quantize
preload
quantize
reshape_linear_cache
quant_to_linear_cache
reshape_paged_cache
single_query_cached_kv_attn
smooth_quant_matmul
weight_only_quant_matmul
moe_gen_idx
moe_expand_input
moe_softmax_topk
moe_combine_result