xc-llm-kunlun/vllm_kunlun/ops/__init__.py

#
# Copyright (c) 2025 Baidu, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is a part of the vllm-ascend project.
#

# Import the Kunlun op submodules listed below whenever this module is loaded.
# None of the imported names are referenced in the visible part of this file,
# so these imports are presumably here for their import-time effects only
# (NOTE(review): confirm what each submodule does at import time).
# NOTE(review): keep the statement order as-is unless the submodules are
# verified to be independent of import order.
import vllm_kunlun.ops.rotary_embedding
import vllm_kunlun.ops.layernorm
# Weight-quantization submodules (AWQ and GPTQ).
import vllm_kunlun.ops.quantization.awq
import vllm_kunlun.ops.quantization.gptq
import vllm_kunlun.ops.vocab_parallel_embedding
import vllm_kunlun.ops.linear
# The cutlass scaled_mm kernel import is intentionally left disabled here; the
# reason is not recorded in this file (TODO: document why or delete the line).
# import vllm_kunlun.ops.quantization.kernels.scaled_mm.cutlass
import vllm_kunlun.ops.fused_moe.layer
# compressed-tensors quantization submodules (base and MoE variants).
import vllm_kunlun.ops.quantization.compressed_tensors.compressed_tensors
import vllm_kunlun.ops.quantization.compressed_tensors.compressed_tensors_moe
# Kunlun scaled_mm kernel module.
import vllm_kunlun.ops.quantization.kernels.scaled_mm.kunlun
Initial commit for vLLM-Kunlun Plugin 2025-12-10 12:05:39 +08:00			`#`
			`# Copyright (c) 2025 Baidu, Inc. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
提交vllm0.11.0开发分支 2025-12-10 17:51:24 +08:00			`# This file is a part of the vllm-ascend project.`
Initial commit for vLLM-Kunlun Plugin 2025-12-10 12:05:39 +08:00			`#`

			`import vllm_kunlun.ops.rotary_embedding`
[dev] support AWQ/GPTQ quantization for dense models 2025-12-24 13:45:55 +08:00			`import vllm_kunlun.ops.layernorm`
			`import vllm_kunlun.ops.quantization.awq`
[Feature] Support XiaoMi MIMO Flash V2 (#62) * [Feature] Support MIMO Flash V2 2025-12-31 10:16:33 +08:00			`import vllm_kunlun.ops.quantization.gptq`
remove qwen2.py llama.py fix llama output 2025-12-31 11:31:26 +08:00			`import vllm_kunlun.ops.vocab_parallel_embedding`
[Feature] support deepseek v3/r1/v3.2 (#78) * [Feature] support deepseek v3/r1/v3.2 * fix gpt_oss * update readme * update readme --------- Co-authored-by: hanhaowen <hanhaowen@baidu.com> 2026-01-05 22:55:35 +08:00			`import vllm_kunlun.ops.linear`
[dev] support compressed-tensors w8a8 quantization (#75) * [dev] support compressed-tensors w8a8 quantization Co-authored-by: Li Wei <liwei.109@outlook.com> * [refact]update KunlunScaleMMKernel impl * [rebase]resolve conflicts and remove redundant code --------- Co-authored-by: tangshiwen <tangshiwen@baidu.com> 2026-01-06 13:51:53 +08:00			`# import vllm_kunlun.ops.quantization.kernels.scaled_mm.cutlass`
			`import vllm_kunlun.ops.fused_moe.layer`
			`import vllm_kunlun.ops.quantization.compressed_tensors.compressed_tensors`
			`import vllm_kunlun.ops.quantization.compressed_tensors.compressed_tensors_moe`
			`import vllm_kunlun.ops.quantization.kernels.scaled_mm.kunlun`