# File-viewer metadata: CMake, 23 lines, 844 B
# VLLM_EXT_SRC: list of source file paths (CUDA .cu files plus one .cpp
# bindings file), given relative to a root that contains "csrc/".
# NOTE(review): the consumer of this variable is not visible in this fragment;
# presumably it is passed to an extension/library target elsewhere -- confirm.
# Stray "|" / "||" tokens that had been interleaved with the entries were
# removed: CMake parses them as unquoted arguments, so they would have been
# appended to the list as if they were source files.
set(VLLM_EXT_SRC
  "csrc/mamba/mamba_ssm/selective_scan_fwd.cu"
  "csrc/cache_kernels.cu"
  "csrc/attention/paged_attention_v1.cu"
  "csrc/attention/paged_attention_v2.cu"
  "csrc/attention/merge_attn_states.cu"
  "csrc/attention/vertical_slash_index.cu"
  "csrc/pos_encoding_kernels.cu"
  "csrc/activation_kernels.cu"
  "csrc/layernorm_kernels.cu"
  "csrc/fused_qknorm_rope_kernel.cu"
  "csrc/layernorm_quant_kernels.cu"
  "csrc/sampler.cu"
  "csrc/cuda_view.cu"
  "csrc/quantization/gptq/q_gemm.cu"
  "csrc/quantization/w8a8/int8/scaled_quant.cu"
  "csrc/quantization/w8a8/fp8/common.cu"
  "csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu"
  "csrc/quantization/gguf/gguf_kernel.cu"
  "csrc/quantization/activation_kernels.cu"
  "csrc/cuda_utils_kernels.cu"
  "csrc/custom_all_reduce.cu"
  "csrc/torch_bindings.cpp")