* CUDA: add set rows for f32 and f16 * Review: change kernel params, use strides from host * Use 1-d kernel * Review: use int64_t for blockDim.x, rename nb->s for clarity
8 lines
155 B
Plaintext
8 lines
155 B
Plaintext
#pragma once
|
|
|
|
#include "common.cuh"
|
|
|
|
// Block-size constant for the CUDA set-rows op.
// NOTE(review): presumably the number of threads per block used when launching
// the set-rows kernel (1-D launch) — confirm against the .cu implementation.
#define CUDA_SET_ROWS_BLOCK_SIZE 256
|
|
|
|
// Host-side entry point for the set-rows operation in the CUDA backend.
//
//   ctx - CUDA backend context, passed by non-const reference.
//   dst - destination tensor of the operation.
//
// NOTE(review): the definition is not visible from this header. Presumably the
// source operands are taken from dst's src tensors and the kernel is enqueued on
// the stream owned by ctx; the supported element types appear to be f32 and f16 —
// confirm all of this against the .cu implementation.
void ggml_cuda_op_set_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|