Sync from v0.13
This commit is contained in:
34
csrc/cuda_view.cu
Normal file
34
csrc/cuda_view.cu
Normal file
@@ -0,0 +1,34 @@
|
||||
#include <torch/all.h>
|
||||
#include <torch/cuda.h>
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
// This function assumes that `cpu_tensor` is a CPU tensor allocated with pinned
|
||||
// memory, and that UVA (Unified Virtual Addressing) is enabled.
|
||||
torch::Tensor get_cuda_view_from_cpu_tensor(torch::Tensor& cpu_tensor) {
|
||||
TORCH_CHECK(cpu_tensor.device().is_cpu(), "Input tensor must be on CPU");
|
||||
|
||||
// Get raw host pointer from CPU tensor
|
||||
void* host_ptr = cpu_tensor.data_ptr();
|
||||
|
||||
// Get a device pointer corresponding to the pinned host memory
|
||||
void* device_ptr = nullptr;
|
||||
cudaError_t err = cudaHostGetDevicePointer(&device_ptr, host_ptr, 0);
|
||||
TORCH_CHECK(err == cudaSuccess,
|
||||
"cudaHostGetDevicePointer failed: ", cudaGetErrorString(err));
|
||||
|
||||
// We'll use the same sizes, strides, and dtype as the CPU tensor.
|
||||
// TODO: check if layout is respected.
|
||||
auto sizes = cpu_tensor.sizes();
|
||||
auto strides = cpu_tensor.strides();
|
||||
auto options = cpu_tensor.options().device(torch::kCUDA);
|
||||
|
||||
// use default no-op deleter, since the memory is owned by the original CPU
|
||||
// tensor
|
||||
torch::Tensor cuda_tensor =
|
||||
torch::from_blob(device_ptr, sizes, strides, options);
|
||||
|
||||
TORCH_CHECK(cuda_tensor.device().is_cuda(),
|
||||
"Resulting tensor is not on CUDA device");
|
||||
|
||||
return cuda_tensor;
|
||||
}
|
||||
Reference in New Issue
Block a user