Partially support multiple NPUs
This commit is contained in:
@@ -20,6 +20,7 @@
|
||||
#include <atomic>
|
||||
|
||||
#include "idle_offload/shm_worker.h"
|
||||
#include "idle_offload/npu_helper.h"
|
||||
|
||||
extern "C" {
|
||||
|
||||
@@ -474,8 +475,10 @@ static PyObject* python_create_and_map(PyObject* self, PyObject* args) {
|
||||
static PyObject* py_init_module_offload(PyObject* self, PyObject* args) {
|
||||
PyObject* malloc_callback = nullptr;
|
||||
PyObject* free_callback = nullptr;
|
||||
unsigned long long device = 0;
|
||||
|
||||
if (!PyArg_ParseTuple(args, "OO", &malloc_callback, &free_callback)) {
|
||||
if (!PyArg_ParseTuple(args, "OOK", &malloc_callback, &free_callback,
|
||||
&device)) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -497,7 +500,13 @@ static PyObject* py_init_module_offload(PyObject* self, PyObject* args) {
|
||||
}
|
||||
g_initialized.store(true);
|
||||
|
||||
shm_worker = new ShmWorker();
|
||||
std::vector<int> gpu_ids = get_npu_ids();
|
||||
if (device >= gpu_ids.size()) {
|
||||
throw std::runtime_error("Invalid device id: " + std::to_string(device) +
|
||||
" " + __FILE__ + ":" + std::to_string(__LINE__));
|
||||
}
|
||||
int gpu_id = gpu_ids[device];
|
||||
|
||||
// get pid
|
||||
aclError error_code;
|
||||
int32_t pid;
|
||||
@@ -508,11 +517,12 @@ static PyObject* py_init_module_offload(PyObject* self, PyObject* args) {
|
||||
std::to_string(error_code) + " " + __FILE__ + ":" +
|
||||
std::to_string(__LINE__));
|
||||
}
|
||||
|
||||
shm_worker = new ShmWorker();
|
||||
uint64_t shareable_handle;
|
||||
shm_worker->register_worker(pid, &shareable_handle, &g_size);
|
||||
shm_worker->register_worker(pid, gpu_id, &shareable_handle, &g_size);
|
||||
|
||||
// import shareable handle
|
||||
uint32_t device = 0;
|
||||
aclrtDrvMemHandle memHandle;
|
||||
error_code =
|
||||
aclrtMemImportFromShareableHandle(shareable_handle, device, &memHandle);
|
||||
@@ -570,9 +580,16 @@ static PyObject* python_get_mem_info_offload(PyObject* self, PyObject* args) {
|
||||
return tuple;
|
||||
}
|
||||
|
||||
static PyObject* python_lock_gpu_offload(PyObject* self, PyObject* args) {
|
||||
bool prev_is_self = shm_worker->lock_gpu();
|
||||
return PyBool_FromLong(prev_is_self);
|
||||
// Python-callable wrapper around shm_worker->try_lock_gpu().
//
// Returns a new 2-tuple of Python bools: (success, prev_is_self), where
// `success` is the value returned by try_lock_gpu() and `prev_is_self` is the
// local flag handed to that call. Returns nullptr if the tuple cannot be
// allocated. The `self` and `args` parameters are not used.
//
// NOTE(review): shm_worker is dereferenced without a null check; it is
// assigned in py_init_module_offload, so presumably that must have been
// called first -- confirm.
static PyObject* python_try_lock_gpu_offload(PyObject* self, PyObject* args) {
|
||||
// Starts as false; presumably try_lock_gpu() takes it by reference and
// overwrites it -- verify against the ShmWorker declaration.
bool prev_is_self = false;
|
||||
bool success = shm_worker->try_lock_gpu(prev_is_self);
|
||||
PyObject* tuple = PyTuple_New(2);
|
||||
if (!tuple) {
|
||||
return nullptr;
|
||||
}
|
||||
// Per the CPython C API, PyTuple_SetItem takes over the reference returned by
// PyBool_FromLong, so no explicit DECREF is needed for the two items.
PyTuple_SetItem(tuple, 0, PyBool_FromLong(success));
|
||||
PyTuple_SetItem(tuple, 1, PyBool_FromLong(prev_is_self));
|
||||
return tuple;
|
||||
}
|
||||
|
||||
static PyObject* python_unlock_gpu_offload(PyObject* self, PyObject* args) {
|
||||
@@ -597,7 +614,7 @@ static PyMethodDef module_methods[] = {
|
||||
"Unmap and release memory on the device."},
|
||||
{"python_get_mem_info_offload", (PyCFunction)python_get_mem_info_offload,
|
||||
METH_NOARGS, "Get mem info in the reserved pool."},
|
||||
{"python_lock_gpu_offload", (PyCFunction)python_lock_gpu_offload,
|
||||
{"python_try_lock_gpu_offload", (PyCFunction)python_try_lock_gpu_offload,
|
||||
METH_NOARGS, "Lock GPU."},
|
||||
{"python_unlock_gpu_offload", (PyCFunction)python_unlock_gpu_offload,
|
||||
METH_NOARGS, "Unlock GPU."},
|
||||
|
||||
Reference in New Issue
Block a user