Partially support multiple NPUs

This commit is contained in:
starkwj
2026-01-08 06:54:33 +00:00
parent fa0fb46853
commit 2a571d8bc8
12 changed files with 331 additions and 160 deletions

View File

@@ -20,6 +20,7 @@
#include <atomic>
#include "idle_offload/shm_worker.h"
#include "idle_offload/npu_helper.h"
extern "C" {
@@ -474,8 +475,10 @@ static PyObject* python_create_and_map(PyObject* self, PyObject* args) {
static PyObject* py_init_module_offload(PyObject* self, PyObject* args) {
PyObject* malloc_callback = nullptr;
PyObject* free_callback = nullptr;
unsigned long long device = 0;
if (!PyArg_ParseTuple(args, "OO", &malloc_callback, &free_callback)) {
if (!PyArg_ParseTuple(args, "OOK", &malloc_callback, &free_callback,
&device)) {
return nullptr;
}
@@ -497,7 +500,13 @@ static PyObject* py_init_module_offload(PyObject* self, PyObject* args) {
}
g_initialized.store(true);
shm_worker = new ShmWorker();
std::vector<int> gpu_ids = get_npu_ids();
if (device >= gpu_ids.size()) {
throw std::runtime_error("Invalid device id: " + std::to_string(device) +
" " + __FILE__ + ":" + std::to_string(__LINE__));
}
int gpu_id = gpu_ids[device];
// get pid
aclError error_code;
int32_t pid;
@@ -508,11 +517,12 @@ static PyObject* py_init_module_offload(PyObject* self, PyObject* args) {
std::to_string(error_code) + " " + __FILE__ + ":" +
std::to_string(__LINE__));
}
shm_worker = new ShmWorker();
uint64_t shareable_handle;
shm_worker->register_worker(pid, &shareable_handle, &g_size);
shm_worker->register_worker(pid, gpu_id, &shareable_handle, &g_size);
// import shareable handle
uint32_t device = 0;
aclrtDrvMemHandle memHandle;
error_code =
aclrtMemImportFromShareableHandle(shareable_handle, device, &memHandle);
@@ -570,9 +580,16 @@ static PyObject* python_get_mem_info_offload(PyObject* self, PyObject* args) {
return tuple;
}
static PyObject* python_lock_gpu_offload(PyObject* self, PyObject* args) {
bool prev_is_self = shm_worker->lock_gpu();
return PyBool_FromLong(prev_is_self);
static PyObject* python_try_lock_gpu_offload(PyObject* self, PyObject* args) {
  // Non-blocking GPU lock attempt via the shared-memory worker.
  // Returns a 2-tuple: (acquired, previous_holder_was_self).
  bool was_self = false;
  const bool acquired = shm_worker->try_lock_gpu(was_self);
  PyObject* result = PyTuple_New(2);
  if (result == nullptr) {
    return nullptr;
  }
  // PyTuple_SetItem steals the reference produced by PyBool_FromLong,
  // so no explicit Py_DECREF is needed here.
  PyTuple_SetItem(result, 0, PyBool_FromLong(acquired));
  PyTuple_SetItem(result, 1, PyBool_FromLong(was_self));
  return result;
}
static PyObject* python_unlock_gpu_offload(PyObject* self, PyObject* args) {
@@ -597,7 +614,7 @@ static PyMethodDef module_methods[] = {
"Unmap and release memory on the device."},
{"python_get_mem_info_offload", (PyCFunction)python_get_mem_info_offload,
METH_NOARGS, "Get mem info in the reserved pool."},
{"python_lock_gpu_offload", (PyCFunction)python_lock_gpu_offload,
{"python_try_lock_gpu_offload", (PyCFunction)python_try_lock_gpu_offload,
METH_NOARGS, "Lock GPU."},
{"python_unlock_gpu_offload", (PyCFunction)python_unlock_gpu_offload,
METH_NOARGS, "Unlock GPU."},