#include #include #include #include #include #include #include #include #include #include #include "acl/acl.h" #include "shm_manager.h" #include "npu_helper.h" #include "spdlog/spdlog.h" static constexpr size_t reserved_mem_size = 8ul * 1024 * 1024 * 1024; // 8GB static ShmManager *shm_manager = nullptr; void handle_signal(int sig) { if (shm_manager) { shm_manager->stop_busy_loop(); } } void install_signal_handlers() { struct sigaction sa{}; sa.sa_handler = handle_signal; sigemptyset(&sa.sa_mask); sa.sa_flags = 0; sigaction(SIGINT, &sa, nullptr); sigaction(SIGTERM, &sa, nullptr); sigaction(SIGHUP, &sa, nullptr); } void ensure_context(unsigned long long device) { aclrtContext pctx; aclrtGetCurrentContext(&pctx); if (!pctx) { // Ensure device context. aclrtCreateContext(&pctx, device); aclrtSetCurrentContext(pctx); } } void init_acl() { int32_t deviceId=0; aclError ret = aclrtSetDevice(deviceId); if (ret != ACL_ERROR_NONE) { throw std::runtime_error("aclrtSetDevice failed with acl error code: " + std::to_string(ret) + " " + __FILE__ + ":" + std::to_string(__LINE__)); } } void reset_pids(const std::vector &pids, const std::vector &shareable_handles) { int cnt = pids.size(); if (cnt <= 0) { return; } int32_t pids_data[cnt]; memcpy(pids_data, pids.data(), cnt * sizeof(int32_t)); for (int i = 0; i < shareable_handles.size(); ++i) { uint64_t shareable_handle = shareable_handles[i]; aclError error_code = aclrtMemSetPidToShareableHandle(shareable_handle, pids_data, cnt); if (error_code != 0) { spdlog::error("aclrtMemSetPidToShareableHandle failed, error_code: {}", error_code); throw std::runtime_error("aclrtMemSetPidToShareableHandle failed"); } } spdlog::info("aclrtMemSetPidToShareableHandle succeeded, num_pids: {}", cnt); } void alloc_physical(uint32_t device_id, aclrtDrvMemHandle &out_mem_handle, size_t &out_g_size) { aclError error_code; size_t free_mem = 0, total = 0; error_code = aclrtGetMemInfo(ACL_HBM_MEM, &free_mem, &total); if (error_code != 0) { spdlog::error("aclrtGetMemInfo failed, error_code: {}", error_code); throw std::runtime_error("aclrtGetMemInfo failed"); } else { spdlog::info("aclrtGetMemInfo succeeded, free_mem: {}, total: {}", free_mem, total); } aclrtPhysicalMemProp prop = {}; prop.handleType = ACL_MEM_HANDLE_TYPE_NONE; prop.allocationType = ACL_MEM_ALLOCATION_TYPE_PINNED; prop.memAttr = ACL_HBM_MEM_HUGE; prop.location.id = device_id; prop.location.type = ACL_MEM_LOCATION_TYPE_DEVICE; prop.reserve = 0; size_t granularity; error_code = aclrtMemGetAllocationGranularity( &prop, ACL_RT_MEM_ALLOC_GRANULARITY_MINIMUM, &granularity); if (error_code != 0) { spdlog::error("aclrtMemGetAllocationGranularity failed, error_code: {}", error_code); throw std::runtime_error("aclrtMemGetAllocationGranularity failed"); } else { spdlog::info("aclrtMemGetAllocationGranularity succeeded, granularity: {}", granularity); } if (free_mem < reserved_mem_size) { spdlog::error("Not enough free memory to reserve: {}, free_mem: {}", reserved_mem_size, free_mem); throw std::runtime_error("Not enough free memory to reserve"); } out_g_size = free_mem - reserved_mem_size; out_g_size = (out_g_size / granularity) * granularity; // allocate physical memory error_code = aclrtMallocPhysical(&out_mem_handle, out_g_size, &prop, 0); if (error_code != 0) { spdlog::error("aclrtMallocPhysical failed, error_code: {}", error_code); throw std::runtime_error("aclrtMallocPhysical failed"); } else { spdlog::info("device {} aclrtMallocPhysical succeeded, size: {}", device_id, out_g_size); } } void start_daemon() { init_acl(); std::vector npu_ids = get_npu_ids(); std::vector mem_handles; std::vector shareable_handles; // shm shm_manager = new ShmManager(); for (int i = 0; i < npu_ids.size(); ++i) { uint32_t device_id = i; int npu_id = npu_ids[i]; spdlog::info("Setting up device id {} - npu id {}", device_id, npu_id); aclError error_code = aclrtSetDevice(device_id); if (error_code != ACL_ERROR_NONE) { throw std::runtime_error("aclrtSetDevice failed with acl error code: " + std::to_string(error_code) + " " + __FILE__ + ":" + std::to_string(__LINE__)); } // alloc physical aclrtDrvMemHandle mem_handle; size_t g_size; alloc_physical(device_id, mem_handle, g_size); mem_handles.push_back(mem_handle); // export uint64_t shareable_handle; error_code = aclrtMemExportToShareableHandle( mem_handle, ACL_MEM_HANDLE_TYPE_NONE, ACL_RT_VMM_EXPORT_FLAG_DEFAULT, &shareable_handle); if (error_code != 0) { spdlog::error("aclrtMemExportToShareableHandle failed, error_code: {}", error_code); throw std::runtime_error("aclrtMemExportToShareableHandle failed"); } else { spdlog::info( "aclrtMemExportToShareableHandle succeeded, shareable_handle: {}", shareable_handle); } shm_manager->set_gpu_info(npu_id, g_size, shareable_handle); shareable_handles.push_back(shareable_handle); } shm_manager->register_callback_on_worker_change( [&](const std::vector &pids) { reset_pids(pids, shareable_handles); }); // start busy loop shm_manager->run_busy_loop(); // stopped by signal delete shm_manager; shm_manager = nullptr; // free physical memory for (auto mem_handle : mem_handles) { aclError error_code = aclrtFreePhysical(mem_handle); if (error_code != 0) { spdlog::error("aclrtFreePhysical failed, error_code: {}", error_code); throw std::runtime_error("aclrtFreePhysical failed"); } } } int main() { install_signal_handlers(); start_daemon(); return 0; }