add vxpu
This commit is contained in:
177
vllm_kunlun/csrc/vxpu_offload/vxpu_daemon.cpp
Normal file
177
vllm_kunlun/csrc/vxpu_offload/vxpu_daemon.cpp
Normal file
@@ -0,0 +1,177 @@
|
||||
#include <iostream>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <string.h>
|
||||
#include <vector>
|
||||
#include <atomic>
|
||||
#include <signal.h>
|
||||
|
||||
#include "xpu_helper.h"
|
||||
#include "shm_manager.h"
|
||||
#include "spdlog/spdlog.h"
|
||||
|
||||
#include "xpu/runtime.h"
|
||||
#include "xpu/xpuml.h"
|
||||
|
||||
|
||||
// Process-wide ShmManager instance. start_daemon() creates and destroys it,
// and handle_signal() reads it, which is why it lives at file scope.
// It is nullptr whenever no manager exists.
static ShmManager *shm_manager = nullptr;
|
||||
|
||||
void handle_signal(int sig) {
|
||||
if (shm_manager) {
|
||||
shm_manager->stop_busy_loop();
|
||||
}
|
||||
}
|
||||
|
||||
// Registers handle_signal() as the handler for SIGINT, SIGTERM and SIGHUP.
//
// The handler is installed with an empty signal mask and sa_flags == 0, so
// SA_RESTART is not requested. The return value of sigaction() is not checked.
void install_signal_handlers() {
  struct sigaction action{};
  sigemptyset(&action.sa_mask);
  action.sa_flags = 0;
  action.sa_handler = handle_signal;

  // Same registration order as before: INT, TERM, HUP.
  const int trapped_signals[] = {SIGINT, SIGTERM, SIGHUP};
  for (const int signo : trapped_signals) {
    sigaction(signo, &action, nullptr);
  }
}
|
||||
|
||||
size_t get_reserved_vram_size() {
|
||||
const char *env_p = std::getenv("VXPU_RESERVED_VRAM_SIZE_GB");
|
||||
size_t reserved_vram_size = 8ul * 1024 * 1024 * 1024; // default 8GB
|
||||
if (env_p) {
|
||||
try {
|
||||
size_t size_gb = std::stoul(env_p);
|
||||
reserved_vram_size = size_gb * 1024 * 1024 * 1024;
|
||||
} catch (const std::exception &e) {
|
||||
spdlog::warn("Failed to parse VXPU_RESERVED_VRAM_SIZE_GB: {}, using "
|
||||
"default 8GB",
|
||||
e.what());
|
||||
}
|
||||
}
|
||||
return reserved_vram_size;
|
||||
}
|
||||
|
||||
void start_daemon() {
|
||||
int device_count = 0;
|
||||
int ret = xpu_device_count(&device_count);
|
||||
if (ret != XPU_SUCCESS) {
|
||||
throw std::runtime_error(
|
||||
"xpu_device_count failed with error code: " + std::to_string(ret) +
|
||||
" " + __FILE__ + ":" + std::to_string(__LINE__));
|
||||
}
|
||||
|
||||
std::vector<void *> dev_ptrs;
|
||||
// shm
|
||||
shm_manager = new ShmManager();
|
||||
|
||||
size_t reserved_vram_size = get_reserved_vram_size();
|
||||
spdlog::info("Reserved gpu memory size per device: {:.1f} GB",
|
||||
reserved_vram_size / (1024.0 * 1024 * 1024));
|
||||
|
||||
xpumlInit();
|
||||
for (int i = 0; i < device_count; ++i) {
|
||||
int ret = xpu_set_device(i);
|
||||
if (ret != XPU_SUCCESS) {
|
||||
throw std::runtime_error(
|
||||
"xpu_set_device failed with error code: " + std::to_string(ret) +
|
||||
" " + __FILE__ + ":" + std::to_string(__LINE__));
|
||||
}
|
||||
uint64_t attr;
|
||||
ret = xpu_device_get_attr(&attr, XPUATTR_PCI_ADDRESS, i);
|
||||
if (ret != XPU_SUCCESS) {
|
||||
throw std::runtime_error(
|
||||
"xpu_device_get_attr failed with error code: " + std::to_string(ret) +
|
||||
" " + __FILE__ + ":" + std::to_string(__LINE__));
|
||||
}
|
||||
uint32_t pci_addr = static_cast<uint32_t>(attr);
|
||||
spdlog::info("Setting up device id {} - {:04X}.{:02X}.{:02X}.{:X}", i,
|
||||
((pci_addr >> 16) & 0xFFFF), ((pci_addr >> 8) & 0xFF),
|
||||
((pci_addr >> 3) & 0x1F), (pci_addr & 0x7));
|
||||
|
||||
// get free memory size
|
||||
xpumlDevice_t xpuml_device;
|
||||
xpumlReturn_t ml_ret = xpumlDeviceGetHandleByIndex(i, &xpuml_device);
|
||||
if (ml_ret != XPUML_SUCCESS) {
|
||||
throw std::runtime_error(
|
||||
"xpumlDeviceGetHandleByIndex failed with error code: " +
|
||||
std::to_string(ml_ret) + " " + __FILE__ + ":" +
|
||||
std::to_string(__LINE__));
|
||||
}
|
||||
xpumlMemory_t mem_info;
|
||||
ml_ret = xpumlDeviceGetMemoryInfo(xpuml_device, &mem_info);
|
||||
if (ml_ret != XPUML_SUCCESS) {
|
||||
throw std::runtime_error(
|
||||
"xpumlDeviceGetMemoryInfo failed with error code: " +
|
||||
std::to_string(ml_ret) + " " + __FILE__ + ":" +
|
||||
std::to_string(__LINE__));
|
||||
}
|
||||
size_t total_g_mem = mem_info.totalGlobalMemory;
|
||||
size_t free_g_mem = mem_info.freeGlobalMemory;
|
||||
size_t granularity = 2ul * 1024 * 1024; // 2MB
|
||||
if (free_g_mem < reserved_vram_size) {
|
||||
spdlog::error("Not enough free memory to reserve: {}, free_g_mem: {}",
|
||||
reserved_vram_size, free_g_mem);
|
||||
throw std::runtime_error("Not enough free memory to reserve");
|
||||
}
|
||||
size_t g_size =
|
||||
(free_g_mem - reserved_vram_size) / granularity * granularity;
|
||||
// allocate
|
||||
void *dev_ptr = nullptr;
|
||||
ret = xpu_malloc(&dev_ptr, g_size);
|
||||
if (ret != XPU_SUCCESS) {
|
||||
throw std::runtime_error(
|
||||
"xpu_malloc failed with error code: " + std::to_string(ret) + " " +
|
||||
__FILE__ + ":" + std::to_string(__LINE__));
|
||||
}
|
||||
spdlog::info("device {} xpu_malloc succeeded, size: {}", i, g_size);
|
||||
dev_ptrs.push_back(dev_ptr);
|
||||
|
||||
// get memhandle
|
||||
XPUIpcMemHandle handle;
|
||||
ret = xpu_ipc_get_memhandle(&handle, dev_ptr);
|
||||
if (ret != XPU_SUCCESS) {
|
||||
throw std::runtime_error(
|
||||
"xpu_ipc_get_memhandle failed with error code: " +
|
||||
std::to_string(ret) + " " + __FILE__ + ":" +
|
||||
std::to_string(__LINE__));
|
||||
}
|
||||
|
||||
// shm set gpu info
|
||||
shm_manager->set_xpu_info(i, pci_addr, g_size, handle);
|
||||
}
|
||||
xpumlShutdown();
|
||||
|
||||
// start busy loop
|
||||
shm_manager->run_busy_loop();
|
||||
|
||||
// stopped by signal
|
||||
delete shm_manager;
|
||||
shm_manager = nullptr;
|
||||
|
||||
// free physical memory
|
||||
for (int i = 0; i < device_count; ++i) {
|
||||
int ret = xpu_set_device(i);
|
||||
if (ret != XPU_SUCCESS) {
|
||||
spdlog::error("xpu_set_device failed during cleanup, error code: {}",
|
||||
ret);
|
||||
continue;
|
||||
}
|
||||
ret = xpu_free(dev_ptrs[i]);
|
||||
if (ret != XPU_SUCCESS) {
|
||||
spdlog::error("xpu_free failed during cleanup, error code: {}", ret);
|
||||
} else {
|
||||
spdlog::info("device {} xpu_free succeeded during cleanup", i);
|
||||
}
|
||||
}
|
||||
dev_ptrs.clear();
|
||||
}
|
||||
|
||||
int main() {
|
||||
install_signal_handlers();
|
||||
|
||||
start_daemon();
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user