Partially support multiple NPUs

This commit is contained in:
starkwj
2026-01-08 06:54:33 +00:00
parent fa0fb46853
commit 90fe3527ba
12 changed files with 285 additions and 160 deletions

View File

@@ -19,6 +19,7 @@
#define MAX_WORKERS 60
#define MAX_DEVICES 16
// static constexpr const char *SHM_NAME = "/vllm_acl_vnpu_offload_shm";
static inline std::string get_shm_name() {
const char *env_shm_name = getenv("VLLM_IDLE_OFFLOAD_SHM_NAME");
@@ -69,11 +70,14 @@ static inline uint64_t pack_unlocked_tgid(int32_t tgid) {
// mmap usually page-aligned
struct alignas(64) ShmHelper {
struct VramInfo {
uint64_t total_vmem_size;
uint64_t shareable_handle;
};
VramInfo vram_info[MAX_DEVICES]; // support max 16 NPUs
// GPU lock flag
std::atomic<uint64_t> gpu_flag;
uint64_t total_vmem_size;
uint64_t shareable_handle;
uint8_t _padding[64 - sizeof(std::atomic<uint64_t>) - 2 * sizeof(uint64_t)];
std::atomic<uint64_t> gpu_flag[MAX_DEVICES];
// uint8_t _padding1[64 - sizeof(std::atomic<uint64_t>)];
// request
enum RequestType: uint32_t {
@@ -105,14 +109,16 @@ struct alignas(64) ShmHelper {
WorkerHeartBeat heart_beats[MAX_WORKERS];
void init() {
gpu_flag.store(0, std::memory_order_release);
memset(vram_info, 0, sizeof(vram_info));
for (size_t i = 0; i < MAX_DEVICES; ++i) {
gpu_flag[i].store(0, std::memory_order_release);
}
req_ready.store(READY_STATE_NO_REQUEST, std::memory_order_release);
}
void set_gpu_info(uint64_t vmem_size, uint64_t shared_handle) {
total_vmem_size = vmem_size;
shareable_handle = shared_handle;
init();
void set_gpu_info(int gpu_id, uint64_t vmem_size, uint64_t shared_handle) {
vram_info[gpu_id].total_vmem_size = vmem_size;
vram_info[gpu_id].shareable_handle = shared_handle;
}
};