Add feature: priority

Signed-off-by: Jing Wang <jingwang96@qq.com>
This commit is contained in:
Jing Wang
2026-05-12 11:51:57 +00:00
parent d627a45881
commit b6549b6e38
11 changed files with 382 additions and 66 deletions

View File

@@ -16,8 +16,8 @@
#include "spdlog/spdlog.h"
#define MAX_WORKERS 60
#define MAX_DEVICES 16
#define MAX_WORKERS 64
#define MAX_DEVICES 32
static inline std::string get_shm_name() {
const char *env_shm_name = getenv("VLLM_VNPU_SHM_NAME");
@@ -34,7 +34,7 @@ static inline std::string get_shm_name() {
}
static constexpr uint32_t heartbeat_us = 1000; // microseconds
static constexpr uint32_t heartbeat_check_everyN = 50;
static constexpr uint32_t heartbeat_check_everyN = 100;
static constexpr uint32_t heartbeat_timeout_us =
heartbeat_check_everyN * heartbeat_us;
@@ -52,6 +52,8 @@ static inline uint64_t heartbeat_ts_us() {
.count());
}
// GPU flag layout (64 bits):
// [lock (1 bit) | reserved (31 bits) | tgid (32 bits)]
// Returns the upper 32 bits of a packed 64-bit GPU flag word.
static inline uint32_t unpack_lock_field(uint64_t gpu_flag) {
  const uint64_t upper_half = gpu_flag >> 32;
  return static_cast<uint32_t>(upper_half);
}
@@ -68,16 +70,43 @@ static inline uint64_t pack_unlocked_tgid(int32_t tgid) {
return static_cast<uint64_t>(tgid);
}
// waiting_worker_flag layout (64 bits, most significant field first):
// [device_id (5 bits) | priority (3 bits) | timestamp (24 bits) | tgid (32 bits)]
// Extracts the device id stored in the top 5 bits (59..63) of a waiting flag.
static inline uint32_t unpack_waiting_device_id(uint64_t flag) {
  constexpr unsigned kDeviceIdShift = 59;
  return static_cast<uint32_t>(flag >> kDeviceIdShift);
}
// Extracts the 3-bit priority field (bits 56..58) of a waiting flag.
static inline uint16_t unpack_waiting_priority(uint64_t flag) {
  constexpr uint64_t kPriorityMask = 0x7;
  const uint64_t shifted = flag >> 56;
  return static_cast<uint16_t>(shifted & kPriorityMask);
}
// Extracts the 24-bit timestamp field (bits 32..55) of a waiting flag.
static inline uint32_t unpack_waiting_timestamp_ms(uint64_t flag) {
  constexpr uint64_t kTimestampMask = 0xFFFFFF;
  const uint64_t shifted = flag >> 32;
  return static_cast<uint32_t>(shifted & kTimestampMask);
}
// Extracts the tgid stored in the low 32 bits of a waiting flag.
static inline int32_t unpack_waiting_tgid(uint64_t flag) {
  const uint32_t low_word = static_cast<uint32_t>(flag);
  return static_cast<int32_t>(low_word);
}
// Builds a 64-bit waiting flag from its four fields. Each field is truncated
// to its width before being placed:
//   device_id -> 5 bits  at bits 59..63
//   priority  -> 3 bits  at bits 56..58
//   timestamp -> 24 bits at bits 32..55
//   tgid      -> 32 bits at bits 0..31
static inline uint64_t pack_waiting_flag(uint32_t device_id, uint16_t priority,
                                         uint32_t timestamp, int32_t tgid) {
  const uint64_t device_bits = static_cast<uint64_t>(device_id & 0x1F) << 59;
  const uint64_t priority_bits = static_cast<uint64_t>(priority & 0x7) << 56;
  const uint64_t timestamp_bits =
      static_cast<uint64_t>(timestamp & 0xFFFFFF) << 32;
  // Go through uint32_t so a negative tgid only occupies the low 32 bits.
  const uint64_t tgid_bits =
      static_cast<uint64_t>(static_cast<uint32_t>(tgid));
  return device_bits | priority_bits | timestamp_bits | tgid_bits;
}
// mmap usually page-aligned
struct alignas(64) ShmHelper {
struct VramInfo {
uint64_t total_vmem_size;
uint64_t shareable_handle;
};
VramInfo vram_info[MAX_DEVICES]; // support max 16 NPUs
VramInfo vram_info[MAX_DEVICES]; // support max 32 devices
// GPU lock flag
std::atomic<uint64_t> gpu_flag[MAX_DEVICES];
// uint8_t _padding1[64 - sizeof(std::atomic<uint64_t>)];
std::atomic<uint64_t> waiting_worker_flags[MAX_WORKERS];
// request
enum RequestType: uint32_t {