@@ -16,8 +16,8 @@
|
||||
#include "spdlog/spdlog.h"
|
||||
|
||||
|
||||
// Capacity limits used to size the per-worker and per-device arrays in
// ShmHelper. MAX_DEVICES must not exceed 32: pack_waiting_flag() stores the
// device id in a 5-bit field.
#define MAX_WORKERS 64
#define MAX_DEVICES 32
|
||||
|
||||
static inline std::string get_shm_name() {
|
||||
const char *env_shm_name = getenv("VLLM_VNPU_SHM_NAME");
|
||||
@@ -34,7 +34,7 @@ static inline std::string get_shm_name() {
|
||||
}
|
||||
|
||||
// Heartbeat period, in microseconds.
static constexpr uint32_t heartbeat_us = 1000;
// Number of heartbeat periods that make up the timeout below.
static constexpr uint32_t heartbeat_check_everyN = 100;
// Heartbeat timeout, in microseconds (heartbeat_check_everyN periods).
static constexpr uint32_t heartbeat_timeout_us =
    heartbeat_check_everyN * heartbeat_us;
|
||||
|
||||
@@ -52,6 +52,8 @@ static inline uint64_t heartbeat_ts_us() {
|
||||
.count());
|
||||
}
|
||||
|
||||
// GPU flag layout (64 bits):
|
||||
// [lock (1 bit) | reserved (31 bits) | tgid (32 bits)]
|
||||
// Extracts the upper 32 bits of a GPU flag word.
//
// With the layout [lock (1 bit) | reserved (31 bits) | tgid (32 bits)], the
// returned value carries the lock bit in its most significant bit (bit 31)
// and the reserved bits below it. The tgid half is discarded.
static inline uint32_t unpack_lock_field(uint64_t gpu_flag) {
  const uint64_t upper_half = gpu_flag >> 32;
  return static_cast<uint32_t>(upper_half);
}
|
||||
@@ -68,16 +70,43 @@ static inline uint64_t pack_unlocked_tgid(int32_t tgid) {
|
||||
return static_cast<uint64_t>(tgid);
|
||||
}
|
||||
|
||||
// waiting_worker_flag layout (64 bits):
|
||||
// [ device_id (5 bits) | priority (3 bits) | timestamp (24 bits) | tgid (32 bits)]
|
||||
|
||||
// Extracts the device id from a waiting-worker flag.
//
// The device id occupies the top 5 bits (bits 59..63) of the 64-bit flag, so
// the result is always in the range [0, 31].
static inline uint32_t unpack_waiting_device_id(uint64_t flag) {
  // Shifting an unsigned value right by 59 leaves only the top 5 bits; no
  // mask is needed.
  const uint64_t device_bits = flag >> 59;
  return static_cast<uint32_t>(device_bits);
}
|
||||
|
||||
// Extracts the priority from a waiting-worker flag.
//
// The priority is the 3-bit field at bits 56..58, directly below the device
// id, so the result is always in the range [0, 7].
static inline uint16_t unpack_waiting_priority(uint64_t flag) {
  const uint64_t priority_bits = (flag >> 56) & 0x7;
  return static_cast<uint16_t>(priority_bits);
}
|
||||
|
||||
// Extracts the timestamp from a waiting-worker flag.
//
// The timestamp is the 24-bit field at bits 32..55, so the result is always
// in the range [0, 0xFFFFFF]. Only the low 24 bits of whatever was packed are
// kept.
static inline uint32_t unpack_waiting_timestamp_ms(uint64_t flag) {
  const uint64_t timestamp_bits = (flag >> 32) & 0xFFFFFF;
  return static_cast<uint32_t>(timestamp_bits);
}
|
||||
|
||||
// Extracts the tgid from a waiting-worker flag.
//
// The tgid is the low 32 bits of the flag, reinterpreted as a signed 32-bit
// value, so a negative tgid passed to pack_waiting_flag() comes back
// unchanged.
static inline int32_t unpack_waiting_tgid(uint64_t flag) {
  const uint64_t low_half = flag & 0xFFFFFFFF;
  return static_cast<int32_t>(low_half);
}
|
||||
|
||||
// Builds a 64-bit waiting-worker flag from its four fields.
//
// Layout, most significant bits first:
//   [ device_id (5 bits) | priority (3 bits) | timestamp (24 bits) |
//     tgid (32 bits) ]
//
// Each argument is truncated to its field width before packing, so
// out-of-range values cannot spill into neighbouring fields:
//   device_id -> low 5 bits, priority -> low 3 bits,
//   timestamp -> low 24 bits, tgid -> its 32-bit two's-complement pattern.
static inline uint64_t pack_waiting_flag(uint32_t device_id, uint16_t priority,
                                         uint32_t timestamp, int32_t tgid) {
  const uint64_t device_field = static_cast<uint64_t>(device_id & 0x1F) << 59;
  const uint64_t priority_field = static_cast<uint64_t>(priority & 0x7) << 56;
  const uint64_t timestamp_field =
      static_cast<uint64_t>(timestamp & 0xFFFFFF) << 32;
  // Go through uint32_t so a negative tgid is not sign-extended into the
  // upper fields.
  const uint64_t tgid_field =
      static_cast<uint64_t>(static_cast<uint32_t>(tgid));
  return device_field | priority_field | timestamp_field | tgid_field;
}
|
||||
|
||||
// mmap usually page-aligned
|
||||
struct alignas(64) ShmHelper {
|
||||
struct VramInfo {
|
||||
uint64_t total_vmem_size;
|
||||
uint64_t shareable_handle;
|
||||
};
|
||||
VramInfo vram_info[MAX_DEVICES]; // support max 16 NPUs
|
||||
VramInfo vram_info[MAX_DEVICES]; // support max 32 devices
|
||||
// GPU lock flag
|
||||
std::atomic<uint64_t> gpu_flag[MAX_DEVICES];
|
||||
// uint8_t _padding1[64 - sizeof(std::atomic<uint64_t>)];
|
||||
std::atomic<uint64_t> waiting_worker_flags[MAX_WORKERS];
|
||||
|
||||
// request
|
||||
enum RequestType: uint32_t {
|
||||
|
||||
Reference in New Issue
Block a user