Add feature: priority

Signed-off-by: Jing Wang <jingwang96@qq.com>
This commit is contained in:
Jing Wang
2026-05-12 11:51:57 +00:00
parent d627a45881
commit b6549b6e38
11 changed files with 382 additions and 66 deletions

View File

@@ -1,7 +1,29 @@
#include "shm_worker.h"
/// Reads the worker priority from the VLLM_VNPU_PRIORITY environment variable.
///
/// The value must be a whole integer in the range [0, 7]. Anything else
/// (unset, non-numeric, trailing characters such as "3abc" or "1.5", or out of
/// range) falls back to priority 0; every case except "unset" logs a warning.
///
/// @return the configured priority, or 0 when no valid value is available.
static inline uint16_t get_shm_priority() {
  const char *env_priority = getenv("VLLM_VNPU_PRIORITY");
  if (env_priority == nullptr) {
    return 0;
  }
  const std::string raw(env_priority);
  try {
    std::size_t consumed = 0;
    const int p = std::stoi(raw, &consumed);
    // std::stoi stops at the first character it cannot parse, so without this
    // check "3abc" would be silently accepted as 3.
    if (consumed != raw.size()) {
      spdlog::warn("Invalid VLLM_VNPU_PRIORITY format. Using default 0.");
      return 0;
    }
    if (p >= 0 && p <= 7) {
      return static_cast<uint16_t>(p);
    }
    spdlog::warn("VLLM_VNPU_PRIORITY should be between 0 and 7, got {}. Using default 0.", p);
  } catch (const std::exception &) {
    // std::stoi reports bad input via std::invalid_argument / std::out_of_range.
    spdlog::warn("Invalid VLLM_VNPU_PRIORITY format. Using default 0.");
  }
  return 0;
}
ShmWorker::ShmWorker() {
this->priority = get_shm_priority();
this->waiting_timestamp = 0;
this->is_waiting = false;
this->is_holding_lock = false;
spdlog::info("vNPU worker initialized with priority {}", priority);
std::string shm_name = get_shm_name();
int shm_fd = shm_open(shm_name.c_str(), O_RDWR, 0666);
if (shm_fd == -1) {
@@ -40,16 +62,18 @@ bool ShmWorker::register_worker(int32_t tgid, int gpu_id,
if (slot == -1) {
return false;
}
this->shm_slot = slot;
*out_shareable_handle = shm_helper->vram_info[gpu_id].shareable_handle;
*out_vmem_size = shm_helper->vram_info[gpu_id].total_vmem_size;
stop_heart_beat.store(false, std::memory_order_release);
heart_beat_thread = std::thread(&ShmWorker::heart_beat_loop, this, slot);
heart_beat_thread = std::thread(&ShmWorker::heart_beat_loop, this);
return true;
}
void ShmWorker::heart_beat_loop(int slot) {
void ShmWorker::heart_beat_loop() {
int slot = this->shm_slot;
while (!stop_heart_beat.load(std::memory_order_acquire)) {
// update heart beat
int32_t shm_tgid =
@@ -64,6 +88,7 @@ void ShmWorker::heart_beat_loop(int slot) {
spdlog::error("TGID {} failed to re-register as worker", tgid);
throw std::runtime_error("Failed to re-register as worker");
}
this->shm_slot = slot;
}
uint64_t now = heartbeat_ts_us();
shm_helper->heart_beats[slot].timestamp.store(now,
@@ -72,32 +97,95 @@ void ShmWorker::heart_beat_loop(int slot) {
}
}
// Publishes this worker's waiting flag (gpu id, priority, timestamp, tgid)
// into its slot of waiting_worker_flags and marks the worker as waiting.
void ShmWorker::start_wait() {
  // Already waiting: leave the earlier timestamp in place.
  if (is_waiting) {
    return;
  }
  // Millisecond clock, truncated to its low 24 bits.
  const auto now_ms = heartbeat_ts_us() / 1000;
  waiting_timestamp = static_cast<uint32_t>(now_ms & 0xFFFFFF);
  const uint64_t packed = pack_waiting_flag(this->gpu_id, this->priority,
                                            waiting_timestamp, this->tgid);
  auto &my_flag = shm_helper->waiting_worker_flags[this->shm_slot];
  my_flag.store(packed, std::memory_order_release);
  is_waiting = true;
}
// Withdraws this worker's waiting flag by storing 0 into its slot of
// waiting_worker_flags. Does nothing when the worker is not waiting.
void ShmWorker::cancel_wait() {
  if (is_waiting) {
    auto &my_flag = shm_helper->waiting_worker_flags[this->shm_slot];
    my_flag.store(0, std::memory_order_release);
    is_waiting = false;
  }
}
// Scans every other slot's waiting flag and reports whether this worker
// should yield the GPU.
//
// Returns true when another worker waiting on the same gpu_id either has a
// strictly higher priority, or has the same priority and (while this worker is
// not holding the lock) is ahead of it: this worker is not waiting at all, the
// other started waiting earlier, or the timestamps tie and the other has the
// smaller tgid. Returns false otherwise.
bool ShmWorker::has_higher_priority_waiter() {
  for (int idx = 0; idx < MAX_WORKERS; ++idx) {
    if (idx == this->shm_slot) {
      continue;  // never compare against our own slot
    }
    const uint64_t entry =
        shm_helper->waiting_worker_flags[idx].load(std::memory_order_acquire);
    // A zero flag is skipped, as is a waiter registered for a different device.
    if (entry == 0 || unpack_waiting_device_id(entry) != this->gpu_id) {
      continue;
    }

    const uint16_t their_prio = unpack_waiting_priority(entry);
    if (their_prio > this->priority) {
      return true;  // strictly higher priority always wins
    }
    if (their_prio != this->priority) {
      continue;  // lower priority: nothing to yield to
    }

    // Equal priority from here on.
    if (this->is_holding_lock) {
      continue;  // the current holder does not yield to same-priority waiters
    }
    if (!this->is_waiting) {
      return true;  // they are queued and we are not
    }

    // 24-bit wrap-around comparison: a masked difference in the lower half
    // means our timestamp is the later one.
    const uint32_t their_ts = unpack_waiting_timestamp_ms(entry);
    const uint32_t gap = (this->waiting_timestamp - their_ts) & 0xFFFFFF;
    if (gap == 0) {
      // Identical timestamps: the smaller tgid goes first.
      if (unpack_waiting_tgid(entry) < this->tgid) {
        return true;
      }
      continue;
    }
    if (gap < 0x800000) {
      return true;  // the other worker started waiting earlier
    }
  }
  return false;
}
bool ShmWorker::try_lock_gpu(bool &out_self_hold) {
static int retry_cnt = 0;
uint64_t old_flag =
shm_helper->gpu_flag[gpu_id].load(std::memory_order_acquire);
if (unpack_lock_field(old_flag) == 0) { // free
// Check priority: yield if there are higher priority waiters, or same priority waiters who have waited longer.
if (has_higher_priority_waiter()) {
out_self_hold = false;
return false;
}
uint64_t new_flag = pack_locked_tgid(tgid);
if (shm_helper->gpu_flag[gpu_id].compare_exchange_weak(
old_flag, new_flag, std::memory_order_acq_rel,
std::memory_order_acquire)) {
spdlog::info("TGID {} acquired GPU {} lock", tgid, gpu_id);
// spdlog::info("TGID {} acquired GPU {} lock", tgid, gpu_id);
int32_t prev_tgid = unpack_tgid_field(old_flag);
out_self_hold = prev_tgid == tgid;
retry_cnt = 0;
this->is_holding_lock = true;
return true;
}
} else { // locked
if (unpack_tgid_field(old_flag) == tgid) {
spdlog::info("TGID {} already holds the GPU {} lock", tgid, gpu_id);
// spdlog::info("TGID {} already holds the GPU {} lock", tgid, gpu_id);
out_self_hold = true;
retry_cnt = 0;
this->is_holding_lock = true;
return true;
}
}
// failed
if (++retry_cnt % 2000 == 0) {
if (++retry_cnt % 10000 == 0) {
spdlog::info(
"TGID {} trying to acquire GPU {} lock, current lock holder TGID {}",
tgid, gpu_id, unpack_tgid_field(old_flag));
@@ -116,19 +204,23 @@ bool ShmWorker::lock_gpu(bool &out_self_hold) {
}
}
// Releases this worker's hold on the lock flag of gpu_id.
// When keep_wait is false the waiting flag is withdrawn first via cancel_wait().
// NOTE(review): this span comes from a rendered diff, so what look like the
// pre-change lines (no-argument signature, unconditional info logs) appear next
// to their replacements — confirm against the checked-out file.
void ShmWorker::unlock_gpu() {
void ShmWorker::unlock_gpu(bool keep_wait) {
if (!keep_wait) {
cancel_wait();
}
uint64_t old_flag =
shm_helper->gpu_flag[gpu_id].load(std::memory_order_acquire);
// The flag's TGID field is not ours: the flag is left untouched.
if (unpack_tgid_field(old_flag) != tgid) {
// spdlog::warn("previous gpu flag {} does not match expected locked flag for "
// "TGID {}. This may be a bug, unless during startup.",
// old_flag, tgid);
spdlog::info("TGID {} does not hold GPU {} lock", tgid, gpu_id);
if (!keep_wait) {
spdlog::info("unlock: TGID {} does not hold GPU {} lock", tgid, gpu_id);
}
} else {
// The flag carries our TGID: overwrite it with the unlocked encoding.
uint64_t new_flag = pack_unlocked_tgid(tgid);
shm_helper->gpu_flag[gpu_id].store(new_flag, std::memory_order_release);
spdlog::info("TGID {} released GPU {} lock", tgid, gpu_id);
// spdlog::info("TGID {} released GPU {} lock", tgid, gpu_id);
}
// Cleared on both paths, whether or not the flag was rewritten.
this->is_holding_lock = false;
}
uint64_t ShmWorker::make_request(uint32_t type, uint64_t parameter) {