@@ -1,7 +1,29 @@
|
||||
#include "shm_worker.h"
|
||||
|
||||
static inline uint16_t get_shm_priority() {
|
||||
const char *env_priority = getenv("VLLM_VNPU_PRIORITY");
|
||||
if (env_priority) {
|
||||
try {
|
||||
int p = std::stoi(env_priority);
|
||||
if (p >= 0 && p <= 7) {
|
||||
return static_cast<uint16_t>(p);
|
||||
} else {
|
||||
spdlog::warn("VLLM_VNPU_PRIORITY should be between 0 and 7, got {}. Using default 0.", p);
|
||||
}
|
||||
} catch (...) {
|
||||
spdlog::warn("Invalid VLLM_VNPU_PRIORITY format. Using default 0.");
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
ShmWorker::ShmWorker() {
|
||||
this->priority = get_shm_priority();
|
||||
this->waiting_timestamp = 0;
|
||||
this->is_waiting = false;
|
||||
this->is_holding_lock = false;
|
||||
spdlog::info("vNPU worker initialized with priority {}", priority);
|
||||
std::string shm_name = get_shm_name();
|
||||
int shm_fd = shm_open(shm_name.c_str(), O_RDWR, 0666);
|
||||
if (shm_fd == -1) {
|
||||
@@ -40,16 +62,18 @@ bool ShmWorker::register_worker(int32_t tgid, int gpu_id,
|
||||
if (slot == -1) {
|
||||
return false;
|
||||
}
|
||||
this->shm_slot = slot;
|
||||
|
||||
*out_shareable_handle = shm_helper->vram_info[gpu_id].shareable_handle;
|
||||
*out_vmem_size = shm_helper->vram_info[gpu_id].total_vmem_size;
|
||||
|
||||
stop_heart_beat.store(false, std::memory_order_release);
|
||||
heart_beat_thread = std::thread(&ShmWorker::heart_beat_loop, this, slot);
|
||||
heart_beat_thread = std::thread(&ShmWorker::heart_beat_loop, this);
|
||||
return true;
|
||||
}
|
||||
|
||||
void ShmWorker::heart_beat_loop(int slot) {
|
||||
void ShmWorker::heart_beat_loop() {
|
||||
int slot = this->shm_slot;
|
||||
while (!stop_heart_beat.load(std::memory_order_acquire)) {
|
||||
// update heart beat
|
||||
int32_t shm_tgid =
|
||||
@@ -64,6 +88,7 @@ void ShmWorker::heart_beat_loop(int slot) {
|
||||
spdlog::error("TGID {} failed to re-register as worker", tgid);
|
||||
throw std::runtime_error("Failed to re-register as worker");
|
||||
}
|
||||
this->shm_slot = slot;
|
||||
}
|
||||
uint64_t now = heartbeat_ts_us();
|
||||
shm_helper->heart_beats[slot].timestamp.store(now,
|
||||
@@ -72,32 +97,95 @@ void ShmWorker::heart_beat_loop(int slot) {
|
||||
}
|
||||
}
|
||||
|
||||
void ShmWorker::start_wait() {
|
||||
if (is_waiting) return; // Keep the older timestamp if already waiting
|
||||
|
||||
// Use lower 24 bits of millisecond timestamp
|
||||
waiting_timestamp = static_cast<uint32_t>((heartbeat_ts_us() / 1000) & 0xFFFFFF);
|
||||
|
||||
uint64_t flag = pack_waiting_flag(this->gpu_id, this->priority, waiting_timestamp, this->tgid);
|
||||
shm_helper->waiting_worker_flags[this->shm_slot].store(flag, std::memory_order_release);
|
||||
is_waiting = true;
|
||||
}
|
||||
|
||||
void ShmWorker::cancel_wait() {
|
||||
if (!is_waiting) return;
|
||||
|
||||
shm_helper->waiting_worker_flags[this->shm_slot].store(0, std::memory_order_release);
|
||||
is_waiting = false;
|
||||
}
|
||||
|
||||
bool ShmWorker::has_higher_priority_waiter() {
|
||||
for (int i = 0; i < MAX_WORKERS; ++i) {
|
||||
if (i == this->shm_slot) continue;
|
||||
|
||||
uint64_t flag = shm_helper->waiting_worker_flags[i].load(std::memory_order_acquire);
|
||||
if (flag == 0) continue;
|
||||
if (unpack_waiting_device_id(flag) != this->gpu_id) continue;
|
||||
|
||||
uint16_t other_prio = unpack_waiting_priority(flag);
|
||||
|
||||
if (other_prio > this->priority) {
|
||||
return true; // Found a waiter with higher priority
|
||||
} else if (other_prio == this->priority) {
|
||||
if (this->is_holding_lock) {
|
||||
// doesn't need to yield to same priority waiters
|
||||
continue;
|
||||
}
|
||||
if (!this->is_waiting) {
|
||||
// an earlier waiter with the same priority
|
||||
return true;
|
||||
}
|
||||
uint32_t other_ts = unpack_waiting_timestamp_ms(flag);
|
||||
// Same priority, compare timestamps (handle 24-bit wrap-around)
|
||||
// Using 24-bit unsigned subtraction. If the difference is in the lower half,
|
||||
// my timestamp is greater (i.e., I started waiting later).
|
||||
uint32_t diff = (this->waiting_timestamp - other_ts) & 0xFFFFFF;
|
||||
if (diff > 0 && diff < 0x800000) {
|
||||
return true; // The other worker started waiting earlier
|
||||
} else if (diff == 0 && unpack_waiting_tgid(flag) < this->tgid) {
|
||||
// using tgid if timestamps happen to be exactly the same
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ShmWorker::try_lock_gpu(bool &out_self_hold) {
|
||||
static int retry_cnt = 0;
|
||||
|
||||
uint64_t old_flag =
|
||||
shm_helper->gpu_flag[gpu_id].load(std::memory_order_acquire);
|
||||
if (unpack_lock_field(old_flag) == 0) { // free
|
||||
// Check priority: yield if there are higher priority waiters, or same priority waiters who have waited longer.
|
||||
if (has_higher_priority_waiter()) {
|
||||
out_self_hold = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
uint64_t new_flag = pack_locked_tgid(tgid);
|
||||
if (shm_helper->gpu_flag[gpu_id].compare_exchange_weak(
|
||||
old_flag, new_flag, std::memory_order_acq_rel,
|
||||
std::memory_order_acquire)) {
|
||||
spdlog::info("TGID {} acquired GPU {} lock", tgid, gpu_id);
|
||||
// spdlog::info("TGID {} acquired GPU {} lock", tgid, gpu_id);
|
||||
int32_t prev_tgid = unpack_tgid_field(old_flag);
|
||||
out_self_hold = prev_tgid == tgid;
|
||||
retry_cnt = 0;
|
||||
this->is_holding_lock = true;
|
||||
return true;
|
||||
}
|
||||
} else { // locked
|
||||
if (unpack_tgid_field(old_flag) == tgid) {
|
||||
spdlog::info("TGID {} already holds the GPU {} lock", tgid, gpu_id);
|
||||
// spdlog::info("TGID {} already holds the GPU {} lock", tgid, gpu_id);
|
||||
out_self_hold = true;
|
||||
retry_cnt = 0;
|
||||
this->is_holding_lock = true;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
// failed
|
||||
if (++retry_cnt % 2000 == 0) {
|
||||
if (++retry_cnt % 10000 == 0) {
|
||||
spdlog::info(
|
||||
"TGID {} trying to acquire GPU {} lock, current lock holder TGID {}",
|
||||
tgid, gpu_id, unpack_tgid_field(old_flag));
|
||||
@@ -116,19 +204,23 @@ bool ShmWorker::lock_gpu(bool &out_self_hold) {
|
||||
}
|
||||
}
|
||||
|
||||
void ShmWorker::unlock_gpu() {
|
||||
void ShmWorker::unlock_gpu(bool keep_wait) {
|
||||
if (!keep_wait) {
|
||||
cancel_wait();
|
||||
}
|
||||
|
||||
uint64_t old_flag =
|
||||
shm_helper->gpu_flag[gpu_id].load(std::memory_order_acquire);
|
||||
if (unpack_tgid_field(old_flag) != tgid) {
|
||||
// spdlog::warn("previous gpu flag {} does not match expected locked flag for "
|
||||
// "TGID {}. This may be a bug, unless during startup.",
|
||||
// old_flag, tgid);
|
||||
spdlog::info("TGID {} does not hold GPU {} lock", tgid, gpu_id);
|
||||
if (!keep_wait) {
|
||||
spdlog::info("unlock: TGID {} does not hold GPU {} lock", tgid, gpu_id);
|
||||
}
|
||||
} else {
|
||||
uint64_t new_flag = pack_unlocked_tgid(tgid);
|
||||
shm_helper->gpu_flag[gpu_id].store(new_flag, std::memory_order_release);
|
||||
spdlog::info("TGID {} released GPU {} lock", tgid, gpu_id);
|
||||
// spdlog::info("TGID {} released GPU {} lock", tgid, gpu_id);
|
||||
}
|
||||
this->is_holding_lock = false;
|
||||
}
|
||||
|
||||
uint64_t ShmWorker::make_request(uint32_t type, uint64_t parameter) {
|
||||
|
||||
Reference in New Issue
Block a user