Partially support multiple NPUs
This commit is contained in:
@@ -19,6 +19,7 @@
|
||||
|
||||
|
||||
#define MAX_WORKERS 60
|
||||
#define MAX_DEVICES 16
|
||||
// static constexpr const char *SHM_NAME = "/vllm_acl_vnpu_offload_shm";
|
||||
static inline std::string get_shm_name() {
|
||||
const char *env_shm_name = getenv("VLLM_IDLE_OFFLOAD_SHM_NAME");
|
||||
@@ -69,11 +70,14 @@ static inline uint64_t pack_unlocked_tgid(int32_t tgid) {
|
||||
|
||||
// mmap usually page-aligned
|
||||
struct alignas(64) ShmHelper {
|
||||
struct VramInfo {
|
||||
uint64_t total_vmem_size;
|
||||
uint64_t shareable_handle;
|
||||
};
|
||||
VramInfo vram_info[MAX_DEVICES]; // support max 16 NPUs
|
||||
// GPU lock flag
|
||||
std::atomic<uint64_t> gpu_flag;
|
||||
uint64_t total_vmem_size;
|
||||
uint64_t shareable_handle;
|
||||
uint8_t _padding[64 - sizeof(std::atomic<uint64_t>) - 2 * sizeof(uint64_t)];
|
||||
std::atomic<uint64_t> gpu_flag[MAX_DEVICES];
|
||||
// uint8_t _padding1[64 - sizeof(std::atomic<uint64_t>)];
|
||||
|
||||
// request
|
||||
enum RequestType: uint32_t {
|
||||
@@ -105,14 +109,16 @@ struct alignas(64) ShmHelper {
|
||||
WorkerHeartBeat heart_beats[MAX_WORKERS];
|
||||
|
||||
void init() {
|
||||
gpu_flag.store(0, std::memory_order_release);
|
||||
memset(vram_info, 0, sizeof(vram_info));
|
||||
for (size_t i = 0; i < MAX_DEVICES; ++i) {
|
||||
gpu_flag[i].store(0, std::memory_order_release);
|
||||
}
|
||||
req_ready.store(READY_STATE_NO_REQUEST, std::memory_order_release);
|
||||
}
|
||||
|
||||
void set_gpu_info(uint64_t vmem_size, uint64_t shared_handle) {
|
||||
total_vmem_size = vmem_size;
|
||||
shareable_handle = shared_handle;
|
||||
init();
|
||||
void set_gpu_info(int gpu_id, uint64_t vmem_size, uint64_t shared_handle) {
|
||||
vram_info[gpu_id].total_vmem_size = vmem_size;
|
||||
vram_info[gpu_id].shareable_handle = shared_handle;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user