Files
xc-llm-ascend/csrc/vnpu_offload/npu_helper.h
2026-02-11 06:27:58 +00:00

87 lines
2.5 KiB
C++

#include <stdint.h>

#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <filesystem>
#include <stdexcept>
#include <string>
#include <vector>

#include "spdlog/spdlog.h"
#include "acl/acl.h"
/// Enumerate the device nodes under /dev whose name is `davinci<N>`.
///
/// Only entries whose name starts with "davinci" and whose remainder is a
/// non-empty run of decimal digits are accepted; a name with any other suffix
/// (for example `davinci_manager`) is skipped. Each accepted `<N>` is parsed
/// as an `int`.
///
/// Filesystem errors are logged and do not propagate: whatever was collected
/// before the error is returned.
///
/// @return The collected ids sorted in ascending order; empty when /dev does
///         not exist or cannot be read.
static inline std::vector<int> get_available_devices() {
  namespace fs = std::filesystem;
  std::vector<int> devices;
  const std::string dev_path = "/dev";
  const std::string prefix = "davinci";
  try {
    // The throwing overload of exists() reports OS-level failures via
    // filesystem_error, so it has to live inside the try block as well.
    if (!fs::exists(dev_path)) {
      return devices;
    }
    for (const auto &entry : fs::directory_iterator(dev_path)) {
      const std::string filename = entry.path().filename().string();
      if (filename.rfind(prefix, 0) != 0) {
        continue;  // does not start with the prefix
      }
      const std::string suffix = filename.substr(prefix.length());
      // Keep only purely numeric suffixes.
      const bool numeric =
          !suffix.empty() &&
          std::all_of(suffix.begin(), suffix.end(),
                      [](unsigned char c) { return std::isdigit(c) != 0; });
      if (!numeric) {
        continue;
      }
      try {
        devices.push_back(std::stoi(suffix));
      } catch (const std::out_of_range &) {
        // An all-digit suffix can only fail to parse by overflowing int.
        // Skip the node, but leave a trace instead of dropping it silently.
        spdlog::warn("Ignoring device node {}: id does not fit in int",
                     filename);
      }
    }
  } catch (const fs::filesystem_error &e) {
    spdlog::error("Error accessing /dev: {}", e.what());
  }
  std::sort(devices.begin(), devices.end());
  return devices;
}
static inline std::vector<int> get_npu_ids() {
std::vector<int> available_devices = get_available_devices();
std::vector<int> npu_ids;
uint32_t device_count = 0;
aclError error_code = aclrtGetDeviceCount(&device_count);
if (error_code != 0) {
spdlog::error("Failed to get NPU device count, error code: {}", error_code);
throw std::runtime_error("Failed to get NPU device count");
}
if (device_count > available_devices.size()) {
spdlog::error("The number of available NPU devices ({}) is less than the "
"number of devices reported by ACL ({}).",
available_devices.size(), device_count);
throw std::runtime_error("Inconsistent NPU device count");
}
const char *env_available_npu = getenv("ASCEND_RT_VISIBLE_DEVICES");
if (env_available_npu) {
std::string npu_str(env_available_npu);
size_t start = 0;
while (start < npu_str.size()) {
size_t next = npu_str.find(',', start);
if (next == std::string::npos) {
next = npu_str.size();
}
int device_id = std::stoi(npu_str.substr(start, next - start));
npu_ids.push_back(available_devices[device_id]);
start = next + 1;
if (npu_ids.size() >= device_count) {
break;
}
}
} else {
npu_ids.insert(npu_ids.end(), available_devices.begin(),
available_devices.begin() + device_count);
}
return npu_ids;
}