feat(模型加载): 添加sync_to_temp选项支持临时目录加载
添加sync_to_temp参数控制是否将模型文件复制到临时目录后再加载
This commit is contained in:
29
Dockerfile
Normal file
29
Dockerfile
Normal file
@@ -0,0 +1,29 @@
# --- Build stage -------------------------------------------------------------
# corex:4.3.8 provides the Iluvatar CoreX (ivcore) CUDA-compatible toolchain.
# NOTE: "AS" must be uppercase to match the FROM casing (BuildKit warns on mixed case).
FROM corex:4.3.8 AS builder

# COPY is preferred over ADD for plain local files (ADD's extra semantics —
# URL fetch, auto-extract — are not wanted here).
COPY . /app
WORKDIR /app

# The stock CMake Clang-CUDA module hardcodes "-x cuda"; the CoreX compiler
# needs "-x ivcore" instead, so patch the module before configuring.
RUN sed -i 's/-x cuda/-x ivcore/g' /usr/local/share/cmake-3.25/Modules/Compiler/Clang-CUDA.cmake &&\
    cmake -S . -B build \
    -DGGML_CUDA=ON \
    -DLLAMA_CURL=OFF \
    -DCMAKE_CUDA_ARCHITECTURES=ivcore11 \
    -DCMAKE_CUDA_FLAGS="-x ivcore -std=c++17" \
    -DCMAKE_CUDA_STANDARD=17 \
    -DCMAKE_CXX_STANDARD=17 \
    -DGGML_CUDA_FA=OFF \
    -DBUILD_SHARED_LIBS=OFF \
    -DGGML_CUDA_FORCE_CUBLAS=ON \
    -DGGML_CPU=ON &&\
    cmake --build build --config Release -j \
    --target llama-server llama-cli

# --- Runtime stage -----------------------------------------------------------
# NOTE(review): "ubuntu" is unpinned and will drift across builds — TODO pin to
# an explicit tag (e.g. ubuntu:22.04) matching the builder's glibc.
FROM ubuntu

WORKDIR /app

# Runtime libraries and the two binaries produced by the builder stage.
COPY --from=builder /usr/local/corex/ /usr/local/corex/
COPY --from=builder /usr/local/openmpi/lib/ /usr/local/openmpi/lib/
COPY --from=builder /app/build/bin/llama-server /app/llama-server
COPY --from=builder /app/build/bin/llama-cli /app/llama-cli

# apt-get (not apt, whose CLI is not script-stable); skip recommended packages
# and drop the apt lists afterwards to keep the image small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends libgomp1 && \
    rm -rf /var/lib/apt/lists/*

# Put the CoreX and OpenMPI tools on PATH and their libraries on the loader path.
ENV PATH=/usr/local/corex/bin:/usr/local/openmpi/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ENV LD_LIBRARY_PATH=/usr/local/corex/lib64:/usr/local/openmpi/lib:/usr/local/lib
@@ -2039,6 +2039,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||
params.use_mmap = value;
|
||||
}
|
||||
).set_env("LLAMA_ARG_MMAP"));
|
||||
add_opt(common_arg(
|
||||
{"--sync-to-temp"},
|
||||
{"--no-sync-to-temp"},
|
||||
string_format("whether to copy model to temporary directory before loading (default: %s)", params.sync_to_temp ? "enabled" : "disabled"),
|
||||
[](common_params & params, bool value) {
|
||||
params.sync_to_temp = value;
|
||||
}
|
||||
).set_env("LLAMA_ARG_SYNC_TO_TEMP"));
|
||||
add_opt(common_arg(
|
||||
{"--numa"}, "TYPE",
|
||||
"attempt optimizations that help on some NUMA systems\n"
|
||||
|
||||
@@ -1353,6 +1353,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
|
||||
mparams.check_tensors = params.check_tensors;
|
||||
mparams.use_extra_bufts = !params.no_extra_bufts;
|
||||
mparams.no_host = params.no_host;
|
||||
mparams.sync_to_temp = params.sync_to_temp;
|
||||
|
||||
if (params.kv_overrides.empty()) {
|
||||
mparams.kv_overrides = NULL;
|
||||
|
||||
@@ -430,6 +430,7 @@ struct common_params {
|
||||
bool no_op_offload = false; // globally disable offload host tensor operations to device
|
||||
bool no_extra_bufts = false; // disable extra buffer types (used for weight repacking)
|
||||
bool no_host = false; // bypass host buffer allowing extra buffers to be used
|
||||
bool sync_to_temp = false; // copy model to temporary directory before loading
|
||||
|
||||
bool single_turn = false; // single turn chat conversation
|
||||
|
||||
|
||||
@@ -314,6 +314,7 @@ extern "C" {
|
||||
bool use_extra_bufts; // use extra buffer types (used for weight repacking)
|
||||
bool no_host; // bypass host buffer allowing extra buffers to be used
|
||||
bool no_alloc; // only load metadata and simulate memory allocations
|
||||
bool sync_to_temp; // copy model to temporary directory before loading
|
||||
};
|
||||
|
||||
// NOTE: changing the default values of parameters marked as [EXPERIMENTAL] may cause crashes or incorrect results in certain configurations
|
||||
|
||||
@@ -7743,6 +7743,7 @@ llama_model_params llama_model_default_params() {
|
||||
/*.use_extra_bufts =*/ true,
|
||||
/*.no_host =*/ false,
|
||||
/*.no_alloc =*/ false,
|
||||
/*.sync_to_temp =*/ false,
|
||||
};
|
||||
|
||||
return result;
|
||||
|
||||
@@ -19,9 +19,14 @@
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
#include <cerrno>
|
||||
#include <filesystem>
|
||||
#include <stdexcept>
|
||||
#include <unistd.h>
|
||||
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||
@@ -940,7 +945,54 @@ static struct llama_model * llama_model_load_from_file_impl(
|
||||
props.memory_free/1024/1024);
|
||||
}
|
||||
|
||||
const int status = llama_model_load(path_model, splits, *model, params);
|
||||
// Handle sync_to_temp option
|
||||
std::string temp_model_path = path_model;
|
||||
std::string temp_dir;
|
||||
if (params.sync_to_temp) {
|
||||
// Create temporary directory
|
||||
char temp_dir_template[] = "/tmp/llama_model_XXXXXX";
|
||||
char * temp_dir_ptr = mkdtemp(temp_dir_template);
|
||||
if (!temp_dir_ptr) {
|
||||
LLAMA_LOG_ERROR("%s: failed to create temporary directory: %s\n", __func__, strerror(errno));
|
||||
llama_model_free(model);
|
||||
return nullptr;
|
||||
}
|
||||
temp_dir = temp_dir_ptr;
|
||||
|
||||
// Copy model file to temporary directory
|
||||
std::string model_filename = std::filesystem::path(path_model).filename();
|
||||
temp_model_path = temp_dir + "/" + model_filename;
|
||||
|
||||
try {
|
||||
LLAMA_LOG_INFO("%s: copying model to temporary directory: %s\n", __func__, temp_model_path.c_str());
|
||||
std::filesystem::copy_file(path_model, temp_model_path, std::filesystem::copy_options::overwrite_existing);
|
||||
|
||||
// Also copy split files if any
|
||||
for (size_t i = 1; i < splits.size(); ++i) {
|
||||
const std::string & split_path = splits[i];
|
||||
std::string split_filename = std::filesystem::path(split_path).filename();
|
||||
std::string temp_split_path = temp_dir + "/" + split_filename;
|
||||
std::filesystem::copy_file(split_path, temp_split_path, std::filesystem::copy_options::overwrite_existing);
|
||||
}
|
||||
} catch (const std::exception & e) {
|
||||
LLAMA_LOG_ERROR("%s: failed to copy model to temporary directory: %s\n", __func__, e.what());
|
||||
std::filesystem::remove_all(temp_dir);
|
||||
llama_model_free(model);
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
const int status = llama_model_load(temp_model_path, splits, *model, params);
|
||||
|
||||
// Clean up temporary directory if created
|
||||
if (params.sync_to_temp && !temp_dir.empty()) {
|
||||
try {
|
||||
std::filesystem::remove_all(temp_dir);
|
||||
LLAMA_LOG_INFO("%s: cleaned up temporary directory: %s\n", __func__, temp_dir.c_str());
|
||||
} catch (const std::exception & e) {
|
||||
LLAMA_LOG_WARN("%s: failed to clean up temporary directory: %s\n", __func__, e.what());
|
||||
}
|
||||
}
|
||||
GGML_ASSERT(status <= 0);
|
||||
if (status < 0) {
|
||||
if (status == -1) {
|
||||
|
||||
Reference in New Issue
Block a user