feat(模型加载): 添加sync_to_temp选项支持临时目录加载
添加sync_to_temp参数控制是否将模型文件复制到临时目录后再加载
This commit is contained in:
@@ -2039,6 +2039,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||
params.use_mmap = value;
|
||||
}
|
||||
).set_env("LLAMA_ARG_MMAP"));
|
||||
// Register the boolean option pair "--sync-to-temp" / "--no-sync-to-temp".
// The help text reports the default taken from params.sync_to_temp at the time
// this option is registered; the handler stores the parsed flag into
// params.sync_to_temp.
// NOTE(review): presumably the first list enables and the second list disables
// the option (handler receives true/false accordingly) — confirm against the
// common_arg constructor.
// NOTE(review): set_env tags the option with LLAMA_ARG_SYNC_TO_TEMP; presumably
// this lets the environment variable set the option — confirm.
add_opt(common_arg(
|
||||
{"--sync-to-temp"},
|
||||
{"--no-sync-to-temp"},
|
||||
string_format("whether to copy model to temporary directory before loading (default: %s)", params.sync_to_temp ? "enabled" : "disabled"),
|
||||
[](common_params & params, bool value) {
|
||||
params.sync_to_temp = value;
|
||||
}
|
||||
).set_env("LLAMA_ARG_SYNC_TO_TEMP"));
|
||||
add_opt(common_arg(
|
||||
{"--numa"}, "TYPE",
|
||||
"attempt optimizations that help on some NUMA systems\n"
|
||||
|
||||
@@ -1353,6 +1353,7 @@ struct llama_model_params common_model_params_to_llama(common_params & params) {
|
||||
mparams.check_tensors = params.check_tensors;
|
||||
mparams.use_extra_bufts = !params.no_extra_bufts;
|
||||
mparams.no_host = params.no_host;
|
||||
mparams.sync_to_temp = params.sync_to_temp;
|
||||
|
||||
if (params.kv_overrides.empty()) {
|
||||
mparams.kv_overrides = NULL;
|
||||
|
||||
@@ -430,6 +430,7 @@ struct common_params {
|
||||
bool no_op_offload = false; // globally disable offload host tensor operations to device
|
||||
bool no_extra_bufts = false; // disable extra buffer types (used for weight repacking)
|
||||
bool no_host = false; // bypass host buffer allowing extra buffers to be used
|
||||
bool sync_to_temp = false; // copy model to temporary directory before loading
|
||||
|
||||
bool single_turn = false; // single turn chat conversation
|
||||
|
||||
|
||||
Reference in New Issue
Block a user