sync from b7516
This commit is contained in:
@@ -26,16 +26,16 @@ int main(int argc, char ** argv) {
|
||||
llama_numa_init(params.numa);
|
||||
auto mparams = common_model_params_to_llama(params);
|
||||
auto cparams = common_context_params_to_llama(params);
|
||||
const llama_params_fit_status status = llama_params_fit(params.model.path.c_str(), &mparams, &cparams,
|
||||
params.tensor_split, params.tensor_buft_overrides.data(), params.fit_params_target.data(), params.fit_params_min_ctx,
|
||||
const bool success = llama_params_fit(params.model.path.c_str(), &mparams, &cparams,
|
||||
params.tensor_split, params.tensor_buft_overrides.data(), params.fit_params_target, params.fit_params_min_ctx,
|
||||
params.verbosity >= 4 ? GGML_LOG_LEVEL_DEBUG : GGML_LOG_LEVEL_ERROR);
|
||||
if (status != LLAMA_PARAMS_FIT_STATUS_SUCCESS) {
|
||||
if (!success) {
|
||||
LOG_ERR("%s: failed to fit CLI arguments to free memory, exiting...\n", __func__);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
LOG_INF("%s: printing fitted CLI arguments to stdout...\n", __func__);
|
||||
common_log_flush(common_log_main());
|
||||
std::this_thread::sleep_for(10ms); // to avoid a race between stderr and stdout
|
||||
printf("-c %" PRIu32 " -ngl %" PRIu32, cparams.n_ctx, mparams.n_gpu_layers);
|
||||
|
||||
size_t nd = llama_max_devices();
|
||||
|
||||
Reference in New Issue
Block a user