llama : add option to override model tensor buffers (#11397)

* llama : add option to override tensor buffers

* ggml : fix possible underflow in ggml_nbytes
This commit is contained in:
Diego Devesa
2025-04-02 14:52:01 +02:00
committed by GitHub
parent a10b36c91a
commit e0e912f49b
12 changed files with 108 additions and 9 deletions

View File

@@ -92,7 +92,7 @@ static int llama_model_load(const std::string & fname, std::vector<std::string>
model.t_start_us = tm.t_start_us;
try {
-llama_model_loader ml(fname, splits, params.use_mmap, params.check_tensors, params.kv_overrides);
+llama_model_loader ml(fname, splits, params.use_mmap, params.check_tensors, params.kv_overrides, params.tensor_buft_overrides);
ml.print_info();