metal : refactor + optimize v2 (#15995)

* metal : improve naming

* metal : refactor device

ggml-ci

* cont : props

ggml-ci

* metal : apply ggml_mem_ranges_t

ggml-ci

* metal : remove GGML_METAL_USE_BF16

ggml-ci

* metal : refactor device buffer

ggml-ci

* cont : fix naming

* metal : sync before destroying the backend

ggml-ci

* metal : refactor context

ggml-ci

* metal : migrate ggml-metal.m to ggml-metal.cpp

ggml-ci

* metal : adjust ops API

ggml-ci

* metal : use C++ to store pipelines

ggml-ci

* metal : migrate ops to separate functions

ggml-ci

* metal : add ggml_metal_library_t

ggml-ci

* metal : improve naming

ggml-ci

* metal : cleanup

ggml-ci

* metal : add support for GGML_OP_LOG

ggml-ci

* metal : fix error handling

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-09-17 20:38:12 +03:00
committed by GitHub
parent a7a98e0fff
commit 0320ac5264
19 changed files with 7873 additions and 7184 deletions

View File

@@ -25,27 +25,27 @@ enum ggml_mem_range_type {
// can be added to the set without violating the constraints (i.e. if it can be executed concurrently with the
// tasks already in the set)
//
struct ggml_mem_ranges;
typedef struct ggml_mem_ranges * ggml_mem_ranges_t;
struct ggml_mem_ranges * ggml_mem_ranges_init(int debug);
void ggml_mem_ranges_free(struct ggml_mem_ranges * mrs);
ggml_mem_ranges_t ggml_mem_ranges_init(int debug);
void ggml_mem_ranges_free(ggml_mem_ranges_t mrs);
// remove all ranges from the set
void ggml_mem_ranges_reset(struct ggml_mem_ranges * mrs);
void ggml_mem_ranges_reset(ggml_mem_ranges_t mrs);
// add src or dst ranges to track
bool ggml_mem_ranges_add(struct ggml_mem_ranges * mrs, const struct ggml_tensor * tensor);
bool ggml_mem_ranges_add(ggml_mem_ranges_t mrs, const struct ggml_tensor * tensor);
// return false if:
// - new src range overlaps with any existing dst range
// - new dst range overlaps with any existing range (src or dst)
bool ggml_mem_ranges_check(const struct ggml_mem_ranges * mrs, const struct ggml_tensor * tensor);
bool ggml_mem_ranges_check(ggml_mem_ranges_t mrs, const struct ggml_tensor * tensor);
// reorder the nodes in the graph to improve concurrency, while respecting fusion
//
// note: this implementation is generic and not specific to metal
// if it proves to work well, we can start using it for other backends in the future
void ggml_metal_graph_optimize(struct ggml_cgraph * gf);
void ggml_graph_optimize(struct ggml_cgraph * gf);
#ifdef __cplusplus
}