enginex-ascend-910-llama.cpp/ggml/src/ggml-metal/ggml-metal-common.h

// helper functions for ggml-metal that are too difficult to implement in Objective-C

#pragma once

#include <stdbool.h>

#ifdef __cplusplus
extern "C" {
#endif

struct ggml_tensor;
struct ggml_cgraph;

enum ggml_mem_range_type {
    MEM_RANGE_TYPE_SRC = 0,
    MEM_RANGE_TYPE_DST = 1,
};

// a helper object that can be used for reordering operations to improve concurrency
//
// the fundamental idea is that a set of tasks (either ggml ops, or something else) can run concurrently if they
//   don't write to a memory that is being read by another task or written to by another task in the set
//
// with this structure, we can add tasks to the set, setting memory constraints. we can also check if a new task
//   can be added to the set without violating the constraints (i.e. if it can be executed concurrently with the
//   tasks already in the set)
//
typedef struct ggml_mem_ranges * ggml_mem_ranges_t;

ggml_mem_ranges_t ggml_mem_ranges_init(int debug);
void ggml_mem_ranges_free(ggml_mem_ranges_t mrs);

// remove all ranges from the set
void ggml_mem_ranges_reset(ggml_mem_ranges_t mrs);

// add src or dst ranges to track
bool ggml_mem_ranges_add(ggml_mem_ranges_t mrs, const struct ggml_tensor * tensor);

// return false if:
// - new src range overlaps with any existing dst range
// - new dst range overlaps with any existing range (src or dst)
bool ggml_mem_ranges_check(ggml_mem_ranges_t mrs, const struct ggml_tensor * tensor);

// reorder the nodes in the graph to improve concurrency, while respecting fusion
//
// note: this implementation is generic and not specific to metal
//       if it proves to work well, we can start using it for other backends in the future
void ggml_graph_optimize(struct ggml_cgraph * gf);

#ifdef __cplusplus
}
#endif
metal : allow ops to run concurrently (#15929) * metal : run graphs ops concurrently ggml-ci * cont : add flags for debugging and disabling concurrency ggml-ci * cont : refactor and handle fusing ggml-ci * cont : simplify - no need to use GPU address ggml-ci * cont : prepare mem ranges for reuse + add ggml-metal-common.cpp ggml-ci * cont : avoid redundant keywords in cpp [no ci] * metal : reorder graph for better concurrency ggml-ci * metal : fix race on mem pool buffers ggml-ci * cont : add env GGML_METAL_GRAPH_OPTIMIZE_DISABLE ggml-ci * cont : refactor, optimize, add comments ggml-ci * cont : refactor ggml-metal.m ggml-ci * minor : update logs [no ci] 2025-09-13 13:54:28 +03:00			`// helper functions for ggml-metal that are too difficult to implement in Objective-C`

			`#pragma once`

			`#include <stdbool.h>`

			`#ifdef __cplusplus`
			`extern "C" {`
			`#endif`

			`struct ggml_tensor;`
			`struct ggml_cgraph;`

			`enum ggml_mem_range_type {`
			`MEM_RANGE_TYPE_SRC = 0,`
			`MEM_RANGE_TYPE_DST = 1,`
			`};`

			`// a helper object that can be used for reordering operations to improve concurrency`
			`//`
			`// the fundamental idea is that a set of tasks (either ggml ops, or something else) can run concurrently if they`
			`// don't write to a memory that is being read by another task or written to by another task in the set`
			`//`
			`// with this structure, we can add tasks to the set, setting memory constraints. we can also check if a new task`
			`// can be added to the set without violating the constraints (i.e. if it can be executed concurrently with the`
			`// tasks already in the set)`
			`//`
metal : refactor + optimize v2 (#15995) * metal : improve naming * metal : refactor device ggml-ci * cont : props ggml-ci * metal : apply ggml_mem_ranges_t ggml-ci * metal : remove GGML_METAL_USE_BF16 ggml-ci * metal : refactor device buffer ggml-ci * cont : fix naming * metal : sync before destroying the backend ggml-ci * metal : refactor context ggml-ci * metal : migrate ggml-metal.m to ggml-metal.cpp ggml-ci * metal : adjust ops API ggml-ci * metal : use C++ to store piplienes ggml-ci * metal : migrate ops to separate functions ggml-ci * metal : add ggml_metal_library_t ggml-ci * metal : improve naming ggml-ci * metal : cleanp ggml-ci * metal : add support for GGML_OP_LOG ggml-ci * metal : fix error handling ggml-ci 2025-09-17 20:38:12 +03:00			`typedef struct ggml_mem_ranges * ggml_mem_ranges_t;`
metal : allow ops to run concurrently (#15929) * metal : run graphs ops concurrently ggml-ci * cont : add flags for debugging and disabling concurrency ggml-ci * cont : refactor and handle fusing ggml-ci * cont : simplify - no need to use GPU address ggml-ci * cont : prepare mem ranges for reuse + add ggml-metal-common.cpp ggml-ci * cont : avoid redundant keywords in cpp [no ci] * metal : reorder graph for better concurrency ggml-ci * metal : fix race on mem pool buffers ggml-ci * cont : add env GGML_METAL_GRAPH_OPTIMIZE_DISABLE ggml-ci * cont : refactor, optimize, add comments ggml-ci * cont : refactor ggml-metal.m ggml-ci * minor : update logs [no ci] 2025-09-13 13:54:28 +03:00
metal : refactor + optimize v2 (#15995) * metal : improve naming * metal : refactor device ggml-ci * cont : props ggml-ci * metal : apply ggml_mem_ranges_t ggml-ci * metal : remove GGML_METAL_USE_BF16 ggml-ci * metal : refactor device buffer ggml-ci * cont : fix naming * metal : sync before destroying the backend ggml-ci * metal : refactor context ggml-ci * metal : migrate ggml-metal.m to ggml-metal.cpp ggml-ci * metal : adjust ops API ggml-ci * metal : use C++ to store piplienes ggml-ci * metal : migrate ops to separate functions ggml-ci * metal : add ggml_metal_library_t ggml-ci * metal : improve naming ggml-ci * metal : cleanp ggml-ci * metal : add support for GGML_OP_LOG ggml-ci * metal : fix error handling ggml-ci 2025-09-17 20:38:12 +03:00			`ggml_mem_ranges_t ggml_mem_ranges_init(int debug);`
			`void ggml_mem_ranges_free(ggml_mem_ranges_t mrs);`
metal : allow ops to run concurrently (#15929) * metal : run graphs ops concurrently ggml-ci * cont : add flags for debugging and disabling concurrency ggml-ci * cont : refactor and handle fusing ggml-ci * cont : simplify - no need to use GPU address ggml-ci * cont : prepare mem ranges for reuse + add ggml-metal-common.cpp ggml-ci * cont : avoid redundant keywords in cpp [no ci] * metal : reorder graph for better concurrency ggml-ci * metal : fix race on mem pool buffers ggml-ci * cont : add env GGML_METAL_GRAPH_OPTIMIZE_DISABLE ggml-ci * cont : refactor, optimize, add comments ggml-ci * cont : refactor ggml-metal.m ggml-ci * minor : update logs [no ci] 2025-09-13 13:54:28 +03:00
			`// remove all ranges from the set`
metal : refactor + optimize v2 (#15995) * metal : improve naming * metal : refactor device ggml-ci * cont : props ggml-ci * metal : apply ggml_mem_ranges_t ggml-ci * metal : remove GGML_METAL_USE_BF16 ggml-ci * metal : refactor device buffer ggml-ci * cont : fix naming * metal : sync before destroying the backend ggml-ci * metal : refactor context ggml-ci * metal : migrate ggml-metal.m to ggml-metal.cpp ggml-ci * metal : adjust ops API ggml-ci * metal : use C++ to store piplienes ggml-ci * metal : migrate ops to separate functions ggml-ci * metal : add ggml_metal_library_t ggml-ci * metal : improve naming ggml-ci * metal : cleanp ggml-ci * metal : add support for GGML_OP_LOG ggml-ci * metal : fix error handling ggml-ci 2025-09-17 20:38:12 +03:00			`void ggml_mem_ranges_reset(ggml_mem_ranges_t mrs);`
metal : allow ops to run concurrently (#15929) * metal : run graphs ops concurrently ggml-ci * cont : add flags for debugging and disabling concurrency ggml-ci * cont : refactor and handle fusing ggml-ci * cont : simplify - no need to use GPU address ggml-ci * cont : prepare mem ranges for reuse + add ggml-metal-common.cpp ggml-ci * cont : avoid redundant keywords in cpp [no ci] * metal : reorder graph for better concurrency ggml-ci * metal : fix race on mem pool buffers ggml-ci * cont : add env GGML_METAL_GRAPH_OPTIMIZE_DISABLE ggml-ci * cont : refactor, optimize, add comments ggml-ci * cont : refactor ggml-metal.m ggml-ci * minor : update logs [no ci] 2025-09-13 13:54:28 +03:00
			`// add src or dst ranges to track`
metal : refactor + optimize v2 (#15995) * metal : improve naming * metal : refactor device ggml-ci * cont : props ggml-ci * metal : apply ggml_mem_ranges_t ggml-ci * metal : remove GGML_METAL_USE_BF16 ggml-ci * metal : refactor device buffer ggml-ci * cont : fix naming * metal : sync before destroying the backend ggml-ci * metal : refactor context ggml-ci * metal : migrate ggml-metal.m to ggml-metal.cpp ggml-ci * metal : adjust ops API ggml-ci * metal : use C++ to store piplienes ggml-ci * metal : migrate ops to separate functions ggml-ci * metal : add ggml_metal_library_t ggml-ci * metal : improve naming ggml-ci * metal : cleanp ggml-ci * metal : add support for GGML_OP_LOG ggml-ci * metal : fix error handling ggml-ci 2025-09-17 20:38:12 +03:00			`bool ggml_mem_ranges_add(ggml_mem_ranges_t mrs, const struct ggml_tensor * tensor);`
metal : allow ops to run concurrently (#15929) * metal : run graphs ops concurrently ggml-ci * cont : add flags for debugging and disabling concurrency ggml-ci * cont : refactor and handle fusing ggml-ci * cont : simplify - no need to use GPU address ggml-ci * cont : prepare mem ranges for reuse + add ggml-metal-common.cpp ggml-ci * cont : avoid redundant keywords in cpp [no ci] * metal : reorder graph for better concurrency ggml-ci * metal : fix race on mem pool buffers ggml-ci * cont : add env GGML_METAL_GRAPH_OPTIMIZE_DISABLE ggml-ci * cont : refactor, optimize, add comments ggml-ci * cont : refactor ggml-metal.m ggml-ci * minor : update logs [no ci] 2025-09-13 13:54:28 +03:00
			`// return false if:`
			`// - new src range overlaps with any existing dst range`
			`// - new dst range overlaps with any existing range (src or dst)`
metal : refactor + optimize v2 (#15995) * metal : improve naming * metal : refactor device ggml-ci * cont : props ggml-ci * metal : apply ggml_mem_ranges_t ggml-ci * metal : remove GGML_METAL_USE_BF16 ggml-ci * metal : refactor device buffer ggml-ci * cont : fix naming * metal : sync before destroying the backend ggml-ci * metal : refactor context ggml-ci * metal : migrate ggml-metal.m to ggml-metal.cpp ggml-ci * metal : adjust ops API ggml-ci * metal : use C++ to store piplienes ggml-ci * metal : migrate ops to separate functions ggml-ci * metal : add ggml_metal_library_t ggml-ci * metal : improve naming ggml-ci * metal : cleanp ggml-ci * metal : add support for GGML_OP_LOG ggml-ci * metal : fix error handling ggml-ci 2025-09-17 20:38:12 +03:00			`bool ggml_mem_ranges_check(ggml_mem_ranges_t mrs, const struct ggml_tensor * tensor);`
metal : allow ops to run concurrently (#15929) * metal : run graphs ops concurrently ggml-ci * cont : add flags for debugging and disabling concurrency ggml-ci * cont : refactor and handle fusing ggml-ci * cont : simplify - no need to use GPU address ggml-ci * cont : prepare mem ranges for reuse + add ggml-metal-common.cpp ggml-ci * cont : avoid redundant keywords in cpp [no ci] * metal : reorder graph for better concurrency ggml-ci * metal : fix race on mem pool buffers ggml-ci * cont : add env GGML_METAL_GRAPH_OPTIMIZE_DISABLE ggml-ci * cont : refactor, optimize, add comments ggml-ci * cont : refactor ggml-metal.m ggml-ci * minor : update logs [no ci] 2025-09-13 13:54:28 +03:00
			`// reorder the nodes in the graph to improve concurrency, while respecting fusion`
			`//`
			`// note: this implementation is generic and not specific to metal`
			`// if it proves to work well, we can start using it for other backends in the future`
metal : refactor + optimize v2 (#15995) * metal : improve naming * metal : refactor device ggml-ci * cont : props ggml-ci * metal : apply ggml_mem_ranges_t ggml-ci * metal : remove GGML_METAL_USE_BF16 ggml-ci * metal : refactor device buffer ggml-ci * cont : fix naming * metal : sync before destroying the backend ggml-ci * metal : refactor context ggml-ci * metal : migrate ggml-metal.m to ggml-metal.cpp ggml-ci * metal : adjust ops API ggml-ci * metal : use C++ to store piplienes ggml-ci * metal : migrate ops to separate functions ggml-ci * metal : add ggml_metal_library_t ggml-ci * metal : improve naming ggml-ci * metal : cleanp ggml-ci * metal : add support for GGML_OP_LOG ggml-ci * metal : fix error handling ggml-ci 2025-09-17 20:38:12 +03:00			`void ggml_graph_optimize(struct ggml_cgraph * gf);`
metal : allow ops to run concurrently (#15929) * metal : run graphs ops concurrently ggml-ci * cont : add flags for debugging and disabling concurrency ggml-ci * cont : refactor and handle fusing ggml-ci * cont : simplify - no need to use GPU address ggml-ci * cont : prepare mem ranges for reuse + add ggml-metal-common.cpp ggml-ci * cont : avoid redundant keywords in cpp [no ci] * metal : reorder graph for better concurrency ggml-ci * metal : fix race on mem pool buffers ggml-ci * cont : add env GGML_METAL_GRAPH_OPTIMIZE_DISABLE ggml-ci * cont : refactor, optimize, add comments ggml-ci * cont : refactor ggml-metal.m ggml-ci * minor : update logs [no ci] 2025-09-13 13:54:28 +03:00
			`#ifdef __cplusplus`
			`}`
			`#endif`