* metal : improve naming * metal : refactor device ggml-ci * cont : props ggml-ci * metal : apply ggml_mem_ranges_t ggml-ci * metal : remove GGML_METAL_USE_BF16 ggml-ci * metal : refactor device buffer ggml-ci * cont : fix naming * metal : sync before destroying the backend ggml-ci * metal : refactor context ggml-ci * metal : migrate ggml-metal.m to ggml-metal.cpp ggml-ci * metal : adjust ops API ggml-ci * metal : use C++ to store pipelines ggml-ci * metal : migrate ops to separate functions ggml-ci * metal : add ggml_metal_library_t ggml-ci * metal : improve naming ggml-ci * metal : cleanup ggml-ci * metal : add support for GGML_OP_LOG ggml-ci * metal : fix error handling ggml-ci
53 lines
1.7 KiB
C
53 lines
1.7 KiB
C
// helper functions for ggml-metal that are too difficult to implement in Objective-C

#pragma once

#include <stdbool.h>

#ifdef __cplusplus
extern "C" {
#endif

// forward declarations: this header only refers to these types through pointers
struct ggml_tensor;
struct ggml_cgraph;

// kind of a tracked memory range
enum ggml_mem_range_type {
    MEM_RANGE_TYPE_SRC = 0, // source range
    MEM_RANGE_TYPE_DST = 1, // destination range
};

// a helper object that can be used for reordering operations to improve concurrency
//
// the fundamental idea is that a set of tasks (either ggml ops, or something else) can run concurrently
// if none of them writes to memory that is being read or written by another task in the set
//
// with this structure, we can add tasks to the set, recording their memory constraints. we can also check
// whether a new task can be added to the set without violating the constraints (i.e. whether it can be
// executed concurrently with the tasks already in the set)
//
// the handle is opaque: the underlying struct is not defined in this header
typedef struct ggml_mem_ranges * ggml_mem_ranges_t;

// create a new set of memory ranges
// NOTE(review): the meaning of `debug` is not visible from this header - presumably a debug/verbosity
//               level, confirm against the implementation
ggml_mem_ranges_t ggml_mem_ranges_init(int debug);

// release a set previously obtained from ggml_mem_ranges_init
void ggml_mem_ranges_free(ggml_mem_ranges_t mrs);

// remove all ranges from the set
void ggml_mem_ranges_reset(ggml_mem_ranges_t mrs);

// add the src or dst ranges of the tensor to the set of tracked ranges
// NOTE(review): the meaning of the returned bool is not documented here - presumably true on success,
//               confirm against the implementation
bool ggml_mem_ranges_add(ggml_mem_ranges_t mrs, const struct ggml_tensor * tensor);

// check whether the ranges of the tensor are compatible with the ranges already in the set
//
// return false if:
// - new src range overlaps with any existing dst range
// - new dst range overlaps with any existing range (src or dst)
bool ggml_mem_ranges_check(ggml_mem_ranges_t mrs, const struct ggml_tensor * tensor);

// reorder the nodes in the graph to improve concurrency, while respecting fusion
//
// note: this implementation is generic and not specific to metal
//       if it proves to work well, we can start using it for other backends in the future
void ggml_graph_optimize(struct ggml_cgraph * gf);

#ifdef __cplusplus
}
#endif
|