metal : allow ops to run concurrently (#15929)
* metal : run graphs ops concurrently ggml-ci * cont : add flags for debugging and disabling concurrency ggml-ci * cont : refactor and handle fusing ggml-ci * cont : simplify - no need to use GPU address ggml-ci * cont : prepare mem ranges for reuse + add ggml-metal-common.cpp ggml-ci * cont : avoid redundant keywords in cpp [no ci] * metal : reorder graph for better concurrency ggml-ci * metal : fix race on mem pool buffers ggml-ci * cont : add env GGML_METAL_GRAPH_OPTIMIZE_DISABLE ggml-ci * cont : refactor, optimize, add comments ggml-ci * cont : refactor ggml-metal.m ggml-ci * minor : update logs [no ci]
This commit is contained in:
52
ggml/src/ggml-metal/ggml-metal-common.h
Normal file
52
ggml/src/ggml-metal/ggml-metal-common.h
Normal file
@@ -0,0 +1,52 @@
|
||||
// helper functions for ggml-metal that are too difficult to implement in Objective-C
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct ggml_tensor;
|
||||
struct ggml_cgraph;
|
||||
|
||||
// kind of a memory range: source (src) or destination (dst)
//
// the numeric values are explicit: MEM_RANGE_TYPE_SRC is 0 and MEM_RANGE_TYPE_DST is 1
//
// NOTE(review): presumably a src range is memory that a task reads and a dst range is memory that a task
//               writes - confirm against the implementation
enum ggml_mem_range_type {
    MEM_RANGE_TYPE_SRC = 0,
    MEM_RANGE_TYPE_DST = 1,
};
|
||||
|
||||
// a helper object that can be used for reordering operations to improve concurrency
|
||||
//
|
||||
// the fundamental idea is that a set of tasks (either ggml ops, or something else) can run concurrently if they
|
||||
// don't write to memory that is being read by another task or written to by another task in the set
|
||||
//
|
||||
// with this structure, we can add tasks to the set, setting memory constraints. we can also check if a new task
|
||||
// can be added to the set without violating the constraints (i.e. if it can be executed concurrently with the
|
||||
// tasks already in the set)
|
||||
//
|
||||
struct ggml_mem_ranges;
|
||||
|
||||
// NOTE(review): presumably creates a new, empty set that must later be released with ggml_mem_ranges_free - confirm
// NOTE(review): the meaning of `debug` is not stated in this header (looks like a debug/verbosity level) - confirm
struct ggml_mem_ranges * ggml_mem_ranges_init(int debug);
|
||||
// NOTE(review): presumably releases a set obtained from ggml_mem_ranges_init; whether NULL is accepted is not
//               stated in this header - confirm
void ggml_mem_ranges_free(struct ggml_mem_ranges * mrs);
|
||||
|
||||
// remove all ranges from the set
|
||||
// NOTE(review): presumably `mrs` itself stays valid and can be reused after the call - confirm
void ggml_mem_ranges_reset(struct ggml_mem_ranges * mrs);
|
||||
|
||||
// add src or dst ranges to track
|
||||
// NOTE(review): presumably the ranges are derived from `tensor` (its sources and its own data) - confirm
// NOTE(review): the meaning of the returned bool is not stated in this header - confirm against the implementation
bool ggml_mem_ranges_add(struct ggml_mem_ranges * mrs, const struct ggml_tensor * tensor);
|
||||
|
||||
// return false if:
|
||||
// - new src range overlaps with any existing dst range
|
||||
// - new dst range overlaps with any existing range (src or dst)
|
||||
// both `mrs` and `tensor` are taken as pointers to const, so the call is not expected to modify the set
// NOTE(review): presumably returns true when the new ranges do not conflict with the tracked ones - confirm
bool ggml_mem_ranges_check(const struct ggml_mem_ranges * mrs, const struct ggml_tensor * tensor);
|
||||
|
||||
// reorder the nodes in the graph to improve concurrency, while respecting fusion
|
||||
//
|
||||
// note: this implementation is generic and not specific to metal
|
||||
// if it proves to work well, we can start using it for other backends in the future
|
||||
// `gf` is passed as a pointer to non-const and nothing is returned
// NOTE(review): presumably the nodes are reordered in place inside `gf` - confirm
void ggml_metal_graph_optimize(struct ggml_cgraph * gf);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
Reference in New Issue
Block a user