Files
enginex-ascend-910-llama.cpp/ggml/src/CMakeLists.txt

417 lines
15 KiB
CMake
Raw Normal View History

include(CheckCXXCompilerFlag)
include("../cmake/common.cmake")
# Forward the configured GGML_SCHED_MAX_COPIES value to the sources as a
# preprocessor definition of the same name.
add_compile_definitions(GGML_SCHED_MAX_COPIES=${GGML_SCHED_MAX_COPIES})

# On Linux, Debug configurations additionally turn on the libstdc++ assertions.
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
    add_compile_definitions("$<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>")
endif()
# Optional sanitizer instrumentation. These are GCC/Clang style flags, so none
# of them is applied when building with MSVC. Every sanitizer is passed both to
# the compile step and to the link step.
if (NOT MSVC AND GGML_SANITIZE_THREAD)
    add_compile_options(-fsanitize=thread)
    link_libraries(-fsanitize=thread)
endif()

if (NOT MSVC AND GGML_SANITIZE_ADDRESS)
    # keep frame pointers so that the reported stack traces stay usable
    add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
    link_libraries(-fsanitize=address)
endif()

if (NOT MSVC AND GGML_SANITIZE_UNDEFINED)
    add_compile_options(-fsanitize=undefined)
    link_libraries(-fsanitize=undefined)
endif()
# Treat warnings as errors when GGML_FATAL_WARNINGS is on.
# GNU/Clang: -Werror is queued on the C_FLAGS / CXX_FLAGS lists.
# MSVC:      /WX is applied directly as a compile option.
if (GGML_FATAL_WARNINGS)
    if (CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang")
        list(APPEND C_FLAGS   -Werror)
        list(APPEND CXX_FLAGS -Werror)
    elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
        add_compile_options(/WX)
    endif()
endif()
# Extended warning set, enabled by GGML_ALL_WARNINGS.
if (GGML_ALL_WARNINGS)
    if (MSVC)
        # todo : msvc
        set(C_FLAGS   "")
        set(CXX_FLAGS "")
    else()
        # warnings shared by C and C++
        list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)

        # language specific warnings first, then the shared set
        list(APPEND C_FLAGS
            -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
            -Werror=implicit-int -Werror=implicit-function-declaration
            ${WARNING_FLAGS})
        list(APPEND CXX_FLAGS
            -Wmissing-declarations -Wmissing-noreturn
            ${WARNING_FLAGS})

        # NOTE(review): ggml_get_flags is defined outside this file; it presumably
        # fills GF_C_FLAGS / GF_CXX_FLAGS for the given compiler - confirm in common.cmake
        ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})

        # apply each list only to sources of the matching language
        add_compile_options(
            "$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
            "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
    endif()
endif()
# Link-time optimization: only switched on when the toolchain reports IPO
# support, otherwise a warning with the toolchain's explanation is printed.
if (GGML_LTO)
    include(CheckIPOSupported)
    check_ipo_supported(RESULT result OUTPUT output)

    if (NOT result)
        message(WARNING "IPO is not supported: ${output}")
    else()
        set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
    endif()
endif()
# Compiler cache support. Skipped when the user already configured a compiler
# launcher for C or C++. ccache is preferred over sccache when both are found.
if (GGML_CCACHE AND NOT CMAKE_C_COMPILER_LAUNCHER AND NOT CMAKE_CXX_COMPILER_LAUNCHER)
    find_program(GGML_CCACHE_FOUND ccache)
    find_program(GGML_SCCACHE_FOUND sccache)

    if (NOT GGML_CCACHE_FOUND AND NOT GGML_SCCACHE_FOUND)
        message(STATUS "Warning: ccache not found - consider installing it for faster compilation or disable this warning with GGML_CCACHE=OFF")
    else()
        if (GGML_CCACHE_FOUND)
            set(GGML_CCACHE_VARIANT ccache)
        else()
            set(GGML_CCACHE_VARIANT sccache)
        endif()

        # TODO: should not be set globally
        if (GGML_SYCL AND GGML_CCACHE_FOUND AND WIN32)
            # SYCL builds on Windows pass an explicit compiler type to ccache
            set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "ccache compiler_type=icl")
        else()
            set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${GGML_CCACHE_VARIANT}")
        endif()

        # set in the environment of the running cmake process
        set(ENV{CCACHE_SLOPPINESS} time_macros)
        message(STATUS "${GGML_CCACHE_VARIANT} found, compilation results will be cached. Disable with GGML_CCACHE=OFF.")
    endif()
endif()
# this version of Apple ld64 is buggy
#
# The compiler driver is asked to make the linker print its version (-Wl,-v).
# Only stderr is captured (ERROR_VARIABLE); stdout is discarded. When the
# captured text names the affected release, HAVE_BUGGY_APPLE_LINKER is defined
# for the sources.
execute_process(
    COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v
    ERROR_VARIABLE output
    OUTPUT_QUIET
)
# The backslash has to be doubled: inside a quoted CMake argument "\." is an
# identity escape that collapses to a plain ".", which the regex engine would
# then treat as "any character". "\\." reaches the regex as "\." (literal dot).
if (output MATCHES "dyld-1015\\.7")
    add_compile_definitions(HAVE_BUGGY_APPLE_LINKER)
endif()
# architecture specific
# TODO: probably these flags need to be tweaked on some architectures
#       feel free to update the Makefile for your architecture and send a pull request or issue
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

# CMAKE_GENERATOR_PLATFORM_LWR: lower-cased generator platform for MSVC,
# empty string for every other toolchain
if (NOT MSVC)
    set(CMAKE_GENERATOR_PLATFORM_LWR "")
else()
    string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR)
    message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}")
endif()

# NOTE(review): ggml_get_system_arch is defined outside this file; it is
# expected to set GGML_SYSTEM_ARCH, which is reported right below
ggml_get_system_arch()
message(STATUS "GGML_SYSTEM_ARCH: ${GGML_SYSTEM_ARCH}")
# Fully static linking (non-MSVC toolchains only)
if (NOT MSVC AND GGML_STATIC)
    add_link_options(-static)
    if (MINGW)
        # MinGW additionally links its runtime libraries statically
        add_link_options(-static-libgcc -static-libstdc++)
    endif()
endif()

# gprof instrumentation (non-MSVC toolchains only)
if (NOT MSVC AND GGML_GPROF)
    add_compile_options(-pg)
endif()

# MinGW builds define the Windows version macro from GGML_WIN_VER
if (MINGW)
    add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
endif()
#
# POSIX conformance
#

# History of the interfaces that drive the requested level:
#   - clock_gettime came in POSIX.1b (1993)
#   - CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
#   - posix_memalign came in POSIX.1-2001 / SUSv3
#   - M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
#
# OpenBSD is the exception: whenever POSIX conformance is specified there, some
# string functions rely on locale_t availability, which was introduced in
# POSIX.1-2008, forcing a higher level on that platform.
if (NOT CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
    add_compile_definitions(_XOPEN_SOURCE=600)
else()
    add_compile_definitions(_XOPEN_SOURCE=700)
endif()
# Data types, macros and functions related to controlling CPU affinity and
# some memory allocation are available on Linux through GNU extensions in libc
Threadpool: take 2 (#8672) * Introduce ggml_compute_threadpool - OpenMP functional: check - Vanilla ggml functional: Check - ggml w/threadpool functional: Check - OpenMP no regression: No glaring problems - Vanilla ggml no regression: No glaring problems - ggml w/threadpool no regression: No glaring problems * Minor fixes * fixed use after release bug * fixed a harmless race condition * Fix Android bulid issue * fix more race conditions * fix deadlock for cases where cgraph.n_nodes == 1 and fix --poll case * threadpool: use cpu_get_num_math to set the default number of threadpool threads This way we avoid using E-Cores and Hyperthreaded siblings. * bench: create fresh threadpool for each test For benchmarking it's better to start a fresh pool for each test with the exact number of threads needed for that test. Having larger pools is suboptimal (causes more load, etc). * atomics: always use stdatomics with clang and use relaxed memory order when polling in ggml_barrier This also removes sched_yield() calls from ggml_barrier() to match OpenMP behavior. * threadpool: make polling the default to match openmp behavior All command line args now allow for setting poll to 0 (false). * threadpool: do not wakeup threads in already paused threadpool * fix potential race condition in check_for_work * threadpool: do not create two threadpools if their params are identical * threadpool: reduce pause/resume/wakeup overhead in common cases We now start threadpool in paused state only if we have two. The resume is now implicit (ie new work) which allows for reduced locking and context-switch overhead. * threadpool: add support for hybrid polling poll params (--poll, ...) now specify "polling level", i.e. how aggresively we poll before waiting on cond.var. poll=0 means no polling, 1 means poll for 128K rounds then wait, 2 for 256K rounds, ... The default value of 50 (ie 50x128K rounds) seems like a decent default across modern platforms. We can tune this further as things evolve. 
* threadpool: reduce the number of barrier required New work is now indicated with an atomic counter that is incremented for each new graph that needs to be computed. This removes the need for extra barrier for clearing the "new_work" and removes the special case for trivial graphs. * threadpool: remove special-casing for disposable threadpools With the efficient hybrid polling there is no need to make disposable pools any different. This simplifies the overall logic and reduces branching. Include n_threads in debug print for disposable threadpool. Declare pause and stop flags as atomic_bool This doesn't actually generate any memory barriers and simply informs the thread sanitizer that these flags can be written & read by different threads without locking. * threadpool: do not clear barrier counters between graphs computes (fixes race with small graphs) This fixes the race condition with very small graphs where the main thread happens to start a new graph while the workers are just about to exit from barriers. * threadpool: use relaxed order for chunk sync Full memory barrier is an overkill for this since each thread works on different chunk * threadpool: remove abort_callback from threadpool state * threadpool: better naming for thread/cpumask releated functions * threadpool: consistent use of int type for n_threads params * threadpool: add support for ggml_threadpool_params_default/init Also removes the need for explicit mask_specified param. all-zero cpumask means use default (usually inherited) cpu affinity mask. 
* threadpool: move typedef into ggml.h * threadpool: fix apply_priority() function name * threadpool: fix swift wrapper errors due to n_threads int type cleanup * threadpool: enable --cpu-mask and other threadpool related options only if threadpool is enabled * threadpool: replace checks for compute_thread ret code with proper status check * threadpool: simplify threadpool init logic and fix main thread affinity application Most of the init code is now exactly the same between threadpool and openmp. * threadpool: update threadpool resume/pause function names * threadpool: enable openmp by default for now * threadpool: don't forget to free workers state when omp is enabled * threadpool: avoid updating process priority on the platforms that do not require it On Windows we need to change overall process priority class in order to set thread priorities, but on Linux, Mac, etc we do not need to touch the overall process settings. * threadpool: update calling thread prio and affinity only at start/resume This avoids extra syscalls for each graph_compute() * llama-bench: turn threadpool params into vectors, add output headers, etc * llama-bench: add support for cool off between tests --delay This helps for long running tests on platforms that are thermally limited (phones, laptops, etc). --delay (disabled by default) introduces the sleep for N seconds before starting each test. * threadpool: move process priority setting into the apps (bench and cli) This avoids changing the overall process priority on Windows for the apps that use ggml/llama.cpp directy. * threadpool: move all pause/resume logic into ggml * threadpool: futher api cleanup and prep for future refactoring All threadpool related functions and structs use ggml_threadpool prefix. 
* threadpool: minor indent fixes * threadpool: improve setprioty error message * Update examples/llama-bench/llama-bench.cpp Co-authored-by: slaren <slarengh@gmail.com> * threadpool: fix indent in set_threadpool call * use int32_t for n_thread type in public llama.cpp API * threadpool: use _new and _free instead of _create and _release * fix two more public APIs to use int32_t for n_threads * build: set _GNU_SOURCE for Adroid --------- Co-authored-by: Max Krasnyansky <quic_maxk@quicinc.com> Co-authored-by: fmz <quic_fzaghlou@quic.com> Co-authored-by: Max Krasnyansky <max.krasnyansky@gmail.com> Co-authored-by: slaren <slarengh@gmail.com>
2024-08-29 19:20:53 -04:00
# Linux and Android builds enable the GNU libc extensions
if (CMAKE_SYSTEM_NAME MATCHES "Linux|Android")
    add_compile_definitions(_GNU_SOURCE)
endif()
# RLIMIT_MEMLOCK originates from BSD and is not specified in POSIX.1.
# On macOS it is only available with the Darwin extensions enabled;
# DragonFly similarly needs its BSD extensions enabled.
if (CMAKE_SYSTEM_NAME MATCHES "Darwin|iOS|tvOS|DragonFly")
    add_compile_definitions(_DARWIN_C_SOURCE)
endif()

# alloca is a non-standard interface that the BSDs hide once POSIX conformance
# is requested, and not all of them offer a clean way to re-enable it, hence
# one feature macro per system.
if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
    add_compile_definitions(__BSD_VISIBLE)
endif()

if (CMAKE_SYSTEM_NAME MATCHES "NetBSD")
    add_compile_definitions(_NETBSD_SOURCE)
endif()

if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
    add_compile_definitions(_BSD_SOURCE)
endif()

# Windows builds define _CRT_SECURE_NO_WARNINGS
if (WIN32)
    add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
endif()
# ggml

# Dynamically loadable backends are only possible in a shared-library build.
if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS)
    message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS")
endif()

# ggml-base: the library every backend links against (see ggml_add_backend_library).
# The public headers are listed next to the sources.
add_library(ggml-base
            ../include/ggml.h
            ../include/ggml-alloc.h
            ../include/ggml-backend.h
            ../include/ggml-cpp.h
            ../include/ggml-opt.h
            ../include/gguf.h
            ggml.c
            ggml.cpp
            ggml-alloc.c
            ggml-backend.cpp
            ggml-opt.cpp
            ggml-threading.cpp
            ggml-threading.h
            ggml-quants.c
            ggml-quants.h
            gguf.cpp)

# consumers of ggml-base also see GGML_BACKEND_DL when it is enabled
if (GGML_BACKEND_DL)
    target_compile_definitions(ggml-base PUBLIC GGML_BACKEND_DL)
endif()

# private headers live next to the sources
target_include_directories(ggml-base PRIVATE .)
# ggml: the backend registry library, exported under the namespaced alias ggml::ggml.
add_library(ggml
            ggml-backend-reg.cpp)

add_library(ggml::ggml ALIAS ggml)

# GGML_BACKEND_DIR is compiled into ggml as a string macro of the same name;
# it is only accepted together with dynamically loadable backends.
if (GGML_BACKEND_DIR)
    if (NOT GGML_BACKEND_DL)
        message(FATAL_ERROR "GGML_BACKEND_DIR requires GGML_BACKEND_DL")
    endif()
    target_compile_definitions(ggml PUBLIC GGML_BACKEND_DIR="${GGML_BACKEND_DIR}")
endif()

target_link_libraries(ggml PUBLIC ggml-base)

if (CMAKE_SYSTEM_NAME MATCHES "Linux")
    # link the platform's dynamic-loading library through CMake's own
    # variable instead of hard-coding the library name
    target_link_libraries(ggml PRIVATE ${CMAKE_DL_LIBS})
endif()
# ggml_add_backend_library(<backend> <source>...)
#
# Creates the library target for one backend and connects it to ggml.
#
#   backend - name of the target to create
#   ARGN    - sources, forwarded unchanged to add_library()
#
# With GGML_BACKEND_DL the backend becomes a MODULE library: it is written to
# CMAKE_RUNTIME_OUTPUT_DIRECTORY (when that is set), ggml only depends on it for
# build ordering, and it is installed to GGML_BACKEND_DIR or, when that is not
# set, to CMAKE_INSTALL_BINDIR. Without GGML_BACKEND_DL the backend is a regular
# library that ggml links publicly.
#
# In both modes the backend links ggml-base privately, and its name is recorded
# (once) in the GGML_AVAILABLE_BACKENDS cache entry.
function(ggml_add_backend_library backend)
    if (GGML_BACKEND_DL)
        add_library(${backend} MODULE ${ARGN})
        # write the shared library to the output directory
        # (only when one is configured: an empty expansion would leave the
        # PROPERTIES list without a value and abort the configure step)
        if (CMAKE_RUNTIME_OUTPUT_DIRECTORY)
            set_target_properties(${backend} PROPERTIES
                LIBRARY_OUTPUT_DIRECTORY "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}")
        endif()
        target_compile_definitions(${backend} PRIVATE GGML_BACKEND_DL)
        # build-order dependency only; the module is not linked into ggml
        add_dependencies(ggml ${backend})
        if (GGML_BACKEND_DIR)
            install(TARGETS ${backend} LIBRARY DESTINATION ${GGML_BACKEND_DIR})
        else()
            install(TARGETS ${backend} LIBRARY DESTINATION ${CMAKE_INSTALL_BINDIR})
        endif()
    else()
        add_library(${backend} ${ARGN})
        target_link_libraries(ggml PUBLIC ${backend})
        install(TARGETS ${backend} LIBRARY)
    endif()

    target_link_libraries(${backend} PRIVATE ggml-base)
    target_include_directories(${backend} PRIVATE ..)

    # same spelling as the other BUILD_SHARED_LIBS checks in this file
    if (BUILD_SHARED_LIBS)
        target_compile_definitions(${backend} PRIVATE GGML_BACKEND_BUILD)
        target_compile_definitions(${backend} PUBLIC  GGML_BACKEND_SHARED)
    endif()

    # remember the backend for the cmake package, without duplicates
    if (NOT GGML_AVAILABLE_BACKENDS)
        set(GGML_AVAILABLE_BACKENDS "${backend}"
            CACHE INTERNAL "List of backends for cmake package")
    else()
        list(FIND GGML_AVAILABLE_BACKENDS "${backend}" has_backend)
        if (has_backend EQUAL -1)
            set(GGML_AVAILABLE_BACKENDS "${GGML_AVAILABLE_BACKENDS};${backend}"
                CACHE INTERNAL "List of backends for cmake package")
        endif()
    endif()
endfunction()
# ggml_add_backend(<backend>)
#
# Pulls in one backend when its switch is enabled.
#
#   backend - backend name, e.g. CUDA or Vulkan
#
# The switch is GGML_<backend> in upper case and the subdirectory is
# ggml-<backend> in lower case. Unless backends are loaded dynamically
# (GGML_BACKEND_DL), ggml also gets the public definition GGML_USE_<BACKEND>.
function(ggml_add_backend backend)
    string(TOUPPER "GGML_${backend}" backend_id)
    string(TOLOWER "ggml-${backend}" backend_target)

    if (${backend_id})
        add_subdirectory(${backend_target})
        message(STATUS "Including ${backend} backend")

        if (NOT GGML_BACKEND_DL)
            string(TOUPPER "GGML_USE_${backend}" backend_use)
            target_compile_definitions(ggml PUBLIC ${backend_use})
        endif()
    endif()
endfunction()
# ggml_add_cpu_backend_variant(<tag_name> [<feature>...])
#
# Prepares the feature switches for one CPU backend variant and then hands over
# to ggml_add_cpu_backend_variant_impl (defined outside this file).
#
#   tag_name - variant tag, also stored in GGML_CPU_TAG_NAME
#   ARGN     - features to enable for this variant
#
# x86:           every known GGML_<feature> switch is reset to OFF first, then
#                the requested ones are set to ON
# ARM / PowerPC: the requested features are set as GGML_INTERNAL_<feature> ON
# other:         no switches are touched
function(ggml_add_cpu_backend_variant tag_name)
    set(GGML_CPU_TAG_NAME ${tag_name})

    # other: OPENMP LLAMAFILE CPU_HBM
    if (GGML_SYSTEM_ARCH STREQUAL "x86")
        foreach (x86_feature
                 NATIVE
                 SSE42
                 AVX AVX2 BMI2 AVX_VNNI FMA F16C
                 AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16
                 AMX_TILE AMX_INT8 AMX_BF16)
            set(GGML_${x86_feature} OFF)
        endforeach()

        foreach (requested ${ARGN})
            set(GGML_${requested} ON)
        endforeach()
    elseif (GGML_SYSTEM_ARCH STREQUAL "ARM" OR GGML_SYSTEM_ARCH STREQUAL "PowerPC")
        foreach (requested ${ARGN})
            set(GGML_INTERNAL_${requested} ON)
        endforeach()
    endif()

    ggml_add_cpu_backend_variant_impl(${tag_name})
endfunction()
# CPU backend. With GGML_CPU_ALL_VARIANTS one backend is built per feature
# combination listed below; otherwise a single untagged variant is built when
# GGML_CPU is enabled.
ggml_add_backend(CPU)

if (GGML_CPU_ALL_VARIANTS)
    # the variants are separate backends, so they need dynamic loading and
    # cannot be combined with an explicit ARM arch setting
    if (NOT GGML_BACKEND_DL)
        message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
    elseif (GGML_CPU_ARM_ARCH)
        message(FATAL_ERROR "Cannot use both GGML_CPU_ARM_ARCH and GGML_CPU_ALL_VARIANTS")
    endif()

    if (GGML_SYSTEM_ARCH STREQUAL "x86")
        ggml_add_cpu_backend_variant(x64)
        ggml_add_cpu_backend_variant(sse42        SSE42)
        ggml_add_cpu_backend_variant(sandybridge  SSE42 AVX)
        ggml_add_cpu_backend_variant(haswell      SSE42 AVX F16C AVX2 BMI2 FMA)
        ggml_add_cpu_backend_variant(skylakex     SSE42 AVX F16C AVX2 BMI2 FMA AVX512)
        ggml_add_cpu_backend_variant(icelake      SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI)
        ggml_add_cpu_backend_variant(alderlake    SSE42 AVX F16C AVX2 BMI2 FMA AVX_VNNI)
        if (NOT MSVC)
            # MSVC doesn't support AMX
            ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8)
        endif()
    elseif (GGML_SYSTEM_ARCH STREQUAL "ARM")
        if (CMAKE_SYSTEM_NAME MATCHES "Linux")
            # Many of these features are optional, so popular combinations are
            # built and each backend is named after the architecture version
            # the combination was first released with
            ggml_add_cpu_backend_variant(armv8.0_1)
            ggml_add_cpu_backend_variant(armv8.2_1    DOTPROD)
            ggml_add_cpu_backend_variant(armv8.2_2    DOTPROD FP16_VECTOR_ARITHMETIC)
            ggml_add_cpu_backend_variant(armv8.2_3    DOTPROD FP16_VECTOR_ARITHMETIC SVE)
            ggml_add_cpu_backend_variant(armv8.6_1    DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8)
            ggml_add_cpu_backend_variant(armv8.6_2    DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2)
            ggml_add_cpu_backend_variant(armv9.2_1    DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SME)
            ggml_add_cpu_backend_variant(armv9.2_2    DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2 SME)
        elseif (CMAKE_SYSTEM_NAME MATCHES "Android")
            # Android-specific backends with SoC-compatible feature sets
            ggml_add_cpu_backend_variant(android_armv8.0_1)
            ggml_add_cpu_backend_variant(android_armv8.2_1    DOTPROD)
            ggml_add_cpu_backend_variant(android_armv8.2_2    DOTPROD FP16_VECTOR_ARITHMETIC)
            ggml_add_cpu_backend_variant(android_armv8.6_1    DOTPROD FP16_VECTOR_ARITHMETIC MATMUL_INT8)
        elseif (APPLE)
            ggml_add_cpu_backend_variant(apple_m1     DOTPROD)
            ggml_add_cpu_backend_variant(apple_m2_m3  DOTPROD MATMUL_INT8)
            ggml_add_cpu_backend_variant(apple_m4     DOTPROD MATMUL_INT8 NOSVE SME)
        else()
            message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}")
        endif()
    elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC")
        if (CMAKE_SYSTEM_NAME MATCHES "Linux")
            ggml_add_cpu_backend_variant(power0)
            ggml_add_cpu_backend_variant(power7_1 POWER7)
            ggml_add_cpu_backend_variant(power7_2 POWER7  VSX)
            ggml_add_cpu_backend_variant(power8_1 POWER8)
            ggml_add_cpu_backend_variant(power8_2 POWER8  VSX)
            ggml_add_cpu_backend_variant(power9   POWER9  VSX)
            ggml_add_cpu_backend_variant(power10  POWER10 VSX)
            ggml_add_cpu_backend_variant(power11  POWER11 VSX)
        else()
            message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}")
        endif()
    else()
        message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}")
    endif()
elseif (GGML_CPU)
    # single CPU backend without a variant tag
    ggml_add_cpu_backend_variant_impl("")
endif()
# Optional backends, registered in this fixed order. Each one is only added
# when its GGML_<NAME> switch is enabled (see ggml_add_backend).
foreach (ggml_backend_name
         BLAS
         CANN
         CUDA
         HIP
         METAL
         MUSA
         RPC
         SYCL
         Vulkan
         WebGPU)
    ggml_add_backend(${ggml_backend_name})
endforeach()
ggml: initial IBM zDNN backend (#14975) * ggml-zdnn: inital backend impl Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> ggml-zdnn: temp change z17 to arch15 Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> ggml-zdnn: fix build bugs Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: tensor->extra logging check Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> ggml-zdnn: add layout name mapping, ztensor information Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> ggml-zdnn: separate logging into its own line Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> ggml-zdnn: add shape comparison Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> ggml-zdnn: add ggml_tensor shape log Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> ggml-zdnn: fix incorrect shape logging Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add output buffer check Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: run compute and store into tensor->extra Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add set_tensor Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add more loggers Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: update set_tensor logging to check only for matmul Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: last working matmul version Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add comments to prevent accidentally deleting lines Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: support op out_prod Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: update op out_prod to use tensor->extra Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: rewrite the backend implementation Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: bugfix new impl Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: fix compiler warnings and bugfixes Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: test ztensor finding in init_tensor Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: 
implement at least 1 op to test Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: assign tensor->extra to buffer Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add check for view tensors to prevent init_tensor Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: rework init_tensor to create new buffers Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: switch to std vector instead of array Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: switch buffers back and set to arbitrary number Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: impl init_tensor Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: update supports_op matmul matrix Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: fix incorrect ztensor shape, reduce memory padding Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: code clean up Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: impl matmul Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: fix compiler error missing type Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: fix missing data transform call Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add bias init_tensor Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: tighten memory usage, change string allocation Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add bias ztensor and data free Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add bias data transform Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add more debug info for extra buffer transform Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add logger to check if mat mul ops go through set_tensor Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: activate bias transform in matmul Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: move weights transform into mulmat Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add more safeguards in matmul 
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: fix sequencing of transforms Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: bugfix transform ztensor vs origtensor Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: figure out why sigtrap is happening Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: fix sigsegv Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: move everything back to local declaration Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: move bias data to local also Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: bring back working matmul Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: rewrite into mre Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: fix missing vector import Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: fix missing vector import in header Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: attempt to fix sigsegv Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: fix missing load tensor Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: fix invalid ztensor buffer release Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add logging to debug free buffer Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: remove free_buffer debug info Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add parmblkformat detections Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add nnpa installed detection Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add zdnn_init call for static libs Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add init_tensor Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: attempt at fixing invalid buffer Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: switch to using deque to fix pointer deref problem Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add weights logging to check Signed-off-by: Aaron Teo 
<aaron.teo1@ibm.com> * ggml-zdnn: attempt to use unique ptr Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add tensor to pre_tfm_desc logging Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add inputs logging Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: disable op_none initialisation for testing Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: fix missing return from init_tensor Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: load ztensors in cgraph exec Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: work on moving output ztensor as well Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: disable logging and breakpoints for full test Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: attempt at manually changing the layout Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: attempt at using default nwhc format instead Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: disable global load ztensor for now Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: fix errorenous output load tensor Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: add guards to prevent loading ztensor if transformed Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: code cleanup Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: bring load ztensor back to init routine Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: code clean up Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: fix ztensor deallocation abort stabilise ggml <-> zdnn api Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: clean up matmul selection Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: clean up project structure Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: update documentation, prepare for upstream Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * chore: add codeowners Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: disable 
batched matmul Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: attempt at fixing tensor views during matmul Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: deny all view tensors directly Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: fix pr comments Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * docs: update ops docs for zdnn Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: redo test-backend-ops for ops.md Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: fix typo in build-s390x.md Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * codeowners: remove taronaeo for now Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * Revert "codeowners: remove taronaeo for now" This reverts commit 411ea4ed78d08778967bd0bd33a6538cfcbe082f. * ggml-zdnn: remove unused ggml_zdnn macro Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> --------- Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
2025-08-15 21:11:22 +08:00
# zDNN backend: added from the ggml-zdnn subdirectory when GGML_ZDNN is enabled
ggml_add_backend(zDNN)
Introducing experimental OpenCL backend with support for Qualcomm Adreno GPUs (#10693) * [cl][adreno] Add Adreno GPU support Add new OpenCL backend to support Adreno GPUs --------- Co-authored-by: Skyler Szot <quic_sszot@quicinc.com> Co-authored-by: Shangqing Gu <quic_shawngu@quicinc.com> Co-authored-by: Alexander Angus <quic_aangus@quicinc.com> Co-authored-by: Hongqiang Wang <quic_wangh@quicinc.com> Co-authored-by: Max Krasnyansky <quic_maxk@quicinc.com> * [cl][ci] Add workflow for CL * [cl][adreno] Fix memory leak for non SMALL_ALLOC path * opencl: integrate backend dyn.load interface and fix compiler and format warnings * opencl: remove small-alloc support and fix build errors for non-opencl platforms * opencl: fixed merge conflict (MUSA added twice in cmake) * opencl-ci: use RUNNER_TEMP instead of github.workspace * opencl: fix embed tool invocation with python3 * opencl: CI workflow fixes * opencl: Clean up small-alloc in CMake files * opencl: cleanup ggml-opencl2 header file * opencl: use ulong for offsets and strides in ADD kernel * opencl: use cl_ulong for all offsets * opencl: use cl_ulong for sizes and strides * opencl: use `GGML_LOG_xxx` instead of `fprintf(stderr, ...)` * opencl: rename backend `opencl2` -> `opencl` * opencl: rename kernel files `ggml-opencl2` -> `ggml-opencl` * opencl: make OpenCL required, remove redundant lib and inc directories * `ggml-base`, `..` and `.` are added by `ggml_add_backend_library` * opencl: rename backend - funcs, structs, etc `opencl2` -> `opencl` * opencl: remove copyright marker since main license already covers * opencl: replace some more OPENCL2 leftovers * opencl: remove limits on `tensor_extra` * opencl: use pools for `tensor_extra` * opencl: fix compiler warnings with GCC and Clang Still getting the warning about clCreateCmdQueue being obsolete. Will fix that separately. * opencl: fail gracefully if opencl devices are not available Also for unsupported GPUs. 
* opencl: fix MSVC builds (string length error) * opencl: check for various requirements, allow deprecated API * opencl: update log message for unsupported GPUs --------- Co-authored-by: Skyler Szot <quic_sszot@quicinc.com> Co-authored-by: Shangqing Gu <quic_shawngu@quicinc.com> Co-authored-by: Alexander Angus <quic_aangus@quicinc.com> Co-authored-by: Hongqiang Wang <quic_wangh@quicinc.com> Co-authored-by: Max Krasnyansky <quic_maxk@quicinc.com>
2024-12-13 12:23:52 -08:00
# OpenCL backend: added from the ggml-opencl subdirectory when GGML_OPENCL is enabled
ggml_add_backend(OpenCL)
# Usage requirements shared by both core libraries: the public headers come
# from ../include inside the build tree and from include/ once installed.
foreach (core_lib ggml-base ggml)
    target_include_directories(${core_lib} PUBLIC
        $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
        $<INSTALL_INTERFACE:include>)
    target_compile_features(${core_lib} PRIVATE c_std_11 cxx_std_17) # don't bump
endforeach()
# System libraries and platform definitions for ggml-base.

# NOTE(review): Threads::Threads is an imported target; the find_package(Threads)
# call that creates it is not in this file - confirm it runs before this point
target_link_libraries(ggml-base PRIVATE Threads::Threads)

# Link the math library when one is found, except on Windows when the
# ONEAPI_ROOT environment variable is defined at configure time.
find_library(MATH_LIBRARY m)
if (MATH_LIBRARY)
    if (NOT WIN32 OR NOT DEFINED ENV{ONEAPI_ROOT})
        target_link_libraries(ggml-base PRIVATE m)
    endif()
endif()

if (CMAKE_SYSTEM_NAME MATCHES "Android")
    # link the platform's dynamic-loading library through CMake's own
    # variable instead of hard-coding the library name
    target_link_libraries(ggml-base PRIVATE ${CMAKE_DL_LIBS})
endif()

# visionOS: _DARWIN_C_SOURCE is attached to the target and propagated to its consumers
if (CMAKE_SYSTEM_NAME MATCHES "visionOS")
    target_compile_definitions(ggml-base PUBLIC _DARWIN_C_SOURCE)
endif()
# Shared-library builds: both core libraries are compiled as position
# independent code, see GGML_BUILD while being built, and expose GGML_SHARED
# to everything that links them.
if (BUILD_SHARED_LIBS)
    set_target_properties(ggml-base ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)

    foreach (core_lib ggml-base ggml)
        target_compile_definitions(${core_lib}
            PRIVATE GGML_BUILD
            PUBLIC  GGML_SHARED)
    endforeach()
endif()