cuda : rename build flag to LLAMA_CUDA (#6299)

2024-03-26 01:16:01 +01:00
parent b06c16ef9f
commit 280345968d
28 changed files with 129 additions and 115 deletions
--- a/scripts/LlamaConfig.cmake.in
+++ b/scripts/LlamaConfig.cmake.in
@@ -3,7 +3,7 @@ set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
 set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
 set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)
 set(LLAMA_BLAS @LLAMA_BLAS@)
-set(LLAMA_CUBLAS @LLAMA_CUBLAS@)
+set(LLAMA_CUDA @LLAMA_CUDA@)
 set(LLAMA_METAL @LLAMA_METAL@)
 set(LLAMA_MPI @LLAMA_MPI@)
 set(LLAMA_CLBLAST @LLAMA_CLBLAST@)
@@ -27,7 +27,7 @@ if (LLAMA_BLAS)
    find_package(BLAS REQUIRED)
 endif()

-if (LLAMA_CUBLAS)
+if (LLAMA_CUDA)
    find_package(CUDAToolkit REQUIRED)
 endif()

--- a/scripts/compare-commits.sh
+++ b/scripts/compare-commits.sh
@@ -23,7 +23,7 @@ fi
 make_opts=""

 if [[ "$backend" == "cuda" ]]; then
-    make_opts="LLAMA_CUBLAS=1"
+    make_opts="LLAMA_CUDA=1"
 fi

 git checkout $1
--- a/scripts/pod-llama.sh
+++ b/scripts/pod-llama.sh
@@ -42,7 +42,7 @@ git clone https://github.com/ggerganov/llama.cpp

 cd llama.cpp

-LLAMA_CUBLAS=1 make -j
+LLAMA_CUDA=1 make -j

 ln -sfn /workspace/TinyLlama-1.1B-Chat-v0.3  ./models/tinyllama-1b
 ln -sfn /workspace/CodeLlama-7b-hf           ./models/codellama-7b
@@ -60,7 +60,7 @@ cd /workspace/llama.cpp
 mkdir build-cublas
 cd build-cublas

-cmake -DLLAMA_CUBLAS=1 ../
+cmake -DLLAMA_CUDA=1 ../
 make -j

 if [ "$1" -eq "0" ]; then
@@ -186,17 +186,17 @@ if [ "$1" -eq "1" ]; then
    # batched
    cd /workspace/llama.cpp

-    LLAMA_CUBLAS=1 make -j && ./batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999
+    LLAMA_CUDA=1 make -j && ./batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999

    # batched-bench
    cd /workspace/llama.cpp

-    LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32
+    LLAMA_CUDA=1 make -j && ./batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32

    # parallel
    cd /workspace/llama.cpp

-    LLAMA_CUBLAS=1 make -j && ./parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb
+    LLAMA_CUDA=1 make -j && ./parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb

 fi

@@ -204,10 +204,10 @@ fi
 #if [ "$1" -eq "7" ]; then
 #    cd /workspace/llama.cpp
 #
-#    LLAMA_CUBLAS=1 make -j && ./speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
+#    LLAMA_CUDA=1 make -j && ./speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
 #fi

 # more benches
-#LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf  4096 1 99 1 512,3200 128,128,800 1
-#LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
+#LLAMA_CUDA=1 make -j && ./batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf  4096 1 99 1 512,3200 128,128,800 1
+#LLAMA_CUDA=1 make -j && ./batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1

--- a/scripts/server-llm.sh
+++ b/scripts/server-llm.sh
@@ -380,7 +380,7 @@ fi

 if [[ "$backend" == "cuda" ]]; then
    printf "[+] Building with CUDA backend\n"
-    LLAMA_CUBLAS=1 make -j server $log
+    LLAMA_CUDA=1 make -j server $log
 elif [[ "$backend" == "cpu" ]]; then
    printf "[+] Building with CPU backend\n"
    make -j server $log