|
|
|
|
@@ -42,7 +42,7 @@ git clone https://github.com/ggerganov/llama.cpp
|
|
|
|
|
|
|
|
|
|
cd llama.cpp
|
|
|
|
|
|
|
|
|
|
LLAMA_CUBLAS=1 make -j
|
|
|
|
|
LLAMA_CUDA=1 make -j
|
|
|
|
|
|
|
|
|
|
ln -sfn /workspace/TinyLlama-1.1B-Chat-v0.3 ./models/tinyllama-1b
|
|
|
|
|
ln -sfn /workspace/CodeLlama-7b-hf ./models/codellama-7b
|
|
|
|
|
@@ -60,7 +60,7 @@ cd /workspace/llama.cpp
|
|
|
|
|
mkdir build-cublas
|
|
|
|
|
cd build-cublas
|
|
|
|
|
|
|
|
|
|
cmake -DLLAMA_CUBLAS=1 ../
|
|
|
|
|
cmake -DLLAMA_CUDA=1 ../
|
|
|
|
|
make -j
|
|
|
|
|
|
|
|
|
|
if [ "$1" -eq "0" ]; then
|
|
|
|
|
@@ -186,17 +186,17 @@ if [ "$1" -eq "1" ]; then
|
|
|
|
|
# batched
|
|
|
|
|
cd /workspace/llama.cpp
|
|
|
|
|
|
|
|
|
|
LLAMA_CUBLAS=1 make -j && ./batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999
|
|
|
|
|
LLAMA_CUDA=1 make -j && ./batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999
|
|
|
|
|
|
|
|
|
|
# batched-bench
|
|
|
|
|
cd /workspace/llama.cpp
|
|
|
|
|
|
|
|
|
|
LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32
|
|
|
|
|
LLAMA_CUDA=1 make -j && ./batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32
|
|
|
|
|
|
|
|
|
|
# parallel
|
|
|
|
|
cd /workspace/llama.cpp
|
|
|
|
|
|
|
|
|
|
LLAMA_CUBLAS=1 make -j && ./parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb
|
|
|
|
|
LLAMA_CUDA=1 make -j && ./parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb
|
|
|
|
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
@@ -204,10 +204,10 @@ fi
|
|
|
|
|
#if [ "$1" -eq "7" ]; then
|
|
|
|
|
# cd /workspace/llama.cpp
|
|
|
|
|
#
|
|
|
|
|
# LLAMA_CUBLAS=1 make -j && ./speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
|
|
|
|
|
# LLAMA_CUDA=1 make -j && ./speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
|
|
|
|
|
#fi
|
|
|
|
|
|
|
|
|
|
# more benches
|
|
|
|
|
#LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
|
|
|
|
|
#LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
|
|
|
|
|
#LLAMA_CUDA=1 make -j && ./batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
|
|
|
|
|
#LLAMA_CUDA=1 make -j && ./batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
|
|
|
|
|
|
|
|
|
|
|