Add quantize-stats command for testing quantization (#728)

Command that calculates some statistics over the errors introduced by quantization, like mean square error, max error and some percentile errors for layer weights. Should be useful for testing quantization improvements. Exposes some internal state from ggml and llama for testing
2023-04-08 00:09:18 +02:00
parent 698f7b5d63
commit 62cfc54f77
9 changed files with 415 additions and 17 deletions
--- a/5
+++ b/5
@@ -149,7 +149,7 @@ common.o: examples/common.cpp examples/common.h
 	$(CXX) $(CXXFLAGS) -c examples/common.cpp -o common.o

 clean:
-	rm -vf *.o main quantize perplexity embedding
+	rm -vf *.o main quantize quantize-stats perplexity embedding

 main: examples/main/main.cpp ggml.o llama.o common.o
 	$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o llama.o common.o -o main $(LDFLAGS)
@@ -160,6 +160,9 @@ main: examples/main/main.cpp ggml.o llama.o common.o
 quantize: examples/quantize/quantize.cpp ggml.o llama.o
 	$(CXX) $(CXXFLAGS) examples/quantize/quantize.cpp ggml.o llama.o -o quantize $(LDFLAGS)

+quantize-stats: examples/quantize-stats/quantize-stats.cpp ggml.o llama.o
+	$(CXX) $(CXXFLAGS) examples/quantize-stats/quantize-stats.cpp ggml.o llama.o -o quantize-stats $(LDFLAGS)
+
 perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o
 	$(CXX) $(CXXFLAGS) examples/perplexity/perplexity.cpp ggml.o llama.o common.o -o perplexity $(LDFLAGS)