go-skynet · mudler · Jun 10, 2023 · Jun 10, 2023 · Jun 10, 2023 · Jun 10, 2023
diff --git a/Makefile b/Makefile
@@ -191,10 +191,10 @@ llama.cpp/k_quants.o: llama.cpp/ggml.o
 	cd build && cp -rf CMakeFiles/ggml.dir/k_quants.c.o ../llama.cpp/k_quants.o
 
 llama.cpp/llama.o:
-	$(MAKE) -C llama.cpp llama.o
+	cd build && $(MAKE) llama.o && cp -rf CMakeFiles/llama.dir/llama.cpp.o ../llama.cpp/llama.o
 
 llama.cpp/common.o:
-	$(MAKE) -C llama.cpp common.o
+	cd build && $(MAKE) common && cp -rf examples/CMakeFiles/common.dir/common.cpp.o ../llama.cpp/common.o
 
 binding.o: llama.cpp/ggml.o llama.cpp/llama.o llama.cpp/common.o
 	$(CXX) $(CXXFLAGS) -I./llama.cpp -I./llama.cpp/examples binding.cpp -o binding.o -c $(LDFLAGS)

diff --git a/README.md b/README.md
@@ -73,7 +73,9 @@ ggml_opencl: device FP16 support: true
 
 ```
 BUILD_TYPE=metal make libbinding.a
-CGO_LDFLAGS="-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders" LIBRARY_PATH=$PWD C_INCLUDE_PATH=$PWD go run ./examples -m "/model/path/here" -t 14
+CGO_LDFLAGS="-framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders" LIBRARY_PATH=$PWD C_INCLUDE_PATH=$PWD go build ./examples/main.go
+cp build/bin/ggml-metal.metal .
+./main -m "/model/path/here" -t 1 -ngl 1
 ```
 
 Enjoy!

diff --git a/examples/main.go b/examples/main.go
@@ -13,15 +13,17 @@ import (
 )
 
 var (
-	threads = 4
-	tokens  = 128
+	threads   = 4
+	tokens    = 128
+	gpulayers = 0
 )
 
 func main() {
 	var model string
 
 	flags := flag.NewFlagSet(os.Args[0], flag.ExitOnError)
 	flags.StringVar(&model, "m", "./models/7B/ggml-model-q4_0.bin", "path to q4_0.bin model file to load")
+	flags.IntVar(&gpulayers, "ngl", 0, "Number of GPU layers to use")
 	flags.IntVar(&threads, "t", runtime.NumCPU(), "number of threads to use during computation")
 	flags.IntVar(&tokens, "n", 512, "number of tokens to predict")
 
@@ -30,7 +32,7 @@ func main() {
 		fmt.Printf("Parsing program arguments failed: %s", err)
 		os.Exit(1)
 	}
-	l, err := llama.New(model, llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings)
+	l, err := llama.New(model, llama.EnableF16Memory, llama.SetContext(128), llama.EnableEmbeddings, llama.SetGPULayers(gpulayers))
 	if err != nil {
 		fmt.Println("Loading the model failed:", err.Error())
 		os.Exit(1)