Skip to content

Commit 307ef9a

Browse files
committed
update Makefile
1 parent dddf377 commit 307ef9a

File tree

9 files changed

+78
-49
lines changed

9 files changed

+78
-49
lines changed

.devops/nix/package.nix

+2-2
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,9 @@ effectiveStdenv.mkDerivation (finalAttrs: {
126126
};
127127

128128
postPatch = ''
129-
substituteInPlace ./ggml/src/ggml-metal.m \
129+
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
130130
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
131-
substituteInPlace ./ggml/src/ggml-metal.m \
131+
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
132132
--replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
133133
'';
134134

Makefile

+51-34
Original file line numberDiff line numberDiff line change
@@ -523,11 +523,11 @@ ifndef GGML_NO_ACCELERATE
523523
# Mac OS - include Accelerate framework.
524524
# `-framework Accelerate` works both with Apple Silicon and Mac Intel
525525
ifeq ($(UNAME_S),Darwin)
526-
MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
526+
MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
527527
MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
528528
MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
529529
MK_LDFLAGS += -framework Accelerate
530-
OBJ_GGML += ggml/src/ggml-blas.o
530+
OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
531531
endif
532532
endif # GGML_NO_ACCELERATE
533533

@@ -552,36 +552,36 @@ ifdef GGML_OPENBLAS
552552
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
553553
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
554554
MK_LDFLAGS += $(shell pkg-config --libs openblas)
555-
OBJ_GGML += ggml/src/ggml-blas.o
555+
OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
556556
endif # GGML_OPENBLAS
557557

558558
ifdef GGML_OPENBLAS64
559559
MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
560560
MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
561561
MK_LDFLAGS += $(shell pkg-config --libs openblas64)
562-
OBJ_GGML += ggml/src/ggml-blas.o
562+
OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
563563
endif # GGML_OPENBLAS64
564564

565565
ifdef GGML_BLIS
566566
MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_BLIS -I/usr/local/include/blis -I/usr/include/blis
567567
MK_LDFLAGS += -lblis -L/usr/local/lib
568-
OBJ_GGML += ggml/src/ggml-blas.o
568+
OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
569569
endif # GGML_BLIS
570570

571571
ifdef GGML_NVPL
572572
MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_NVPL -DNVPL_ILP64 -I/usr/local/include/nvpl_blas -I/usr/include/nvpl_blas
573573
MK_LDFLAGS += -L/usr/local/lib -lnvpl_blas_core -lnvpl_blas_ilp64_gomp
574-
OBJ_GGML += ggml/src/ggml-blas.o
574+
OBJ_GGML += ggml/src/ggml-blas/ggml-blas.o
575575
endif # GGML_NVPL
576576

577577
ifndef GGML_NO_LLAMAFILE
578578
MK_CPPFLAGS += -DGGML_USE_LLAMAFILE
579-
OBJ_GGML += ggml/src/llamafile/sgemm.o
579+
OBJ_GGML += ggml/src/ggml-cpu/llamafile/sgemm.o
580580
endif
581581

582582
ifndef GGML_NO_AMX
583583
MK_CPPFLAGS += -DGGML_USE_AMX
584-
OBJ_GGML += ggml/src/ggml-amx.o ggml/src/ggml-amx/mmq.o
584+
OBJ_GGML += ggml/src/ggml-amx/ggml-amx.o ggml/src/ggml-amx/mmq.o
585585
endif
586586

587587
ifdef GGML_RPC
@@ -623,7 +623,7 @@ ifdef GGML_CUDA
623623
MK_NVCCFLAGS += -use_fast_math
624624
endif # GGML_MUSA
625625

626-
OBJ_GGML += ggml/src/ggml-cuda.o
626+
OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
627627
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
628628
OBJ_GGML += $(OBJ_CUDA_TMPL)
629629

@@ -742,8 +742,8 @@ ggml/src/ggml-cuda/%.o: \
742742
ggml/src/ggml-cuda/common.cuh
743743
$(NVCC_COMPILE)
744744

745-
ggml/src/ggml-cuda.o: \
746-
ggml/src/ggml-cuda.cu \
745+
ggml/src/ggml-cuda/ggml-cuda.o: \
746+
ggml/src/ggml-cuda/ggml-cuda.cu \
747747
ggml/include/ggml-cuda.h \
748748
ggml/include/ggml.h \
749749
ggml/include/ggml-backend.h \
@@ -852,12 +852,12 @@ ifdef GGML_CUDA_NO_PEER_COPY
852852
HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
853853
endif # GGML_CUDA_NO_PEER_COPY
854854

855-
OBJ_GGML += ggml/src/ggml-cuda.o
855+
OBJ_GGML += ggml/src/ggml-cuda/ggml-cuda.o
856856
OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
857857
OBJ_GGML += $(OBJ_CUDA_TMPL)
858858

859-
ggml/src/ggml-cuda.o: \
860-
ggml/src/ggml-cuda.cu \
859+
ggml/src/ggml-cuda/ggml-cuda.o: \
860+
ggml/src/ggml-cuda/ggml-cuda.cu \
861861
ggml/include/ggml-cuda.h \
862862
ggml/include/ggml.h \
863863
ggml/include/ggml-backend.h \
@@ -877,7 +877,7 @@ endif # GGML_HIPBLAS
877877
ifdef GGML_METAL
878878
MK_CPPFLAGS += -DGGML_USE_METAL
879879
MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
880-
OBJ_GGML += ggml/src/ggml-metal.o
880+
OBJ_GGML += ggml/src/ggml-metal/ggml-metal.o
881881

882882
ifdef GGML_METAL_USE_BF16
883883
MK_CPPFLAGS += -DGGML_METAL_USE_BF16
@@ -892,18 +892,18 @@ endif
892892
endif # GGML_METAL
893893

894894
ifdef GGML_METAL
895-
ggml/src/ggml-metal.o: \
896-
ggml/src/ggml-metal.m \
895+
ggml/src/ggml-metal/ggml-metal.o: \
896+
ggml/src/ggml-metal/ggml-metal.m \
897897
ggml/include/ggml-metal.h \
898898
ggml/include/ggml.h
899899
$(CC) $(CFLAGS) -c $< -o $@
900900

901901
ifdef GGML_METAL_EMBED_LIBRARY
902902
ggml/src/ggml-metal-embed.o: \
903-
ggml/src/ggml-metal.metal \
903+
ggml/src/ggml-metal/ggml-metal.metal \
904904
ggml/src/ggml-common.h
905905
@echo "Embedding Metal library"
906-
@sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal
906+
@sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal
907907
$(eval TEMP_ASSEMBLY=$(shell mktemp -d))
908908
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s
909909
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
@@ -919,11 +919,16 @@ endif # GGML_METAL
919919

920920
OBJ_GGML += \
921921
ggml/src/ggml.o \
922-
ggml/src/ggml-cpu.o \
922+
ggml/src/ggml-aarch64.o \
923923
ggml/src/ggml-alloc.o \
924924
ggml/src/ggml-backend.o \
925+
ggml/src/ggml-backend-reg.o \
925926
ggml/src/ggml-quants.o \
926-
ggml/src/ggml-aarch64.o
927+
ggml/src/ggml-threading.o \
928+
ggml/src/ggml-cpu/ggml-cpu.o \
929+
ggml/src/ggml-cpu/ggml-cpu-cpp.o \
930+
ggml/src/ggml-cpu/ggml-cpu-aarch64.o \
931+
ggml/src/ggml-cpu/ggml-cpu-quants.o
927932

928933
OBJ_LLAMA = \
929934
src/llama.o \
@@ -1051,12 +1056,23 @@ ggml/src/ggml.o: \
10511056
ggml/include/ggml.h
10521057
$(CC) $(CFLAGS) -c $< -o $@
10531058

1054-
ggml/src/ggml-cpu.o: \
1055-
ggml/src/ggml-cpu.c \
1059+
ggml/src/ggml-threading.o: \
1060+
ggml/src/ggml-threading.cpp \
1061+
ggml/include/ggml.h
1062+
$(CXX) $(CXXFLAGS) -c $< -o $@
1063+
1064+
ggml/src/ggml-cpu/ggml-cpu.o: \
1065+
ggml/src/ggml-cpu/ggml-cpu.c \
10561066
ggml/include/ggml.h \
10571067
ggml/src/ggml-common.h
10581068
$(CC) $(CFLAGS) -c $< -o $@
10591069

1070+
ggml/src/ggml-cpu/ggml-cpu-cpp.o: \
1071+
ggml/src/ggml-cpu/ggml-cpu.cpp \
1072+
ggml/include/ggml.h \
1073+
ggml/src/ggml-common.h
1074+
$(CXX) $(CXXFLAGS) -c $< -o $@
1075+
10601076
ggml/src/ggml-alloc.o: \
10611077
ggml/src/ggml-alloc.c \
10621078
ggml/include/ggml.h \
@@ -1084,22 +1100,22 @@ ggml/src/ggml-aarch64.o: \
10841100
ggml/src/ggml-common.h
10851101
$(CC) $(CFLAGS) -c $< -o $@
10861102

1087-
ggml/src/ggml-blas.o: \
1088-
ggml/src/ggml-blas.cpp \
1103+
ggml/src/ggml-blas/ggml-blas.o: \
1104+
ggml/src/ggml-blas/ggml-blas.cpp \
10891105
ggml/include/ggml-blas.h
10901106
$(CXX) $(CXXFLAGS) -c $< -o $@
10911107

10921108
ifndef GGML_NO_LLAMAFILE
1093-
ggml/src/llamafile/sgemm.o: \
1094-
ggml/src/llamafile/sgemm.cpp \
1095-
ggml/src/llamafile/sgemm.h \
1109+
ggml/src/ggml-cpu/llamafile/sgemm.o: \
1110+
ggml/src/ggml-cpu/llamafile/sgemm.cpp \
1111+
ggml/src/ggml-cpu/llamafile/sgemm.h \
10961112
ggml/include/ggml.h
1097-
$(CXX) $(CXXFLAGS) -c $< -o $@
1113+
$(CXX) $(CXXFLAGS) -c $< -o $@ -I ggml/src -I ggml/src/ggml-cpu
10981114
endif # GGML_NO_LLAMAFILE
10991115

11001116
ifndef GGML_NO_AMX
1101-
ggml/src/ggml-amx.o: \
1102-
ggml/src/ggml-amx.cpp \
1117+
ggml/src/ggml-amx/ggml-amx.o: \
1118+
ggml/src/ggml-amx/ggml-amx.cpp \
11031119
ggml/include/ggml-amx.h
11041120
$(CXX) $(CXXFLAGS) -c $< -o $@
11051121

@@ -1250,10 +1266,11 @@ clean:
12501266
rm -rvf ggml/*.a
12511267
rm -rvf ggml/*.dll
12521268
rm -rvf ggml/*.so
1253-
rm -vrf ggml/src/*.o
1254-
rm -rvf ggml/src/llamafile/*.o
1269+
rm -rvf ggml/src/*.o
1270+
rm -rvf ggml/src/ggml-cpu/*.o
1271+
rm -rvf ggml/src/ggml-cpu/llamafile/*.o
12551272
rm -rvf common/build-info.cpp
1256-
rm -vrf ggml/src/ggml-metal-embed.metal
1273+
rm -vrf ggml/src/ggml-metal/ggml-metal-embed.metal
12571274
rm -vrf ggml/src/ggml-cuda/*.o
12581275
rm -vrf ggml/src/ggml-cuda/template-instances/*.o
12591276
rm -vrf ggml/src/ggml-amx/*.o

Package.swift

+2-2
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ var cSettings: [CSetting] = [
3030
]
3131

3232
#if canImport(Darwin)
33-
sources.append("ggml/src/ggml-metal.m")
34-
resources.append(.process("ggml/src/ggml-metal.metal"))
33+
sources.append("ggml/src/ggml-metal/ggml-metal.m")
34+
resources.append(.process("ggml/src/ggml-metal/ggml-metal.metal"))
3535
linkerSettings.append(.linkedFramework("Accelerate"))
3636
cSettings.append(
3737
contentsOf: [

ggml/src/ggml-amx/ggml-amx.cpp

+10-1
Original file line numberDiff line numberDiff line change
@@ -421,9 +421,18 @@ ggml_backend_reg_t ggml_backend_amx_reg(void) {
421421

422422
#else // if defined(__AMX_INT8__)
423423

424+
ggml_backend_buffer_type_t ggml_backend_amx_buffer_type(void) {
425+
return nullptr;
426+
}
427+
428+
bool ggml_backend_is_amx(ggml_backend_t backend) {
429+
GGML_UNUSED(backend);
430+
return false;
431+
}
432+
424433
ggml_backend_t ggml_backend_amx_init(void) {
425434
fprintf(stderr, "GGML is not compiled with AMX support!\n");
426-
return ggml_backend_t{};
435+
return nullptr;
427436
}
428437

429438
void ggml_backend_amx_set_n_threads(ggml_backend_t backend_amx, int n_threads) {

ggml/src/ggml-cpu/ggml-cpu.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
#endif
4545

4646
#ifdef GGML_USE_LLAMAFILE
47-
#include <llamafile/sgemm.h>
47+
#include "llamafile/sgemm.h"
4848
#endif
4949

5050
#if defined(_MSC_VER)

ggml/src/ggml-impl.h

+4
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
#include <arm_neon.h>
2323
#endif
2424

25+
#if defined(__F16C__)
26+
#include <immintrin.h>
27+
#endif
28+
2529
#ifdef __cplusplus
2630
extern "C" {
2731
#endif

ggml/src/ggml-metal/ggml-metal.metal

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,8 @@ using namespace metal;
1515
// ref: https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf
1616
//
1717
// cmd:
18-
// .../usr/bin/metal -dM -E -c ggml/src/ggml-metal.metal
19-
// .../usr/bin/metal -dM -E -c -target air64-apple-ios14.0 ggml/src/ggml-metal.metal
18+
// .../usr/bin/metal -dM -E -c ggml/src/ggml-metal/ggml-metal.metal
19+
// .../usr/bin/metal -dM -E -c -target air64-apple-ios14.0 ggml/src/ggml-metal/ggml-metal.metal
2020
//
2121
#if __METAL_VERSION__ < 310 && defined(GGML_METAL_USE_BF16)
2222
#undef GGML_METAL_USE_BF16

ggml/src/ggml.c

+6-6
Original file line numberDiff line numberDiff line change
@@ -368,7 +368,7 @@ void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, int64_t n) {
368368
void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
369369
int64_t i = 0;
370370
#if defined(__F16C__)
371-
if (ggml_cpu_has_f16c()) {
371+
//if (ggml_cpu_has_f16c()) {
372372
for (; i + 7 < n; i += 8) {
373373
__m256 x_vec = _mm256_loadu_ps(x + i);
374374
__m128i y_vec = _mm256_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
@@ -379,7 +379,7 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
379379
__m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT);
380380
_mm_storel_epi64((__m128i *)(y + i), y_vec);
381381
}
382-
}
382+
//}
383383
#endif
384384
for (; i < n; i++) {
385385
y[i] = GGML_FP32_TO_FP16(x[i]);
@@ -389,7 +389,7 @@ void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, int64_t n) {
389389
void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
390390
int64_t i = 0;
391391
#if defined(__AVX512F__)
392-
if (ggml_cpu_has_avx512()) {
392+
//if (ggml_cpu_has_avx512()) {
393393
for (; i + 16 <= n; i += 16) {
394394
_mm512_storeu_ps(y + i,
395395
_mm512_castsi512_ps(
@@ -399,10 +399,10 @@ void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
399399
(const __m256i *)(x + i))),
400400
16)));
401401
}
402-
}
402+
//}
403403
#endif
404404
#if defined(__AVX2__)
405-
if (ggml_cpu_has_avx2()) {
405+
//if (ggml_cpu_has_avx2()) {
406406
for (; i + 8 <= n; i += 8) {
407407
_mm256_storeu_ps(y + i,
408408
_mm256_castsi256_ps(
@@ -412,7 +412,7 @@ void ggml_bf16_to_fp32_row(const ggml_bf16_t * x, float * y, int64_t n) {
412412
(const __m128i *)(x + i))),
413413
16)));
414414
}
415-
}
415+
//}
416416
#endif
417417
for (; i < n; i++) {
418418
y[i] = GGML_BF16_TO_FP32(x[i]);

pocs/vdot/vdot.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,6 @@ int main(int argc, char** argv) {
237237
int n4 = useQ4_1 ? kVecSize / QK4_1 : kVecSize / QK4_0; n4 = 64*((n4 + 63)/64);
238238
int n8 = kVecSize / QK8_0; n8 = 64*((n8 + 63)/64);
239239

240-
const auto * funcs = ggml_get_type_traits(useQ4_1 ? GGML_TYPE_Q4_1 : GGML_TYPE_Q4_0);
241240
const auto * funcs_cpu = ggml_get_type_traits_cpu(useQ4_1 ? GGML_TYPE_Q4_1 : GGML_TYPE_Q4_0);
242241

243242
std::vector<block_q4_0> q40;

0 commit comments

Comments
 (0)