@@ -5,24 +5,169 @@ const builtin = @import("builtin");
5
5
// Export the C-ABI `memcpy` and `memmove` symbols, unless the object format
// is `.c`, in which case no symbols are exported from here.
comptime {
    const emit_symbols = builtin.object_format != .c;
    if (emit_symbols) {
        @export(&memcpy, .{
            .name = "memcpy",
            .linkage = common.linkage,
            .visibility = common.visibility,
        });
        @export(&memmove, .{
            .name = "memmove",
            .linkage = common.linkage,
            .visibility = common.visibility,
        });
    }
}
10
11
11
- pub fn memcpy (noalias dest : ? [* ]u8 , noalias src : ? [* ]const u8 , len : usize ) callconv (.C ) ? [* ]u8 {
12
- @setRuntimeSafety (false );
12
/// True only when compiling with the LLVM backend for one of the 32-bit
/// ARM/Thumb architectures listed below; false for every other target or
/// backend. `memcpy` and `memmove` use plain byte loops when this is set.
// NOTE(review): the name suggests the vector-based paths cannot be lowered by
// LLVM on these targets — confirm the exact limitation with the author.
const llvm_cannot_lower = builtin.zig_backend == .stage2_llvm and switch (builtin.cpu.arch) {
    .arm, .armeb, .thumb, .thumbeb => true,
    else => false,
};
13
16
14
- if (len != 0 ) {
15
- var d = dest .? ;
16
- var s = src .? ;
17
- var n = len ;
18
- while (true ) {
19
- d [0 ] = s [0 ];
20
- n -= 1 ;
21
- if (n == 0 ) break ;
22
- d += 1 ;
23
- s += 1 ;
17
/// C ABI `memcpy`: copies `len` bytes from `opt_src` to `opt_dest` and
/// returns `opt_dest`.
///
/// Both pointers are `noalias`. Unless `llvm_cannot_lower` is set, the work is
/// delegated to `memmove`; otherwise a forward byte-by-byte loop is used.
/// The pointers are only unwrapped inside the loop body, so a zero `len` never
/// dereferences them on the fallback path.
fn memcpy(noalias opt_dest: ?[*]u8, noalias opt_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
    if (!llvm_cannot_lower) return memmove(opt_dest, opt_src, len);

    var offset: usize = 0;
    while (offset < len) : (offset += 1) {
        opt_dest.?[offset] = opt_src.?[offset];
    }
    return opt_dest;
}
25
+
26
/// A port of https://github.com/facebook/folly/blob/1c8bc50e88804e2a7361a57cd9b551dd10f6c5fd/folly/memcpy.S
///
/// C ABI `memmove`: copies `len` bytes from `opt_src` to `opt_dest` and
/// returns the destination pointer.
///
/// When `llvm_cannot_lower` is set, a byte loop is used whose direction is
/// picked by comparing the two addresses. Otherwise the copy is dispatched on
/// `len`:
///   * 0          -> nothing to do
///   * 1          -> single byte store
///   * 2..3       -> `blockCopy` with 2-byte blocks
///   * 4..7       -> `blockCopy` with 4-byte blocks
///   * 8..16      -> `blockCopy` with 8-byte blocks
///   * 17..32     -> `blockCopy` with 16-byte blocks
///   * 33..256    -> `copyLong`
///   * above 256  -> `copyMove`
fn memmove(opt_dest: ?[*]u8, opt_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
    if (llvm_cannot_lower) {
        if (@intFromPtr(opt_dest) >= @intFromPtr(opt_src)) {
            // Destination is not below the source: walk from the last byte down.
            var remaining = len;
            while (remaining > 0) {
                remaining -= 1;
                opt_dest.?[remaining] = opt_src.?[remaining];
            }
        } else {
            // Destination is below the source: walk from the first byte up.
            var offset: usize = 0;
            while (offset < len) : (offset += 1) {
                opt_dest.?[offset] = opt_src.?[offset];
            }
        }
        return opt_dest;
    }

    if (len == 0) {
        @branchHint(.unlikely);
        return opt_dest;
    }

    // A non-zero length means both pointers are expected to be non-null.
    const dest = opt_dest.?;
    const src = opt_src.?;

    if (len < 8) {
        @branchHint(.unlikely);
        if (len >= 4) {
            @branchHint(.unlikely);
            blockCopy(dest, src, 4, len);
        } else if (len == 1) {
            @branchHint(.unlikely);
            dest[0] = src[0];
        } else {
            blockCopy(dest, src, 2, len);
        }
        return dest;
    }

    if (len > 32) {
        @branchHint(.unlikely);
        if (len <= 256) {
            copyLong(dest, src, len);
        } else {
            @branchHint(.unlikely);
            copyMove(dest, src, len);
        }
        return dest;
    }

    if (len > 16) {
        @branchHint(.unlikely);
        blockCopy(dest, src, 16, len);
    } else {
        blockCopy(dest, src, 8, len);
    }
    return dest;
}
85
+
86
/// Copies `len` bytes from `src` to `dest` using two `block_size`-byte
/// transfers: one anchored at the start of the range and one anchored at its
/// end. The two transfers may overlap each other.
///
/// Both blocks are read into vector values before either one is written.
/// `len` must be at least `block_size` (otherwise `len - block_size` wraps),
/// and the two blocks only cover the whole range when `len` is at most
/// `2 * block_size`.
inline fn blockCopy(dest: [*]u8, src: [*]const u8, block_size: comptime_int, len: usize) void {
    const Block = @Vector(block_size, u8);
    const tail_offset = len - block_size;

    // Unaligned views of the leading and trailing source blocks.
    const head_ptr: *align(1) const Block = src[0..block_size];
    const tail_ptr: *align(1) const Block = src[tail_offset..][0..block_size];

    const head = head_ptr.*;
    const tail = tail_ptr.*;

    dest[0..block_size].* = head;
    dest[tail_offset..][0..block_size].* = tail;
}
92
+
93
/// Copies `len` bytes from `src` to `dest` in 32-byte chunks taken
/// symmetrically from both ends of the range.
///
/// At each level one more chunk is loaded from the front and one more from the
/// back. As soon as the chunks loaded so far are enough to cover `len`
/// (`len <= 64 * level`), all of them are stored and the function returns, so
/// every load happens before the first store.
///
/// Callers in this file pass lengths from 33 to 256. With `len` above 256 no
/// level matches and nothing is stored; with `len` below 32 the first tail
/// offset wraps.
inline fn copyLong(dest: [*]u8, src: [*]const u8, len: usize) void {
    const Chunk = @Vector(32, u8);
    var heads: [4]Chunk = undefined;
    var tails: [4]Chunk = undefined;

    comptime var level: usize = 0;
    inline while (level < 4) : (level += 1) {
        // Number of chunks loaded from each end once this level is done.
        const loaded = level + 1;

        heads[level] = src[level * 32 ..][0..32].*;
        tails[level] = src[len - loaded * 32 ..][0..32].*;

        if (len <= loaded * 64) {
            @branchHint(.unlikely);
            var k: usize = 0;
            while (k < loaded) : (k += 1) {
                const near = k * 32;
                const far = len - (near + 32);
                dest[near..][0..32].* = heads[k];
                dest[far..][0..32].* = tails[k];
            }
            return;
        }
    }
}
110
+
111
/// Chooses the copy direction for a large move based on how the two regions
/// are laid out in memory.
///
/// * `src` at or above `dest`: forward copy.
/// * `src` below `dest` and `src + len` reaching past `dest` (the regions
///   overlap): backward copy via `overlapBwd`.
/// * `src` below `dest` without overlap: forward copy.
inline fn copyMove(dest: [*]u8, src: [*]const u8, len: usize) void {
    const src_addr = @intFromPtr(src);
    const dest_addr = @intFromPtr(dest);

    if (src_addr < dest_addr) {
        if (src_addr + len <= dest_addr) {
            copyForward(dest, src, len);
        } else {
            @branchHint(.unlikely);
            overlapBwd(dest, src, len);
        }
    } else {
        @branchHint(.unlikely);
        copyForward(dest, src, len);
    }
}
122
+
123
/// Copies `len` bytes from `src` to `dest` front to back.
///
/// The bulk of the range is moved 128 bytes per iteration as four 32-byte
/// chunks. Whatever is left after the last full 128-byte step is finished
/// either by storing the final 32 source bytes (captured before any store) or,
/// when more than 32 bytes remain, by handing the remainder to `copyLong`.
///
/// `len` must be at least 32, since the last 32 source bytes are read
/// unconditionally. `copyMove` also uses this routine when the regions overlap
/// with `src` at or above `dest`.
inline fn copyForward(dest: [*]u8, src: [*]const u8, len: usize) void {
    const Chunk = @Vector(32, u8);

    // Snapshot the last 32 source bytes up front: once the loop has run, an
    // overlapping destination may already have overwritten them.
    const tail: Chunk = src[len - 32 ..][0..32].*;

    // Largest multiple of 128 that does not exceed `len`.
    const bulk_end: usize = len & ~@as(usize, 127);
    var offset: usize = 0;

    while (offset < bulk_end) : (offset += 128) {
        inline for (.{ 0, 32, 64, 96 }) |lane| {
            // Stage every chunk through a vector value so the 32-byte load is
            // complete before the store begins. The regions may overlap here,
            // and a direct array-to-array assignment may be lowered as a block
            // copy that assumes its operands do not overlap.
            const chunk: Chunk = src[offset + lane ..][0..32].*;
            dest[offset + lane ..][0..32].* = chunk;
        }
    }

    const remaining = len - offset;
    if (remaining <= 32) {
        @branchHint(.unlikely);
        dest[len - 32 ..][0..32].* = tail;
    } else {
        copyLong(dest[offset..], src[offset..], remaining);
    }
}
143
+
144
/// Copies `len` bytes from `src` to `dest` back to front, for the case where
/// `dest` lies above `src` and the regions overlap.
///
/// The first 128 and the last 32 source bytes are captured before anything is
/// written. The main loop then moves 128 bytes per iteration from high to low
/// addresses, using 32-byte-aligned stores, and stops once at most 128 source
/// bytes remain below the cursor. Finally the captured head and tail are
/// written, covering whatever the loop did not reach.
///
/// `len` must be at least 128 for the initial loads to stay inside the source
/// range; the caller in this file only uses it for lengths above 256.
inline fn overlapBwd(dest: [*]u8, src: [*]const u8, len: usize) void {
    const Chunk = @Vector(32, u8);

    // Snapshots taken before any store can clobber the source.
    const tail: Chunk = src[len - 32 ..][0..32].*;
    var head: [4]Chunk = undefined;
    inline for (0..4) |k| head[k] = src[k * 32 ..][0..32].*;

    // Bytes by which the start of the last 32-byte destination chunk is
    // misaligned; trimming them makes every store in the loop 32-byte aligned.
    const misalign: usize = (@intFromPtr(dest) + len - 32) & 31;
    const aligned_len = len - misalign;

    var from = src + aligned_len;
    var to = dest + aligned_len;

    while (@intFromPtr(from) > @intFromPtr(src + 128)) {
        from -= 128;
        to -= 128;

        // Load the whole 128-byte batch before storing any part of it.
        var batch: [4]Chunk = undefined;
        inline for (0..4) |k| {
            batch[k] = @as(*align(1) const Chunk, @ptrCast(from + k * 32)).*;
        }
        inline for (0..4) |k| {
            @as(*align(32) Chunk, @alignCast(@ptrCast(to + k * 32))).* = batch[k];
        }
    }

    inline for (0..4) |k| dest[k * 32 ..][0..32].* = head[k];
    dest[len - 32 ..][0..32].* = tail;
}
0 commit comments