Skip to content

Commit f38d7a9

Browse files
authored
Merge pull request #22513 from ziglang/memcpy
enhance memcpy and remove redundant implementations
2 parents f9a4377 + f7f6217 commit f38d7a9

File tree

51 files changed

+163
-3267
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+163
-3267
lines changed

lib/compiler_rt.zig

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -233,7 +233,6 @@ comptime {
233233

234234
_ = @import("compiler_rt/memcpy.zig");
235235
_ = @import("compiler_rt/memset.zig");
236-
_ = @import("compiler_rt/memmove.zig");
237236
_ = @import("compiler_rt/memcmp.zig");
238237
_ = @import("compiler_rt/bcmp.zig");
239238
_ = @import("compiler_rt/ssp.zig");

lib/compiler_rt/memcpy.zig

Lines changed: 157 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,169 @@ const builtin = @import("builtin");
55
// Export both routines under their C symbol names, using the linkage and
// visibility taken from `common`. Nothing is exported when the object format is `.c`.
comptime {
66
if (builtin.object_format != .c) {
77
@export(&memcpy, .{ .name = "memcpy", .linkage = common.linkage, .visibility = common.visibility });
8+
@export(&memmove, .{ .name = "memmove", .linkage = common.linkage, .visibility = common.visibility });
89
}
910
}
1011

11-
pub fn memcpy(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
12-
@setRuntimeSafety(false);
12+
/// Compile-time flag: true only when targeting arm/armeb/thumb/thumbeb with
/// the stage2 LLVM backend, false for every other architecture.
/// `memcpy` and `memmove` take their plain byte-loop path when this is set.
/// NOTE(review): the name suggests LLVM cannot lower the vector-based path on
/// these targets — confirm the exact limitation before relying on it.
const llvm_cannot_lower = switch (builtin.cpu.arch) {
13+
.arm, .armeb, .thumb, .thumbeb => builtin.zig_backend == .stage2_llvm,
14+
else => false,
15+
};
1316

14-
if (len != 0) {
15-
var d = dest.?;
16-
var s = src.?;
17-
var n = len;
18-
while (true) {
19-
d[0] = s[0];
20-
n -= 1;
21-
if (n == 0) break;
22-
d += 1;
23-
s += 1;
17+
/// C-ABI `memcpy`: copies `len` bytes from `opt_src` to `opt_dest` and returns
/// `opt_dest` unchanged.
///
/// When `llvm_cannot_lower` is set the copy is a byte loop in ascending
/// address order; the pointers are only unwrapped inside the loop body, so
/// they are never unwrapped when `len` is 0. Otherwise the call is forwarded
/// to `memmove`.
fn memcpy(noalias opt_dest: ?[*]u8, noalias opt_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
18+
if (llvm_cannot_lower) {
19+
for (0..len) |i| opt_dest.?[i] = opt_src.?[i];
20+
return opt_dest;
21+
} else {
22+
return memmove(opt_dest, opt_src, len);
23+
}
24+
}
25+
26+
/// A port of https://github.com/facebook/folly/blob/1c8bc50e88804e2a7361a57cd9b551dd10f6c5fd/folly/memcpy.S
///
/// C-ABI `memmove`: copies `len` bytes from `opt_src` to `opt_dest` and
/// returns the destination pointer.
///
/// Dispatch by `len` on the main path:
///   0        -> return immediately (pointers are not unwrapped)
///   1        -> single byte store
///   2..3     -> `blockCopy` with 2-byte blocks
///   4..7     -> `blockCopy` with 4-byte blocks
///   8..16    -> `blockCopy` with 8-byte blocks
///   17..32   -> `blockCopy` with 16-byte blocks
///   33..256  -> `copyLong`
///   > 256    -> `copyMove`
27+
fn memmove(opt_dest: ?[*]u8, opt_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
28+
// Fallback path: plain byte loops, direction chosen from the pointer order.
if (llvm_cannot_lower) {
29+
// Destination below source: copy in ascending address order.
if (@intFromPtr(opt_dest) < @intFromPtr(opt_src)) {
30+
for (0..len) |i| opt_dest.?[i] = opt_src.?[i];
31+
return opt_dest;
32+
} else {
33+
// Destination at or above source: copy from the last byte down to the first.
var index = len;
34+
while (index != 0) {
35+
index -= 1;
36+
opt_dest.?[index] = opt_src.?[index];
37+
}
38+
return opt_dest;
2439
}
2540
}
2641

42+
if (len == 0) {
43+
@branchHint(.unlikely);
44+
return opt_dest;
45+
}
46+
47+
// From here on len >= 1, and both pointers are unwrapped as non-null.
const dest = opt_dest.?;
48+
const src = opt_src.?;
49+
50+
if (len < 8) {
51+
@branchHint(.unlikely);
52+
if (len == 1) {
53+
@branchHint(.unlikely);
54+
dest[0] = src[0];
55+
} else if (len >= 4) {
56+
@branchHint(.unlikely);
57+
// len is 4..7 here.
blockCopy(dest, src, 4, len);
58+
} else {
59+
// len is 2 or 3 here.
blockCopy(dest, src, 2, len);
60+
}
61+
return dest;
62+
}
63+
64+
if (len > 32) {
65+
@branchHint(.unlikely);
66+
if (len > 256) {
67+
@branchHint(.unlikely);
68+
copyMove(dest, src, len);
69+
return dest;
70+
}
71+
// len is 33..256 here.
copyLong(dest, src, len);
72+
return dest;
73+
}
74+
75+
if (len > 16) {
76+
@branchHint(.unlikely);
77+
// len is 17..32 here.
blockCopy(dest, src, 16, len);
78+
return dest;
79+
}
80+
81+
// len is 8..16 here.
blockCopy(dest, src, 8, len);
82+
2783
return dest;
2884
}
85+
86+
/// Copies `len` bytes from `src` to `dest` using two `block_size`-byte
/// accesses: one covering bytes `[0, block_size)` and one covering
/// `[len - block_size, len)`.
///
/// Both blocks are loaded before either is stored. The two windows cover the
/// whole range only when `block_size <= len <= 2 * block_size`, which is what
/// every call in `memmove` passes.
inline fn blockCopy(dest: [*]u8, src: [*]const u8, block_size: comptime_int, len: usize) void {
87+
// Unaligned vector loads of the leading and trailing blocks.
const first = @as(*align(1) const @Vector(block_size, u8), src[0..block_size]).*;
88+
const second = @as(*align(1) const @Vector(block_size, u8), src[len - block_size ..][0..block_size]).*;
89+
// Stores happen only after both loads above.
dest[0..block_size].* = first;
90+
dest[len - block_size ..][0..block_size].* = second;
91+
}
92+
93+
/// Copies `len` bytes from `src` to `dest` with pairs of 32-byte vectors, one
/// taken from the front and one from the back of the range.
///
/// The unrolled loop walks the thresholds 64, 128, 192, 256. Step `i` loads
/// the front vector at offset `i * 32` and the back vector that ends
/// `i * 32` bytes before `len`. At the first threshold with `len <= N`, all
/// pairs loaded so far are stored and the function returns.
///
/// Callers in this file pass `len > 32`. If `len` exceeds 256 no threshold
/// matches and nothing is stored.
inline fn copyLong(dest: [*]u8, src: [*]const u8, len: usize) void {
94+
// Even slots hold front vectors, odd slots hold back vectors.
var array: [8]@Vector(32, u8) = undefined;
95+
96+
inline for (.{ 64, 128, 192, 256 }, 0..) |N, i| {
97+
array[i * 2] = src[(N / 2) - 32 ..][0..32].*;
98+
array[(i * 2) + 1] = src[len - N / 2 ..][0..32].*;
99+
100+
if (len <= N) {
101+
@branchHint(.unlikely);
102+
// Write back every pair loaded up to and including this step.
for (0..i + 1) |j| {
103+
dest[j * 32 ..][0..32].* = array[j * 2];
104+
dest[len - ((j * 32) + 32) ..][0..32].* = array[(j * 2) + 1];
105+
}
106+
return;
107+
}
108+
}
109+
}
110+
111+
/// Chooses the copy routine from the relative position of the two buffers.
/// `memmove` calls this only for `len > 256`.
///
/// - `src` at or above `dest`: `copyForward`.
/// - `src` below `dest` and `src + len` reaches past `dest` (the ranges
///   overlap): `overlapBwd`.
/// - `src` below `dest` with no overlap: `copyForward`.
inline fn copyMove(dest: [*]u8, src: [*]const u8, len: usize) void {
112+
if (@intFromPtr(src) >= @intFromPtr(dest)) {
113+
@branchHint(.unlikely);
114+
copyForward(dest, src, len);
115+
} else if (@intFromPtr(src) + len > @intFromPtr(dest)) {
116+
@branchHint(.unlikely);
117+
overlapBwd(dest, src, len);
118+
} else {
119+
copyForward(dest, src, len);
120+
}
121+
}
122+
123+
/// Copies `len` bytes from `src` to `dest` in ascending address order.
///
/// The last 32 source bytes are loaded before anything is written. The main
/// loop then moves 128 bytes per iteration (four 32-byte copies) over the
/// largest multiple of 128 not exceeding `len`. The remainder (`len - i`,
/// always below 128) is finished either by storing the saved tail vector
/// (remainder <= 32) or by `copyLong` on the remaining range.
///
/// Reads `src[len - 32 ..]`, so `len` must be at least 32; the only caller,
/// `copyMove`, is reached with `len > 256`.
inline fn copyForward(dest: [*]u8, src: [*]const u8, len: usize) void {
124+
// Saved up front, before any store to `dest`.
const tail: @Vector(32, u8) = src[len - 32 ..][0..32].*;
125+
126+
// `len` rounded down to a multiple of 128.
const N: usize = len & ~@as(usize, 127);
127+
var i: usize = 0;
128+
129+
while (i < N) : (i += 128) {
130+
dest[i..][0..32].* = src[i..][0..32].*;
131+
dest[i + 32 ..][0..32].* = src[i + 32 ..][0..32].*;
132+
dest[i + 64 ..][0..32].* = src[i + 64 ..][0..32].*;
133+
dest[i + 96 ..][0..32].* = src[i + 96 ..][0..32].*;
134+
}
135+
136+
if (len - i <= 32) {
137+
@branchHint(.unlikely);
138+
// The saved tail covers the final 32 bytes, which include the remainder.
dest[len - 32 ..][0..32].* = tail;
139+
} else {
140+
// Remainder is 33..127 bytes.
copyLong(dest[i..], src[i..], len - i);
141+
}
142+
}
143+
144+
/// Copies `len` bytes from `src` to `dest`, working from high addresses down
/// to low ones. `copyMove` calls this when `src` is below `dest` and the two
/// ranges overlap.
///
/// Steps:
/// 1. Save the last 32 bytes and the first 128 bytes of `src` in `array`.
/// 2. Compute `end`, the low five bits of the destination end address, and
///    start the cursors `s`/`d` that many bytes before the end, so `d` is a
///    multiple of 32.
/// 3. Move 128 bytes per iteration, loading all four vectors before storing
///    any of them, until `s` is within 128 bytes of `src`.
/// 4. Store the saved first 128 bytes and the saved last 32 bytes.
///
/// Reads `src[0..128]` and `src[len - 32 ..]`; the only caller is reached
/// with `len > 256`.
inline fn overlapBwd(dest: [*]u8, src: [*]const u8, len: usize) void {
145+
// Slot 0 holds the tail; slots 1..4 hold the first four 32-byte blocks.
var array: [5]@Vector(32, u8) = undefined;
146+
array[0] = src[len - 32 ..][0..32].*;
147+
inline for (1..5) |i| array[i] = src[(i - 1) << 5 ..][0..32].*;
148+
149+
// Subtracting 32 does not change the low five bits, so this equals
// (dest + len) & 31.
const end: usize = (@intFromPtr(dest) + len - 32) & 31;
150+
const range = len - end;
151+
var s = src + range;
152+
var d = dest + range;
153+
154+
while (@intFromPtr(s) > @intFromPtr(src + 128)) {
155+
// zig fmt: off
156+
// Source loads are unaligned.
const first = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 32)).*;
157+
const second = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 64)).*;
158+
const third = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 96)).*;
159+
const fourth = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 128)).*;
160+
161+
// Destination stores assert 32-byte alignment; `d` starts 32-byte aligned
// and moves in steps of 128.
@as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 32))).* = first;
162+
@as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 64))).* = second;
163+
@as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 96))).* = third;
164+
@as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 128))).* = fourth;
165+
// zig fmt: on
166+
167+
s -= 128;
168+
d -= 128;
169+
}
170+
171+
// Finish with the vectors saved before the loop: first 128 bytes, then the last 32.
inline for (array[1..], 0..) |vec, i| dest[i * 32 ..][0..32].* = vec;
172+
dest[len - 32 ..][0..32].* = array[0];
173+
}

lib/compiler_rt/memmove.zig

Lines changed: 0 additions & 25 deletions
This file was deleted.

0 commit comments

Comments
 (0)