Skip to content

Commit

Permalink
Merge pull request #22513 from ziglang/memcpy
Browse files Browse the repository at this point in the history
enhance memcpy and remove redundant implementations
  • Loading branch information
andrewrk authored Jan 18, 2025
2 parents f9a4377 + f7f6217 commit f38d7a9
Show file tree
Hide file tree
Showing 51 changed files with 163 additions and 3,267 deletions.
1 change: 0 additions & 1 deletion lib/compiler_rt.zig
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,6 @@ comptime {

_ = @import("compiler_rt/memcpy.zig");
_ = @import("compiler_rt/memset.zig");
_ = @import("compiler_rt/memmove.zig");
_ = @import("compiler_rt/memcmp.zig");
_ = @import("compiler_rt/bcmp.zig");
_ = @import("compiler_rt/ssp.zig");
Expand Down
169 changes: 157 additions & 12 deletions lib/compiler_rt/memcpy.zig
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,169 @@ const builtin = @import("builtin");
comptime {
    // Export the `memcpy` and `memmove` symbols for every object format
    // except the C one; both use the linkage and visibility from `common`.
    switch (builtin.object_format) {
        .c => {},
        else => {
            @export(&memcpy, .{ .name = "memcpy", .linkage = common.linkage, .visibility = common.visibility });
            @export(&memmove, .{ .name = "memmove", .linkage = common.linkage, .visibility = common.visibility });
        },
    }
}

pub fn memcpy(noalias dest: ?[*]u8, noalias src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
@setRuntimeSafety(false);
/// True when compiling with the LLVM backend for arm, armeb, thumb or thumbeb.
/// `memcpy` and `memmove` check this flag and, when it is set, use plain
/// byte-at-a-time loops instead of the vector-based copy routines.
const llvm_cannot_lower = builtin.zig_backend == .stage2_llvm and switch (builtin.cpu.arch) {
    .arm, .armeb, .thumb, .thumbeb => true,
    else => false,
};

if (len != 0) {
var d = dest.?;
var s = src.?;
var n = len;
while (true) {
d[0] = s[0];
n -= 1;
if (n == 0) break;
d += 1;
s += 1;
/// Copies `len` bytes from `opt_src` to `opt_dest` and returns `opt_dest`.
///
/// When `llvm_cannot_lower` is set, the bytes are copied one at a time in
/// ascending order; otherwise the call is forwarded to `memmove`.
/// On the byte-loop path the optional pointers are only unwrapped inside the
/// loop body, so they are never unwrapped when `len` is zero.
fn memcpy(noalias opt_dest: ?[*]u8, noalias opt_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
    if (!llvm_cannot_lower) return memmove(opt_dest, opt_src, len);

    var offset: usize = 0;
    while (offset < len) : (offset += 1) {
        opt_dest.?[offset] = opt_src.?[offset];
    }
    return opt_dest;
}

/// A port of https://github.com/facebook/folly/blob/1c8bc50e88804e2a7361a57cd9b551dd10f6c5fd/folly/memcpy.S
///
/// Copies `len` bytes from `opt_src` to `opt_dest` and returns the destination
/// pointer. The work is dispatched by size:
///   * 0          -> nothing to do
///   * 1          -> single byte store
///   * 2..3       -> `blockCopy` with 2-byte blocks
///   * 4..7       -> `blockCopy` with 4-byte blocks
///   * 8..16      -> `blockCopy` with 8-byte blocks
///   * 17..32     -> `blockCopy` with 16-byte blocks
///   * 33..256    -> `copyLong`
///   * above 256  -> `copyMove`
/// When `llvm_cannot_lower` is set, none of the above is used and a simple
/// byte loop runs instead.
fn memmove(opt_dest: ?[*]u8, opt_src: ?[*]const u8, len: usize) callconv(.C) ?[*]u8 {
    if (llvm_cannot_lower) {
        // Byte-wise fallback: ascending order when dest is below src,
        // descending order otherwise, so overlapping bytes are read before
        // they are overwritten.
        if (@intFromPtr(opt_dest) < @intFromPtr(opt_src)) {
            for (0..len) |offset| opt_dest.?[offset] = opt_src.?[offset];
        } else {
            var remaining = len;
            while (remaining != 0) {
                remaining -= 1;
                opt_dest.?[remaining] = opt_src.?[remaining];
            }
        }
        return opt_dest;
    }

    if (len == 0) {
        @branchHint(.unlikely);
        return opt_dest;
    }

    // From here on len is non-zero, so both pointers are unwrapped once.
    const dest = opt_dest.?;
    const src = opt_src.?;

    if (len < 8) {
        @branchHint(.unlikely);
        if (len == 1) {
            @branchHint(.unlikely);
            dest[0] = src[0];
            return dest;
        }
        if (len >= 4) {
            @branchHint(.unlikely);
            blockCopy(dest, src, 4, len);
            return dest;
        }
        // Only 2 and 3 remain.
        blockCopy(dest, src, 2, len);
        return dest;
    }

    if (len > 32) {
        @branchHint(.unlikely);
        if (len > 256) {
            @branchHint(.unlikely);
            copyMove(dest, src, len);
        } else {
            copyLong(dest, src, len);
        }
    } else if (len > 16) {
        @branchHint(.unlikely);
        blockCopy(dest, src, 16, len);
    } else {
        // 8..16 bytes.
        blockCopy(dest, src, 8, len);
    }

    return dest;
}

/// Copies `len` bytes from `src` to `dest` using two possibly overlapping
/// blocks of `block_size` bytes: one anchored at the start and one anchored
/// at the end of the range.
///
/// Requires `len >= block_size`; the two blocks cover the whole range when
/// `len <= 2 * block_size`, which is how `memmove` calls this function.
/// Both blocks are loaded before either is stored.
inline fn blockCopy(dest: [*]u8, src: [*]const u8, block_size: comptime_int, len: usize) void {
    const Block = @Vector(block_size, u8);
    const tail_offset = len - block_size;

    // Loads first ...
    const head: Block = src[0..block_size].*;
    const tail: Block = src[tail_offset..][0..block_size].*;

    // ... then stores.
    dest[0..block_size].* = head;
    dest[tail_offset..][0..block_size].* = tail;
}

/// Copies `len` bytes from `src` to `dest` for lengths in 33..256 using
/// 32-byte vectors taken alternately from the front and the back of the range.
///
/// Each round loads one more 32-byte block from the front and one more from
/// the back. As soon as the blocks loaded so far cover `len` bytes
/// (64, 128, 192, then 256), all of them are stored and the function returns.
/// Every load therefore happens before the first store.
/// If `len` is greater than 256 nothing is stored; callers in this file only
/// pass lengths in 33..256.
inline fn copyLong(dest: [*]u8, src: [*]const u8, len: usize) void {
    var heads: [4]@Vector(32, u8) = undefined;
    var tails: [4]@Vector(32, u8) = undefined;

    inline for (0..4) |round| {
        const blocks = round + 1;
        heads[round] = src[round * 32 ..][0..32].*;
        tails[round] = src[len - blocks * 32 ..][0..32].*;

        if (len <= blocks * 64) {
            @branchHint(.unlikely);
            for (0..blocks) |k| {
                dest[k * 32 ..][0..32].* = heads[k];
                dest[len - ((k * 32) + 32) ..][0..32].* = tails[k];
            }
            return;
        }
    }
}

/// Picks the copy direction for a large move based on how the two ranges are
/// laid out in memory:
///   * `src` at or above `dest`                 -> `copyForward`
///   * `src` below `dest` and overlapping it    -> `overlapBwd`
///   * `src` below `dest` without overlap       -> `copyForward`
inline fn copyMove(dest: [*]u8, src: [*]const u8, len: usize) void {
    const src_addr = @intFromPtr(src);
    const dest_addr = @intFromPtr(dest);

    if (src_addr >= dest_addr) {
        @branchHint(.unlikely);
        copyForward(dest, src, len);
    } else if (src_addr + len > dest_addr) {
        @branchHint(.unlikely);
        // The end of src reaches into dest, so copy from the top down.
        overlapBwd(dest, src, len);
    } else {
        copyForward(dest, src, len);
    }
}

/// Copies `len` bytes from `src` to `dest` in ascending address order.
///
/// The bulk is moved in 128-byte steps (four 32-byte blocks each). What is
/// left over (`len` modulo 128) is finished either with the saved last
/// 32 bytes of `src` (when at most 32 bytes remain) or with `copyLong`.
/// Requires `len >= 32`; `copyMove` only reaches this with `len > 256`.
inline fn copyForward(dest: [*]u8, src: [*]const u8, len: usize) void {
    // Loaded before any store so it holds the original bytes even if the
    // stores below overwrite that part of `src` (possible when the two
    // ranges overlap).
    const last_block: @Vector(32, u8) = src[len - 32 ..][0..32].*;

    const bulk_len: usize = len & ~@as(usize, 127);
    var offset: usize = 0;

    while (offset < bulk_len) : (offset += 128) {
        inline for (.{ 0, 32, 64, 96 }) |lane| {
            dest[offset + lane ..][0..32].* = src[offset + lane ..][0..32].*;
        }
    }

    const remaining = len - offset;
    if (remaining <= 32) {
        @branchHint(.unlikely);
        dest[len - 32 ..][0..32].* = last_block;
    } else {
        copyLong(dest + offset, src + offset, remaining);
    }
}

/// Copies `len` bytes from `src` to `dest` working from high addresses down
/// to low addresses. `copyMove` calls this when `src` lies below `dest` and
/// the two ranges overlap; via `memmove` that only happens for `len > 256`.
///
/// The last 32 bytes and the first 128 bytes of `src` are saved before any
/// store. The main loop then moves 128 bytes per iteration using stores that
/// are 32-byte aligned in `dest`, and the saved blocks are written last to
/// fill in whatever the loop did not cover at either end.
inline fn overlapBwd(dest: [*]u8, src: [*]const u8, len: usize) void {
// array[0] holds the last 32 bytes of src, array[1..5] the first 128 bytes.
var array: [5]@Vector(32, u8) = undefined;
array[0] = src[len - 32 ..][0..32].*;
inline for (1..5) |i| array[i] = src[(i - 1) << 5 ..][0..32].*;

// `end` is (dest + len) modulo 32, so `dest + range` is a multiple of 32.
// That makes every `d - k * 32` store in the loop 32-byte aligned.
// The `end` trailing bytes (fewer than 32) are covered by array[0] below.
const end: usize = (@intFromPtr(dest) + len - 32) & 31;
const range = len - end;
var s = src + range;
var d = dest + range;

// Walk downwards 128 bytes at a time while more than 128 source bytes remain
// below `s`. All four loads of an iteration happen before its four stores.
while (@intFromPtr(s) > @intFromPtr(src + 128)) {
// zig fmt: off
const first = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 32)).*;
const second = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 64)).*;
const third = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 96)).*;
const fourth = @as(*align(1) const @Vector(32, u8), @ptrCast(s - 128)).*;

@as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 32))).* = first;
@as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 64))).* = second;
@as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 96))).* = third;
@as(*align(32) @Vector(32, u8), @alignCast(@ptrCast(d - 128))).* = fourth;
// zig fmt: on

s -= 128;
d -= 128;
}

// At most 128 bytes at the start are still uncopied here; write the saved
// first 128 bytes, then the saved last 32 bytes.
inline for (array[1..], 0..) |vec, i| dest[i * 32 ..][0..32].* = vec;
dest[len - 32 ..][0..32].* = array[0];
}
25 changes: 0 additions & 25 deletions lib/compiler_rt/memmove.zig

This file was deleted.

Loading

0 comments on commit f38d7a9

Please sign in to comment.