Skip to content

Commit 81ff91e

Browse files
committed
8339531: Improve performance of MemorySegment::mismatch
Reviewed-by: mcimadamore
1 parent ab9b72c commit 81ff91e

File tree

7 files changed

+550
-202
lines changed

7 files changed

+550
-202
lines changed

src/java.base/share/classes/java/lang/foreign/MemorySegment.java

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -43,6 +43,7 @@
4343
import java.util.stream.Stream;
4444
import jdk.internal.foreign.AbstractMemorySegmentImpl;
4545
import jdk.internal.foreign.MemorySessionImpl;
46+
import jdk.internal.foreign.SegmentBulkOperations;
4647
import jdk.internal.foreign.SegmentFactories;
4748
import jdk.internal.javac.Restricted;
4849
import jdk.internal.reflect.CallerSensitive;
@@ -1571,7 +1572,7 @@ static MemorySegment ofAddress(long address) {
15711572
static void copy(MemorySegment srcSegment, long srcOffset,
15721573
MemorySegment dstSegment, long dstOffset, long bytes) {
15731574

1574-
AbstractMemorySegmentImpl.copy((AbstractMemorySegmentImpl) srcSegment, srcOffset,
1575+
SegmentBulkOperations.copy((AbstractMemorySegmentImpl) srcSegment, srcOffset,
15751576
(AbstractMemorySegmentImpl) dstSegment, dstOffset,
15761577
bytes);
15771578
}
@@ -2635,8 +2636,9 @@ static void copy(Object srcArray, int srcIndex,
26352636
*/
26362637
static long mismatch(MemorySegment srcSegment, long srcFromOffset, long srcToOffset,
26372638
MemorySegment dstSegment, long dstFromOffset, long dstToOffset) {
2638-
return AbstractMemorySegmentImpl.mismatch(srcSegment, srcFromOffset, srcToOffset,
2639-
dstSegment, dstFromOffset, dstToOffset);
2639+
return SegmentBulkOperations.mismatch(
2640+
(AbstractMemorySegmentImpl)Objects.requireNonNull(srcSegment), srcFromOffset, srcToOffset,
2641+
(AbstractMemorySegmentImpl)Objects.requireNonNull(dstSegment), dstFromOffset, dstToOffset);
26402642
}
26412643

26422644
/**

src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java

Lines changed: 5 additions & 173 deletions
Original file line number | Diff line number | Diff line change
@@ -72,8 +72,6 @@ public abstract sealed class AbstractMemorySegmentImpl
7272
implements MemorySegment, SegmentAllocator, BiFunction<String, List<Number>, RuntimeException>
7373
permits HeapMemorySegmentImpl, NativeMemorySegmentImpl {
7474

75-
private static final ScopedMemoryAccess SCOPED_MEMORY_ACCESS = ScopedMemoryAccess.getScopedMemoryAccess();
76-
7775
static final JavaNioAccess NIO_ACCESS = SharedSecrets.getJavaNioAccess();
7876

7977
final long length;
@@ -189,53 +187,10 @@ public Stream<MemorySegment> elements(MemoryLayout elementLayout) {
189187
return StreamSupport.stream(spliterator(elementLayout), false);
190188
}
191189

192-
// FILL_NATIVE_THRESHOLD must be a power of two and should be greater than 2^3
193-
// Update the value for Aarch64 once 8338975 is fixed.
194-
private static final long FILL_NATIVE_THRESHOLD = 1L << (Architecture.isAARCH64() ? 10 : 5);
195-
196-
@Override
197190
@ForceInline
191+
@Override
198192
public final MemorySegment fill(byte value) {
199-
checkReadOnly(false);
200-
if (length == 0) {
201-
// Implicit state check
202-
checkValidState();
203-
} else if (length < FILL_NATIVE_THRESHOLD) {
204-
// 0 <= length < FILL_NATIVE_THRESHOLD : 0...0X...XXXX
205-
206-
// Handle smaller segments directly without transitioning to native code
207-
final long u = Byte.toUnsignedLong(value);
208-
final long longValue = u << 56 | u << 48 | u << 40 | u << 32 | u << 24 | u << 16 | u << 8 | u;
209-
210-
int offset = 0;
211-
// 0...0X...X000
212-
final int limit = (int) (length & (FILL_NATIVE_THRESHOLD - 8));
213-
for (; offset < limit; offset += 8) {
214-
SCOPED_MEMORY_ACCESS.putLong(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, longValue);
215-
}
216-
int remaining = (int) length - limit;
217-
// 0...0X00
218-
if (remaining >= 4) {
219-
SCOPED_MEMORY_ACCESS.putInt(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, (int) longValue);
220-
offset += 4;
221-
remaining -= 4;
222-
}
223-
// 0...00X0
224-
if (remaining >= 2) {
225-
SCOPED_MEMORY_ACCESS.putShort(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, (short) longValue);
226-
offset += 2;
227-
remaining -= 2;
228-
}
229-
// 0...000X
230-
if (remaining == 1) {
231-
SCOPED_MEMORY_ACCESS.putByte(sessionImpl(), unsafeGetBase(), unsafeGetOffset() + offset, value);
232-
}
233-
// We have now fully handled 0...0X...XXXX
234-
} else {
235-
// Handle larger segments via native calls
236-
SCOPED_MEMORY_ACCESS.setMemory(sessionImpl(), unsafeGetBase(), unsafeGetOffset(), length, value);
237-
}
238-
return this;
193+
return SegmentBulkOperations.fill(this, value);
239194
}
240195

241196
@Override
@@ -244,38 +199,6 @@ public MemorySegment allocate(long byteSize, long byteAlignment) {
244199
return asSlice(0, byteSize, byteAlignment);
245200
}
246201

247-
/**
248-
* Mismatch over long lengths.
249-
*/
250-
public static long vectorizedMismatchLargeForBytes(MemorySessionImpl aSession, MemorySessionImpl bSession,
251-
Object a, long aOffset,
252-
Object b, long bOffset,
253-
long length) {
254-
long off = 0;
255-
long remaining = length;
256-
int i, size;
257-
boolean lastSubRange = false;
258-
while (remaining > 7 && !lastSubRange) {
259-
if (remaining > Integer.MAX_VALUE) {
260-
size = Integer.MAX_VALUE;
261-
} else {
262-
size = (int) remaining;
263-
lastSubRange = true;
264-
}
265-
i = SCOPED_MEMORY_ACCESS.vectorizedMismatch(aSession, bSession,
266-
a, aOffset + off,
267-
b, bOffset + off,
268-
size, ArraysSupport.LOG2_ARRAY_BYTE_INDEX_SCALE);
269-
if (i >= 0)
270-
return off + i;
271-
272-
i = size - ~i;
273-
off += i;
274-
remaining -= i;
275-
}
276-
return ~remaining;
277-
}
278-
279202
@Override
280203
public final ByteBuffer asByteBuffer() {
281204
checkArraySize("ByteBuffer", 1);
@@ -314,7 +237,7 @@ public final Optional<MemorySegment> asOverlappingSlice(MemorySegment other) {
314237
}
315238

316239
@ForceInline
317-
private boolean overlaps(AbstractMemorySegmentImpl that) {
240+
boolean overlaps(AbstractMemorySegmentImpl that) {
318241
if (unsafeGetBase() == that.unsafeGetBase()) { // both either native or the same heap segment
319242
final long thisStart = this.unsafeGetOffset();
320243
final long thatStart = that.unsafeGetOffset();
@@ -334,7 +257,8 @@ public MemorySegment copyFrom(MemorySegment src) {
334257
@Override
335258
public long mismatch(MemorySegment other) {
336259
Objects.requireNonNull(other);
337-
return MemorySegment.mismatch(this, 0, byteSize(), other, 0, other.byteSize());
260+
return SegmentBulkOperations.mismatch(this, 0, byteSize(),
261+
(AbstractMemorySegmentImpl) other, 0, other.byteSize());
338262
}
339263

340264
@Override
@@ -650,64 +574,6 @@ private static Object bufferRef(Buffer buffer) {
650574
}
651575
}
652576

653-
// COPY_NATIVE_THRESHOLD must be a power of two and should be greater than 2^3
654-
private static final long COPY_NATIVE_THRESHOLD = 1 << 6;
655-
656-
@ForceInline
657-
public static void copy(AbstractMemorySegmentImpl src, long srcOffset,
658-
AbstractMemorySegmentImpl dst, long dstOffset,
659-
long size) {
660-
661-
Utils.checkNonNegativeIndex(size, "size");
662-
// Implicit null check for src and dst
663-
src.checkAccess(srcOffset, size, true);
664-
dst.checkAccess(dstOffset, size, false);
665-
666-
if (size <= 0) {
667-
// Do nothing
668-
} else if (size < COPY_NATIVE_THRESHOLD && !src.overlaps(dst)) {
669-
// 0 < size < COPY_NATIVE_THRESHOLD : 0...0X...XXXX
670-
//
671-
// Strictly, we could check for !src.asSlice(srcOffset, size).overlaps(dst.asSlice(dstOffset, size)) but
672-
// this is a bit slower and it is likely very unusual that there is any difference in the outcome. Also, if there
673-
// is an overlap, we could tolerate one particular direction of overlap (but not the other).
674-
675-
// 0...0X...X000
676-
final int limit = (int) (size & (COPY_NATIVE_THRESHOLD - 8));
677-
int offset = 0;
678-
for (; offset < limit; offset += 8) {
679-
final long v = SCOPED_MEMORY_ACCESS.getLong(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
680-
SCOPED_MEMORY_ACCESS.putLong(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
681-
}
682-
int remaining = (int) size - offset;
683-
// 0...0X00
684-
if (remaining >= 4) {
685-
final int v = SCOPED_MEMORY_ACCESS.getInt(src.sessionImpl(), src.unsafeGetBase(),src.unsafeGetOffset() + srcOffset + offset);
686-
SCOPED_MEMORY_ACCESS.putInt(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
687-
offset += 4;
688-
remaining -= 4;
689-
}
690-
// 0...00X0
691-
if (remaining >= 2) {
692-
final short v = SCOPED_MEMORY_ACCESS.getShort(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
693-
SCOPED_MEMORY_ACCESS.putShort(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
694-
offset += 2;
695-
remaining -=2;
696-
}
697-
// 0...000X
698-
if (remaining == 1) {
699-
final byte v = SCOPED_MEMORY_ACCESS.getByte(src.sessionImpl(), src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset + offset);
700-
SCOPED_MEMORY_ACCESS.putByte(dst.sessionImpl(), dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset + offset, v);
701-
}
702-
// We have now fully handled 0...0X...XXXX
703-
} else {
704-
// For larger sizes, the transition to native code pays off
705-
SCOPED_MEMORY_ACCESS.copyMemory(src.sessionImpl(), dst.sessionImpl(),
706-
src.unsafeGetBase(), src.unsafeGetOffset() + srcOffset,
707-
dst.unsafeGetBase(), dst.unsafeGetOffset() + dstOffset, size);
708-
}
709-
}
710-
711577
@ForceInline
712578
public static void copy(MemorySegment srcSegment, ValueLayout srcElementLayout, long srcOffset,
713579
MemorySegment dstSegment, ValueLayout dstElementLayout, long dstOffset,
@@ -794,40 +660,6 @@ public static void copy(Object srcArray, int srcIndex,
794660
}
795661
}
796662

797-
public static long mismatch(MemorySegment srcSegment, long srcFromOffset, long srcToOffset,
798-
MemorySegment dstSegment, long dstFromOffset, long dstToOffset) {
799-
AbstractMemorySegmentImpl srcImpl = (AbstractMemorySegmentImpl)Objects.requireNonNull(srcSegment);
800-
AbstractMemorySegmentImpl dstImpl = (AbstractMemorySegmentImpl)Objects.requireNonNull(dstSegment);
801-
long srcBytes = srcToOffset - srcFromOffset;
802-
long dstBytes = dstToOffset - dstFromOffset;
803-
srcImpl.checkAccess(srcFromOffset, srcBytes, true);
804-
dstImpl.checkAccess(dstFromOffset, dstBytes, true);
805-
806-
long bytes = Math.min(srcBytes, dstBytes);
807-
long i = 0;
808-
if (bytes > 7) {
809-
if (srcImpl.get(JAVA_BYTE, srcFromOffset) != dstImpl.get(JAVA_BYTE, dstFromOffset)) {
810-
return 0;
811-
}
812-
i = AbstractMemorySegmentImpl.vectorizedMismatchLargeForBytes(srcImpl.sessionImpl(), dstImpl.sessionImpl(),
813-
srcImpl.unsafeGetBase(), srcImpl.unsafeGetOffset() + srcFromOffset,
814-
dstImpl.unsafeGetBase(), dstImpl.unsafeGetOffset() + dstFromOffset,
815-
bytes);
816-
if (i >= 0) {
817-
return i;
818-
}
819-
long remaining = ~i;
820-
assert remaining < 8 : "remaining greater than 7: " + remaining;
821-
i = bytes - remaining;
822-
}
823-
for (; i < bytes; i++) {
824-
if (srcImpl.get(JAVA_BYTE, srcFromOffset + i) != dstImpl.get(JAVA_BYTE, dstFromOffset + i)) {
825-
return i;
826-
}
827-
}
828-
return srcBytes != dstBytes ? bytes : -1;
829-
}
830-
831663
private static int getScaleFactor(Buffer buffer) {
832664
return switch (buffer) {
833665
case ByteBuffer _ -> 0;

0 commit comments

Comments
 (0)