diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index a18ac3250dc86..52f908c9e98bf 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -914,11 +914,10 @@ private static byte[] encodeWithEncoder( return ba; } - int blen = (coder == LATIN1) ? ae.encodeFromLatin1(val, 0, len, ba) - : ae.encodeFromUTF16(val, 0, len, ba); - if (blen != -1) { - return trimArray(ba, blen); - } + int blen = coder == LATIN1 + ? ae.encodeFromLatin1(val, 0, len, ba, 0) + : ae.encodeFromUTF16(val, 0, len, ba, 0); + return trimArray(ba, blen); } byte[] ba = new byte[en]; diff --git a/src/java.base/share/classes/sun/nio/cs/ArrayEncoder.java b/src/java.base/share/classes/sun/nio/cs/ArrayEncoder.java index b4ced428b33ab..16a6d5df00305 100644 --- a/src/java.base/share/classes/sun/nio/cs/ArrayEncoder.java +++ b/src/java.base/share/classes/sun/nio/cs/ArrayEncoder.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2009, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,25 +25,17 @@ package sun.nio.cs; -/* - * FastPath char[]/byte[] -> byte[] encoder, REPLACE on malformed input or - * unmappable input. +/** + * Fast-path for {@code byte[]}-to-{@code byte[]} encoding, + * {@link java.nio.charset.CodingErrorAction#REPLACE REPLACE} on malformed + * input, or unmappable input. */ - public interface ArrayEncoder { - // is only used by j.u.zip.ZipCoder for utf8 - int encode(char[] src, int off, int len, byte[] dst); + int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst, int dp); - default int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) { - return -1; - } + int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst, int dp); - default int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) { - return -1; - } + boolean isASCIICompatible(); - default boolean isASCIICompatible() { - return false; - } } diff --git a/src/java.base/share/classes/sun/nio/cs/CESU_8.java b/src/java.base/share/classes/sun/nio/cs/CESU_8.java index 9b907bcbc65b2..409b375ec8896 100644 --- a/src/java.base/share/classes/sun/nio/cs/CESU_8.java +++ b/src/java.base/share/classes/sun/nio/cs/CESU_8.java @@ -394,8 +394,7 @@ public int decode(byte[] sa, int sp, int len, char[] da) { } } - private static class Encoder extends CharsetEncoder - implements ArrayEncoder { + private static class Encoder extends CharsetEncoder { private Encoder(Charset cs) { super(cs, 1.1f, 3.0f); @@ -544,48 +543,6 @@ protected final CoderResult encodeLoop(CharBuffer src, return encodeBufferLoop(src, dst); } - // returns -1 if there is malformed char(s) and the - // "action" for malformed input is not REPLACE. - public int encode(char[] sa, int sp, int len, byte[] da) { - int sl = sp + len; - int dp = 0; - - // Handle ASCII-only prefix - int n = JLA.encodeASCII(sa, sp, da, dp, Math.min(len, da.length)); - sp += n; - dp += n; - - while (sp < sl) { - char c = sa[sp++]; - if (c < 0x80) { - // Have at most seven bits - da[dp++] = (byte)c; - } else if (c < 0x800) { - // 2 bytes, 11 bits - da[dp++] = (byte)(0xc0 | (c >> 6)); - da[dp++] = (byte)(0x80 | (c & 0x3f)); - } else if (Character.isSurrogate(c)) { - if (sgp == null) - sgp = new Surrogate.Parser(); - int uc = sgp.parse(c, sa, sp - 1, sl); - if (uc < 0) { - if (malformedInputAction() != CodingErrorAction.REPLACE) - return -1; - da[dp++] = replacement()[0]; - } else { - to3Bytes(da, dp, Character.highSurrogate(uc)); - dp += 3; - to3Bytes(da, dp, Character.lowSurrogate(uc)); - dp += 3; - sp++; // 2 chars - } - } else { - // 3 bytes, 16 bits - to3Bytes(da, dp, c); - dp += 3; - } - } - return dp; - } } + } diff --git a/src/java.base/share/classes/sun/nio/cs/DoubleByte.java b/src/java.base/share/classes/sun/nio/cs/DoubleByte.java index 165e1e21c0fab..0969669a35b66 100644 --- a/src/java.base/share/classes/sun/nio/cs/DoubleByte.java +++ b/src/java.base/share/classes/sun/nio/cs/DoubleByte.java @@ -682,40 +682,7 @@ protected void implReplaceWith(byte[] newReplacement) { } @Override - public int encode(char[] src, int sp, int len, byte[] dst) { - int dp = 0; - int sl = sp + len; - if (isASCIICompatible) { - int n = JLA.encodeASCII(src, sp, dst, dp, len); - sp += n; - dp += n; - } - while (sp < sl) { - char c = src[sp++]; - int bb = encodeChar(c); - if (bb == UNMAPPABLE_ENCODING) { - if (Character.isHighSurrogate(c) && sp < sl && - Character.isLowSurrogate(src[sp])) { - sp++; - } - dst[dp++] = repl[0]; - if (repl.length > 1) - dst[dp++] = repl[1]; - continue; - } //else - if (bb > MAX_SINGLEBYTE) { // DoubleByte - dst[dp++] = (byte)(bb >> 8); - dst[dp++] = (byte)bb; - } else { // SingleByte - dst[dp++] = (byte)bb; - } - } - return dp; - } - - @Override - public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) { - int dp = 0; + public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst, int dp) { int sl = sp + len; while (sp < sl) { char c = (char)(src[sp++] & 0xff); @@ -740,8 +707,7 @@ public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) { } @Override - public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) { - int dp = 0; + public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst, int dp) { int sl = sp + len; while (sp < sl) { char c = StringUTF16.getChar(src, sp++); @@ -1000,49 +966,7 @@ protected CoderResult encodeBufferLoop(CharBuffer src, ByteBuffer dst) { } @Override - public int encode(char[] src, int sp, int len, byte[] dst) { - int dp = 0; - int sl = sp + len; - while (sp < sl) { - char c = src[sp++]; - int bb = encodeChar(c); - - if (bb == UNMAPPABLE_ENCODING) { - if (Character.isHighSurrogate(c) && sp < sl && - Character.isLowSurrogate(src[sp])) { - sp++; - } - dst[dp++] = repl[0]; - if (repl.length > 1) - dst[dp++] = repl[1]; - continue; - } //else - if (bb > MAX_SINGLEBYTE) { // DoubleByte - if (currentState == SBCS) { - currentState = DBCS; - dst[dp++] = SO; - } - dst[dp++] = (byte)(bb >> 8); - dst[dp++] = (byte)bb; - } else { // SingleByte - if (currentState == DBCS) { - currentState = SBCS; - dst[dp++] = SI; - } - dst[dp++] = (byte)bb; - } - } - - if (currentState == DBCS) { - currentState = SBCS; - dst[dp++] = SI; - } - return dp; - } - - @Override - public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) { - int dp = 0; + public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst, int dp) { int sl = sp + len; while (sp < sl) { char c = (char)(src[sp++] & 0xff); @@ -1077,8 +1001,7 @@ public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) { } @Override - public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) { - int dp = 0; + public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst, int dp) { int sl = sp + len; while (sp < sl) { char c = StringUTF16.getChar(src, sp++); diff --git a/src/java.base/share/classes/sun/nio/cs/HKSCS.java b/src/java.base/share/classes/sun/nio/cs/HKSCS.java index cfe9f879c048e..f96fbf6be292b 100644 --- a/src/java.base/share/classes/sun/nio/cs/HKSCS.java +++ b/src/java.base/share/classes/sun/nio/cs/HKSCS.java @@ -352,37 +352,9 @@ protected CoderResult encodeLoop(CharBuffer src, ByteBuffer dst) { return encodeBufferLoop(src, dst); } - public int encode(char[] src, int sp, int len, byte[] dst) { - int dp = 0; - int sl = sp + len; - while (sp < sl) { - char c = src[sp++]; - int bb = encodeChar(c); - if (bb == UNMAPPABLE_ENCODING) { - if (!Character.isHighSurrogate(c) || sp == sl || - !Character.isLowSurrogate(src[sp]) || - (bb = encodeSupp(Character.toCodePoint(c, src[sp++]))) - == UNMAPPABLE_ENCODING) { - dst[dp++] = repl[0]; - if (repl.length > 1) - dst[dp++] = repl[1]; - continue; - } - } - if (bb > MAX_SINGLEBYTE) { // DoubleByte - dst[dp++] = (byte)(bb >> 8); - dst[dp++] = (byte)bb; - } else { // SingleByte - dst[dp++] = (byte)bb; - } - } - return dp; - } - - public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) { - int dp = 0; + @Override + public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst, int dp) { int sl = sp + len; - int dl = dst.length; while (sp < sl) { char c = StringUTF16.getChar(src, sp++); int bb = encodeChar(c); diff --git a/src/java.base/share/classes/sun/nio/cs/SingleByte.java b/src/java.base/share/classes/sun/nio/cs/SingleByte.java index 8efa6b295ff0f..a5bf06cb251c6 100644 --- a/src/java.base/share/classes/sun/nio/cs/SingleByte.java +++ b/src/java.base/share/classes/sun/nio/cs/SingleByte.java @@ -290,32 +290,8 @@ protected void implReplaceWith(byte[] newReplacement) { repl = newReplacement[0]; } - public int encode(char[] src, int sp, int len, byte[] dst) { - int dp = 0; - int sl = sp + Math.min(len, dst.length); - while (sp < sl) { - char c = src[sp++]; - int b = encode(c); - if (b != UNMAPPABLE_ENCODING) { - dst[dp++] = (byte)b; - continue; - } - if (Character.isHighSurrogate(c) && sp < sl && - Character.isLowSurrogate(src[sp])) { - if (len > dst.length) { - sl++; - len--; - } - sp++; - } - dst[dp++] = repl; - } - return dp; - } - @Override - public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) { - int dp = 0; + public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst, int dp) { int sl = sp + Math.min(len, dst.length); while (sp < sl) { char c = (char)(src[sp++] & 0xff); @@ -330,8 +306,7 @@ public int encodeFromLatin1(byte[] src, int sp, int len, byte[] dst) { } @Override - public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst) { - int dp = 0; + public int encodeFromUTF16(byte[] src, int sp, int len, byte[] dst, int dp) { int sl = sp + Math.min(len, dst.length); while (sp < sl) { char c = StringUTF16.getChar(src, sp++); diff --git a/test/jdk/sun/nio/cs/TestEncoderReplaceLatin1.java b/test/jdk/sun/nio/cs/TestEncoderReplaceLatin1.java index 401f365073419..972bc521bb6df 100644 --- a/test/jdk/sun/nio/cs/TestEncoderReplaceLatin1.java +++ b/test/jdk/sun/nio/cs/TestEncoderReplaceLatin1.java @@ -192,7 +192,7 @@ static void testCharsetEncoderReplace(CharsetEncoder encoder, char[] unmappable, /** * Verifies {@linkplain CoderResult#isUnmappable() unmappable} character * {@linkplain CodingErrorAction#REPLACE replacement} using {@link - * ArrayEncoder#encodeFromLatin1(byte[], int, int, byte[]) + * ArrayEncoder#encodeFromLatin1(byte[], int, int, byte[], int) * ArrayEncoder::encodeFromLatin1}. */ private static void testArrayEncoderLatin1Replace(CharsetEncoder encoder, char unmappable, byte[] replacement) { @@ -202,7 +202,7 @@ private static void testArrayEncoderLatin1Replace(CharsetEncoder encoder, char u } byte[] sa = {(byte) unmappable}; byte[] da = new byte[replacement.length]; - int dp = arrayEncoder.encodeFromLatin1(sa, 0, 1, da); + int dp = arrayEncoder.encodeFromLatin1(sa, 0, 1, da, 0); assertTrue(dp == replacement.length && Arrays.equals(da, replacement), () -> { Object context = Map.of( "dp", dp, diff --git a/test/jdk/sun/nio/cs/TestEncoderReplaceUTF16.java b/test/jdk/sun/nio/cs/TestEncoderReplaceUTF16.java index a93dac16ab63f..6ab49d91c6a03 100644 --- a/test/jdk/sun/nio/cs/TestEncoderReplaceUTF16.java +++ b/test/jdk/sun/nio/cs/TestEncoderReplaceUTF16.java @@ -182,7 +182,7 @@ private static byte[] utf16Bytes(char[] cs) { /** * Verifies {@linkplain CoderResult#isUnmappable() unmappable} character * {@linkplain CodingErrorAction#REPLACE replacement} using {@link - * ArrayEncoder#encodeFromUTF16(byte[], int, int, byte[]) + * ArrayEncoder#encodeFromUTF16(byte[], int, int, byte[], int) * ArrayEncoder::encodeFromUTF16}. */ private static void testArrayEncoderUTF16Replace(CharsetEncoder encoder, byte[] unmappableUTF16Bytes, byte[] replacement) { @@ -191,7 +191,7 @@ private static void testArrayEncoderUTF16Replace(CharsetEncoder encoder, byte[] return; } byte[] da = new byte[replacement.length]; - int dp = arrayEncoder.encodeFromUTF16(unmappableUTF16Bytes, 0, unmappableUTF16Bytes.length >>> 1, da); + int dp = arrayEncoder.encodeFromUTF16(unmappableUTF16Bytes, 0, unmappableUTF16Bytes.length >>> 1, da, 0); assertTrue(dp == replacement.length && Arrays.equals(da, replacement), () -> { Object context = Map.of( "dp", dp, diff --git a/test/jdk/sun/nio/cs/TestStringCoding.java b/test/jdk/sun/nio/cs/TestStringCoding.java index d27efc35adac3..d708ef180a238 100644 --- a/test/jdk/sun/nio/cs/TestStringCoding.java +++ b/test/jdk/sun/nio/cs/TestStringCoding.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2000, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,6 +24,7 @@ /* @test * @bug 6636323 6636319 7040220 7096080 7183053 8080248 8054307 * @summary Test if StringCoding and NIO result have the same de/encoding result + * @library /test/lib * @modules java.base/sun.nio.cs * @run main/othervm/timeout=2000 TestStringCoding * @key randomness @@ -32,6 +33,10 @@ import java.util.*; import java.nio.*; import java.nio.charset.*; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static jdk.test.lib.Asserts.assertEquals; public class TestStringCoding { public static void main(String[] args) throws Throwable { @@ -195,29 +200,70 @@ static void testSurrogates(Charset cs) throws Throwable { if (cs.name().equals("UTF-8") || // utf8 handles surrogates cs.name().equals("CESU-8")) // utf8 handles surrogates return; - enc.replaceWith(new byte[] { (byte)'A'}); - sun.nio.cs.ArrayEncoder cae = (sun.nio.cs.ArrayEncoder)enc; - - String str = "ab\uD800\uDC00\uD800\uDC00cd"; - byte[] ba = new byte[str.length() - 2]; - int n = cae.encode(str.toCharArray(), 0, str.length(), ba); - if (n != 6 || !"abAAcd".equals(new String(ba, cs.name()))) - throw new RuntimeException("encode1(surrogates) failed -> " - + cs.name()); - ba = new byte[str.length()]; - n = cae.encode(str.toCharArray(), 0, str.length(), ba); - if (n != 6 || !"abAAcd".equals(new String(ba, 0, n, - cs.name()))) - throw new RuntimeException("encode2(surrogates) failed -> " - + cs.name()); - str = "ab\uD800B\uDC00Bcd"; - ba = new byte[str.length()]; - n = cae.encode(str.toCharArray(), 0, str.length(), ba); - if (n != 8 || !"abABABcd".equals(new String(ba, 0, n, - cs.name()))) - throw new RuntimeException("encode3(surrogates) failed -> " - + cs.name()); + // Configure the replacement sequence + enc.replaceWith(new byte[]{(byte) 'A'}); + + // Test `String::new(byte[], Charset)` with surrogate-pair + { + var srcStr = "ab\uD800\uDC00\uD800\uDC00cd"; + assertEquals(8, srcStr.length()); + var srcBuf = CharBuffer.wrap(srcStr.toCharArray(), 0, 8); + var dstBuf = ByteBuffer.allocate(6); + var cr = enc.encode(srcBuf, dstBuf, true); + if (cr.isError()) { + cr.throwException(); + } + var dstArr = dstBuf.array(); + assertEquals( + 6, dstBuf.position(), + "Was expecting 6 items, found: " + Map.of( + "position", dstBuf.position(), + "array", prettyPrintBytes(dstArr))); + var dstStr = new String(dstArr, cs); + assertEquals("abAAcd", dstStr); + } + + // Test `String::new(byte[], int, int, Charset)` with surrogate-pair + { + var srcStr = "ab\uD800\uDC00\uD800\uDC00cd"; + assertEquals(8, srcStr.length()); + var srcBuf = CharBuffer.wrap(srcStr.toCharArray(), 0, 8); + var dstBuf = ByteBuffer.allocate(8); + var cr = enc.encode(srcBuf, dstBuf, true); + if (cr.isError()) { + cr.throwException(); + } + var dstArr = dstBuf.array(); + assertEquals( + 6, dstBuf.position(), + "Was expecting 6 items, found: " + Map.of( + "position", dstBuf.position(), + "array", prettyPrintBytes(dstArr))); + var dstStr = new String(dstArr, 0, 6, cs); + assertEquals("abAAcd", dstStr); + } + + // Test `String::new(byte[], int, int, Charset)` with a dangling + // high- and low-surrogate + { + var srcStr = "ab\uD800B\uDC00Bcd"; + var srcBuf = CharBuffer.wrap(srcStr.toCharArray(), 0, 8); + var dstBuf = ByteBuffer.allocate(8); + var cr = enc.encode(srcBuf, dstBuf, true); + if (cr.isError()) { + cr.throwException(); + } + var dstArr = dstBuf.array(); + assertEquals( + 8, dstBuf.position(), + "Was expecting 8 items, found: " + Map.of( + "position", dstBuf.position(), + "array", prettyPrintBytes(dstArr))); + var dstStr = new String(dstArr, 0, 8, cs); + assertEquals("abABABcd", dstStr); + } + /* sun.nio.cs.ArrayDeEncoder works on the assumption that the invoker (StringCoder) allocates enough output buf, utf8 and double-byte coder does not check the output buffer limit. @@ -242,4 +288,9 @@ static void testSurrogates(Charset cs) throws Throwable { } } } + + private static String prettyPrintBytes(byte[] bs) { + return "[" + HexFormat.ofDelimiter(", ").withPrefix("0x").formatHex(bs) + "]"; + } + }