Skip to content

Commit 4c25457

Browse files
committed
Write 4-byte characters (surrogate pairs) instead of escapes
1 parent 89b2381 commit 4c25457

File tree

3 files changed

+39
-2
lines changed

3 files changed

+39
-2
lines changed

src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import java.io.*;
44
import java.math.BigDecimal;
55
import java.math.BigInteger;
6+
import java.nio.charset.StandardCharsets;
67

78
import com.fasterxml.jackson.core.*;
89
import com.fasterxml.jackson.core.io.CharTypes;
@@ -659,6 +660,10 @@ public void writeUTF8String(byte[] text, int offset, int len) throws IOException
659660
_outputBuffer[_outputTail++] = _quoteChar;
660661
}
661662

663+
private boolean isSurrogatePair(char ch) {
664+
return (ch & 0xD800) == 0xD800;
665+
}
666+
662667
/*
663668
/**********************************************************
664669
/* Output method implementations, unprocessed ("raw")
@@ -1510,7 +1515,14 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
15101515
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
15111516
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
15121517
} else {
1513-
outputPtr = _outputMultiByteChar(ch, outputPtr);
1518+
// multibyte character
1519+
if (isSurrogatePair((char) ch) && offset < end) {
1520+
char highSurrogate = (char) ch;
1521+
char lowSurrogate = cbuf[offset++];
1522+
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1523+
} else {
1524+
outputPtr = _outputMultiByteChar(ch, outputPtr);
1525+
}
15141526
}
15151527
}
15161528
_outputTail = outputPtr;
@@ -1548,7 +1560,14 @@ private final void _writeStringSegment2(final String text, int offset, final int
15481560
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
15491561
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
15501562
} else {
1551-
outputPtr = _outputMultiByteChar(ch, outputPtr);
1563+
// multibyte character
1564+
if (isSurrogatePair((char) ch) && offset < end) {
1565+
char highSurrogate = (char) ch;
1566+
char lowSurrogate = text.charAt(offset++);
1567+
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1568+
} else {
1569+
outputPtr = _outputMultiByteChar(ch, outputPtr);
1570+
}
15521571
}
15531572
}
15541573
_outputTail = outputPtr;
@@ -2133,6 +2152,13 @@ protected final void _outputSurrogates(int surr1, int surr2) throws IOException
21332152
bbuf[_outputTail++] = (byte) (0x80 | (c & 0x3f));
21342153
}
21352154

2155+
private int _outputSurrogatePair(char highSurrogate, char lowSurrogate, int outputPtr) {
2156+
String s = String.valueOf(highSurrogate) + lowSurrogate;
2157+
byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
2158+
System.arraycopy(bytes, 0, _outputBuffer, outputPtr, bytes.length);
2159+
return outputPtr + bytes.length;
2160+
}
2161+
21362162
/**
21372163
*
21382164
* @param ch

src/test/java/com/fasterxml/jackson/core/json/StringGenerationTest.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,17 @@ void longerRandomMultiChunk() throws Exception
8686
}
8787
}
8888

89+
@Test
90+
public void testWritingSurrogatePairs() throws IOException {
91+
ByteArrayOutputStream stream = new ByteArrayOutputStream();
92+
JsonGenerator generator = FACTORY.createGenerator(stream, JsonEncoding.UTF8);
93+
String string = "システム\uD867\uDE3D"; // システム𩸽
94+
generator.writeString(string);
95+
generator.flush();
96+
generator.close();
97+
assertEquals("\"" + string + "\"", stream.toString());
98+
}
99+
89100
/*
90101
/**********************************************************
91102
/* Internal methods

0 commit comments

Comments
 (0)