diff --git a/tests/test_document.py b/tests/test_document.py
index 62afd04..3fd9b40 100644
--- a/tests/test_document.py
+++ b/tests/test_document.py
@@ -25,6 +25,61 @@ def test_document_from_str():
     assert doc.as_obj == {"hello": "world"}
 
 
+def test_document_unicode():
+    """Round-trip a document holding U+FFFD, then dump it with escaping."""
+    # Both spellings of the input must dump back unchanged and parse to
+    # the same one-element list.
+    for source in ('["bar�"]', '["bar\uFFFD"]'):
+        parsed = Document(source)
+        assert parsed.dumps() == source
+        assert parsed.as_obj == ['bar�']
+
+    # Reuses the last document built by the loop above.
+    escaped = parsed.dumps(flags=WriterFlags.ESCAPE_UNICODE)
+    assert escaped == '["bar\\uFFFD"]'
+
+def test_document_unicode_stdlib():
+    """Unicode round-trip checks adapted from CPython's JSON tests.
+
+    Adapted tests from cpython lib/tests/test_json/test_unicode.py; the
+    comment on each case names the upstream test it came from.
+    """
+    # Inputs that appear in more than one column of the table below.
+    greek = '\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}'
+    quoted_greek = '"\N{GREEK SMALL LETTER ALPHA}\N{GREEK CAPITAL LETTER OMEGA}"'
+    astral = '"\U0001d120"'
+    wrapped_astral = '"z\U0001d120x"'
+    escape = WriterFlags.ESCAPE_UNICODE
+
+    # Each row: (Document input, plain dump, escaped dump, decoded object).
+    cases = [
+        # test_encoding3
+        (quoted_greek, quoted_greek,
+         '"\\u03B1\\u03A9"', '\u03b1\u03a9'),
+        # test_encoding4
+        ([greek], f'["{greek}"]',
+         '["\\u03B1\\u03A9"]', ['\u03b1\u03a9']),
+        # test_big_unicode_encode
+        (astral, astral,
+         '"\\uD834\\uDD20"', '𝄠'),
+        # test_big_unicode_decode
+        (wrapped_astral, wrapped_astral,
+         '"z\\uD834\\uDD20x"', 'z𝄠x'),
+    ]
+    for content, plain, escaped, decoded in cases:
+        doc = Document(content)
+        assert doc.dumps() == plain
+        assert doc.dumps(flags=escape) == escaped
+        assert doc.as_obj == decoded
+
+    # test_unicode_decode: every \uXXXX escape in the range must decode to
+    # the matching character.  The upper bound is exclusive, so U+D7FF
+    # itself is not exercised.
+    for code_point in range(0, 0xd7ff):
+        literal = '"\\u{0:04x}"'.format(code_point)
+        # Parse with reader flags set to 0.
+        assert Document(literal, flags=0).as_obj == chr(code_point)
+
 def test_document_types():
     """Ensure each primitive type can be upcast (which does not have its own
     dedicated test.)"""
diff --git a/yyjson/document.c b/yyjson/document.c
index 69c6c5f..dda4456 100644
--- a/yyjson/document.c
+++ b/yyjson/document.c
@@ -22,7 +22,7 @@ static PyObject *path = NULL;
 static inline size_t num_utf8_chars(const char *src, size_t len) {
   size_t count = 0;
   for (size_t i = 0; i < len; i++) {
-    if (yyjson_likely(src[i] >> 6 != 2)) {
+    if (yyjson_likely((src[i] & 0xff) >> 6 != 2)) {
       count++;
     }
   }