diff --git a/ext/json/ext/generator/extconf.rb b/ext/json/ext/generator/extconf.rb index 078068cf..71ba695d 100644 --- a/ext/json/ext/generator/extconf.rb +++ b/ext/json/ext/generator/extconf.rb @@ -6,5 +6,40 @@ else append_cflags("-std=c99") $defs << "-DJSON_GENERATOR" + + if enable_config('generator-use-simd', default=true) + if RbConfig::CONFIG['host_cpu'] =~ /^(arm.*|aarch64.*)/ + # Try to compile a small program using NEON instructions + if have_header('arm_neon.h') + have_type('uint8x16_t', headers=['arm_neon.h']) && try_compile(<<~'SRC') + #include <arm_neon.h> + int main() { + uint8x16_t test = vdupq_n_u8(32); + return 0; + } + SRC + $defs.push("-DENABLE_SIMD") + + if enable_config('generator-use-neon-lut', default=false) + $defs.push('-DUSE_NEON_LUT') + end + end + end + + if have_type('__m128i', headers=['x86intrin.h']) && try_compile(<<~'SRC', opt='-msse2') + #include <x86intrin.h> + int main() { + __m128i test = _mm_set1_epi8(32); + return 0; + } + SRC + $defs.push("-DENABLE_SIMD") + end + + have_header('cpuid.h') + end + + create_header + create_makefile 'json/ext/generator' end diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index b2fcd2b2..7b9d7923 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -4,6 +4,8 @@ #include <math.h> #include <ctype.h> +#include "simd.h" + /* ruby api and some helpers */ typedef struct JSON_Generator_StateStruct { @@ -108,12 +110,27 @@ typedef struct _search_state { const char *end; const char *cursor; FBuffer *buffer; + +#ifdef ENABLE_SIMD + const char *chunk_base; + uint8_t has_matches; + +#ifdef HAVE_SIMD_NEON + uint64_t matches_mask; +#elif HAVE_SIMD_SSE2 + uint16_t matches_mask; +#else +#error "Unknown SIMD Implementation." 
+#endif /* HAVE_SIMD_NEON */ +#endif /* ENABLE_SIMD */ } search_state; static inline void search_flush(search_state *search) { - fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor); - search->cursor = search->ptr; + if (search->cursor < search->ptr) { + fbuffer_append(search->buffer, search->cursor, search->ptr - search->cursor); + search->cursor = search->ptr; + } } static const unsigned char escape_table_basic[256] = { @@ -129,6 +146,8 @@ static const unsigned char escape_table_basic[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; +unsigned char (*search_escape_basic_impl)(search_state *); + static inline unsigned char search_escape_basic(search_state *search) { while (search->ptr < search->end) { @@ -185,7 +204,7 @@ static inline void escape_UTF8_char_basic(search_state *search) { */ static inline void convert_UTF8_to_JSON(search_state *search) { - while (search_escape_basic(search)) { + while (search_escape_basic_impl(search)) { escape_UTF8_char_basic(search); } } @@ -226,6 +245,387 @@ static inline void escape_UTF8_char(search_state *search, unsigned char ch_len) search->cursor = (search->ptr += ch_len); } +#ifdef ENABLE_SIMD + +#ifdef HAVE_SIMD_NEON +#ifdef USE_NEON_LUT +struct _simd_state { + + struct { + uint8x16x4_t escape_table_basic[2]; + } neon; +}; + +static struct _simd_state simd_state; +#endif /* USE_NEON_LUT */ +#endif /* HAVE_SIMD_NEON */ +#endif /* ENABLE_SIMD */ + +#ifdef ENABLE_SIMD +#ifdef HAVE_SIMD_NEON + +static inline unsigned char neon_next_match(search_state *search) { + uint64_t mask = search->matches_mask; + if (mask > 0) { + uint32_t index = trailing_zeros64(mask) >> 2; + + // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character. 
+ // If we want to use a similar approach for full escaping we'll need to ensure: + // search->chunk_base + index >= search->ptr + // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match + // is one byte after the previous match then: + // search->chunk_base + index == search->ptr + search->ptr = search->chunk_base + index; + mask &= mask - 1; + search->matches_mask = mask; + search_flush(search); + return 1; + } + return 0; +} + +// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon +static inline uint64_t neon_match_mask(uint8x16_t matches) { + const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4); + const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0); + return mask & 0x8888888888888888ull; +} + +#ifdef USE_NEON_LUT +static inline uint8x16_t neon_lut_update(uint8x16_t chunk) { + uint8x16_t tmp1 = vqtbl4q_u8(simd_state.neon.escape_table_basic[0], chunk); + uint8x16_t tmp2 = vqtbl4q_u8(simd_state.neon.escape_table_basic[1], veorq_u8(chunk, vdupq_n_u8(0x40))); + uint8x16_t result = vorrq_u8(tmp1, tmp2); + return result; +} + +static inline unsigned char search_escape_basic_neon_advance_lut(search_state *search) { + while (search->ptr+sizeof(uint8x16_t) < search->end) { + uint8x16_t chunk = vld1q_u8((const unsigned char *)search->ptr); + uint8x16_t result = neon_lut_update(chunk); + + if (vmaxvq_u8(result) == 0) { + search->ptr += sizeof(uint8x16_t); + continue; + } + + search->matches_mask = neon_match_mask(vceqq_u8(result, vdupq_n_u8(9))); + search->has_matches = 1; + search->chunk_base = search->ptr; + return neon_next_match(search); + } + + // There are fewer than 16 bytes left. + unsigned long remaining = (search->end - search->ptr); + if (remaining >= 8) { + // Flush the buffer so everything up until the last 'remaining' characters are unflushed. 
+ search_flush(search); + + FBuffer *buf = search->buffer; + fbuffer_inc_capa(buf, sizeof(uint8x16_t)); + + char *s = (buf->ptr + buf->len); + + memset(s, 'X', sizeof(uint8x16_t)); + + // Optimistically copy the remaining characters to the output FBuffer. If there are no characters + // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage. + memcpy(s, search->ptr, remaining); + + uint8x16_t chunk = vld1q_u8((const unsigned char *) s); + uint8x16_t result = neon_lut_update(chunk); + if (vmaxvq_u8(result) == 0) { + // Nothing to escape, ensure search_flush doesn't do anything by setting + // search->cursor to search->ptr. + buf->len += remaining; + search->ptr = search->end; + search->cursor = search->end; + return 0; + } + } + + return 0; +} + +#else + +static inline uint8x16_t neon_rules_update(uint8x16_t chunk) { + const uint8x16_t lower_bound = vdupq_n_u8(' '); + const uint8x16_t backslash = vdupq_n_u8('\\'); + const uint8x16_t dblquote = vdupq_n_u8('\"'); + + uint8x16_t too_low = vcltq_u8(chunk, lower_bound); + uint8x16_t has_backslash = vceqq_u8(chunk, backslash); + uint8x16_t has_dblquote = vceqq_u8(chunk, dblquote); + uint8x16_t needs_escape = vorrq_u8(too_low, vorrq_u8(has_backslash, has_dblquote)); + + return needs_escape; +} + +static unsigned char search_escape_basic_neon_advance_rules(search_state *search) { + /* + * The code below implements an SIMD-based algorithm to determine if N bytes at a time + * need to be escaped. + * + * Assume the ptr = "Te\sting!" (the double quotes are included in the string) + * + * The explanation will be limited to the first 8 bytes of the string for simplicity. However + * the vector instructions may work on larger vectors. + * + * First, we load three constants 'lower_bound', 'backslash' and 'dblquote' in vector registers. 
+ * + * lower_bound: [20 20 20 20 20 20 20 20] + * backslash: [5C 5C 5C 5C 5C 5C 5C 5C] + * dblquote: [22 22 22 22 22 22 22 22] + * + * Next we load the first chunk of the ptr: + * [22 54 65 5C 73 74 69 6E] (" T e \ s t i n) + * + * First we check if any byte in chunk is less than 32 (0x20). This returns the following vector + * as no bytes are less than 32 (0x20): + * [0 0 0 0 0 0 0 0] + * + * Next, we check if any byte in chunk is equal to a backslash: + * [0 0 0 FF 0 0 0 0] + * + * Finally we check if any byte in chunk is equal to a double quote: + * [FF 0 0 0 0 0 0 0] + * + * Now we have three vectors where each byte indicates if the corresponding byte in chunk + * needs to be escaped. We combine these vectors with a series of logical OR instructions. + * This is the needs_escape vector and it is equal to: + * [FF 0 0 FF 0 0 0 0] + * + * For ARM Neon specifically, we check if the maximum number in the vector is 0. The maximum of + * the needs_escape vector is FF. Therefore, we know there is at least one byte that needs to be + * escaped. + * + * If the maximum of the needs_escape vector is 0, none of the bytes need to be escaped and + * we advance pos by the width of the vector. + * + * To determine how to escape characters, we look at each value in the needs_escape vector and take + * the appropriate action. + */ + while (search->ptr+sizeof(uint8x16_t) < search->end) { + uint8x16_t chunk = vld1q_u8((const unsigned char *)search->ptr); + uint8x16_t needs_escape = neon_rules_update(chunk); + + if (vmaxvq_u8(needs_escape) == 0) { + search->ptr += sizeof(uint8x16_t); + continue; + } + + search->matches_mask = neon_match_mask(needs_escape); + search->has_matches = 1; + search->chunk_base = search->ptr; + return neon_next_match(search); + } + + // There are fewer than 16 bytes left. + unsigned long remaining = (search->end - search->ptr); + if (remaining >= 8) { + // Flush the buffer so everything up until the last 'remaining' characters are unflushed. 
+ search_flush(search); + + FBuffer *buf = search->buffer; + fbuffer_inc_capa(buf, sizeof(uint8x16_t)); + + char *s = (buf->ptr + buf->len); + + memset(s, 'X', sizeof(uint8x16_t)); + + // Optimistically copy the remaining characters to the output FBuffer. If there are no characters + // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage. + memcpy(s, search->ptr, remaining); + + uint8x16_t chunk = vld1q_u8((const unsigned char *) s); + uint8x16_t result = neon_rules_update(chunk); + if (vmaxvq_u8(result) == 0) { + // Nothing to escape, ensure search_flush doesn't do anything by setting + // search->cursor to search->ptr. + buf->len += remaining; + search->ptr = search->end; + search->cursor = search->end; + return 0; + } + } + + return 0; +} +#endif /* USE_NEON_LUT */ + +static inline unsigned char search_escape_basic_neon(search_state *search) +{ + if (RB_UNLIKELY(search->has_matches)) { + // There are more matches if search->matches_mask > 0. + if (search->matches_mask > 0) { + if (RB_LIKELY(neon_next_match(search))) { + return 1; + } + } else { + // neon_next_match will only advance search->ptr up to the last matching character. + // Skip over any characters in the last chunk that occur after the last match. 
+ search->has_matches = 0; + search->ptr = search->chunk_base+sizeof(uint8x16_t); + } + } +#ifdef USE_NEON_LUT + if (search_escape_basic_neon_advance_lut(search)) { + return 1; + } +#else + if (search_escape_basic_neon_advance_rules(search)) { + return 1; + } +#endif /* USE_NEON_LUT */ + if (search->ptr < search->end) { + return search_escape_basic(search); + } + + search_flush(search); + return 0; +} +#endif /* HAVE_SIMD_NEON */ + +#ifdef HAVE_SIMD_SSE2 + +// #define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a) +// #define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a) +// #define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1)) +// #define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a) + +static inline unsigned char sse2_next_match(search_state *search) { + int mask = search->matches_mask; + if (mask > 0) { + int index = trailing_zeros(mask); + + // It is assumed escape_UTF8_char_basic will only ever increase search->ptr by at most one character. + // If we want to use a similar approach for full escaping we'll need to ensure: + // search->chunk_base + index >= search->ptr + // However, since we know escape_UTF8_char_basic only increases search->ptr by one, if the next match + // is one byte after the previous match then: + // search->chunk_base + index == search->ptr + search->ptr = search->chunk_base + index; + mask &= mask - 1; + search->matches_mask = mask; + search_flush(search); + return 1; + } + return 0; +} + +#ifdef __GNUC__ +#pragma GCC push_options +#pragma GCC target ("sse2") +#endif /* __GNUC__ */ + +#ifdef __clang__ +__attribute__((target("sse2"))) +#endif /* __clang__ */ +static inline __m128i sse2_update(__m128i chunk) { + const __m128i lower_bound = _mm_set1_epi8(' '); + const __m128i backslash = _mm_set1_epi8('\\'); + const __m128i dblquote = _mm_set1_epi8('\"'); + const __m128i high_bit = _mm_set1_epi8(0x80); + + // __m128i too_low = _mm_cmplt_epu8(chunk, lower_bound); + + // This is a signed comparison. 
We need special handling for bytes > 127. + __m128i too_low = _mm_cmplt_epi8(chunk, lower_bound); + + // Determine which bytes have the high bit set and remove them from 'too_low'. + __m128i high_bit_set = _mm_cmpeq_epi8(_mm_and_si128(chunk, high_bit), high_bit); + too_low = _mm_andnot_si128(high_bit_set, too_low); + + __m128i has_backslash = _mm_cmpeq_epi8(chunk, backslash); + __m128i has_dblquote = _mm_cmpeq_epi8(chunk, dblquote); + __m128i needs_escape = _mm_or_si128(too_low, _mm_or_si128(has_backslash, has_dblquote)); + return needs_escape; +} + +#ifdef __clang__ +__attribute__((target("sse2"))) +#endif /* __clang__ */ +static unsigned char search_escape_basic_sse2(search_state *search) { + if (RB_UNLIKELY(search->has_matches)) { + // There are more matches if search->matches_mask > 0. + if (search->matches_mask > 0) { + if (RB_LIKELY(sse2_next_match(search))) { + return 1; + } + } else { + // sse2_next_match will only advance search->ptr up to the last matching character. + // Skip over any characters in the last chunk that occur after the last match. + search->has_matches = 0; + search->ptr = search->chunk_base+sizeof(__m128i); + } + } + + while (search->ptr+sizeof(__m128i) < search->end) { + __m128i chunk = _mm_loadu_si128((__m128i const*)search->ptr); + __m128i needs_escape = sse2_update(chunk); + + int needs_escape_mask = _mm_movemask_epi8(needs_escape); + + if (needs_escape_mask == 0) { + search->ptr += sizeof(__m128i); + continue; + } + + search->has_matches = 1; + search->matches_mask = needs_escape_mask; + search->chunk_base = search->ptr; + return sse2_next_match(search); + } + + // There are fewer than 16 bytes left. + unsigned long remaining = (search->end - search->ptr); + if (remaining >= 8) { + // Flush the buffer so everything up until the last 'remaining' characters are unflushed. 
+ search_flush(search); + + FBuffer *buf = search->buffer; + fbuffer_inc_capa(buf, sizeof(__m128i)); + + char *s = (buf->ptr + buf->len); + + memset(s, 'X', sizeof(__m128i)); + + // Optimistically copy the remaining characters to the output FBuffer. If there are no characters + // to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage. + memcpy(s, search->ptr, remaining); + + __m128i chunk = _mm_loadu_si128((__m128i const *) s); + __m128i needs_escape = sse2_update(chunk); + + int needs_escape_mask = _mm_movemask_epi8(needs_escape); + + if (needs_escape_mask == 0) { + // Nothing to escape, ensure search_flush doesn't do anything by setting + // search->cursor to search->ptr. + buf->len += remaining; + search->ptr = search->end; + search->cursor = search->end; + return 0; + } + } + + if (search->ptr < search->end) { + return search_escape_basic(search); + } + + search_flush(search); + return 0; +} + +#ifdef __GNUC__ +#pragma GCC reset_options +#endif /* __GNUC__ */ + +#endif /* HAVE_SIMD_SSE2 */ + +#endif /* ENABLE_SIMD */ + static const unsigned char script_safe_escape_table[256] = { // ASCII Control Characters 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, @@ -973,6 +1373,12 @@ static void generate_json_string(FBuffer *buffer, struct generate_json_data *dat search.cursor = search.ptr; search.end = search.ptr + len; +#ifdef ENABLE_SIMD + search.matches_mask = 0; + search.has_matches = 0; + search.chunk_base = NULL; +#endif /* ENABLE_SIMD */ + switch(rb_enc_str_coderange(obj)) { case ENC_CODERANGE_7BIT: case ENC_CODERANGE_VALID: @@ -1134,6 +1540,20 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc) return Qundef; } +/* SIMD Utilities (if enabled) */ +#ifdef ENABLE_SIMD + +#ifdef HAVE_SIMD_NEON +#ifdef USE_NEON_LUT +static void initialize_simd_neon(void) { + simd_state.neon.escape_table_basic[0] = load_uint8x16_4(escape_table_basic); + simd_state.neon.escape_table_basic[1] = load_uint8x16_4(escape_table_basic+64); 
+} +#endif /* USE_NEON_LUT */ +#endif /* HAVE_SIMD_NEON */ + +#endif /* ENABLE_SIMD */ + static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io) { GET_STATE(self); @@ -1790,4 +2210,27 @@ void Init_generator(void) binary_encindex = rb_ascii8bit_encindex(); rb_require("json/ext/generator/state"); + + + switch(find_simd_implementation()) { +#ifdef ENABLE_SIMD +#ifdef HAVE_SIMD_NEON + case SIMD_NEON: + /* Initialize ARM Neon SIMD Implementation. */ +#ifdef USE_NEON_LUT + initialize_simd_neon(); +#endif /* USE_NEON_LUT */ + search_escape_basic_impl = search_escape_basic_neon; + break; +#endif /* HAVE_SIMD_NEON */ +#ifdef HAVE_SIMD_SSE2 + case SIMD_SSE2: + search_escape_basic_impl = search_escape_basic_sse2; + break; +#endif /* HAVE_SIMD_SSE2 */ +#endif /* ENABLE_SIMD */ + default: + search_escape_basic_impl = search_escape_basic; + break; + } } diff --git a/ext/json/ext/generator/simd.h b/ext/json/ext/generator/simd.h new file mode 100644 index 00000000..f5884817 --- /dev/null +++ b/ext/json/ext/generator/simd.h @@ -0,0 +1,131 @@ +#include "extconf.h" + +typedef enum { + SIMD_NONE, + SIMD_NEON, + SIMD_SSE2 +} SIMD_Implementation; + +#ifdef ENABLE_SIMD + +#ifdef __clang__ + #if __has_builtin(__builtin_ctzll) + #define HAVE_BUILTIN_CTZLL 1 + #else + #define HAVE_BUILTIN_CTZLL 0 + #endif +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) + #define HAVE_BUILTIN_CTZLL 1 +#else + #define HAVE_BUILTIN_CTZLL 0 +#endif + +static inline uint32_t trailing_zeros64(uint64_t input) { +#if HAVE_BUILTIN_CTZLL + return __builtin_ctzll(input); +#else + uint32_t trailing_zeros = 0; + uint64_t temp = input; + while ((temp & 1) == 0 && temp > 0) { + trailing_zeros++; + temp >>= 1; + } + return trailing_zeros; +#endif +} + +static inline int trailing_zeros(int input) { + #if HAVE_BUILTIN_CTZLL + return __builtin_ctz(input); + #else + int trailing_zeros = 0; + int temp = input; + while ((temp & 1) == 0 && temp > 0) { + trailing_zeros++; + 
temp >>= 1; + } + return trailing_zeros; + #endif + } + +#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64) +#include <arm_neon.h> + +#define FIND_SIMD_IMPLEMENTATION_DEFINED 1 +SIMD_Implementation find_simd_implementation() { + return SIMD_NEON; +} + +#define HAVE_SIMD_NEON 1 + +uint8x16x4_t load_uint8x16_4(const unsigned char *table) { + uint8x16x4_t tab; + tab.val[0] = vld1q_u8(table); + tab.val[1] = vld1q_u8(table+16); + tab.val[2] = vld1q_u8(table+32); + tab.val[3] = vld1q_u8(table+48); + return tab; +} + +void print_uint8x16(char *msg, uint8x16_t vec) { + printf("%s\n[ ", msg); + uint8_t store[16] = {0}; + vst1q_u8(store, vec); + for(int i=0; i<16; i++) { + printf("%3d ", store[i]); + } + printf("]\n"); +} + +#endif /* ARM Neon Support.*/ + +#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) + +#ifdef HAVE_X86INTRIN_H +#include <x86intrin.h> + +#define HAVE_SIMD_SSE2 1 + +void print_m128i(const char *prefix, __m128i vec) { + uint8_t r[16]; + _mm_storeu_si128((__m128i *) r, vec); + + printf("%s = [ ", prefix); + for(int i=0; i<16; i++) { + printf("%02x ", r[i]); + } + printf("]\n"); +} + +#ifdef HAVE_CPUID_H +#define FIND_SIMD_IMPLEMENTATION_DEFINED 1 + +#include <cpuid.h> +#endif /* HAVE_CPUID_H */ + +SIMD_Implementation find_simd_implementation(void) { + +#if defined(__GNUC__ ) || defined(__clang__) +#ifdef __GNUC__ + __builtin_cpu_init(); +#endif /* __GNUC__ */ + + // TODO Revisit. I think the SSE version now only uses SSE2 instructions. 
+ if (__builtin_cpu_supports("sse2")) { + return SIMD_SSE2; + } +#endif /* __GNUC__ || __clang__*/ + + return SIMD_NONE; +} + +#endif /* HAVE_X86INTRIN_H */ +#endif /* X86_64 Support */ + +#endif /* ENABLE_SIMD */ + +#ifndef FIND_SIMD_IMPLEMENTATION_DEFINED +SIMD_Implementation find_simd_implementation(void) { + return SIMD_NONE; +} +#endif \ No newline at end of file diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index d97f0505..f4621fa2 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -427,18 +427,34 @@ def test_backslash json = '["\\\\.(?i:gif|jpe?g|png)$"]' assert_equal json, generate(data) # - data = [ '\\"' ] - json = '["\\\\\""]' + data = [ '\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$\\.(?i:gif|jpe?g|png)$' ] + json = '["\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$\\\\.(?i:gif|jpe?g|png)$"]' + assert_equal json, generate(data) + # + data = [ '\\"\\"\\"\\"\\"\\"\\"\\"\\"\\"\\"' ] + json = '["\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\"\\\\\""]' assert_equal json, generate(data) # data = [ '/' ] json = '["/"]' assert_equal json, generate(data) # + data = [ '////////////////////////////////////////////////////////////////////////////////////' ] + json = '["////////////////////////////////////////////////////////////////////////////////////"]' + assert_equal json, generate(data) + # data = [ '/' ] json = '["\/"]' assert_equal json, generate(data, :script_safe => true) # + data = [ '///////////' ] + json = '["\/\/\/\/\/\/\/\/\/\/\/"]' + assert_equal json, generate(data, :script_safe => true) + # + data = [ 
'///////////////////////////////////////////////////////' ] + json = '["\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/"]' + assert_equal json, generate(data, :script_safe => true) + # data = [ "\u2028\u2029" ] json = '["\u2028\u2029"]' assert_equal json, generate(data, :script_safe => true) @@ -455,6 +471,10 @@ def test_backslash json = '["\""]' assert_equal json, generate(data) # + data = ['"""""""""""""""""""""""""'] + json = '["\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\"\""]' + assert_equal json, generate(data) + # data = ["'"] json = '["\\\'"]' assert_equal '["\'"]', generate(data) @@ -462,6 +482,30 @@ def test_backslash data = ["倩", "瀨"] json = '["倩","瀨"]' assert_equal json, generate(data, script_safe: true) + # + data = '["This is a "test" of the emergency broadcast system."]' + json = "\"[\\\"This is a \\\"test\\\" of the emergency broadcast system.\\\"]\"" + assert_equal json, generate(data) + # + data = '\tThis is a test of the emergency broadcast system.' + json = "\"\\\\tThis is a test of the emergency broadcast system.\"" + assert_equal json, generate(data) + # + data = 'This\tis a test of the emergency broadcast system.' + json = "\"This\\\\tis a test of the emergency broadcast system.\"" + assert_equal json, generate(data) + # + data = 'This is\ta test of the emergency broadcast system.' + json = "\"This is\\\\ta test of the emergency broadcast system.\"" + assert_equal json, generate(data) + # + data = 'This is a test of the emergency broadcast\tsystem.' + json = "\"This is a test of the emergency broadcast\\\\tsystem.\"" + assert_equal json, generate(data) + # + data = 'This is a test of the emergency broadcast\tsystem.\n' + json = "\"This is a test of the emergency broadcast\\\\tsystem.\\\\n\"" + assert_equal json, generate(data) end def test_string_subclass