diff --git a/RunTest b/RunTest index fab728b2a..b6e1ff92f 100755 --- a/RunTest +++ b/RunTest @@ -307,8 +307,8 @@ while [ $# -gt 0 ] ; do malloc|-malloc) malloc=yes;; nojit|-nojit) nojit=yes;; sim|-sim) shift; sim=$1;; - valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all-non-file --error-exitcode=70";; - valgrind-log|-valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all-non-file --log-file=report.%p ";; + valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --leak-check=yes --errors-for-leak-kinds=all --smc-check=all-non-file --error-exitcode=70";; + valgrind-log|-valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=yes --errors-for-leak-kinds=all --error-limit=no --smc-check=all-non-file --log-file=report.%p ";; ~*) if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then skip="$skip `expr "$1" : '~\([0-9]*\)*$'`" diff --git a/src/pcre2.h.generic b/src/pcre2.h.generic index 761f72175..29fa47729 100644 --- a/src/pcre2.h.generic +++ b/src/pcre2.h.generic @@ -435,6 +435,10 @@ released, the numbers must not be changed. */ #define PCRE2_ERROR_JIT_UNSUPPORTED (-68) #define PCRE2_ERROR_REPLACECASE (-69) #define PCRE2_ERROR_TOOLARGEREPLACE (-70) +#define PCRE2_ERROR_DIFFERENT_SUBJECT (-71) +#define PCRE2_ERROR_DIFFERENT_LENGTH (-72) +#define PCRE2_ERROR_DIFFERENT_OFFSET (-73) +#define PCRE2_ERROR_BADUTFCAPTURE (-74) /* Request types for pcre2_pattern_info() */ diff --git a/src/pcre2.h.in b/src/pcre2.h.in index 93d44a636..42f75f04c 100644 --- a/src/pcre2.h.in +++ b/src/pcre2.h.in @@ -435,6 +435,10 @@ released, the numbers must not be changed. 
*/ #define PCRE2_ERROR_JIT_UNSUPPORTED (-68) #define PCRE2_ERROR_REPLACECASE (-69) #define PCRE2_ERROR_TOOLARGEREPLACE (-70) +#define PCRE2_ERROR_DIFFERENT_SUBJECT (-71) +#define PCRE2_ERROR_DIFFERENT_LENGTH (-72) +#define PCRE2_ERROR_DIFFERENT_OFFSET (-73) +#define PCRE2_ERROR_BADUTFCAPTURE (-74) /* Request types for pcre2_pattern_info() */ diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c index c74d07dc0..a947945b3 100644 --- a/src/pcre2_dfa_match.c +++ b/src/pcre2_dfa_match.c @@ -615,8 +615,7 @@ if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT) { if (current_subject <= start_subject) break; current_subject--; - ACROSSCHAR(current_subject > start_subject, current_subject, - current_subject--); + BACKCHARTEST(current_subject, start_subject); } } else @@ -3386,46 +3385,61 @@ rws->next = NULL; rws->size = RWS_BASE_SIZE; rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE; -/* Recognize NULL, length 0 as an empty string. */ - -if (subject == NULL && length == 0) subject = null_str; - -/* Plausibility checks */ +if (match_data == NULL) return PCRE2_ERROR_NULL; -if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; -if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL) - return PCRE2_ERROR_NULL; +/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT, +free the memory that was obtained. 
*/ +if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0) + { + match_data->memctl.free((void *)match_data->subject, + match_data->memctl.memory_data); + match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT; + } +/* store data needed by pcre2_substitute */ +match_data->subject = match_data->original_subject = subject; if (length == PCRE2_ZERO_TERMINATED) { length = PRIV(strlen)(subject); was_zero_terminated = 1; } +match_data->subject_length = length; +match_data->start_offset = start_offset; + -if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE; -if (start_offset > length) return PCRE2_ERROR_BADOFFSET; +/* Recognize NULL, length 0 as an empty string. */ + +if (subject == NULL && length == 0) subject = null_str; + +/* Plausibility checks */ + +if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return match_data->rc = PCRE2_ERROR_BADOPTION; +if (re == NULL || subject == NULL || workspace == NULL) return match_data->rc = PCRE2_ERROR_NULL; + +if (wscount < 20) return match_data->rc = PCRE2_ERROR_DFA_WSSIZE; +if (start_offset > length) return match_data->rc = PCRE2_ERROR_BADOFFSET; /* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same time. */ if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 && ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0) - return PCRE2_ERROR_BADOPTION; + return match_data->rc = PCRE2_ERROR_BADOPTION; /* Invalid UTF support is not available for DFA matching. */ if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0) - return PCRE2_ERROR_DFA_UINVALID_UTF; + return match_data->rc = PCRE2_ERROR_DFA_UINVALID_UTF; /* Check that the first field in the block is the magic number. If it is not, return with PCRE2_ERROR_BADMAGIC. */ -if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; +if (re->magic_number != MAGIC_NUMBER) return match_data->rc = PCRE2_ERROR_BADMAGIC; /* Check the code unit width. 
*/ if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8) - return PCRE2_ERROR_BADMODE; + return match_data->rc = PCRE2_ERROR_BADMODE; /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the options variable for this function. Users of PCRE2 who are not calling the @@ -3452,7 +3466,7 @@ if ((options & PCRE2_DFA_RESTART) != 0) { if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 || workspace[1] > (int)((wscount - 2)/INTS_PER_STATEBLOCK)) - return PCRE2_ERROR_DFA_BADRESTART; + return match_data->rc = PCRE2_ERROR_DFA_BADRESTART; } /* Set some local values */ @@ -3500,7 +3514,7 @@ else if (mcontext->offset_limit != PCRE2_UNSET) { if ((re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0) - return PCRE2_ERROR_BADOFFSETLIMIT; + return match_data->rc = PCRE2_ERROR_BADOFFSETLIMIT; bumpalong_limit = subject + mcontext->offset_limit; } mb->callout = mcontext->callout; @@ -3569,7 +3583,7 @@ switch(re->newline_convention) default: PCRE2_DEBUG_UNREACHABLE(); - return PCRE2_ERROR_INTERNAL; + return match_data->rc = PCRE2_ERROR_INTERNAL; } /* Check a UTF string for validity if required. For 8-bit and 16-bit strings, @@ -3590,7 +3604,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0) #if PCRE2_CODE_UNIT_WIDTH != 32 unsigned int i; if (start_match < end_subject && NOT_FIRSTCU(*start_match)) - return PCRE2_ERROR_BADUTFOFFSET; + return match_data->rc = PCRE2_ERROR_BADUTFOFFSET; for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--) { check_subject--; @@ -3667,20 +3681,9 @@ if ((re->flags & PCRE2_LASTSET) != 0) } } -/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT, -free the memory that was obtained. */ - -if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0) - { - match_data->memctl.free((void *)match_data->subject, - match_data->memctl.memory_data); - match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT; - } - /* Fill in fields that are always returned in the match data. 
*/ match_data->code = re; -match_data->subject = NULL; /* Default for no match */ match_data->mark = NULL; match_data->matchedby = PCRE2_MATCHEDBY_DFA_INTERPRETER; @@ -3718,7 +3721,7 @@ for (;;) while (t < end_subject && !IS_NEWLINE(t)) { t++; - ACROSSCHAR(t < end_subject, t, t++); + FORWARDCHARTEST(t, end_subject); } } else @@ -3863,7 +3866,7 @@ for (;;) while (start_match < end_subject && !WAS_NEWLINE(start_match)) { start_match++; - ACROSSCHAR(start_match < end_subject, start_match, start_match++); + FORWARDCHARTEST(start_match, end_subject); } } else @@ -4039,8 +4042,6 @@ for (;;) match_data->ovector[0] = (PCRE2_SIZE)(start_match - subject); match_data->ovector[1] = (PCRE2_SIZE)(end_subject - subject); } - match_data->subject_length = length; - match_data->start_offset = start_offset; match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject); match_data->rightchar = (PCRE2_SIZE)(mb->last_used_ptr - subject); match_data->startchar = (PCRE2_SIZE)(start_match - subject); @@ -4051,13 +4052,13 @@ for (;;) length = CU2BYTES(length + was_zero_terminated); match_data->subject = match_data->memctl.malloc(length, match_data->memctl.memory_data); - if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY; + if (match_data->subject == NULL) return match_data->rc = PCRE2_ERROR_NOMEMORY; memcpy((void *)match_data->subject, subject, length); match_data->flags |= PCRE2_MD_COPIED_SUBJECT; } else { - if (rc >= 0 || rc == PCRE2_ERROR_PARTIAL) match_data->subject = subject; + if (rc >= 0 || rc == PCRE2_ERROR_PARTIAL) match_data->subject = subject == null_str ? 
NULL : subject; } goto EXIT; } @@ -4070,7 +4071,7 @@ for (;;) #ifdef SUPPORT_UNICODE if (utf) { - ACROSSCHAR(start_match < end_subject, start_match, start_match++); + FORWARDCHARTEST(start_match, end_subject); } #endif if (start_match > end_subject) break; @@ -4101,7 +4102,7 @@ while (rws->next != NULL) mb->memctl.free(next, mb->memctl.memory_data); } -return rc; +return match_data->rc = rc; } /* These #undefs are here to enable unity builds with CMake. */ diff --git a/src/pcre2_error.c b/src/pcre2_error.c index 23f80cf00..e281c4bd5 100644 --- a/src/pcre2_error.c +++ b/src/pcre2_error.c @@ -299,6 +299,10 @@ static const unsigned char match_error_texts[] = "error performing replacement case transformation\0" /* 70 */ "replacement too large (longer than PCRE2_SIZE)\0" + "substitute subject differs from prior pcre2_match call\0" + "substitute subject length differs from prior pcre2_match call\0" + "substitute start offset differs from prior pcre2_match call\0" + "capture group is not a valid UTF string\0" ; diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h index 02763c4f0..74de526d9 100644 --- a/src/pcre2_intmodedep.h +++ b/src/pcre2_intmodedep.h @@ -51,8 +51,8 @@ pcre2_printint_inc.h file). We undefine them here so that they can be re-defined multiple inclusions. Not all of these are used in pcre2test, but it's easier just to undefine them all. */ -#undef ACROSSCHAR #undef BACKCHAR +#undef BACKCHARTEST #undef BYTES2CU #undef CHMAX_255 #undef CU2BYTES @@ -274,9 +274,9 @@ UTF support is omitted, we don't even define them. */ #define PUTCHAR(c, p) (*p = c, 1) /* #define GETCHARLENTEST(c, eptr, len) */ /* #define BACKCHAR(eptr) */ +/* #define BACKCHARTEST(eptr,start) */ /* #define FORWARDCHAR(eptr) */ /* #define FORWARCCHARTEST(eptr,end) */ -/* #define ACROSSCHAR(condition, eptr, action) */ #else /* SUPPORT_UNICODE */ @@ -351,15 +351,12 @@ it is. 
This is called only in UTF-8 mode - we don't put a test within the macro because almost all calls are already within a block of UTF-8 only code. */ #define BACKCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr-- +#define BACKCHARTEST(eptr,start) while(eptr > start && (*eptr & 0xc0u) == 0x80u) eptr-- /* Same as above, just in the other direction. */ #define FORWARDCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr++ #define FORWARDCHARTEST(eptr,end) while(eptr < end && (*eptr & 0xc0u) == 0x80u) eptr++ -/* Same as above, but it allows a fully customizable form. */ -#define ACROSSCHAR(condition, eptr, action) \ - while((condition) && ((*eptr) & 0xc0u) == 0x80u) action - /* Deposit a character into memory, returning the number of code units. */ #define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \ @@ -457,15 +454,12 @@ macro because almost all calls are already within a block of UTF-16 only code. */ #define BACKCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr-- +#define BACKCHARTEST(eptr,start) if (eptr > start && (*eptr & 0xfc00u) == 0xdc00u) eptr-- /* Same as above, just in the other direction. */ #define FORWARDCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr++ #define FORWARDCHARTEST(eptr,end) if (eptr < end && (*eptr & 0xfc00u) == 0xdc00u) eptr++ -/* Same as above, but it allows a fully customizable form. */ -#define ACROSSCHAR(condition, eptr, action) \ - if ((condition) && ((*eptr) & 0xfc00u) == 0xdc00u) action - /* Deposit a character into memory, returning the number of code units. */ #define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \ @@ -530,16 +524,13 @@ code. These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */ #define BACKCHAR(eptr) do { } while (0) +#define BACKCHARTEST(eptr,start) do { } while (0) /* Same as above, just in the other direction. */ #define FORWARDCHAR(eptr) do { } while (0) #define FORWARDCHARTEST(eptr,end) do { } while (0) -/* Same as above, but it allows a fully customizable form. 
*/ - -#define ACROSSCHAR(condition, eptr, action) do { } while (0) - /* Deposit a character into memory, returning the number of code units. */ #define PUTCHAR(c, p) (*p = c, 1) @@ -676,6 +667,7 @@ typedef struct pcre2_real_match_data { pcre2_memctl memctl; /* Memory control fields */ const pcre2_real_code *code; /* The pattern used for the match */ PCRE2_SPTR subject; /* The subject that was matched */ + PCRE2_SPTR original_subject; /* the pointer that was actually passed to pcre2_match */ PCRE2_SPTR mark; /* Pointer to last mark */ struct heapframe *heapframes; /* Backtracking frames heap memory */ PCRE2_SIZE heapframes_size; /* Malloc-ed size */ diff --git a/src/pcre2_jit_match_inc.h b/src/pcre2_jit_match_inc.h index 81cd9ccf9..27276120c 100644 --- a/src/pcre2_jit_match_inc.h +++ b/src/pcre2_jit_match_inc.h @@ -99,9 +99,8 @@ pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length, (void)length; (void)start_offset; (void)options; -(void)match_data; (void)mcontext; -return PCRE2_ERROR_JIT_BADOPTION; +return match_data->rc = PCRE2_ERROR_JIT_BADOPTION; #else /* SUPPORT_JIT */ @@ -118,13 +117,18 @@ jit_arguments arguments; int rc; int index = 0; +/* store data needed by pcre2_substitute */ +match_data->subject = match_data->original_subject = subject; +match_data->subject_length = length; +match_data->start_offset = start_offset; + if ((options & PCRE2_PARTIAL_HARD) != 0) index = 2; else if ((options & PCRE2_PARTIAL_SOFT) != 0) index = 1; if (functions == NULL || functions->executable_funcs[index] == NULL) - return PCRE2_ERROR_JIT_BADOPTION; + return match_data->rc = PCRE2_ERROR_JIT_BADOPTION; /* Sanity checks should be handled by pcre2_match. */ arguments.str = subject + start_offset; @@ -176,9 +180,6 @@ else if (rc > (int)oveccount) rc = 0; match_data->code = re; -match_data->subject = (rc >= 0 || rc == PCRE2_ERROR_PARTIAL)? 
subject : NULL; -match_data->subject_length = length; -match_data->start_offset = start_offset; match_data->rc = rc; match_data->startchar = arguments.startchar_ptr - subject; match_data->leftchar = 0; diff --git a/src/pcre2_match.c b/src/pcre2_match.c index c550b24d8..523f3b43d 100644 --- a/src/pcre2_match.c +++ b/src/pcre2_match.c @@ -1065,7 +1065,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, } Feptr++; #ifdef SUPPORT_UNICODE - if (utf) ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + if (utf) FORWARDCHARTEST(Feptr, mb->end_subject); #endif Fecode++; break; @@ -3255,7 +3255,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; } Feptr++; - ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + FORWARDCHARTEST(Feptr, mb->end_subject); } break; @@ -3268,7 +3268,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, RRETURN(MATCH_NOMATCH); } Feptr++; - ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + FORWARDCHARTEST(Feptr, mb->end_subject); } break; @@ -3422,7 +3422,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if (cc < 128 && (mb->ctypes[cc] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); Feptr++; - ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + FORWARDCHARTEST(Feptr, mb->end_subject); } break; @@ -3456,7 +3456,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if (cc < 128 && (mb->ctypes[cc] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); Feptr++; - ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + FORWARDCHARTEST(Feptr, mb->end_subject); } break; @@ -4703,7 +4703,7 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, if (mb->partial > 1) return PCRE2_ERROR_PARTIAL; } Feptr++; - ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + FORWARDCHARTEST(Feptr, mb->end_subject); } break; @@ -4718,7 +4718,7 @@ fprintf(stderr, "++ %2ld op=%3d 
%s\n", Fecode - mb->start_code, *Fecode, break; } Feptr++; - ACROSSCHAR(Feptr < mb->end_subject, Feptr, Feptr++); + FORWARDCHARTEST(Feptr, mb->end_subject); } } else @@ -6966,6 +6966,7 @@ BOOL jit_checked_utf = FALSE; #endif #endif /* SUPPORT_UNICODE */ +PCRE2_SIZE byte_length; PCRE2_SIZE frame_size; PCRE2_SIZE heapframes_size; @@ -6976,35 +6977,61 @@ pcre2_callout_block cb; match_block actual_match_block; match_block *mb = &actual_match_block; +if (match_data == NULL) return PCRE2_ERROR_NULL; + +/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT, +free the memory that was obtained. Set the field to NULL for no match cases. */ +if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0) + { + match_data->memctl.free((void *)match_data->subject, + match_data->memctl.memory_data); + match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT; + } + +/* store data needed by pcre2_substitute */ +match_data->subject = match_data->original_subject = subject; +if (length == PCRE2_ZERO_TERMINATED) + { + length = PRIV(strlen)(subject); + was_zero_terminated = 1; + } +match_data->subject_length = length; +match_data->start_offset = start_offset; + /* Recognize NULL, length 0 as an empty string. 
*/ if (subject == NULL && length == 0) subject = null_str; /* Plausibility checks */ -if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; -if (code == NULL || subject == NULL || match_data == NULL) - return PCRE2_ERROR_NULL; +if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return match_data->rc = PCRE2_ERROR_BADOPTION; +if (code == NULL || subject == NULL) return match_data->rc = PCRE2_ERROR_NULL; + +/* save a copy of the subject, and use it for all future operations */ +if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0) + { + byte_length = CU2BYTES(length + was_zero_terminated); + match_data->subject = match_data->memctl.malloc(byte_length, + match_data->memctl.memory_data); + if (match_data->subject == NULL) return match_data->rc = PCRE2_ERROR_NOMEMORY; + subject = memcpy((void *)match_data->subject, subject, byte_length); + match_data->flags |= PCRE2_MD_COPIED_SUBJECT; + } start_match = subject + start_offset; req_cu_ptr = start_match - 1; -if (length == PCRE2_ZERO_TERMINATED) - { - length = PRIV(strlen)(subject); - was_zero_terminated = 1; - } true_end_subject = end_subject = subject + length; -if (start_offset > length) return PCRE2_ERROR_BADOFFSET; +if (start_offset > length) return match_data->rc = PCRE2_ERROR_BADOFFSET; /* Check that the first field in the block is the magic number. */ -if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC; +if (re->magic_number != MAGIC_NUMBER) return match_data->rc = PCRE2_ERROR_BADMAGIC; /* Check the code unit width. */ if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8) - return PCRE2_ERROR_BADMODE; + return match_data->rc = PCRE2_ERROR_BADMODE; /* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the options variable for this function. Users of PCRE2 who are not calling the @@ -7051,25 +7078,14 @@ time. 
*/ if (mb->partial != 0 && ((re->overall_options | options) & PCRE2_ENDANCHORED) != 0) - return PCRE2_ERROR_BADOPTION; + return match_data->rc = PCRE2_ERROR_BADOPTION; /* It is an error to set an offset limit without setting the flag at compile time. */ if (mcontext != NULL && mcontext->offset_limit != PCRE2_UNSET && (re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0) - return PCRE2_ERROR_BADOFFSETLIMIT; - -/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT, -free the memory that was obtained. Set the field to NULL for no match cases. */ - -if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0) - { - match_data->memctl.free((void *)match_data->subject, - match_data->memctl.memory_data); - match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT; - } -match_data->subject = NULL; + return match_data->rc = PCRE2_ERROR_BADOFFSETLIMIT; /* Zero the error offset in case the first code unit is invalid UTF. */ @@ -7098,11 +7114,11 @@ if (use_jit) #if PCRE2_CODE_UNIT_WIDTH != 32 if (start_match < end_subject && NOT_FIRSTCU(*start_match)) { - if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET; + if (start_offset > 0) return match_data->rc = PCRE2_ERROR_BADUTFOFFSET; #if PCRE2_CODE_UNIT_WIDTH == 8 - return PCRE2_ERROR_UTF8_ERR20; /* Isolated 0x80 byte */ + return match_data->rc = PCRE2_ERROR_UTF8_ERR20; /* Isolated 0x80 byte */ #else - return PCRE2_ERROR_UTF16_ERR3; /* Isolated low surrogate */ + return match_data->rc = PCRE2_ERROR_UTF16_ERR3; /* Isolated low surrogate */ #endif } #endif /* WIDTH != 32 */ @@ -7151,22 +7167,15 @@ if (use_jit) /* If JIT returns BADOPTION, which means that the selected complete or partial matching mode was not compiled, fall through to the interpreter. 
*/ - rc = pcre2_jit_match(code, subject, length, start_offset, options, + /* pcre2_jit_match will set both match_data->subject and + match_data->original_subject to the value we pass it, so do a little + juggling to undo this */ + subject = match_data->original_subject; + rc = pcre2_jit_match(code, match_data->subject, length, start_offset, options, match_data, mcontext); - if (rc != PCRE2_ERROR_JIT_BADOPTION) - { - match_data->subject_length = length; - if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0) - { - length = CU2BYTES(length + was_zero_terminated); - match_data->subject = match_data->memctl.malloc(length, - match_data->memctl.memory_data); - if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY; - memcpy((void *)match_data->subject, subject, length); - match_data->flags |= PCRE2_MD_COPIED_SUBJECT; - } - return rc; - } + match_data->original_subject = subject; + subject = match_data->subject; + if (rc != PCRE2_ERROR_JIT_BADOPTION) return match_data->rc = rc; } #endif /* SUPPORT_JIT */ @@ -7221,11 +7230,11 @@ if (utf && } else if (start_match < end_subject && NOT_FIRSTCU(*start_match)) { - if (start_offset > 0) return PCRE2_ERROR_BADUTFOFFSET; + if (start_offset > 0) return match_data->rc = PCRE2_ERROR_BADUTFOFFSET; #if PCRE2_CODE_UNIT_WIDTH == 8 - return PCRE2_ERROR_UTF8_ERR20; /* Isolated 0x80 byte */ + return match_data->rc = PCRE2_ERROR_UTF8_ERR20; /* Isolated 0x80 byte */ #else - return PCRE2_ERROR_UTF16_ERR3; /* Isolated low surrogate */ + return match_data->rc = PCRE2_ERROR_UTF16_ERR3; /* Isolated low surrogate */ #endif } #endif /* WIDTH != 32 */ @@ -7400,7 +7409,7 @@ switch(re->newline_convention) default: PCRE2_DEBUG_UNREACHABLE(); - return PCRE2_ERROR_INTERNAL; + return match_data->rc = PCRE2_ERROR_INTERNAL; } /* The backtracking frames have fixed data at the front, and a PCRE2_SIZE @@ -7442,7 +7451,7 @@ if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE; if (heapframes_size / 1024 > mb->heap_limit) { 
PCRE2_SIZE max_size = 1024 * mb->heap_limit; - if (max_size < frame_size) return PCRE2_ERROR_HEAPLIMIT; + if (max_size < frame_size) return match_data->rc = PCRE2_ERROR_HEAPLIMIT; heapframes_size = max_size; } @@ -7458,7 +7467,7 @@ if (match_data->heapframes_size < heapframes_size) if (match_data->heapframes == NULL) { match_data->heapframes_size = 0; - return PCRE2_ERROR_NOMEMORY; + return match_data->rc = PCRE2_ERROR_NOMEMORY; } match_data->heapframes_size = heapframes_size; } @@ -7563,7 +7572,7 @@ for(;;) while (t < end_subject && !IS_NEWLINE(t)) { t++; - ACROSSCHAR(t < end_subject, t, t++); + FORWARDCHARTEST(t, end_subject); } } else @@ -7715,7 +7724,7 @@ for(;;) while (start_match < end_subject && !WAS_NEWLINE(start_match)) { start_match++; - ACROSSCHAR(start_match < end_subject, start_match, start_match++); + FORWARDCHARTEST(start_match, end_subject); } } else @@ -7946,8 +7955,7 @@ for(;;) new_start_match = start_match + 1; #ifdef SUPPORT_UNICODE if (utf) - ACROSSCHAR(new_start_match < end_subject, new_start_match, - new_start_match++); + FORWARDCHARTEST(new_start_match, end_subject); #endif break; @@ -8102,23 +8110,10 @@ if (rc == MATCH_MATCH) { match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)? 0 : (int)mb->end_offset_top/2 + 1; - match_data->subject_length = length; - match_data->start_offset = start_offset; match_data->startchar = start_match - subject; match_data->leftchar = mb->start_used_ptr - subject; match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)? 
mb->last_used_ptr : mb->end_match_ptr) - subject; - if ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0) - { - length = CU2BYTES(length + was_zero_terminated); - match_data->subject = match_data->memctl.malloc(length, - match_data->memctl.memory_data); - if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY; - memcpy((void *)match_data->subject, subject, length); - match_data->flags |= PCRE2_MD_COPIED_SUBJECT; - } - else match_data->subject = subject; - return match_data->rc; } @@ -8139,9 +8134,6 @@ PCRE2_ERROR_PARTIAL. */ else if (match_partial != NULL) { - match_data->subject = subject; - match_data->subject_length = length; - match_data->start_offset = start_offset; match_data->ovector[0] = match_partial - subject; match_data->ovector[1] = end_subject - subject; match_data->startchar = match_partial - subject; diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c index c5400dc54..2a03ef12c 100644 --- a/src/pcre2_substitute.c +++ b/src/pcre2_substitute.c @@ -755,6 +755,7 @@ PCRE2_SIZE extra_needed = 0; PCRE2_SIZE buff_offset, buff_length, lengthleft, fraglength; PCRE2_SIZE *ovector; PCRE2_SIZE ovecsave[2] = { 0, 0 }; +PCRE2_SPTR capture_start, capture_end; pcre2_substitute_callout_block scb; PCRE2_SIZE sub_start_extra_needed; PCRE2_SIZE (*substitute_case_callout)(PCRE2_SPTR, PCRE2_SIZE, PCRE2_UCHAR *, @@ -795,8 +796,49 @@ repend = replacement + rlength; pointer in the match data may be NULL after a no-match. */ use_existing_match = ((options & PCRE2_SUBSTITUTE_MATCHED) != 0); +if (use_existing_match && match_data == NULL) return PCRE2_ERROR_NULL; + replacement_only = ((options & PCRE2_SUBSTITUTE_REPLACEMENT_ONLY) != 0); +if (use_existing_match && match_data->rc < PCRE2_ERROR_NOMATCH) + /* Return early, as the rest of the match_data may not have been initialised */ + return match_data->rc; + +/* If we are using PCRE2_SUBSTITUTE_MATCHED and the preceding call to pcre2_match +used PCRE2_COPY_MATCHED_SUBJECT, then use the copy that pcre2_match made. 
*/ +if (use_existing_match && ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)) + { + if (subject != NULL && match_data->original_subject != subject) + return PCRE2_ERROR_DIFFERENT_SUBJECT; + + /* For convenience, NULL and PCRE2_ZERO_TERMINATED means to just use the saved + length. Otherwise, we check that the given length is the same.*/ + if (subject == NULL && length == PCRE2_ZERO_TERMINATED) + length = match_data->subject_length; + else if (length == PCRE2_ZERO_TERMINATED && match_data->subject_length != PRIV(strlen)(subject)) + return PCRE2_ERROR_DIFFERENT_LENGTH; + else if (length != PCRE2_ZERO_TERMINATED && match_data->subject_length != length) + return PCRE2_ERROR_DIFFERENT_LENGTH; + + subject = match_data->subject; + } +else + { + if (use_existing_match && match_data->original_subject != subject) + return PCRE2_ERROR_DIFFERENT_SUBJECT; + + /* Find length of zero-terminated subject */ + + if (length == PCRE2_ZERO_TERMINATED) + length = subject? PRIV(strlen)(subject) : 0; + + if (use_existing_match && match_data->subject_length != length) + return PCRE2_ERROR_DIFFERENT_LENGTH; + + if (use_existing_match && match_data->start_offset != start_offset) + return PCRE2_ERROR_DIFFERENT_OFFSET; + } + /* If starting from an existing match, there must be an externally provided match data block. We create an internal match_data block in two cases: (a) an external one is not supplied (and we are not starting from an existing match); @@ -813,7 +855,6 @@ have to be changes below. */ if (match_data == NULL) { pcre2_general_context gcontext; - if (use_existing_match) return PCRE2_ERROR_NULL; gcontext.memctl = (mcontext == NULL)? ((pcre2_real_code *)code)->memctl : ((pcre2_real_match_context *)mcontext)->memctl; @@ -831,6 +872,75 @@ else if (use_existing_match) ((pcre2_real_match_context *)mcontext)->memctl; pairs = (code->top_bracket + 1 < match_data->oveccount)? 
code->top_bracket + 1 : match_data->oveccount; + + /* Ensure that if the subject has been modified, we won't get any invalid + UTF output (If the subject was copied, we assume the copy was not modified). */ +#ifdef SUPPORT_UNICODE + if (match_data->rc > PCRE2_ERROR_NOMATCH && utf && (match_data->flags & PCRE2_MD_COPIED_SUBJECT) == 0) + { + /* although other UTF errors store the offset in match_data->startchar, + we can't do that as we promised not to modify match_data, so we + store the offset in *blength */ + + /* check that the entire subject is valid */ + if ((options & PCRE2_NO_UTF_CHECK) == 0 + && (code->overall_options & PCRE2_MATCH_INVALID_UTF) == 0 + && (rc = PRIV(valid_utf)(subject, length, blength)) != 0) + goto EXIT; + + /* check each capture group, and return BADUTFCAPTURE if something went wrong */ + for (int i = 0; i < pairs; i++) + { + capture_start = subject + match_data->ovector[2*i]; + capture_end = subject + match_data->ovector[2*i + 1]; + + /* \K can be used to get capture_end < capture_start, but we'll return a + PCRE2_ERROR_BADSUBSPATTERN later on anyway */ + if (capture_start == subject + PCRE2_UNSET || capture_end < capture_start) continue; + + /* If PCRE2_MATCH_INVALID_UTF was used, the subject itself can contain + invalid UTF, but matches and capture groups are not allowed to */ + if ((options & PCRE2_NO_UTF_CHECK) == 0 + && (code->overall_options & PCRE2_MATCH_INVALID_UTF) != 0) + { + + /* If \C was used, the capture group may start or end inside a unicode + character, so we first enlarge the capture group bounds to include + entire characters */ + if ((code->flags & PCRE2_HASBKC) != 0) + { + if (capture_start < subject + length) BACKCHARTEST(capture_start, subject); + FORWARDCHARTEST(capture_end, subject + length); + } + + if (PRIV(valid_utf)(capture_start, capture_end - capture_start, blength) != 0) + goto BADUTFCAPTURE; + } + + /* otherwise, unless \C was used, we need to check that the start and + end of the capture isn't in the middle 
of a single unicode character + (this needs to be checked even if the capture is empty)*/ + else if ((code->flags & PCRE2_HASBKC) == 0) + { + /* capture_start < subject + length can only be false if the capture is + empty, and is at the end of the subject */ + if (capture_start < subject + length && NOT_FIRSTCU(*capture_start)) + { + *blength = capture_start - subject; + goto BADUTFCAPTURE; + } + /* capture_end > subject can only be false if the capture is empty + and is at the beginning of the subject */ + if (capture_end > subject && HAS_EXTRALEN(*(capture_end - 1))) + { + *blength = capture_end - subject; + goto BADUTFCAPTURE; + } + } + } + } +#endif /* SUPPORT_UNICODE */ + internal_match_data = pcre2_match_data_create(match_data->oveccount, &gcontext); if (internal_match_data == NULL) return PCRE2_ERROR_NOMEMORY; @@ -838,6 +948,10 @@ else if (use_existing_match) + 2*pairs*sizeof(PCRE2_SIZE)); internal_match_data->heapframes = NULL; internal_match_data->heapframes_size = 0; + /* Ensure that the subject is not freed when internal_match_data is */ + internal_match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT; + /* ensure that pcre2_match doesn't make an unnecessary copy of the subject */ + options &= ~PCRE2_COPY_MATCHED_SUBJECT; match_data = internal_match_data; } @@ -853,19 +967,6 @@ scb.input = subject; scb.output = (PCRE2_SPTR)buffer; scb.ovector = ovector; -/* A NULL subject of zero length is treated as an empty string. */ - -if (subject == NULL) - { - if (length != 0) return PCRE2_ERROR_NULL; - subject = null_str; - } - -/* Find length of zero-terminated subject */ - -if (length == PCRE2_ZERO_TERMINATED) - length = subject? PRIV(strlen)(subject) : 0; - /* Check UTF replacement string if necessary. 
*/ #ifdef SUPPORT_UNICODE @@ -1652,6 +1753,10 @@ goto EXIT; rc = PCRE2_ERROR_TOOLARGEREPLACE; goto EXIT; +BADUTFCAPTURE: +rc = PCRE2_ERROR_BADUTFCAPTURE; +goto EXIT; + BAD: rc = PCRE2_ERROR_BADREPLACEMENT; goto PTREXIT; diff --git a/src/pcre2_substring.c b/src/pcre2_substring.c index 4e9743ae8..9419dc769 100644 --- a/src/pcre2_substring.c +++ b/src/pcre2_substring.c @@ -122,7 +122,7 @@ PCRE2_SIZE size; rc = pcre2_substring_length_bynumber(match_data, stringnumber, &size); if (rc < 0) return rc; if (size + 1 > *sizeptr) return PCRE2_ERROR_NOMEMORY; -memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2], +if (size != 0) memcpy(buffer, match_data->subject + match_data->ovector[stringnumber*2], CU2BYTES(size)); buffer[size] = 0; *sizeptr = size; @@ -214,7 +214,7 @@ yield = PRIV(memctl_malloc)(sizeof(pcre2_memctl) + (size + 1)*PCRE2_CODE_UNIT_WIDTH, (pcre2_memctl *)match_data); if (yield == NULL) return PCRE2_ERROR_NOMEMORY; yield = (PCRE2_UCHAR *)(((char *)yield) + sizeof(pcre2_memctl)); -memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2], +if (size != 0) memcpy(yield, match_data->subject + match_data->ovector[stringnumber*2], CU2BYTES(size)); yield[size] = 0; *stringptr = yield; diff --git a/src/pcre2test.c b/src/pcre2test.c index 1afcbaf68..fc4cae22f 100644 --- a/src/pcre2test.c +++ b/src/pcre2test.c @@ -112,7 +112,6 @@ required for different environments. */ #define INTERACTIVE(f) isatty(fileno(f)) - /* ---------------------- System-specific definitions ---------------------- */ /* A number of things vary for Windows builds. Originally, pcretest opened its @@ -221,6 +220,9 @@ claim to be C99 don't support it (hence DISABLE_PERCENT_ZT). 
*/ #define PARENS_NEST_DEFAULT 220 /* Default parentheses nest limit */ #define PATSTACKSIZE 20 /* Pattern stack for save/restore testing */ #define REPLACE_MODSIZE 100 /* Field for reading 8-bit replacement */ +#define SUBSTITUTE_SUBJECT_MODSIZE 100 /* Field for reading 8-bit subject for substitute */ +#define SUBSTITUTE_OPTIONS_MODSIZE 73 /* Field for overriding match options when calling substitute*/ +/* (it's just big enough to store every such option, separated by '|'s and terminated by '\0') */ #define VERSION_SIZE 64 /* Size of buffer for the version strings */ /* Default JIT compile options */ @@ -230,8 +232,11 @@ claim to be C99 don't support it (hence DISABLE_PERCENT_ZT). */ PCRE2_JIT_PARTIAL_HARD) /* Make sure the buffer into which replacement strings are copied is big enough -to hold them as 32-bit code units. */ +to hold them as 32-bit code units. The same buffer size is also used +for the substitute subject (if different from the match one), and the result +of a substitution. */ +/* must not be greater than initial dbuffer_size */ #define REPLACE_BUFFSIZE 1024 /* This is a byte value */ /* Execution modes */ @@ -582,6 +587,9 @@ so many of them that they are split into two fields. */ #define CTL2_NULL_REPLACEMENT 0x00004000u #define CTL2_FRAMESIZE 0x00008000u #define CTL2_SUBSTITUTE_CASE_CALLOUT 0x00010000u +#define CTL2_NULL_SUBSTITUTE_SUBJECT 0x00020000u +#define CTL2_SUBSTITUTE_OVERWRITE 0x00040000u +#define CTL2_SUBSTITUTE_ZERO_TERMINATE 0x00080000u #define CTL2_HEAPFRAMES_SIZE 0x20000000u /* Informational */ #define CTL2_NL_SET 0x40000000u /* Informational */ @@ -627,6 +635,7 @@ typedef struct patctl { /* Structure for pattern modifiers. 
*/ uint8_t replacement[REPLACE_MODSIZE]; /* So must this */ uint32_t substitute_skip; /* Must be in same position as datctl */ uint32_t substitute_stop; /* Must be in same position as datctl */ + uint8_t substitute_options[SUBSTITUTE_OPTIONS_MODSIZE]; /* and this */ uint32_t jit; uint32_t stackguard_test; uint32_t tables_id; @@ -649,6 +658,8 @@ typedef struct datctl { /* Structure for data line modifiers. */ uint8_t replacement[REPLACE_MODSIZE]; /* So must this */ uint32_t substitute_skip; /* Must be in same position as patctl */ uint32_t substitute_stop; /* Must be in same position as patctl */ + uint8_t substitute_options[SUBSTITUTE_OPTIONS_MODSIZE]; /* and this */ + uint8_t substitute_subject[SUBSTITUTE_SUBJECT_MODSIZE]; uint32_t startend[2]; uint32_t cerror[2]; uint32_t cfail[2]; @@ -657,6 +668,7 @@ typedef struct datctl { /* Structure for data line modifiers. */ int32_t get_numbers[MAXCPYGET]; uint32_t oveccount; PCRE2_SIZE offset; + PCRE2_SIZE substitute_offset; uint8_t copy_names[LENCPYGET]; uint8_t get_names[LENCPYGET]; } datctl; @@ -800,6 +812,7 @@ static modstruct modlist[] = { { "null_pattern", MOD_PAT, MOD_CTL, CTL2_NULL_PATTERN, PO(control2) }, { "null_replacement", MOD_DAT, MOD_CTL, CTL2_NULL_REPLACEMENT, DO(control2) }, { "null_subject", MOD_DAT, MOD_CTL, CTL2_NULL_SUBJECT, DO(control2) }, + { "null_substitute_subject", MOD_DAT, MOD_CTL, CTL2_NULL_SUBSTITUTE_SUBJECT, DO(control2) }, { "offset", MOD_DAT, MOD_SIZ, 0, DO(offset) }, { "offset_limit", MOD_CTM, MOD_SIZ, 0, MO(offset_limit)}, { "optimization_full", MOD_CTC, MOD_OPTMZ, PCRE2_OPTIMIZATION_FULL, 0 }, @@ -831,12 +844,17 @@ static modstruct modlist[] = { { "substitute_extended", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_EXTENDED, PO(control2) }, { "substitute_literal", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_LITERAL, PO(control2) }, { "substitute_matched", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_MATCHED, PO(control2) }, + { "substitute_offset", MOD_DAT, MOD_SIZ, 0, DO(substitute_offset) }, + { "substitute_options", 
MOD_PND, MOD_STR, SUBSTITUTE_OPTIONS_MODSIZE, PD(substitute_options) }, { "substitute_overflow_length", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_OVERFLOW_LENGTH, PO(control2) }, + { "substitute_overwrite", MOD_DAT, MOD_CTL, CTL2_SUBSTITUTE_OVERWRITE, DO(control2) }, { "substitute_replacement_only", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_REPLACEMENT_ONLY, PO(control2) }, { "substitute_skip", MOD_PND, MOD_INT, 0, PO(substitute_skip) }, { "substitute_stop", MOD_PND, MOD_INT, 0, PO(substitute_stop) }, + { "substitute_subject", MOD_DAT, MOD_STR, SUBSTITUTE_SUBJECT_MODSIZE, DO(substitute_subject) }, { "substitute_unknown_unset", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNKNOWN_UNSET, PO(control2) }, { "substitute_unset_empty", MOD_PND, MOD_CTL, CTL2_SUBSTITUTE_UNSET_EMPTY, PO(control2) }, + { "substitute_zero_terminate", MOD_DAT, MOD_CTL, CTL2_SUBSTITUTE_ZERO_TERMINATE, DO(control2) }, { "tables", MOD_PAT, MOD_INT, 0, PO(tables_id) }, { "turkish_casing", MOD_CTC, MOD_OPT, PCRE2_EXTRA_TURKISH_CASING, CO(extra_options) }, { "ucp", MOD_PATP, MOD_OPT, PCRE2_UCP, PO(options) }, @@ -1095,6 +1113,7 @@ static uint8_t *buffer = NULL; /* The dbuffer is where all processed data lines are put. In non-8-bit modes it is cast as needed. For long data lines it grows as necessary. 
*/ +/* this initial value must be at least REPLACE_BUFFSIZE*/ static size_t dbuffer_size = 1u << 14; /* Initial size, bytes */ static uint8_t *dbuffer = NULL; @@ -1137,6 +1156,68 @@ static PCRE2_SIZE pbuffer32_size = 0; /* Set only when needed */ static uint32_t *pbuffer32 = NULL; #endif +/* ---------------------- Colour definitions ---------------------- */ + +/* Input text that was a comment, when echoing back to the terminal */ +static int const clr_comment = 37; /* grey */ +/* Other input text that is echoed back to the terminal */ +static int const clr_input = 39; /* default foreground colour */ +/* Colour of output that represents a pcre2api error */ +static int const clr_api_error = 35; /* magenta */ +/* Colour of error messages for the test script itself +(i.e. pcre2test error, not a pcre2api error) */ +static int const clr_test_error = 31; /* red */ +/* Colour for profiling information, which doesn't have a "right" answer */ +static int const clr_profiling = 34; /* blue */ +/* Colour of normal output */ +static int const clr_output = 32; /* green */ +/* Colour for anything not printed with an explicit colour +(such as valgrind errors) */ +static int const clr_unexpected = 33; /* yellow */ + +static BOOL colour_on; + +/* start a block of colour (but only if colour_on) */ +static void +colour_begin(int clr, FILE* f) +{ +if(f != NULL && colour_on) fprintf(f, "\x1b[%dm", clr); +} + +/* end a block of colour by switching back to clr_unexpected (but only if colour_on) */ +static void +colour_end(FILE* f) +{ +colour_begin(clr_unexpected, f); +} + +/* wraps a string literal in blue */ +#define PROMPT(literal) (colour_on ? "\x1b[34m" literal "\x1b[m" : literal) + + +/* this is the body of a variadic function that does a vfprintf to the given file +wrapped in the given colour, returning the result of the inner vfprintf. 
*/ +#define COLOUR_PRINTF_BODY(colour, file, fmt) \ + { \ + int ret; \ + colour_begin(colour, file); \ + va_list args; \ + va_start(args, fmt); \ + ret = vfprintf(file, fmt, args); \ + va_end(args); \ + colour_end(file); \ + return ret; \ + } + +/* cprintf is like printf, but it takes a colour and writes to outfile */ +static int +cprintf(int colour, const char* fmt, ...) COLOUR_PRINTF_BODY(colour, outfile, fmt) + +/* fatal_printf is like printf, but for printing fatal errors to stderr */ +static int +fatal_printf(const char* fmt, ...) COLOUR_PRINTF_BODY(clr_test_error, stderr, fmt) + +#undef COLOUR_PRINTF_BODY /* ---------------- Macros that work in all modes ----------------- */ @@ -1203,29 +1284,29 @@ are supported. */ memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)); \ else memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32)) -#define PCHARS(lv, p, offset, len, utf, f) \ +#define PCHARS(c, lv, p, offset, len, utf, f) \ if (test_mode == PCRE32_MODE) \ - lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \ + lv = pchars32(c, (PCRE2_SPTR32)(p)+offset, len, utf, f); \ else if (test_mode == PCRE16_MODE) \ - lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \ + lv = pchars16(c, (PCRE2_SPTR16)(p)+offset, len, utf, f); \ else \ - lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f) + lv = pchars8(c, (PCRE2_SPTR8)(p)+offset, len, utf, f) -#define PCHARSV(p, offset, len, utf, f) \ +#define PCHARSV(c, p, offset, len, utf, f) \ if (test_mode == PCRE32_MODE) \ - (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f); \ + (void)pchars32(c, (PCRE2_SPTR32)(p)+offset, len, utf, f); \ else if (test_mode == PCRE16_MODE) \ - (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f); \ + (void)pchars16(c, (PCRE2_SPTR16)(p)+offset, len, utf, f); \ else \ - (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f) + (void)pchars8(c, (PCRE2_SPTR8)(p)+offset, len, utf, f) -#define PTRUNCV(p, p_len, offset, left, utf, f) \ +#define PTRUNCV(c, p, p_len, offset, left, utf, f) \ if 
(test_mode == PCRE32_MODE) \ - ptrunc32((PCRE2_SPTR32)(p), p_len, offset, left, utf, f); \ + ptrunc32(c, (PCRE2_SPTR32)(p), p_len, offset, left, utf, f); \ else if (test_mode == PCRE16_MODE) \ - ptrunc16((PCRE2_SPTR16)(p), p_len, offset, left, utf, f); \ + ptrunc16(c, (PCRE2_SPTR16)(p), p_len, offset, left, utf, f); \ else \ - ptrunc8((PCRE2_SPTR8)(p), p_len, offset, left, utf, f) + ptrunc8(c, (PCRE2_SPTR8)(p), p_len, offset, left, utf, f) #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ if (test_mode == PCRE8_MODE) \ @@ -1423,13 +1504,13 @@ are supported. */ else \ a = pcre2_pattern_info_32(G(b,32),c,d) -#define PCRE2_PRINTINT(a) \ +#define PCRE2_PRINTINT(c, a) \ if (test_mode == PCRE8_MODE) \ - pcre2_printint_8(compiled_code8,outfile,a); \ + pcre2_printint_clr_8(c,compiled_code8,outfile,a); \ else if (test_mode == PCRE16_MODE) \ - pcre2_printint_16(compiled_code16,outfile,a); \ + pcre2_printint_clr_16(c,compiled_code16,outfile,a); \ else \ - pcre2_printint_32(compiled_code32,outfile,a) + pcre2_printint_clr_32(c,compiled_code32,outfile,a) #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ if (test_mode == PCRE8_MODE) \ @@ -1802,23 +1883,23 @@ the three different cases. 
*/ else \ memcpy(G(a,BITTWO),G(b,BITTWO),sizeof(G(pcre2_compile_context_,BITTWO))) -#define PCHARS(lv, p, offset, len, utf, f) \ +#define PCHARS(c, lv, p, offset, len, utf, f) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ - lv = G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \ + lv = G(pchars,BITONE)(c, (G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \ else \ - lv = G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f) + lv = G(pchars,BITTWO)(c, (G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f) -#define PCHARSV(p, offset, len, utf, f) \ +#define PCHARSV(c, p, offset, len, utf, f) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ - (void)G(pchars,BITONE)((G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \ + (void)G(pchars,BITONE)(c, (G(PCRE2_SPTR,BITONE))(p)+offset, len, utf, f); \ else \ - (void)G(pchars,BITTWO)((G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f) + (void)G(pchars,BITTWO)(c, (G(PCRE2_SPTR,BITTWO))(p)+offset, len, utf, f) -#define PTRUNCV(p, p_len, offset, left, utf, f) \ +#define PTRUNCV(c, p, p_len, offset, left, utf, f) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ - G(ptrunc,BITONE)((G(PCRE2_SPTR,BITONE))(p), p_len, offset, left, utf, f); \ + G(ptrunc,BITONE)(c, (G(PCRE2_SPTR,BITONE))(p), p_len, offset, left, utf, f); \ else \ - G(ptrunc,BITTWO)((G(PCRE2_SPTR,BITTWO))(p), p_len, offset, left, utf, f) + G(ptrunc,BITTWO)(c, (G(PCRE2_SPTR,BITTWO))(p), p_len, offset, left, utf, f) #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ @@ -1986,11 +2067,11 @@ the three different cases. 
*/ else \ a = G(pcre2_pattern_info_,BITTWO)(G(b,BITTWO),c,d) -#define PCRE2_PRINTINT(a) \ +#define PCRE2_PRINTINT(c,a) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ - G(pcre2_printint_,BITONE)(G(compiled_code,BITONE),outfile,a); \ + G(pcre2_printint_clr_,BITONE)(c,G(compiled_code,BITONE),outfile,a); \ else \ - G(pcre2_printint_,BITTWO)(G(compiled_code,BITTWO),outfile,a) + G(pcre2_printint_clr_,BITTWO)(c,G(compiled_code,BITTWO),outfile,a) #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ if (test_mode == G(G(PCRE,BITONE),_MODE)) \ @@ -2254,12 +2335,12 @@ the three different cases. */ #define DATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_match_context_8)) #define FLD(a,b) G(a,8)->b #define PATCTXCPY(a,b) memcpy(G(a,8),G(b,8),sizeof(pcre2_compile_context_8)) -#define PCHARS(lv, p, offset, len, utf, f) \ - lv = pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f) -#define PCHARSV(p, offset, len, utf, f) \ - (void)pchars8((PCRE2_SPTR8)(p)+offset, len, utf, f) -#define PTRUNCV(p, p_len, offset, left, utf, f) \ - ptrunc8((PCRE2_SPTR8)(p), p_len, offset, left, utf, f) +#define PCHARS(c, lv, p, offset, len, utf, f) \ + lv = pchars8(c, (PCRE2_SPTR8)(p)+offset, len, utf, f) +#define PCHARSV(c, p, offset, len, utf, f) \ + (void)pchars8(c, (PCRE2_SPTR8)(p)+offset, len, utf, f) +#define PTRUNCV(c, p, p_len, offset, left, utf, f) \ + ptrunc8(c, (PCRE2_SPTR8)(p), p_len, offset, left, utf, f) #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ a = pcre2_callout_enumerate_8(compiled_code8, \ (int (*)(struct pcre2_callout_enumerate_block_8 *, void *))b,c) @@ -2297,7 +2378,7 @@ the three different cases. 
*/ #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_8(G(a,8)) #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_8(G(b,8),c,d,(PCRE2_UCHAR8 **)e,f,G(g,8)) #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_8(G(b,8),c,d) -#define PCRE2_PRINTINT(a) pcre2_printint_8(compiled_code8,outfile,a) +#define PCRE2_PRINTINT(c,a) pcre2_printint_clr_8(c,compiled_code8,outfile,a) #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ r = pcre2_serialize_decode_8((pcre2_code_8 **)a,b,c,G(d,8)) #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ @@ -2371,12 +2452,12 @@ the three different cases. */ #define DATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_match_context_16)) #define FLD(a,b) G(a,16)->b #define PATCTXCPY(a,b) memcpy(G(a,16),G(b,16),sizeof(pcre2_compile_context_16)) -#define PCHARS(lv, p, offset, len, utf, f) \ - lv = pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f) -#define PCHARSV(p, offset, len, utf, f) \ - (void)pchars16((PCRE2_SPTR16)(p)+offset, len, utf, f) -#define PTRUNCV(p, p_len, offset, left, utf, f) \ - ptrunc16((PCRE2_SPTR16)(p), p_len, offset, left, utf, f) +#define PCHARS(c, lv, p, offset, len, utf, f) \ + lv = pchars16(c, (PCRE2_SPTR16)(p)+offset, len, utf, f) +#define PCHARSV(c, p, offset, len, utf, f) \ + (void)pchars16(c, (PCRE2_SPTR16)(p)+offset, len, utf, f) +#define PTRUNCV(c, p, p_len, offset, left, utf, f) \ + ptrunc16(c, (PCRE2_SPTR16)(p), p_len, offset, left, utf, f) #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ a = pcre2_callout_enumerate_16(compiled_code16, \ (int (*)(struct pcre2_callout_enumerate_block_16 *, void *))b,c) @@ -2414,7 +2495,7 @@ the three different cases. 
*/ #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_16(G(a,16)) #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_16(G(b,16),c,d,(PCRE2_UCHAR16 **)e,f,G(g,16)) #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_16(G(b,16),c,d) -#define PCRE2_PRINTINT(a) pcre2_printint_16(compiled_code16,outfile,a) +#define PCRE2_PRINTINT(c,a) pcre2_printint_clr_16(c,compiled_code16,outfile,a) #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ r = pcre2_serialize_decode_16((pcre2_code_16 **)a,b,c,G(d,16)) #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ @@ -2486,12 +2567,12 @@ the three different cases. */ #define DATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_match_context_32)) #define FLD(a,b) G(a,32)->b #define PATCTXCPY(a,b) memcpy(G(a,32),G(b,32),sizeof(pcre2_compile_context_32)) -#define PCHARS(lv, p, offset, len, utf, f) \ - lv = pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f) -#define PCHARSV(p, offset, len, utf, f) \ - (void)pchars32((PCRE2_SPTR32)(p)+offset, len, utf, f) -#define PTRUNCV(p, p_len, offset, left, utf, f) \ - ptrunc32((PCRE2_SPTR32)(p), p_len, offset, left, utf, f) +#define PCHARS(c, lv, p, offset, len, utf, f) \ + lv = pchars32(c, (PCRE2_SPTR32)(p)+offset, len, utf, f) +#define PCHARSV(c, p, offset, len, utf, f) \ + (void)pchars32(c, (PCRE2_SPTR32)(p)+offset, len, utf, f) +#define PTRUNCV(c, p, p_len, offset, left, utf, f) \ + ptrunc32(c, (PCRE2_SPTR32)(p), p_len, offset, left, utf, f) #define PCRE2_CALLOUT_ENUMERATE(a,b,c) \ a = pcre2_callout_enumerate_32(compiled_code32, \ (int (*)(struct pcre2_callout_enumerate_block_32 *, void *))b,c) @@ -2529,7 +2610,7 @@ the three different cases. 
*/ #define PCRE2_MATCH_DATA_FREE(a) pcre2_match_data_free_32(G(a,32)) #define PCRE2_PATTERN_CONVERT(a,b,c,d,e,f,g) a = pcre2_pattern_convert_32(G(b,32),c,d,(PCRE2_UCHAR32 **)e,f,G(g,32)) #define PCRE2_PATTERN_INFO(a,b,c,d) a = pcre2_pattern_info_32(G(b,32),c,d) -#define PCRE2_PRINTINT(a) pcre2_printint_32(compiled_code32,outfile,a) +#define PCRE2_PRINTINT(c,a) pcre2_printint_clr_32(c,compiled_code32,outfile,a) #define PCRE2_SERIALIZE_DECODE(r,a,b,c,d) \ r = pcre2_serialize_decode_32((pcre2_code_32 **)a,b,c,G(d,32)) #define PCRE2_SERIALIZE_ENCODE(r,a,b,c,d,e) \ @@ -3008,13 +3089,13 @@ if (show_memory) { if (block == NULL) { - fprintf(outfile, "** malloc() failed for %" SIZ_FORM "\n", size); + cprintf(clr_test_error, "** malloc() failed for %" SIZ_FORM "\n", size); } else { - fprintf(outfile, "malloc %5" SIZ_FORM, size); + cprintf(clr_profiling, "malloc %5" SIZ_FORM, size); #ifdef DEBUG_SHOW_MALLOC_ADDRESSES - fprintf(outfile, " %p", block); /* Not portable */ + cprintf(clr_profiling, " %p", block); /* Not portable */ #endif if (malloclistptr < MALLOCLISTSIZE) { @@ -3022,7 +3103,7 @@ if (show_memory) malloclistlength[malloclistptr++] = size; } else - fprintf(outfile, " (not remembered)"); + cprintf(clr_test_error, " (not remembered)"); fprintf(outfile, "\n"); } } @@ -3037,12 +3118,12 @@ if (show_memory && block != NULL) uint32_t i, j; BOOL found = FALSE; - fprintf(outfile, "free"); + cprintf(clr_profiling, "free"); for (i = 0; i < malloclistptr; i++) { if (block == malloclist[i]) { - fprintf(outfile, " %5" SIZ_FORM, malloclistlength[i]); + cprintf(clr_profiling, " %5" SIZ_FORM, malloclistlength[i]); malloclistptr--; for (j = i; j < malloclistptr; j++) { @@ -3053,9 +3134,9 @@ if (show_memory && block != NULL) break; } } - if (!found) fprintf(outfile, " unremembered block"); + if (!found) cprintf(clr_profiling, " unremembered block"); #ifdef DEBUG_SHOW_MALLOC_ADDRESSES - fprintf(outfile, " %p", block); /* Not portable */ + cprintf(clr_profiling, " %p", block); /* Not 
portable */ #endif fprintf(outfile, "\n"); } @@ -3373,7 +3454,16 @@ if (f != NULL) n = fprintf(f, "\\x{%02x}", c); return n >= 0 ? n : 0; } - +/* like pchar but add colour */ +static int +cpchar(int clr, uint32_t c, BOOL utf, FILE *f) +{ +int res; +colour_begin(clr, f); +res = pchar(c, utf, f); +colour_end(f); +return res; +} #ifdef SUPPORT_PCRE2_16 /************************************************* @@ -3414,11 +3504,12 @@ For printing *MARK strings, a negative length is given, indicating that the length is in the first code unit. If handed a NULL file, this function just counts chars without printing (because pchar() does that). */ -static int pchars8(PCRE2_SPTR8 p, ptrdiff_t length, BOOL utf, FILE *f) +static int pchars8(int clr, PCRE2_SPTR8 p, ptrdiff_t length, BOOL utf, FILE *f) { PCRE2_SPTR8 end; uint32_t c = 0; int yield = 0; +colour_begin(clr, f); if (length < 0) length = *p++; end = p + length; while (length-- > 0) @@ -3437,7 +3528,7 @@ while (length-- > 0) c = *p++; yield += pchar(c, utf, f); } - +colour_end(f); return yield; } #endif @@ -3453,11 +3544,12 @@ For printing *MARK strings, a negative length is given, indicating that the length is in the first code unit. If handed a NULL file, just counts chars without printing. */ -static int pchars16(PCRE2_SPTR16 p, ptrdiff_t length, BOOL utf, FILE *f) +static int pchars16(int clr, PCRE2_SPTR16 p, ptrdiff_t length, BOOL utf, FILE *f) { PCRE2_SPTR16 end; uint32_t c = 0; int yield = 0; +colour_begin(clr, f); if (length < 0) length = *p++; end = p + length; while (length-- > 0) @@ -3476,6 +3568,7 @@ while (length-- > 0) c = *p++; yield += pchar(c, utf, f); } +colour_end(f); return yield; } #endif /* SUPPORT_PCRE2_16 */ @@ -3492,9 +3585,10 @@ For printing *MARK strings, a negative length is given, indicating that the length is in the first code unit. If handed a NULL file, just counts chars without printing. 
*/ -static int pchars32(PCRE2_SPTR32 p, ptrdiff_t length, BOOL utf, FILE *f) +static int pchars32(int clr, PCRE2_SPTR32 p, ptrdiff_t length, BOOL utf, FILE *f) { int yield = 0; +colour_begin(clr, f); (void)(utf); /* Avoid compiler warning */ if (length < 0) length = *p++; while (length-- > 0) @@ -3502,6 +3596,7 @@ while (length-- > 0) uint32_t c = *p++; yield += pchar(c, utf, f); } +colour_end(f); return yield; } #endif /* SUPPORT_PCRE2_32 */ @@ -3516,13 +3611,14 @@ return yield; the offset to print from/to. If left is true, prints up to the offset, truncated; otherwise prints from the offset to the right, truncated. */ -static void ptrunc8(PCRE2_SPTR8 p, size_t p_len, size_t offset, BOOL left, +static void ptrunc8(int clr, PCRE2_SPTR8 p, size_t p_len, size_t offset, BOOL left, BOOL utf, FILE *f) { PCRE2_SPTR8 start = p + offset; PCRE2_SPTR8 end = p + offset; size_t printed = 0; +colour_begin(clr, f); (void)(utf); /* Avoid compiler warning */ if (left) @@ -3549,6 +3645,8 @@ else if (left && start > p) fprintf(f, "..."); for (; start < end; start++) fprintf(f, "%c", CHAR_OUTPUT(*start)); if (!left && end < p + p_len) fprintf(f, "..."); + +colour_end(f); } #endif @@ -3562,13 +3660,14 @@ if (!left && end < p + p_len) fprintf(f, "..."); the offset to print from/to. If left is true, prints up to the offset, truncated; otherwise prints from the offset to the right, truncated. 
*/ -static void ptrunc16(PCRE2_SPTR16 p, size_t p_len, size_t offset, BOOL left, +static void ptrunc16(int clr, PCRE2_SPTR16 p, size_t p_len, size_t offset, BOOL left, BOOL utf, FILE *f) { PCRE2_SPTR16 start = p + offset; PCRE2_SPTR16 end = p + offset; size_t printed = 0; +colour_begin(clr, f); if (left) { while (start > p && printed < 10) @@ -3607,6 +3706,8 @@ while (start < end) fputc((int)c, f); } if (!left && end < p + p_len) fprintf(f, "..."); + +colour_end(f); } #endif /* SUPPORT_PCRE2_16 */ @@ -3619,12 +3720,13 @@ if (!left && end < p + p_len) fprintf(f, "..."); /* Must handle UTF-32 strings in utf mode. Passed the total input string, and the offset to print from/to. If left is true, prints up to the offset, truncated; otherwise prints from the offset to the right, truncated. */ -static void ptrunc32(PCRE2_SPTR32 p, size_t p_len, size_t offset, BOOL left, +static void ptrunc32(int clr, PCRE2_SPTR32 p, size_t p_len, size_t offset, BOOL left, BOOL utf, FILE *f) { PCRE2_SPTR32 start = p + offset; PCRE2_SPTR32 end = p + offset; +colour_begin(clr, f); (void)(utf); /* Avoid compiler warning */ if (left) @@ -3650,10 +3752,60 @@ while (start < end) fputc((int)c, f); } if (!left && end < p + p_len) fprintf(f, "..."); +colour_end(f); +} +#endif /* SUPPORT_PCRE2_32 */ + + +#ifdef SUPPORT_PCRE2_8 +/************************************************* +* Print 8-bit compiled pattern (in colour) * +*************************************************/ + +static void +pcre2_printint_clr_8(int clr, pcre2_code_8 *re, FILE *f, BOOL print_lengths) +{ + colour_begin(clr, f); + pcre2_printint_8(re, f, print_lengths); + colour_end(f); +} +#endif /* SUPPORT_PCRE2_8 */ + + +#ifdef SUPPORT_PCRE2_16 +/************************************************* +* Print 16-bit compiled pattern (in colour) * +*************************************************/ + +static void +pcre2_printint_clr_16(int clr, pcre2_code_16 *re, FILE *f, BOOL print_lengths) +{ + colour_begin(clr, f); + 
pcre2_printint_16(re, f, print_lengths); + colour_end(f); +} +#endif /* SUPPORT_PCRE2_16 */ + + + +#ifdef SUPPORT_PCRE2_32 +/************************************************* +* Print 32-bit compiled pattern (in colour) * +*************************************************/ + +static void +pcre2_printint_clr_32(int clr, pcre2_code_32 *re, FILE *f, BOOL print_lengths) +{ + colour_begin(clr, f); + pcre2_printint_32(re, f, print_lengths); + colour_end(f); } #endif /* SUPPORT_PCRE2_32 */ +//////////////////////////////////////////////////////////////////////////////////// +//// END OF FUNCTION DEFINITION CHANGES ////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////// #ifdef SUPPORT_PCRE2_16 /************************************************* @@ -3704,7 +3856,7 @@ if (pbuffer16_size < 2*len + 2) pbuffer16 = (uint16_t *)malloc(pbuffer16_size); if (pbuffer16 == NULL) { - fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n", + fatal_printf("pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n", pbuffer16_size); exit(1); } @@ -3792,7 +3944,7 @@ if (pbuffer32_size < 4*len + 4) pbuffer32 = (uint32_t *)malloc(pbuffer32_size); if (pbuffer32 == NULL) { - fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n", + fatal_printf("pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n", pbuffer32_size); exit(1); } @@ -3854,7 +4006,7 @@ uint8_t *new_pbuffer8 = (uint8_t *)malloc(new_pbuffer8_size); if (new_buffer == NULL || new_pbuffer8 == NULL) { - fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed\n", + fatal_printf("pcre2test: malloc(%" SIZ_FORM ") failed\n", new_pbuffer8_size); exit(1); } @@ -3919,7 +4071,7 @@ for (;;) dlen = strlen(s); if (dlen > rlen - 2) { - fprintf(outfile, "** Interactive input exceeds buffer space\n"); + cprintf(clr_test_error, "** Interactive input exceeds buffer space\n"); exit(1); } if (dlen > 0) add_history(s); @@ -3937,7 +4089,7 @@ for 
(;;) /* Read the next line by normal means, prompting if the file is a tty. */ - if (INTERACTIVE(f)) printf("%s", prompt); + if (INTERACTIVE(f)) { colour_begin(clr_profiling, stdout); printf("%s", prompt); colour_end(stdout); } /* prompt stays on stdout, not outfile */ if (fgets((char *)here, rlen_trunc, f) == NULL) return (here == start)? NULL : start; @@ -3958,8 +4110,8 @@ for (;;) if (dlen < (unsigned)rlen_trunc - 1 && !feof(f)) { - fprintf(outfile, "** Binary zero encountered in input\n"); - fprintf(outfile, "** pcre2test run abandoned\n"); + cprintf(clr_test_error, "** Binary zero encountered in input\n"); + cprintf(clr_test_error, "** pcre2test run abandoned\n"); exit(1); } } @@ -4079,7 +4231,7 @@ if (restrict_for_perl_test) switch(m->which) break; default: - fprintf(outfile, "** \"%s\" is not allowed in a Perl-compatible test\n", + cprintf(clr_test_error, "** \"%s\" is not allowed in a Perl-compatible test\n", m->name); return NULL; } @@ -4120,9 +4272,9 @@ switch (m->which) if (field == NULL) { if (c == 0) - fprintf(outfile, "** \"%s\" is not valid here\n", m->name); + cprintf(clr_test_error, "** \"%s\" is not valid here\n", m->name); else - fprintf(outfile, "** /%c is not valid here\n", c); + cprintf(clr_test_error, "** /%c is not valid here\n", c); return NULL; } @@ -4210,9 +4362,9 @@ for (;;) if (!first) { - fprintf(outfile, "** Unrecognized modifier \"%.*s\"\n", (int)(ep-p), p); + cprintf(clr_test_error, "** Unrecognized modifier \"%.*s\"\n", (int)(ep-p), p); if (ep - p == 1) - fprintf(outfile, "** Single-character modifiers must come first\n"); + cprintf(clr_test_error, "** Single-character modifiers must come first\n"); return FALSE; } @@ -4225,7 +4377,7 @@ for (;;) if (i >= C1MODLISTCOUNT) { - fprintf(outfile, "** Unrecognized modifier '%c' in modifier string " + cprintf(clr_test_error, "** Unrecognized modifier '%c' in modifier string " "\"%.*s\"\n", *p, (int)(ep-mp), mp); return FALSE; } @@ -4241,7 +4393,7 @@ for (;;) strlen(c1modlist[i].fullname)); if (index < 0) { - fprintf(outfile, "** Internal error: single-character equivalent " 
+ cprintf(clr_test_error, "** Internal error: single-character equivalent " "modifier \"%s\" not found\n", c1modlist[i].fullname); return FALSE; } @@ -4275,12 +4427,12 @@ for (;;) { if (*pp++ != '=') { - fprintf(outfile, "** '=' expected after \"%s\"\n", m->name); + cprintf(clr_test_error, "** '=' expected after \"%s\"\n", m->name); return FALSE; } if (off) { - fprintf(outfile, "** '-' is not valid for \"%s\"\n", m->name); + cprintf(clr_test_error, "** '-' is not valid for \"%s\"\n", m->name); return FALSE; } } @@ -4289,7 +4441,7 @@ for (;;) else if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0) { - fprintf(outfile, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p); + cprintf(clr_test_error, "** Unrecognized modifier '%.*s'\n", (int)(ep-p), p); return FALSE; } @@ -4465,7 +4617,7 @@ for (;;) field = (char *)field + sizeof(int32_t); if (ct <= 0) { - fprintf(outfile, "** Too many numeric \"%s\" modifiers\n", m->name); + cprintf(clr_test_error, "** Too many numeric \"%s\" modifiers\n", m->name); return FALSE; } } @@ -4483,13 +4635,13 @@ for (;;) { if (len > MAX_NAME_SIZE) { - fprintf(outfile, "** Group name in \"%s\" is too long\n", m->name); + cprintf(clr_test_error, "** Group name in \"%s\" is too long\n", m->name); return FALSE; } while (*nn != 0) nn += strlen(nn) + 1; if (nn + len + 2 - (char *)field > LENCPYGET) { - fprintf(outfile, "** Too many characters in named \"%s\" modifiers\n", + cprintf(clr_test_error, "** Too many characters in named \"%s\" modifiers\n", m->name); return FALSE; } @@ -4504,7 +4656,7 @@ for (;;) case MOD_STR: if (len + 1 > m->value) { - fprintf(outfile, "** Overlong value for \"%s\" (max %d code units)\n", + cprintf(clr_test_error, "** Overlong value for \"%s\" (max %d code units)\n", m->name, m->value - 1); return FALSE; } @@ -4516,7 +4668,7 @@ for (;;) if (*pp != ',' && *pp != '\n' && *pp != ' ' && *pp != 0) { - fprintf(outfile, "** Comma expected after modifier item \"%s\"\n", m->name); + cprintf(clr_test_error, "** Comma 
expected after modifier item \"%s\"\n", m->name); return FALSE; } @@ -4528,7 +4680,7 @@ for (;;) pctl->locale[0] != 0 || (pctl->control & NOTPOP_CONTROLS) != 0)) { - fprintf(outfile, "** \"%s\" is not valid here\n", m->name); + cprintf(clr_test_error, "** \"%s\" is not valid here\n", m->name); return FALSE; } } @@ -4536,7 +4688,7 @@ for (;;) return TRUE; INVALID_VALUE: -fprintf(outfile, "** Invalid value in \"%.*s\"\n", (int)(ep-p), p); +cprintf(clr_test_error, "** Invalid value in \"%.*s\"\n", (int)(ep-p), p); return FALSE; } @@ -4565,10 +4717,10 @@ PCRE2_PATTERN_INFO(rc, compiled_code, what, where); if (rc >= 0) return 0; if (rc != PCRE2_ERROR_UNSET || !unsetok) { - fprintf(outfile, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode, + cprintf(clr_test_error, "Error %d from pcre2_pattern_info_%d(%d)\n", rc, test_mode, what); if (rc == PCRE2_ERROR_BADMODE) - fprintf(outfile, "Running in %d-bit mode but pattern was compiled in " + cprintf(clr_test_error, "Running in %d-bit mode but pattern was compiled in " "%d-bit mode\n", test_mode, 8 * (FLD(compiled_code, flags) & PCRE2_MODE_MASK)); } @@ -4784,7 +4936,7 @@ when 8-bit mode is supported. */ static void prmsg(const char **msg, const char *s) { -fprintf(outfile, "%s %s", *msg, s); +cprintf(clr_test_error, "%s %s", *msg, s); *msg = ""; } #endif /* SUPPORT_PCRE2_8 */ @@ -4808,9 +4960,9 @@ Returns: nothing */ static void -show_controls(uint32_t controls, uint32_t controls2, const char *before) +show_controls(int clr, uint32_t controls, uint32_t controls2, const char *before) { -fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", +cprintf(clr, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", before, ((controls & CTL_AFTERTEXT) != 0)? " aftertext" : "", ((controls & CTL_ALLAFTERTEXT) != 0)? 
" allaftertext" : "", @@ -4844,6 +4996,7 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s ((controls & CTL_NULLCONTEXT) != 0)? " null_context" : "", ((controls2 & CTL2_NULL_REPLACEMENT) != 0)? " null_replacement" : "", ((controls2 & CTL2_NULL_SUBJECT) != 0)? " null_subject" : "", + ((controls2 & CTL2_NULL_SUBSTITUTE_SUBJECT) != 0)? " null_substitute_subject" : "", ((controls & CTL_POSIX) != 0)? " posix" : "", ((controls & CTL_POSIX_NOSUB) != 0)? " posix_nosub" : "", ((controls & CTL_PUSH) != 0)? " push" : "", @@ -4856,9 +5009,11 @@ fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s ((controls2 & CTL2_SUBSTITUTE_LITERAL) != 0)? " substitute_literal" : "", ((controls2 & CTL2_SUBSTITUTE_MATCHED) != 0)? " substitute_matched" : "", ((controls2 & CTL2_SUBSTITUTE_OVERFLOW_LENGTH) != 0)? " substitute_overflow_length" : "", + ((controls2 & CTL2_SUBSTITUTE_OVERWRITE) != 0)? " substitute_overwrite" : "", ((controls2 & CTL2_SUBSTITUTE_REPLACEMENT_ONLY) != 0)? " substitute_replacement_only" : "", ((controls2 & CTL2_SUBSTITUTE_UNKNOWN_UNSET) != 0)? " substitute_unknown_unset" : "", ((controls2 & CTL2_SUBSTITUTE_UNSET_EMPTY) != 0)? " substitute_unset_empty" : "", + ((controls2 & CTL2_SUBSTITUTE_ZERO_TERMINATE) != 0)? " substitute_zero_terminate" : "", ((controls & CTL_USE_LENGTH) != 0)? " use_length" : "", ((controls & CTL_UTF8_INPUT) != 0)? " utf8_input" : "", ((controls & CTL_ZERO_TERMINATE) != 0)? 
" zero_terminate" : ""); @@ -4881,10 +5036,10 @@ Returns: nothing */ static void -show_compile_options(uint32_t options, const char *before, const char *after) +show_compile_options(int clr, uint32_t options, const char *before, const char *after) { -if (options == 0) fprintf(outfile, "%s %s", before, after); -else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", +if (options == 0) cprintf(clr, "%s %s", before, after); +else cprintf(clr, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", before, ((options & PCRE2_ALT_BSUX) != 0)? " alt_bsux" : "", ((options & PCRE2_ALT_CIRCUMFLEX) != 0)? " alt_circumflex" : "", @@ -4926,7 +5081,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s% *************************************************/ /* Called from show_pattern_info() and for unsupported POSIX options. - +show_compile_options Arguments: options an options word before text to print before @@ -4936,11 +5091,11 @@ Returns: nothing */ static void -show_compile_extra_options(uint32_t options, const char *before, +show_compile_extra_options(int clr, uint32_t options, const char *before, const char *after) { -if (options == 0) fprintf(outfile, "%s %s", before, after); -else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", +if (options == 0) cprintf(clr, "%s %s", before, after); +else cprintf(clr, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", before, ((options & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) != 0) ? " allow_lookaround_bsk" : "", ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? 
" allow_surrogate_escapes" : "", @@ -4979,8 +5134,8 @@ Returns: nothing static void show_optimize_flags(uint32_t flags, const char *before, const char *after) { -if (flags == 0) fprintf(outfile, "%s%s", before, after); -else fprintf(outfile, "%s%s%s%s%s%s%s", +if (flags == 0) cprintf(clr_output, "%s%s", before, after); +else cprintf(clr_output, "%s%s%s%s%s%s%s", before, ((flags & PCRE2_OPTIM_AUTO_POSSESS) != 0) ? "auto_possess" : "", ((flags & PCRE2_OPTIM_AUTO_POSSESS) != 0 && (flags >> 1) != 0) ? "," : "", @@ -4999,9 +5154,9 @@ else fprintf(outfile, "%s%s%s%s%s%s%s", /* Called for unsupported POSIX options. */ static void -show_match_options(uint32_t options) +show_match_options(int clr, uint32_t options) { -fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s", +cprintf(clr, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s", ((options & PCRE2_ANCHORED) != 0)? " anchored" : "", ((options & PCRE2_COPY_MATCHED_SUBJECT) != 0)? " copy_matched_subject" : "", ((options & PCRE2_DFA_RESTART) != 0)? " dfa_restart" : "", @@ -5053,15 +5208,15 @@ if (test_mode == PCRE32_MODE) cblock_size = sizeof(pcre2_real_code_32); integer overflow. 
*/ data_size = (PCRE2_SIZE)name_count * (PCRE2_SIZE)name_entry_size * (PCRE2_SIZE)code_unit_size; -fprintf(outfile, "Memory allocation - code size : %" SIZ_FORM "\n", size - +cprintf(clr_profiling, "Memory allocation - code size : %" SIZ_FORM "\n", size - cblock_size - data_size); if (data_size != 0) - fprintf(outfile, "Memory allocation - data size : %" SIZ_FORM "\n", data_size); + cprintf(clr_profiling, "Memory allocation - data size : %" SIZ_FORM "\n", data_size); if (pat_patctl.jit != 0) { (void)pattern_info(PCRE2_INFO_JITSIZE, &size, FALSE); - fprintf(outfile, "Memory allocation - JIT code : %" SIZ_FORM "\n", size); + cprintf(clr_profiling, "Memory allocation - JIT code : %" SIZ_FORM "\n", size); } } @@ -5076,7 +5231,7 @@ show_framesize(void) { PCRE2_SIZE frame_size; (void)pattern_info(PCRE2_INFO_FRAMESIZE, &frame_size, FALSE); -fprintf(outfile, "Frame size for pcre2_match(): %" SIZ_FORM "\n", frame_size); +cprintf(clr_profiling, "Frame size for pcre2_match(): %" SIZ_FORM "\n", frame_size); } @@ -5090,7 +5245,7 @@ show_heapframes_size(void) { PCRE2_SIZE heapframes_size; PCRE2_GET_MATCH_DATA_HEAPFRAMES_SIZE(heapframes_size, match_data); -fprintf(outfile, "Heapframes size in match_data: %" SIZ_FORM "\n", +cprintf(clr_profiling, "Heapframes size in match_data: %" SIZ_FORM "\n", heapframes_size); } @@ -5107,14 +5262,14 @@ int len; PCRE2_GET_ERROR_MESSAGE(len, errorcode); if (len < 0) { - fprintf(outfile, "\n** pcre2test internal error: cannot interpret error " + cprintf(clr_test_error, "\n** pcre2test internal error: cannot interpret error " "number\n** Unexpected return (%d) from pcre2_get_error_message()\n", len); } else { - fprintf(outfile, "%s", before); - PCHARSV(errorbuffer, 0, len, FALSE, outfile); - fprintf(outfile, "%s", after); + cprintf(clr_api_error, "%s", before); + PCHARSV(clr_api_error, errorbuffer, 0, len, FALSE, outfile); + cprintf(clr_api_error, "%s", after); } return len >= 0; } @@ -5146,12 +5301,12 @@ PCRE2_SIZE next_item_length = 
cb->next_item_length; (void)callout_data; /* Not currently displayed */ -fprintf(outfile, "Callout "); +cprintf(clr_output, "Callout "); if (cb->callout_string != NULL) { uint32_t delimiter = CODE_UNIT(cb->callout_string, -1); - fprintf(outfile, "%c", CHAR_OUTPUT(delimiter)); - PCHARSV(cb->callout_string, 0, + cprintf(clr_output, "%c", CHAR_OUTPUT(delimiter)); + PCHARSV(clr_output, cb->callout_string, 0, cb->callout_string_length, utf, outfile); for (i = 0; callout_start_delims[i] != 0; i++) if (delimiter == callout_start_delims[i]) @@ -5159,14 +5314,14 @@ if (cb->callout_string != NULL) delimiter = callout_end_delims[i]; break; } - fprintf(outfile, "%c ", CHAR_OUTPUT(delimiter)); + cprintf(clr_output, "%c ", CHAR_OUTPUT(delimiter)); } -else fprintf(outfile, "%d ", cb->callout_number); +else cprintf(clr_output, "%d ", cb->callout_number); if (next_item_length == 0 && CODE_UNIT(pattern_string, cb->pattern_position) != 0) next_item_length = 1; -PCHARSV(pattern_string, cb->pattern_position, next_item_length, utf, outfile); -fprintf(outfile, "\n"); +PCHARSV(clr_output, pattern_string, cb->pattern_position, next_item_length, utf, outfile); +cprintf(clr_output, "\n"); return 0; } @@ -5195,8 +5350,8 @@ BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; if ((pat_patctl.control & (CTL_BINCODE|CTL_FULLBINCODE)) != 0) { - fprintf(outfile, "------------------------------------------------------------------\n"); - PCRE2_PRINTINT((pat_patctl.control & CTL_FULLBINCODE) != 0); + cprintf(clr_output, "------------------------------------------------------------------\n"); + PCRE2_PRINTINT(clr_output, (pat_patctl.control & CTL_FULLBINCODE) != 0); } if ((pat_patctl.control & CTL_INFO) != 0) @@ -5281,31 +5436,31 @@ if ((pat_patctl.control & CTL_INFO) != 0) != 0) return PR_ABEND; - fprintf(outfile, "Capture group count = %d\n", capture_count); + cprintf(clr_output, "Capture group count = %d\n", capture_count); if (backrefmax > 0) - fprintf(outfile, "Max back reference = 
%d\n", backrefmax); + cprintf(clr_output, "Max back reference = %d\n", backrefmax); if (maxlookbehind > 0) - fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind); + cprintf(clr_output, "Max lookbehind = %d\n", maxlookbehind); if (heap_limit_set) - fprintf(outfile, "Heap limit = %u\n", heap_limit); + cprintf(clr_output, "Heap limit = %u\n", heap_limit); if (match_limit_set) - fprintf(outfile, "Match limit = %u\n", match_limit); + cprintf(clr_output, "Match limit = %u\n", match_limit); if (depth_limit_set) - fprintf(outfile, "Depth limit = %u\n", depth_limit); + cprintf(clr_output, "Depth limit = %u\n", depth_limit); if (namecount > 0) { - fprintf(outfile, "Named capture groups:\n"); + cprintf(clr_output, "Named capture groups:\n"); for (; namecount > 0; namecount--) { int imm2_size = test_mode == PCRE8_MODE ? 2 : 1; size_t length = STRLEN(nametable + imm2_size); - fprintf(outfile, " "); + cprintf(clr_output, " "); /* In UTF mode the name may be a UTF string containing non-ASCII letters and digits. We must output it as a UTF-8 string. 
In non-UTF mode, @@ -5322,7 +5477,7 @@ if ((pat_patctl.control & CTL_INFO) != 0) { uint8_t u8buff[6]; int len = ord_to_utf8(*nameptr++, u8buff); - fprintf(outfile, "%.*s", len, u8buff); + cprintf(clr_output, "%.*s", len, u8buff); } } #endif @@ -5340,33 +5495,33 @@ if ((pat_patctl.control & CTL_INFO) != 0) if (ord_rc > 0) nameptr += ord_rc; else c = *nameptr++; len = ord_to_utf8(c, u8buff); - fprintf(outfile, "%.*s", len, u8buff); + cprintf(clr_output, "%.*s", len, u8buff); } } #endif #ifdef SUPPORT_PCRE2_8 if (test_mode == PCRE8_MODE) - fprintf(outfile, "%s", (PCRE2_SPTR8)nametable + imm2_size); + cprintf(clr_output, "%s", (PCRE2_SPTR8)nametable + imm2_size); #endif } else /* Not UTF mode */ { - PCHARSV(nametable, imm2_size, length, FALSE, outfile); + PCHARSV(clr_output, nametable, imm2_size, length, FALSE, outfile); } while (length++ < nameentrysize - imm2_size) putc(' ', outfile); #ifdef SUPPORT_PCRE2_32 if (test_mode == PCRE32_MODE) - fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0])); + cprintf(clr_output, "%3d\n", (int)(((PCRE2_SPTR32)nametable)[0])); #endif #ifdef SUPPORT_PCRE2_16 if (test_mode == PCRE16_MODE) - fprintf(outfile, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0])); + cprintf(clr_output, "%3d\n", (int)(((PCRE2_SPTR16)nametable)[0])); #endif #ifdef SUPPORT_PCRE2_8 if (test_mode == PCRE8_MODE) - fprintf(outfile, "%3d\n", (int)( + cprintf(clr_output, "%3d\n", (int)( ((((PCRE2_SPTR8)nametable)[0]) << 8) | ((PCRE2_SPTR8)nametable)[1])); #endif @@ -5374,9 +5529,9 @@ if ((pat_patctl.control & CTL_INFO) != 0) } } - if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n"); - if (hasbackslashc) fprintf(outfile, "Contains \\C\n"); - if (match_empty) fprintf(outfile, "May match empty string\n"); + if (hascrorlf) cprintf(clr_output, "Contains explicit CR or LF match\n"); + if (hasbackslashc) cprintf(clr_output, "Contains \\C\n"); + if (match_empty) cprintf(clr_output, "May match empty string\n"); pattern_info(PCRE2_INFO_ARGOPTIONS, 
&compile_options, FALSE); pattern_info(PCRE2_INFO_ALLOPTIONS, &overall_options, FALSE); @@ -5400,25 +5555,25 @@ if ((pat_patctl.control & CTL_INFO) != 0) if ((compile_options|overall_options) != 0) { if (compile_options == overall_options) - show_compile_options(compile_options, "Options:", "\n"); + show_compile_options(clr_output, compile_options, "Options:", "\n"); else { - show_compile_options(compile_options, "Compile options:", "\n"); - show_compile_options(overall_options, "Overall options:", "\n"); + show_compile_options(clr_output, compile_options, "Compile options:", "\n"); + show_compile_options(clr_output, overall_options, "Overall options:", "\n"); } } if (extra_options != 0) - show_compile_extra_options(extra_options, "Extra options:", "\n"); + show_compile_extra_options(clr_output, extra_options, "Extra options:", "\n"); if (FLD(compiled_code, optimization_flags) != PCRE2_OPTIMIZATION_ALL) show_optimize_flags(FLD(compiled_code, optimization_flags), "Optimizations: ", "\n"); - if (jchanged) fprintf(outfile, "Duplicate name status changes\n"); + if (jchanged) cprintf(clr_output, "Duplicate name status changes\n"); if ((pat_patctl.control2 & CTL2_BSR_SET) != 0 || (FLD(compiled_code, flags) & PCRE2_BSR_SET) != 0) - fprintf(outfile, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)? + cprintf(clr_output, "\\R matches %s\n", (bsr_convention == PCRE2_BSR_UNICODE)? 
"any Unicode newline" : "CR, LF, or CRLF"); if ((FLD(compiled_code, flags) & PCRE2_NL_SET) != 0) @@ -5426,27 +5581,27 @@ if ((pat_patctl.control & CTL_INFO) != 0) switch (newline_convention) { case PCRE2_NEWLINE_CR: - fprintf(outfile, "Forced newline is CR\n"); + cprintf(clr_output, "Forced newline is CR\n"); break; case PCRE2_NEWLINE_LF: - fprintf(outfile, "Forced newline is LF\n"); + cprintf(clr_output, "Forced newline is LF\n"); break; case PCRE2_NEWLINE_CRLF: - fprintf(outfile, "Forced newline is CRLF\n"); + cprintf(clr_output, "Forced newline is CRLF\n"); break; case PCRE2_NEWLINE_ANYCRLF: - fprintf(outfile, "Forced newline is CR, LF, or CRLF\n"); + cprintf(clr_output, "Forced newline is CR, LF, or CRLF\n"); break; case PCRE2_NEWLINE_ANY: - fprintf(outfile, "Forced newline is any Unicode newline\n"); + cprintf(clr_output, "Forced newline is any Unicode newline\n"); break; case PCRE2_NEWLINE_NUL: - fprintf(outfile, "Forced newline is NUL\n"); + cprintf(clr_output, "Forced newline is NUL\n"); break; default: @@ -5456,7 +5611,7 @@ if ((pat_patctl.control & CTL_INFO) != 0) if (first_ctype == 2) { - fprintf(outfile, "First code unit at start or follows newline\n"); + cprintf(clr_output, "First code unit at start or follows newline\n"); } else if (first_ctype == 1) { @@ -5464,23 +5619,23 @@ if ((pat_patctl.control & CTL_INFO) != 0) ((FLD(compiled_code, flags) & PCRE2_FIRSTCASELESS) == 0)? 
"" : " (caseless)"; if (first_cunit != 0xff && PRINTABLE(first_cunit)) - fprintf(outfile, "First code unit = \'%c\'%s\n", CHAR_OUTPUT(first_cunit), + cprintf(clr_output, "First code unit = \'%c\'%s\n", CHAR_OUTPUT(first_cunit), caseless); else { - fprintf(outfile, "First code unit = "); + cprintf(clr_output, "First code unit = "); if (first_cunit == 0xff) - fprintf(outfile, "\\xff"); + cprintf(clr_output, "\\xff"); else - pchar(first_cunit, FALSE, outfile); - fprintf(outfile, "%s\n", caseless); + cpchar(clr_output, first_cunit, FALSE, outfile); + cprintf(clr_output, "%s\n", caseless); } } else if (start_bits != NULL) { int input; int c = 24; - fprintf(outfile, "Starting code units:"); + cprintf(clr_output, "Starting code units:"); for (input = 0; input < 256; input++) { int i = CHAR_INPUT_HEX(input); @@ -5488,17 +5643,17 @@ if ((pat_patctl.control & CTL_INFO) != 0) { if (c > 75) { - fprintf(outfile, "\n "); + cprintf(clr_output, "\n "); c = 2; } if (PRINTABLE(i) && i != CHAR_SPACE) { - fprintf(outfile, " %c", CHAR_OUTPUT(i)); + cprintf(clr_output, " %c", CHAR_OUTPUT(i)); c += 2; } else { - fprintf(outfile, " \\x%02x", CHAR_OUTPUT_HEX(i)); + cprintf(clr_output, " \\x%02x", CHAR_OUTPUT_HEX(i)); c += 5; } } @@ -5512,33 +5667,33 @@ if ((pat_patctl.control & CTL_INFO) != 0) ((FLD(compiled_code, flags) & PCRE2_LASTCASELESS) == 0)? 
"" : " (caseless)"; if (PRINTABLE(last_cunit)) - fprintf(outfile, "Last code unit = \'%c\'%s\n", CHAR_OUTPUT(last_cunit), + cprintf(clr_output, "Last code unit = \'%c\'%s\n", CHAR_OUTPUT(last_cunit), caseless); else { - fprintf(outfile, "Last code unit = "); - pchar(last_cunit, FALSE, outfile); - fprintf(outfile, "%s\n", caseless); + cprintf(clr_output, "Last code unit = "); + cpchar(clr_output, last_cunit, FALSE, outfile); + cprintf(clr_output, "%s\n", caseless); } } if ((FLD(compiled_code, optimization_flags) & PCRE2_OPTIM_START_OPTIMIZE) != 0) - fprintf(outfile, "Subject length lower bound = %d\n", minlength); + cprintf(clr_output, "Subject length lower bound = %d\n", minlength); if (pat_patctl.jit != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0) { #ifdef SUPPORT_JIT if (FLD(compiled_code, executable_jit) != NULL) - fprintf(outfile, "JIT compilation was successful\n"); + cprintf(clr_output, "JIT compilation was successful\n"); else { - fprintf(outfile, "JIT compilation was not successful"); + cprintf(clr_output, "JIT compilation was not successful"); if (jitrc != 0 && !print_error_message(jitrc, " (", ")")) return PR_ABEND; fprintf(outfile, "\n"); } #else - fprintf(outfile, "JIT support is not available in this version of PCRE2\n"); + cprintf(clr_output, "JIT support is not available in this version of PCRE2\n"); #endif } } @@ -5549,7 +5704,7 @@ if ((pat_patctl.control & CTL_CALLOUT_INFO) != 0) PCRE2_CALLOUT_ENUMERATE(errorcode, callout_callback, 0); if (errorcode != 0) { - fprintf(outfile, "Callout enumerate failed: error %d: ", errorcode); + cprintf(clr_test_error, "Callout enumerate failed: error %d: ", errorcode); if (errorcode < 0 && !print_error_message(errorcode, "", "\n")) return PR_ABEND; return PR_SKIP; @@ -5577,7 +5732,7 @@ Returns: FALSE if print_error_message() fails static BOOL serial_error(int rc, const char *msg) { -fprintf(outfile, "%s failed: error %d: ", msg, rc); +cprintf(clr_api_error, "%s failed: error %d: ", msg, rc); return 
print_error_message(rc, "", "\n"); } @@ -5609,7 +5764,7 @@ while (endf > filename && isspace((unsigned char)endf[-1])) endf--; if (endf == filename) { - fprintf(outfile, "** File name expected after %s\n", name); + cprintf(clr_test_error, "** File name expected after %s\n", name); return PR_ABEND; } @@ -5617,7 +5772,7 @@ if (endf == filename) *fptr = fopen((const char *)filename, mode); if (*fptr == NULL) { - fprintf(outfile, "** Failed to open \"%s\": %s\n", filename, strerror(errno)); + cprintf(clr_test_error, "** Failed to open \"%s\": %s\n", filename, strerror(errno)); return PR_ABEND; } @@ -5678,14 +5833,14 @@ argptr = buffer + cmdlen + 1; if (restrict_for_perl_test && cmd != CMD_PATTERN && cmd != CMD_SUBJECT && cmd != CMD_IF && cmd != CMD_ENDIF) { - fprintf(outfile, "** #%s is not allowed after #perltest\n", cmdname); + cprintf(clr_test_error, "** #%s is not allowed after #perltest\n", cmdname); return PR_ABEND; } switch(cmd) { case CMD_UNKNOWN: - fprintf(outfile, "** Unknown command: %s", buffer); + cprintf(clr_test_error, "** Unknown command: %s", buffer); break; case CMD_FORBID_UTF: @@ -5743,10 +5898,11 @@ switch(cmd) case CMD_POPCOPY: if (patstacknext <= 0) { - fprintf(outfile, "** Can't pop off an empty stack\n"); + cprintf(clr_test_error, "** Can't pop off an empty stack\n"); return PR_SKIP; } memset(&pat_patctl, 0, sizeof(patctl)); /* Completely unset */ + pat_patctl.substitute_options[0] = 0xFF; /* A value of 0 means the empty string, so we use 0xFF to mean unset */ if (!decode_modifiers(argptr, CTX_POPPAT, &pat_patctl, NULL)) return PR_SKIP; @@ -5777,7 +5933,7 @@ switch(cmd) case CMD_SAVE: if (patstacknext <= 0) { - fprintf(outfile, "** No stacked patterns to save\n"); + cprintf(clr_test_error, "** No stacked patterns to save\n"); return PR_OK; } @@ -5802,7 +5958,7 @@ switch(cmd) for (i = 0; i < 4; i++) fputc((serial_size >> (i*8)) & 255, f); if (fwrite(serial, 1, serial_size, f) != serial_size) { - fprintf(outfile, "** Wrong return from 
fwrite()\n"); + cprintf(clr_test_error, "** Wrong return from fwrite()\n"); fclose(f); return PR_ABEND; } @@ -5829,7 +5985,7 @@ switch(cmd) serial = malloc(serial_size); if (serial == NULL) { - fprintf(outfile, "** Failed to get memory (size %" SIZ_FORM ") for #load\n", + cprintf(clr_test_error, "** Failed to get memory (size %" SIZ_FORM ") for #load\n", serial_size); fclose(f); return PR_ABEND; @@ -5840,7 +5996,7 @@ switch(cmd) if (i != serial_size) { - fprintf(outfile, "** Wrong return from fread()\n"); + cprintf(clr_test_error, "** Wrong return from fread()\n"); yield = PR_ABEND; } else @@ -5854,10 +6010,10 @@ switch(cmd) { if (rc + patstacknext > PATSTACKSIZE) { - fprintf(outfile, "** Not enough space on pattern stack for %d pattern%s\n", + cprintf(clr_test_error, "** Not enough space on pattern stack for %d pattern%s\n", rc, (rc == 1)? "" : "s"); rc = PATSTACKSIZE - patstacknext; - fprintf(outfile, "** Decoding %d pattern%s\n", rc, + cprintf(clr_test_error, "** Decoding %d pattern%s\n", rc, (rc == 1)? 
"" : "s"); } PCRE2_SERIALIZE_DECODE(rc, patstack + patstacknext, rc, serial, @@ -5887,12 +6043,12 @@ switch(cmd) if (tables3 == NULL) { - fprintf(outfile, "** Failed: malloc failed for #loadtables\n"); + cprintf(clr_test_error, "** Failed: malloc failed for #loadtables\n"); yield = PR_ABEND; } else if (fread(tables3, 1, loadtables_length, f) != loadtables_length) { - fprintf(outfile, "** Wrong return from fread()\n"); + cprintf(clr_test_error, "** Wrong return from fread()\n"); yield = PR_ABEND; } @@ -5902,7 +6058,7 @@ switch(cmd) case CMD_IF: if (inside_if) { - fprintf(outfile, "** Nested #if not supported\n"); + cprintf(clr_test_error, "** Nested #if not supported\n"); return PR_ABEND; } @@ -5929,7 +6085,7 @@ switch(cmd) } if (i == COPTLISTCOUNT) { - fprintf(outfile, "** Unknown condition: %s\n", buffer); + cprintf(clr_test_error, "** Unknown condition: %s\n", buffer); return PR_ABEND; } @@ -5943,7 +6099,7 @@ switch(cmd) case CMD_ENDIF: if (!inside_if) { - fprintf(outfile, "** Unexpected #endif\n"); + cprintf(clr_test_error, "** Unexpected #endif\n"); return PR_ABEND; } inside_if = FALSE; @@ -5990,7 +6146,7 @@ PCRE2_SIZE erroroffset; if (restrict_for_perl_test && delimiter != '/') { - fprintf(outfile, "** The only allowed delimiter after #perltest is '/'\n"); + cprintf(clr_test_error, "** The only allowed delimiter after #perltest is '/'\n"); return PR_ABEND; } @@ -6011,12 +6167,12 @@ for(;;) p++; } if (*p != 0) break; - if ((p = extend_inputline(infile, p, " > ")) == NULL) + if ((p = extend_inputline(infile, p, PROMPT(" > "))) == NULL) { - fprintf(outfile, "** Unexpected EOF\n"); + cprintf(clr_test_error, "** Unexpected EOF\n"); return PR_ABEND; } - if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)p); + if (!INTERACTIVE(infile)) cprintf(clr_input, "%s", (char *)p); } /* If the first character after the delimiter is backslash, make the pattern @@ -6046,12 +6202,12 @@ if ((pat_patctl.control & CTL_UTF8_INPUT) != 0) { if (test_mode == PCRE8_MODE) { - 
fprintf(outfile, "** The utf8_input modifier is not allowed in 8-bit mode\n"); + cprintf(clr_test_error, "** The utf8_input modifier is not allowed in 8-bit mode\n"); return PR_SKIP; } if (utf) { - fprintf(outfile, "** The utf and utf8_input modifiers are mutually exclusive\n"); + cprintf(clr_test_error, "** The utf and utf8_input modifiers are mutually exclusive\n"); return PR_SKIP; } } @@ -6061,7 +6217,7 @@ if ((pat_patctl.control & CTL_UTF8_INPUT) != 0) if (pat_patctl.convert_type != CONVERT_UNSET && (pat_patctl.control & CTL_POSIX) != 0) { - fprintf(outfile, "** The convert and posix modifiers are mutually exclusive\n"); + cprintf(clr_test_error, "** The convert and posix modifiers are mutually exclusive\n"); return PR_SKIP; } @@ -6073,7 +6229,7 @@ for (k = 0; k < sizeof(exclusive_pat_controls)/sizeof(uint32_t); k++) uint32_t c = pat_patctl.control & exclusive_pat_controls[k]; if (c != 0 && c != (c & (~c+1))) { - show_controls(c, 0, "** Not allowed together:"); + show_controls(clr_test_error, c, 0, "** Not allowed together:"); fprintf(outfile, "\n"); return PR_SKIP; } @@ -6112,7 +6268,7 @@ if ((pat_patctl.control & CTL_HEXPAT) != 0) d = *pp; if (d == 0) { - fprintf(outfile, "** Missing closing quote in hex pattern: " + cprintf(clr_test_error, "** Missing closing quote in hex pattern: " "opening quote is at offset %" PTR_FORM ".\n", pq - buffer - 2); return PR_SKIP; } @@ -6127,19 +6283,19 @@ if ((pat_patctl.control & CTL_HEXPAT) != 0) { if (!isxdigit(c)) { - fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %" + cprintf(clr_test_error, "** Unexpected non-hex-digit '%c' at offset %" PTR_FORM " in hex pattern: quote missing?\n", c, pp - buffer - 2); return PR_SKIP; } if (*pp == 0) { - fprintf(outfile, "** Odd number of digits in hex pattern\n"); + cprintf(clr_test_error, "** Odd number of digits in hex pattern\n"); return PR_SKIP; } d = *pp; if (!isxdigit(d)) { - fprintf(outfile, "** Unexpected non-hex-digit '%c' at offset %" + cprintf(clr_test_error, 
"** Unexpected non-hex-digit '%c' at offset %" PTR_FORM " in hex pattern: quote missing?\n", d, pp - buffer - 1); return PR_SKIP; } @@ -6186,7 +6342,7 @@ else if ((pat_patctl.control & CTL_EXPAND) != 0) uli = strtoul((const char *)pe, &endptr, 10); if (U32OVERFLOW(uli)) { - fprintf(outfile, "** Pattern repeat count too large\n"); + cprintf(clr_test_error, "** Pattern repeat count too large\n"); return PR_SKIP; } @@ -6196,7 +6352,7 @@ else if ((pat_patctl.control & CTL_EXPAND) != 0) { if (i == 0) { - fprintf(outfile, "** Zero repeat not allowed\n"); + cprintf(clr_test_error, "** Zero repeat not allowed\n"); return PR_SKIP; } pc += 2; @@ -6235,7 +6391,7 @@ else if ((pat_patctl.control & CTL_EXPAND) != 0) patlen = pt - pbuffer8; if ((pat_patctl.control & CTL_INFO) != 0) - fprintf(outfile, "Expanded: %s\n", pbuffer8); + cprintf(clr_output, "Expanded: %s\n", pbuffer8); } /* Neither hex nor expanded, just copy the input verbatim. */ @@ -6251,12 +6407,12 @@ if (pat_patctl.locale[0] != 0) { if (pat_patctl.tables_id != 0) { - fprintf(outfile, "** 'Locale' and 'tables' must not both be set\n"); + cprintf(clr_test_error, "** 'Locale' and 'tables' must not both be set\n"); return PR_SKIP; } if (setlocale(LC_CTYPE, (const char *)pat_patctl.locale) == NULL) { - fprintf(outfile, "** Failed to set locale \"%s\"\n", pat_patctl.locale); + cprintf(clr_test_error, "** Failed to set locale \"%s\"\n", pat_patctl.locale); return PR_SKIP; } if (strcmp((const char *)pat_patctl.locale, (const char *)locale_name) != 0) @@ -6280,7 +6436,7 @@ else switch (pat_patctl.tables_id) case 3: if (tables3 == NULL) { - fprintf(outfile, "** 'Tables = 3' is invalid: binary tables have not " + cprintf(clr_test_error, "** 'Tables = 3' is invalid: binary tables have not " "been loaded\n"); return PR_SKIP; } @@ -6288,7 +6444,7 @@ else switch (pat_patctl.tables_id) break; default: - fprintf(outfile, "** 'Tables' must specify 0, 1, 2, or 3.\n"); + cprintf(clr_test_error, "** 'Tables' must specify 0, 1, 2, or 
3.\n"); return PR_SKIP; } @@ -6315,7 +6471,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0) if (test_mode != PCRE8_MODE) { - fprintf(outfile, "** The POSIX interface is available only in 8-bit mode\n"); + cprintf(clr_test_error, "** The POSIX interface is available only in 8-bit mode\n"); return PR_SKIP; } @@ -6331,7 +6487,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0) if ((pat_patctl.options & ~POSIX_SUPPORTED_COMPILE_OPTIONS) != 0) { - show_compile_options( + show_compile_options(clr_test_error, pat_patctl.options & (uint32_t)(~POSIX_SUPPORTED_COMPILE_OPTIONS), msg, ""); msg = ""; @@ -6340,7 +6496,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0) if ((FLD(pat_context, extra_options) & (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS)) != 0) { - show_compile_extra_options( + show_compile_extra_options(clr_test_error, FLD(pat_context, extra_options) & (uint32_t)(~POSIX_SUPPORTED_COMPILE_EXTRA_OPTIONS), msg, ""); msg = ""; @@ -6349,7 +6505,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0) if ((pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS)) != 0 || (pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2)) != 0) { - show_controls( + show_controls(clr_test_error, pat_patctl.control & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS), pat_patctl.control2 & (uint32_t)(~POSIX_SUPPORTED_COMPILE_CONTROLS2), msg); @@ -6416,12 +6572,12 @@ if ((pat_patctl.control & CTL_POSIX) != 0) Therefore, we print a maximum of one less than the size of the buffer. 
*/ psize = (int)bsize - 1; - fprintf(outfile, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8); + cprintf(clr_api_error, "Failed: POSIX code %d: %.*s\n", rc, psize, pbuffer8); if (usize > bsize) { - fprintf(outfile, "** regerror() message truncated\n"); + cprintf(clr_test_error, "** regerror() message truncated\n"); if (memcmp(pbuffer8 + bsize, "DEADBEEF", 8) != 0) - fprintf(outfile, "** regerror() buffer overflow\n"); + cprintf(clr_test_error, "** regerror() buffer overflow\n"); } return PR_SKIP; } @@ -6439,7 +6595,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0) preg.re_match_data == NULL || preg.re_cflags != cflags) { - fprintf(outfile, + cprintf(clr_test_error, "** The regcomp() function returned zero (success), but the values set\n" "** in the preg block are not valid for PCRE2. Check that pcre2test is\n" "** linked with PCRE2's pcre2posix module (-lpcre2-posix) and not with\n" @@ -6459,13 +6615,13 @@ if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0) { if (pat_patctl.replacement[0] != 0) { - fprintf(outfile, "** Replacement text is not supported with 'push'.\n"); + cprintf(clr_test_error, "** Replacement text is not supported with 'push'.\n"); return PR_OK; } if ((pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS) != 0 || (pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2) != 0) { - show_controls(pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS, + show_controls(clr_test_error, pat_patctl.control & ~PUSH_SUPPORTED_COMPILE_CONTROLS, pat_patctl.control2 & ~PUSH_SUPPORTED_COMPILE_CONTROLS2, "** Ignored when compiled pattern is stacked with 'push':"); fprintf(outfile, "\n"); @@ -6473,7 +6629,7 @@ if ((pat_patctl.control & (CTL_PUSH|CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0) if ((pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS) != 0 || (pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2) != 0) { - show_controls(pat_patctl.control & PUSH_COMPILE_ONLY_CONTROLS, + show_controls(clr_test_error, pat_patctl.control & 
PUSH_COMPILE_ONLY_CONTROLS, pat_patctl.control2 & PUSH_COMPILE_ONLY_CONTROLS2, "** Applies only to compile when pattern is stacked with 'push':"); fprintf(outfile, "\n"); @@ -6499,17 +6655,17 @@ if (test_mode == PCRE32_MODE) errorcode = to32(pbuffer8, utf, &patlen); switch(errorcode) { case -1: - fprintf(outfile, "** Failed: invalid UTF-8 string cannot be " + cprintf(clr_test_error, "** Failed: invalid UTF-8 string cannot be " "converted to %d-bit string\n", (test_mode == PCRE16_MODE)? 16:32); return PR_SKIP; case -2: - fprintf(outfile, "** Failed: character value greater than 0x10ffff " + cprintf(clr_test_error, "** Failed: character value greater than 0x10ffff " "cannot be converted to UTF\n"); return PR_SKIP; case -3: - fprintf(outfile, "** Failed: character value greater than 0xffff " + cprintf(clr_test_error, "** Failed: character value greater than 0xffff " "cannot be converted to 16-bit in non-UTF mode\n"); return PR_SKIP; @@ -6535,7 +6691,7 @@ if (pat_patctl.convert_type != CONVERT_UNSET) converted_pattern = malloc(converted_length * code_unit_size); if (converted_pattern == NULL) { - fprintf(outfile, "** Failed: malloc failed for converted pattern\n"); + cprintf(clr_test_error, "** Failed: malloc failed for converted pattern\n"); return PR_SKIP; } } @@ -6554,7 +6710,7 @@ if (pat_patctl.convert_type != CONVERT_UNSET) PCRE2_SET_GLOB_ESCAPE(rc, con_context, CHAR_INPUT(escape)); if (rc != 0) { - fprintf(outfile, "** Invalid glob escape '%c'\n", + cprintf(clr_api_error, "** Invalid glob escape '%c'\n", pat_patctl.convert_glob_escape); convert_return = PR_SKIP; goto CONVERT_FINISH; @@ -6567,7 +6723,7 @@ if (pat_patctl.convert_type != CONVERT_UNSET) PCRE2_SET_GLOB_SEPARATOR(rc, con_context, CHAR_INPUT(separator)); if (rc != 0) { - fprintf(outfile, "** Invalid glob separator '%c'\n", + cprintf(clr_api_error, "** Invalid glob separator '%c'\n", pat_patctl.convert_glob_separator); convert_return = PR_SKIP; goto CONVERT_FINISH; @@ -6579,7 +6735,7 @@ if 
(pat_patctl.convert_type != CONVERT_UNSET) if (rc != 0) { - fprintf(outfile, "** Pattern conversion error at offset %" SIZ_FORM ": ", + cprintf(clr_api_error, "** Pattern conversion error at offset %" SIZ_FORM ": ", converted_length); convert_return = print_error_message(rc, "", "\n")? PR_SKIP:PR_ABEND; } @@ -6589,7 +6745,7 @@ if (pat_patctl.convert_type != CONVERT_UNSET) else { BOOL toolong; - PCHARSV(converted_pattern, 0, converted_length, utf, outfile); + PCHARSV(clr_output, converted_pattern, 0, converted_length, utf, outfile); fprintf(outfile, "\n"); if (test_mode == PCRE8_MODE) @@ -6601,7 +6757,7 @@ if (pat_patctl.convert_type != CONVERT_UNSET) if (toolong) { - fprintf(outfile, "** Pattern conversion is too long for the buffer\n"); + cprintf(clr_api_error, "** Pattern conversion is too long for the buffer\n"); convert_return = PR_SKIP; } else @@ -6714,7 +6870,7 @@ if (timeit > 0) { SUB1(pcre2_code_free, compiled_code); } } total_compile_time += time_taken; - fprintf(outfile, "Compile time %8.4f microseconds\n", + cprintf(clr_profiling, "Compile time %8.4f microseconds\n", ((1000000 / CLOCKS_PER_SEC) * (double)time_taken) / timeit); } @@ -6743,7 +6899,7 @@ if (malloc_testing) if (i < target_mallocs && !(TEST(compiled_code, ==, NULL) && errorcode == PCRE2_ERROR_HEAP_FAILED)) { - fprintf(outfile, "** malloc() compile test did not fail as expected (%d)\n", + cprintf(clr_test_error, "** malloc() compile test did not fail as expected (%d)\n", errorcode); return PR_ABEND; } @@ -6813,20 +6969,20 @@ if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0) use_pat_context); if (TEST(compiled_code, ==, NULL)) { - fprintf(outfile, "** Unexpected - pattern compilation not successful\n"); + cprintf(clr_test_error, "** Unexpected - pattern compilation not successful\n"); return PR_ABEND; } if (jitrc != 0) { - fprintf(outfile, "JIT compilation was not successful"); + cprintf(clr_test_error, "JIT compilation was not successful"); if (!print_error_message(jitrc, " (", ")\n")) 
return PR_ABEND; break; } } total_jit_compile_time += time_taken; if (jitrc == 0) - fprintf(outfile, "JIT compile %8.4f microseconds\n", + cprintf(clr_profiling, "JIT compile %8.4f microseconds\n", ((1000000 / CLOCKS_PER_SEC) * (double)time_taken) / timeit); } @@ -6845,7 +7001,7 @@ if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0) use_pat_context); if (TEST(compiled_code, ==, NULL)) { - fprintf(outfile, "** Unexpected - pattern compilation not successful\n"); + cprintf(clr_test_error, "** Unexpected - pattern compilation not successful\n"); return PR_ABEND; } @@ -6855,7 +7011,7 @@ if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0) if (i < target_mallocs && jitrc != PCRE2_ERROR_NOMEMORY) { - fprintf(outfile, "** malloc() JIT compile test did not fail as expected (%d)\n", + cprintf(clr_test_error, "** malloc() JIT compile test did not fail as expected (%d)\n", jitrc); return PR_ABEND; } @@ -6867,7 +7023,7 @@ if (TEST(compiled_code, !=, NULL) && pat_patctl.jit != 0) if (jitrc != 0 && (pat_patctl.control & CTL_JITVERIFY) != 0) { - fprintf(outfile, "JIT compilation was not successful"); + cprintf(clr_api_error, "JIT compilation was not successful"); if (!print_error_message(jitrc, " (", ")\n")) return PR_ABEND; } } @@ -6879,7 +7035,7 @@ if (TEST(compiled_code, ==, NULL)) { int direction = error_direction(errorcode, erroroffset); - fprintf(outfile, "Failed: error %d at offset %d: ", errorcode, + cprintf(clr_api_error, "Failed: error %d at offset %d: ", errorcode, (int)erroroffset); if (!print_error_message(errorcode, "", "\n")) return PR_ABEND; @@ -6898,7 +7054,7 @@ if (TEST(compiled_code, ==, NULL)) n = utf8_to_ord(q, q_end, &cc); if (n <= 0) { - fprintf(outfile, "** Erroroffset %d splits a UTF-8 character\n", (int)erroroffset); + cprintf(clr_test_error, "** Erroroffset %d splits a UTF-8 character\n", (int)erroroffset); return PR_ABEND; } } @@ -6912,7 +7068,7 @@ if (TEST(compiled_code, ==, NULL)) n = utf16_to_ord(q, q_end, &cc); if (n <= 0) { - 
fprintf(outfile, "** Erroroffset %d splits a UTF-16 character\n", (int)erroroffset); + cprintf(clr_test_error, "** Erroroffset %d splits a UTF-16 character\n", (int)erroroffset); return PR_ABEND; } } @@ -6922,9 +7078,9 @@ if (TEST(compiled_code, ==, NULL)) if (direction < 0) { - fprintf(outfile, "** Error code %d not implemented in error_direction().\n", errorcode); - fprintf(outfile, " error_direction() should usually return '1' for newly-added errors,\n"); - fprintf(outfile, " and the offset should be just to the right of the bad character.\n"); + cprintf(clr_test_error, "** Error code %d not implemented in error_direction().\n", errorcode); + cprintf(clr_test_error, " error_direction() should usually return '1' for newly-added errors,\n"); + cprintf(clr_test_error, " and the offset should be just to the right of the bad character.\n"); return PR_ABEND; } @@ -6933,24 +7089,24 @@ if (TEST(compiled_code, ==, NULL)) PCRE2_SIZE full_patlen = (patlen != PCRE2_ZERO_TERMINATED)? patlen : STRLEN(CASTVAR(void *, pbuffer)); - fprintf(outfile, " here: "); + cprintf(clr_api_error, " here: "); if (erroroffset > 0) { - PTRUNCV(CASTVAR(void *, pbuffer), full_patlen, erroroffset, TRUE, utf, outfile); - fprintf(outfile, " "); + PTRUNCV(clr_api_error, CASTVAR(void *, pbuffer), full_patlen, erroroffset, TRUE, utf, outfile); + cprintf(clr_api_error, " "); } - fprintf(outfile, (direction == 1)? "|<--|" : (direction == 2)? "|-->|" : "|<-->|"); + cprintf(clr_api_error, (direction == 1)? "|<--|" : (direction == 2)? 
"|-->|" : "|<-->|"); if (erroroffset < full_patlen) { - fprintf(outfile, " "); - PTRUNCV(CASTVAR(void *, pbuffer), full_patlen, erroroffset, FALSE, utf, outfile); + cprintf(clr_api_error, " "); + PTRUNCV(clr_api_error, CASTVAR(void *, pbuffer), full_patlen, erroroffset, FALSE, utf, outfile); } fprintf(outfile, "\n"); } else if (erroroffset != 0) { - fprintf(outfile, "** Unexpected non-zero erroroffset %d for error code %d\n", + cprintf(clr_test_error, "** Unexpected non-zero erroroffset %d for error code %d\n", (int)erroroffset, errorcode); return PR_ABEND; } @@ -6966,7 +7122,7 @@ if (forbid_utf != 0) { if ((FLD(compiled_code, flags) & PCRE2_HASBKPORX) != 0) { - fprintf(outfile, "** \\P, \\p, and \\X are not allowed after the " + cprintf(clr_test_error, "** \\P, \\p, and \\X are not allowed after the " "#forbid_utf command\n"); return PR_SKIP; } @@ -7007,7 +7163,7 @@ if ((pat_patctl.control & CTL_PUSH) != 0) { if (patstacknext >= PATSTACKSIZE) { - fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE); + cprintf(clr_test_error, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE); return PR_ABEND; } patstack[patstacknext++] = PTR(compiled_code); @@ -7022,7 +7178,7 @@ if ((pat_patctl.control & (CTL_PUSHCOPY|CTL_PUSHTABLESCOPY)) != 0) { if (patstacknext >= PATSTACKSIZE) { - fprintf(outfile, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE); + cprintf(clr_test_error, "** Too many pushed patterns (max %d)\n", PATSTACKSIZE); return PR_ABEND; } if ((pat_patctl.control & CTL_PUSHCOPY) != 0) @@ -7149,7 +7305,7 @@ for (;;) { if ((mid & 0x80000000u) != 0) { - fprintf(outfile, "Can't find minimum %s limit: check pattern for " + cprintf(clr_api_error, "Can't find minimum %s limit: check pattern for " "restriction\n", msg); break; } @@ -7169,12 +7325,12 @@ for (;;) if (errnumber == PCRE2_ERROR_HEAPLIMIT && mid < stack_start) { - fprintf(outfile, "Minimum %s limit = 0\n", msg); + cprintf(clr_output, "Minimum %s limit = 0\n", msg); break; } if (mid == 
min + 1) { - fprintf(outfile, "Minimum %s limit = %d\n", msg, mid); + cprintf(clr_output, "Minimum %s limit = %d\n", msg, mid); break; } max = mid; @@ -7211,31 +7367,31 @@ int yield = 0; BOOL utf = (FLD(compiled_code, overall_options) & PCRE2_UTF) != 0; (void)data_ptr; /* Not used */ -fprintf(outfile, "%2d(%d) Old %" SIZ_FORM " %" SIZ_FORM " \"", +cprintf(clr_output, "%2d(%d) Old %" SIZ_FORM " %" SIZ_FORM " \"", scb->subscount, scb->oveccount, scb->ovector[0], scb->ovector[1]); -PCHARSV(scb->input, scb->ovector[0], scb->ovector[1] - scb->ovector[0], +PCHARSV(clr_output, scb->input, scb->ovector[0], scb->ovector[1] - scb->ovector[0], utf, outfile); -fprintf(outfile, "\" New %" SIZ_FORM " %" SIZ_FORM " \"", +cprintf(clr_output, "\" New %" SIZ_FORM " %" SIZ_FORM " \"", scb->output_offsets[0], scb->output_offsets[1]); -PCHARSV(scb->output, scb->output_offsets[0], +PCHARSV(clr_output, scb->output, scb->output_offsets[0], scb->output_offsets[1] - scb->output_offsets[0], utf, outfile); if (scb->subscount == dat_datctl.substitute_stop) { yield = -1; - fprintf(outfile, " STOPPED"); + cprintf(clr_output, " STOPPED"); } else if (scb->subscount == dat_datctl.substitute_skip) { yield = +1; - fprintf(outfile, " SKIPPED"); + cprintf(clr_output, " SKIPPED"); } -fprintf(outfile, "\"\n"); +cprintf(clr_output, "\"\n"); return yield; } @@ -7503,15 +7659,15 @@ if ((dat_datctl.control2 & CTL2_CALLOUT_EXTRA) != 0) switch (cb->callout_flags) { case PCRE2_CALLOUT_BACKTRACK: - fprintf(f, "Backtrack\n"); + cprintf(clr_output, "Backtrack\n"); break; case PCRE2_CALLOUT_STARTMATCH|PCRE2_CALLOUT_BACKTRACK: - fprintf(f, "Backtrack\nNo other matching paths\n"); + cprintf(clr_output, "Backtrack\nNo other matching paths\n"); /* Fall through */ case PCRE2_CALLOUT_STARTMATCH: - fprintf(f, "New match attempt\n"); + cprintf(clr_output, "New match attempt\n"); break; default: @@ -7527,9 +7683,9 @@ isn't a tidy way to fit it in the rest of the data. 
*/ if (cb->callout_string != NULL) { uint32_t delimiter = CODE_UNIT(cb->callout_string, -1); - fprintf(outfile, "Callout (%" SIZ_FORM "): %c", + cprintf(clr_output, "Callout (%" SIZ_FORM "): %c", cb->callout_string_offset, CHAR_OUTPUT(delimiter)); - PCHARSV(cb->callout_string, 0, + PCHARSV(clr_output, cb->callout_string, 0, cb->callout_string_length, utf, outfile); for (i = 0; callout_start_delims[i] != 0; i++) if (delimiter == callout_start_delims[i]) @@ -7537,7 +7693,7 @@ if (cb->callout_string != NULL) delimiter = callout_end_delims[i]; break; } - fprintf(outfile, "%c", CHAR_OUTPUT(delimiter)); + cprintf(clr_output, "%c", CHAR_OUTPUT(delimiter)); if (!callout_capture) fprintf(outfile, "\n"); } @@ -7546,16 +7702,16 @@ if (cb->callout_string != NULL) if (callout_capture) { if (cb->callout_string == NULL) - fprintf(outfile, "Callout %d:", cb->callout_number); - fprintf(outfile, " last capture = %d\n", cb->capture_last); + cprintf(clr_output, "Callout %d:", cb->callout_number); + cprintf(clr_output, " last capture = %d\n", cb->capture_last); for (i = 2; i < cb->capture_top * 2; i += 2) { - fprintf(outfile, "%2d: ", i/2); + cprintf(clr_output, "%2d: ", i/2); if (cb->offset_vector[i] == PCRE2_UNSET) - fprintf(outfile, ""); + cprintf(clr_api_error, ""); else { - PCHARSV(cb->subject, cb->offset_vector[i], + PCHARSV(clr_output, cb->subject, cb->offset_vector[i], cb->offset_vector[i+1] - cb->offset_vector[i], utf, f); } fprintf(outfile, "\n"); @@ -7569,11 +7725,11 @@ lengths of the substrings. */ if (callout_where) { - if (f != NULL) fprintf(f, "--->"); + if (f != NULL) cprintf(clr_output, "--->"); /* The subject before the match start. */ - PCHARS(pre_start, cb->subject, 0, cb->start_match, utf, f); + PCHARS(clr_output, pre_start, cb->subject, 0, cb->start_match, utf, f); /* If a lookbehind is involved, the current position may be earlier than the match start. If so, use the match start instead. 
*/ @@ -7583,17 +7739,17 @@ if (callout_where) /* The subject between the match start and the current position. */ - PCHARS(post_start, cb->subject, cb->start_match, + PCHARS(clr_output, post_start, cb->subject, cb->start_match, current_position - cb->start_match, utf, f); /* Print from the current position to the end. */ - PCHARSV(cb->subject, current_position, cb->subject_length - current_position, + PCHARSV(clr_output, cb->subject, current_position, cb->subject_length - current_position, utf, f); /* Calculate the total subject printed length (no print). */ - PCHARS(subject_length, cb->subject, 0, cb->subject_length, utf, NULL); + PCHARS(clr_output, subject_length, cb->subject, 0, cb->subject_length, utf, NULL); if (f != NULL) fprintf(f, "\n"); @@ -7604,36 +7760,36 @@ if (callout_where) if (cb->callout_number == 255) { - fprintf(outfile, "%+3d ", (int)cb->pattern_position); - if (cb->pattern_position > 99) fprintf(outfile, "\n "); + cprintf(clr_output, "%+3d ", (int)cb->pattern_position); + if (cb->pattern_position > 99) cprintf(clr_output, "\n "); } else { - if (callout_capture || cb->callout_string != NULL) fprintf(outfile, " "); - else fprintf(outfile, "%3d ", cb->callout_number); + if (callout_capture || cb->callout_string != NULL) cprintf(clr_output, " "); + else cprintf(clr_output, "%3d ", cb->callout_number); } /* Now show position indicators */ - for (i = 0; i < pre_start; i++) fprintf(outfile, " "); - fprintf(outfile, "^"); + for (i = 0; i < pre_start; i++) cprintf(clr_output, " "); + cprintf(clr_output, "^"); if (post_start > 0) { - for (i = 0; i < post_start - 1; i++) fprintf(outfile, " "); - fprintf(outfile, "^"); + for (i = 0; i < post_start - 1; i++) cprintf(clr_output, " "); + cprintf(clr_output, "^"); } for (i = 0; i < subject_length - pre_start - post_start + 4; i++) - fprintf(outfile, " "); + cprintf(clr_output, " "); if (cb->next_item_length != 0) { - PCHARSV(CASTVAR(void *, pbuffer), cb->pattern_position, + PCHARSV(clr_output, CASTVAR(void *, 
pbuffer), cb->pattern_position, (int)(cb->next_item_length), utf, outfile); } else - fprintf(outfile, "End of pattern"); + cprintf(clr_output, "End of pattern"); fprintf(outfile, "\n"); } @@ -7645,11 +7801,11 @@ first_callout = FALSE; if (cb->mark != last_callout_mark) { if (cb->mark == NULL) - fprintf(outfile, "Latest Mark: \n"); + cprintf(clr_output, "Latest Mark: \n"); else { - fprintf(outfile, "Latest Mark: "); - PCHARSV(cb->mark, -1, -1, utf, outfile); + cprintf(clr_output, "Latest Mark: "); + PCHARSV(clr_output, cb->mark, -1, -1, utf, outfile); putc('\n', outfile); } last_callout_mark = cb->mark; @@ -7662,7 +7818,7 @@ if (callout_data_ptr != NULL) int callout_data = *((int32_t *)callout_data_ptr); if (callout_data != 0) { - fprintf(outfile, "Callout data = %d\n", callout_data); + cprintf(clr_output, "Callout data = %d\n", callout_data); return callout_data; } } @@ -7716,7 +7872,7 @@ for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++) PCRE2_SUBSTRING_COPY_BYNUMBER(rc, match_data, n, copybuffer, &length); if (rc < 0) { - fprintf(outfile, "Copy substring %d failed (%d): ", n, rc); + cprintf(clr_api_error, "Copy substring %d failed (%d): ", n, rc); if (!print_error_message(rc, "", "\n")) return FALSE; } else @@ -7724,17 +7880,17 @@ for (i = 0; i < MAXCPYGET && dat_datctl.copy_numbers[i] >= 0; i++) PCRE2_SUBSTRING_LENGTH_BYNUMBER(rc, match_data, n, &length2); if (rc < 0) { - fprintf(outfile, "Get substring %d length failed (%d): ", n, rc); + cprintf(clr_api_error, "Get substring %d length failed (%d): ", n, rc); if (!print_error_message(rc, "", "\n")) return FALSE; } else if (length2 != length) { - fprintf(outfile, "Mismatched substring lengths: %" + cprintf(clr_test_error, "Mismatched substring lengths: %" SIZ_FORM " %" SIZ_FORM "\n", length, length2); } - fprintf(outfile, "%2dC ", n); - PCHARSV(copybuffer, 0, length, utf, outfile); - fprintf(outfile, " (%" SIZ_FORM ")\n", length); + cprintf(clr_output, "%2dC ", n); + PCHARSV(clr_output, 
copybuffer, 0, length, utf, outfile); + cprintf(clr_output, " (%" SIZ_FORM ")\n", length); } } @@ -7768,13 +7924,13 @@ for (;;) PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer); if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING) - fprintf(outfile, "Number not found for group \"%s\"\n", nptr); + cprintf(clr_api_error, "Number not found for group \"%s\"\n", nptr); length = sizeof(copybuffer)/code_unit_size; PCRE2_SUBSTRING_COPY_BYNAME(rc, match_data, pbuffer, copybuffer, &length); if (rc < 0) { - fprintf(outfile, "Copy substring \"%s\" failed (%d): ", nptr, rc); + cprintf(clr_api_error, "Copy substring \"%s\" failed (%d): ", nptr, rc); if (!print_error_message(rc, "", "\n")) return FALSE; } else @@ -7782,19 +7938,19 @@ for (;;) PCRE2_SUBSTRING_LENGTH_BYNAME(rc, match_data, pbuffer, &length2); if (rc < 0) { - fprintf(outfile, "Get substring \"%s\" length failed (%d): ", nptr, rc); + cprintf(clr_api_error, "Get substring \"%s\" length failed (%d): ", nptr, rc); if (!print_error_message(rc, "", "\n")) return FALSE; } else if (length2 != length) { - fprintf(outfile, "Mismatched substring lengths: %" + cprintf(clr_api_error, "Mismatched substring lengths: %" SIZ_FORM " %" SIZ_FORM "\n", length, length2); } - fprintf(outfile, " C "); - PCHARSV(copybuffer, 0, length, utf, outfile); - fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr); - if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber); - else fprintf(outfile, " (non-unique)\n"); + cprintf(clr_output, " C "); + PCHARSV(clr_output, copybuffer, 0, length, utf, outfile); + cprintf(clr_output, " (%" SIZ_FORM ") %s", length, nptr); + if (groupnumber >= 0) cprintf(clr_output, " (group %d)\n", groupnumber); + else cprintf(clr_output, " (non-unique)\n"); } nptr += namelen + 1; } @@ -7810,14 +7966,14 @@ for (i = 0; i < MAXCPYGET && dat_datctl.get_numbers[i] >= 0; i++) PCRE2_SUBSTRING_GET_BYNUMBER(rc, match_data, n, &gotbuffer, &length); if (rc < 0) { - fprintf(outfile, 
"Get substring %d failed (%d): ", n, rc); + cprintf(clr_api_error, "Get substring %d failed (%d): ", n, rc); if (!print_error_message(rc, "", "\n")) return FALSE; } else { - fprintf(outfile, "%2dG ", n); - PCHARSV(gotbuffer, 0, length, utf, outfile); - fprintf(outfile, " (%" SIZ_FORM ")\n", length); + cprintf(clr_output, "%2dG ", n); + PCHARSV(clr_output, gotbuffer, 0, length, utf, outfile); + cprintf(clr_output, " (%" SIZ_FORM ")\n", length); PCRE2_SUBSTRING_FREE(gotbuffer); } } @@ -7852,21 +8008,21 @@ for (;;) PCRE2_SUBSTRING_NUMBER_FROM_NAME(groupnumber, compiled_code, pbuffer); if (groupnumber < 0 && groupnumber != PCRE2_ERROR_NOUNIQUESUBSTRING) - fprintf(outfile, "Number not found for group \"%s\"\n", nptr); + cprintf(clr_api_error, "Number not found for group \"%s\"\n", nptr); PCRE2_SUBSTRING_GET_BYNAME(rc, match_data, pbuffer, &gotbuffer, &length); if (rc < 0) { - fprintf(outfile, "Get substring \"%s\" failed (%d): ", nptr, rc); + cprintf(clr_api_error, "Get substring \"%s\" failed (%d): ", nptr, rc); if (!print_error_message(rc, "", "\n")) return FALSE; } else { - fprintf(outfile, " G "); - PCHARSV(gotbuffer, 0, length, utf, outfile); - fprintf(outfile, " (%" SIZ_FORM ") %s", length, nptr); - if (groupnumber >= 0) fprintf(outfile, " (group %d)\n", groupnumber); - else fprintf(outfile, " (non-unique)\n"); + cprintf(clr_output, " G "); + PCHARSV(clr_output, gotbuffer, 0, length, utf, outfile); + cprintf(clr_output, " (%" SIZ_FORM ") %s", length, nptr); + if (groupnumber >= 0) cprintf(clr_output, " (group %d)\n", groupnumber); + else cprintf(clr_output, " (non-unique)\n"); PCRE2_SUBSTRING_FREE(gotbuffer); } nptr += namelen + 1; @@ -7882,19 +8038,19 @@ if ((dat_datctl.control & CTL_GETALL) != 0) PCRE2_SUBSTRING_LIST_GET(rc, match_data, &stringlist, &lengths); if (rc < 0) { - fprintf(outfile, "get substring list failed (%d): ", rc); + cprintf(clr_api_error, "get substring list failed (%d): ", rc); if (!print_error_message(rc, "", "\n")) return FALSE; } else { 
for (i = 0; i < capcount; i++) { - fprintf(outfile, "%2dL ", i); - PCHARSV(stringlist[i], 0, lengths[i], utf, outfile); + cprintf(clr_output, "%2dL ", i); + PCHARSV(clr_output, stringlist[i], 0, lengths[i], utf, outfile); putc('\n', outfile); } if (stringlist[i] != NULL) - fprintf(outfile, "string list not terminated by NULL\n"); + cprintf(clr_test_error, "string list not terminated by NULL\n"); PCRE2_SUBSTRING_LIST_FREE(stringlist); } } @@ -7928,13 +8084,13 @@ for (i = 0; i < 2*oveccount; i += 2) PCRE2_SIZE start = ovector[i]; PCRE2_SIZE end = ovector[i+1]; - fprintf(outfile, "%2d: ", i/2); + cprintf(clr_output, "%2d: ", i/2); if (start == PCRE2_UNSET && end == PCRE2_UNSET) - fprintf(outfile, "\n"); + cprintf(clr_api_error, "\n"); else if (start == JUNK_OFFSET && end == JUNK_OFFSET) - fprintf(outfile, "\n"); + cprintf(clr_output, "\n"); else - fprintf(outfile, "%ld %ld\n", (unsigned long int)start, + cprintf(clr_output, "%ld %ld\n", (unsigned long int)start, (unsigned long int)end); } } @@ -7956,7 +8112,7 @@ Returns: PR_OK continue processing next line static int process_data(void) { -PCRE2_SIZE len, ulen, arg_ulen; +PCRE2_SIZE len, ulen, arg_ulen, blen; uint32_t gmatched; uint32_t c, k; uint32_t g_notempty = 0; @@ -7999,6 +8155,8 @@ if (dat_datctl.substitute_skip == 0) dat_datctl.substitute_skip = pat_patctl.substitute_skip; if (dat_datctl.substitute_stop == 0) dat_datctl.substitute_stop = pat_patctl.substitute_stop; +if (dat_datctl.substitute_options[0] == 0xFF) /* Default value wasn't overridden */ + strcpy((char *)dat_datctl.substitute_options, (char *)pat_patctl.substitute_options); /* Initialize for scanning the data line. */ @@ -8021,8 +8179,8 @@ while (isspace(*p)) len--; } -/* Check that the data is well-formed UTF-8 if we're in UTF mode. To create -invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */ +/* Check that the data (but not the modifiers) is well-formed UTF-8 if we're in UTF mode.
+To create invalid input to pcre2_match(), you must use \x?? or \x{} sequences. */ if (utf) { @@ -8031,10 +8189,12 @@ if (utf) int n = 1; uint8_t *q_end = p + len; - for (q = p; n > 0 && *q; q += n) n = utf8_to_ord(q, q_end, &cc); + for (q = p; n > 0 && *q && !(*q == '\\' && *(q + 1) == '='); q += n) + n = utf8_to_ord(q, q_end, &cc); + if (n <= 0) { - fprintf(outfile, "** Failed: invalid UTF-8 string cannot be used as input " + cprintf(clr_test_error, "** Failed: invalid UTF-8 string cannot be used as input " "in UTF mode\n"); return PR_OK; } @@ -8063,7 +8223,7 @@ if (dbuffer == NULL || needlen >= dbuffer_size) dbuffer = (uint8_t *)realloc(dbuffer, dbuffer_size); if (dbuffer == NULL) { - fprintf(stderr, "pcre2test: realloc(%" SIZ_FORM ") failed\n", dbuffer_size); + fatal_printf("pcre2test: realloc(%" SIZ_FORM ") failed\n", dbuffer_size); exit(1); } } @@ -8089,14 +8249,14 @@ while ((c = *p++) != 0) if (*p++ != '{') { - fprintf(outfile, "** Expected '{' after \\[....]\n"); + cprintf(clr_test_error, "** Expected '{' after \\[....]\n"); return PR_OK; } li = strtol((const char *)p, &endptr, 10); if (S32OVERFLOW(li)) { - fprintf(outfile, "** Repeat count too large\n"); + cprintf(clr_test_error, "** Repeat count too large\n"); return PR_OK; } i = (int)li; @@ -8104,20 +8264,20 @@ while ((c = *p++) != 0) p = (uint8_t *)endptr; if (*p++ != '}') { - fprintf(outfile, "** Expected '}' after \\[...]{...\n"); + cprintf(clr_test_error, "** Expected '}' after \\[...]{...\n"); return PR_OK; } if (i-- <= 0) { - fprintf(outfile, "** Zero or negative repeat not allowed\n"); + cprintf(clr_test_error, "** Zero or negative repeat not allowed\n"); return PR_OK; } replen = CAST8VAR(q) - start_rep; if (i > 0 && replen > (SIZE_MAX - needlen) / i) { - fprintf(outfile, "** Expanded content too large\n"); + cprintf(clr_test_error, "** Expanded content too large\n"); return PR_OK; } needlen += replen * i; @@ -8134,7 +8294,7 @@ while ((c = *p++) != 0) dbuffer = (uint8_t *)realloc(dbuffer, 
dbuffer_size); if (dbuffer == NULL) { - fprintf(stderr, "pcre2test: realloc(%" SIZ_FORM ") failed\n", + fatal_printf("pcre2test: realloc(%" SIZ_FORM ") failed\n", dbuffer_size); exit(1); } @@ -8209,7 +8369,7 @@ while ((c = *p++) != 0) { if (c >= 0x20000000u) { - fprintf(outfile, "** \\o{ escape too large\n"); + cprintf(clr_test_error, "** \\o{ escape too large\n"); return PR_OK; } else c = c * 8 + (*pt - '0'); @@ -8217,7 +8377,7 @@ while ((c = *p++) != 0) c = CHAR_OUTPUT(CHAR_INPUT_HEX(c)); if (i == 0 || *pt != '}') { - fprintf(outfile, "** Malformed \\o{ escape\n"); + cprintf(clr_test_error, "** Malformed \\o{ escape\n"); return PR_OK; } else p = pt + 1; @@ -8239,7 +8399,7 @@ while ((c = *p++) != 0) { if (++i == 9) { - fprintf(outfile, "** Too many hex digits in \\x{...} item; " + cprintf(clr_test_error, "** Too many hex digits in \\x{...} item; " "using only the first eight.\n"); while (isxdigit(*pt)) pt++; break; @@ -8249,7 +8409,7 @@ while ((c = *p++) != 0) c = CHAR_OUTPUT(CHAR_INPUT_HEX(c)); if (i == 0 || *pt != '}') { - fprintf(outfile, "** Malformed \\x{ escape\n"); + cprintf(clr_test_error, "** Malformed \\x{ escape\n"); return PR_OK; } else p = pt + 1; @@ -8292,7 +8452,7 @@ while ((c = *p++) != 0) } } #endif - fprintf(outfile, "** Malformed \\N{U+ escape\n"); + cprintf(clr_test_error, "** Malformed \\N{U+ escape\n"); return PR_OK; case 0: /* \ followed by EOF allows for an empty line */ @@ -8305,7 +8465,7 @@ while ((c = *p++) != 0) case '[': /* \[ introduces a replicated character sequence */ if (start_rep != NULL) { - fprintf(outfile, "** Nested replication is not supported\n"); + cprintf(clr_test_error, "** Nested replication is not supported\n"); return PR_OK; } start_rep = CAST8VAR(q); @@ -8314,7 +8474,7 @@ while ((c = *p++) != 0) default: if (isalnum(c)) { - fprintf(outfile, "** Unrecognized escape sequence \"\\%c\"\n", c); + cprintf(clr_test_error, "** Unrecognized escape sequence \"\\%c\"\n", c); return PR_OK; } } @@ -8330,9 +8490,9 @@ while ((c = 
*p++) != 0) { if (c > 0xffu) { - fprintf(outfile, "** Character \\x{%x} is greater than 255 " + cprintf(clr_test_error, "** Character \\x{%x} is greater than 255 " "and UTF-8 mode is not enabled.\n", c); - fprintf(outfile, "** Truncation will probably give the wrong " + cprintf(clr_test_error, "** Truncation will probably give the wrong " "result.\n"); } *q8++ = (uint8_t)c; @@ -8341,12 +8501,12 @@ while ((c = *p++) != 0) { if (c > 0x7fffffff) { - fprintf(outfile, "** Character \\N{U+%x} is greater than 0x7fffffff " + cprintf(clr_test_error, "** Character \\N{U+%x} is greater than 0x7fffffff " "and therefore cannot be encoded as UTF-8\n", c); return PR_OK; } else if (encoding == FORCE_UTF && c > MAX_UTF_CODE_POINT) - fprintf(outfile, "** Warning: character \\N{U+%x} is greater than " + cprintf(clr_test_error, "** Warning: character \\N{U+%x} is greater than " "0x%x and should not be encoded as UTF-8\n", c, MAX_UTF_CODE_POINT); q8 += ord_to_utf8(c, q8); @@ -8363,9 +8523,9 @@ while ((c = *p++) != 0) { if (c > 0xffffu) { - fprintf(outfile, "** Character \\x{%x} is greater than 0xffff " + cprintf(clr_test_error, "** Character \\x{%x} is greater than 0xffff " "and UTF-16 mode is not enabled.\n", c); - fprintf(outfile, "** Truncation will probably give the wrong " + cprintf(clr_test_error, "** Truncation will probably give the wrong " "result.\n"); } *q16++ = (uint16_t)c; @@ -8374,7 +8534,7 @@ while ((c = *p++) != 0) { if (c > MAX_UTF_CODE_POINT) { - fprintf(outfile, "** Failed: character \\N{U+%x} is greater than " + cprintf(clr_test_error, "** Failed: character \\N{U+%x} is greater than " "0x%x and therefore cannot be encoded as UTF-16\n", c, MAX_UTF_CODE_POINT); return PR_OK; @@ -8388,7 +8548,7 @@ while ((c = *p++) != 0) else { if (encoding == FORCE_UTF && 0xe000u > c && c >= 0xd800u) - fprintf(outfile, "** Warning: character \\N{U+%x} is a surrogate " + cprintf(clr_test_error, "** Warning: character \\N{U+%x} is a surrogate " "and should not be encoded as UTF-16\n", 
c); *q16++ = c; } @@ -8399,7 +8559,7 @@ while ((c = *p++) != 0) if (test_mode == PCRE32_MODE) { if (encoding == FORCE_UTF && c > MAX_UTF_CODE_POINT) - fprintf(outfile, "** Warning: character \\N{U+%x} is greater than " + cprintf(clr_test_error, "** Warning: character \\N{U+%x} is greater than " "0x%x and should not be encoded as UTF-32\n", c, MAX_UTF_CODE_POINT); *q32++ = c; @@ -8431,7 +8591,7 @@ for (k = 0; k < sizeof(exclusive_dat_controls)/sizeof(uint32_t); k++) c = dat_datctl.control & exclusive_dat_controls[k]; if (c != 0 && c != (c & (~c+1))) { - show_controls(c, 0, "** Not allowed together:"); + show_controls(clr_test_error, c, 0, "** Not allowed together:"); fprintf(outfile, "\n"); return PR_OK; } @@ -8442,29 +8602,59 @@ if (dat_datctl.replacement[0] != 0) if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0 && (dat_datctl.control & CTL_NULLCONTEXT) != 0) { - fprintf(outfile, "** Replacement callouts are not supported with null_context.\n"); + cprintf(clr_test_error, "** Replacement callouts are not supported with null_context.\n"); return PR_OK; } if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CASE_CALLOUT) != 0 && (dat_datctl.control & CTL_NULLCONTEXT) != 0) { - fprintf(outfile, "** Replacement case callouts are not supported with null_context.\n"); + cprintf(clr_test_error, "** Replacement case callouts are not supported with null_context.\n"); return PR_OK; } if ((dat_datctl.control & CTL_ALLCAPTURES) != 0) - fprintf(outfile, "** Ignored with replacement text: allcaptures\n"); + cprintf(clr_test_error, "** Ignored with replacement text: allcaptures\n"); } +if ((dat_datctl.substitute_offset < PCRE2_SIZE_MAX) && (dat_datctl.control2 & CTL2_SUBSTITUTE_MATCHED) == 0) + { + cprintf(clr_test_error, "** substitute_offset is only supported with substitute_matched\n"); + return PR_OK; + } +if (dat_datctl.substitute_options[0] != 0xFF && (dat_datctl.control2 & CTL2_SUBSTITUTE_MATCHED) == 0) + { + cprintf(clr_test_error, "** substitute_options is only supported 
with substitute_matched\n"); + return PR_OK; + } +if ((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERWRITE) != 0 && dat_datctl.substitute_subject[0] == 0xFF) + { + cprintf(clr_test_error, "** substitute_overwrite is only supported with substitute_subject\n"); + return PR_OK; + } +if ((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERWRITE) != 0 && (dat_datctl.control2 & CTL2_NULL_SUBJECT) != 0) + { + cprintf(clr_test_error, "** substitute_overwrite is not supported with null_subject\n"); + return PR_OK; + } +if (dat_datctl.substitute_subject[0] != 0xFF && (dat_datctl.control2 & CTL2_SUBSTITUTE_MATCHED) == 0) + { + cprintf(clr_test_error, "** substitute_subject is only supported with substitute_matched\n"); + return PR_OK; + } +if ((dat_datctl.control2 & CTL2_SUBSTITUTE_ZERO_TERMINATE) != 0 && dat_datctl.substitute_subject[0] == 0xFF) + { + cprintf(clr_test_error, "** substitute_zero_terminate is only supported with substitute_subject\n"); + return PR_OK; + } /* Warn for modifiers that are ignored for DFA. */ if ((dat_datctl.control & CTL_DFA) != 0) { if ((dat_datctl.control & CTL_ALLCAPTURES) != 0) - fprintf(outfile, "** Ignored for DFA matching: allcaptures\n"); + cprintf(clr_test_error, "** Ignored for DFA matching: allcaptures\n"); if ((dat_datctl.control2 & CTL2_HEAPFRAMES_SIZE) != 0) - fprintf(outfile, "** Ignored for DFA matching: heapframes_size\n"); + cprintf(clr_test_error, "** Ignored for DFA matching: heapframes_size\n"); } /* We now have the subject in dbuffer, with len containing the byte length, and @@ -8478,11 +8668,11 @@ the unused start of the buffer unaddressable. If we are using the POSIX interface, or testing zero-termination, we must include the terminating zero in the usable data. */ -c = code_unit_size * (((pat_patctl.control & CTL_POSIX) + +blen = len + code_unit_size * (((pat_patctl.control & CTL_POSIX) + (dat_datctl.control & CTL_ZERO_TERMINATE) != 0)? 
1:0); -pp = memmove(dbuffer + dbuffer_size - len - c, dbuffer, len + c); +pp = memmove(dbuffer + dbuffer_size - blen, dbuffer, blen); #ifdef SUPPORT_VALGRIND - VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - (len + c)); + VALGRIND_MAKE_MEM_NOACCESS(dbuffer, dbuffer_size - blen); #endif #if defined(EBCDIC) && !EBCDIC_IO @@ -8519,15 +8709,15 @@ if ((pat_patctl.control & CTL_POSIX) != 0) if ((dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS) != 0) { - fprintf(outfile, "%s", msg); - show_match_options(dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS); + cprintf(clr_test_error, "%s", msg); + show_match_options(clr_test_error, dat_datctl.options & ~POSIX_SUPPORTED_MATCH_OPTIONS); msg = ""; } if ((dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS) != 0 || (dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2) != 0) { - show_controls(dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS, + show_controls(clr_test_error, dat_datctl.control & ~POSIX_SUPPORTED_MATCH_CONTROLS, dat_datctl.control2 & ~POSIX_SUPPORTED_MATCH_CONTROLS2, msg); msg = ""; } @@ -8539,7 +8729,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0) pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * dat_datctl.oveccount); if (pmatch == NULL) { - fprintf(outfile, "** Failed to get memory for recording matching " + cprintf(clr_test_error, "** Failed to get memory for recording matching " "information (size set = %du)\n", dat_datctl.oveccount); return PR_OK; } @@ -8561,12 +8751,12 @@ if ((pat_patctl.control & CTL_POSIX) != 0) if (rc != 0) { (void)regerror(rc, &preg, (char *)pbuffer8, pbuffer8_size); - fprintf(outfile, "No match: POSIX code %d: %s\n", rc, pbuffer8); + cprintf(clr_api_error, "No match: POSIX code %d: %s\n", rc, pbuffer8); } else if ((pat_patctl.control & CTL_POSIX_NOSUB) != 0) - fprintf(outfile, "Matched with REG_NOSUB\n"); + cprintf(clr_output, "Matched with REG_NOSUB\n"); else if (dat_datctl.oveccount == 0) - fprintf(outfile, "Matched without capture\n"); + cprintf(clr_output, "Matched 
without capture\n"); else { size_t i, j; @@ -8578,26 +8768,26 @@ if ((pat_patctl.control & CTL_POSIX) != 0) PCRE2_SIZE start = pmatch[i].rm_so; PCRE2_SIZE end = pmatch[i].rm_eo; for (j = last_printed + 1; j < i; j++) - fprintf(outfile, "%2d: \n", (int)j); + cprintf(clr_api_error, "%2d: \n", (int)j); last_printed = i; if (start > end) { start = pmatch[i].rm_eo; end = pmatch[i].rm_so; - fprintf(outfile, "Start of matched string is beyond its end - " + cprintf(clr_api_error, "Start of matched string is beyond its end - " "displaying from end to start.\n"); } - fprintf(outfile, "%2d: ", (int)i); - PCHARSV(pp, start, end - start, utf, outfile); + cprintf(clr_output, "%2d: ", (int)i); + PCHARSV(clr_output, pp, start, end - start, utf, outfile); fprintf(outfile, "\n"); if ((i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0) || (dat_datctl.control & CTL_ALLAFTERTEXT) != 0) { - fprintf(outfile, "%2d+ ", (int)i); + cprintf(clr_output, "%2d+ ", (int)i); /* Note: don't use the start/end variables here because we want to show the text from what is reported as the end. */ - PCHARSV(pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile); + PCHARSV(clr_output, pp, pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf, outfile); fprintf(outfile, "\n"); } } } @@ -8611,7 +8801,7 @@ if ((pat_patctl.control & CTL_POSIX) != 0) modifiers. */ if (dat_datctl.startend[0] != CFORE_UNSET) - fprintf(outfile, "** \\=posix_startend ignored for non-POSIX matching\n"); + cprintf(clr_test_error, "** \\=posix_startend ignored for non-POSIX matching\n"); /* ALLUSEDTEXT is not supported with JIT, but JIT is not used with DFA matching, even if the JIT compiler was used. */ @@ -8619,7 +8809,7 @@ matching, even if the JIT compiler was used. 
*/ if ((dat_datctl.control & (CTL_ALLUSEDTEXT|CTL_DFA)) == CTL_ALLUSEDTEXT && FLD(compiled_code, executable_jit) != NULL) { - fprintf(outfile, "** Showing all consulted text is not supported by JIT: ignored\n"); + cprintf(clr_test_error, "** Showing all consulted text is not supported by JIT: ignored\n"); dat_datctl.control &= ~CTL_ALLUSEDTEXT; } @@ -8641,7 +8831,7 @@ show_memory = (dat_datctl.control & CTL_MEMORY) != 0; if (show_memory && (pat_patctl.control & dat_datctl.control & CTL_NULLCONTEXT) != 0) - fprintf(outfile, "** \\=memory requires either a pattern or a subject " + cprintf(clr_test_error, "** \\=memory requires either a pattern or a subject " "context: ignored\n"); /* Create and assign a JIT stack if requested. */ @@ -8698,7 +8888,7 @@ else if (CASTVAR(void *, match_data) == NULL) { - fprintf(outfile, "** Failed to get memory for recording matching " + cprintf(clr_test_error, "** Failed to get memory for recording matching " "information (size requested: %d)\n", dat_datctl.oveccount); max_oveccount = 0; return PR_OK; @@ -8723,24 +8913,27 @@ PCRE2_GET_OVECTOR_COUNT(oveccount, match_data); if (dat_datctl.replacement[0] != 0 && (dat_datctl.control & CTL_DFA) != 0) { - fprintf(outfile, "** Ignored for DFA matching: replace\n"); + cprintf(clr_test_error, "** Ignored for DFA matching: replace\n"); dat_datctl.replacement[0] = 0; } /* If a replacement string is provided, call pcre2_substitute() instead of or after one of the matching functions. First we have to convert the replacement -string to the appropriate width. */ +string to the appropriate width. 
This also needs to be done if a seperate +substitute_subject string is defined.*/ if (dat_datctl.replacement[0] != 0) { int rc; uint8_t *pr; uint8_t rbuffer[REPLACE_BUFFSIZE]; + uint8_t sbuffer[REPLACE_BUFFSIZE]; uint8_t nbuffer[REPLACE_BUFFSIZE]; uint8_t *rbptr; - uint32_t xoptions; + uint8_t *sbptr; + uint32_t xoptions; /* Options for substitute calls*/ uint32_t emoption; /* External match option */ - PCRE2_SIZE j, rlen, nsize, nsize_input, erroroffset; + PCRE2_SIZE j, rlen, slen, sblen, nsize, nsize_input, erroroffset; BOOL badutf = FALSE; #ifdef SUPPORT_PCRE2_8 @@ -8759,10 +8952,139 @@ if (dat_datctl.replacement[0] != 0) for (j = 0; j < 2*oveccount; j++) ovector[j] = JUNK_OFFSET; if (timeitm) - fprintf(outfile, "** Timing is not supported with replace: ignored\n"); + cprintf(clr_test_error, "** Timing is not supported with replace: ignored\n"); if ((dat_datctl.control & CTL_ALTGLOBAL) != 0) - fprintf(outfile, "** Altglobal is not supported with replace: ignored\n"); + cprintf(clr_test_error, "** Altglobal is not supported with replace: ignored\n"); + + /* This is done twice, once for the replacement, and once for the substitute string */ + for (int i = 0; i < 2; i++) + { + BOOL replace = i == 0; + + if (!replace && dat_datctl.substitute_subject[0] == 0xFF) + { + /* We aren't using a substitute_subject at all */ + sbptr = ((dat_datctl.control2 & CTL2_NULL_SUBSTITUTE_SUBJECT) == 0)? pp : NULL; + slen = arg_ulen; + break; + } + + SETCASTPTR(r, replace ? rbuffer : sbuffer); /* Sets r8, r16, or r32, as appropriate. */ + pr = replace ? dat_datctl.replacement : dat_datctl.substitute_subject; + + /* If the replacement starts with '[]' we interpret that as length + value for the replacement subject buffer. 
(We do not do this for the substitute_subject)*/ + + if (replace) + { + nsize = REPLACE_BUFFSIZE/code_unit_size; + if (*pr == '[') + { + PCRE2_SIZE n = 0; + while ((c = *(++pr)) >= '0' && c <= '9') n = n * 10 + (c - '0'); + if (*pr++ != ']') + { + cprintf(clr_test_error, "Bad buffer size in replacement string\n"); + return PR_OK; + } + if (n > nsize) + { + cprintf(clr_test_error, "Replacement buffer setting (%" SIZ_FORM ") is too " + "large (max %" SIZ_FORM ")\n", n, nsize); + return PR_OK; + } + nsize = n; + } + } + /* Now copy the replacement/substitute subject string to a buffer of the appropriate width. No + escape processing is done for replacements/substitute subjects. In UTF mode, check for an invalid + UTF-8 input string, and if it is invalid, just copy its code units without + UTF interpretation. This provides a means of checking that an invalid string + is detected. Otherwise, UTF-8 can be used to include wide characters in a + replacement/substitute subject. */ + + if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset); + + /* Not UTF or invalid UTF-8: just copy the code units. 
*/ + + if (!utf || badutf) + { + while ((c = *pr++) != 0) + { +#if defined(EBCDIC) && !EBCDIC_IO + c = ascii_to_ebcdic(c); +#endif +#ifdef SUPPORT_PCRE2_8 + if (test_mode == PCRE8_MODE) *r8++ = c; +#endif +#ifdef SUPPORT_PCRE2_16 + if (test_mode == PCRE16_MODE) *r16++ = c; +#endif +#ifdef SUPPORT_PCRE2_32 + if (test_mode == PCRE32_MODE) *r32++ = c; +#endif + } + } + + /* Valid UTF-8 replacement/substitute subject string */ + + else while ((c = *pr++) != 0) + { + if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); } + +#ifdef SUPPORT_PCRE2_8 + if (test_mode == PCRE8_MODE) r8 += ord_to_utf8(c, r8); +#endif + +#ifdef SUPPORT_PCRE2_16 + if (test_mode == PCRE16_MODE) + { + if (c >= 0x10000u) + { + c-= 0x10000u; + *r16++ = 0xd800 | (c >> 10); + *r16++ = 0xdc00 | (c & 0x3ff); + } + else *r16++ = c; + } +#endif + +#ifdef SUPPORT_PCRE2_32 + if (test_mode == PCRE32_MODE) *r32++ = c; +#endif + } + + SET(*r, 0); + + if (replace) + { + if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0) + rlen = PCRE2_ZERO_TERMINATED; + else + rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size; + } + else + { + sblen = CASTVAR(uint8_t *, r) - sbuffer; /* length in bytes */ + if ((dat_datctl.control2 & CTL2_SUBSTITUTE_ZERO_TERMINATE) != 0) + { + sblen += code_unit_size; /* add the size of terminating null */ + slen = PCRE2_ZERO_TERMINATED; + } + else + { + slen = sblen/code_unit_size; + } + } + + /* There is a special option to set the replacement/substitute_subject to NULL in order to test + that case. */ + if (replace) + rbptr = ((dat_datctl.control2 & CTL2_NULL_REPLACEMENT) == 0)? rbuffer : NULL; + else + sbptr = ((dat_datctl.control2 & CTL2_NULL_SUBSTITUTE_SUBJECT) == 0)? sbuffer : NULL;; + } /* Check for a test that does substitution after an initial external match. 
If this is set, we run the external match, but leave the interpretation of @@ -8773,6 +9095,27 @@ if (dat_datctl.replacement[0] != 0) if (emoption != 0) { + if ((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERWRITE) != 0) + { + if (dbuffer_size < REPLACE_BUFFSIZE) + { + cprintf(clr_test_error, "** Internal error: subject buffer size (%" SIZ_FORM ") " + "is smaller than substitute_subject buffer size (%" SIZ_FORM ")\n", + dbuffer_size, (PCRE2_SIZE)REPLACE_BUFFSIZE); + return PR_OK; + } + if (sblen > blen) + { + /* move the original subject to the left, so there is enough space + for the substitute_subject */ +#ifdef SUPPORT_VALGRIND + // cancel out the VALGRIND_MAKE_MEM_NOACCESS that was used previously + VALGRIND_MAKE_MEM_UNDEFINED(pp - (sblen - blen), sblen - blen); +#endif + pp = memmove(pp - (sblen - blen), pp, blen); + } + } + if ((pat_patctl.control & CTL_JITFAST) != 0) { PCRE2_JIT_MATCH(rc, compiled_code, pp, arg_ulen, dat_datctl.offset, @@ -8783,6 +9126,13 @@ if (dat_datctl.replacement[0] != 0) PCRE2_MATCH(rc, compiled_code, pp, arg_ulen, dat_datctl.offset, dat_datctl.options, match_data, use_dat_context); } + +#ifdef SUPPORT_VALGRIND + /* if we are going to pass a new subject pointer to substitute, mark the original one as freed */ + if ((dat_datctl.substitute_subject[0] != 0xFF) && ((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERWRITE) == 0)) { + VALGRIND_MAKE_MEM_NOACCESS(pp, blen); + } +#endif } xoptions = emoption | @@ -8800,95 +9150,29 @@ if (dat_datctl.replacement[0] != 0) PCRE2_SUBSTITUTE_UNKNOWN_UNSET) | (((dat_datctl.control2 & CTL2_SUBSTITUTE_UNSET_EMPTY) == 0)? 0 : PCRE2_SUBSTITUTE_UNSET_EMPTY); - - SETCASTPTR(r, rbuffer); /* Sets r8, r16, or r32, as appropriate. */ - pr = dat_datctl.replacement; - - /* If the replacement starts with '[]' we interpret that as length - value for the replacement buffer. 
*/ - - nsize = REPLACE_BUFFSIZE/code_unit_size; - if (*pr == '[') + /* substitute_options wasn't provided */ + if (dat_datctl.substitute_options[0] == 0xFF) { - PCRE2_SIZE n = 0; - while ((c = *(++pr)) >= '0' && c <= '9') n = n * 10 + (c - '0'); - if (*pr++ != ']') - { - fprintf(outfile, "Bad buffer size in replacement string\n"); - return PR_OK; - } - if (n > nsize) - { - fprintf(outfile, "Replacement buffer setting (%" SIZ_FORM ") is too " - "large (max %" SIZ_FORM ")\n", n, nsize); - return PR_OK; - } - nsize = n; + /* Use same options as match */ + xoptions |= dat_datctl.options; } - - /* Now copy the replacement string to a buffer of the appropriate width. No - escape processing is done for replacements. In UTF mode, check for an invalid - UTF-8 input string, and if it is invalid, just copy its code units without - UTF interpretation. This provides a means of checking that an invalid string - is detected. Otherwise, UTF-8 can be used to include wide characters in a - replacement. */ - - if (utf) badutf = valid_utf(pr, strlen((const char *)pr), &erroroffset); - - /* Not UTF or invalid UTF-8: just copy the code units. 
*/ - - if (!utf || badutf) - { - while ((c = *pr++) != 0) - { -#if defined(EBCDIC) && !EBCDIC_IO - c = ascii_to_ebcdic(c); -#endif -#ifdef SUPPORT_PCRE2_8 - if (test_mode == PCRE8_MODE) *r8++ = c; -#endif -#ifdef SUPPORT_PCRE2_16 - if (test_mode == PCRE16_MODE) *r16++ = c; -#endif -#ifdef SUPPORT_PCRE2_32 - if (test_mode == PCRE32_MODE) *r32++ = c; -#endif - } - } - - /* Valid UTF-8 replacement string */ - - else while ((c = *pr++) != 0) + /* substitute_options holds '|'-separated option names; look up each one in turn, stepping over the separator */ + else for (const uint8_t *opt_start = &dat_datctl.substitute_options[0], *opt_end = opt_start; + *opt_start != 0; opt_start = (*opt_end == '|')? ++opt_end : opt_end) + { + while (*opt_end != '|' && *opt_end != 0) opt_end++; + int index = scan_modifiers(opt_start, opt_end - opt_start); + /* Either no modifier was found with the name, or it isn't an option + that is supported for both pcre2_match and pcre2_substitute. */ + if (index < 0 || modlist[index].type != MOD_OPT || (modlist[index].value & ( + PCRE2_ANCHORED | PCRE2_NO_UTF_CHECK | PCRE2_NO_JIT | PCRE2_ENDANCHORED | + PCRE2_NOTBOL | PCRE2_NOTEOL | PCRE2_NOTEMPTY | PCRE2_NOTEMPTY_ATSTART)) == 0) { - if (HASUTF8EXTRALEN(c)) { GETUTF8INC(c, pr); } - -#ifdef SUPPORT_PCRE2_8 - if (test_mode == PCRE8_MODE) r8 += ord_to_utf8(c, r8); -#endif - -#ifdef SUPPORT_PCRE2_16 - if (test_mode == PCRE16_MODE) - { - if (c >= 0x10000u) - { - c-= 0x10000u; - *r16++ = 0xd800 | (c >> 10); - *r16++ = 0xdc00 | (c & 0x3ff); - } - else *r16++ = c; - } -#endif - -#ifdef SUPPORT_PCRE2_32 - if (test_mode == PCRE32_MODE) *r32++ = c; -#endif + cprintf(clr_test_error, "** Unsupported option in substitute_options \"%.*s\"\n", (int)(opt_end-opt_start), opt_start); + return PR_OK; } - - SET(*r, 0); - if ((dat_datctl.control & CTL_ZERO_TERMINATE) != 0) - rlen = PCRE2_ZERO_TERMINATED; - else - rlen = (CASTVAR(uint8_t *, r) - rbuffer)/code_unit_size; + xoptions |= modlist[index].value; + } if ((dat_datctl.control2 & CTL2_SUBSTITUTE_CALLOUT) != 0) { @@ -8908,16 +9192,33 @@ if (dat_datctl.replacement[0] != 0) 
PCRE2_SET_SUBSTITUTE_CASE_CALLOUT_NULL(dat_context); /* No callout */ } - /* There is a special option to set the replacement to NULL in order to test - that case. */ + if ((dat_datctl.control2 & CTL2_SUBSTITUTE_OVERWRITE) != 0) + { +#ifdef SUPPORT_VALGRIND + if (sblen < blen) + { + /* substitute_subject is smaller than the original, so mark the extra + bytes as inaccessible */ + VALGRIND_MAKE_MEM_NOACCESS(pp + sblen, blen - sblen); + } + else + { + /* substitute_subject is longer, so mark the extra bytes as accessible */ + VALGRIND_MAKE_MEM_UNDEFINED(pp + blen, sblen - blen); + } +#endif - rbptr = ((dat_datctl.control2 & CTL2_NULL_REPLACEMENT) == 0)? rbuffer : NULL; + /* copy the substitute_subject, making sure to include the terminating null + if pcre2_substitute needs it */ + sbptr = memcpy(pp, sbuffer, sblen); + } if (malloc_testing) CLEAR_HEAP_FRAMES(); mallocs_called = 0; nsize_input = nsize; - PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset, - dat_datctl.options|xoptions, match_data, use_dat_context, + PCRE2_SUBSTITUTE(rc, compiled_code, sbptr, slen, + dat_datctl.substitute_offset < PCRE2_SIZE_MAX ? dat_datctl.substitute_offset : dat_datctl.offset, + xoptions, match_data, use_dat_context, rbptr, rlen, nbuffer, &nsize); /* For malloc testing, we repeat the substitution. 
*/ @@ -8930,14 +9231,14 @@ if (dat_datctl.replacement[0] != 0) mallocs_until_failure = i; nsize = nsize_input; - PCRE2_SUBSTITUTE(rc, compiled_code, pp, arg_ulen, dat_datctl.offset, - dat_datctl.options|xoptions, match_data, use_dat_context, + PCRE2_SUBSTITUTE(rc, compiled_code, sbptr, slen, (dat_datctl.substitute_offset < PCRE2_SIZE_MAX ? dat_datctl.substitute_offset : dat_datctl.offset), /* same offset as the first call, so the retry repeats it exactly */ + xoptions, match_data, use_dat_context, rbptr, rlen, nbuffer, &nsize); mallocs_until_failure = INT_MAX; if (i < target_mallocs && rc != PCRE2_ERROR_NOMEMORY) { - fprintf(outfile, "** malloc() Substitution test did not fail as expected (%d)\n", + cprintf(clr_test_error, "** malloc() Substitution test did not fail as expected (%d)\n", rc); return PR_ABEND; } @@ -8946,38 +9247,51 @@ if (dat_datctl.replacement[0] != 0) if (rc < 0) { - fprintf(outfile, "Failed: error %d", rc); + /* A UTF error may be in the subject, or the replacement string, pcre2_substitute doesn't say which one */ + BOOL in_either = rc <= PCRE2_ERROR_UTF8_ERR1 && rc >= PCRE2_ERROR_UTF32_ERR2; + BOOL in_subject = rc == PCRE2_ERROR_BADUTFCAPTURE; + + cprintf(clr_api_error, "Failed: error %d", rc); if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET) - fprintf(outfile, " at offset %ld in replacement", (long int)nsize); - fprintf(outfile, ": "); + cprintf(clr_api_error, " at offset %ld in %s", (long int)nsize, in_either ? "subject or replacement" : in_subject ? "subject" : "replacement"); + + cprintf(clr_api_error, ": "); if (!print_error_message(rc, "", "")) return PR_ABEND; if (rc == PCRE2_ERROR_NOMEMORY && (xoptions & PCRE2_SUBSTITUTE_OVERFLOW_LENGTH) != 0) - fprintf(outfile, ": %ld code units are needed", (long int)nsize); + cprintf(clr_api_error, ": %ld code units are needed", (long int)nsize); if (rc != PCRE2_ERROR_NOMEMORY && nsize != PCRE2_UNSET) { - PCRE2_SIZE full_rlen = (rlen != PCRE2_ZERO_TERMINATED)? 
rlen : - STRLEN(rbptr); - - fprintf(outfile, "\n here: "); - if (nsize > 0) - { - PTRUNCV(rbptr, full_rlen, nsize, TRUE, utf, outfile); - fprintf(outfile, " "); - } - fprintf(outfile, "|<--|"); - if (nsize < full_rlen) + for (uint8_t *bptr = sbptr; bptr != NULL; bptr = bptr == sbptr ? rbptr : NULL) { - fprintf(outfile, " "); - PTRUNCV(rbptr, full_rlen, nsize, FALSE, utf, outfile); + if (bptr == sbptr ? in_subject || in_either : !in_subject || in_either) + { + PCRE2_SIZE len = bptr == sbptr ? slen : rlen; + + PCRE2_SIZE full_len = (len != PCRE2_ZERO_TERMINATED)? len : + STRLEN(bptr); + + cprintf(clr_api_error, "\n %s here: ", bptr == rbptr && in_either ? "or" : " "); + if (nsize > 0) + { + PTRUNCV(clr_api_error, bptr, full_len, nsize, TRUE, utf, outfile); + cprintf(clr_api_error, " "); + } + cprintf(clr_api_error, "|<--|"); + if (nsize < full_len) + { + cprintf(clr_api_error, " "); + PTRUNCV(clr_api_error, bptr, full_len, nsize, FALSE, utf, outfile); + } + } } } } else { - fprintf(outfile, "%2d: ", rc); - PCHARSV(nbuffer, 0, nsize, utf, outfile); + cprintf(clr_output, "%2d: ", rc); + PCHARSV(clr_output, nbuffer, 0, nsize, utf, outfile); } fprintf(outfile, "\n"); @@ -9024,7 +9338,7 @@ for (gmatched = 0;; gmatched++) { if ((dat_datctl.options & PCRE2_DFA_RESTART) != 0) { - fprintf(outfile, "Timing DFA restarts is not supported\n"); + cprintf(clr_test_error, "Timing DFA restarts is not supported\n"); return PR_OK; } if (dfa_workspace == NULL) @@ -9060,7 +9374,7 @@ for (gmatched = 0;; gmatched++) } } total_match_time += (time_taken = clock() - start_time); - fprintf(outfile, "Match time %7.4f microseconds\n", + cprintf(clr_profiling, "Match time %7.4f microseconds\n", ((1000000 / CLOCKS_PER_SEC) * (double)time_taken) / timeitm); } @@ -9093,7 +9407,7 @@ for (gmatched = 0;; gmatched++) if (capcount == 0) { - fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n"); + cprintf(clr_test_error, "Matched, but offsets vector is too small to show all 
matches\n"); capcount = dat_datctl.oveccount; } } @@ -9132,7 +9446,7 @@ for (gmatched = 0;; gmatched++) use_dat_context, dfa_workspace, DFA_WS_DIMENSION); if (capcount == 0) { - fprintf(outfile, "Matched, but offsets vector is too small to show all matches\n"); + cprintf(clr_api_error, "Matched, but offsets vector is too small to show all matches\n"); capcount = dat_datctl.oveccount; } } @@ -9146,7 +9460,7 @@ for (gmatched = 0;; gmatched++) dat_datctl.options | g_notempty, match_data, use_dat_context); if (capcount == 0) { - fprintf(outfile, "Matched, but too many substrings\n"); + cprintf(clr_api_error, "Matched, but too many substrings\n"); capcount = dat_datctl.oveccount; } } @@ -9186,7 +9500,7 @@ for (gmatched = 0;; gmatched++) if (i < target_mallocs && capcount != PCRE2_ERROR_NOMEMORY) { - fprintf(outfile, "** malloc() match test did not fail as expected (%d)\n", + cprintf(clr_test_error, "** malloc() match test did not fail as expected (%d)\n", capcount); return PR_ABEND; } @@ -9204,7 +9518,7 @@ for (gmatched = 0;; gmatched++) PCRE2_NEXT_MATCH(rc_nextmatch, match_data, &tmp_offset, &tmp_options); if (rc_nextmatch || tmp_offset != 0xcd || tmp_options != 0xcd) { - fprintf(outfile, "** unexpected pcre2_next_match() for rc < 0\n"); + cprintf(clr_test_error, "** unexpected pcre2_next_match() for rc < 0\n"); return PR_ABEND; } } @@ -9229,7 +9543,7 @@ for (gmatched = 0;; gmatched++) if ((unsigned)capcount > oveccount) /* Check for lunatic return value */ { - fprintf(outfile, + cprintf(clr_test_error, "** PCRE2 error: returned count %d is too big for ovector count %d\n", capcount, oveccount); return PR_ABEND; @@ -9242,15 +9556,15 @@ for (gmatched = 0;; gmatched++) (pat_patctl.control & CTL_JITFAST) == 0) { if ((FLD(match_data, flags) & PCRE2_MD_COPIED_SUBJECT) == 0) - fprintf(outfile, + cprintf(clr_test_error, "** PCRE2 error: flag not set after copy_matched_subject\n"); if (CASTFLD(const void *, match_data, subject) == pp) - fprintf(outfile, + 
cprintf(clr_test_error, "** PCRE2 error: copy_matched_subject has not copied\n"); if (memcmp(CASTFLD(const void *, match_data, subject), pp, ulen) != 0) - fprintf(outfile, + cprintf(clr_test_error, "** PCRE2 error: copy_matched_subject mismatch\n"); } @@ -9275,7 +9589,7 @@ for (gmatched = 0;; gmatched++) pp + code_unit_size * ovector[0] == ovecsave[0] && pp + code_unit_size * ovector[1] == ovecsave[1]) { - fprintf(outfile, "global repeat returned the same match as previous\n"); + cprintf(clr_api_error, "global repeat returned the same match as previous\n"); goto NEXT_MATCH; } @@ -9293,7 +9607,7 @@ for (gmatched = 0;; gmatched++) (ovector[1] == ovector[0] && ovecsave[1] != ovecsave[0] && pp + code_unit_size * ovector[1] == ovecsave[1]))) { - fprintf(outfile, + cprintf(clr_test_error, "** PCRE2 error: global repeat did not make progress\n"); return PR_ABEND; } @@ -9329,17 +9643,17 @@ for (gmatched = 0;; gmatched++) { start = ovector[i+1]; end = ovector[i]; - fprintf(outfile, "Start of matched string is beyond its end - " + cprintf(clr_api_error, "Start of matched string is beyond its end - " "displaying from end to start.\n"); } - fprintf(outfile, "%2d: ", i/2); + cprintf(clr_output, "%2d: ", i/2); /* Check for an unset group */ if (start == PCRE2_UNSET && end == PCRE2_UNSET) { - fprintf(outfile, "\n"); + cprintf(clr_api_error, "\n"); continue; } @@ -9353,9 +9667,9 @@ for (gmatched = 0;; gmatched++) if (((dat_datctl.control & CTL_DFA) != 0 || i >= (int)(2*maxcapcount + 2)) && start == JUNK_OFFSET && end == JUNK_OFFSET) - fprintf(outfile, "\n"); + cprintf(clr_output, "\n"); else - fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n", + cprintf(clr_test_error, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n", (unsigned long int)start, (unsigned long int)end); continue; } @@ -9383,15 +9697,15 @@ for (gmatched = 0;; gmatched++) if (showallused) { - PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile); - PCHARS(lmiddle, pp, start, end - start, utf, 
outfile); - PCHARS(lright, pp, end, rightchar - end, utf, outfile); + PCHARS(clr_output, lleft, pp, leftchar, start - leftchar, utf, outfile); + PCHARS(clr_output, lmiddle, pp, start, end - start, utf, outfile); + PCHARS(clr_output, lright, pp, end, rightchar - end, utf, outfile); if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) - fprintf(outfile, " (JIT)"); - fprintf(outfile, "\n "); - for (j = 0; j < lleft; j++) fprintf(outfile, "<"); - for (j = 0; j < lmiddle; j++) fprintf(outfile, " "); - for (j = 0; j < lright; j++) fprintf(outfile, ">"); + cprintf(clr_output, " (JIT)"); + cprintf(clr_output, "\n "); + for (j = 0; j < lleft; j++) cprintf(clr_output, "<"); + for (j = 0; j < lmiddle; j++) cprintf(clr_output, " "); + for (j = 0; j < lright; j++) cprintf(clr_output, ">"); } /* When a pattern contains \K, the start of match position may be @@ -9402,14 +9716,14 @@ for (gmatched = 0;; gmatched++) { PCRE2_SIZE startchar; PCRE2_GET_STARTCHAR(startchar, match_data); - PCHARS(lleft, pp, startchar, start - startchar, utf, outfile); - PCHARSV(pp, start, end - start, utf, outfile); + PCHARS(clr_output, lleft, pp, startchar, start - startchar, utf, outfile); + PCHARSV(clr_output, pp, start, end - start, utf, outfile); if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) - fprintf(outfile, " (JIT)"); + cprintf(clr_output, " (JIT)"); if (startchar != start) { - fprintf(outfile, "\n "); - for (j = 0; j < lleft; j++) fprintf(outfile, "^"); + cprintf(clr_output, "\n "); + for (j = 0; j < lleft; j++) cprintf(clr_output, "^"); } } @@ -9417,9 +9731,9 @@ for (gmatched = 0;; gmatched++) else { - PCHARSV(pp, start, end - start, utf, outfile); + PCHARSV(clr_output, pp, start, end - start, utf, outfile); if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) - fprintf(outfile, " (JIT)"); + cprintf(clr_output, " (JIT)"); } } @@ -9427,7 +9741,7 @@ for (gmatched = 0;; gmatched++) else { - PCHARSV(pp, start, end - start, utf, outfile); + PCHARSV(clr_output, 
pp, start, end - start, utf, outfile); } fprintf(outfile, "\n"); @@ -9438,8 +9752,8 @@ for (gmatched = 0;; gmatched++) if ((dat_datctl.control & CTL_ALLAFTERTEXT) != 0 || (i == 0 && (dat_datctl.control & CTL_AFTERTEXT) != 0)) { - fprintf(outfile, "%2d+ ", i/2); - PCHARSV(pp, ovector[i+1], ulen - ovector[i+1], utf, outfile); + cprintf(clr_output, "%2d+ ", i/2); + PCHARSV(clr_output, pp, ovector[i+1], ulen - ovector[i+1], utf, outfile); fprintf(outfile, "\n"); } } @@ -9449,8 +9763,8 @@ for (gmatched = 0;; gmatched++) if ((dat_datctl.control & CTL_MARK) != 0 && TESTFLD(match_data, mark, !=, NULL)) { - fprintf(outfile, "MK: "); - PCHARSV(CASTFLD(const void *, match_data, mark), -1, -1, utf, outfile); + cprintf(clr_output, "MK: "); + PCHARSV(clr_output, CASTFLD(const void *, match_data, mark), -1, -1, utf, outfile); fprintf(outfile, "\n"); } @@ -9477,34 +9791,34 @@ for (gmatched = 0;; gmatched++) } else leftchar = ovector[0]; - fprintf(outfile, "Partial match"); + cprintf(clr_api_error, "Partial match"); if ((dat_datctl.control & CTL_MARK) != 0 && TESTFLD(match_data, mark, !=, NULL)) { - fprintf(outfile, ", mark="); - PCHARS(rubriclength, CASTFLD(const void *, match_data, mark), -1, -1, utf, + cprintf(clr_api_error, ", mark="); + PCHARS(clr_api_error, rubriclength, CASTFLD(const void *, match_data, mark), -1, -1, utf, outfile); rubriclength += 7; } - fprintf(outfile, ": "); + cprintf(clr_api_error, ": "); rubriclength += 15; - PCHARS(backlength, pp, leftchar, ovector[0] - leftchar, utf, outfile); - PCHARSV(pp, ovector[0], ovector[1] - ovector[0], utf, outfile); + PCHARS(clr_api_error, backlength, pp, leftchar, ovector[0] - leftchar, utf, outfile); + PCHARSV(clr_api_error, pp, ovector[0], ovector[1] - ovector[0], utf, outfile); if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) - fprintf(outfile, " (JIT)"); + cprintf(clr_api_error, " (JIT)"); fprintf(outfile, "\n"); if (backlength != 0) { - for (int i = 0; i < rubriclength; i++) fprintf(outfile, " "); - for 
(int i = 0; i < backlength; i++) fprintf(outfile, "<"); + for (int i = 0; i < rubriclength; i++) cprintf(clr_api_error, " "); + for (int i = 0; i < backlength; i++) cprintf(clr_api_error, "<"); fprintf(outfile, "\n"); } if (ulen != ovector[1]) - fprintf(outfile, "** ovector[1] is not equal to the subject length: " + cprintf(clr_test_error, "** ovector[1] is not equal to the subject length: " "%ld != %ld\n", (unsigned long int)ovector[1], (unsigned long int)ulen); /* Process copy/get strings */ @@ -9529,15 +9843,15 @@ for (gmatched = 0;; gmatched++) case PCRE2_ERROR_NOMATCH: if (gmatched == 0) { - fprintf(outfile, "No match"); + cprintf(clr_api_error, "No match"); if ((dat_datctl.control & CTL_MARK) != 0 && TESTFLD(match_data, mark, !=, NULL)) { - fprintf(outfile, ", mark = "); - PCHARSV(CASTFLD(const void *, match_data, mark), -1, -1, utf, outfile); + cprintf(clr_api_error, ", mark = "); + PCHARSV(clr_api_error, CASTFLD(const void *, match_data, mark), -1, -1, utf, outfile); } if ((pat_patctl.control & CTL_JITVERIFY) != 0 && jit_was_used) - fprintf(outfile, " (JIT)"); + cprintf(clr_api_error, " (JIT)"); fprintf(outfile, "\n"); /* "allvector" outputs the entire vector */ @@ -9548,18 +9862,18 @@ for (gmatched = 0;; gmatched++) break; case PCRE2_ERROR_BADUTFOFFSET: - fprintf(outfile, "Error %d (bad UTF-%d offset)\n", capcount, test_mode); + cprintf(clr_api_error, "Error %d (bad UTF-%d offset)\n", capcount, test_mode); break; default: - fprintf(outfile, "Failed: error %d: ", capcount); + cprintf(clr_api_error, "Failed: error %d: ", capcount); if (!print_error_message(capcount, "", "")) return PR_ABEND; if (capcount <= PCRE2_ERROR_UTF8_ERR1 && capcount >= PCRE2_ERROR_UTF32_ERR2) { PCRE2_SIZE startchar; PCRE2_GET_STARTCHAR(startchar, match_data); - fprintf(outfile, " at offset %" SIZ_FORM, startchar); + cprintf(clr_api_error, " at offset %" SIZ_FORM, startchar); } fprintf(outfile, "\n"); break; @@ -9719,6 +10033,7 @@ printf(" -32 use the 32-bit library\n"); printf(" -ac 
set default pattern modifier PCRE2_AUTO_CALLOUT\n"); printf(" -AC as -ac, but also set subject 'callout_extra' modifier\n"); printf(" -b set default pattern modifier 'fullbincode'\n"); +printf(" -c show output in colour\n"); printf(" -C show PCRE2 compile-time options and exit\n"); printf(" -C arg show a specific compile-time option and exit with its\n"); printf(" value if numeric (else 0). The arg can be:\n"); @@ -9789,7 +10104,7 @@ if (arg != NULL && arg[0] != '-') if (i >= COPTLISTCOUNT) { - fprintf(stderr, "** Unknown -C option \"%s\"\n", arg); + fatal_printf("** Unknown -C option \"%s\"\n", arg); return 0; } @@ -10030,7 +10345,7 @@ display_properties(BOOL wantscripts) { #ifndef SUPPORT_UNICODE (void)wantscripts; -printf("** This version of PCRE2 was compiled without Unicode support.\n"); +fatal_printf("** This version of PCRE2 was compiled without Unicode support.\n"); #else uint16_t seentypes[1024]; @@ -10268,7 +10583,7 @@ preprocessor. */ if (PO(options) != DO(options) || PO(control) != DO(control) || PO(control2) != DO(control2)) { - fprintf(stderr, "** Coding error: " + fatal_printf("** Coding error: " "options and control offsets for pattern and data must be the same.\n"); return 1; } @@ -10289,7 +10604,7 @@ if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) != PCRE2_CONFIG(PCRE2_CONFIG_UNICODE, NULL) != sizeof(uint32_t) || PCRE2_CONFIG(PCRE2_CONFIG_MATCHLIMIT, NULL) != sizeof(uint32_t)) { - fprintf(stderr, "** Error in pcre2_config(): bad length\n"); + fatal_printf("** Error in pcre2_config(): bad length\n"); return 1; } @@ -10298,7 +10613,7 @@ if (PCRE2_CONFIG(PCRE2_CONFIG_VERSION, NULL) != if (PCRE2_CONFIG(999, NULL) != PCRE2_ERROR_BADOPTION || PCRE2_CONFIG(999, &temp) != PCRE2_ERROR_BADOPTION) { - fprintf(stderr, "** Error in pcre2_config(): bad option not diagnosed\n"); + fatal_printf("** Error in pcre2_config(): bad option not diagnosed\n"); return 1; } @@ -10329,6 +10644,7 @@ locale_name[0] = 0; memset(&def_patctl, 0, sizeof(patctl)); 
def_patctl.convert_type = CONVERT_UNSET; +def_patctl.substitute_options[0] = 0xFF; /* sentinel: tells us to use the same options as match */ memset(&def_datctl, 0, sizeof(datctl)); def_datctl.oveccount = DEFAULT_OVECCOUNT; @@ -10337,6 +10653,9 @@ def_datctl.get_numbers[0] = -1; def_datctl.startend[0] = def_datctl.startend[1] = CFORE_UNSET; def_datctl.cerror[0] = def_datctl.cerror[1] = CFORE_UNSET; def_datctl.cfail[0] = def_datctl.cfail[1] = CFORE_UNSET; +def_datctl.substitute_subject[0] = 0xFF; /* sentinel: tells us to use the same subject as for match */ +def_datctl.substitute_offset = PCRE2_SIZE_MAX; /* sentinel: tells us to use the offset field instead */ +def_datctl.substitute_options[0] = 0xFF; /* sentinel: tells us to use the same options as match */ /* Scan command line options. */ @@ -10390,7 +10709,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) (void)pcre2_set_bsr_8(pat_context8, 999); (void)pcre2_set_newline_8(pat_context8, 999); #else - fprintf(stderr, + fatal_printf( "** This version of PCRE2 was built without 8-bit support\n"); exit(1); #endif @@ -10404,7 +10723,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) (void)pcre2_set_bsr_16(pat_context16, 999); (void)pcre2_set_newline_16(pat_context16, 999); #else - fprintf(stderr, + fatal_printf( "** This version of PCRE2 was built without 16-bit support\n"); exit(1); #endif @@ -10418,7 +10737,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) (void)pcre2_set_bsr_32(pat_context32, 999); (void)pcre2_set_newline_32(pat_context32, 999); #else - fprintf(stderr, + fatal_printf( "** This version of PCRE2 was built without 32-bit support\n"); exit(1); #endif @@ -10438,7 +10757,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) ((uli = strtoul(argv[op+1], &endptr, 10)), *endptr == 0)) { #if defined(_WIN32) || defined(WIN32) || defined(__HAIKU__) || defined(NATIVE_ZOS) || defined(__VMS) - fprintf(stderr, "pcre2test: -S is not supported on this OS\n"); + fatal_printf("pcre2test: -S is not 
supported on this OS\n"); exit(1); #else int rc = 0; @@ -10446,7 +10765,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) struct rlimit rlim, rlim_old; if (uli > INT32_MAX / (1024 * 1024)) { - fprintf(stderr, "** Argument for -S is too big\n"); + fatal_printf("** Argument for -S is too big\n"); exit(1); } stack_size = (uint32_t)uli; @@ -10455,15 +10774,15 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) rlim.rlim_cur = stack_size * 1024 * 1024; if (rlim.rlim_max != RLIM_INFINITY && rlim.rlim_cur > rlim.rlim_max) { - fprintf(stderr, + fatal_printf( "pcre2test: requested stack size %luMiB is greater than hard limit ", (unsigned long int)stack_size); if (rlim.rlim_max % (1024*1024) == 0) - fprintf(stderr, "%luMiB\n", (unsigned long)(rlim.rlim_max/(1024*1024))); + fatal_printf("%luMiB\n", (unsigned long)(rlim.rlim_max/(1024*1024))); else if (rlim.rlim_max % 1024 == 0) - fprintf(stderr, "%luKiB\n", (unsigned long)(rlim.rlim_max/1024)); + fatal_printf("%luKiB\n", (unsigned long)(rlim.rlim_max/1024)); else - fprintf(stderr, "%lu bytes\n", (unsigned long)(rlim.rlim_max)); + fatal_printf("%lu bytes\n", (unsigned long)(rlim.rlim_max)); exit(1); } if (rlim_old.rlim_cur != RLIM_INFINITY && rlim_old.rlim_cur <= INT32_MAX && @@ -10471,7 +10790,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) rc = setrlimit(RLIMIT_STACK, &rlim); if (rc != 0) { - fprintf(stderr, "pcre2test: setting stack size %luMiB failed: %s\n", + fatal_printf("pcre2test: setting stack size %luMiB failed: %s\n", (unsigned long int)stack_size, strerror(errno)); exit(1); } @@ -10489,6 +10808,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) } else if (strcmp(arg, "-ac") == 0) def_patctl.options |= PCRE2_AUTO_CALLOUT; else if (strcmp(arg, "-b") == 0) def_patctl.control |= CTL_FULLBINCODE; + else if (strcmp(arg, "-c") == 0) colour_on = TRUE; else if (strcmp(arg, "-d") == 0) def_patctl.control |= CTL_DEBUG; else if (strcmp(arg, "-dfa") == 0) def_datctl.control 
|= CTL_DFA; else if (strcmp(arg, "-i") == 0) def_patctl.control |= CTL_INFO; @@ -10499,7 +10819,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) else if (arg[4] == 'f') def_patctl.control |= CTL_JITFAST; def_patctl.jit = JIT_DEFAULT; /* full & partial */ #ifndef SUPPORT_JIT - fprintf(stderr, "** Warning: JIT support is not available: " + fatal_printf("** Warning: JIT support is not available: " "-jit[fast|verify] calls functions that do nothing.\n"); #endif } @@ -10515,12 +10835,12 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) { if (uli == 0) { - fprintf(stderr, "** Argument for %s must not be zero\n", arg); + fatal_printf("** Argument for %s must not be zero\n", arg); exit(1); } if (U32OVERFLOW(uli)) { - fprintf(stderr, "** Argument for %s is too big\n", arg); + fatal_printf("** Argument for %s is too big\n", arg); exit(1); } timeitm = (int)uli; @@ -10577,7 +10897,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) CHECK_VALUE_EXISTS: if (argc <= 2) { - fprintf(stderr, "** Missing value for %s\n", arg); + fatal_printf("** Missing value for %s\n", arg); yield = 1; goto EXIT; } @@ -10589,7 +10909,7 @@ while (argc > 1 && argv[op][0] == '-' && argv[op][1] != 0) else { - fprintf(stderr, "** Unknown or malformed option \"%s\"\n", arg); + fatal_printf("** Unknown or malformed option \"%s\"\n", arg); usage(); yield = 1; goto EXIT; @@ -10618,7 +10938,7 @@ least 128 code units, because it is used for retrieving error messages. */ pbuffer16 = (uint16_t *)malloc(pbuffer16_size); if (pbuffer16 == NULL) { - fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n", + fatal_printf("pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer16\n", pbuffer16_size); yield = 1; goto EXIT; @@ -10633,7 +10953,7 @@ least 128 code units, because it is used for retrieving error messages. 
*/ pbuffer32 = (uint32_t *)malloc(pbuffer32_size); if (pbuffer32 == NULL) { - fprintf(stderr, "pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n", + fatal_printf("pcre2test: malloc(%" SIZ_FORM ") failed for pbuffer32\n", pbuffer32_size); yield = 1; goto EXIT; @@ -10643,38 +10963,39 @@ least 128 code units, because it is used for retrieving error messages. */ /* Loop along a list of error numbers. */ + outfile = stdout; for (;;) { li = strtol(arg_error, &endptr, 10); if (S32OVERFLOW(li) || (*endptr != 0 && *endptr != ',')) { - fprintf(stderr, "** \"%s\" is not a valid error number list\n", arg_error); + fatal_printf("** \"%s\" is not a valid error number list\n", arg_error); yield = 1; goto EXIT; } errcode = (int)li; - printf("Error %d: ", errcode); + cprintf(clr_api_error, "Error %d: ", errcode); PCRE2_GET_ERROR_MESSAGE(len, errcode); if (len < 0) { switch (len) { case PCRE2_ERROR_BADDATA: - printf("PCRE2_ERROR_BADDATA (unknown error number)"); + cprintf(clr_test_error, "PCRE2_ERROR_BADDATA (unknown error number)"); break; case PCRE2_ERROR_NOMEMORY: - printf("PCRE2_ERROR_NOMEMORY (buffer too small)"); + cprintf(clr_test_error, "PCRE2_ERROR_NOMEMORY (buffer too small)"); break; default: - printf("Unexpected return (%d) from pcre2_get_error_message()", len); + cprintf(clr_test_error, "Unexpected return (%d) from pcre2_get_error_message()", len); break; } } else { - PCHARSV(errorbuffer, 0, len, FALSE, stdout); + PCHARSV(clr_api_error, errorbuffer, 0, len, FALSE, stdout); } printf("\n"); if (*endptr == 0) goto EXIT; @@ -10774,7 +11095,7 @@ if (argc > 1 && strcmp(argv[op], "-") != 0) infile = fopen(argv[op], INPUT_MODE); if (infile == NULL) { - printf("** Failed to open \"%s\": %s\n", argv[op], strerror(errno)); + fatal_printf("** Failed to open \"%s\": %s\n", argv[op], strerror(errno)); yield = 1; goto EXIT; } @@ -10789,7 +11110,7 @@ if (argc > 2) outfile = fopen(argv[op+1], OUTPUT_MODE); if (outfile == NULL) { - printf("** Failed to open \"%s\": %s\n", 
argv[op+1], strerror(errno)); + fatal_printf("** Failed to open \"%s\": %s\n", argv[op+1], strerror(errno)); yield = 1; goto EXIT; } @@ -10799,6 +11120,10 @@ if (argc > 2) if (!quiet) print_version(outfile, TRUE); +/* Ensure anything printed after this point that +is not given an explicit colour, is printed in clr_unexpected */ +colour_end(outfile); + SET(compiled_code, NULL); #ifdef SUPPORT_PCRE2_8 @@ -10811,11 +11136,12 @@ while (notdone) uint8_t *p; int rc = PR_OK; BOOL expectdata = TEST(compiled_code, !=, NULL); + BOOL is_comment; #ifdef SUPPORT_PCRE2_8 expectdata |= preg.re_pcre2_code != NULL; #endif - if (extend_inputline(infile, buffer, expectdata? "data> " : " re> ") == NULL) + if (extend_inputline(infile, buffer, expectdata? PROMPT("data> ") : PROMPT(" re> ")) == NULL) break; /* Pre-process input lines with #if...#endif. */ @@ -10830,9 +11156,10 @@ while (notdone) /* Begin processing the line. */ - if (!INTERACTIVE(infile)) fprintf(outfile, "%s", (char *)buffer); - fflush(outfile); p = buffer; + is_comment = p[0] == '#' && (isspace(p[1]) || p[1] == '!' || p[1] == 0); + if (!INTERACTIVE(infile)) cprintf(is_comment ? clr_comment : clr_input, "%s", (char *)buffer); + fflush(outfile); if (preprocess_only && *p != '#') continue; @@ -10879,7 +11206,7 @@ while (notdone) else if (*p == '#') { - if (isspace(p[1]) || p[1] == '!' 
|| p[1] == 0) continue; + if (is_comment) continue; rc = process_command(); } @@ -10894,7 +11221,7 @@ while (notdone) while (isspace(*p)) p++; if (*p != 0) { - fprintf(outfile, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer, + cprintf(clr_test_error, "** Invalid pattern delimiter '%c' (x%x).\n", *buffer, *buffer); rc = PR_SKIP; } @@ -10904,7 +11231,7 @@ while (notdone) else if (rc == PR_ENDIF) skipping_endif = TRUE; else if (rc == PR_ABEND) { - fprintf(outfile, "** pcre2test run abandoned\n"); + cprintf(clr_test_error, "** pcre2test run abandoned\n"); yield = 1; goto EXIT; } @@ -10914,7 +11241,7 @@ while (notdone) if (skipping_endif) { - fprintf(outfile, "** Expected #endif\n"); + cprintf(clr_test_error, "** Expected #endif\n"); yield = 1; goto EXIT; } @@ -10924,18 +11251,18 @@ if (INTERACTIVE(infile)) fprintf(outfile, "\n"); if (showtotaltimes) { const char *pad = ""; - fprintf(outfile, "--------------------------------------\n"); + cprintf(clr_profiling, "--------------------------------------\n"); if (timeit > 0) { - fprintf(outfile, "Total compile time %8.2f microseconds\n", + cprintf(clr_profiling, "Total compile time %8.2f microseconds\n", ((1000000 / CLOCKS_PER_SEC) * (double)total_compile_time) / timeit); if (total_jit_compile_time > 0) - fprintf(outfile, "Total JIT compile %8.2f microseconds\n", + cprintf(clr_profiling, "Total JIT compile %8.2f microseconds\n", ((1000000 / CLOCKS_PER_SEC) * (double)total_jit_compile_time) / \ timeit); pad = " "; } - fprintf(outfile, "Total match time %s%8.2f microseconds\n", pad, + cprintf(clr_profiling, "Total match time %s%8.2f microseconds\n", pad, ((1000000 / CLOCKS_PER_SEC) * (double)total_match_time) / timeitm); } diff --git a/testdata/testinput2 b/testdata/testinput2 index 0cac6a7e4..af8a56111 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -8011,4 +8011,18 @@ a)"xI # -------------- +# Test that a memory leak has been fixed +/x/replace=r,substitute_matched + x\=null_subject + +# Test that a 
couple of double frees have been fixed +/foo/replace=bar,substitute_matched + foo\=copy_matched_subject + foo\=global,copy_matched_subject + +# Tests for reading matches from NULL subjects +// + \=null_subject,copy=0,get=0,getall + \=null_subject,copy=0,get=0,getall,dfa + # End of testinput2 diff --git a/testdata/testinputNEW b/testdata/testinputNEW new file mode 100644 index 000000000..107d535ea --- /dev/null +++ b/testdata/testinputNEW @@ -0,0 +1,98 @@ +# The test regex and subjects we will use for the following +/\w+|^$/replace=<$&>,global,substitute_matched + \=substitute_subject=,substitute_overwrite + \=null_subject + ::: + foo\=zero_terminate + foo|bar\=offset=1,substitute_offset=1 + F|OOBAR\=offset=1 + +# #################### +# ERRORS +# #################### + +# substitute_subject has the same contents, but a different pointer +# Failed: error -71: substitute subject differs from prior pcre2_match call +/\w+|^$/replace=<$&>,global,substitute_matched + \=null_subject,substitute_subject= + \=null_substitute_subject + \=substitute_subject= + foo\=substitute_subject=foo + + +# Failed: error -72: substitute subject length differs from prior pcre2_match call +# Changing the length is always an error +# (using PCRE2_ZERO_TERMINATED but with different length strings also counts) +/\w+|^$/replace=<$&>,global,substitute_matched + \=substitute_subject=x,substitute_overwrite + \=substitute_subject=x,substitute_overwrite,copy_matched_subject,zero_terminate,substitute_zero_terminate + foo\=substitute_subject=foooo,substitute_overwrite,copy_matched_subject,substitute_zero_terminate + :::\=substitute_subject=::,substitute_overwrite,copy_matched_subject,zero_terminate + foo\=substitute_subject=x,substitute_overwrite + +# Changing the start offset is not allowed +# Failed: error -73: substitute start offset differs from prior pcre2_match call +/\w+|^$/replace=<$&>,global,substitute_matched + foo\=substitute_offset=1 + :::\=offset=1,substitute_offset=0 + 
foo\=offset=1,substitute_offset=2 + :::\=offset=1,substitute_offset=10 + +# From a strict reading of the documentation, +# using a different pointer is NOT allowed with PCRE2_COPY_MATCHED_SUBJECT +# Failed: error -71: substitute subject differs from prior pcre2_match call +/\w+|^$/replace=<$&>,global,substitute_matched + \=substitute_subject=,copy_matched_subject + \=null_substitute_subject,copy_matched_subject + \=null_subject,substitute_subject=,copy_matched_subject + foo\=substitute_subject=foo,copy_matched_subject + :::\=substitute_subject=:::,copy_matched_subject + +# Tests demonstrating the precedence of errors is: prior_match error, pointer, length, then start offset +/\w+|^$/replace=<$&>,global,substitute_matched + x\=null_subject,substitute_subject=x + foo\=substitute_subject=x + :::\=substitute_subject=:::,offset=1,substitute_offset=0 + foo\=substitute_subject=x,offset=1,substitute_offset=0 + :::\=substitute_subject=x,substitute_overwrite,offset=1,substitute_offset=0 + +# #################### +# ALLOWED AND GOOD +# #################### + +# For simplicity, PCRE2_ZERO_TERMINATED vs a concrete length doesn't count as changing the length +/\w+|^$/replace=<$&>,global,substitute_matched + foo\=substitute_subject=foo,substitute_overwrite,substitute_zero_terminate + :::\=substitute_subject=:::,substitute_overwrite,zero_terminate + foo\=zero_terminate,substitute_subject=foo,substitute_overwrite + +# #################### +# ALLOWED AND BAD +# #################### + +# Keeping the length the same, the offset the same, and the pointer the same +# Is ok, even if the contents DIFFER +# (also has some fun with copy_matched_subject) +/\w+|^$/replace=<$&>,global,substitute_matched + foo|bar\=offset=1,substitute_subject=F|OOBAR,substitute_overwrite,zero_terminate + +# Using null tells substitute to use the copied match_subject +/\w+|^$/replace=<$&>,global,substitute_matched + foo|bar\=offset=1,null_substitute_subject,copy_matched_subject + 
:::\=offset=1,null_substitute_subject,copy_matched_subject + +# Still need the length to be correct when using null +# Failed: error -72: substitute subject length differs from prior pcre2_match call +/\w+|^$/replace=<$&>,global,substitute_matched + foo\=substitute_subject=x,null_substitute_subject,copy_matched_subject + :::\=substitute_subject=x,null_substitute_subject,copy_matched_subject + +# Can use substitute_zero_terminate with copy_matched_subject +/\w+|^$/replace=<$&>,global,substitute_matched + foo|bar\=offset=1,substitute_subject=x,substitute_overwrite,copy_matched_subject,substitute_zero_terminate + :::\=offset=1,substitute_subject=x,substitute_overwrite,copy_matched_subject,substitute_zero_terminate + +# Can modify text with copy_matched_subject, original text is used +/\w+|^$/replace=<$&>,global,substitute_matched + foo|bar\=offset=1,substitute_subject=F|OOBAR,substitute_overwrite,copy_matched_subject + :::\=offset=1,substitute_subject=foo,substitute_overwrite,copy_matched_subject diff --git a/testdata/testinputNEW8 b/testdata/testinputNEW8 new file mode 100644 index 000000000..ad5a7b9f6 --- /dev/null +++ b/testdata/testinputNEW8 @@ -0,0 +1,276 @@ +# UTF-8 Specific tests +# (substitute_subject does not support escape sequences, so +# invalid UTF-8 bytes are given literally, +# The normal subject however requires you use \x escapes) + +# ############ +# Non-UTF mode +# ############ + +# (the first subject is a valid UTF string, the second is a truncatated UTF string, and the third has an invalid UTF byte) +/(\d+)|(?=<)|(?<=>)/replace=<$&:$1>,substitute_unset_empty,substitute_matched + 1234\=substitute_overwrite,substitute_subject=😀 + <---\=substitute_overwrite,substitute_subject=😀 + --->\=substitute_overwrite,substitute_subject=😀 + --34\=substitute_overwrite,substitute_subject=😀 + ->--\=substitute_overwrite,substitute_subject=😀 + 123---\=substitute_overwrite,substitute_subject=∞😠+ 12----\=substitute_overwrite,substitute_subject=∞😠+ 
----->\=substitute_overwrite,substitute_subject=∞😠+ <-----\=substitute_overwrite,substitute_subject=∞😠+ 12345\=substitute_overwrite,substitute_subject=À😀 + -1234\=substitute_overwrite,substitute_subject=À😀 + ---->\=substitute_overwrite,substitute_subject=À😀 + <----\=substitute_overwrite,substitute_subject=À😀 + +# Proof that each of the above results can be obtained without using substitute_overwrite/substitute_subject +# (albeit with different regexes) +/^(....)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched + 😀 + +/^()/replace=<$&:$1>,substitute_unset_empty,substitute_matched + 😀 + +/()$/replace=<$&:$1>,substitute_unset_empty,substitute_matched + 😀 + +/(..)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched + 😀 + +/(?<=^..)()/replace=<$&:$1>,substitute_unset_empty,substitute_matched + 😀 + +/^(...)/replace=<$&:$1>,substitute_unset_empty,substitute_matched + ∞😠+ +/^(..)/replace=<$&:$1>,substitute_unset_empty,substitute_matched + ∞😠+ +/()$/replace=<$&:$1>,substitute_unset_empty,substitute_matched + ∞😠+ +/^()/replace=<$&:$1>,substitute_unset_empty,substitute_matched + ∞😠+ +/(.*)/replace=<$&:$1>,substitute_unset_empty,substitute_matched + À😀 + +/(.{4})$/replace=<$&:$1>,substitute_unset_empty,substitute_matched + À😀 + +/()$/replace=<$&:$1>,substitute_unset_empty,substitute_matched + À😀 + +/^()/replace=<$&:$1>,substitute_unset_empty,substitute_matched + À😀 + +# ############################## +# The above tests with PCRE2_UTF +# ############################## + +/(\d+)|(?=<)|(?<=>)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 1234\=substitute_overwrite,substitute_subject=😀 + <---\=substitute_overwrite,substitute_subject=😀 + --->\=substitute_overwrite,substitute_subject=😀 + --34\=substitute_overwrite,substitute_subject=😀 + ->--\=substitute_overwrite,substitute_subject=😀 + 123---\=substitute_overwrite,substitute_subject=∞😠+ 12----\=substitute_overwrite,substitute_subject=∞😠+ ----->\=substitute_overwrite,substitute_subject=∞😠+ 
<-----\=substitute_overwrite,substitute_subject=∞😠+ 12345\=substitute_overwrite,substitute_subject=À😀 + -1234\=substitute_overwrite,substitute_subject=À😀 + ---->\=substitute_overwrite,substitute_subject=À😀 + <----\=substitute_overwrite,substitute_subject=À😀 + +# Proof that each of the above non-error results can be obtained without using substitute_overwrite/substitute_subject +# (albeit with different regexes) + +/(.)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + +/^()/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + +/()$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + +# ##################################### +# The above tests with PCRE2_UTF and \C +# ##################################### + +/(\d+)|(?=<)|(?<=>)|($\C^)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 1234\=substitute_overwrite,substitute_subject=😀 + <---\=substitute_overwrite,substitute_subject=😀 + --->\=substitute_overwrite,substitute_subject=😀 + --34\=substitute_overwrite,substitute_subject=😀 + ->--\=substitute_overwrite,substitute_subject=😀 + 123---\=substitute_overwrite,substitute_subject=∞😠+ 12----\=substitute_overwrite,substitute_subject=∞😠+ ----->\=substitute_overwrite,substitute_subject=∞😠+ <-----\=substitute_overwrite,substitute_subject=∞😠+ 12345\=substitute_overwrite,substitute_subject=À😀 + -1234\=substitute_overwrite,substitute_subject=À😀 + ---->\=substitute_overwrite,substitute_subject=À😀 + <----\=substitute_overwrite,substitute_subject=À😀 + +# Proof that each of the above non-error results can be obtained without using substitute_overwrite/substitute_subject +# (albeit with different regexes) + +/^(\C\C\C\C)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + +/^(\C??)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + +/(C??)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + +/\C\C\K(\C\C)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + 
+/\C\C\K()/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + + +# ############################################ +# The above tests with PCRE2_MATCH_INVALID_UTF +# ############################################ + +/(\d+)|(?=<)|(?<=>)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + 1234\=substitute_overwrite,substitute_subject=😀 + <---\=substitute_overwrite,substitute_subject=😀 + --->\=substitute_overwrite,substitute_subject=😀 + --34\=substitute_overwrite,substitute_subject=😀 + ->--\=substitute_overwrite,substitute_subject=😀 + 123---\=substitute_overwrite,substitute_subject=∞😠+ 12----\=substitute_overwrite,substitute_subject=∞😠+ ----->\=substitute_overwrite,substitute_subject=∞😠+ <-----\=substitute_overwrite,substitute_subject=∞😠+ 12345\=substitute_overwrite,substitute_subject=À😀 + -1234\=substitute_overwrite,substitute_subject=À😀 + ---->\=substitute_overwrite,substitute_subject=À😀 + <----\=substitute_overwrite,substitute_subject=À😀 + +# Proof that most of the above non-error results can be obtained without using substitute_overwrite/substitute_subject +# I believe the ->--\=substitute_overwrite,substitute_subject=😀 case is impossible though + +/(.)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + 😀 + +/^()/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + 😀 + +/()$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + 😀 + +/^(.)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + ∞\xF0\x9F\x98 + +# This behaves differently with JIT vs. 
without JIT, so it is commented out +# /()$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf +# ∞\xF0\x9F\x98 + +/^()/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + ∞\xF0\x9F\x98 + +/(.)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + \xC0😀 + +/()$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + \xC0😀 + +/^()/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + \xC0😀 + +# ################################################### +# The above tests with PCRE2_MATCH_INVALID_UTF and \C +# ################################################### + +/(\d+)|(?=<)|(?<=>)|(^\C$)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + 1234\=substitute_overwrite,substitute_subject=😀 + <---\=substitute_overwrite,substitute_subject=😀 + --->\=substitute_overwrite,substitute_subject=😀 + --34\=substitute_overwrite,substitute_subject=😀 + ->--\=substitute_overwrite,substitute_subject=😀 + 123---\=substitute_overwrite,substitute_subject=∞😠+ 12----\=substitute_overwrite,substitute_subject=∞😠+ ----->\=substitute_overwrite,substitute_subject=∞😠+ <-----\=substitute_overwrite,substitute_subject=∞😠+ 12345\=substitute_overwrite,substitute_subject=À😀 + -1234\=substitute_overwrite,substitute_subject=À😀 + ---->\=substitute_overwrite,substitute_subject=À😀 + <----\=substitute_overwrite,substitute_subject=À😀 + +# Proof that each of the above non-error results can be obtained without using substitute_overwrite/substitute_subject +# (albeit with different regexes) + +/^(\C\C\C\C)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + +/^(\C??)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + +/(C??)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + +/\C\C\K(\C\C)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + 
+/\C\C\K()/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + +/^(\C\C\C)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + ∞\xF0\x9F\x98 + +/^(\C\C)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + ∞\xF0\x9F\x98 + +/(\C??)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + ∞\xF0\x9F\x98 + +/^(\C??)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + ∞\xF0\x9F\x98 + +/(\C\C\C\C)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + \xC0😀 + +/(\C??)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + \xC0😀 + +/^(\C??)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + \xC0😀 + +# ############################################# +# The above tests with PCRE2_NO_UTF_CHECK +# (Excluding cases where the entire subject string isn't valid UTF) +# ############################################# + +/(\d+)|(?=<)|(?<=>)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf,no_utf_check + 1234\=substitute_overwrite,substitute_subject=😀,no_utf_check + <---\=substitute_overwrite,substitute_subject=😀,no_utf_check + --->\=substitute_overwrite,substitute_subject=😀,no_utf_check + --34\=substitute_overwrite,substitute_subject=😀,no_utf_check + ->--\=substitute_overwrite,substitute_subject=😀,no_utf_check + +/(\d+)|(?=<)|(?<=>)|($\C^)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf,no_utf_check + 1234\=substitute_overwrite,substitute_subject=😀,no_utf_check + <---\=substitute_overwrite,substitute_subject=😀,no_utf_check + --->\=substitute_overwrite,substitute_subject=😀,no_utf_check + --34\=substitute_overwrite,substitute_subject=😀,no_utf_check + ->--\=substitute_overwrite,substitute_subject=😀,no_utf_check + +/(\d+)|(?=<)|(?<=>)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + 1234\=substitute_overwrite,substitute_subject=😀,no_utf_check + 
<---\=substitute_overwrite,substitute_subject=😀,no_utf_check + --->\=substitute_overwrite,substitute_subject=😀,no_utf_check + --34\=substitute_overwrite,substitute_subject=😀,no_utf_check + ->--\=substitute_overwrite,substitute_subject=😀,no_utf_check + + +/(\d+)|(?=<)|(?<=>)|(^\C$)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + 1234\=substitute_overwrite,substitute_subject=😀,no_utf_check + <---\=substitute_overwrite,substitute_subject=😀,no_utf_check + --->\=substitute_overwrite,substitute_subject=😀,no_utf_check + --34\=substitute_overwrite,substitute_subject=😀,no_utf_check + ->--\=substitute_overwrite,substitute_subject=😀,no_utf_check diff --git a/testdata/testoutput2 b/testdata/testoutput2 index dd19a8808..95df982fd 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -23059,6 +23059,31 @@ Failed: error 217 at offset 27: expected capture group number or name # -------------- +# Test that a memory leak has been fixed +/x/replace=r,substitute_matched + x\=null_subject +Failed: error -51: NULL argument passed with non-zero length + +# Test that a couple of double frees have been fixed +/foo/replace=bar,substitute_matched + foo\=copy_matched_subject + 1: bar + foo\=global,copy_matched_subject + 1: bar + +# Tests for reading matches from NULL subjects +// + \=null_subject,copy=0,get=0,getall + 0: + 0C (0) + 0G (0) + 0L + \=null_subject,copy=0,get=0,getall,dfa + 0: + 0C (0) + 0G (0) + 0L + # End of testinput2 Error -80: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data diff --git a/testdata/testoutputNEW b/testdata/testoutputNEW new file mode 100644 index 000000000..1c449496b --- /dev/null +++ b/testdata/testoutputNEW @@ -0,0 +1,139 @@ +# The test regex and subjects we will use for the following +/\w+|^$/replace=<$&>,global,substitute_matched + \=substitute_subject=,substitute_overwrite + 1: <> + \=null_subject + 1: <> + ::: + 0: ::: + foo\=zero_terminate + 1: + foo|bar\=offset=1,substitute_offset=1 + 2: 
f| + F|OOBAR\=offset=1 + 1: F| + +# #################### +# ERRORS +# #################### + +# substitute_subject has the same contents, but a different pointer +# Failed: error -71: substitute subject differs from prior pcre2_match call +/\w+|^$/replace=<$&>,global,substitute_matched + \=null_subject,substitute_subject= +Failed: error -71: substitute subject differs from prior pcre2_match call + \=null_substitute_subject +Failed: error -71: substitute subject differs from prior pcre2_match call + \=substitute_subject= +Failed: error -71: substitute subject differs from prior pcre2_match call + foo\=substitute_subject=foo +Failed: error -71: substitute subject differs from prior pcre2_match call + + +# Failed: error -72: substitute subject length differs from prior pcre2_match call +# Changing the length is always an error +# (using PCRE2_ZERO_TERMINATED but with different length strings also counts) +/\w+|^$/replace=<$&>,global,substitute_matched + \=substitute_subject=x,substitute_overwrite +Failed: error -72: substitute subject length differs from prior pcre2_match call + \=substitute_subject=x,substitute_overwrite,copy_matched_subject,zero_terminate,substitute_zero_terminate +Failed: error -72: substitute subject length differs from prior pcre2_match call + foo\=substitute_subject=foooo,substitute_overwrite,copy_matched_subject,substitute_zero_terminate +Failed: error -72: substitute subject length differs from prior pcre2_match call + :::\=substitute_subject=::,substitute_overwrite,copy_matched_subject,zero_terminate +Failed: error -72: substitute subject length differs from prior pcre2_match call + foo\=substitute_subject=x,substitute_overwrite +Failed: error -72: substitute subject length differs from prior pcre2_match call + +# Changing the start offset is not allowed +# Failed: error -73: substitute start offset differs from prior pcre2_match call +/\w+|^$/replace=<$&>,global,substitute_matched + foo\=substitute_offset=1 +Failed: error -73: substitute 
start offset differs from prior pcre2_match call + :::\=offset=1,substitute_offset=0 +Failed: error -73: substitute start offset differs from prior pcre2_match call + foo\=offset=1,substitute_offset=2 +Failed: error -73: substitute start offset differs from prior pcre2_match call + :::\=offset=1,substitute_offset=10 +Failed: error -73: substitute start offset differs from prior pcre2_match call + +# From a strict reading of the documentation, +# using a different pointer is NOT allowed with PCRE2_COPY_MATCHED_SUBJECT +# Failed: error -71: substitute subject differs from prior pcre2_match call +/\w+|^$/replace=<$&>,global,substitute_matched + \=substitute_subject=,copy_matched_subject +Failed: error -71: substitute subject differs from prior pcre2_match call + \=null_substitute_subject,copy_matched_subject + 1: <> + \=null_subject,substitute_subject=,copy_matched_subject +Failed: error -71: substitute subject differs from prior pcre2_match call + foo\=substitute_subject=foo,copy_matched_subject +Failed: error -71: substitute subject differs from prior pcre2_match call + :::\=substitute_subject=:::,copy_matched_subject +Failed: error -71: substitute subject differs from prior pcre2_match call + +# Tests demonstrating the precedence of errors is: prior_match error, pointer, length, then start offset +/\w+|^$/replace=<$&>,global,substitute_matched + x\=null_subject,substitute_subject=x +Failed: error -51: NULL argument passed with non-zero length + foo\=substitute_subject=x +Failed: error -71: substitute subject differs from prior pcre2_match call + :::\=substitute_subject=:::,offset=1,substitute_offset=0 +Failed: error -71: substitute subject differs from prior pcre2_match call + foo\=substitute_subject=x,offset=1,substitute_offset=0 +Failed: error -71: substitute subject differs from prior pcre2_match call + :::\=substitute_subject=x,substitute_overwrite,offset=1,substitute_offset=0 +Failed: error -72: substitute subject length differs from prior pcre2_match call + 
+# #################### +# ALLOWED AND GOOD +# #################### + +# For simplicity, PCRE2_ZERO_TERMINATED vs a concrete length doesn't count as changing the length +/\w+|^$/replace=<$&>,global,substitute_matched + foo\=substitute_subject=foo,substitute_overwrite,substitute_zero_terminate + 1: + :::\=substitute_subject=:::,substitute_overwrite,zero_terminate + 0: ::: + foo\=zero_terminate,substitute_subject=foo,substitute_overwrite + 1: + +# #################### +# ALLOWED AND BAD +# #################### + +# Keeping the length the same, the offset the same, and the pointer the same +# Is ok, even if the contents DIFFER +# (also has some fun with copy_matched_subject) +/\w+|^$/replace=<$&>,global,substitute_matched + foo|bar\=offset=1,substitute_subject=F|OOBAR,substitute_overwrite,zero_terminate + 2: F<|O> + +# Using null tells substitute to use the copied match_subject +/\w+|^$/replace=<$&>,global,substitute_matched + foo|bar\=offset=1,null_substitute_subject,copy_matched_subject + 2: f| + :::\=offset=1,null_substitute_subject,copy_matched_subject + 0: ::: + +# Still need the length to be correct when using null +# Failed: error -72: substitute subject length differs from prior pcre2_match call +/\w+|^$/replace=<$&>,global,substitute_matched + foo\=substitute_subject=x,null_substitute_subject,copy_matched_subject +Failed: error -72: substitute subject length differs from prior pcre2_match call + :::\=substitute_subject=x,null_substitute_subject,copy_matched_subject +Failed: error -72: substitute subject length differs from prior pcre2_match call + +# Can use substitute_zero_terminate with copy_matched_subject +/\w+|^$/replace=<$&>,global,substitute_matched + foo|bar\=offset=1,substitute_subject=x,substitute_overwrite,copy_matched_subject,substitute_zero_terminate +Failed: error -72: substitute subject length differs from prior pcre2_match call + :::\=offset=1,substitute_subject=x,substitute_overwrite,copy_matched_subject,substitute_zero_terminate +Failed: 
error -72: substitute subject length differs from prior pcre2_match call + +# Can modify text with copy_matched_subject, original text is used +/\w+|^$/replace=<$&>,global,substitute_matched + foo|bar\=offset=1,substitute_subject=F|OOBAR,substitute_overwrite,copy_matched_subject + 2: f| + :::\=offset=1,substitute_subject=foo,substitute_overwrite,copy_matched_subject + 0: ::: diff --git a/testdata/testoutputNEW8 b/testdata/testoutputNEW8 new file mode 100644 index 000000000..48b9b965b --- /dev/null +++ b/testdata/testoutputNEW8 @@ -0,0 +1,444 @@ +# UTF-8 Specific tests +# (substitute_subject does not support escape sequences, so +# invalid UTF-8 bytes are given literally, +# The normal subject however requires you use \x escapes) + +# ############ +# Non-UTF mode +# ############ + +# (the first subject is a valid UTF string, the second is a truncatated UTF string, and the third has an invalid UTF byte) +/(\d+)|(?=<)|(?<=>)/replace=<$&:$1>,substitute_unset_empty,substitute_matched + 1234\=substitute_overwrite,substitute_subject=😀 + 1: <\xf0\x9f\x98\x80:\xf0\x9f\x98\x80> + <---\=substitute_overwrite,substitute_subject=😀 + 1: <:>\xf0\x9f\x98\x80 + --->\=substitute_overwrite,substitute_subject=😀 + 1: \xf0\x9f\x98\x80<:> + --34\=substitute_overwrite,substitute_subject=😀 + 1: \xf0\x9f<\x98\x80:\x98\x80> + ->--\=substitute_overwrite,substitute_subject=😀 + 1: \xf0\x9f<:>\x98\x80 + 123---\=substitute_overwrite,substitute_subject=∞😠+ 1: <\xe2\x88\x9e:\xe2\x88\x9e>\xf0\x9f\x98 + 12----\=substitute_overwrite,substitute_subject=∞😠+ 1: <\xe2\x88:\xe2\x88>\x9e\xf0\x9f\x98 + ----->\=substitute_overwrite,substitute_subject=∞😠+ 1: \xe2\x88\x9e\xf0\x9f\x98<:> + <-----\=substitute_overwrite,substitute_subject=∞😠+ 1: <:>\xe2\x88\x9e\xf0\x9f\x98 + 12345\=substitute_overwrite,substitute_subject=À😀 + 1: <\xc0\xf0\x9f\x98\x80:\xc0\xf0\x9f\x98\x80> + -1234\=substitute_overwrite,substitute_subject=À😀 + 1: \xc0<\xf0\x9f\x98\x80:\xf0\x9f\x98\x80> + 
---->\=substitute_overwrite,substitute_subject=À😀 + 1: \xc0\xf0\x9f\x98\x80<:> + <----\=substitute_overwrite,substitute_subject=À😀 + 1: <:>\xc0\xf0\x9f\x98\x80 + +# Proof that each of the above results can be obtained without using substitute_overwrite/substitute_subject +# (albeit with different regexes) +/^(....)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched + 😀 + 1: <\xf0\x9f\x98\x80:\xf0\x9f\x98\x80> + +/^()/replace=<$&:$1>,substitute_unset_empty,substitute_matched + 😀 + 1: <:>\xf0\x9f\x98\x80 + +/()$/replace=<$&:$1>,substitute_unset_empty,substitute_matched + 😀 + 1: \xf0\x9f\x98\x80<:> + +/(..)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched + 😀 + 1: \xf0\x9f<\x98\x80:\x98\x80> + +/(?<=^..)()/replace=<$&:$1>,substitute_unset_empty,substitute_matched + 😀 + 1: \xf0\x9f<:>\x98\x80 + +/^(...)/replace=<$&:$1>,substitute_unset_empty,substitute_matched + ∞😠+ 1: <\xe2\x88\x9e:\xe2\x88\x9e>\xf0\x9f\x98 + +/^(..)/replace=<$&:$1>,substitute_unset_empty,substitute_matched + ∞😠+ 1: <\xe2\x88:\xe2\x88>\x9e\xf0\x9f\x98 + +/()$/replace=<$&:$1>,substitute_unset_empty,substitute_matched + ∞😠+ 1: \xe2\x88\x9e\xf0\x9f\x98<:> + +/^()/replace=<$&:$1>,substitute_unset_empty,substitute_matched + ∞😠+ 1: <:>\xe2\x88\x9e\xf0\x9f\x98 + +/(.*)/replace=<$&:$1>,substitute_unset_empty,substitute_matched + À😀 + 1: <\xc0\xf0\x9f\x98\x80:\xc0\xf0\x9f\x98\x80> + +/(.{4})$/replace=<$&:$1>,substitute_unset_empty,substitute_matched + À😀 + 1: \xc0<\xf0\x9f\x98\x80:\xf0\x9f\x98\x80> + +/()$/replace=<$&:$1>,substitute_unset_empty,substitute_matched + À😀 + 1: \xc0\xf0\x9f\x98\x80<:> + +/^()/replace=<$&:$1>,substitute_unset_empty,substitute_matched + À😀 + 1: <:>\xc0\xf0\x9f\x98\x80 + +# ############################## +# The above tests with PCRE2_UTF +# ############################## + +/(\d+)|(?=<)|(?<=>)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 1234\=substitute_overwrite,substitute_subject=😀 + 1: <\x{1f600}:\x{1f600}> + 
<---\=substitute_overwrite,substitute_subject=😀 + 1: <:>\x{1f600} + --->\=substitute_overwrite,substitute_subject=😀 + 1: \x{1f600}<:> + --34\=substitute_overwrite,substitute_subject=😀 +Failed: error -74 at offset 2 in subject: capture group is not a valid UTF string + here: ðŸ |<--| ˜€ + ->--\=substitute_overwrite,substitute_subject=😀 +Failed: error -74 at offset 2 in subject: capture group is not a valid UTF string + here: ðŸ |<--| ˜€ + 123---\=substitute_overwrite,substitute_subject=∞😠+Failed: error -3 at offset 3 in subject or replacement: UTF-8 error: 1 byte missing at end + here: ∞ |<--| 😠+ or here: <$& |<--| :$1> + 12----\=substitute_overwrite,substitute_subject=∞😠+Failed: error -3 at offset 3 in subject or replacement: UTF-8 error: 1 byte missing at end + here: ∞ |<--| 😠+ or here: <$& |<--| :$1> + ----->\=substitute_overwrite,substitute_subject=∞😠+Failed: error -3 at offset 3 in subject or replacement: UTF-8 error: 1 byte missing at end + here: ∞ |<--| 😠+ or here: <$& |<--| :$1> + <-----\=substitute_overwrite,substitute_subject=∞😠+Failed: error -3 at offset 3 in subject or replacement: UTF-8 error: 1 byte missing at end + here: ∞ |<--| 😠+ or here: <$& |<--| :$1> + 12345\=substitute_overwrite,substitute_subject=À😀 +Failed: error -8 at offset 0 in subject or replacement: UTF-8 error: byte 2 top bits not 0x80 + here: |<--| À😀 + or here: |<--| <$&:$1> + -1234\=substitute_overwrite,substitute_subject=À😀 +Failed: error -8 at offset 0 in subject or replacement: UTF-8 error: byte 2 top bits not 0x80 + here: |<--| À😀 + or here: |<--| <$&:$1> + ---->\=substitute_overwrite,substitute_subject=À😀 +Failed: error -8 at offset 0 in subject or replacement: UTF-8 error: byte 2 top bits not 0x80 + here: |<--| À😀 + or here: |<--| <$&:$1> + <----\=substitute_overwrite,substitute_subject=À😀 +Failed: error -8 at offset 0 in subject or replacement: UTF-8 error: byte 2 top bits not 0x80 + here: |<--| À😀 + or here: |<--| <$&:$1> + +# Proof that each of the above non-error results can 
be obtained without using substitute_overwrite/substitute_subject +# (albeit with different regexes) + +/(.)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + 1: <\x{1f600}:\x{1f600}> + +/^()/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + 1: <:>\x{1f600} + +/()$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + 1: \x{1f600}<:> + +# ##################################### +# The above tests with PCRE2_UTF and \C +# ##################################### + +/(\d+)|(?=<)|(?<=>)|($\C^)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 1234\=substitute_overwrite,substitute_subject=😀 + 1: <\x{1f600}:\x{1f600}> + <---\=substitute_overwrite,substitute_subject=😀 + 1: <:>\x{1f600} + --->\=substitute_overwrite,substitute_subject=😀 + 1: \x{1f600}<:> + --34\=substitute_overwrite,substitute_subject=😀 + 1: \x{f0}\x{9f}<\x{98}\x{80}:\x{98}\x{80}> + ->--\=substitute_overwrite,substitute_subject=😀 + 1: \x{f0}\x{9f}<:>\x{98}\x{80} + 123---\=substitute_overwrite,substitute_subject=∞😠+Failed: error -3 at offset 3 in subject or replacement: UTF-8 error: 1 byte missing at end + here: ∞ |<--| 😠+ or here: <$& |<--| :$1> + 12----\=substitute_overwrite,substitute_subject=∞😠+Failed: error -3 at offset 3 in subject or replacement: UTF-8 error: 1 byte missing at end + here: ∞ |<--| 😠+ or here: <$& |<--| :$1> + ----->\=substitute_overwrite,substitute_subject=∞😠+Failed: error -3 at offset 3 in subject or replacement: UTF-8 error: 1 byte missing at end + here: ∞ |<--| 😠+ or here: <$& |<--| :$1> + <-----\=substitute_overwrite,substitute_subject=∞😠+Failed: error -3 at offset 3 in subject or replacement: UTF-8 error: 1 byte missing at end + here: ∞ |<--| 😠+ or here: <$& |<--| :$1> + 12345\=substitute_overwrite,substitute_subject=À😀 +Failed: error -8 at offset 0 in subject or replacement: UTF-8 error: byte 2 top bits not 0x80 + here: |<--| À😀 + or here: |<--| <$&:$1> + -1234\=substitute_overwrite,substitute_subject=À😀 +Failed: error 
-8 at offset 0 in subject or replacement: UTF-8 error: byte 2 top bits not 0x80 + here: |<--| À😀 + or here: |<--| <$&:$1> + ---->\=substitute_overwrite,substitute_subject=À😀 +Failed: error -8 at offset 0 in subject or replacement: UTF-8 error: byte 2 top bits not 0x80 + here: |<--| À😀 + or here: |<--| <$&:$1> + <----\=substitute_overwrite,substitute_subject=À😀 +Failed: error -8 at offset 0 in subject or replacement: UTF-8 error: byte 2 top bits not 0x80 + here: |<--| À😀 + or here: |<--| <$&:$1> + +# Proof that each of the above non-error results can be obtained without using substitute_overwrite/substitute_subject +# (albeit with different regexes) + +/^(\C\C\C\C)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + 1: <\x{1f600}:\x{1f600}> + +/^(\C??)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + 1: <:>\x{1f600} + +/(C??)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + 1: \x{1f600}<:> + +/\C\C\K(\C\C)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + 1: \x{f0}\x{9f}<\x{98}\x{80}:\x{98}\x{80}> + +/\C\C\K()/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + 1: \x{f0}\x{9f}<:>\x{98}\x{80} + + +# ############################################ +# The above tests with PCRE2_MATCH_INVALID_UTF +# ############################################ + +/(\d+)|(?=<)|(?<=>)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + 1234\=substitute_overwrite,substitute_subject=😀 + 1: <\x{1f600}:\x{1f600}> + <---\=substitute_overwrite,substitute_subject=😀 + 1: <:>\x{1f600} + --->\=substitute_overwrite,substitute_subject=😀 + 1: \x{1f600}<:> + --34\=substitute_overwrite,substitute_subject=😀 +Failed: error -74 at offset 0 in subject: capture group is not a valid UTF string + here: |<--| 😀 + ->--\=substitute_overwrite,substitute_subject=😀 + 1: \x{f0}\x{9f}<:>\x{98}\x{80} + 123---\=substitute_overwrite,substitute_subject=∞😠+ 1: <\x{221e}:\x{221e}>\x{f0}\x{9f}\x{98} + 
12----\=substitute_overwrite,substitute_subject=∞😠+Failed: error -74 at offset 0 in subject: capture group is not a valid UTF string + here: |<--| ∞😠+ ----->\=substitute_overwrite,substitute_subject=∞😠+ 1: \x{221e}\x{f0}\x{9f}\x{98}<:> + <-----\=substitute_overwrite,substitute_subject=∞😠+ 1: <:>\x{221e}\x{f0}\x{9f}\x{98} + 12345\=substitute_overwrite,substitute_subject=À😀 +Failed: error -74 at offset 0 in subject: capture group is not a valid UTF string + here: |<--| À😀 + -1234\=substitute_overwrite,substitute_subject=À😀 + 1: \x{c0}<\x{1f600}:\x{1f600}> + ---->\=substitute_overwrite,substitute_subject=À😀 + 1: \x{c0}\x{1f600}<:> + <----\=substitute_overwrite,substitute_subject=À😀 + 1: <:>\x{c0}\x{1f600} + +# Proof that most of the above non-error results can be obtained without using substitute_overwrite/substitute_subject +# I believe the ->--\=substitute_overwrite,substitute_subject=😀 case is impossible though + +/(.)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + 😀 + 1: <\x{1f600}:\x{1f600}> + +/^()/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + 😀 + 1: <:>\x{1f600} + +/()$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + 😀 + 1: \x{1f600}<:> + +/^(.)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + ∞\xF0\x9F\x98 + 1: <\x{221e}:\x{221e}>\x{f0}\x{9f}\x{98} + +# This behaves differently with JIT vs. 
without JIT, so it is commented out +# /()$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf +# ∞\xF0\x9F\x98 + +/^()/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + ∞\xF0\x9F\x98 + 1: <:>\x{221e}\x{f0}\x{9f}\x{98} + +/(.)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + \xC0😀 + 1: \x{c0}<\x{1f600}:\x{1f600}> + +/()$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + \xC0😀 + 1: \x{c0}\x{1f600}<:> + +/^()/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + \xC0😀 + 1: <:>\x{c0}\x{1f600} + +# ################################################### +# The above tests with PCRE2_MATCH_INVALID_UTF and \C +# ################################################### + +/(\d+)|(?=<)|(?<=>)|(^\C$)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + 1234\=substitute_overwrite,substitute_subject=😀 + 1: <\x{1f600}:\x{1f600}> + <---\=substitute_overwrite,substitute_subject=😀 + 1: <:>\x{1f600} + --->\=substitute_overwrite,substitute_subject=😀 + 1: \x{1f600}<:> + --34\=substitute_overwrite,substitute_subject=😀 + 1: \x{f0}\x{9f}<\x{98}\x{80}:\x{98}\x{80}> + ->--\=substitute_overwrite,substitute_subject=😀 + 1: \x{f0}\x{9f}<:>\x{98}\x{80} + 123---\=substitute_overwrite,substitute_subject=∞😠+ 1: <\x{221e}:\x{221e}>\x{f0}\x{9f}\x{98} + 12----\=substitute_overwrite,substitute_subject=∞😠+ 1: <\x{e2}\x{88}:\x{e2}\x{88}>\x{9e}\x{f0}\x{9f}\x{98} + ----->\=substitute_overwrite,substitute_subject=∞😠+ 1: \x{221e}\x{f0}\x{9f}\x{98}<:> + <-----\=substitute_overwrite,substitute_subject=∞😠+ 1: <:>\x{221e}\x{f0}\x{9f}\x{98} + 12345\=substitute_overwrite,substitute_subject=À😀 +Failed: error -74 at offset 0 in subject: capture group is not a valid UTF string + here: |<--| À😀 + -1234\=substitute_overwrite,substitute_subject=À😀 + 1: \x{c0}<\x{1f600}:\x{1f600}> + ---->\=substitute_overwrite,substitute_subject=À😀 + 1: \x{c0}\x{1f600}<:> + 
<----\=substitute_overwrite,substitute_subject=À😀 + 1: <:>\x{c0}\x{1f600} + +# Proof that each of the above non-error results can be obtained without using substitute_overwrite/substitute_subject +# (albeit with different regexes) + +/^(\C\C\C\C)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + 1: <\x{1f600}:\x{1f600}> + +/^(\C??)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + 1: <:>\x{1f600} + +/(C??)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + 1: \x{1f600}<:> + +/\C\C\K(\C\C)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + 1: \x{f0}\x{9f}<\x{98}\x{80}:\x{98}\x{80}> + +/\C\C\K()/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf + 😀 + 1: \x{f0}\x{9f}<:>\x{98}\x{80} + +/^(\C\C\C)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + ∞\xF0\x9F\x98 + 1: <\x{221e}:\x{221e}>\x{f0}\x{9f}\x{98} + +/^(\C\C)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + ∞\xF0\x9F\x98 + 1: <\x{e2}\x{88}:\x{e2}\x{88}>\x{9e}\x{f0}\x{9f}\x{98} + +/(\C??)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + ∞\xF0\x9F\x98 + 0: \x{221e}\x{f0}\x{9f}\x{98} + +/^(\C??)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + ∞\xF0\x9F\x98 + 1: <:>\x{221e}\x{f0}\x{9f}\x{98} + +/(\C\C\C\C)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + \xC0😀 + 1: \x{c0}<\x{1f600}:\x{1f600}> + +/(\C??)$/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + \xC0😀 + 1: \x{c0}<\x{1f600}:\x{1f600}> + +/^(\C??)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + \xC0😀 + 1: <:>\x{c0}\x{1f600} + +# ############################################# +# The above tests with PCRE2_NO_UTF_CHECK +# (Excluding cases where the entire subject string isn't valid UTF) +# ############################################# + 
+/(\d+)|(?=<)|(?<=>)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf,no_utf_check + 1234\=substitute_overwrite,substitute_subject=😀,no_utf_check + 1: <\x{1f600}:\x{1f600}> + <---\=substitute_overwrite,substitute_subject=😀,no_utf_check + 1: <:>\x{1f600} + --->\=substitute_overwrite,substitute_subject=😀,no_utf_check + 1: \x{1f600}<:> + --34\=substitute_overwrite,substitute_subject=😀,no_utf_check +Failed: error -74 at offset 2 in subject: capture group is not a valid UTF string + here: ðŸ |<--| ˜€ + ->--\=substitute_overwrite,substitute_subject=😀,no_utf_check +Failed: error -74 at offset 2 in subject: capture group is not a valid UTF string + here: ðŸ |<--| ˜€ + +/(\d+)|(?=<)|(?<=>)|($\C^)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,utf,no_utf_check + 1234\=substitute_overwrite,substitute_subject=😀,no_utf_check + 1: <\x{1f600}:\x{1f600}> + <---\=substitute_overwrite,substitute_subject=😀,no_utf_check + 1: <:>\x{1f600} + --->\=substitute_overwrite,substitute_subject=😀,no_utf_check + 1: \x{1f600}<:> + --34\=substitute_overwrite,substitute_subject=😀,no_utf_check + 1: \x{f0}\x{9f}<\x{98}\x{80}:\x{98}\x{80}> + ->--\=substitute_overwrite,substitute_subject=😀,no_utf_check + 1: \x{f0}\x{9f}<:>\x{98}\x{80} + +/(\d+)|(?=<)|(?<=>)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + 1234\=substitute_overwrite,substitute_subject=😀,no_utf_check + 1: <\x{1f600}:\x{1f600}> + <---\=substitute_overwrite,substitute_subject=😀,no_utf_check + 1: <:>\x{1f600} + --->\=substitute_overwrite,substitute_subject=😀,no_utf_check + 1: \x{1f600}<:> + --34\=substitute_overwrite,substitute_subject=😀,no_utf_check +Failed: error -74 at offset 2 in subject: capture group is not a valid UTF string + here: ðŸ |<--| ˜€ + ->--\=substitute_overwrite,substitute_subject=😀,no_utf_check +Failed: error -74 at offset 2 in subject: capture group is not a valid UTF string + here: ðŸ |<--| ˜€ + + 
+/(\d+)|(?=<)|(?<=>)|(^\C$)/replace=<$&:$1>,substitute_unset_empty,substitute_matched,match_invalid_utf + 1234\=substitute_overwrite,substitute_subject=😀,no_utf_check + 1: <\x{1f600}:\x{1f600}> + <---\=substitute_overwrite,substitute_subject=😀,no_utf_check + 1: <:>\x{1f600} + --->\=substitute_overwrite,substitute_subject=😀,no_utf_check + 1: \x{1f600}<:> + --34\=substitute_overwrite,substitute_subject=😀,no_utf_check + 1: \x{f0}\x{9f}<\x{98}\x{80}:\x{98}\x{80}> + ->--\=substitute_overwrite,substitute_subject=😀,no_utf_check + 1: \x{f0}\x{9f}<:>\x{98}\x{80}