Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions RunTest
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,8 @@ while [ $# -gt 0 ] ; do
malloc|-malloc) malloc=yes;;
nojit|-nojit) nojit=yes;;
sim|-sim) shift; sim=$1;;
valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --smc-check=all-non-file --error-exitcode=70";;
valgrind-log|-valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=no --error-limit=no --smc-check=all-non-file --log-file=report.%p ";;
valgrind|-valgrind) valgrind="valgrind --tool=memcheck -q --leak-check=yes --errors-for-leak-kinds=all --smc-check=all-non-file --error-exitcode=70";;
valgrind-log|-valgrind-log) valgrind="valgrind --tool=memcheck --num-callers=30 --leak-check=yes --errors-for-leak-kinds=all --error-limit=no --smc-check=all-non-file --log-file=report.%p ";;
~*)
if expr "$1" : '~[0-9][0-9]*$' >/dev/null; then
skip="$skip `expr "$1" : '~\([0-9]*\)*$'`"
Expand Down
4 changes: 4 additions & 0 deletions src/pcre2.h.generic
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,10 @@ released, the numbers must not be changed. */
#define PCRE2_ERROR_JIT_UNSUPPORTED (-68)
#define PCRE2_ERROR_REPLACECASE (-69)
#define PCRE2_ERROR_TOOLARGEREPLACE (-70)
#define PCRE2_ERROR_DIFFERENT_SUBJECT (-71)
#define PCRE2_ERROR_DIFFERENT_LENGTH (-72)
#define PCRE2_ERROR_DIFFERENT_OFFSET (-73)
#define PCRE2_ERROR_BADUTFCAPTURE (-74)


/* Request types for pcre2_pattern_info() */
Expand Down
4 changes: 4 additions & 0 deletions src/pcre2.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,10 @@ released, the numbers must not be changed. */
#define PCRE2_ERROR_JIT_UNSUPPORTED (-68)
#define PCRE2_ERROR_REPLACECASE (-69)
#define PCRE2_ERROR_TOOLARGEREPLACE (-70)
#define PCRE2_ERROR_DIFFERENT_SUBJECT (-71)
#define PCRE2_ERROR_DIFFERENT_LENGTH (-72)
#define PCRE2_ERROR_DIFFERENT_OFFSET (-73)
#define PCRE2_ERROR_BADUTFCAPTURE (-74)


/* Request types for pcre2_pattern_info() */
Expand Down
79 changes: 40 additions & 39 deletions src/pcre2_dfa_match.c
Original file line number Diff line number Diff line change
Expand Up @@ -615,8 +615,7 @@ if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
{
if (current_subject <= start_subject) break;
current_subject--;
ACROSSCHAR(current_subject > start_subject, current_subject,
current_subject--);
BACKCHARTEST(current_subject, start_subject);
}
}
else
Expand Down Expand Up @@ -3386,46 +3385,61 @@ rws->next = NULL;
rws->size = RWS_BASE_SIZE;
rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE;

/* Recognize NULL, length 0 as an empty string. */

if (subject == NULL && length == 0) subject = null_str;

/* Plausibility checks */
if (match_data == NULL) return PCRE2_ERROR_NULL;

if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION;
if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL)
return PCRE2_ERROR_NULL;
/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
free the memory that was obtained. */
if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
{
match_data->memctl.free((void *)match_data->subject,
match_data->memctl.memory_data);
match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
}

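/* Store the data needed by pcre2_substitute. NOTE(review): when length is
PCRE2_ZERO_TERMINATED, subject is passed to PRIV(strlen) here, ahead of the
NULL-subject handling and the subject == NULL plausibility check further
down - confirm that this ordering is intended. */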
match_data->subject = match_data->original_subject = subject;
if (length == PCRE2_ZERO_TERMINATED)
{
length = PRIV(strlen)(subject);
was_zero_terminated = 1;
}
match_data->subject_length = length;
match_data->start_offset = start_offset;


if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE;
if (start_offset > length) return PCRE2_ERROR_BADOFFSET;
/* Recognize NULL, length 0 as an empty string. */

if (subject == NULL && length == 0) subject = null_str;

/* Plausibility checks */

if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return match_data->rc = PCRE2_ERROR_BADOPTION;
if (re == NULL || subject == NULL || workspace == NULL) return match_data->rc = PCRE2_ERROR_NULL;

if (wscount < 20) return match_data->rc = PCRE2_ERROR_DFA_WSSIZE;
if (start_offset > length) return match_data->rc = PCRE2_ERROR_BADOFFSET;

/* Partial matching and PCRE2_ENDANCHORED are currently not allowed at the same
time. */

if ((options & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) != 0 &&
((re->overall_options | options) & PCRE2_ENDANCHORED) != 0)
return PCRE2_ERROR_BADOPTION;
return match_data->rc = PCRE2_ERROR_BADOPTION;

/* Invalid UTF support is not available for DFA matching. */

if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)
return PCRE2_ERROR_DFA_UINVALID_UTF;
return match_data->rc = PCRE2_ERROR_DFA_UINVALID_UTF;

/* Check that the first field in the block is the magic number. If it is not,
return with PCRE2_ERROR_BADMAGIC. */

if (re->magic_number != MAGIC_NUMBER) return PCRE2_ERROR_BADMAGIC;
if (re->magic_number != MAGIC_NUMBER) return match_data->rc = PCRE2_ERROR_BADMAGIC;

/* Check the code unit width. */

if ((re->flags & PCRE2_MODE_MASK) != PCRE2_CODE_UNIT_WIDTH/8)
return PCRE2_ERROR_BADMODE;
return match_data->rc = PCRE2_ERROR_BADMODE;

/* PCRE2_NOTEMPTY and PCRE2_NOTEMPTY_ATSTART are match-time flags in the
options variable for this function. Users of PCRE2 who are not calling the
Expand All @@ -3452,7 +3466,7 @@ if ((options & PCRE2_DFA_RESTART) != 0)
{
if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
workspace[1] > (int)((wscount - 2)/INTS_PER_STATEBLOCK))
return PCRE2_ERROR_DFA_BADRESTART;
return match_data->rc = PCRE2_ERROR_DFA_BADRESTART;
}

/* Set some local values */
Expand Down Expand Up @@ -3500,7 +3514,7 @@ else
if (mcontext->offset_limit != PCRE2_UNSET)
{
if ((re->overall_options & PCRE2_USE_OFFSET_LIMIT) == 0)
return PCRE2_ERROR_BADOFFSETLIMIT;
return match_data->rc = PCRE2_ERROR_BADOFFSETLIMIT;
bumpalong_limit = subject + mcontext->offset_limit;
}
mb->callout = mcontext->callout;
Expand Down Expand Up @@ -3569,7 +3583,7 @@ switch(re->newline_convention)

default:
PCRE2_DEBUG_UNREACHABLE();
return PCRE2_ERROR_INTERNAL;
return match_data->rc = PCRE2_ERROR_INTERNAL;
}

/* Check a UTF string for validity if required. For 8-bit and 16-bit strings,
Expand All @@ -3590,7 +3604,7 @@ if (utf && (options & PCRE2_NO_UTF_CHECK) == 0)
#if PCRE2_CODE_UNIT_WIDTH != 32
unsigned int i;
if (start_match < end_subject && NOT_FIRSTCU(*start_match))
return PCRE2_ERROR_BADUTFOFFSET;
return match_data->rc = PCRE2_ERROR_BADUTFOFFSET;
for (i = re->max_lookbehind; i > 0 && check_subject > subject; i--)
{
check_subject--;
Expand Down Expand Up @@ -3667,20 +3681,9 @@ if ((re->flags & PCRE2_LASTSET) != 0)
}
}

/* If the match data block was previously used with PCRE2_COPY_MATCHED_SUBJECT,
free the memory that was obtained. */

if ((match_data->flags & PCRE2_MD_COPIED_SUBJECT) != 0)
{
match_data->memctl.free((void *)match_data->subject,
match_data->memctl.memory_data);
match_data->flags &= ~PCRE2_MD_COPIED_SUBJECT;
}

/* Fill in fields that are always returned in the match data. */

match_data->code = re;
match_data->subject = NULL; /* Default for no match */
match_data->mark = NULL;
match_data->matchedby = PCRE2_MATCHEDBY_DFA_INTERPRETER;

Expand Down Expand Up @@ -3718,7 +3721,7 @@ for (;;)
while (t < end_subject && !IS_NEWLINE(t))
{
t++;
ACROSSCHAR(t < end_subject, t, t++);
FORWARDCHARTEST(t, end_subject);
}
}
else
Expand Down Expand Up @@ -3863,7 +3866,7 @@ for (;;)
while (start_match < end_subject && !WAS_NEWLINE(start_match))
{
start_match++;
ACROSSCHAR(start_match < end_subject, start_match, start_match++);
FORWARDCHARTEST(start_match, end_subject);
}
}
else
Expand Down Expand Up @@ -4039,8 +4042,6 @@ for (;;)
match_data->ovector[0] = (PCRE2_SIZE)(start_match - subject);
match_data->ovector[1] = (PCRE2_SIZE)(end_subject - subject);
}
match_data->subject_length = length;
match_data->start_offset = start_offset;
match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject);
match_data->rightchar = (PCRE2_SIZE)(mb->last_used_ptr - subject);
match_data->startchar = (PCRE2_SIZE)(start_match - subject);
Expand All @@ -4051,13 +4052,13 @@ for (;;)
length = CU2BYTES(length + was_zero_terminated);
match_data->subject = match_data->memctl.malloc(length,
match_data->memctl.memory_data);
if (match_data->subject == NULL) return PCRE2_ERROR_NOMEMORY;
if (match_data->subject == NULL) return match_data->rc = PCRE2_ERROR_NOMEMORY;
memcpy((void *)match_data->subject, subject, length);
match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
}
else
{
if (rc >= 0 || rc == PCRE2_ERROR_PARTIAL) match_data->subject = subject;
if (rc >= 0 || rc == PCRE2_ERROR_PARTIAL) match_data->subject = subject == null_str ? NULL : subject;
}
goto EXIT;
}
Expand All @@ -4070,7 +4071,7 @@ for (;;)
#ifdef SUPPORT_UNICODE
if (utf)
{
ACROSSCHAR(start_match < end_subject, start_match, start_match++);
FORWARDCHARTEST(start_match, end_subject);
}
#endif
if (start_match > end_subject) break;
Expand Down Expand Up @@ -4101,7 +4102,7 @@ while (rws->next != NULL)
mb->memctl.free(next, mb->memctl.memory_data);
}

return rc;
return match_data->rc = rc;
}

/* These #undefs are here to enable unity builds with CMake. */
Expand Down
4 changes: 4 additions & 0 deletions src/pcre2_error.c
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,10 @@ static const unsigned char match_error_texts[] =
"error performing replacement case transformation\0"
/* 70 */
"replacement too large (longer than PCRE2_SIZE)\0"
"substitute subject differs from prior pcre2_match call\0"
"substitute subject length differs from prior pcre2_match call\0"
"substitute start offset differs from prior pcre2_match call\0"
"capture group is not a valid UTF string\0"
;


Expand Down
20 changes: 6 additions & 14 deletions src/pcre2_intmodedep.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,8 @@ pcre2_printint_inc.h file). We undefine them here so that they can be re-defined
multiple inclusions. Not all of these are used in pcre2test, but it's easier
just to undefine them all. */

#undef ACROSSCHAR
#undef BACKCHAR
#undef BACKCHARTEST
#undef BYTES2CU
#undef CHMAX_255
#undef CU2BYTES
Expand Down Expand Up @@ -274,9 +274,9 @@ UTF support is omitted, we don't even define them. */
#define PUTCHAR(c, p) (*p = c, 1)
/* #define GETCHARLENTEST(c, eptr, len) */
/* #define BACKCHAR(eptr) */
/* #define BACKCHARTEST(eptr,start) */
/* #define FORWARDCHAR(eptr) */
/* #define FORWARDCHARTEST(eptr,end) */
/* #define ACROSSCHAR(condition, eptr, action) */

#else /* SUPPORT_UNICODE */

Expand Down Expand Up @@ -351,15 +351,12 @@ it is. This is called only in UTF-8 mode - we don't put a test within the macro
because almost all calls are already within a block of UTF-8 only code. */

#define BACKCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr--
#define BACKCHARTEST(eptr,start) while(eptr > start && (*eptr & 0xc0u) == 0x80u) eptr--

/* Same as above, just in the other direction. */
#define FORWARDCHAR(eptr) while((*eptr & 0xc0u) == 0x80u) eptr++
#define FORWARDCHARTEST(eptr,end) while(eptr < end && (*eptr & 0xc0u) == 0x80u) eptr++

/* Same as above, but it allows a fully customizable form. */
#define ACROSSCHAR(condition, eptr, action) \
while((condition) && ((*eptr) & 0xc0u) == 0x80u) action

/* Deposit a character into memory, returning the number of code units. */

#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
Expand Down Expand Up @@ -457,15 +454,12 @@ macro because almost all calls are already within a block of UTF-16 only
code. */

#define BACKCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr--
#define BACKCHARTEST(eptr,start) if (eptr > start && (*eptr & 0xfc00u) == 0xdc00u) eptr--

/* Same as above, just in the other direction. */
#define FORWARDCHAR(eptr) if ((*eptr & 0xfc00u) == 0xdc00u) eptr++
#define FORWARDCHARTEST(eptr,end) if (eptr < end && (*eptr & 0xfc00u) == 0xdc00u) eptr++

/* Same as above, but it allows a fully customizable form. */
#define ACROSSCHAR(condition, eptr, action) \
if ((condition) && ((*eptr) & 0xfc00u) == 0xdc00u) action

/* Deposit a character into memory, returning the number of code units. */

#define PUTCHAR(c, p) ((utf && c > MAX_UTF_SINGLE_CU)? \
Expand Down Expand Up @@ -530,16 +524,13 @@ code.
These are all no-ops since all UTF-32 characters fit into one PCRE2_UCHAR. */

#define BACKCHAR(eptr) do { } while (0)
#define BACKCHARTEST(eptr,start) do { } while (0)

/* Same as above, just in the other direction. */

#define FORWARDCHAR(eptr) do { } while (0)
#define FORWARDCHARTEST(eptr,end) do { } while (0)

/* Same as above, but it allows a fully customizable form. */

#define ACROSSCHAR(condition, eptr, action) do { } while (0)

/* Deposit a character into memory, returning the number of code units. */

#define PUTCHAR(c, p) (*p = c, 1)
Expand Down Expand Up @@ -676,6 +667,7 @@ typedef struct pcre2_real_match_data {
pcre2_memctl memctl; /* Memory control fields */
const pcre2_real_code *code; /* The pattern used for the match */
PCRE2_SPTR subject; /* The subject that was matched */
PCRE2_SPTR original_subject; /* The subject pointer as passed to the match function */
PCRE2_SPTR mark; /* Pointer to last mark */
struct heapframe *heapframes; /* Backtracking frames heap memory */
PCRE2_SIZE heapframes_size; /* Malloc-ed size */
Expand Down
13 changes: 7 additions & 6 deletions src/pcre2_jit_match_inc.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,8 @@ pcre2_jit_match(const pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length,
(void)length;
(void)start_offset;
(void)options;
(void)match_data;
(void)mcontext;
return PCRE2_ERROR_JIT_BADOPTION;
return match_data->rc = PCRE2_ERROR_JIT_BADOPTION;

#else /* SUPPORT_JIT */

Expand All @@ -118,13 +117,18 @@ jit_arguments arguments;
int rc;
int index = 0;

/* Store the data needed by pcre2_substitute. */
match_data->subject = match_data->original_subject = subject;
match_data->subject_length = length;
match_data->start_offset = start_offset;

if ((options & PCRE2_PARTIAL_HARD) != 0)
index = 2;
else if ((options & PCRE2_PARTIAL_SOFT) != 0)
index = 1;

if (functions == NULL || functions->executable_funcs[index] == NULL)
return PCRE2_ERROR_JIT_BADOPTION;
return match_data->rc = PCRE2_ERROR_JIT_BADOPTION;

/* Sanity checks should be handled by pcre2_match. */
arguments.str = subject + start_offset;
Expand Down Expand Up @@ -176,9 +180,6 @@ else
if (rc > (int)oveccount)
rc = 0;
match_data->code = re;
match_data->subject = (rc >= 0 || rc == PCRE2_ERROR_PARTIAL)? subject : NULL;
match_data->subject_length = length;
match_data->start_offset = start_offset;
match_data->rc = rc;
match_data->startchar = arguments.startchar_ptr - subject;
match_data->leftchar = 0;
Expand Down
Loading