From 9af0992e597fbc6ffdf7e240d0ea20fc17336c12 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 27 Sep 2025 18:46:30 -0600 Subject: [PATCH 01/10] embed.fnc: Change NULLOK to NN for several functions Inspection showed these were wrongly categorized --- embed.fnc | 8 ++++---- proto.h | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/embed.fnc b/embed.fnc index 6c4412df7d63..5f3c2db4b7fb 100644 --- a/embed.fnc +++ b/embed.fnc @@ -3687,26 +3687,26 @@ Cp |UV |to_uni_upper |UV c \ |NN STRLEN *lenp Cp |UV |to_utf8_fold_flags_ \ |NN const U8 *p \ - |NULLOK const U8 *e \ + |NN const U8 *e \ |NN U8 *ustrp \ |NULLOK STRLEN *lenp \ |U8 flags Cp |UV |to_utf8_lower_flags_ \ |NN const U8 *p \ - |NULLOK const U8 *e \ + |NN const U8 *e \ |NN U8 *ustrp \ |NULLOK STRLEN *lenp \ |bool flags Cp |UV |to_utf8_title_flags_ \ |NN const U8 *p \ - |NULLOK const U8 *e \ + |NN const U8 *e \ |NN U8 *ustrp \ |NULLOK STRLEN *lenp \ |bool flags Cp |UV |to_utf8_upper_flags_ \ |NN const U8 *p \ - |NULLOK const U8 *e \ + |NN const U8 *e \ |NN U8 *ustrp \ |NULLOK STRLEN *lenp \ |bool flags diff --git a/proto.h b/proto.h index fca80a5ef80d..5302f6977f21 100644 --- a/proto.h +++ b/proto.h @@ -5296,22 +5296,22 @@ Perl_to_uni_upper(pTHX_ UV c, U8 *p, STRLEN *lenp); PERL_CALLCONV UV Perl_to_utf8_fold_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, U8 flags); #define PERL_ARGS_ASSERT_TO_UTF8_FOLD_FLAGS_ \ - assert(p); assert(ustrp) + assert(p); assert(e); assert(ustrp) PERL_CALLCONV UV Perl_to_utf8_lower_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, bool flags); #define PERL_ARGS_ASSERT_TO_UTF8_LOWER_FLAGS_ \ - assert(p); assert(ustrp) + assert(p); assert(e); assert(ustrp) PERL_CALLCONV UV Perl_to_utf8_title_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, bool flags); #define PERL_ARGS_ASSERT_TO_UTF8_TITLE_FLAGS_ \ - assert(p); assert(ustrp) + assert(p); assert(e); assert(ustrp) PERL_CALLCONV UV Perl_to_utf8_upper_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, bool flags); #define PERL_ARGS_ASSERT_TO_UTF8_UPPER_FLAGS_ \ - assert(p); assert(ustrp) + assert(p); assert(e); assert(ustrp) PERL_CALLCONV bool Perl_try_amagic_bin(pTHX_ int method, int flags); From 395d2fe5648256654775ea260ebc1e1f4bd33809 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 7 Oct 2025 12:19:56 -0600 Subject: [PATCH 02/10] Move some ARGS_ASSERT macros to function start Historically the asserts had to be placed after any declarations because of limitations in the C89 Standard that have been removed in C99 which we are now following. Placing the assertions at the function beginning is clearer, and stops any issues with code below using a variable prior to its assertion. --- regexec.c | 29 +++++++++++------------------ toke.c | 17 +++++++---------- utf8.c | 37 +++++++++++++++++-------------------- util.c | 19 +++++++++---------- 4 files changed, 44 insertions(+), 58 deletions(-) diff --git a/regexec.c b/regexec.c index cbb22eace0a1..b0e7dde98c14 100644 --- a/regexec.c +++ b/regexec.c @@ -626,14 +626,13 @@ S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character, const U8* e) STATIC U8 * S_find_span_end(U8 * s, const U8 * send, const U8 span_byte) { + PERL_ARGS_ASSERT_FIND_SPAN_END; + assert(send >= s); + /* Returns the position of the first byte in the sequence between 's' and * 'send-1' inclusive that isn't 'span_byte'; returns 'send' if none found. * */ - PERL_ARGS_ASSERT_FIND_SPAN_END; - - assert(send >= s); - if ((STRLEN) (send - s) >= PERL_WORDSIZE + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s) - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK)) @@ -700,16 +699,15 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte) STATIC U8 * S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask) { + PERL_ARGS_ASSERT_FIND_NEXT_MASKED; + assert(send >= s); + assert((byte & mask) == byte); + /* Returns the position of the first byte in the sequence between 's' * and 'send-1' inclusive that when ANDed with 'mask' yields 'byte'; * returns 'send' if none found. It uses word-level operations instead of * byte to speed up the process */ - PERL_ARGS_ASSERT_FIND_NEXT_MASKED; - - assert(send >= s); - assert((byte & mask) == byte); - #ifndef EBCDIC if ((STRLEN) (send - s) >= PERL_WORDSIZE @@ -778,17 +776,16 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask) STATIC U8 * S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask) { + PERL_ARGS_ASSERT_FIND_SPAN_END_MASK; + assert(send >= s); + assert((span_byte & mask) == span_byte); + /* Returns the position of the first byte in the sequence between 's' and * 'send-1' inclusive that when ANDed with 'mask' isn't 'span_byte'. * 'span_byte' should have been ANDed with 'mask' in the call of this * function. Returns 'send' if none found. Works like find_span_end(), * except for the AND */ - PERL_ARGS_ASSERT_FIND_SPAN_END_MASK; - - assert(send >= s); - assert((span_byte & mask) == span_byte); - if ((STRLEN) (send - s) >= PERL_WORDSIZE + PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s) - (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK)) @@ -11793,10 +11790,6 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target) bool retval = true; SCX_enum * ret_script = NULL; - assert(send >= s); - - PERL_ARGS_ASSERT_ISSCRIPT_RUN; - /* All code points in 0..255 are either Common or Latin, so must be a * script run. We can return immediately unless we need to know which * script it is. */ diff --git a/toke.c b/toke.c index 439a4154ffd6..09067b1550ba 100644 --- a/toke.c +++ b/toke.c @@ -1886,16 +1886,15 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash) STATIC void S_incline(pTHX_ const char *s, const char *end) { + PERL_ARGS_ASSERT_INCLINE; + assert(end >= s); + const char *t; const char *n; const char *e; line_t line_num; UV uv; - PERL_ARGS_ASSERT_INCLINE; - - assert(end >= s); - COPLINE_INC_WITH_HERELINES; if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL && s+1 == PL_bufend && *s == ';') { @@ -2855,6 +2854,10 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s, const bool is_utf8, const char ** error_msg) { + PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; + assert(e >= s); + assert(s > (char *) 3); + /* points to first character of interior of \N{}, to one beyond the * interior, hence to the "}". Finds what the name resolves to, returning * an SV* containing it; NULL if no valid one found. @@ -2875,12 +2878,6 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s, const char* context = s - 3; STRLEN context_len = e - context + 1; /* include all of \N{...} */ - - PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; - - assert(e >= s); - assert(s > (char *) 3); - while (s < e && isBLANK(*s)) { s++; } diff --git a/utf8.c b/utf8.c index 4d8106d2ef1a..9aa8c0ece519 100644 --- a/utf8.c +++ b/utf8.c @@ -724,6 +724,10 @@ S_does_utf8_overflow(const U8 * const s, const U8 * e) STRLEN Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags) { + PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_; + assert(e > s); + assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE)); + SSize_t len, full_len; /* An internal helper function. @@ -751,12 +755,6 @@ Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags) * determined with just the first one or two bytes. * */ - - PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_; - - assert(e > s); - assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE)); - full_len = UTF8SKIP(s); len = e - s; @@ -841,6 +839,10 @@ Size_t Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e, const bool require_partial) { + PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_; + assert(s0 < e); + assert(*s0 == I8_TO_NATIVE_UTF8(0xFF)); + /* This is called to determine if the UTF-8 sequence starting at s0 and * continuing for up to one full character of bytes, but looking no further * than 'e - 1', is legal. *s0 must be 0xFF (or whatever the native @@ -867,11 +869,6 @@ Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e, const U8 *s = s0 + 1; const U8 *send = e; - PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_; - - assert(s0 < e); - assert(*s0 == I8_TO_NATIVE_UTF8(0xFF)); - send = s + MIN(UTF8_MAXBYTES - 1, e - s); while (s < send) { if (! UTF8_IS_CONTINUATION(*s)) { @@ -4247,6 +4244,9 @@ STATIC UV S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e, U8 * ustrp, STRLEN *lenp) { + PERL_ARGS_ASSERT_TURKIC_FC; + assert(e > p); + /* Returns 0 if the foldcase of the input UTF-8 encoded sequence from * p0..e-1 according to Turkic rules is the same as for non-Turkic. * Otherwise, it returns the first code point of the Turkic foldcased @@ -4257,9 +4257,6 @@ S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e, * I WITH DOT ABOVE form a case pair, as do 'I' and LATIN SMALL LETTER * DOTLESS I */ - PERL_ARGS_ASSERT_TURKIC_FC; - assert(e > p); - if (UNLIKELY(*p == 'I')) { *lenp = 2; ustrp[0] = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I); @@ -4282,15 +4279,15 @@ STATIC UV S_turkic_lc(pTHX_ const U8 * const p0, const U8 * const e, U8 * ustrp, STRLEN *lenp) { + PERL_ARGS_ASSERT_TURKIC_LC; + assert(e > p0); + /* Returns 0 if the lowercase of the input UTF-8 encoded sequence from * p0..e-1 according to Turkic rules is the same as for non-Turkic. * Otherwise, it returns the first code point of the Turkic lowercased * sequence, and the entire sequence will be stored in *ustrp. ustrp will * contain *lenp bytes */ - PERL_ARGS_ASSERT_TURKIC_LC; - assert(e > p0); - /* A 'I' requires context as to what to do */ if (UNLIKELY(*p0 == 'I')) { const U8 * p = p0 + 1; @@ -4328,6 +4325,9 @@ STATIC UV S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e, U8 * ustrp, STRLEN *lenp) { + PERL_ARGS_ASSERT_TURKIC_UC; + assert(e > p); + /* Returns 0 if the upper or title-case of the input UTF-8 encoded sequence * from p0..e-1 according to Turkic rules is the same as for non-Turkic. * Otherwise, it returns the first code point of the Turkic upper or @@ -4338,9 +4338,6 @@ S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e, * I WITH DOT ABOVE form a case pair, as do 'I' and LATIN SMALL LETTER * DOTLESS I */ - PERL_ARGS_ASSERT_TURKIC_UC; - assert(e > p); - if (*p == 'i') { *lenp = 2; ustrp[0] = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE); diff --git a/util.c b/util.c index f052a6610c04..e0849d315916 100644 --- a/util.c +++ b/util.c @@ -612,13 +612,13 @@ Perl_delimcpy_no_escape(char *to, const char *to_end, const char *from, const char *from_end, const int delim, I32 *retlen) { + PERL_ARGS_ASSERT_DELIMCPY_NO_ESCAPE; + const char * delim_pos; ptrdiff_t from_len = from_end - from; ptrdiff_t to_len = to_end - to; SSize_t copy_len; - PERL_ARGS_ASSERT_DELIMCPY_NO_ESCAPE; - assert(from_len >= 0); assert(to_len >= 0); @@ -717,14 +717,14 @@ Perl_delimcpy(char *to, const char *to_end, const char *from, const char *from_end, const int delim, I32 *retlen) { - const char * const orig_to = to; - ptrdiff_t copy_len = 0; - bool stopped_early = FALSE; /* Ran out of room to copy to */ - PERL_ARGS_ASSERT_DELIMCPY; assert(from_end >= from); assert(to_end >= to); + const char * const orig_to = to; + ptrdiff_t copy_len = 0; + bool stopped_early = FALSE; /* Ran out of room to copy to */ + /* Don't use the loop for the trivial case of the first character being the * delimiter; otherwise would have to worry inside the loop about backing * up before the start of 'from' */ @@ -1099,6 +1099,9 @@ a littlestr of "ab\n", SvTAIL matches as: char * Perl_fbm_instr(pTHX_ unsigned char *big, unsigned char *bigend, SV *littlestr, U32 flags) { + PERL_ARGS_ASSERT_FBM_INSTR; + assert(bigend >= big); + unsigned char *s; STRLEN l; const unsigned char *little = (const unsigned char *)SvPV_const(littlestr,l); @@ -1107,10 +1110,6 @@ Perl_fbm_instr(pTHX_ unsigned char *big, unsigned char *bigend, SV *littlestr, U bool valid = SvVALID(littlestr); bool tail = valid ? cBOOL(SvTAIL(littlestr)) : FALSE; - PERL_ARGS_ASSERT_FBM_INSTR; - - assert(bigend >= big); - if ((STRLEN)(bigend - big) < littlelen) { if ( tail && ((STRLEN)(bigend - big) == littlelen - 1) From fd095094c23e9381af2818b3f66db4c13435f598 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 27 Sep 2025 16:29:48 -0600 Subject: [PATCH 03/10] regen/embed.pl: Remove redundant push --- regen/embed.pl | 2 -- 1 file changed, 2 deletions(-) diff --git a/regen/embed.pl b/regen/embed.pl index 7254be71b47a..e427fa6f8c5e 100755 --- a/regen/embed.pl +++ b/regen/embed.pl @@ -321,8 +321,6 @@ sub generate_proto_h { warn "$func: $arg should not have NZ\n" if $nz; } - push( @nonnull, $n ) if $nn; - # Make sure each arg has at least a type and a var name. # An arg of "int" is valid C, but want it to be "int foo". my $argtype = ( $arg =~ m/^(\w+(?:\s*\*+)?)/ )[0]; From 1998ffdcdbc4739e821e88f22081583fb4c0134c Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 27 Sep 2025 16:33:03 -0600 Subject: [PATCH 04/10] regen/embed.pl: Comments/white-space Wrap to fit in 80 column terminal window --- regen/embed.pl | 105 ++++++++++++++++++++++++++++++------------------- 1 file changed, 64 insertions(+), 41 deletions(-) diff --git a/regen/embed.pl b/regen/embed.pl index e427fa6f8c5e..adbaf15ab380 100755 --- a/regen/embed.pl +++ b/regen/embed.pl @@ -182,11 +182,13 @@ sub generate_proto_h { if $flags =~ tr/Sp// > 1; if ($has_mflag) { if ($flags =~ /S/) { - die_at_end "$plain_func: m and S flags are mutually exclusive"; + die_at_end + "$plain_func: m and S flags are mutually exclusive"; } } else { - die_at_end "$plain_func: u flag only usable with m" if $flags =~ /u/; + die_at_end "$plain_func: u flag only usable with m" + if $flags =~ /u/; } my ($static_flag, @extra_static_flags)= $flags =~/([SsIi])/g; @@ -194,7 +196,8 @@ sub generate_proto_h { if (@extra_static_flags) { my $flags_str = join ", ", $static_flag, @extra_static_flags; $flags_str =~ s/, (\w)\z/ and $1/; - die_at_end "$plain_func: flags $flags_str are mutually exclusive\n"; + die_at_end + "$plain_func: flags $flags_str are mutually exclusive\n"; } my $static_inline = 0; @@ -217,7 +220,8 @@ sub generate_proto_h { }->{$static_flag}; } $retval = "$type $retval"; - die_at_end "Don't declare static function '$plain_func' pure" if $flags =~ /P/; + die_at_end "Don't declare static function '$plain_func' pure" + if $flags =~ /P/; $static_inline = $type =~ /^PERL_STATIC(?:_FORCE)?_INLINE/; } else { @@ -243,26 +247,26 @@ sub generate_proto_h { if $flags =~ /M/ && $flags !~ /p/; my $C_required_flags = '[pIimbs]'; die_at_end - "For '$plain_func', C flag requires one of $C_required_flags] flags" - if $flags =~ /C/ - && ($flags !~ /$C_required_flags/ + "For '$plain_func', C flag requires one of $C_required_flags] flags" + if $flags =~ /C/ + && ($flags !~ /$C_required_flags/ - # Notwithstanding the - # above, if the name won't - # clash with a user name, - # it's ok. - && $plain_func !~ /^[Pp]erl/); + # Notwithstanding the + # above, if the name won't + # clash with a user name, + # it's ok. + && $plain_func !~ /^[Pp]erl/); die_at_end "For '$plain_func', X flag requires one of [Iip] flags" - if $flags =~ /X/ && $flags !~ /[Iip]/; + if $flags =~ /X/ && $flags !~ /[Iip]/; die_at_end "For '$plain_func', X and m flags are mutually exclusive" if $flags =~ /X/ && $has_mflag; die_at_end "For '$plain_func', [Ii] with [ACX] requires p flag" - if $flags =~ /[Ii]/ && $flags =~ /[ACX]/ && $flags !~ /p/; + if $flags =~ /[Ii]/ && $flags =~ /[ACX]/ && $flags !~ /p/; die_at_end "For '$plain_func', b and m flags are mutually exclusive" . " (try M flag)" if $flags =~ /b/ && $has_mflag; die_at_end "For '$plain_func', b flag without M flag requires D flag" - if $flags =~ /b/ && $flags !~ /M/ && $flags !~ /D/; + if $flags =~ /b/ && $flags !~ /M/ && $flags !~ /D/; die_at_end "For '$plain_func', I and i flags are mutually exclusive" if $flags =~ tr/Ii// > 1; @@ -273,8 +277,9 @@ sub generate_proto_h { $ret .= @$args ? "pTHX_ " : "pTHX"; } if (@$args) { - die_at_end "$plain_func: n flag is contradicted by having arguments" - if $flags =~ /n/; + die_at_end + "$plain_func: n flag is contradicted by having arguments" + if $flags =~ /n/; my $n; for my $arg ( @$args ) { ++$n; @@ -302,8 +307,11 @@ sub generate_proto_h { $arg =~ s/\s+$//; $arg =~ s/\s{2,}/ /g; - die_at_end ":$func: $arg Use only one of NN, NULLOK, and NZ" - if 0 + $nn + $nz + $nullok > 1; + # Note that we don't care if you say e.g., 'NN' multiple + # times + die_at_end + ":$func: $arg Use only one of NN, NULLOK, and NZ" + if 0 + $nn + $nz + $nullok > 1; push( @nonnull, $n ) if $nn; @@ -345,13 +353,15 @@ sub generate_proto_h { && exists $type_asserts{$argtype}) { my $type_assert = - $type_asserts{$argtype} =~ s/__arg__/$argname/gr; - $type_assert = "!$argname || $type_assert" if $nullok; + $type_asserts{$argtype} =~ s/__arg__/$argname/gr; + $type_assert = "!$argname || $type_assert" + if $nullok; push @asserts, "assert($type_assert)"; } } - } - } + } # End of this argument + } # End of loop through all arguments + $ret .= join ", ", @$args; } else { @@ -403,7 +413,8 @@ sub generate_proto_h { $argc = 0; my @fmts = grep $args->[$_] =~ /\b(f|pat|fmt)$/, 0..$#$args; if (@fmts != 1) { - die "embed.pl: '$plain_func': can't determine pattern arg\n"; + die + "embed.pl: '$plain_func': can't determine pattern arg\n"; } $pat = $fmts[0] + 1; } @@ -411,7 +422,8 @@ sub generate_proto_h { ? '__attribute__format__' : '__attribute__format__null_ok__'; if ($plain_func =~ /strftime/) { - push @attrs, sprintf "%s(__strftime__,%s1,0)", $macro, $prefix; + push @attrs, sprintf "%s(__strftime__,%s1,0)", + $macro, $prefix; } else { push @attrs, sprintf "%s(__printf__,%s%d,%s)", $macro, @@ -504,7 +516,8 @@ sub generate_proto_h { # re-align defines so that the definitions line up at the 48th col # as much as possible. if ($line_data->{sub_type} eq "#define") { - $line_data->{line}=~s/^(\s*#\s*define\s+\S+?(?:\([^()]*\))?\s)(\s*)(\S+)/ + $line_data->{line} =~ + s/^(\s*#\s*define\s+\S+?(?:\([^()]*\))?\s)(\s*)(\S+)/ sprintf "%-48s%s", $1, $3/e; } }; @@ -580,7 +593,8 @@ sub embed_h { } my $level= $_->{level}; my $embed= $_->{embed} or next; - my ($flags,$retval,$func,$args) = @{$embed}{qw(flags return_type name args)}; + my ($flags,$retval,$func,$args) = + @{$embed}{qw(flags return_type name args)}; my $full_name = full_name($func, $flags); next if $full_name eq $func; # Don't output a no-op. @@ -636,10 +650,11 @@ sub embed_h { my $use_va_list = $argc && $args->[-1] =~ /\.\.\./; if($use_va_list) { - # CPP has trouble with empty __VA_ARGS__ and comma joining, - # so we'll have to eat an extra params here. + # CPP has trouble with empty __VA_ARGS__ and comma + # joining, so we'll have to eat an extra params here. if($argc < 2) { - die "Cannot use ... as the only parameter to a macro ($func)\n"; + die "Cannot use ... as the only parameter to a macro" + . " ($func)\n"; } $argc -= 2; } @@ -664,15 +679,19 @@ sub embed_h { } $ret .= ")\n"; if($has_compat_macro{$func}) { - # Make older ones available only when !MULTIPLICITY or PERL_CORE or PERL_WANT_VARARGS - # These should not be done uncondtionally because existing - # code might call e.g. warn() without aTHX in scope. - $ret = "#${ind}if !defined(MULTIPLICITY) || defined(PERL_CORE) || defined(PERL_WANT_VARARGS)\n" . - $ret . - "#${ind}endif\n"; + # Make older ones available only when !MULTIPLICITY or + # PERL_CORE or PERL_WANT_VARARGS These should not be done + # uncondtionally because existing code might call e.g. + # warn() without aTHX in scope. + $ret = "#${ind}if !defined(MULTIPLICITY)" + . " || defined(PERL_CORE)" + . " || defined(PERL_WANT_VARARGS)\n" + . $ret + . "#${ind}endif\n"; } } - $ret = "#${ind}ifndef NO_MATHOMS\n$ret#${ind}endif\n" if $flags =~ /b/; + $ret = "#${ind}ifndef NO_MATHOMS\n$ret#${ind}endif\n" + if $flags =~ /b/; } $lines .= $ret; } @@ -720,7 +739,7 @@ sub generate_embed_h { * disable them. */ # define sv_setptrobj(rv,ptr,name) sv_setref_iv(rv,name,PTR2IV(ptr)) - # define sv_setptrref(rv,ptr) sv_setref_iv(rv,NULL,PTR2IV(ptr)) + # define sv_setptrref(rv,ptr) sv_setref_iv(rv,NULL,PTR2IV(ptr)) #endif #if !defined(PERL_CORE) && !defined(PERL_NOCOMPAT) @@ -736,7 +755,8 @@ sub generate_embed_h { foreach (@$all) { my $embed= $_->{embed} or next; - my ($flags, $retval, $func, $args) = @{$embed}{qw(flags return_type name args)}; + my ($flags, $retval, $func, $args) = + @{$embed}{qw(flags return_type name args)}; next unless $flags =~ /O/; my $alist = join ",", @az[0..$#$args]; @@ -750,7 +770,9 @@ sub generate_embed_h { provides a set of compatibility functions that don't take an extra argument but grab the context pointer using the macro dTHX. */ - #if defined(MULTIPLICITY) && !defined(PERL_NO_SHORT_NAMES) && !defined(PERL_WANT_VARARGS) + #if defined(MULTIPLICITY) \ + && !defined(PERL_NO_SHORT_NAMES) \ + && !defined(PERL_WANT_VARARGS) END foreach (@have_compatibility_macros) { @@ -811,7 +833,8 @@ sub generate_embedvar_h { sub update_headers { my ($all, $api, $ext, $core) = setup_embed(); # see regen/embed_lib.pl generate_proto_h($all); - die_at_end "$unflagged_pointers pointer arguments to clean up\n" if $unflagged_pointers; + die_at_end "$unflagged_pointers pointer arguments to clean up\n" + if $unflagged_pointers; generate_embed_h($all, $api, $ext, $core); generate_embedvar_h(); die "$error_count errors found" if $error_count; From 6d8318d50e7db6672067c8456ed187fa8f408f7a Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 6 Oct 2025 06:15:18 -0600 Subject: [PATCH 05/10] embed.fnc: Comments only This: * reorders some comments to make more sense * makes minor clarifications * removes comments that no longer make sense * adds comments about moving ARGS_ASSERT macros to the top of their functions when the code around them gets changed anyway. --- embed.fnc | 57 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 19 deletions(-) diff --git a/embed.fnc b/embed.fnc index 5f3c2db4b7fb..8fe44fd1c9b2 100644 --- a/embed.fnc +++ b/embed.fnc @@ -158,10 +158,34 @@ : that performs parameter sanity validation. If the function is named : 'foo()', the generated macro will be named 'PERL_ARGS_ASSERT_FOO'. You : should place a call to that macro in foo() before any other code. It will -: automatically expand to whatever checking is currently generated for foo +: automatically expand to whatever checking is currently generated for 'foo' : (often none). These are in the form of assert() calls, so they are only : activated for DEBUGGING builds. : +: Currently, it is optional to include an empty ARGS_ASSERT macro in your +: functions. But a porting test enforces that a non-empty one does get +: included. The call should be at the top of your function so that the +: sanity checks have passed before anything tries to use an argument. When +: writing a new function, add the macro even if not required, and you'll +: never have to go back and add one later when more checks do get added. +: +: (Much of the perl core was written assuming the ARGS_ASSERT macro needed to +: be placed after any declarations because of the C89 Standard. That is no +: longer true with C99; feel free when modifying code in the vicinity to move +: this call to the very beginning of the function.) +: +: The contents of ARGS_ASSERT are determined by +: 1) constraints you give in this file. Each such constraint is +: positioned in the input between the '|' that marks the beginning of +: a parameter definition, and the the definition itself, like +: |NN const char * const name +: 2) the internal logic used by code that reads this file. +: 3) explicit asserts that you add in this file. +: +: Sections below give more details of each item. +: +: *** Pointer Parameter Constraints +: : You must specify what checking is needed for all pointer arguments. If the : pointer is allowed to point to NULL, prefix that argument with 'NULLOK' : (following the template of the many entries in this file that have that). @@ -169,17 +193,23 @@ : The reason for this requirement is to tell the maintainers that you have : considered the question about the argument, and this is the answer. : +: *** Non-pointer Parameter Constraints +: : For a numeric argument, you may specify that it can't be 0 by using 'NZ' : +: *** Automatically generated checks +: : regen/embed.pl may automatically add further checking for any argument as -: it deems desirable. You can override this by specifying 'NOCHECK' +: it deems desirable. You can disable this by specifying 'NOCHECK' for the +: parameter +: +: Currently this further checking is just for pointer parameters that point +: to AVs, CVs or HVs. The check is that the SV being pointed to is of the +: intended type, by inspecting its SvTYPE(). For some functions this check +: may be inappropriate, as in rare cases the arguments passed may not be of +: the correct type. As already mentioned, NOCHECK suppresses this check. : -: Currently this further checking is just for pointer parameters that -: point to AVs, CVs or HVs. The check is that the SV being pointed to is -: of the intended type, by inspecting its SvTYPE(). For some functions -: this check may be inappropriate, as in rare cases the arguments passed -: may not be of the correct type. As already mentioned, NOCHECK -: suppresses this check. +: *** Your custom checks : : You can specify your own checking beyond these by adding any number of : assert() calls to any given entry after its final argument. Whatever you @@ -189,19 +219,8 @@ : will make it less visible to a maintainer than keeping it in the function : it applies to : -: Currently, it is optional to include an empty ARGS_ASSERT macro in your -: functions. But a porting test enforces that a non-empty one is included. -: The call should be at the top of your function so that the sanity checks -: have passed before anything tries to use an argument. When writing a new -: function, add the macro even if not required, and you'll never have to go -: back and add one later when more checks do get added -: : AUTOMATIC DOCUMENTATION GENERATION : -: Just below is a description of the relevant parts of the automatic -: documentation generation system which heavily involves this file. Below that -: is a description of all the flags used in this file. -: : Scattered around the perl source are lines of the form: : : =for apidoc name ... From fcce41e17dfc10eb9836b921766fa16c9d60bace Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 6 Oct 2025 06:33:26 -0600 Subject: [PATCH 06/10] regen/embed.pl: Add ability to assert(s < e) Where s is a pointer into a string, and e is the end of it. --- autodoc.pl | 2 +- embed.fnc | 69 +++++++++++++++++--- regen/embed.pl | 174 +++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 232 insertions(+), 13 deletions(-) diff --git a/autodoc.pl b/autodoc.pl index 711246f15664..4ff15ff3f80c 100644 --- a/autodoc.pl +++ b/autodoc.pl @@ -620,7 +620,7 @@ sub check_and_add_proto_defn { $flags .= "n" if $flags =~ /#/; # No threads, arguments for #ifdef my @munged_args= $args_ref->@*; - s/\b(?:NN|NULLOK)\b\s+//g for @munged_args; + s/\b(?:NN|NULLOK|[SM]PTR|EPTRQ?)\b\s+//g for @munged_args; my $flags_sans_d = $flags; my $docs_expected = $flags_sans_d =~ s/d//g; diff --git a/embed.fnc b/embed.fnc index 8fe44fd1c9b2..6c97c56f28d7 100644 --- a/embed.fnc +++ b/embed.fnc @@ -182,20 +182,73 @@ : 2) the internal logic used by code that reads this file. : 3) explicit asserts that you add in this file. : -: Sections below give more details of each item. +: Sections below give more details of each item. For readability, +: constraints are split into two sections, one for pointer parameters, and +: one for the rest. : : *** Pointer Parameter Constraints : -: You must specify what checking is needed for all pointer arguments. If the -: pointer is allowed to point to NULL, prefix that argument with 'NULLOK' -: (following the template of the many entries in this file that have that). -: If it can't be NULL, use 'NN' (again many entries herein do that). -: The reason for this requirement is to tell the maintainers that you have -: considered the question about the argument, and this is the answer. +: Every pointer parameter must have a constraint; one of the following: +: +: NN means the called function is expecting this pointer parameter to be +: non-NULL, and likely is not equipped to handle it being NULL. +: NULLOK means the called function definitely can handle this parameter +: being NULL. The reason you need to specify this at all is to tell +: future maintainers that you have considered the question about the +: parameter, and this is the answer. +: SPTR means that not only must this pointer parameter be non-NULL, it +: points to a position in a character string, which the called +: function is not to look behind. If a parameter is marked with this +: constraint, another parameter to the function must be marked with +: one of the constraints below in this list. +: EPTR means that not only must this pointer parameter be non-NULL, it +: points to the position one byte beyond the end of a character +: string. The called function is not to look at the byte in that +: position or any higher ones. If a parameter is marked with this +: constraint, another parameter to the function must be marked with +: SPTR, or MPTR (described just below). It also is fine to have +: both an SPTR parameter and an MPTR one. +: MPTR means that not only must this pointer parameter be non-NULL, it +: points to a position somewhere in the middle of a character string. +: If a parameter is marked with this constraint, another parameter to +: the function must be marked with one of SPTR, EPTR, or EPTRQ +: (described just below). It also is fine to have both an SPTR +: parameter and an EPTR (or EPTRQ) one. +: EPTRQ is like EPTR, but the called function is equpped to handle the case +: where the input SPTR and/or MPTR are equal to this parameter; they +: don't have to be strictly less than it. If a parameter is marked +: with this constraint, no parameter may be marked as EPTR. +: +: To summarize, either +: SPTR <= MPTR < EPTR +: or +: SPTR <= MPTR <= EPTRQ +: In each equation all three or any two of the constraints must be present. +: +: When only two constraints are present and one of them is either EPTR or +: EPTRQ, the difference between the remaining SPTR or MPTR becomes somewhat +: fuzzy; the generated assertion will be the same whichever constraint is +: used. You should choose the one that makes the most sense for the +: semantics of the parameter. For example, there are currently some +: functions with parameters named 'curpos', and no SPTR parameter exists. +: The name of the parameter clearly indicates it isn't necessarily the +: starting position of the string, so using MPTR as the constraint makes the +: most sense. +: +: The parameters for the function can be in any order, except if a function +: has multiple different character strings, all the parameters for the first +: string must be positioned in the function call before any of the parameters +: for the second, and so forth. (This accommodates the very few existing +: functions that have multiple strings passed to them, without needing to +: create a more general mechanism, like possibly SPTR1..EPTR1, SPTR2..EPTR2.) : : *** Non-pointer Parameter Constraints : -: For a numeric argument, you may specify that it can't be 0 by using 'NZ' +: Only a single constraint is currently available to you to use; it is for +: parameters that are some sort of integer +: +: NZ means the called function is expecting this parameter to be +: non-zero, and is not equipped to handle it being 0. : : *** Automatically generated checks : diff --git a/regen/embed.pl b/regen/embed.pl index adbaf15ab380..467a0fba8bad 100755 --- a/regen/embed.pl +++ b/regen/embed.pl @@ -281,6 +281,8 @@ sub generate_proto_h { "$plain_func: n flag is contradicted by having arguments" if $flags =~ /n/; my $n; + my @bounded_strings; + for my $arg ( @$args ) { ++$n; @@ -296,8 +298,29 @@ sub generate_proto_h { die_at_end "$plain_func: func: m flag required for" . '"literal" argument' unless $has_mflag; } - else { - my $nn = ( $arg =~ s/\bNN\b// ); + else { # Look for constraints about this argument + + my $ptr_type; # E, M, and S are the three types + # corresponding respectively to EPTR(Q)?, + # MPTR, and SPTR + my $equal = ""; # EPTRQ is just an EPTR with this set to + # "=" + if ($arg =~ s/ \b ( [EMS] ) PTR (Q)? \b //x) {; + $ptr_type = $1; + if (defined $2) { + die_at_end ": $func: Q only valid with EPTR" + if $ptr_type ne 'E'; + $equal = "="; + } + elsif ($ptr_type eq 'M') { + # A middle position always is <= + $equal = "="; + } + } + + # A $ptr_type is a specialized 'nn' + my $nn = (defined $ptr_type) + ( $arg =~ s/\bNN\b// ); + my $nz = ( $arg =~ s/\bNZ\b// ); my $nullok = ( $arg =~ s/\bNULLOK\b// ); my $nocheck = ( $arg =~ s/\bNOCHECK\b// ); @@ -310,7 +333,8 @@ sub generate_proto_h { # Note that we don't care if you say e.g., 'NN' multiple # times die_at_end - ":$func: $arg Use only one of NN, NULLOK, and NZ" + ":$func: $arg Use only one of NN (including" + . " EPTR, EPTRQ, MPTR, SPTR), NULLOK, or NZ" if 0 + $nn + $nz + $nullok > 1; push( @nonnull, $n ) if $nn; @@ -322,7 +346,8 @@ sub generate_proto_h { # pointer. if ($args_assert_line && $arg =~ /\*/) { if ($nn + $nullok == 0) { - warn "$func: $arg needs NN or NULLOK\n"; + warn "$func: $arg needs one of: NN, EPTR, EPTRQ," + . " MPTR, SPTR, or NULLOK\n"; ++$unflagged_pointers; } @@ -358,10 +383,151 @@ sub generate_proto_h { if $nullok; push @asserts, "assert($type_assert)"; } + + # If this is a pointer to a character string argument, + # we need extra work. + if ($ptr_type) { + + # For these, not only does the parameter have to + # be non-NULL, but every dereference of it has to + # too. + # + # First, get all the '*" derefs, except one. + my $derefs = "*" x (($arg =~ tr/*//) - 1); + + # Then add the asserts that each dereferenced + # layer is non-NULL. + for (my $i = 1; $i <= length $derefs; $i++) { + push @asserts, "assert(" + . substr($derefs, 0, $i) + . "$argname)"; + } + + # Save the data we need later + my %entry = ( + argname => $argname, + equal => $equal, + deref => $derefs, + ); + + # The motivation for all this is that some string + # pointer parameters have constraints, such as + # that the starting position can't be beyond the + # ending one. Unfortunately, the function's + # parameters can be positioned in its prototype so + # that the pointer to the ending position comes + # before the pointer to the starting one, and this + # can't be changed because they are API. To cope + # with this, we use the array below to save just + # the crucial information about each while parsing + # the parameters. After all information is + # gathered, we go through and handle it. An entry + # looks like this after all the parameters are + # parsed: + # { + # 'M' => { + # 'equal' => '=', + # 'argname' => 'curpos', + # 'deref' => '' + # }, + # 'E' => { + # 'equal' => '', + # 'argname' => 'strend', + # 'deref' => '' + # }, + # 'S' => { + # 'equal' => '', + # 'deref' => '', + # 'argname' => 'strbeg' + # } + # } + # + # Only two of the keys need be present. + # If the function has multiple string parameters, + # the [0] entry in @bounded_strings will be for + # the first string, [1] for the second, and so on. + # + # Here, we are in the middle of parsing the + # parameters. We add this parameter to the + # current string's boundary constraints hash, + # or create a new string if necessary. The new + # string's data is pushed as a new element onto + # the array. + # + # A new element is created if the array is empty, + # or if there is already an existing hash element + # for the new key. For example, you can't have + # two EPTRs for the same string, so the second + # must be for a new string. + # + # Otherwise we presume this hash value is for the + # most recent string in the array. If we have an + # EPTR, and an MPTR comes along, assume that it is + # for the same string as the EPTR. + # + # This hack works as long as all parameters for the + # current string come before any of the next + # string, which is the case for all existing + # function calls, and any new ones can be + # fashioned to conform. + if ( @bounded_strings + && ! defined $bounded_strings[-1]{$ptr_type}) + { + $bounded_strings[-1]{$ptr_type} = \%entry; + } + else { + push @bounded_strings, + { $ptr_type => \%entry }; + } + } # End of special handling of string bounds } } # End of this argument } # End of loop through all arguments + # We have looped through all arguments, and for any bounded string + # ones, we have saved the information needed to generate things + # like + # assert(s < e) + foreach my $string (@bounded_strings) { + + # We need at least two bounds + if (1 == ( (defined $string->{S}) + + (defined $string->{M}) + + (defined $string->{E}))) + { + my ($type, $object) = each %$string; + die_at_end + "$func: Missing PTR constraint for string given by " + . $object->{argname}; + next; + } + + # But three or any two bounds work. We may need to generate + # two asserts, so loop to do so, skipping any missing one. + for my $i (["S", "E"], ["S", "M"], ["M", "E"]) { + + # We don't need an assert for the whole span if we have an + # intermediate one. + next if defined $string->{M} && $i->[0] eq 'S' + && $i->[1] eq 'E'; + + my $lower = $string->{$i->[0]} or next; + my $upper = $string->{$i->[1]} or next; + + # This reduces to either; + # assert(lower < upper); + # or + # assert(lower <= upper); + # + # There might also be some derefences, like **lower + push @asserts, "assert(" + . "$lower->{deref}$lower->{argname}" + . " <$upper->{equal} " + . "$upper->{deref}$upper->{argname}" + . ")"; + } + } + $ret .= join ", ", @$args; } else { From 6a8042cc652d81f7f332fed409283550f507ef2a Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 27 Sep 2025 16:41:14 -0600 Subject: [PATCH 07/10] Try out new asserts in a couple cases This uses the previous commit's new abilities on a couple of sample functions, so you can see the changes it makes in two small bites. --- embed.fnc | 8 ++++---- proto.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/embed.fnc b/embed.fnc index 6c97c56f28d7..afc5b5d088cb 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1939,8 +1939,8 @@ CTopr |void |locale_panic |NN const char *msg \ p |OP * |localize |NN OP *o \ |I32 lex CTp |UV |long_valid_utf8_to_uv \ - |NN const U8 * const s \ - |NN const U8 * const e + |SPTR const U8 * const s \ + |EPTR const U8 * const e ARdp |I32 |looks_like_number \ |NN SV * const sv CRTip |unsigned|lsbit_pos32 |U32 word @@ -5298,8 +5298,8 @@ ST |char |first_symbol |NN const char *pat \ |NN const char *patend RS |const char *|get_num |NN const char *patptr \ |NN SSize_t *lenptr -S |const char *|group_end |NN const char *patptr \ - |NN const char *patend \ +S |const char *|group_end |SPTR const char *patptr \ + |EPTR const char *patend \ |char ender RS |SV * |is_an_int |NN const char *s \ |STRLEN l diff --git a/proto.h b/proto.h index 5302f6977f21..9b69510232e3 100644 --- a/proto.h +++ b/proto.h @@ -2035,7 +2035,7 @@ Perl_localize(pTHX_ OP *o, I32 lex) PERL_CALLCONV UV Perl_long_valid_utf8_to_uv(const U8 * const s, const U8 * const e); #define PERL_ARGS_ASSERT_LONG_VALID_UTF8_TO_UV \ - assert(s); assert(e) + assert(s); assert(e); assert(s < e) PERL_CALLCONV I32 Perl_looks_like_number(pTHX_ SV * const sv) @@ -8083,7 +8083,7 @@ S_get_num(pTHX_ const char *patptr, SSize_t *lenptr) STATIC const char * S_group_end(pTHX_ const char *patptr, const char *patend, char ender); # define PERL_ARGS_ASSERT_GROUP_END \ - assert(patptr); assert(patend) + assert(patptr); assert(patend); assert(patptr < patend) STATIC SV * S_is_an_int(pTHX_ const char *s, STRLEN l) From 541d89997ee7000e24ffb004d8ed6a11913ac6e2 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 6 Oct 2025 07:34:18 -0600 Subject: [PATCH 08/10] embed.fnc: Add ptr assertions for apparently non-problematic I went through the declarations in embed.fnc and added PTR constraints for all the ones that looked to have pointers to the beginning and end of a string. I then ran the test suite, and reverted any that had problems. Then I looked at the code for each one remaining to see if it was equipped to handle the case where the end == the beginning, and removed those. This is the result. Testing in the field may reveal others that the test suite missed; we can fix those as they occur. I removed now redundant asserts that were in the functions, and now are included in the ARGS_ASSERT macros --- embed.fnc | 84 +++++++++++++++++++++++++++---------------------------- proto.h | 41 ++++++++++++++------------- utf8.c | 6 +--- 3 files changed, 64 insertions(+), 67 deletions(-) diff --git a/embed.fnc b/embed.fnc index afc5b5d088cb..e0114deb54cf 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1346,8 +1346,8 @@ Adpx |void |forbid_outofblock_ops \ |NN const char *blockname p |void |force_locale_unlock Cp |void |force_out_malformed_utf8_message_ \ - |NN const U8 * const p \ - |NN const U8 * const e \ + |SPTR const U8 * const p \ + |EPTR const U8 * const e \ |U32 flags \ |const bool die_here Adfp |char * |form |NN const char *pat \ @@ -1811,12 +1811,12 @@ ARTdip |Size_t |isUTF8_CHAR_flags \ |NN const U8 * const e \ |const U32 flags CPRTp |STRLEN |is_utf8_char_helper_ \ - |NN const U8 * const s \ - |NN const U8 *e \ + |SPTR const U8 * const s \ + |EPTR const U8 *e \ |const U32 flags CPRTp |Size_t |is_utf8_FF_helper_ \ - |NN const U8 * const s0 \ - |NN const U8 * const e \ + |SPTR const U8 * const s0 \ + |EPTR const U8 * const e \ |const bool require_partial ATdmp |bool |is_utf8_fixed_width_buf_flags \ |NN const U8 * const s \ @@ -1834,18 +1834,18 @@ ATdip |bool |is_utf8_fixed_width_buf_loclen_flags \ |NULLOK STRLEN *el \ |const U32 flags CRp |Size_t |is_utf8_FOO_ |const U8 classnum \ - |NN const U8 *p \ - |NN const U8 * const e + |SPTR const U8 *p \ + |EPTR const U8 * const e ARTdip |bool |is_utf8_invariant_string_loc \ |NN const U8 * const s \ |STRLEN len \ |NULLOK const U8 **ep CRp |Size_t |is_utf8_perl_idcont_ \ - |NN const U8 *p \ - |NN const U8 * const e + |SPTR const U8 *p \ + |EPTR const U8 * const e CRp |Size_t |is_utf8_perl_idstart_ \ - |NN const U8 *p \ - |NN const U8 * const e + |SPTR const U8 *p \ + |EPTR const U8 * const e ARTdmp |bool |is_utf8_string |NN const U8 *s \ |STRLEN len ARTdip |bool |is_utf8_string_flags \ @@ -1873,11 +1873,11 @@ ATdip |bool |is_utf8_string_loclen_flags \ |NULLOK STRLEN *el \ |const U32 flags APTdmp |bool |is_utf8_valid_partial_char \ - |NN const U8 * const s0 \ - |NN const U8 * const e + |SPTR const U8 * const s0 \ + |EPTR const U8 * const e ARTdip |bool |is_utf8_valid_partial_char_flags \ - |NN const U8 * const s0 \ - |NN const U8 * const e \ + |SPTR const U8 * const s0 \ + |EPTR const U8 * const e \ |const U32 flags : Used in perly.y @@ -3139,12 +3139,12 @@ Adp |const char *|scan_version \ |NN const char *s \ |NN SV *rv \ |bool qv -Adp |char * |scan_vstring |NN const char *s \ - |NN const char * const e \ +Adp |char * |scan_vstring |SPTR const char *s \ + |EPTR const char * const e \ |NN SV *sv EXpx |char * |scan_word |NN char *s \ - |NN char *dest \ - |NN char *dest_end \ + |SPTR char *dest \ + |EPTR char *dest_end \ |int allow_package \ |NN STRLEN *slp Cp |U32 |seed @@ -3758,27 +3758,27 @@ Cp |UV |to_uni_upper |UV c \ |NN U8 *p \ |NN STRLEN *lenp Cp |UV |to_utf8_fold_flags_ \ - |NN const U8 *p \ - |NN const U8 *e \ + |SPTR const U8 *p \ + |EPTR const U8 *e \ |NN U8 *ustrp \ |NULLOK STRLEN *lenp \ |U8 flags Cp |UV |to_utf8_lower_flags_ \ - |NN const U8 *p \ - |NN const U8 *e \ + |SPTR const U8 *p \ + |EPTR const U8 *e \ |NN U8 *ustrp \ |NULLOK STRLEN *lenp \ |bool flags Cp |UV |to_utf8_title_flags_ \ - |NN const U8 *p \ - |NN const U8 *e \ + |SPTR const U8 *p \ + |EPTR const U8 *e \ |NN U8 *ustrp \ |NULLOK STRLEN *lenp \ |bool flags Cp |UV |to_utf8_upper_flags_ \ - |NN const U8 *p \ - |NN const U8 *e \ + |SPTR const U8 *p \ + |EPTR const U8 *e \ |NN U8 *ustrp \ |NULLOK STRLEN *lenp \ |bool flags @@ -5847,8 +5847,8 @@ Ei |I32 |foldEQ_latin1_s2_folded \ ERS |bool |isFOO_lc |const U8 classnum \ |const U8 character ERS |bool |isFOO_utf8_lc |const U8 classnum \ - |NN const U8 *character \ - |NN const U8 *e + |SPTR const U8 *character \ + |EPTR const U8 *e ERS |bool |isGCB |const GCB_enum before \ |const GCB_enum after \ |NN const U8 * const strbeg \ @@ -5892,8 +5892,8 @@ ERST |U8 * |reghopmaybe3 |NN U8 *s \ |NN const U8 * const lim ERS |bool |reginclass |NULLOK regexp * const prog \ |NN const regnode * const n \ - |NN const U8 * const p \ - |NN const U8 * const p_end \ + |SPTR const U8 * const p \ + |EPTR const U8 * const p_end \ |bool const utf8_target ERS |SSize_t|regmatch |NN regmatch_info *reginfo \ |NN char *startpos \ @@ -6181,8 +6181,8 @@ RS |char * |scan_const |NN char *start RS |char * |scan_formline |NN char *s RS |char * |scan_heredoc |NN char *s S |char * |scan_ident |NN char *s \ - |NN char *dest \ - |NN char *dest_end \ + |SPTR char *dest \ + |EPTR char *dest_end \ |bool chk_unary RS |char * |scan_inputsymbol \ |NN char *start @@ -6247,8 +6247,8 @@ RS |UV |check_locale_boundary_crossing \ |NN U8 * const ustrp \ |NN STRLEN *lenp RTi |int |does_utf8_overflow \ - |NN const U8 * const s \ - |NN const U8 *e + |SPTR const U8 * const s \ + |EPTR const U8 *e RTi |int |isFF_overlong |NN const U8 * const s \ |const STRLEN len RTi |SSize_t|is_utf8_overlong \ @@ -6278,16 +6278,16 @@ S |UV |to_utf8_case_ |const UV original \ |NULLOK const U32 * const * const aux_tables \ |NULLOK const U8 * const aux_table_lengths \ |NN const char * const normal -S |UV |turkic_fc |NN const U8 * const p \ - |NN const U8 * const e \ +S |UV |turkic_fc |SPTR const U8 * const p \ + |EPTR const U8 * const e \ |NN U8 *ustrp \ |NN STRLEN *lenp -S |UV |turkic_lc |NN const U8 * const p0 \ - |NN const U8 * const e \ +S |UV |turkic_lc |SPTR const U8 * const p0 \ + |EPTR const U8 * const e \ |NN U8 *ustrp \ |NN STRLEN *lenp -S |UV |turkic_uc |NN const U8 * const p \ - |NN const U8 * const e \ +S |UV |turkic_uc |SPTR const U8 * const p \ + |EPTR const U8 * const e \ |NN U8 *ustrp \ |NN STRLEN *lenp RS |char * |unexpected_non_continuation_text \ diff --git a/proto.h b/proto.h index 9b69510232e3..4a081b9821f0 100644 --- a/proto.h +++ b/proto.h @@ -1131,7 +1131,7 @@ Perl_force_locale_unlock(pTHX) PERL_CALLCONV void Perl_force_out_malformed_utf8_message_(pTHX_ const U8 * const p, const U8 * const e, U32 flags, const bool die_here); #define PERL_ARGS_ASSERT_FORCE_OUT_MALFORMED_UTF8_MESSAGE_ \ - assert(p); assert(e) + assert(p); assert(e); assert(p < e) PERL_CALLCONV char * Perl_form(pTHX_ const char *pat, ...) @@ -1858,13 +1858,13 @@ Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e, const bool requ __attribute__warn_unused_result__ __attribute__pure__; #define PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_ \ - assert(s0); assert(e) + assert(s0); assert(e); assert(s0 < e) PERL_CALLCONV Size_t Perl_is_utf8_FOO_(pTHX_ const U8 classnum, const U8 *p, const U8 * const e) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_IS_UTF8_FOO_ \ - assert(p); assert(e) + assert(p); assert(e); assert(p < e) /* PERL_CALLCONV STRLEN Perl_is_utf8_char_buf(const U8 *buf, const U8 *buf_end); */ @@ -1874,7 +1874,7 @@ Perl_is_utf8_char_helper_(const U8 * const s, const U8 *e, const U32 flags) __attribute__warn_unused_result__ __attribute__pure__; #define PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_ \ - assert(s); assert(e) + assert(s); assert(e); assert(s < e) /* PERL_CALLCONV bool Perl_is_utf8_fixed_width_buf_flags(const U8 * const s, STRLEN len, const U32 flags); */ @@ -1886,13 +1886,13 @@ PERL_CALLCONV Size_t Perl_is_utf8_perl_idcont_(pTHX_ const U8 *p, const U8 * const e) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_IS_UTF8_PERL_IDCONT_ \ - assert(p); assert(e) + assert(p); assert(e); assert(p < e) PERL_CALLCONV Size_t Perl_is_utf8_perl_idstart_(pTHX_ const U8 *p, const U8 * const e) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_IS_UTF8_PERL_IDSTART_ \ - assert(p); assert(e) + assert(p); assert(e); assert(p < e) /* PERL_CALLCONV bool Perl_is_utf8_string(const U8 *s, STRLEN len) @@ -4239,10 +4239,11 @@ Perl_scan_version(pTHX_ const char *s, SV *rv, bool qv); PERL_CALLCONV char * Perl_scan_vstring(pTHX_ const char *s, const char * const e, SV *sv); #define PERL_ARGS_ASSERT_SCAN_VSTRING \ - assert(s); assert(e); assert(sv) + assert(s); assert(e); assert(sv); assert(s < e) #define PERL_ARGS_ASSERT_SCAN_WORD \ - assert(s); assert(dest); assert(dest_end); assert(slp) + assert(s); assert(dest); assert(dest_end); assert(slp); \ + assert(dest < dest_end) PERL_CALLCONV U32 Perl_seed(pTHX); @@ -5296,22 +5297,22 @@ Perl_to_uni_upper(pTHX_ UV c, U8 *p, STRLEN *lenp); PERL_CALLCONV UV Perl_to_utf8_fold_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, U8 flags); #define PERL_ARGS_ASSERT_TO_UTF8_FOLD_FLAGS_ \ - assert(p); assert(e); assert(ustrp) + assert(p); assert(e); assert(ustrp); assert(p < e) PERL_CALLCONV UV Perl_to_utf8_lower_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, bool flags); #define PERL_ARGS_ASSERT_TO_UTF8_LOWER_FLAGS_ \ - assert(p); assert(e); assert(ustrp) + assert(p); assert(e); assert(ustrp); assert(p < e) PERL_CALLCONV UV Perl_to_utf8_title_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, bool flags); #define PERL_ARGS_ASSERT_TO_UTF8_TITLE_FLAGS_ \ - assert(p); assert(e); assert(ustrp) + assert(p); assert(e); assert(ustrp); assert(p < e) PERL_CALLCONV UV Perl_to_utf8_upper_flags_(pTHX_ const U8 *p, const U8 *e, U8 *ustrp, STRLEN *lenp, bool flags); #define PERL_ARGS_ASSERT_TO_UTF8_UPPER_FLAGS_ \ - assert(p); assert(e); assert(ustrp) + assert(p); assert(e); assert(ustrp); assert(p < e) PERL_CALLCONV bool Perl_try_amagic_bin(pTHX_ int method, int flags); @@ -8879,7 +8880,7 @@ S_unwind_scan_frames(pTHX_ void *p); # define PERL_ARGS_ASSERT_ISFOO_LC # define PERL_ARGS_ASSERT_ISFOO_UTF8_LC \ - assert(character); assert(e) + assert(character); assert(e); assert(character < e) # define PERL_ARGS_ASSERT_ISGCB \ assert(strbeg); assert(curpos) @@ -8912,7 +8913,7 @@ S_unwind_scan_frames(pTHX_ void *p); assert(s); assert(lim) # define PERL_ARGS_ASSERT_REGINCLASS \ - assert(n); assert(p); assert(p_end) + assert(n); assert(p); assert(p_end); assert(p < p_end) # define PERL_ARGS_ASSERT_REGMATCH \ assert(reginfo); assert(startpos); assert(prog) @@ -9471,7 +9472,7 @@ S_scan_heredoc(pTHX_ char *s) STATIC char * S_scan_ident(pTHX_ char *s, char *dest, char *dest_end, bool chk_unary); # define PERL_ARGS_ASSERT_SCAN_IDENT \ - assert(s); assert(dest); assert(dest_end) + assert(s); assert(dest); assert(dest_end); assert(dest < dest_end) STATIC char * S_scan_inputsymbol(pTHX_ char *start) @@ -9618,17 +9619,17 @@ S_to_utf8_case_(pTHX_ const UV original, const U8 *p, U8 *ustrp, STRLEN *lenp, S STATIC UV S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e, U8 *ustrp, STRLEN *lenp); # define PERL_ARGS_ASSERT_TURKIC_FC \ - assert(p); assert(e); assert(ustrp); assert(lenp) + assert(p); assert(e); assert(ustrp); assert(lenp); assert(p < e) STATIC UV S_turkic_lc(pTHX_ const U8 * const p0, const U8 * const e, U8 *ustrp, STRLEN *lenp); # define PERL_ARGS_ASSERT_TURKIC_LC \ - assert(p0); assert(e); assert(ustrp); assert(lenp) + assert(p0); assert(e); assert(ustrp); assert(lenp); assert(p0 < e) STATIC UV S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e, U8 *ustrp, STRLEN *lenp); # define PERL_ARGS_ASSERT_TURKIC_UC \ - assert(p); assert(e); assert(ustrp); assert(lenp) + assert(p); assert(e); assert(ustrp); assert(lenp); assert(p < e) STATIC char * S_unexpected_non_continuation_text(pTHX_ const U8 * const s, STRLEN print_len, const STRLEN non_cont_byte_pos, const STRLEN expect_len) @@ -9648,7 +9649,7 @@ PERL_STATIC_INLINE int S_does_utf8_overflow(const U8 * const s, const U8 *e) __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_DOES_UTF8_OVERFLOW \ - assert(s); assert(e) + assert(s); assert(e); assert(s < e) PERL_STATIC_INLINE int S_isFF_overlong(const U8 * const s, const STRLEN len) @@ -10008,7 +10009,7 @@ PERL_STATIC_INLINE bool Perl_is_utf8_valid_partial_char_flags(const U8 * const s0, const U8 * const e, const U32 flags) __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_IS_UTF8_VALID_PARTIAL_CHAR_FLAGS \ - assert(s0); assert(e) + assert(s0); assert(e); assert(s0 < e) PERL_STATIC_INLINE unsigned Perl_lsbit_pos32(U32 word) diff --git a/utf8.c b/utf8.c index 9aa8c0ece519..d42afdb597ee 100644 --- a/utf8.c +++ b/utf8.c @@ -725,7 +725,6 @@ STRLEN Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags) { PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_; - assert(e > s); assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE)); SSize_t len, full_len; @@ -755,6 +754,7 @@ Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags) * determined with just the first one or two bytes. * */ + full_len = UTF8SKIP(s); len = e - s; @@ -840,7 +840,6 @@ Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e, const bool require_partial) { PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_; - assert(s0 < e); assert(*s0 == I8_TO_NATIVE_UTF8(0xFF)); /* This is called to determine if the UTF-8 sequence starting at s0 and @@ -4245,7 +4244,6 @@ S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e, U8 * ustrp, STRLEN *lenp) { PERL_ARGS_ASSERT_TURKIC_FC; - assert(e > p); /* Returns 0 if the foldcase of the input UTF-8 encoded sequence from * p0..e-1 according to Turkic rules is the same as for non-Turkic. @@ -4280,7 +4278,6 @@ S_turkic_lc(pTHX_ const U8 * const p0, const U8 * const e, U8 * ustrp, STRLEN *lenp) { PERL_ARGS_ASSERT_TURKIC_LC; - assert(e > p0); /* Returns 0 if the lowercase of the input UTF-8 encoded sequence from * p0..e-1 according to Turkic rules is the same as for non-Turkic. @@ -4326,7 +4323,6 @@ S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e, U8 * ustrp, STRLEN *lenp) { PERL_ARGS_ASSERT_TURKIC_UC; - assert(e > p); /* Returns 0 if the upper or title-case of the input UTF-8 encoded sequence * from p0..e-1 according to Turkic rules is the same as for non-Turkic. From a967a9cb248e2453e87771b6445e29652646bf90 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 27 Sep 2025 17:16:56 -0600 Subject: [PATCH 09/10] embed.fnc: Add MPTR constraints This is the first use of the new MPTR constraint that is used to generate an assertion that a pointer is somewhere in the middle of a string. I removed now redundant asserts that were in the functions, and now are included in the ARGS_ASSERT macros --- embed.fnc | 102 +++++++++++++++++++++++++++--------------------------- inline.h | 3 +- proto.h | 57 +++++++++++++++++++----------- 3 files changed, 88 insertions(+), 74 deletions(-) diff --git a/embed.fnc b/embed.fnc index e0114deb54cf..d56b4b4b58c8 100644 --- a/embed.fnc +++ b/embed.fnc @@ -2708,9 +2708,9 @@ Fop |void |populate_isa |NN const char *name \ Adhp |REGEXP *|pregcomp |NN SV * const pattern \ |const U32 flags Adhp |I32 |pregexec |NN REGEXP * const prog \ - |NN char *stringarg \ - |NN char *strend \ - |NN char *strbeg \ + |MPTR char *stringarg \ + |EPTR char *strend \ + |SPTR char *strbeg \ |SSize_t minend \ |NN SV *screamer \ |U32 nosave @@ -2821,9 +2821,9 @@ Xdp |struct refcounted_he *|refcounted_he_new_sv \ |U32 flags Cp |void |regdump |NN const regexp *r Cp |I32 |regexec_flags |NN REGEXP * const rx \ - |NN char *stringarg \ + |MPTR char *stringarg \ |NN char *strend \ - |NN char *strbeg \ + |SPTR char *strbeg \ |SSize_t minend \ |NN SV *sv \ |NULLOK void *data \ @@ -2889,8 +2889,8 @@ EXp |REGEXP *|reg_temp_copy |NULLOK REGEXP *dsv \ |NN REGEXP *ssv Cp |char * |re_intuit_start|NN REGEXP * const rx \ |NULLOK SV *sv \ - |NN const char * const strbeg \ - |NN char *strpos \ + |SPTR const char * const strbeg \ + |MPTR char *strpos \ |NN char *strend \ |const U32 flags \ |NULLOK re_scream_pos_data *data @@ -3808,13 +3808,13 @@ ARdip |IV |utf8_distance |NN const U8 *a \ |NN const U8 *b ARTdip |U8 * |utf8_hop |NN const U8 *s \ |SSize_t off -ARTdmp |U8 * |utf8_hop_back |NN const U8 *s \ +ARTdmp |U8 * |utf8_hop_back |MPTR const U8 *s \ |SSize_t off \ - |NN const U8 * const start + |SPTR const U8 * const start ARTdip |U8 * |utf8_hop_back_overshoot \ - |NN const U8 *s \ + |MPTR const U8 *s \ |SSize_t off \ - |NN const U8 * const start \ + |SPTR const U8 * const start \ |NULLOK SSize_t *remaining ARTdmp |U8 * |utf8_hop_forward \ |NN const U8 *s \ @@ -3826,14 +3826,14 @@ ARTdip |U8 * |utf8_hop_forward_overshoot \ |NN const U8 * const end \ |NULLOK SSize_t *remaining ARTdip |U8 * |utf8_hop_overshoot \ - |NN const U8 *s \ + |MPTR const U8 *s \ |SSize_t off \ - |NN const U8 * const start \ + |SPTR const U8 * const start \ |NN const U8 * const end \ |NULLOK SSize_t *remaining -ARTdmp |U8 * |utf8_hop_safe |NN const U8 *s \ +ARTdmp |U8 * |utf8_hop_safe |MPTR const U8 *s \ |SSize_t off \ - |NN const U8 * const start \ + |SPTR const U8 * const start \ |NN const U8 * const end ARdp |STRLEN |utf8_length |NN const U8 *s0 \ |NN const U8 *e @@ -4617,9 +4617,9 @@ S |void |maybe_multimagic_gv \ S |bool |parse_gv_stash_name \ |NN HV **stash \ |NN GV **gv \ - |NN const char **name \ + |MPTR const char **name \ |NN STRLEN *len \ - |NN const char *nambeg \ + |SPTR const char *nambeg \ |STRLEN full_len \ |const U32 is_utf8 \ |const I32 add @@ -5321,8 +5321,8 @@ S |SV ** |pack_rec |NN SV *cat \ RS |char * |sv_exp_grow |NN SV *sv \ |STRLEN needed S |SSize_t|unpack_rec |NN struct tempsym *symptr \ - |NN const char *s \ - |NN const char *strbeg \ + |MPTR const char *s \ + |SPTR const char *strbeg \ |NN const char *strend \ |NULLOK const char **new_s #endif /* defined(PERL_IN_PP_PACK_C) */ @@ -5494,9 +5494,9 @@ ERST |int |edit_distance |NN const UV *src \ |const SSize_t maxDistance ES |I32 |execute_wildcard \ |NN REGEXP * const prog \ - |NN char *stringarg \ + |MPTR char *stringarg \ |NN char *strend \ - |NN char *strbeg \ + |SPTR char *strbeg \ |SSize_t minend \ |NN SV *screamer \ |U32 nosave @@ -5681,9 +5681,9 @@ Ep |void |populate_invlist_from_bitmap \ #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || \ defined(PERL_IN_TOKE_C) -ERp |bool |is_grapheme |NN const U8 *strbeg \ - |NN const U8 *s \ - |NN const U8 *strend \ +ERp |bool |is_grapheme |SPTR const U8 *strbeg \ + |MPTR const U8 *s \ + |EPTR const U8 *strend \ |const UV cp #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || \ @@ -5793,30 +5793,30 @@ ES |void |unwind_scan_frames \ |NN void *p #endif /* defined(PERL_IN_REGCOMP_STUDY_C) */ #if defined(PERL_IN_REGEXEC_C) -ERS |LB_enum|advance_one_LB |NN U8 **curpos \ - |NN const U8 * const strend \ +ERS |LB_enum|advance_one_LB |MPTR U8 **curpos \ + |EPTR const U8 * const strend \ |const bool utf8_target -ERS |SB_enum|advance_one_SB |NN U8 **curpos \ - |NN const U8 * const strend \ +ERS |SB_enum|advance_one_SB |MPTR U8 **curpos \ + |EPTR const U8 * const strend \ |const bool utf8_target -ERS |WB_enum|advance_one_WB_|NN U8 **curpos \ - |NN const U8 * const strend \ +ERS |WB_enum|advance_one_WB_|MPTR U8 **curpos \ + |EPTR const U8 * const strend \ |const bool utf8_target \ |const bool skip_Extend_Format -ERS |GCB_enum|backup_one_GCB|NN const U8 * const strbeg \ - |NN U8 **curpos \ +ERS |GCB_enum|backup_one_GCB|SPTR const U8 * const strbeg \ + |MPTR U8 **curpos \ |const bool utf8_target -ERS |LB_enum|backup_one_LB_ |NN const U8 * const strbeg \ - |NN U8 **curpos \ +ERS |LB_enum|backup_one_LB_ |SPTR const U8 * const strbeg \ + |MPTR U8 **curpos \ |const bool utf8_target \ |bool skip_CM_ZWJ -ERS |SB_enum|backup_one_SB |NN const U8 * const strbeg \ - |NN U8 **curpos \ +ERS |SB_enum|backup_one_SB |SPTR const U8 * const strbeg \ + |MPTR U8 **curpos \ |const bool utf8_target ERS |WB_enum|backup_one_WB_but_over_Extend_FO \ |NN WB_enum *previous \ - |NN const U8 * const strbeg \ - |NN U8 **curpos \ + |SPTR const U8 * const strbeg \ + |MPTR U8 **curpos \ |const bool utf8_target EWi |void |capture_clear |NN regexp *rex \ |U16 from_ix \ @@ -5851,27 +5851,27 @@ ERS |bool |isFOO_utf8_lc |const U8 classnum \ |EPTR const U8 *e ERS |bool |isGCB |const GCB_enum before \ |const GCB_enum after \ - |NN const U8 * const strbeg \ - |NN const U8 * const curpos \ + |SPTR const U8 * const strbeg \ + |MPTR const U8 * const curpos \ |const bool utf8_target ERS |bool |isLB |LB_enum before \ |LB_enum after \ - |NN const U8 * const strbeg \ - |NN const U8 * const curpos \ - |NN const U8 * const strend \ + |SPTR const U8 * const strbeg \ + |MPTR const U8 * const curpos \ + |EPTR const U8 * const strend \ |const bool utf8_target ERS |bool |isSB |SB_enum before \ |SB_enum after \ - |NN const U8 * const strbeg \ - |NN const U8 * const curpos \ - |NN const U8 * const strend \ + |SPTR const U8 * const strbeg \ + |MPTR const U8 * const curpos \ + |EPTR const U8 * const strend \ |const bool utf8_target ERS |bool |isWB |WB_enum previous \ |WB_enum before \ |WB_enum after \ - |NN const U8 * const strbeg \ - |NN const U8 * const curpos \ - |NN const U8 * const strend \ + |SPTR const U8 * const strbeg \ + |MPTR const U8 * const curpos \ + |EPTR const U8 * const strend \ |const bool utf8_target ERST |I32 |reg_check_named_buff_matched \ |NN const regexp *rex \ @@ -6050,8 +6050,8 @@ S |const char *|sv_display|NN SV * const sv \ |STRLEN tmpbuf_size S |bool |sv_2iuv_common |NN SV * const sv S |STRLEN |sv_pos_b2u_midway \ - |NN const U8 * const s \ - |NN const U8 * const target \ + |SPTR const U8 * const s \ + |MPTR const U8 * const target \ |NN const U8 *end \ |STRLEN endu S |STRLEN |sv_pos_u2b_cached \ diff --git a/inline.h b/inline.h index 95f63d863a72..ceef29bb8b72 100644 --- a/inline.h +++ b/inline.h @@ -2963,7 +2963,6 @@ Perl_utf8_hop_back_overshoot(const U8 *s, SSize_t off, const U8 * const start, SSize_t *remaining) { PERL_ARGS_ASSERT_UTF8_HOP_BACK_OVERSHOOT; - assert(start <= s); assert(off <= 0); /* Note: if we know that the input is well-formed, we can do per-word @@ -3031,7 +3030,7 @@ Perl_utf8_hop_overshoot(const U8 *s, SSize_t off, { PERL_ARGS_ASSERT_UTF8_HOP_OVERSHOOT; - assert(start <= s && s <= end); + assert(s <= end); if (off >= 0) { return utf8_hop_forward_overshoot(s, off, end, remaining); diff --git a/proto.h b/proto.h index 4a081b9821f0..e5a5253495b4 100644 --- a/proto.h +++ b/proto.h @@ -3601,7 +3601,8 @@ PERL_CALLCONV I32 Perl_pregexec(pTHX_ REGEXP * const prog, char *stringarg, char *strend, char *strbeg, SSize_t minend, SV *screamer, U32 nosave); #define PERL_ARGS_ASSERT_PREGEXEC \ assert(prog); assert(stringarg); assert(strend); assert(strbeg); \ - assert(screamer) + assert(screamer); assert(strbeg <= stringarg); \ + assert(stringarg < strend) PERL_CALLCONV void Perl_pregfree(pTHX_ REGEXP *r); @@ -3690,7 +3691,8 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_rx_flags); PERL_CALLCONV char * Perl_re_intuit_start(pTHX_ REGEXP * const rx, SV *sv, const char * const strbeg, char *strpos, char *strend, const U32 flags, re_scream_pos_data *data); #define PERL_ARGS_ASSERT_RE_INTUIT_START \ - assert(rx); assert(strbeg); assert(strpos); assert(strend) + assert(rx); assert(strbeg); assert(strpos); assert(strend); \ + assert(strbeg <= strpos) PERL_CALLCONV SV * Perl_re_intuit_string(pTHX_ REGEXP * const r); @@ -3829,7 +3831,7 @@ PERL_CALLCONV I32 Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, char *strbeg, SSize_t minend, SV *sv, void *data, U32 flags); #define PERL_ARGS_ASSERT_REGEXEC_FLAGS \ assert(rx); assert(stringarg); assert(strend); assert(strbeg); \ - assert(sv) + assert(sv); assert(strbeg <= stringarg) PERL_CALLCONV void Perl_regfree_internal(pTHX_ REGEXP * const rx); @@ -7002,7 +7004,8 @@ S_maybe_multimagic_gv(pTHX_ GV *gv, const char *name, const svtype sv_type); STATIC bool S_parse_gv_stash_name(pTHX_ HV **stash, GV **gv, const char **name, STRLEN *len, const char *nambeg, STRLEN full_len, const U32 is_utf8, const I32 add); # define PERL_ARGS_ASSERT_PARSE_GV_STASH_NAME \ - assert(stash); assert(gv); assert(name); assert(len); assert(nambeg) + assert(stash); assert(gv); assert(name); assert(*name); assert(len); \ + assert(nambeg); assert(nambeg <= *name) STATIC void S_require_tie_mod(pTHX_ GV *gv, const char varname, const char *name, STRLEN len, const U32 flags); @@ -8132,7 +8135,8 @@ S_sv_exp_grow(pTHX_ SV *sv, STRLEN needed) STATIC SSize_t S_unpack_rec(pTHX_ struct tempsym *symptr, const char *s, const char *strbeg, const char *strend, const char **new_s); # define PERL_ARGS_ASSERT_UNPACK_REC \ - assert(symptr); assert(s); assert(strbeg); assert(strend) + assert(symptr); assert(s); assert(strbeg); assert(strend); \ + assert(strbeg <= s) #endif /* defined(PERL_IN_PP_PACK_C) */ #if defined(PERL_IN_PP_SORT_C) @@ -8417,7 +8421,7 @@ Perl_invlist_clone(pTHX_ SV * const invlist, SV *newlist); # define PERL_ARGS_ASSERT_EXECUTE_WILDCARD \ assert(prog); assert(stringarg); assert(strend); assert(strbeg); \ - assert(screamer) + assert(screamer); assert(strbeg <= stringarg) # define PERL_ARGS_ASSERT_GET_QUANTIFIER_VALUE \ assert(pRExC_state); assert(start); assert(end) @@ -8664,7 +8668,8 @@ Perl_populate_invlist_from_bitmap(pTHX_ const U8 *bitmap, const Size_t bitmap_le #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || \ defined(PERL_IN_TOKE_C) # define PERL_ARGS_ASSERT_IS_GRAPHEME \ - assert(strbeg); assert(s); assert(strend) + assert(strbeg); assert(s); assert(strend); assert(strbeg <= s); \ + assert(s < strend) #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_REGEXEC_C) || \ @@ -8845,25 +8850,32 @@ S_unwind_scan_frames(pTHX_ void *p); #endif /* defined(PERL_IN_REGCOMP_STUDY_C) */ #if defined(PERL_IN_REGEXEC_C) # define PERL_ARGS_ASSERT_ADVANCE_ONE_LB \ - assert(curpos); assert(strend) + assert(curpos); assert(*curpos); assert(strend); \ + assert(*curpos < strend) # define PERL_ARGS_ASSERT_ADVANCE_ONE_SB \ - assert(curpos); assert(strend) + assert(curpos); assert(*curpos); assert(strend); \ + assert(*curpos < strend) # define PERL_ARGS_ASSERT_ADVANCE_ONE_WB_ \ - assert(curpos); assert(strend) + assert(curpos); assert(*curpos); assert(strend); \ + assert(*curpos < strend) # define PERL_ARGS_ASSERT_BACKUP_ONE_GCB \ - assert(strbeg); assert(curpos) + assert(strbeg); assert(curpos); assert(*curpos); \ + assert(strbeg <= *curpos) # define PERL_ARGS_ASSERT_BACKUP_ONE_LB_ \ - assert(strbeg); assert(curpos) + assert(strbeg); assert(curpos); assert(*curpos); \ + assert(strbeg <= *curpos) # define PERL_ARGS_ASSERT_BACKUP_ONE_SB \ - assert(strbeg); assert(curpos) + assert(strbeg); assert(curpos); assert(*curpos); \ + assert(strbeg <= *curpos) # define PERL_ARGS_ASSERT_BACKUP_ONE_WB_BUT_OVER_EXTEND_FO \ - assert(previous); assert(strbeg); assert(curpos) + assert(previous); assert(strbeg); assert(curpos); assert(*curpos); \ + assert(strbeg <= *curpos) # define PERL_ARGS_ASSERT_FIND_BYCLASS \ assert(prog); assert(c); assert(s); assert(strend) @@ -8883,16 +8895,19 @@ S_unwind_scan_frames(pTHX_ void *p); assert(character); assert(e); assert(character < e) # define PERL_ARGS_ASSERT_ISGCB \ - assert(strbeg); assert(curpos) + assert(strbeg); assert(curpos); assert(strbeg <= curpos) # define PERL_ARGS_ASSERT_ISLB \ - assert(strbeg); assert(curpos); assert(strend) + assert(strbeg); assert(curpos); assert(strend); assert(strbeg <= curpos); \ + assert(curpos < strend) # define PERL_ARGS_ASSERT_ISSB \ - assert(strbeg); assert(curpos); assert(strend) + assert(strbeg); assert(curpos); assert(strend); assert(strbeg <= curpos); \ + assert(curpos < strend) # define PERL_ARGS_ASSERT_ISWB \ - assert(strbeg); assert(curpos); assert(strend) + assert(strbeg); assert(curpos); assert(strend); assert(strbeg <= curpos); \ + assert(curpos < strend) # define PERL_ARGS_ASSERT_REG_CHECK_NAMED_BUFF_MATCHED \ assert(rex); assert(scan) @@ -9249,7 +9264,7 @@ S_sv_display(pTHX_ SV * const sv, char *tmpbuf, STRLEN tmpbuf_size); STATIC STRLEN S_sv_pos_b2u_midway(pTHX_ const U8 * const s, const U8 * const target, const U8 *end, STRLEN endu); # define PERL_ARGS_ASSERT_SV_POS_B2U_MIDWAY \ - assert(s); assert(target); assert(end) + assert(s); assert(target); assert(end); assert(s <= target) STATIC STRLEN S_sv_pos_u2b_cached(pTHX_ SV * const sv, MAGIC ** const mgp, const U8 * const start, const U8 * const send, STRLEN uoffset, STRLEN uoffset0, STRLEN boffset0); @@ -10288,7 +10303,7 @@ PERL_STATIC_INLINE U8 * Perl_utf8_hop_back_overshoot(const U8 *s, SSize_t off, const U8 * const start, SSize_t *remaining) __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_UTF8_HOP_BACK_OVERSHOOT \ - assert(s); assert(start) + assert(s); assert(start); assert(start <= s) PERL_STATIC_INLINE U8 * Perl_utf8_hop_forward_overshoot(const U8 *s, SSize_t off, const U8 * const end, SSize_t *remaining) @@ -10300,7 +10315,7 @@ PERL_STATIC_INLINE U8 * Perl_utf8_hop_overshoot(const U8 *s, SSize_t off, const U8 * const start, const U8 * const end, SSize_t *remaining) __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_UTF8_HOP_OVERSHOOT \ - assert(s); assert(start); assert(end) + assert(s); assert(start); assert(end); assert(start <= s) PERL_STATIC_INLINE bool Perl_utf8_to_bytes_new_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp, void **free_me); From c324bdad691bbe6be2d109eb5990b1109c5b8091 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Tue, 7 Oct 2025 12:18:32 -0600 Subject: [PATCH 10/10] embed.fnc: Add EPTRQ constraints Generally, a pointer to a string upper bound actually is to one beyond the actual final byte in the string. This is sanctioned by the C Standard, and allows you to just subtract the lower bound from it to get its length, without having to add 1. But some functions are written to tolerate the upper bound pointer being set to the actual final byte. The EPTRQ constraint in embed.fnc is used for those; the assertion becomes 'l <= u' instead of strictly less-than. This commit is the first to use this type of constraint, and it applies it only to those functions whose documentation or behavior clearly indicate this is expected. I removed now redundant asserts that were in the functions, and now are included in the ARGS_ASSERT macros There's a dozen-ish ones where that isn't true. And they need to be investigated further before deciding their disposition. --- embed.fnc | 181 +++++++++++++++++++++++++++--------------------------- inline.h | 2 - proto.h | 74 ++++++++++++---------- regexec.c | 3 - toke.c | 2 - util.c | 6 -- 6 files changed, 130 insertions(+), 138 deletions(-) diff --git a/embed.fnc b/embed.fnc index d56b4b4b58c8..2827be40aa71 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1021,8 +1021,8 @@ CTp |Signal_t|csighandler3 |int sig \ |NULLOK Siginfo_t *info \ |NULLOK void *uap ATdmp |bool |c9strict_utf8_to_uv \ - |NN const U8 * const s \ - |NN const U8 * const e \ + |SPTR const U8 * const s \ + |EPTRQ const U8 * const e \ |NN UV *cp_p \ |NULLOK Size_t *advance_p EXp |regexp_engine const *|current_re_engine @@ -1095,17 +1095,17 @@ Rp |SV * |defelem_target |NN SV *sv \ |NULLOK MAGIC *mg : Used in op.c, perl.c px |void |delete_eval_scope -ATdp |char * |delimcpy |NN char *to \ - |NN const char *to_end \ - |NN const char *from \ - |NN const char *from_end \ +ATdp |char * |delimcpy |SPTR char *to \ + |EPTRQ const char *to_end \ + |SPTR const char *from \ + |EPTRQ const char *from_end \ |const int delim \ |NN I32 *retlen ETXdp |char * |delimcpy_no_escape \ - |NN char *to \ - |NN const char *to_end \ - |NN const char *from \ - |NN const char *from_end \ + |SPTR char *to \ + |EPTRQ const char *to_end \ + |SPTR const char *from \ + |EPTRQ const char *from_end \ |const int delim \ |NN I32 *retlen Cp |void |despatch_signals @@ -1282,8 +1282,8 @@ AOdp |SSize_t|eval_sv |NN SV *sv \ |I32 flags EMTpx |Size_t |expected_size |UV size ATdmp |bool |extended_utf8_to_uv \ - |NN const U8 * const s \ - |NN const U8 * const e \ + |SPTR const U8 * const s \ + |EPTRQ const U8 * const e \ |NN UV *cp_p \ |NULLOK Size_t *advance_p Adfp |void |fatal_warner |U32 err \ @@ -1291,8 +1291,8 @@ Adfp |void |fatal_warner |U32 err \ |... Adp |void |fbm_compile |NN SV *sv \ |U32 flags -ARdp |char * |fbm_instr |NN unsigned char *big \ - |NN unsigned char *bigend \ +ARdp |char * |fbm_instr |SPTR unsigned char *big \ + |EPTRQ unsigned char *bigend \ |NN SV *littlestr \ |U32 flags Adhp |SV * |filter_add |NULLOK filter_t funcp \ @@ -1356,8 +1356,8 @@ Adfp |char * |form |NN const char *pat \ p |void |free_tied_hv_pool Cp |void |free_tmps ERXp |SV * |get_and_check_backslash_N_name \ - |NN const char *s \ - |NN const char *e \ + |SPTR const char *s \ + |EPTRQ const char *e \ |const bool is_utf8 \ |NN const char **error_msg AOdp |AV * |get_av |NN const char *name \ @@ -1754,8 +1754,8 @@ p |bool |io_close |NN IO *io \ |bool is_explicit \ |bool warn_on_fail ARTdip |Size_t |isC9_STRICT_UTF8_CHAR \ - |NN const U8 * const s0 \ - |NN const U8 * const e + |SPTR const U8 * const s0 \ + |EPTRQ const U8 * const e ARTdmp |bool |is_c9strict_utf8_string \ |NN const U8 *s \ |STRLEN len @@ -1781,8 +1781,8 @@ ARdip |bool |is_safe_syscall|NN const char *pv \ |NN const char *what \ |NN const char *op_name ARTdip |Size_t |isSTRICT_UTF8_CHAR \ - |NN const U8 * const s0 \ - |NN const U8 * const e + |SPTR const U8 * const s0 \ + |EPTRQ const U8 * const e ARTdmp |bool |is_strict_utf8_string \ |NN const U8 *s \ |STRLEN len @@ -1801,14 +1801,14 @@ CRp |bool |is_uni_perl_idcont_ \ |UV c CRp |bool |is_uni_perl_idstart_ \ |UV c -ARTdip |Size_t |isUTF8_CHAR |NN const U8 * const s0 \ - |NN const U8 * const e +ARTdip |Size_t |isUTF8_CHAR |SPTR const U8 * const s0 \ + |EPTRQ const U8 * const e ATdmp |STRLEN |is_utf8_char_buf \ - |NN const U8 *buf \ - |NN const U8 *buf_end + |SPTR const U8 *buf \ + |EPTRQ const U8 *buf_end ARTdip |Size_t |isUTF8_CHAR_flags \ - |NN const U8 * const s0 \ - |NN const U8 * const e \ + |SPTR const U8 * const s0 \ + |EPTRQ const U8 * const e \ |const U32 flags CPRTp |STRLEN |is_utf8_char_helper_ \ |SPTR const U8 * const s \ @@ -2494,11 +2494,10 @@ dp |CV * |newXS_len_flags|NULLOK const char *name \ : Used in pp_hot.c and pp_sys.c p |PerlIO *|nextargv |NN GV *gv \ |bool nomagicopen -AMPTdp |char * |ninstr |NN const char *big \ - |NN const char *bigend \ - |NN const char *little \ - |NN const char *lend - +AMPTdp |char * |ninstr |SPTR const char *big \ + |EPTRQ const char *bigend \ + |SPTR const char *little \ + |EPTRQ const char *lend p |void |no_bareword_filehandle \ |NN const char *fhname Tefpr |void |noperl_die |NN const char *pat \ @@ -2926,10 +2925,10 @@ Admp |void |resume_compcv_and_save \ |NN struct suspended_compcv *buffer Admp |void |resume_compcv_final \ |NN struct suspended_compcv *buffer -APTdp |char * |rninstr |NN const char *big \ - |NN const char *bigend \ - |NN const char *little \ - |NN const char *lend +APTdp |char * |rninstr |SPTR const char *big \ + |EPTRQ const char *bigend \ + |SPTR const char *little \ + |EPTRQ const char *lend p |void |rpeep |NULLOK OP *o Adipx |void |rpp_context |NN SV **mark \ |U8 gimme \ @@ -3195,8 +3194,8 @@ dopx |PerlIO *|start_glob |NN SV *tmpglob \ Adp |I32 |start_subparse |I32 is_format \ |U32 flags ATdmp |bool |strict_utf8_to_uv \ - |NN const U8 * const s \ - |NN const U8 * const e \ + |SPTR const U8 * const s \ + |EPTRQ const U8 * const e \ |NN UV *cp_p \ |NULLOK Size_t *advance_p CRp |NV |str_to_version |NN SV *sv @@ -3829,14 +3828,14 @@ ARTdip |U8 * |utf8_hop_overshoot \ |MPTR const U8 *s \ |SSize_t off \ |SPTR const U8 * const start \ - |NN const U8 * const end \ + |EPTRQ const U8 * const end \ |NULLOK SSize_t *remaining ARTdmp |U8 * |utf8_hop_safe |MPTR const U8 *s \ |SSize_t off \ |SPTR const U8 * const start \ - |NN const U8 * const end -ARdp |STRLEN |utf8_length |NN const U8 *s0 \ - |NN const U8 *e + |EPTRQ const U8 * const end +ARdp |STRLEN |utf8_length |SPTR const U8 *s0 \ + |EPTRQ const U8 *e ATdmp |UV |utf8n_to_uvchr |NN const U8 *s \ |STRLEN curlen \ |NULLOK STRLEN *retlen \ @@ -3897,47 +3896,47 @@ EMXp |U8 * |utf16_to_utf8_reversed \ |NN U8 *d \ |Size_t bytelen \ |NN Size_t *newlen -ATdmp |bool |utf8_to_uv |NN const U8 * const s \ - |NN const U8 * const e \ +ATdmp |bool |utf8_to_uv |SPTR const U8 * const s \ + |EPTRQ const U8 * const e \ |NN UV *cp_p \ |NULLOK Size_t *advance_p ADbdp |UV |utf8_to_uvchr |NN const U8 *s \ |NULLOK STRLEN *retlen AMdip |UV |utf8_to_uvchr_buf \ - |NN const U8 *s \ - |NN const U8 *send \ + |SPTR const U8 *s \ + |EPTRQ const U8 *send \ |NULLOK STRLEN *retlen ATdmp |bool |utf8_to_uv_errors \ - |NN const U8 * const s \ - |NN const U8 * const e \ + |SPTR const U8 * const s \ + |EPTRQ const U8 * const e \ |NN UV *cp_p \ |NULLOK Size_t *advance_p \ |U32 flags \ |NULLOK U32 *errors ATdmp |bool |utf8_to_uv_flags \ - |NN const U8 * const s \ - |NN const U8 * const e \ + |SPTR const U8 * const s \ + |EPTRQ const U8 * const e \ |NN UV *cp_p \ |NULLOK Size_t *advance_p \ |U32 flags -ATdip |bool |utf8_to_uv_msgs|NN const U8 * const s0 \ - |NN const U8 *e \ +ATdip |bool |utf8_to_uv_msgs|SPTR const U8 * const s0 \ + |EPTRQ const U8 *e \ |NN UV *cp_p \ |NULLOK Size_t *advance_p \ |U32 flags \ |NULLOK U32 *errors \ |NULLOK AV **msgs CTp |bool |utf8_to_uv_msgs_helper_ \ - |NN const U8 * const s0 \ - |NN const U8 * const e \ + |SPTR const U8 * const s0 \ + |EPTRQ const U8 * const e \ |NN UV *cp_p \ |NULLOK Size_t *advance_p \ |U32 flags \ |NULLOK U32 *errors \ |NULLOK AV **msgs ATdip |UV |utf8_to_uv_or_die \ - |NN const U8 * const s \ - |NN const U8 *e \ + |SPTR const U8 * const s \ + |EPTRQ const U8 *e \ |NULLOK Size_t *advance_p CDbdp |UV |utf8_to_uvuni |NN const U8 *s \ |NULLOK STRLEN *retlen @@ -3986,8 +3985,8 @@ EXdpx |bool |validate_proto |NN SV *name \ |bool warn \ |bool curstash Adp |bool |valid_identifier_pve \ - |NN const char *s \ - |NN const char *end \ + |SPTR const char *s \ + |EPTRQ const char *end \ |U32 flags Adp |bool |valid_identifier_pvn \ |NN const char *s \ @@ -4521,24 +4520,24 @@ ERXp |SV * |_setup_canned_invlist \ ERXp |const char *|form_alien_digit_msg \ |const U8 which \ |const STRLEN valids_len \ - |NN const char * const first_bad \ - |NN const char * const send \ + |SPTR const char * const first_bad \ + |EPTR const char * const send \ |const bool UTF \ |const bool braced ERXp |bool |grok_bslash_c |const char source \ |NN U8 *result \ |NN const char **message \ |NULLOK U32 *packed_warn -ERXp |bool |grok_bslash_o |NN char **s \ - |NN const char * const send \ +ERXp |bool |grok_bslash_o |SPTR char **s \ + |EPTRQ const char * const send \ |NN UV *uv \ |NN const char **message \ |NULLOK U32 *packed_warn \ |const bool strict \ |const bool allow_UV_MAX \ |const bool utf8 -ERXp |bool |grok_bslash_x |NN char **s \ - |NN const char * const send \ +ERXp |bool |grok_bslash_x |SPTR char **s \ + |EPTRQ const char * const send \ |NN UV *uv \ |NN const char **message \ |NULLOK U32 *packed_warn \ @@ -4837,8 +4836,8 @@ S |void |new_collate |NN const char *newcoll \ |bool force # if defined(DEBUGGING) S |void |print_collxfrm_input_and_return \ - |NN const char *s \ - |NN const char *e \ + |SPTR const char *s \ + |EPTRQ const char *e \ |NULLOK const char *xbuf \ |const STRLEN xlen \ |const bool is_utf8 @@ -4903,8 +4902,8 @@ S |const char *|find_locale_from_environment \ # endif /* defined(USE_LOCALE) */ # if defined(USE_LOCALE) || defined(DEBUGGING) S |const char *|get_displayable_string \ - |NN const char * const s \ - |NN const char * const e \ + |SPTR const char * const s \ + |EPTRQ const char * const e \ |const bool is_utf8 # endif #endif /* defined(PERL_IN_LOCALE_C) */ @@ -5506,8 +5505,8 @@ ETi |Size_t |find_first_differing_byte_pos \ |const Size_t max ES |U32 |get_quantifier_value \ |NN RExC_state_t *pRExC_state \ - |NN const char *start \ - |NN const char *end + |SPTR const char *start \ + |EPTRQ const char *end ES |bool |grok_bslash_N |NN RExC_state_t *pRExC_state \ |NULLOK regnode_offset *nodep \ |NULLOK UV *code_point_p \ @@ -5694,8 +5693,8 @@ ETXp |UV |to_fold_latin1_|const U8 c \ |const unsigned int flags #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) -ERTXp |bool |regcurly |NN const char *s \ - |NN const char *e \ +ERTXp |bool |regcurly |SPTR const char *s \ + |EPTRQ const char *e \ |NULLOK const char *result[5] #endif #if defined(PERL_IN_REGCOMP_DEBUG_C) && defined(DEBUGGING) @@ -5824,20 +5823,20 @@ EWi |void |capture_clear |NN regexp *rex \ |NN const char *str ERS |char * |find_byclass |NN regexp *prog \ |NN const regnode *c \ - |NN char *s \ - |NN const char *strend \ + |SPTR char *s \ + |EPTRQ const char *strend \ |NULLOK regmatch_info *reginfo ERST |U8 * |find_next_masked \ - |NN U8 *s \ - |NN const U8 *send \ + |SPTR U8 *s \ + |EPTRQ const U8 *send \ |const U8 byte \ |const U8 mask -ERST |U8 * |find_span_end |NN U8 *s \ - |NN const U8 *send \ +ERST |U8 * |find_span_end |SPTR U8 *s \ + |EPTRQ const U8 *send \ |const U8 span_byte ERST |U8 * |find_span_end_mask \ - |NN U8 *s \ - |NN const U8 *send \ + |SPTR U8 *s \ + |EPTRQ const U8 *send \ |const U8 span_byte \ |const U8 mask Ei |I32 |foldEQ_latin1_s2_folded \ @@ -6057,20 +6056,20 @@ S |STRLEN |sv_pos_b2u_midway \ S |STRLEN |sv_pos_u2b_cached \ |NN SV * const sv \ |NN MAGIC ** const mgp \ - |NN const U8 * const start \ - |NN const U8 * const send \ + |SPTR const U8 * const start \ + |EPTRQ const U8 * const send \ |STRLEN uoffset \ |STRLEN uoffset0 \ |STRLEN boffset0 ST |STRLEN |sv_pos_u2b_forwards \ - |NN const U8 * const start \ - |NN const U8 * const send \ + |SPTR const U8 * const start \ + |EPTRQ const U8 * const send \ |NN STRLEN * const uoffset \ |NN bool * const at_end \ |NN bool *canonical_position ST |STRLEN |sv_pos_u2b_midway \ - |NN const U8 * const start \ - |NN const U8 *send \ + |SPTR const U8 * const start \ + |EPTRQ const U8 *send \ |STRLEN uoffset \ |const STRLEN uend i |void |sv_unglob |NN SV * const sv \ @@ -6146,10 +6145,10 @@ S |char * |force_word |NN char *start \ |int check_keyword \ |int allow_pack RS |SV * |get_and_check_backslash_N_name_wrapper \ - |NN const char *s \ - |NN const char * const e -S |void |incline |NN const char *s \ - |NN const char *end + |SPTR const char *s \ + |EPTRQ const char * const e +S |void |incline |SPTR const char *s \ + |EPTRQ const char *end S |int |intuit_method |NN char *start \ |NULLOK SV *ioname \ |NULLOK NOCHECK CV *cv @@ -6171,8 +6170,8 @@ So |SV * |new_constant |NULLOK const char *s \ |STRLEN typelen \ |NULLOK const char **error_msg S |void |parse_ident |NN char **s \ - |NN char **d \ - |NN char * const e \ + |SPTR char **d \ + |EPTRQ char * const e \ |int allow_package \ |bool is_utf8 \ |bool check_dollar diff --git a/inline.h b/inline.h index ceef29bb8b72..af1ec26c8b56 100644 --- a/inline.h +++ b/inline.h @@ -3030,8 +3030,6 @@ Perl_utf8_hop_overshoot(const U8 *s, SSize_t off, { PERL_ARGS_ASSERT_UTF8_HOP_OVERSHOOT; - assert(s <= end); - if (off >= 0) { return utf8_hop_forward_overshoot(s, off, end, remaining); } diff --git a/proto.h b/proto.h index e5a5253495b4..c7091999b98e 100644 --- a/proto.h +++ b/proto.h @@ -762,11 +762,11 @@ PERL_CALLCONV char * Perl_delimcpy(char *to, const char *to_end, const char *from, const char *from_end, const int delim, I32 *retlen); #define PERL_ARGS_ASSERT_DELIMCPY \ assert(to); assert(to_end); assert(from); assert(from_end); \ - assert(retlen) + assert(retlen); assert(to <= to_end); assert(from <= from_end) #define PERL_ARGS_ASSERT_DELIMCPY_NO_ESCAPE \ assert(to); assert(to_end); assert(from); assert(from_end); \ - assert(retlen) + assert(retlen); assert(to <= to_end); assert(from <= from_end) PERL_CALLCONV void Perl_despatch_signals(pTHX); @@ -1067,7 +1067,7 @@ PERL_CALLCONV char * Perl_fbm_instr(pTHX_ unsigned char *big, unsigned char *bigend, SV *littlestr, U32 flags) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_FBM_INSTR \ - assert(big); assert(bigend); assert(littlestr) + assert(big); assert(bigend); assert(littlestr); assert(big <= bigend) PERL_CALLCONV SV * Perl_filter_add(pTHX_ filter_t funcp, SV *datasv); @@ -1149,7 +1149,7 @@ Perl_free_tmps(pTHX); #define PERL_ARGS_ASSERT_FREE_TMPS #define PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME \ - assert(s); assert(e); assert(error_msg) + assert(s); assert(e); assert(error_msg); assert(s <= e) PERL_CALLCONV AV * Perl_get_av(pTHX_ const char *name, I32 flags); @@ -3202,7 +3202,8 @@ Perl_ninstr(const char *big, const char *bigend, const char *little, const char __attribute__warn_unused_result__ __attribute__pure__; #define PERL_ARGS_ASSERT_NINSTR \ - assert(big); assert(bigend); assert(little); assert(lend) + assert(big); assert(bigend); assert(little); assert(lend); \ + assert(big <= bigend); assert(little <= lend) PERL_CALLCONV void Perl_no_bareword_filehandle(pTHX_ const char *fhname) @@ -3886,7 +3887,8 @@ Perl_rninstr(const char *big, const char *bigend, const char *little, const char __attribute__warn_unused_result__ __attribute__pure__; #define PERL_ARGS_ASSERT_RNINSTR \ - assert(big); assert(bigend); assert(little); assert(lend) + assert(big); assert(bigend); assert(little); assert(lend); \ + assert(big <= bigend); assert(little <= lend) PERL_CALLCONV void Perl_rpeep(pTHX_ OP *o) @@ -5373,7 +5375,7 @@ PERL_CALLCONV STRLEN Perl_utf8_length(pTHX_ const U8 *s0, const U8 *e) __attribute__warn_unused_result__; #define PERL_ARGS_ASSERT_UTF8_LENGTH \ - assert(s0); assert(e) + assert(s0); assert(e); assert(s0 <= e) PERL_CALLCONV U8 * Perl_utf8_to_bytes(pTHX_ U8 *s, STRLEN *lenp); @@ -5400,7 +5402,7 @@ Perl_utf8_to_uv_flags(const U8 * const s, const U8 * const e, UV *cp_p, Size_t * PERL_CALLCONV bool Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, const U8 * const e, UV *cp_p, Size_t *advance_p, U32 flags, U32 *errors, AV **msgs); #define PERL_ARGS_ASSERT_UTF8_TO_UV_MSGS_HELPER_ \ - assert(s0); assert(e); assert(cp_p) + assert(s0); assert(e); assert(cp_p); assert(s0 <= e) /* PERL_CALLCONV UV Perl_utf8n_to_uvchr(const U8 *s, STRLEN curlen, STRLEN *retlen, const U32 flags); */ @@ -5437,7 +5439,7 @@ Perl_uvoffuni_to_utf8_flags_msgs(pTHX_ U8 *d, UV input_uv, const UV flags, HV ** PERL_CALLCONV bool Perl_valid_identifier_pve(pTHX_ const char *s, const char *end, U32 flags); #define PERL_ARGS_ASSERT_VALID_IDENTIFIER_PVE \ - assert(s); assert(end) + assert(s); assert(end); assert(s <= end) PERL_CALLCONV bool Perl_valid_identifier_pvn(pTHX_ const char *s, STRLEN len, U32 flags); @@ -6911,16 +6913,18 @@ _invlist_union(pTHX_ SV * const a, SV * const b, SV **output); */ #if defined(PERL_IN_DQUOTE_C) || defined(PERL_IN_REGCOMP_C) || \ defined(PERL_IN_TOKE_C) # define PERL_ARGS_ASSERT_FORM_ALIEN_DIGIT_MSG \ - assert(first_bad); assert(send) + assert(first_bad); assert(send); assert(first_bad < send) # define PERL_ARGS_ASSERT_GROK_BSLASH_C \ assert(result); assert(message) # define PERL_ARGS_ASSERT_GROK_BSLASH_O \ - assert(s); assert(send); assert(uv); assert(message) + assert(s); assert(*s); assert(send); assert(uv); assert(message); \ + assert(*s <= send) # define PERL_ARGS_ASSERT_GROK_BSLASH_X \ - assert(s); assert(send); assert(uv); assert(message) + assert(s); assert(*s); assert(send); assert(uv); assert(message); \ + assert(*s <= send) #endif /* defined(PERL_IN_DQUOTE_C) || defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) */ @@ -7291,7 +7295,7 @@ S_new_collate(pTHX_ const char *newcoll, bool force); STATIC void S_print_collxfrm_input_and_return(pTHX_ const char *s, const char *e, const char *xbuf, const STRLEN xlen, const bool is_utf8); # define PERL_ARGS_ASSERT_PRINT_COLLXFRM_INPUT_AND_RETURN \ - assert(s); assert(e) + assert(s); assert(e); assert(s <= e) # endif # endif /* defined(USE_LOCALE_COLLATE) */ @@ -7388,7 +7392,7 @@ S_find_locale_from_environment(pTHX_ const locale_category_index index); STATIC const char * S_get_displayable_string(pTHX_ const char * const s, const char * const e, const bool is_utf8); # define PERL_ARGS_ASSERT_GET_DISPLAYABLE_STRING \ - assert(s); assert(e) + assert(s); assert(e); assert(s <= e) # endif #endif /* defined(PERL_IN_LOCALE_C) */ @@ -8424,7 +8428,7 @@ Perl_invlist_clone(pTHX_ SV * const invlist, SV *newlist); assert(screamer); assert(strbeg <= stringarg) # define PERL_ARGS_ASSERT_GET_QUANTIFIER_VALUE \ - assert(pRExC_state); assert(start); assert(end) + assert(pRExC_state); assert(start); assert(end); assert(start <= end) # define PERL_ARGS_ASSERT_GROK_BSLASH_N \ assert(pRExC_state); assert(flagp) @@ -8680,7 +8684,7 @@ Perl_populate_invlist_from_bitmap(pTHX_ const U8 *bitmap, const Size_t bitmap_le #endif #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_TOKE_C) # define PERL_ARGS_ASSERT_REGCURLY \ - assert(s); assert(e) + assert(s); assert(e); assert(s <= e) # if defined(PERL_CORE) || defined(PERL_EXT) PERL_CALLCONV bool @@ -8878,16 +8882,16 @@ S_unwind_scan_frames(pTHX_ void *p); assert(strbeg <= *curpos) # define PERL_ARGS_ASSERT_FIND_BYCLASS \ - assert(prog); assert(c); assert(s); assert(strend) + assert(prog); assert(c); assert(s); assert(strend); assert(s <= strend) # define PERL_ARGS_ASSERT_FIND_NEXT_MASKED \ - assert(s); assert(send) + assert(s); assert(send); assert(s <= send) # define PERL_ARGS_ASSERT_FIND_SPAN_END \ - assert(s); assert(send) + assert(s); assert(send); assert(s <= send) # define PERL_ARGS_ASSERT_FIND_SPAN_END_MASK \ - assert(s); assert(send) + assert(s); assert(send); assert(s <= send) # define PERL_ARGS_ASSERT_ISFOO_LC @@ -9269,18 +9273,19 @@ S_sv_pos_b2u_midway(pTHX_ const U8 * const s, const U8 * const target, const U8 STATIC STRLEN S_sv_pos_u2b_cached(pTHX_ SV * const sv, MAGIC ** const mgp, const U8 * const start, const U8 * const send, STRLEN uoffset, STRLEN uoffset0, STRLEN boffset0); # define PERL_ARGS_ASSERT_SV_POS_U2B_CACHED \ - assert(sv); assert(mgp); assert(start); assert(send) + assert(sv); assert(mgp); assert(start); assert(send); \ + assert(start <= send) STATIC STRLEN S_sv_pos_u2b_forwards(const U8 * const start, const U8 * const send, STRLEN * const uoffset, bool * const at_end, bool *canonical_position); # define PERL_ARGS_ASSERT_SV_POS_U2B_FORWARDS \ assert(start); assert(send); assert(uoffset); assert(at_end); \ - assert(canonical_position) + assert(canonical_position); assert(start <= send) STATIC STRLEN S_sv_pos_u2b_midway(const U8 * const start, const U8 *send, STRLEN uoffset, const STRLEN uend); # define PERL_ARGS_ASSERT_SV_POS_U2B_MIDWAY \ - assert(start); assert(send) + assert(start); assert(send); assert(start <= send) STATIC void S_utf8_mg_len_cache_update(pTHX_ SV * const sv, MAGIC ** const mgp, const STRLEN ulen); @@ -9425,12 +9430,12 @@ STATIC SV * S_get_and_check_backslash_N_name_wrapper(pTHX_ const char *s, const char * const e) __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME_WRAPPER \ - assert(s); assert(e) + assert(s); assert(e); assert(s <= e) STATIC void S_incline(pTHX_ const char *s, const char *end); # define PERL_ARGS_ASSERT_INCLINE \ - assert(s); assert(end) + assert(s); assert(end); assert(s <= end) STATIC int S_intuit_method(pTHX_ char *start, SV *ioname, CV *cv); @@ -9460,7 +9465,7 @@ S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen, STATIC void S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8, bool check_dollar); # define PERL_ARGS_ASSERT_PARSE_IDENT \ - assert(s); assert(d); assert(e) + assert(s); assert(d); assert(*d); assert(e); assert(*d <= e) STATIC int S_pending_ident(pTHX); @@ -9957,25 +9962,25 @@ PERL_STATIC_INLINE Size_t Perl_isC9_STRICT_UTF8_CHAR(const U8 * const s0, const U8 * const e) __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_ISC9_STRICT_UTF8_CHAR \ - assert(s0); assert(e) + assert(s0); assert(e); assert(s0 <= e) PERL_STATIC_INLINE Size_t Perl_isSTRICT_UTF8_CHAR(const U8 * const s0, const U8 * const e) __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_ISSTRICT_UTF8_CHAR \ - assert(s0); assert(e) + assert(s0); assert(e); assert(s0 <= e) PERL_STATIC_INLINE Size_t Perl_isUTF8_CHAR(const U8 * const s0, const U8 * const e) __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_ISUTF8_CHAR \ - assert(s0); assert(e) + assert(s0); assert(e); assert(s0 <= e) PERL_STATIC_INLINE Size_t Perl_isUTF8_CHAR_flags(const U8 * const s0, const U8 * const e, const U32 flags) __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_ISUTF8_CHAR_FLAGS \ - assert(s0); assert(e) + assert(s0); assert(e); assert(s0 <= e) PERL_STATIC_INLINE bool Perl_is_c9strict_utf8_string_loclen(const U8 *s, STRLEN len, const U8 **ep, STRLEN *el); @@ -10315,7 +10320,8 @@ PERL_STATIC_INLINE U8 * Perl_utf8_hop_overshoot(const U8 *s, SSize_t off, const U8 * const start, const U8 * const end, SSize_t *remaining) __attribute__warn_unused_result__; # define PERL_ARGS_ASSERT_UTF8_HOP_OVERSHOOT \ - assert(s); assert(start); assert(end); assert(start <= s) + assert(s); assert(start); assert(end); assert(start <= s); \ + assert(s <= end) PERL_STATIC_INLINE bool Perl_utf8_to_bytes_new_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp, void **free_me); @@ -10335,17 +10341,17 @@ Perl_utf8_to_bytes_temp_pv(pTHX_ U8 const **s_ptr, STRLEN *lenp); PERL_STATIC_INLINE bool Perl_utf8_to_uv_msgs(const U8 * const s0, const U8 *e, UV *cp_p, Size_t *advance_p, U32 flags, U32 *errors, AV **msgs); # define PERL_ARGS_ASSERT_UTF8_TO_UV_MSGS \ - assert(s0); assert(e); assert(cp_p) + assert(s0); assert(e); assert(cp_p); assert(s0 <= e) PERL_STATIC_INLINE UV Perl_utf8_to_uv_or_die(const U8 * const s, const U8 *e, Size_t *advance_p); # define PERL_ARGS_ASSERT_UTF8_TO_UV_OR_DIE \ - assert(s); assert(e) + assert(s); assert(e); assert(s <= e) PERL_STATIC_INLINE UV Perl_utf8_to_uvchr_buf(pTHX_ const U8 *s, const U8 *send, STRLEN *retlen); # define PERL_ARGS_ASSERT_UTF8_TO_UVCHR_BUF \ - assert(s); assert(send) + assert(s); assert(send); assert(s <= send) PERL_STATIC_INLINE UV Perl_utf8n_to_uvchr_msgs(const U8 * const s0, STRLEN curlen, STRLEN *retlen, const U32 flags, U32 *errors, AV **msgs); diff --git a/regexec.c b/regexec.c index b0e7dde98c14..be9ec2cf338f 100644 --- a/regexec.c +++ b/regexec.c @@ -627,7 +627,6 @@ STATIC U8 * S_find_span_end(U8 * s, const U8 * send, const U8 span_byte) { PERL_ARGS_ASSERT_FIND_SPAN_END; - assert(send >= s); /* Returns the position of the first byte in the sequence between 's' and * 'send-1' inclusive that isn't 'span_byte'; returns 'send' if none found. @@ -700,7 +699,6 @@ STATIC U8 * S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask) { PERL_ARGS_ASSERT_FIND_NEXT_MASKED; - assert(send >= s); assert((byte & mask) == byte); /* Returns the position of the first byte in the sequence between 's' @@ -777,7 +775,6 @@ STATIC U8 * S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask) { PERL_ARGS_ASSERT_FIND_SPAN_END_MASK; - assert(send >= s); assert((span_byte & mask) == span_byte); /* Returns the position of the first byte in the sequence between 's' and diff --git a/toke.c b/toke.c index 09067b1550ba..e7d92ce0cf8f 100644 --- a/toke.c +++ b/toke.c @@ -1887,7 +1887,6 @@ STATIC void S_incline(pTHX_ const char *s, const char *end) { PERL_ARGS_ASSERT_INCLINE; - assert(end >= s); const char *t; const char *n; @@ -2855,7 +2854,6 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s, const char ** error_msg) { PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME; - assert(e >= s); assert(s > (char *) 3); /* points to first character of interior of \N{}, to one beyond the diff --git a/util.c b/util.c index e0849d315916..97ca9a55e712 100644 --- a/util.c +++ b/util.c @@ -619,9 +619,6 @@ Perl_delimcpy_no_escape(char *to, const char *to_end, ptrdiff_t to_len = to_end - to; SSize_t copy_len; - assert(from_len >= 0); - assert(to_len >= 0); - /* Look for the first delimiter in the source */ delim_pos = (const char *) memchr(from, delim, from_len); @@ -718,8 +715,6 @@ Perl_delimcpy(char *to, const char *to_end, const int delim, I32 *retlen) { PERL_ARGS_ASSERT_DELIMCPY; - assert(from_end >= from); - assert(to_end >= to); const char * const orig_to = to; ptrdiff_t copy_len = 0; @@ -1100,7 +1095,6 @@ char * Perl_fbm_instr(pTHX_ unsigned char *big, unsigned char *bigend, SV *littlestr, U32 flags) { PERL_ARGS_ASSERT_FBM_INSTR; - assert(bigend >= big); unsigned char *s; STRLEN l;