Skip to content
2 changes: 1 addition & 1 deletion autodoc.pl
Original file line number Diff line number Diff line change
Expand Up @@ -620,7 +620,7 @@ sub check_and_add_proto_defn {
$flags .= "n" if $flags =~ /#/; # No threads, arguments for #ifdef

my @munged_args= $args_ref->@*;
s/\b(?:NN|NULLOK)\b\s+//g for @munged_args;
s/\b(?:NN|NULLOK|[SM]PTR|EPTRQ?)\b\s+//g for @munged_args;

my $flags_sans_d = $flags;
my $docs_expected = $flags_sans_d =~ s/d//g;
Expand Down
501 changes: 286 additions & 215 deletions embed.fnc

Large diffs are not rendered by default.

3 changes: 0 additions & 3 deletions inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -2963,7 +2963,6 @@ Perl_utf8_hop_back_overshoot(const U8 *s, SSize_t off,
const U8 * const start, SSize_t *remaining)
{
PERL_ARGS_ASSERT_UTF8_HOP_BACK_OVERSHOOT;
assert(start <= s);
assert(off <= 0);

/* Note: if we know that the input is well-formed, we can do per-word
Expand Down Expand Up @@ -3031,8 +3030,6 @@ Perl_utf8_hop_overshoot(const U8 *s, SSize_t off,
{
PERL_ARGS_ASSERT_UTF8_HOP_OVERSHOOT;

assert(start <= s && s <= end);

if (off >= 0) {
return utf8_hop_forward_overshoot(s, off, end, remaining);
}
Expand Down
174 changes: 98 additions & 76 deletions proto.h

Large diffs are not rendered by default.

275 changes: 231 additions & 44 deletions regen/embed.pl

Large diffs are not rendered by default.

26 changes: 8 additions & 18 deletions regexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -626,14 +626,12 @@ S_isFOO_utf8_lc(pTHX_ const U8 classnum, const U8* character, const U8* e)
STATIC U8 *
S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
{
PERL_ARGS_ASSERT_FIND_SPAN_END;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Commit Move some ARGS_ASSERT macros to function start

Personally I would add some context in the commit message on why it is moved/why it can be moved.

You already wrote it in embed.fnc:

: (Much of the perl core was written assuming the ARGS_ASSERT macro needed to
: be placed after any declarations because of the C89 Standard. That is no
: longer true with C99; feel free when modifying code in the vicinity to move
: this call to the very beginning of the function.)

Maybe something like: historically the asserts had tob be placed after any declarations because of the C89 Standard. This is no longer true with the C99 standard we're not following. So move these to the beginning of the function for clarity.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

changed to include the gist of your suggestion


/* Returns the position of the first byte in the sequence between 's' and
* 'send-1' inclusive that isn't 'span_byte'; returns 'send' if none found.
* */

PERL_ARGS_ASSERT_FIND_SPAN_END;

assert(send >= s);

if ((STRLEN) (send - s) >= PERL_WORDSIZE
+ PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
- (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
Expand Down Expand Up @@ -700,16 +698,14 @@ S_find_span_end(U8 * s, const U8 * send, const U8 span_byte)
STATIC U8 *
S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
{
PERL_ARGS_ASSERT_FIND_NEXT_MASKED;
assert((byte & mask) == byte);

/* Returns the position of the first byte in the sequence between 's'
* and 'send-1' inclusive that when ANDed with 'mask' yields 'byte';
* returns 'send' if none found. It uses word-level operations instead of
* byte to speed up the process */

PERL_ARGS_ASSERT_FIND_NEXT_MASKED;

assert(send >= s);
assert((byte & mask) == byte);

#ifndef EBCDIC

if ((STRLEN) (send - s) >= PERL_WORDSIZE
Expand Down Expand Up @@ -778,17 +774,15 @@ S_find_next_masked(U8 * s, const U8 * send, const U8 byte, const U8 mask)
STATIC U8 *
S_find_span_end_mask(U8 * s, const U8 * send, const U8 span_byte, const U8 mask)
{
PERL_ARGS_ASSERT_FIND_SPAN_END_MASK;
assert((span_byte & mask) == span_byte);

/* Returns the position of the first byte in the sequence between 's' and
* 'send-1' inclusive that when ANDed with 'mask' isn't 'span_byte'.
* 'span_byte' should have been ANDed with 'mask' in the call of this
* function. Returns 'send' if none found. Works like find_span_end(),
* except for the AND */

PERL_ARGS_ASSERT_FIND_SPAN_END_MASK;

assert(send >= s);
assert((span_byte & mask) == span_byte);

if ((STRLEN) (send - s) >= PERL_WORDSIZE
+ PERL_WORDSIZE * PERL_IS_SUBWORD_ADDR(s)
- (PTR2nat(s) & PERL_WORD_BOUNDARY_MASK))
Expand Down Expand Up @@ -11793,10 +11787,6 @@ Perl_isSCRIPT_RUN(pTHX_ const U8 * s, const U8 * send, const bool utf8_target)
bool retval = true;
SCX_enum * ret_script = NULL;

assert(send >= s);

PERL_ARGS_ASSERT_ISSCRIPT_RUN;

/* All code points in 0..255 are either Common or Latin, so must be a
* script run. We can return immediately unless we need to know which
* script it is. */
Expand Down
15 changes: 5 additions & 10 deletions toke.c
Original file line number Diff line number Diff line change
Expand Up @@ -1886,16 +1886,14 @@ Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn, bool curstash)
STATIC void
S_incline(pTHX_ const char *s, const char *end)
{
PERL_ARGS_ASSERT_INCLINE;

const char *t;
const char *n;
const char *e;
line_t line_num;
UV uv;

PERL_ARGS_ASSERT_INCLINE;

assert(end >= s);

COPLINE_INC_WITH_HERELINES;
if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
&& s+1 == PL_bufend && *s == ';') {
Expand Down Expand Up @@ -2855,6 +2853,9 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
const bool is_utf8,
const char ** error_msg)
{
PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
assert(s > (char *) 3);

/* <s> points to first character of interior of \N{}, <e> to one beyond the
* interior, hence to the "}". Finds what the name resolves to, returning
* an SV* containing it; NULL if no valid one found.
Expand All @@ -2875,12 +2876,6 @@ Perl_get_and_check_backslash_N_name(pTHX_ const char* s,
const char* context = s - 3;
STRLEN context_len = e - context + 1; /* include all of \N{...} */


PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;

assert(e >= s);
assert(s > (char *) 3);

while (s < e && isBLANK(*s)) {
s++;
}
Expand Down
31 changes: 12 additions & 19 deletions utf8.c
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,9 @@ S_does_utf8_overflow(const U8 * const s, const U8 * e)
STRLEN
Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags)
{
PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_;
assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE));

Comment on lines +727 to +729

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Commit embed.fnc: Add ptr assertions for apparently non-problematic

I think it would be better to move some of these changes in a separate commit.
In this commit two things happen:

  • the assert calls are moved to the beginning of the function (since we can now do that)
  • one of the asserts (assert(e > s)) is moved the PERL_ARGS_ASSERT macro.

Thinking some more: how I would likely do it:

  • commit 1: add SPTR/EPTR/MPTR in embed.fnc except for the functions in the utf8.c file
  • commit 2: move the asserts to the beginning of the function (and keep the assert e > s)
  • commit 3: add the SPTR/EPTR/MPTR in embed.fnc for these function and remove the assert(e > s).

In commit 3 it should be obvious that these are now moved to the PERL_ARGS_ASSERT macro. In the current commit it's not obvious because there are many changes in proto.h)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, for all affected .c files

SSize_t len, full_len;

/* An internal helper function.
Expand Down Expand Up @@ -752,11 +755,6 @@ Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags)
*
*/

PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_;

assert(e > s);
assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE));

full_len = UTF8SKIP(s);

len = e - s;
Expand Down Expand Up @@ -841,6 +839,9 @@ Size_t
Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e,
const bool require_partial)
{
PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_;
assert(*s0 == I8_TO_NATIVE_UTF8(0xFF));

/* This is called to determine if the UTF-8 sequence starting at s0 and
* continuing for up to one full character of bytes, but looking no further
* than 'e - 1', is legal. *s0 must be 0xFF (or whatever the native
Expand All @@ -867,11 +868,6 @@ Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e,
const U8 *s = s0 + 1;
const U8 *send = e;

PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_;

assert(s0 < e);
assert(*s0 == I8_TO_NATIVE_UTF8(0xFF));

send = s + MIN(UTF8_MAXBYTES - 1, e - s);
while (s < send) {
if (! UTF8_IS_CONTINUATION(*s)) {
Expand Down Expand Up @@ -4247,6 +4243,8 @@ STATIC UV
S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e,
U8 * ustrp, STRLEN *lenp)
{
PERL_ARGS_ASSERT_TURKIC_FC;

/* Returns 0 if the foldcase of the input UTF-8 encoded sequence from
* p0..e-1 according to Turkic rules is the same as for non-Turkic.
* Otherwise, it returns the first code point of the Turkic foldcased
Expand All @@ -4257,9 +4255,6 @@ S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e,
* I WITH DOT ABOVE form a case pair, as do 'I' and LATIN SMALL LETTER
* DOTLESS I */

PERL_ARGS_ASSERT_TURKIC_FC;
assert(e > p);

if (UNLIKELY(*p == 'I')) {
*lenp = 2;
ustrp[0] = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I);
Expand All @@ -4282,15 +4277,14 @@ STATIC UV
S_turkic_lc(pTHX_ const U8 * const p0, const U8 * const e,
U8 * ustrp, STRLEN *lenp)
{
PERL_ARGS_ASSERT_TURKIC_LC;

/* Returns 0 if the lowercase of the input UTF-8 encoded sequence from
* p0..e-1 according to Turkic rules is the same as for non-Turkic.
* Otherwise, it returns the first code point of the Turkic lowercased
* sequence, and the entire sequence will be stored in *ustrp. ustrp will
* contain *lenp bytes */

PERL_ARGS_ASSERT_TURKIC_LC;
assert(e > p0);

/* A 'I' requires context as to what to do */
if (UNLIKELY(*p0 == 'I')) {
const U8 * p = p0 + 1;
Expand Down Expand Up @@ -4328,6 +4322,8 @@ STATIC UV
S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e,
U8 * ustrp, STRLEN *lenp)
{
PERL_ARGS_ASSERT_TURKIC_UC;

/* Returns 0 if the upper or title-case of the input UTF-8 encoded sequence
* from p0..e-1 according to Turkic rules is the same as for non-Turkic.
* Otherwise, it returns the first code point of the Turkic upper or
Expand All @@ -4338,9 +4334,6 @@ S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e,
* I WITH DOT ABOVE form a case pair, as do 'I' and LATIN SMALL LETTER
* DOTLESS I */

PERL_ARGS_ASSERT_TURKIC_UC;
assert(e > p);

if (*p == 'i') {
*lenp = 2;
ustrp[0] = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE);
Expand Down
19 changes: 6 additions & 13 deletions util.c
Original file line number Diff line number Diff line change
Expand Up @@ -612,16 +612,13 @@ Perl_delimcpy_no_escape(char *to, const char *to_end,
const char *from, const char *from_end,
const int delim, I32 *retlen)
{
PERL_ARGS_ASSERT_DELIMCPY_NO_ESCAPE;

const char * delim_pos;
ptrdiff_t from_len = from_end - from;
ptrdiff_t to_len = to_end - to;
SSize_t copy_len;

PERL_ARGS_ASSERT_DELIMCPY_NO_ESCAPE;

assert(from_len >= 0);
assert(to_len >= 0);

/* Look for the first delimiter in the source */
delim_pos = (const char *) memchr(from, delim, from_len);

Expand Down Expand Up @@ -717,14 +714,12 @@ Perl_delimcpy(char *to, const char *to_end,
const char *from, const char *from_end,
const int delim, I32 *retlen)
{
PERL_ARGS_ASSERT_DELIMCPY;

const char * const orig_to = to;
ptrdiff_t copy_len = 0;
bool stopped_early = FALSE; /* Ran out of room to copy to */

PERL_ARGS_ASSERT_DELIMCPY;
assert(from_end >= from);
assert(to_end >= to);

/* Don't use the loop for the trivial case of the first character being the
* delimiter; otherwise would have to worry inside the loop about backing
* up before the start of 'from' */
Expand Down Expand Up @@ -1099,6 +1094,8 @@ a littlestr of "ab\n", SvTAIL matches as:
char *
Perl_fbm_instr(pTHX_ unsigned char *big, unsigned char *bigend, SV *littlestr, U32 flags)
{
PERL_ARGS_ASSERT_FBM_INSTR;

unsigned char *s;
STRLEN l;
const unsigned char *little = (const unsigned char *)SvPV_const(littlestr,l);
Expand All @@ -1107,10 +1104,6 @@ Perl_fbm_instr(pTHX_ unsigned char *big, unsigned char *bigend, SV *littlestr, U
bool valid = SvVALID(littlestr);
bool tail = valid ? cBOOL(SvTAIL(littlestr)) : FALSE;

PERL_ARGS_ASSERT_FBM_INSTR;

assert(bigend >= big);

if ((STRLEN)(bigend - big) < littlelen) {
if ( tail
&& ((STRLEN)(bigend - big) == littlelen - 1)
Expand Down
Loading