-
Notifications
You must be signed in to change notification settings - Fork 597
embed.fnc: Add ability to assert string ptr is in-bounds #23774
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9af0992
395d2fe
fd09509
1998ffd
6d8318d
fcce41e
6a8042c
541d899
a967a9c
c324bda
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -724,6 +724,9 @@ S_does_utf8_overflow(const U8 * const s, const U8 * e) | |
STRLEN | ||
Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags) | ||
{ | ||
PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_; | ||
assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE)); | ||
|
||
Comment on lines
+727
to
+729
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Commit embed.fnc: Add ptr assertions for apparently non-problematic I think it would be better to move some of these changes in a separate commit.
Thinking some more: how I would likely do it:
In commit 3 it should be obvious that these are now moved to the PERL_ARGS_ASSERT macro. In the current commit it's not obvious because there are many changes in proto.h) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done, for all affected .c files |
||
SSize_t len, full_len; | ||
|
||
/* An internal helper function. | ||
|
@@ -752,11 +755,6 @@ Perl_is_utf8_char_helper_(const U8 * const s, const U8 * e, const U32 flags) | |
* | ||
*/ | ||
|
||
PERL_ARGS_ASSERT_IS_UTF8_CHAR_HELPER_; | ||
|
||
assert(e > s); | ||
assert(0 == (flags & ~UTF8_DISALLOW_ILLEGAL_INTERCHANGE)); | ||
|
||
full_len = UTF8SKIP(s); | ||
|
||
len = e - s; | ||
|
@@ -841,6 +839,9 @@ Size_t | |
Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e, | ||
const bool require_partial) | ||
{ | ||
PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_; | ||
assert(*s0 == I8_TO_NATIVE_UTF8(0xFF)); | ||
|
||
/* This is called to determine if the UTF-8 sequence starting at s0 and | ||
* continuing for up to one full character of bytes, but looking no further | ||
* than 'e - 1', is legal. *s0 must be 0xFF (or whatever the native | ||
|
@@ -867,11 +868,6 @@ Perl_is_utf8_FF_helper_(const U8 * const s0, const U8 * const e, | |
const U8 *s = s0 + 1; | ||
const U8 *send = e; | ||
|
||
PERL_ARGS_ASSERT_IS_UTF8_FF_HELPER_; | ||
|
||
assert(s0 < e); | ||
assert(*s0 == I8_TO_NATIVE_UTF8(0xFF)); | ||
|
||
send = s + MIN(UTF8_MAXBYTES - 1, e - s); | ||
while (s < send) { | ||
if (! UTF8_IS_CONTINUATION(*s)) { | ||
|
@@ -4247,6 +4243,8 @@ STATIC UV | |
S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e, | ||
U8 * ustrp, STRLEN *lenp) | ||
{ | ||
PERL_ARGS_ASSERT_TURKIC_FC; | ||
|
||
/* Returns 0 if the foldcase of the input UTF-8 encoded sequence from | ||
* p0..e-1 according to Turkic rules is the same as for non-Turkic. | ||
* Otherwise, it returns the first code point of the Turkic foldcased | ||
|
@@ -4257,9 +4255,6 @@ S_turkic_fc(pTHX_ const U8 * const p, const U8 * const e, | |
* I WITH DOT ABOVE form a case pair, as do 'I' and LATIN SMALL LETTER | ||
* DOTLESS I */ | ||
|
||
PERL_ARGS_ASSERT_TURKIC_FC; | ||
assert(e > p); | ||
|
||
if (UNLIKELY(*p == 'I')) { | ||
*lenp = 2; | ||
ustrp[0] = UTF8_TWO_BYTE_HI(LATIN_SMALL_LETTER_DOTLESS_I); | ||
|
@@ -4282,15 +4277,14 @@ STATIC UV | |
S_turkic_lc(pTHX_ const U8 * const p0, const U8 * const e, | ||
U8 * ustrp, STRLEN *lenp) | ||
{ | ||
PERL_ARGS_ASSERT_TURKIC_LC; | ||
|
||
/* Returns 0 if the lowercase of the input UTF-8 encoded sequence from | ||
* p0..e-1 according to Turkic rules is the same as for non-Turkic. | ||
* Otherwise, it returns the first code point of the Turkic lowercased | ||
* sequence, and the entire sequence will be stored in *ustrp. ustrp will | ||
* contain *lenp bytes */ | ||
|
||
PERL_ARGS_ASSERT_TURKIC_LC; | ||
assert(e > p0); | ||
|
||
/* A 'I' requires context as to what to do */ | ||
if (UNLIKELY(*p0 == 'I')) { | ||
const U8 * p = p0 + 1; | ||
|
@@ -4328,6 +4322,8 @@ STATIC UV | |
S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e, | ||
U8 * ustrp, STRLEN *lenp) | ||
{ | ||
PERL_ARGS_ASSERT_TURKIC_UC; | ||
|
||
/* Returns 0 if the upper or title-case of the input UTF-8 encoded sequence | ||
* from p0..e-1 according to Turkic rules is the same as for non-Turkic. | ||
* Otherwise, it returns the first code point of the Turkic upper or | ||
|
@@ -4338,9 +4334,6 @@ S_turkic_uc(pTHX_ const U8 * const p, const U8 * const e, | |
* I WITH DOT ABOVE form a case pair, as do 'I' and LATIN SMALL LETTER | ||
* DOTLESS I */ | ||
|
||
PERL_ARGS_ASSERT_TURKIC_UC; | ||
assert(e > p); | ||
|
||
if (*p == 'i') { | ||
*lenp = 2; | ||
ustrp[0] = UTF8_TWO_BYTE_HI(LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Commit Move some ARGS_ASSERT macros to function start
Personally I would add some context in the commit message on why it is moved/why it can be moved.
You already wrote it in embed.fnc:
Maybe something like: historically the asserts had tob be placed after any declarations because of the C89 Standard. This is no longer true with the C99 standard we're not following. So move these to the beginning of the function for clarity.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
changed to include the gist of your suggestion