Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions Lib/test/test_fstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -1866,6 +1866,65 @@ def test_gh139516(self):
f.write('''def f(a): pass\nf"{f(a=lambda: 'à'\n)}"'''.encode())
assert_python_ok(script)

def test_gh141271(self):
# Checks the text echoed by the '=' debug specifier when the expression
# contains '#' characters, both inside string literals and as comments.
# A '#' inside a triple-quoted string literal is kept in the echoed text.
self.assertEqual(f'''{""" " # not comment """=}''', '""" " # not comment """=\' " # not comment \'')

# Same as above, with the string literal spanning two lines.
self.assertEqual(
f'''{""" " # not comment
"""=}''',
'''""" " # not comment
"""=\' " # not comment\\n\'''')

# A real comment after a string holding an escaped quote is dropped from
# the echoed text; the newline that ended the comment is kept.
self.assertEqual(
f'''{"\"" # comment
=}''',
'"\\"" \n=\'"\'')

# Comments A, C and D sit in expression context and are dropped; the
# "# not comment" text B and E sits inside string literals and is kept.
self.assertEqual(
f'{ # comment A
(f'''
# not comment B
{ # comment C '
3 # comment D
* 2}''', '\n# not comment E\n6')=}',
" \n(f'''\n# not comment B\n{ \n3 \n* 2}''', '\\n# not comment E\\n6')=('\\n# not comment B\\n6', '\\n# not comment E\\n6')")

# Nested debug f-strings whose comments contain quote characters; the
# expected text shows the comments removed at each nesting level.
self.assertEqual(
f'{
f'{# 1 '
f"{# 2 "
None
=}"
=}'
}',
'''
f"{
None
=}"
=\'\\nNone\\n=None\'''')

# Same idea nested four levels deep, using all four quote styles.
self.assertEqual(
f'{
f'{# 1 '
f"{# 2 "
f'''{# 3 '
f"""{# 4 "
None
=}"""
=}'''
=}"
=}'
}',
'''
f"{
f\'\'\'{
f"""{
None
=}"""
=}\'\'\'
=}"
=\'\\nf\\\'\\\'\\\'{\\nf"""{\\nNone\\n=}"""\\n=}\\\'\\\'\\\'\\n=\\\'\\\\nf"""{\\\\nNone\\\\n=}"""\\\\n=\\\\\\\'\\\\\\\\nNone\\\\\\\\n=None\\\\\\\'\\\'\'''')


if __name__ == '__main__':
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix handling of quotes and nested comments in complex f-strings and t-strings.
218 changes: 138 additions & 80 deletions Parser/lexer/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -110,103 +110,161 @@
}
}

static int
set_ftstring_expr(struct tok_state* tok, struct token *token, char c) {
assert(token != NULL);
assert(c == '}' || c == ':' || c == '!');
tokenizer_mode *tok_mode = TOK_GET_MODE(tok);

if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
return 0;
}
PyObject *res = NULL;

// Look for a # character outside of string literals
int hash_detected = 0;
int in_string = 0;
char quote_char = 0;
static Py_ssize_t
recurse_set_ftstring_expr(tokenizer_mode *tok_mode, char *result, Py_ssize_t *in_pos, Py_ssize_t out_pos)
{
Py_ssize_t i = *in_pos;
Py_ssize_t j = out_pos;
Py_ssize_t in_pos_start = *in_pos;
Py_ssize_t in_pos_end = tok_mode->last_expr_size - tok_mode->last_expr_end;
char *last_expr_buffer = tok_mode->last_expr_buffer;

int curly_depth = 1; // count these in expressions because of sets and dicts
int in_string = 0; // inside a string, constant or f or t
int is_string_ft; // string we are inside of is an f or t-string
char quote_char;
int is_triple_quote;

// Process each character
while (i < in_pos_end) {
char ch = last_expr_buffer[i++];

if (in_string) {
// Skip escaped characters (also harmless line continuations)
if (ch == '\\') {
result[j++] = '\\';
if (i < in_pos_end) {
result[j++] = last_expr_buffer[i++];
}
continue;
}

for (Py_ssize_t i = 0; i < tok_mode->last_expr_size - tok_mode->last_expr_end; i++) {
char ch = tok_mode->last_expr_buffer[i];
// Check for string end quotes
if (ch == quote_char) {
if (!is_triple_quote || i + 1 >= in_pos_end) {
in_string = 0;
}
else if (last_expr_buffer[i] == ch && last_expr_buffer[i + 1] == ch) {

Check warning on line 147 in Parser/lexer/lexer.c

View workflow job for this annotation

GitHub Actions / Cross build Linux

‘quote_char’ may be used uninitialized [-Wmaybe-uninitialized]
in_string = 0;
result[j++] = ch;

Check warning on line 149 in Parser/lexer/lexer.c

View workflow job for this annotation

GitHub Actions / Address sanitizer (ubuntu-24.04)

‘quote_char’ may be used uninitialized in this function [-Wmaybe-uninitialized]
result[j++] = ch;
i += 2;
}
result[j++] = ch;
continue;
}

// Skip escaped characters
if (ch == '\\') {
i++;
continue;
// If inside an f or t-string then check for expressions
if (ch == '{') {
result[j++] = '{';
if (is_string_ft && i < in_pos_end) {
// A doubled '{{' is a literal brace and doesn't start an expression
if (last_expr_buffer[i] == '{') {
result[j++] = '{';
i++;
}
else {
j = recurse_set_ftstring_expr(tok_mode, result, &i, j);
}
}
continue;
}
}
// In ftstring expression outside of actual string part
else {
// Skip comments
if (ch == '#') {
while (i < in_pos_end) {
if (last_expr_buffer[i++] == '\n') {
result[j++] = '\n';
break;
}
}
continue;
}

// Handle quotes
if (ch == '"' || ch == '\'') {
// The following if/else block works because there is an odd number
// of quotes in STRING tokens and the lexer only ever reaches this
// function with valid STRING tokens.
// For example: """hello"""
// First quote: in_string = 1
// Second quote: in_string = 0
// Third quote: in_string = 1
if (!in_string) {
in_string = 1;
// Handle string start quotes
if (ch == '"' || ch == '\'') {
quote_char = ch;
in_string = 1;
is_string_ft = 0;
is_triple_quote = 0;

// Check preceding characters to see if this is an f or t-string
if ((i - 2) >= in_pos_start) {
char ch_prev = last_expr_buffer[i - 2];

if (ch_prev == 'f' || ch_prev == 't') {
is_string_ft = 1;
}
// If this may be a raw f or t-string then check the pre-preceding char
else if (ch_prev == 'r' && (i - 3) >= in_pos_start) {
char ch_prev_prev = last_expr_buffer[i - 3];

if (ch_prev_prev == 'f' || ch_prev_prev == 't') {
is_string_ft = 1;
}
}
}

// Check for triple quotes
if (i + 2 <= in_pos_end && last_expr_buffer[i] == ch && last_expr_buffer[i + 1] == ch) {
is_triple_quote = 1;
result[j++] = ch;
result[j++] = ch;
i += 2;
}
result[j++] = ch;

continue;
}
else if (ch == quote_char) {
in_string = 0;

// Count nested curlies
if (ch == '{') {
curly_depth++;
}
// Check for end of expression curlies
else if (ch == '}') {
if (!--curly_depth) {
result[j++] = '}';
break;
}
}
continue;
}

// Check for # outside strings
if (ch == '#' && !in_string) {
hash_detected = 1;
break;
}
// Copy other chars
result[j++] = ch;
}

*in_pos = i;
return j;
}

static int
set_ftstring_expr(struct tok_state* tok, struct token *token, char c)
{
assert(token != NULL);
assert(c == '}' || c == ':' || c == '!');
tokenizer_mode *tok_mode = TOK_GET_MODE(tok);

if (!(tok_mode->in_debug || tok_mode->string_kind == TSTRING) || token->metadata) {
return 0;
}
// If we found a # character in the expression, we need to handle comments
if (hash_detected) {
PyObject *res = NULL;

// If there is a '#' character in the expression, we need to handle possible comments
if (memchr(tok_mode->last_expr_buffer, '#', tok_mode->last_expr_size - tok_mode->last_expr_end) != NULL) {
// Allocate buffer for processed result
char *result = (char *)PyMem_Malloc((tok_mode->last_expr_size - tok_mode->last_expr_end + 1) * sizeof(char));
if (!result) {
return -1;
}

Py_ssize_t i = 0; // Input position
Py_ssize_t j = 0; // Output position
in_string = 0; // Whether we're in a string
quote_char = 0; // Current string quote char

// Process each character
while (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
char ch = tok_mode->last_expr_buffer[i];

// Handle string quotes
if (ch == '"' || ch == '\'') {
// See comment above to understand this part
if (!in_string) {
in_string = 1;
quote_char = ch;
} else if (ch == quote_char) {
in_string = 0;
}
result[j++] = ch;
}
// Skip comments
else if (ch == '#' && !in_string) {
while (i < tok_mode->last_expr_size - tok_mode->last_expr_end &&
tok_mode->last_expr_buffer[i] != '\n') {
i++;
}
if (i < tok_mode->last_expr_size - tok_mode->last_expr_end) {
result[j++] = '\n';
}
}
// Copy other chars
else {
result[j++] = ch;
}
i++;
}
Py_ssize_t in_pos = 0;
Py_ssize_t out_pos = recurse_set_ftstring_expr(tok_mode, result, &in_pos, 0);

result[j] = '\0'; // Null-terminate the result string
res = PyUnicode_DecodeUTF8(result, j, NULL);
result[out_pos] = '\0'; // Null-terminate the result string
res = PyUnicode_DecodeUTF8(result, out_pos, NULL);
PyMem_Free(result);
} else {
res = PyUnicode_DecodeUTF8(
Expand Down
Loading