Skip to content

Commit b0b6a3f

Browse files
committed
Update common mark code from upstream
1 parent 8a07a63 commit b0b6a3f

22 files changed

+14400
-10566
lines changed

lib/commonmarker/src/arena.c

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,15 +68,16 @@ static void *arena_calloc(size_t nmem, size_t size) {
6868
const size_t align = sizeof(size_t) - 1;
6969
sz = (sz + align) & ~align;
7070

71+
struct arena_chunk *chunk;
7172
if (sz > A->sz) {
72-
A->prev = alloc_arena_chunk(sz, A->prev);
73-
return (uint8_t *) A->prev->ptr + sizeof(size_t);
73+
A->prev = chunk = alloc_arena_chunk(sz, A->prev);
74+
} else if (sz > A->sz - A->used) {
75+
A = chunk = alloc_arena_chunk(A->sz + A->sz / 2, A);
76+
} else {
77+
chunk = A;
7478
}
75-
if (sz > A->sz - A->used) {
76-
A = alloc_arena_chunk(A->sz + A->sz / 2, A);
77-
}
78-
void *ptr = (uint8_t *) A->ptr + A->used;
79-
A->used += sz;
79+
void *ptr = (uint8_t *) chunk->ptr + chunk->used;
80+
chunk->used += sz;
8081
*((size_t *) ptr) = sz - sizeof(size_t);
8182
return (uint8_t *) ptr + sizeof(size_t);
8283
}
@@ -98,6 +99,6 @@ static void arena_free(void *ptr) {
9899

99100
cmark_mem CMARK_ARENA_MEM_ALLOCATOR = {arena_calloc, arena_realloc, arena_free};
100101

101-
cmark_mem *cmark_get_arena_mem_allocator() {
102+
cmark_mem *cmark_get_arena_mem_allocator(void) {
102103
return &CMARK_ARENA_MEM_ALLOCATOR;
103104
}

lib/commonmarker/src/blocks.c

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include <stdlib.h>
99
#include <assert.h>
1010
#include <stdio.h>
11+
#include <limits.h>
1112

1213
#include "cmark_ctype.h"
1314
#include "syntax_extension.h"
@@ -26,6 +27,14 @@
2627
#define CODE_INDENT 4
2728
#define TAB_STOP 4
2829

30+
/**
31+
* Very deeply nested lists can cause quadratic performance issues.
32+
* This constant is used in open_new_blocks() to limit the nesting
33+
* depth. It is unlikely that a non-contrived markdown document will
34+
* be nested this deeply.
35+
*/
36+
#define MAX_LIST_DEPTH 100
37+
2938
#ifndef MIN
3039
#define MIN(x, y) ((x < y) ? x : y)
3140
#endif
@@ -639,6 +648,14 @@ static cmark_node *finalize_document(cmark_parser *parser) {
639648
}
640649

641650
finalize(parser, parser->root);
651+
652+
// Limit total size of extra content created from reference links to
653+
// document size to avoid superlinear growth. Always allow 100KB.
654+
if (parser->total_size > 100000)
655+
parser->refmap->max_ref_size = parser->total_size;
656+
else
657+
parser->refmap->max_ref_size = 100000;
658+
642659
process_inlines(parser, parser->refmap, parser->options);
643660
if (parser->options & CMARK_OPT_FOOTNOTES)
644661
process_footnotes(parser);
@@ -698,6 +715,11 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
698715
const unsigned char *end = buffer + len;
699716
static const uint8_t repl[] = {239, 191, 189};
700717

718+
if (len > UINT_MAX - parser->total_size)
719+
parser->total_size = UINT_MAX;
720+
else
721+
parser->total_size += len;
722+
701723
if (parser->last_buffer_ended_with_cr && *buffer == '\n') {
702724
// skip NL if last buffer ended with CR ; see #117
703725
buffer++;
@@ -1105,10 +1127,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
11051127
bool has_content;
11061128
int save_offset;
11071129
int save_column;
1130+
size_t depth = 0;
11081131

11091132
while (cont_type != CMARK_NODE_CODE_BLOCK &&
11101133
cont_type != CMARK_NODE_HTML_BLOCK) {
1111-
1134+
depth++;
11121135
S_find_first_nonspace(parser, input);
11131136
indented = parser->indent >= CODE_INDENT;
11141137

@@ -1194,22 +1217,25 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
11941217
parser->first_nonspace + 1);
11951218
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
11961219
} else if (!indented &&
1197-
parser->options & CMARK_OPT_FOOTNOTES &&
1220+
(parser->options & CMARK_OPT_FOOTNOTES) &&
1221+
depth < MAX_LIST_DEPTH &&
11981222
(matched = scan_footnote_definition(input, parser->first_nonspace))) {
11991223
cmark_chunk c = cmark_chunk_dup(input, parser->first_nonspace + 2, matched - 2);
1200-
cmark_chunk_to_cstr(parser->mem, &c);
12011224

12021225
while (c.data[c.len - 1] != ']')
12031226
--c.len;
12041227
--c.len;
12051228

1229+
cmark_chunk_to_cstr(parser->mem, &c);
1230+
12061231
S_advance_offset(parser, input, parser->first_nonspace + matched - parser->offset, false);
12071232
*container = add_child(parser, *container, CMARK_NODE_FOOTNOTE_DEFINITION, parser->first_nonspace + matched + 1);
12081233
(*container)->as.literal = c;
12091234

12101235
(*container)->internal_offset = matched;
12111236
} else if ((!indented || cont_type == CMARK_NODE_LIST) &&
12121237
parser->indent < 4 &&
1238+
depth < MAX_LIST_DEPTH &&
12131239
(matched = parse_list_marker(
12141240
parser->mem, input, parser->first_nonspace,
12151241
(*container)->type == CMARK_NODE_PARAGRAPH, &data))) {

lib/commonmarker/src/cmark-gfm-extension_api.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ typedef struct delimiter {
114114
struct delimiter *previous;
115115
struct delimiter *next;
116116
cmark_node *inl_text;
117+
bufsize_t position;
117118
bufsize_t length;
118119
unsigned char delim_char;
119120
int can_open;

lib/commonmarker/src/cmark-gfm.h

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -111,13 +111,13 @@ typedef struct cmark_mem {
111111
* realloc and free.
112112
*/
113113
CMARK_GFM_EXPORT
114-
cmark_mem *cmark_get_default_mem_allocator();
114+
cmark_mem *cmark_get_default_mem_allocator(void);
115115

116116
/** An arena allocator; uses system calloc to allocate large
117117
* slabs of memory. Memory in these slabs is not reused at all.
118118
*/
119119
CMARK_GFM_EXPORT
120-
cmark_mem *cmark_get_arena_mem_allocator();
120+
cmark_mem *cmark_get_arena_mem_allocator(void);
121121

122122
/** Resets the arena allocator, quickly returning all used memory
123123
* to the operating system.
@@ -225,6 +225,11 @@ CMARK_GFM_EXPORT cmark_node *cmark_node_first_child(cmark_node *node);
225225
*/
226226
CMARK_GFM_EXPORT cmark_node *cmark_node_last_child(cmark_node *node);
227227

228+
/** Returns the footnote reference of 'node', or NULL if 'node' doesn't have a
229+
* footnote reference.
230+
*/
231+
CMARK_GFM_EXPORT cmark_node *cmark_node_parent_footnote_def(cmark_node *node);
232+
228233
/**
229234
* ## Iterator
230235
*
@@ -408,6 +413,17 @@ CMARK_GFM_EXPORT int cmark_node_get_list_tight(cmark_node *node);
408413
*/
409414
CMARK_GFM_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight);
410415

416+
/**
417+
* Returns item index of 'node'. This is only used when rendering output
418+
* formats such as commonmark, which need to output the index. It is not
419+
* required for formats such as html or latex.
420+
*/
421+
CMARK_GFM_EXPORT int cmark_node_get_item_index(cmark_node *node);
422+
423+
/** Sets item index of 'node'. Returns 1 on success, 0 on failure.
424+
*/
425+
CMARK_GFM_EXPORT int cmark_node_set_item_index(cmark_node *node, int idx);
426+
411427
/** Returns the info string from a fenced code block.
412428
*/
413429
CMARK_GFM_EXPORT const char *cmark_node_get_fence_info(cmark_node *node);

lib/commonmarker/src/cmark.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,9 @@
1010
cmark_node_type CMARK_NODE_LAST_BLOCK = CMARK_NODE_FOOTNOTE_DEFINITION;
1111
cmark_node_type CMARK_NODE_LAST_INLINE = CMARK_NODE_FOOTNOTE_REFERENCE;
1212

13-
int cmark_version() { return CMARK_GFM_VERSION; }
13+
int cmark_version(void) { return CMARK_GFM_VERSION; }
1414

15-
const char *cmark_version_string() { return CMARK_GFM_VERSION_STRING; }
15+
const char *cmark_version_string(void) { return CMARK_GFM_VERSION_STRING; }
1616

1717
static void *xcalloc(size_t nmem, size_t size) {
1818
void *ptr = calloc(nmem, size);
@@ -38,7 +38,7 @@ static void xfree(void *ptr) {
3838

3939
cmark_mem CMARK_DEFAULT_MEM_ALLOCATOR = {xcalloc, xrealloc, xfree};
4040

41-
cmark_mem *cmark_get_default_mem_allocator() {
41+
cmark_mem *cmark_get_default_mem_allocator(void) {
4242
return &CMARK_DEFAULT_MEM_ALLOCATOR;
4343
}
4444

lib/commonmarker/src/commonmark.c

Lines changed: 20 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -153,23 +153,8 @@ static bool is_autolink(cmark_node *node) {
153153
link_text->as.literal.len) == 0);
154154
}
155155

156-
// if node is a block node, returns node.
157-
// otherwise returns first block-level node that is an ancestor of node.
158-
// if there is no block-level ancestor, returns NULL.
159-
static cmark_node *get_containing_block(cmark_node *node) {
160-
while (node) {
161-
if (CMARK_NODE_BLOCK_P(node)) {
162-
return node;
163-
} else {
164-
node = node->parent;
165-
}
166-
}
167-
return NULL;
168-
}
169-
170156
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
171157
cmark_event_type ev_type, int options) {
172-
cmark_node *tmp;
173158
int list_number;
174159
cmark_delim_type list_delim;
175160
int numticks;
@@ -180,7 +165,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
180165
char fencechar[2] = {'\0', '\0'};
181166
size_t info_len, code_len;
182167
char listmarker[LISTMARKER_SIZE];
183-
char *emph_delim;
168+
const char *emph_delim;
184169
bool first_in_list_item;
185170
bufsize_t marker_width;
186171
bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) &&
@@ -189,14 +174,17 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
189174
// Don't adjust tight list status until we've started the list.
190175
// Otherwise we lose the blank line between a paragraph and
191176
// a following list.
192-
if (!(node->type == CMARK_NODE_ITEM && node->prev == NULL && entering)) {
193-
tmp = get_containing_block(node);
194-
renderer->in_tight_list_item =
195-
tmp && // tmp might be NULL if there is no containing block
196-
((tmp->type == CMARK_NODE_ITEM &&
197-
cmark_node_get_list_tight(tmp->parent)) ||
198-
(tmp && tmp->parent && tmp->parent->type == CMARK_NODE_ITEM &&
199-
cmark_node_get_list_tight(tmp->parent->parent)));
177+
if (entering) {
178+
if (node->parent && node->parent->type == CMARK_NODE_ITEM) {
179+
renderer->in_tight_list_item = node->parent->parent->as.list.tight;
180+
}
181+
} else {
182+
if (node->type == CMARK_NODE_LIST) {
183+
renderer->in_tight_list_item =
184+
node->parent &&
185+
node->parent->type == CMARK_NODE_ITEM &&
186+
node->parent->parent->as.list.tight;
187+
}
200188
}
201189

202190
if (node->extension && node->extension->commonmark_render_func) {
@@ -225,8 +213,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
225213
// this ensures that a following indented code block or list will be
226214
// interpreted correctly.
227215
CR();
228-
// MBGJ: Do not output html in parser
229-
//LIT("<!-- end list -->");
216+
LIT("<!-- end list -->");
230217
BLANKLINE();
231218
}
232219
break;
@@ -235,13 +222,8 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
235222
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
236223
marker_width = 4;
237224
} else {
238-
list_number = cmark_node_get_list_start(node->parent);
225+
list_number = cmark_node_get_item_index(node);
239226
list_delim = cmark_node_get_list_delim(node->parent);
240-
tmp = node;
241-
while (tmp->prev) {
242-
tmp = tmp->prev;
243-
list_number += 1;
244-
}
245227
// we ensure a width of at least 4 so
246228
// we get nice transition from single digits
247229
// to double
@@ -406,10 +388,12 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
406388
break;
407389

408390
case CMARK_NODE_STRONG:
409-
if (entering) {
410-
LIT("**");
411-
} else {
412-
LIT("**");
391+
if (node->parent == NULL || node->parent->type != CMARK_NODE_STRONG) {
392+
if (entering) {
393+
LIT("**");
394+
} else {
395+
LIT("**");
396+
}
413397
}
414398
break;
415399

lib/commonmarker/src/html.c

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,16 @@ static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *
6363
if (renderer->written_footnote_ix >= renderer->footnote_ix)
6464
return false;
6565
renderer->written_footnote_ix = renderer->footnote_ix;
66+
char m[32];
67+
snprintf(m, sizeof(m), "%d", renderer->written_footnote_ix);
6668

6769
cmark_strbuf_puts(html, "<a href=\"#fnref-");
6870
houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
69-
cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref aria-label=\"Back to content\">↩</a>");
71+
cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"");
72+
cmark_strbuf_puts(html, m);
73+
cmark_strbuf_puts(html, "\" aria-label=\"Back to reference ");
74+
cmark_strbuf_puts(html, m);
75+
cmark_strbuf_puts(html, "\">↩</a>");
7076

7177
if (node->footnote.def_count > 1)
7278
{
@@ -78,7 +84,15 @@ static bool S_put_footnote_backref(cmark_html_renderer *renderer, cmark_strbuf *
7884
houdini_escape_href(html, node->as.literal.data, node->as.literal.len);
7985
cmark_strbuf_puts(html, "-");
8086
cmark_strbuf_puts(html, n);
81-
cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref aria-label=\"Back to content\">↩<sup class=\"footnote-ref\">");
87+
cmark_strbuf_puts(html, "\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"");
88+
cmark_strbuf_puts(html, m);
89+
cmark_strbuf_puts(html, "-");
90+
cmark_strbuf_puts(html, n);
91+
cmark_strbuf_puts(html, "\" aria-label=\"Back to reference ");
92+
cmark_strbuf_puts(html, m);
93+
cmark_strbuf_puts(html, "-");
94+
cmark_strbuf_puts(html, n);
95+
cmark_strbuf_puts(html, "\">↩<sup class=\"footnote-ref\">");
8296
cmark_strbuf_puts(html, n);
8397
cmark_strbuf_puts(html, "</sup></a>");
8498
}
@@ -350,10 +364,12 @@ static int S_render_node(cmark_html_renderer *renderer, cmark_node *node,
350364
break;
351365

352366
case CMARK_NODE_STRONG:
353-
if (entering) {
354-
cmark_strbuf_puts(html, "<strong>");
355-
} else {
356-
cmark_strbuf_puts(html, "</strong>");
367+
if (node->parent == NULL || node->parent->type != CMARK_NODE_STRONG) {
368+
if (entering) {
369+
cmark_strbuf_puts(html, "<strong>");
370+
} else {
371+
cmark_strbuf_puts(html, "</strong>");
372+
}
357373
}
358374
break;
359375

0 commit comments

Comments
 (0)