diff --git a/src/3rdparty/md4c/LICENSE.md b/src/3rdparty/md4c/LICENSE.md index d58ef9341d..2088ba453e 100644 --- a/src/3rdparty/md4c/LICENSE.md +++ b/src/3rdparty/md4c/LICENSE.md @@ -1,7 +1,7 @@ # The MIT License (MIT) -Copyright © 2016-2019 Martin Mitáš +Copyright © 2016-2020 Martin Mitáš Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), diff --git a/src/3rdparty/md4c/md4c.c b/src/3rdparty/md4c/md4c.c index 0119e7a55a..19ba212a09 100644 --- a/src/3rdparty/md4c/md4c.c +++ b/src/3rdparty/md4c/md4c.c @@ -72,6 +72,48 @@ #define FALSE 0 #endif +#define MD_LOG(msg) \ + do { \ + if(ctx->parser.debug_log != NULL) \ + ctx->parser.debug_log((msg), ctx->userdata); \ + } while(0) + +#ifdef DEBUG + #define MD_ASSERT(cond) \ + do { \ + if(!(cond)) { \ + MD_LOG(__FILE__ ":" STRINGIZE(__LINE__) ": " \ + "Assertion '" STRINGIZE(cond) "' failed."); \ + exit(1); \ + } \ + } while(0) + + #define MD_UNREACHABLE() MD_ASSERT(1 == 0) +#else + #ifdef __GNUC__ + #define MD_ASSERT(cond) do { if(!(cond)) __builtin_unreachable(); } while(0) + #define MD_UNREACHABLE() do { __builtin_unreachable(); } while(0) + #elif defined _MSC_VER && _MSC_VER > 120 + #define MD_ASSERT(cond) do { __assume(cond); } while(0) + #define MD_UNREACHABLE() do { __assume(0); } while(0) + #else + #define MD_ASSERT(cond) do {} while(0) + #define MD_UNREACHABLE() do {} while(0) + #endif +#endif + +/* For falling through case labels in switch statements. */ +#if defined __clang__ && __clang_major__ >= 12 + #define MD_FALLTHROUGH() __attribute__((fallthrough)) +#elif defined __GNUC__ && __GNUC__ >= 7 + #define MD_FALLTHROUGH() __attribute__((fallthrough)) +#else + #define MD_FALLTHROUGH() ((void)0) +#endif + +/* Suppress "unused parameter" warnings. */ +#define MD_UNUSED(x) ((void)x) + /************************ *** Internal Types *** @@ -230,41 +272,6 @@ struct MD_VERBATIMLINE_tag { }; -/******************* - *** Debugging *** - *******************/ - -#define MD_LOG(msg) \ - do { \ - if(ctx->parser.debug_log != NULL) \ - ctx->parser.debug_log((msg), ctx->userdata); \ - } while(0) - -#ifdef DEBUG - #define MD_ASSERT(cond) \ - do { \ - if(!(cond)) { \ - MD_LOG(__FILE__ ":" STRINGIZE(__LINE__) ": " \ - "Assertion '" STRINGIZE(cond) "' failed."); \ - exit(1); \ - } \ - } while(0) - - #define MD_UNREACHABLE() MD_ASSERT(1 == 0) -#else - #ifdef __GNUC__ - #define MD_ASSERT(cond) do { if(!(cond)) __builtin_unreachable(); } while(0) - #define MD_UNREACHABLE() do { __builtin_unreachable(); } while(0) - #elif defined _MSC_VER && _MSC_VER > 120 - #define MD_ASSERT(cond) do { __assume(cond); } while(0) - #define MD_UNREACHABLE() do { __assume(0); } while(0) - #else - #define MD_ASSERT(cond) do {} while(0) - #define MD_UNREACHABLE() do {} while(0) - #endif -#endif - - /***************** *** Helpers *** *****************/ @@ -466,7 +473,7 @@ md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const CHAR* str, SZ typedef struct MD_UNICODE_FOLD_INFO_tag MD_UNICODE_FOLD_INFO; struct MD_UNICODE_FOLD_INFO_tag { unsigned codepoints[3]; - int n_codepoints; + unsigned n_codepoints; }; @@ -670,7 +677,7 @@ struct MD_UNICODE_FOLD_INFO_tag { const unsigned* map; const unsigned* data; size_t map_size; - int n_codepoints; + unsigned n_codepoints; } FOLD_MAP_LIST[] = { { FOLD_MAP_1, FOLD_MAP_1_DATA, SIZEOF_ARRAY(FOLD_MAP_1), 1 }, { FOLD_MAP_2, FOLD_MAP_2_DATA, SIZEOF_ARRAY(FOLD_MAP_2), 2 }, @@ -695,7 +702,7 @@ struct MD_UNICODE_FOLD_INFO_tag { index = md_unicode_bsearch__(codepoint, FOLD_MAP_LIST[i].map, FOLD_MAP_LIST[i].map_size); if(index >= 0) { /* Found the mapping. */ - int n_codepoints = FOLD_MAP_LIST[i].n_codepoints; + unsigned n_codepoints = FOLD_MAP_LIST[i].n_codepoints; const unsigned* map = FOLD_MAP_LIST[i].map; const unsigned* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints); @@ -894,6 +901,8 @@ md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines, int line_index = 0; OFF off = beg; + MD_UNUSED(n_lines); + while(1) { const MD_LINE* line = &lines[line_index]; OFF line_end = line->end; @@ -1235,6 +1244,7 @@ static int md_is_hex_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end) { OFF off = beg; + MD_UNUSED(ctx); while(off < max_end && ISXDIGIT_(text[off]) && off - beg <= 8) off++; @@ -1251,6 +1261,7 @@ static int md_is_dec_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end) { OFF off = beg; + MD_UNUSED(ctx); while(off < max_end && ISDIGIT_(text[off]) && off - beg <= 8) off++; @@ -1267,6 +1278,7 @@ static int md_is_named_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end) { OFF off = beg; + MD_UNUSED(ctx); if(off < max_end && ISALPHA_(text[off])) off++; @@ -1372,6 +1384,8 @@ md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build, static void md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build) { + MD_UNUSED(ctx); + if(build->substr_alloc > 0) { free(build->text); free(build->substr_types); @@ -1547,12 +1561,6 @@ md_link_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size, goto whitespace; } - if(ISNEWLINE_(label[off])) { - /* Treat new lines as a whitespace. */ - off++; - goto whitespace; - } - codepoint = md_decode_unicode(label, off, size, &char_size); off += char_size; if(ISUNICODEWHITESPACE_(codepoint)) { @@ -1575,8 +1583,6 @@ md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size { OFF a_off; OFF b_off; - int a_reached_end = FALSE; - int b_reached_end = FALSE; MD_UNICODE_FOLD_INFO a_fi = { { 0 }, 0 }; MD_UNICODE_FOLD_INFO b_fi = { { 0 }, 0 }; OFF a_fi_off = 0; @@ -1585,17 +1591,17 @@ md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size a_off = md_skip_unicode_whitespace(a_label, 0, a_size); b_off = md_skip_unicode_whitespace(b_label, 0, b_size); - while(!a_reached_end || !b_reached_end) { + while(a_off < a_size || a_fi_off < a_fi.n_codepoints || + b_off < b_size || b_fi_off < b_fi.n_codepoints) + { /* If needed, load fold info for next char. */ if(a_fi_off >= a_fi.n_codepoints) { a_fi_off = 0; a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi); - a_reached_end = (a_off >= a_size); } if(b_fi_off >= b_fi.n_codepoints) { b_fi_off = 0; b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi); - b_reached_end = (b_off >= b_size); } cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off]; @@ -2671,7 +2677,7 @@ md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how) mark_index = mark->prev; break; } - /* Pass through. */ + MD_FALLTHROUGH(); default: mark_index--; break; @@ -3227,7 +3233,7 @@ md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode) /* Push a dummy as a reserve for a closer. */ PUSH_MARK('D', off, off, 0); off += 1 + suffix_size; - continue; + break; } } @@ -3524,6 +3530,10 @@ md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines) if(is_link) { /* Eat the 2nd "[...]". */ closer->end = next_closer->end; + + /* Do not analyze the label as a standalone link in the next + * iteration. */ + next_index = ctx->marks[next_index].prev; } } else { if(closer->end < ctx->size && CH(closer->end) == _T('(')) { @@ -3679,7 +3689,7 @@ md_analyze_emph(MD_CTX* ctx, int mark_index) /* If we can be a closer, try to resolve with the preceding opener. */ if(mark->flags & MD_MARK_POTENTIAL_CLOSER) { MD_MARK* opener = NULL; - int opener_index; + int opener_index = 0; if(mark->ch == _T('*')) { MD_MARKCHAIN* opener_chains[6]; @@ -3806,6 +3816,7 @@ md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index) int has_underscore_in_last_seg = FALSE; int has_underscore_in_next_to_last_seg = FALSE; int n_opened_parenthesis = 0; + int n_excess_parenthesis = 0; /* Check for domain. */ while(off < ctx->size) { @@ -3844,17 +3855,28 @@ md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index) if(n_opened_parenthesis > 0) n_opened_parenthesis--; else - break; + n_excess_parenthesis++; } off++; } - /* These cannot be last char In such case they are more likely normal - * punctuation. */ - if(ISANYOF(off-1, _T("?!.,:*_~"))) - off--; - /* Ok. Lets call it auto-link. Adapt opener and create closer to zero + /* Trim a trailing punctuation from the end. */ + while(TRUE) { + if(ISANYOF(off-1, _T("?!.,:*_~"))) { + off--; + } else if(CH(off-1) == ')' && n_excess_parenthesis > 0) { + /* Unmatched ')' can be in an interior of the path but not at the + * of it, so the auto-link may be safely nested in a parenthesis + * pair. */ + off--; + n_excess_parenthesis--; + } else { + break; + } + } + + /* Ok. Lets call it an auto-link. Adapt opener and create closer to zero * length so all the contents becomes the link text. */ MD_ASSERT(closer->ch == 'D'); opener->end = opener->beg; @@ -3918,6 +3940,8 @@ md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int mark_beg, int mark_end, const CHAR* mark_chars) { int i = mark_beg; + MD_UNUSED(lines); + MD_UNUSED(n_lines); while(i < mark_end) { MD_MARK* mark = &ctx->marks[i]; @@ -4136,7 +4160,7 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines) } break; } - /* Fall though. */ + MD_FALLTHROUGH(); case '*': /* Emphasis, strong emphasis. */ if(mark->flags & MD_MARK_OPENER) { @@ -4235,6 +4259,7 @@ md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines) break; } /* Pass through, if auto-link. */ + MD_FALLTHROUGH(); case '@': /* Permissive e-mail autolink. */ case ':': /* Permissive URL autolink. */ @@ -4491,12 +4516,14 @@ md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines lines[0].beg, lines[0].end, align, col_count)); MD_LEAVE_BLOCK(MD_BLOCK_THEAD, NULL); - MD_ENTER_BLOCK(MD_BLOCK_TBODY, NULL); - for(i = 2; i < n_lines; i++) { - MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TD, - lines[i].beg, lines[i].end, align, col_count)); + if(n_lines > 2) { + MD_ENTER_BLOCK(MD_BLOCK_TBODY, NULL); + for(i = 2; i < n_lines; i++) { + MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TD, + lines[i].beg, lines[i].end, align, col_count)); + } + MD_LEAVE_BLOCK(MD_BLOCK_TBODY, NULL); } - MD_LEAVE_BLOCK(MD_BLOCK_TBODY, NULL); abort: free(align); @@ -4665,6 +4692,7 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block) union { MD_BLOCK_H_DETAIL header; MD_BLOCK_CODE_DETAIL code; + MD_BLOCK_TABLE_DETAIL table; } det; MD_ATTRIBUTE_BUILD info_build; MD_ATTRIBUTE_BUILD lang_build; @@ -4693,6 +4721,12 @@ md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block) } break; + case MD_BLOCK_TABLE: + det.table.col_count = block->data; + det.table.head_row_count = 1; + det.table.body_row_count = block->n_lines - 2; + break; + default: /* Noop. */ break; @@ -5504,7 +5538,7 @@ md_enter_child_containers(MD_CTX* ctx, int n_children, unsigned data) case _T(')'): case _T('.'): is_ordered_list = TRUE; - /* Pass through */ + MD_FALLTHROUGH(); case _T('-'): case _T('+'): @@ -5550,7 +5584,7 @@ md_leave_child_containers(MD_CTX* ctx, int n_keep) case _T(')'): case _T('.'): is_ordered_list = TRUE; - /* Pass through */ + MD_FALLTHROUGH(); case _T('-'): case _T('+'): @@ -5586,11 +5620,11 @@ md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTA OFF off = beg; OFF max_end; - if(indent >= ctx->code_indent_offset) + if(off >= ctx->size || indent >= ctx->code_indent_offset) return FALSE; /* Check for block quote mark. */ - if(off < ctx->size && CH(off) == _T('>')) { + if(CH(off) == _T('>')) { off++; p_container->ch = _T('>'); p_container->is_loose = FALSE; @@ -5602,13 +5636,13 @@ md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTA } /* Check for list item bullet mark. */ - if(off+1 < ctx->size && ISANYOF(off, _T("-+*")) && (ISBLANK(off+1) || ISNEWLINE(off+1))) { + if(ISANYOF(off, _T("-+*")) && (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1))) { p_container->ch = CH(off); p_container->is_loose = FALSE; p_container->is_task = FALSE; p_container->mark_indent = indent; p_container->contents_indent = indent + 1; - *p_end = off + 1; + *p_end = off+1; return TRUE; } @@ -5621,16 +5655,16 @@ md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTA p_container->start = p_container->start * 10 + CH(off) - _T('0'); off++; } - if(off > beg && off+1 < ctx->size && + if(off > beg && (CH(off) == _T('.') || CH(off) == _T(')')) && - (ISBLANK(off+1) || ISNEWLINE(off+1))) + (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1))) { p_container->ch = CH(off); p_container->is_loose = FALSE; p_container->is_task = FALSE; p_container->mark_indent = indent; p_container->contents_indent = indent + off - beg + 1; - *p_end = off + 1; + *p_end = off+1; return TRUE; } @@ -5655,7 +5689,7 @@ md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end) return indent - total_indent; } -static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0 }; +static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0, 0, 0, 0 }; /* Analyze type of the line and find some its properties. This serves as a * main input for determining type and boundaries of a block. */ @@ -5786,12 +5820,14 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, #if 1 /* See https://github.com/mity/md4c/issues/6 * - * This ugly checking tests we are in (yet empty) list item but not - * its very first line (with the list item mark). + * This ugly checking tests we are in (yet empty) list item but + * not its very first line (i.e. not the line with the list + * item mark). * - * If we are such blank line, then any following non-blank line - * which would be part of this list item actually ends the list - * because "a list item can begin with at most one blank line." + * If we are such a blank line, then any following non-blank + * line which would be part of the list item actually has to + * end the list because according to the specification, "a list + * item can begin with at most one blank line." */ if(n_parents > 0 && ctx->containers[n_parents-1].ch != _T('>') && n_brothers + n_children == 0 && ctx->current_block == NULL && @@ -5806,9 +5842,10 @@ md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end, break; } else { #if 1 - /* This is 2nd half of the hack. If the flag is set (that is there - * were 2nd blank line at the start of the list item) and we would also - * belonging to such list item, than interrupt the list. */ + /* This is the 2nd half of the hack. If the flag is set (i.e. there + * was a 2nd blank line at the beginning of the list item) and if + * we would otherwise still belong to the list item, we enforce + * the end of the list. */ ctx->last_line_has_list_loosening_effect = FALSE; if(ctx->last_list_item_starts_with_two_blank_lines) { if(n_parents > 0 && ctx->containers[n_parents-1].ch != _T('>') && diff --git a/src/3rdparty/md4c/md4c.h b/src/3rdparty/md4c/md4c.h index 8bba71242d..95f78f9b9b 100644 --- a/src/3rdparty/md4c/md4c.h +++ b/src/3rdparty/md4c/md4c.h @@ -91,7 +91,8 @@ typedef enum MD_BLOCKTYPE { MD_BLOCK_P, /*