QtBase  v6.3.1
md4c.c
Go to the documentation of this file.
1 /*
2  * MD4C: Markdown parser for C
3  * (http://github.com/mity/md4c)
4  *
5  * Copyright (c) 2016-2020 Martin Mitas
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11  * and/or sell copies of the Software, and to permit persons to whom the
12  * Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23  * IN THE SOFTWARE.
24  */
25 
26 #include "md4c.h"
27 
28 #include <limits.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 
33 
34 /*****************************
35  *** Miscellaneous Stuff ***
36  *****************************/
37 
#if !defined(__STDC_VERSION__) || __STDC_VERSION__ < 199901L
    /* "inline" became a keyword only in C99 (__STDC_VERSION__ 199901L).
     * C89/C90/C95 or old compilers in general may not understand it, so map
     * it to the vendor spelling, or to nothing when there is none. */
    #if defined __GNUC__
        #define inline __inline__
    #elif defined _MSC_VER
        #define inline __inline
    #else
        #define inline
    #endif
#endif
48 
49 /* Make the UTF-8 support the default. */
/* UTF-8 is the default encoding unless the build selected one explicitly. */
#if !(defined(MD4C_USE_ASCII) || defined(MD4C_USE_UTF8) || defined(MD4C_USE_UTF16))
    #define MD4C_USE_UTF8
#endif

/* _T() wraps a character or string literal so that it becomes a wide literal
 * when MD4C_USE_UTF16 is in effect. Any earlier definition of _T is dropped
 * first (#undef of a name which is not defined is harmless). */
#undef _T
#ifndef MD4C_USE_UTF16
    #define _T(x)           x
#else
    #define _T(x)           L##x
#endif
63 
/* Misc. macros. */

/* Count of elements of a real array (must not be used with a pointer).
 * The argument is parenthesized before indexing so that any expression
 * naming an array can be passed safely. */
#define SIZEOF_ARRAY(a)     (sizeof(a) / sizeof((a)[0]))

/* Two-step stringification: STRINGIZE() expands its argument first and only
 * then turns the result into a string literal. */
#define STRINGIZE_(x)       #x
#define STRINGIZE(x)        STRINGIZE_(x)

#ifndef TRUE
    #define TRUE            1
    #define FALSE           0
#endif
74 
/* Hand a message over to the debug_log callback of the parser, if there is
 * one. Expects a variable named ctx in the scope of the expansion. */
#define MD_LOG(msg)                                                     \
    do {                                                                \
        if(ctx->parser.debug_log == NULL)                               \
            break;                                                      \
        ctx->parser.debug_log((msg), ctx->userdata);                    \
    } while(0)
80 
/* MD_ASSERT() and MD_UNREACHABLE().
 *
 * With DEBUG defined, a failed assertion is reported through MD_LOG() and
 * the process exits with status 1. Without DEBUG, the condition is only
 * handed to the compiler as a hint (GCC-compatible compilers and MSVC), or
 * the macros expand to nothing. */
#ifndef DEBUG
    #if defined __GNUC__
        #define MD_ASSERT(cond)     do { if(!(cond)) __builtin_unreachable(); } while(0)
        #define MD_UNREACHABLE()    do { __builtin_unreachable(); } while(0)
    #elif defined _MSC_VER  &&  _MSC_VER > 120
        #define MD_ASSERT(cond)     do { __assume(cond); } while(0)
        #define MD_UNREACHABLE()    do { __assume(0); } while(0)
    #else
        #define MD_ASSERT(cond)     do {} while(0)
        #define MD_UNREACHABLE()    do {} while(0)
    #endif
#else
    #define MD_ASSERT(cond)                                             \
        do {                                                            \
            if(!(cond)) {                                               \
                MD_LOG(__FILE__ ":" STRINGIZE(__LINE__) ": "            \
                       "Assertion '" STRINGIZE(cond) "' failed.");      \
                exit(1);                                                \
            }                                                           \
        } while(0)

    #define MD_UNREACHABLE()        MD_ASSERT(1 == 0)
#endif
104 
/* Marks an intended fall-through between case labels of a switch. Compilers
 * known to support the statement attribute get it; others get a no-op. */
#if (defined __clang__ && __clang_major__ >= 12) || (defined __GNUC__ && __GNUC__ >= 7)
    #define MD_FALLTHROUGH()    __attribute__((fallthrough))
#else
    #define MD_FALLTHROUGH()    ((void)0)
#endif
113 
/* Suppress "unused parameter" warnings. The argument is parenthesized so
 * that the cast to void applies to the whole expression passed in. */
#define MD_UNUSED(x)            ((void)(x))
116 
117 
118 /************************
119  *** Internal Types ***
120  ************************/
121 
122 /* These are omnipresent so let's save some typing. */
123 #define CHAR MD_CHAR
124 #define SZ MD_SIZE
125 #define OFF MD_OFFSET
126 
127 typedef struct MD_MARK_tag MD_MARK;
128 typedef struct MD_BLOCK_tag MD_BLOCK;
129 typedef struct MD_CONTAINER_tag MD_CONTAINER;
130 typedef struct MD_REF_DEF_tag MD_REF_DEF;
131 
132 
133 /* During analysis of inline marks, we need to manage some "mark chains",
134  * of (yet unresolved) openers. This structure holds start/end of the chain.
135  * The chain internals are then realized through MD_MARK::prev and ::next.
136  */
137 typedef struct MD_MARKCHAIN_tag MD_MARKCHAIN;
139  int head; /* Index of first mark in the chain, or -1 if empty. */
140  int tail; /* Index of last mark in the chain, or -1 if empty. */
141 };
142 
143 /* Context propagated through all the parsing. */
144 typedef struct MD_CTX_tag MD_CTX;
145 struct MD_CTX_tag {
146  /* Immutable stuff (parameters of md_parse()). */
147  const CHAR* text;
150  void* userdata;
151 
152  /* When this is true, it allows some optimizations. */
154 
155  /* Helper temporary growing buffer. */
157  unsigned alloc_buffer;
158 
159  /* Reference definitions. */
165 
166  /* Stack of inline/span markers.
167  * This is only used for parsing a single block contents but by storing it
168  * here we may reuse the stack for subsequent blocks; i.e. we have fewer
169  * (re)allocations. */
171  int n_marks;
173 
174 #if defined MD4C_USE_UTF16
175  char mark_char_map[128];
176 #else
177  char mark_char_map[256];
178 #endif
179 
180  /* For resolving of inline spans. */
182 #define PTR_CHAIN (ctx->mark_chains[0])
183 #define TABLECELLBOUNDARIES (ctx->mark_chains[1])
184 #define ASTERISK_OPENERS_extraword_mod3_0 (ctx->mark_chains[2])
185 #define ASTERISK_OPENERS_extraword_mod3_1 (ctx->mark_chains[3])
186 #define ASTERISK_OPENERS_extraword_mod3_2 (ctx->mark_chains[4])
187 #define ASTERISK_OPENERS_intraword_mod3_0 (ctx->mark_chains[5])
188 #define ASTERISK_OPENERS_intraword_mod3_1 (ctx->mark_chains[6])
189 #define ASTERISK_OPENERS_intraword_mod3_2 (ctx->mark_chains[7])
190 #define UNDERSCORE_OPENERS (ctx->mark_chains[8])
191 #define TILDE_OPENERS_1 (ctx->mark_chains[9])
192 #define TILDE_OPENERS_2 (ctx->mark_chains[10])
193 #define BRACKET_OPENERS (ctx->mark_chains[11])
194 #define DOLLAR_OPENERS (ctx->mark_chains[12])
195 #define OPENERS_CHAIN_FIRST 2
196 #define OPENERS_CHAIN_LAST 12
197 
199 
200  /* For resolving links. */
203 
204  /* For resolving raw HTML. */
209 
210  /* For block analysis.
211  * Notes:
212  * -- It holds MD_BLOCK as well as MD_LINE structures. After each
213  * MD_BLOCK, its (multiple) MD_LINE(s) follow.
214  * -- For MD_BLOCK_HTML and MD_BLOCK_CODE, MD_VERBATIMLINE(s) are used
215  * instead of MD_LINE(s).
216  */
217  void* block_bytes;
221 
222  /* For container block analysis. */
226 
227  /* Minimal indentation to call the block "indented code block". */
229 
230  /* Contextual info for line analysis. */
231  SZ code_fence_length; /* For checking closing fence length. */
232  int html_block_type; /* For checking closing raw HTML condition. */
235 };
236 
249 };
250 typedef enum MD_LINETYPE_tag MD_LINETYPE;
251 
255  unsigned data : 16;
258  unsigned indent; /* Indentation level. */
259 };
260 
261 typedef struct MD_LINE_tag MD_LINE;
262 struct MD_LINE_tag {
265 };
266 
267 typedef struct MD_VERBATIMLINE_tag MD_VERBATIMLINE;
272 };
273 
274 
275 /*****************
276  *** Helpers ***
277  *****************/
278 
279 /* Character accessors.
 * Both expect a variable named ctx in the scope of the expansion and do no
 * bounds checking of the offset. */
280 #define CH(off) (ctx->text[(off)])
281 #define STR(off) (ctx->text + (off))
282 
283 /* Character classification.
 * Note we assume ASCII compatibility of code points < 128 here.
 *
 * The variants with the trailing underscore take a character value, the
 * ones without it take an offset into ctx->text (see CH() above).
 *
 * Beware that most of these macros evaluate their argument more than once,
 * so do not pass expressions with side effects.
 *
 * ISIN_() casts the character to unsigned before comparing it against the
 * bounds. ISANYOF_() rejects the NUL character explicitly before it calls
 * md_strchr(), because strchr() would otherwise match the terminator of the
 * palette string. */
285 #define ISIN_(ch, ch_min, ch_max) ((ch_min) <= (unsigned)(ch) && (unsigned)(ch) <= (ch_max))
286 #define ISANYOF_(ch, palette) ((ch) != _T('\0') && md_strchr((palette), (ch)) != NULL)
287 #define ISANYOF2_(ch, ch1, ch2) ((ch) == (ch1) || (ch) == (ch2))
288 #define ISANYOF3_(ch, ch1, ch2, ch3) ((ch) == (ch1) || (ch) == (ch2) || (ch) == (ch3))
289 #define ISASCII_(ch) ((unsigned)(ch) <= 127)
290 #define ISBLANK_(ch) (ISANYOF2_((ch), _T(' '), _T('\t')))
291 #define ISNEWLINE_(ch) (ISANYOF2_((ch), _T('\r'), _T('\n')))
292 #define ISWHITESPACE_(ch) (ISBLANK_(ch) || ISANYOF2_((ch), _T('\v'), _T('\f')))
293 #define ISCNTRL_(ch) ((unsigned)(ch) <= 31 || (unsigned)(ch) == 127)
294 #define ISPUNCT_(ch) (ISIN_(ch, 33, 47) || ISIN_(ch, 58, 64) || ISIN_(ch, 91, 96) || ISIN_(ch, 123, 126))
295 #define ISUPPER_(ch) (ISIN_(ch, _T('A'), _T('Z')))
296 #define ISLOWER_(ch) (ISIN_(ch, _T('a'), _T('z')))
297 #define ISALPHA_(ch) (ISUPPER_(ch) || ISLOWER_(ch))
298 #define ISDIGIT_(ch) (ISIN_(ch, _T('0'), _T('9')))
299 #define ISXDIGIT_(ch) (ISDIGIT_(ch) || ISIN_(ch, _T('A'), _T('F')) || ISIN_(ch, _T('a'), _T('f')))
300 #define ISALNUM_(ch) (ISALPHA_(ch) || ISDIGIT_(ch))
301 
/* The same tests applied to the character found at offset off of the text. */
302 #define ISANYOF(off, palette) ISANYOF_(CH(off), (palette))
303 #define ISANYOF2(off, ch1, ch2) ISANYOF2_(CH(off), (ch1), (ch2))
304 #define ISANYOF3(off, ch1, ch2, ch3) ISANYOF3_(CH(off), (ch1), (ch2), (ch3))
305 #define ISASCII(off) ISASCII_(CH(off))
306 #define ISBLANK(off) ISBLANK_(CH(off))
307 #define ISNEWLINE(off) ISNEWLINE_(CH(off))
308 #define ISWHITESPACE(off) ISWHITESPACE_(CH(off))
309 #define ISCNTRL(off) ISCNTRL_(CH(off))
310 #define ISPUNCT(off) ISPUNCT_(CH(off))
311 #define ISUPPER(off) ISUPPER_(CH(off))
312 #define ISLOWER(off) ISLOWER_(CH(off))
313 #define ISALPHA(off) ISALPHA_(CH(off))
314 #define ISDIGIT(off) ISDIGIT_(CH(off))
315 #define ISXDIGIT(off) ISXDIGIT_(CH(off))
316 #define ISALNUM(off) ISALNUM_(CH(off))
317 
318 
/* Character search used by ISANYOF_(): the wide-character function when
 * MD4C_USE_UTF16 is in effect, plain strchr() otherwise. */
#ifndef MD4C_USE_UTF16
    #define md_strchr strchr
#else
    #define md_strchr wcschr
#endif
324 
325 
326 /* Case insensitive check of string equality. */
327 static inline int
328 md_ascii_case_eq(const CHAR* s1, const CHAR* s2, SZ n)
329 {
330  OFF i;
331  for(i = 0; i < n; i++) {
332  CHAR ch1 = s1[i];
333  CHAR ch2 = s2[i];
334 
335  if(ISLOWER_(ch1))
336  ch1 += ('A'-'a');
337  if(ISLOWER_(ch2))
338  ch2 += ('A'-'a');
339  if(ch1 != ch2)
340  return FALSE;
341  }
342  return TRUE;
343 }
344 
345 static inline int
346 md_ascii_eq(const CHAR* s1, const CHAR* s2, SZ n)
347 {
348  return memcmp(s1, s2, n * sizeof(CHAR)) == 0;
349 }
350 
351 static int
352 md_text_with_null_replacement(MD_CTX* ctx, MD_TEXTTYPE type, const CHAR* str, SZ size)
353 {
354  OFF off = 0;
355  int ret = 0;
356 
357  while(1) {
358  while(off < size && str[off] != _T('\0'))
359  off++;
360 
361  if(off > 0) {
362  ret = ctx->parser.text(type, str, off, ctx->userdata);
363  if(ret != 0)
364  return ret;
365 
366  str += off;
367  size -= off;
368  off = 0;
369  }
370 
371  if(off >= size)
372  return 0;
373 
374  ret = ctx->parser.text(MD_TEXT_NULLCHAR, _T(""), 1, ctx->userdata);
375  if(ret != 0)
376  return ret;
377  off++;
378  }
379 }
380 
381 
/* Evaluate func once, keep its result in the local variable ret and jump to
 * the local label abort when the result is negative. */
#define MD_CHECK(func)                                                  \
    do {                                                                \
        if((ret = (func)) < 0)                                          \
            goto abort;                                                 \
    } while(0)
388 
389 
/* Make sure the helper buffer ctx->buffer is at least sz large (sz is the
 * size handed to realloc() after rounding).
 *
 * The buffer only ever grows: the new size is sz plus one half of it plus
 * 128, with the low seven bits cleared. When realloc() fails, the message is
 * logged, the local variable ret is set to -1 and control jumps to the local
 * label abort; ctx->buffer is left untouched in that case.
 *
 * Note sz is evaluated several times. */
#define MD_TEMP_BUFFER(sz)                                              \
    do {                                                                \
        if((sz) > ctx->alloc_buffer) {                                  \
            CHAR* new_buffer;                                           \
            SZ new_size = ((sz) + (sz) / 2 + 128) & ~127;               \
                                                                        \
            new_buffer = realloc(ctx->buffer, new_size);                \
            if(new_buffer == NULL) {                                    \
                MD_LOG("realloc() failed.");                            \
                ret = -1;                                               \
                goto abort;                                             \
            }                                                           \
                                                                        \
            ctx->buffer = new_buffer;                                   \
            ctx->alloc_buffer = new_size;                               \
        }                                                               \
    } while(0)
407 
408 
/* Common part of the callback wrappers below: evaluate the callback
 * expression, keep its result in the local variable ret and, when it is
 * non-zero, log msg and jump to the local label abort. */
#define MD_CALLBACK_OR_ABORT_(call, msg)                                \
    do {                                                                \
        ret = (call);                                                   \
        if(ret != 0) {                                                  \
            MD_LOG(msg);                                                \
            goto abort;                                                 \
        }                                                               \
    } while(0)

/* Invoke the enter_block() callback of the parser. */
#define MD_ENTER_BLOCK(type, arg)                                       \
    MD_CALLBACK_OR_ABORT_(                                              \
        ctx->parser.enter_block((type), (arg), ctx->userdata),          \
        "Aborted from enter_block() callback.")

/* Invoke the leave_block() callback of the parser. */
#define MD_LEAVE_BLOCK(type, arg)                                       \
    MD_CALLBACK_OR_ABORT_(                                              \
        ctx->parser.leave_block((type), (arg), ctx->userdata),          \
        "Aborted from leave_block() callback.")

/* Invoke the enter_span() callback of the parser. */
#define MD_ENTER_SPAN(type, arg)                                        \
    MD_CALLBACK_OR_ABORT_(                                              \
        ctx->parser.enter_span((type), (arg), ctx->userdata),           \
        "Aborted from enter_span() callback.")

/* Invoke the leave_span() callback of the parser. */
#define MD_LEAVE_SPAN(type, arg)                                        \
    MD_CALLBACK_OR_ABORT_(                                              \
        ctx->parser.leave_span((type), (arg), ctx->userdata),           \
        "Aborted from leave_span() callback.")
444 
/* Pass size characters of str to the text() callback as text of the given
 * type; nothing is done when size is zero. When the callback returns
 * non-zero, the value is left in the local variable ret, a message is logged
 * and control jumps to the local label abort.
 *
 * All arguments are parenthesized in the expansion so that expressions with
 * low-precedence operators (e.g. a conditional) can be passed. Note size is
 * evaluated twice. */
#define MD_TEXT(type, str, size)                                        \
    do {                                                                \
        if((size) > 0) {                                                \
            ret = ctx->parser.text((type), (str), (size), ctx->userdata); \
            if(ret != 0) {                                              \
                MD_LOG("Aborted from text() callback.");                \
                goto abort;                                             \
            }                                                           \
        }                                                               \
    } while(0)

/* The same as MD_TEXT(), but the text goes through
 * md_text_with_null_replacement() instead of directly to the callback. */
#define MD_TEXT_INSECURE(type, str, size)                               \
    do {                                                                \
        if((size) > 0) {                                                \
            ret = md_text_with_null_replacement(ctx, (type), (str), (size)); \
            if(ret != 0) {                                              \
                MD_LOG("Aborted from text() callback.");                \
                goto abort;                                             \
            }                                                           \
        }                                                               \
    } while(0)
466 
467 
468 
469 /*************************
470  *** Unicode Support ***
471  *************************/
472 
475  unsigned codepoints[3];
476  unsigned n_codepoints;
477 };
478 
479 
480 #if defined MD4C_USE_UTF16 || defined MD4C_USE_UTF8
/* Binary search in a sorted "map" of codepoints.
 *
 * A record of the map is either one codepoint, or a pair of neighboring
 * records describing a range: (MIN_CODEPOINT | 0x40000000) followed by
 * (MAX_CODEPOINT | 0x80000000).
 *
 * Returns the index of the record matching the codepoint (for a range, the
 * index of its minimal value), or -1 when the codepoint is not in the map. */
static int
md_unicode_bsearch__(unsigned codepoint, const unsigned* map, size_t map_size)
{
    const unsigned range_min_flag = 0x40000000;
    const unsigned range_max_flag = 0x80000000;
    const unsigned codepoint_mask = 0x00ffffff;
    int lo = 0;
    int hi = (int) map_size - 1;

    while(lo <= hi) {
        const int mid = (lo + hi) / 2;
        int first = mid;
        int last = mid;

        /* The middle record may be either end of a range; widen the pivot
         * so that it covers both records of that range. */
        if(map[mid] & range_min_flag)
            last = mid + 1;
        if(map[mid] & range_max_flag)
            first = mid - 1;

        if(codepoint < (map[first] & codepoint_mask)) {
            hi = first - 1;
            continue;
        }
        if(codepoint > (map[last] & codepoint_mask)) {
            lo = last + 1;
            continue;
        }

        return first;
    }

    return -1;
}
513 
/* Tell whether the codepoint is a whitespace. Returns non-zero if it is. */
static int
md_is_unicode_whitespace__(unsigned codepoint)
{
    /* Unicode "Zs" category.
     * (generated by scripts/build_whitespace_map.py)
     *
     * The only range (0x2000 - 0x200a) is written as two records carrying
     * the flags md_unicode_bsearch__() looks for. */
    static const unsigned WHITESPACE_MAP[] = {
        0x0020, 0x00a0, 0x1680,
        0x2000 | 0x40000000, 0x200a | 0x80000000,
        0x202f, 0x205f, 0x3000
    };
    const size_t n_records = sizeof(WHITESPACE_MAP) / sizeof(WHITESPACE_MAP[0]);

    if(codepoint > 0x7f)
        return (md_unicode_bsearch__(codepoint, WHITESPACE_MAP, n_records) >= 0);

    /* The ASCII ones are the most frequently used ones, also CommonMark
     * specification requests few more in this range. */
    return ISWHITESPACE_(codepoint);
}
534 
/* Tell whether the codepoint counts as punctuation.
 *
 * Codepoints up to 0x7f are classified with ISPUNCT_(); any other codepoint
 * is looked up in PUNCT_MAP with md_unicode_bsearch__().
 * Returns non-zero for punctuation, zero otherwise. */
535 static int
536 md_is_unicode_punct__(unsigned codepoint)
537 {
/* R() emits the two flagged records of a range, S() a single record; this is
 * the layout md_unicode_bsearch__() searches. */
538 #define R(cp_min, cp_max) ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
539 #define S(cp) (cp)
540 /* Unicode "Pc", "Pd", "Pe", "Pf", "Pi", "Po", "Ps" categories.
541 * (generated by scripts/build_punct_map.py) */
542 static const unsigned PUNCT_MAP[] = {
543 R(0x0021,0x0023), R(0x0025,0x002a), R(0x002c,0x002f), R(0x003a,0x003b), R(0x003f,0x0040),
544 R(0x005b,0x005d), S(0x005f), S(0x007b), S(0x007d), S(0x00a1), S(0x00a7), S(0x00ab), R(0x00b6,0x00b7),
545 S(0x00bb), S(0x00bf), S(0x037e), S(0x0387), R(0x055a,0x055f), R(0x0589,0x058a), S(0x05be), S(0x05c0),
546 S(0x05c3), S(0x05c6), R(0x05f3,0x05f4), R(0x0609,0x060a), R(0x060c,0x060d), S(0x061b), R(0x061e,0x061f),
547 R(0x066a,0x066d), S(0x06d4), R(0x0700,0x070d), R(0x07f7,0x07f9), R(0x0830,0x083e), S(0x085e),
548 R(0x0964,0x0965), S(0x0970), S(0x09fd), S(0x0a76), S(0x0af0), S(0x0c77), S(0x0c84), S(0x0df4), S(0x0e4f),
549 R(0x0e5a,0x0e5b), R(0x0f04,0x0f12), S(0x0f14), R(0x0f3a,0x0f3d), S(0x0f85), R(0x0fd0,0x0fd4),
550 R(0x0fd9,0x0fda), R(0x104a,0x104f), S(0x10fb), R(0x1360,0x1368), S(0x1400), S(0x166e), R(0x169b,0x169c),
551 R(0x16eb,0x16ed), R(0x1735,0x1736), R(0x17d4,0x17d6), R(0x17d8,0x17da), R(0x1800,0x180a),
552 R(0x1944,0x1945), R(0x1a1e,0x1a1f), R(0x1aa0,0x1aa6), R(0x1aa8,0x1aad), R(0x1b5a,0x1b60),
553 R(0x1bfc,0x1bff), R(0x1c3b,0x1c3f), R(0x1c7e,0x1c7f), R(0x1cc0,0x1cc7), S(0x1cd3), R(0x2010,0x2027),
554 R(0x2030,0x2043), R(0x2045,0x2051), R(0x2053,0x205e), R(0x207d,0x207e), R(0x208d,0x208e),
555 R(0x2308,0x230b), R(0x2329,0x232a), R(0x2768,0x2775), R(0x27c5,0x27c6), R(0x27e6,0x27ef),
556 R(0x2983,0x2998), R(0x29d8,0x29db), R(0x29fc,0x29fd), R(0x2cf9,0x2cfc), R(0x2cfe,0x2cff), S(0x2d70),
557 R(0x2e00,0x2e2e), R(0x2e30,0x2e4f), S(0x2e52), R(0x3001,0x3003), R(0x3008,0x3011), R(0x3014,0x301f),
558 S(0x3030), S(0x303d), S(0x30a0), S(0x30fb), R(0xa4fe,0xa4ff), R(0xa60d,0xa60f), S(0xa673), S(0xa67e),
559 R(0xa6f2,0xa6f7), R(0xa874,0xa877), R(0xa8ce,0xa8cf), R(0xa8f8,0xa8fa), S(0xa8fc), R(0xa92e,0xa92f),
560 S(0xa95f), R(0xa9c1,0xa9cd), R(0xa9de,0xa9df), R(0xaa5c,0xaa5f), R(0xaade,0xaadf), R(0xaaf0,0xaaf1),
561 S(0xabeb), R(0xfd3e,0xfd3f), R(0xfe10,0xfe19), R(0xfe30,0xfe52), R(0xfe54,0xfe61), S(0xfe63), S(0xfe68),
562 R(0xfe6a,0xfe6b), R(0xff01,0xff03), R(0xff05,0xff0a), R(0xff0c,0xff0f), R(0xff1a,0xff1b),
563 R(0xff1f,0xff20), R(0xff3b,0xff3d), S(0xff3f), S(0xff5b), S(0xff5d), R(0xff5f,0xff65), R(0x10100,0x10102),
564 S(0x1039f), S(0x103d0), S(0x1056f), S(0x10857), S(0x1091f), S(0x1093f), R(0x10a50,0x10a58), S(0x10a7f),
565 R(0x10af0,0x10af6), R(0x10b39,0x10b3f), R(0x10b99,0x10b9c), S(0x10ead), R(0x10f55,0x10f59),
566 R(0x11047,0x1104d), R(0x110bb,0x110bc), R(0x110be,0x110c1), R(0x11140,0x11143), R(0x11174,0x11175),
567 R(0x111c5,0x111c8), S(0x111cd), S(0x111db), R(0x111dd,0x111df), R(0x11238,0x1123d), S(0x112a9),
568 R(0x1144b,0x1144f), R(0x1145a,0x1145b), S(0x1145d), S(0x114c6), R(0x115c1,0x115d7), R(0x11641,0x11643),
569 R(0x11660,0x1166c), R(0x1173c,0x1173e), S(0x1183b), R(0x11944,0x11946), S(0x119e2), R(0x11a3f,0x11a46),
570 R(0x11a9a,0x11a9c), R(0x11a9e,0x11aa2), R(0x11c41,0x11c45), R(0x11c70,0x11c71), R(0x11ef7,0x11ef8),
571 S(0x11fff), R(0x12470,0x12474), R(0x16a6e,0x16a6f), S(0x16af5), R(0x16b37,0x16b3b), S(0x16b44),
572 R(0x16e97,0x16e9a), S(0x16fe2), S(0x1bc9f), R(0x1da87,0x1da8b), R(0x1e95e,0x1e95f)
573 };
574 #undef R
575 #undef S
576 
577 /* The ASCII ones are the most frequently used ones, also CommonMark
578 * specification requests few more in this range. */
579 if(codepoint <= 0x7f)
580 return ISPUNCT_(codepoint);
581 
582 return (md_unicode_bsearch__(codepoint, PUNCT_MAP, SIZEOF_ARRAY(PUNCT_MAP)) >= 0);
583 }
584 
/* Fill info with the folded form of the codepoint.
 *
 * ASCII codepoints are handled directly: upper case letters are turned into
 * lower case ones, anything else is kept as it is. Other codepoints are
 * searched for in FOLD_MAP_1, FOLD_MAP_2 and FOLD_MAP_3, whose records fold
 * into one, two and three codepoints respectively. A codepoint found in
 * none of the maps folds into itself.
 *
 * On return, info->n_codepoints tells how many items of info->codepoints
 * are valid. */
585 static void
586 md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
587 {
/* R() emits the two flagged records of a range, S() a single record; this is
 * the layout md_unicode_bsearch__() searches. */
588 #define R(cp_min, cp_max) ((cp_min) | 0x40000000), ((cp_max) | 0x80000000)
589 #define S(cp) (cp)
590 /* Folding maps. Every FOLD_MAP_n lists the codepoints whose folded form
 * takes n codepoints; FOLD_MAP_n_DATA holds n codepoints for every record
 * of FOLD_MAP_n, at the same record index.
591 * (generated by scripts/build_folding_map.py) */
592 static const unsigned FOLD_MAP_1[] = {
593 R(0x0041,0x005a), S(0x00b5), R(0x00c0,0x00d6), R(0x00d8,0x00de), R(0x0100,0x012e), R(0x0132,0x0136),
594 R(0x0139,0x0147), R(0x014a,0x0176), S(0x0178), R(0x0179,0x017d), S(0x017f), S(0x0181), S(0x0182),
595 S(0x0184), S(0x0186), S(0x0187), S(0x0189), S(0x018a), S(0x018b), S(0x018e), S(0x018f), S(0x0190),
596 S(0x0191), S(0x0193), S(0x0194), S(0x0196), S(0x0197), S(0x0198), S(0x019c), S(0x019d), S(0x019f),
597 R(0x01a0,0x01a4), S(0x01a6), S(0x01a7), S(0x01a9), S(0x01ac), S(0x01ae), S(0x01af), S(0x01b1), S(0x01b2),
598 S(0x01b3), S(0x01b5), S(0x01b7), S(0x01b8), S(0x01bc), S(0x01c4), S(0x01c5), S(0x01c7), S(0x01c8),
599 S(0x01ca), R(0x01cb,0x01db), R(0x01de,0x01ee), S(0x01f1), S(0x01f2), S(0x01f4), S(0x01f6), S(0x01f7),
600 R(0x01f8,0x021e), S(0x0220), R(0x0222,0x0232), S(0x023a), S(0x023b), S(0x023d), S(0x023e), S(0x0241),
601 S(0x0243), S(0x0244), S(0x0245), R(0x0246,0x024e), S(0x0345), S(0x0370), S(0x0372), S(0x0376), S(0x037f),
602 S(0x0386), R(0x0388,0x038a), S(0x038c), S(0x038e), S(0x038f), R(0x0391,0x03a1), R(0x03a3,0x03ab),
603 S(0x03c2), S(0x03cf), S(0x03d0), S(0x03d1), S(0x03d5), S(0x03d6), R(0x03d8,0x03ee), S(0x03f0), S(0x03f1),
604 S(0x03f4), S(0x03f5), S(0x03f7), S(0x03f9), S(0x03fa), R(0x03fd,0x03ff), R(0x0400,0x040f),
605 R(0x0410,0x042f), R(0x0460,0x0480), R(0x048a,0x04be), S(0x04c0), R(0x04c1,0x04cd), R(0x04d0,0x052e),
606 R(0x0531,0x0556), R(0x10a0,0x10c5), S(0x10c7), S(0x10cd), R(0x13f8,0x13fd), S(0x1c80), S(0x1c81),
607 S(0x1c82), S(0x1c83), S(0x1c84), S(0x1c85), S(0x1c86), S(0x1c87), S(0x1c88), R(0x1c90,0x1cba),
608 R(0x1cbd,0x1cbf), R(0x1e00,0x1e94), S(0x1e9b), R(0x1ea0,0x1efe), R(0x1f08,0x1f0f), R(0x1f18,0x1f1d),
609 R(0x1f28,0x1f2f), R(0x1f38,0x1f3f), R(0x1f48,0x1f4d), S(0x1f59), S(0x1f5b), S(0x1f5d), S(0x1f5f),
610 R(0x1f68,0x1f6f), S(0x1fb8), S(0x1fb9), S(0x1fba), S(0x1fbb), S(0x1fbe), R(0x1fc8,0x1fcb), S(0x1fd8),
611 S(0x1fd9), S(0x1fda), S(0x1fdb), S(0x1fe8), S(0x1fe9), S(0x1fea), S(0x1feb), S(0x1fec), S(0x1ff8),
612 S(0x1ff9), S(0x1ffa), S(0x1ffb), S(0x2126), S(0x212a), S(0x212b), S(0x2132), R(0x2160,0x216f), S(0x2183),
613 R(0x24b6,0x24cf), R(0x2c00,0x2c2e), S(0x2c60), S(0x2c62), S(0x2c63), S(0x2c64), R(0x2c67,0x2c6b),
614 S(0x2c6d), S(0x2c6e), S(0x2c6f), S(0x2c70), S(0x2c72), S(0x2c75), S(0x2c7e), S(0x2c7f), R(0x2c80,0x2ce2),
615 S(0x2ceb), S(0x2ced), S(0x2cf2), R(0xa640,0xa66c), R(0xa680,0xa69a), R(0xa722,0xa72e), R(0xa732,0xa76e),
616 S(0xa779), S(0xa77b), S(0xa77d), R(0xa77e,0xa786), S(0xa78b), S(0xa78d), S(0xa790), S(0xa792),
617 R(0xa796,0xa7a8), S(0xa7aa), S(0xa7ab), S(0xa7ac), S(0xa7ad), S(0xa7ae), S(0xa7b0), S(0xa7b1), S(0xa7b2),
618 S(0xa7b3), R(0xa7b4,0xa7be), S(0xa7c2), S(0xa7c4), S(0xa7c5), S(0xa7c6), S(0xa7c7), S(0xa7c9), S(0xa7f5),
619 R(0xab70,0xabbf), R(0xff21,0xff3a), R(0x10400,0x10427), R(0x104b0,0x104d3), R(0x10c80,0x10cb2),
620 R(0x118a0,0x118bf), R(0x16e40,0x16e5f), R(0x1e900,0x1e921)
621 };
622 static const unsigned FOLD_MAP_1_DATA[] = {
623 0x0061, 0x007a, 0x03bc, 0x00e0, 0x00f6, 0x00f8, 0x00fe, 0x0101, 0x012f, 0x0133, 0x0137, 0x013a, 0x0148,
624 0x014b, 0x0177, 0x00ff, 0x017a, 0x017e, 0x0073, 0x0253, 0x0183, 0x0185, 0x0254, 0x0188, 0x0256, 0x0257,
625 0x018c, 0x01dd, 0x0259, 0x025b, 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026f, 0x0272, 0x0275,
626 0x01a1, 0x01a5, 0x0280, 0x01a8, 0x0283, 0x01ad, 0x0288, 0x01b0, 0x028a, 0x028b, 0x01b4, 0x01b6, 0x0292,
627 0x01b9, 0x01bd, 0x01c6, 0x01c6, 0x01c9, 0x01c9, 0x01cc, 0x01cc, 0x01dc, 0x01df, 0x01ef, 0x01f3, 0x01f3,
628 0x01f5, 0x0195, 0x01bf, 0x01f9, 0x021f, 0x019e, 0x0223, 0x0233, 0x2c65, 0x023c, 0x019a, 0x2c66, 0x0242,
629 0x0180, 0x0289, 0x028c, 0x0247, 0x024f, 0x03b9, 0x0371, 0x0373, 0x0377, 0x03f3, 0x03ac, 0x03ad, 0x03af,
630 0x03cc, 0x03cd, 0x03ce, 0x03b1, 0x03c1, 0x03c3, 0x03cb, 0x03c3, 0x03d7, 0x03b2, 0x03b8, 0x03c6, 0x03c0,
631 0x03d9, 0x03ef, 0x03ba, 0x03c1, 0x03b8, 0x03b5, 0x03f8, 0x03f2, 0x03fb, 0x037b, 0x037d, 0x0450, 0x045f,
632 0x0430, 0x044f, 0x0461, 0x0481, 0x048b, 0x04bf, 0x04cf, 0x04c2, 0x04ce, 0x04d1, 0x052f, 0x0561, 0x0586,
633 0x2d00, 0x2d25, 0x2d27, 0x2d2d, 0x13f0, 0x13f5, 0x0432, 0x0434, 0x043e, 0x0441, 0x0442, 0x0442, 0x044a,
634 0x0463, 0xa64b, 0x10d0, 0x10fa, 0x10fd, 0x10ff, 0x1e01, 0x1e95, 0x1e61, 0x1ea1, 0x1eff, 0x1f00, 0x1f07,
635 0x1f10, 0x1f15, 0x1f20, 0x1f27, 0x1f30, 0x1f37, 0x1f40, 0x1f45, 0x1f51, 0x1f53, 0x1f55, 0x1f57, 0x1f60,
636 0x1f67, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, 0x03b9, 0x1f72, 0x1f75, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, 0x1fe0,
637 0x1fe1, 0x1f7a, 0x1f7b, 0x1fe5, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, 0x03c9, 0x006b, 0x00e5, 0x214e, 0x2170,
638 0x217f, 0x2184, 0x24d0, 0x24e9, 0x2c30, 0x2c5e, 0x2c61, 0x026b, 0x1d7d, 0x027d, 0x2c68, 0x2c6c, 0x0251,
639 0x0271, 0x0250, 0x0252, 0x2c73, 0x2c76, 0x023f, 0x0240, 0x2c81, 0x2ce3, 0x2cec, 0x2cee, 0x2cf3, 0xa641,
640 0xa66d, 0xa681, 0xa69b, 0xa723, 0xa72f, 0xa733, 0xa76f, 0xa77a, 0xa77c, 0x1d79, 0xa77f, 0xa787, 0xa78c,
641 0x0265, 0xa791, 0xa793, 0xa797, 0xa7a9, 0x0266, 0x025c, 0x0261, 0x026c, 0x026a, 0x029e, 0x0287, 0x029d,
642 0xab53, 0xa7b5, 0xa7bf, 0xa7c3, 0xa794, 0x0282, 0x1d8e, 0xa7c8, 0xa7ca, 0xa7f6, 0x13a0, 0x13ef, 0xff41,
643 0xff5a, 0x10428, 0x1044f, 0x104d8, 0x104fb, 0x10cc0, 0x10cf2, 0x118c0, 0x118df, 0x16e60, 0x16e7f, 0x1e922,
644 0x1e943
645 };
646 static const unsigned FOLD_MAP_2[] = {
647 S(0x00df), S(0x0130), S(0x0149), S(0x01f0), S(0x0587), S(0x1e96), S(0x1e97), S(0x1e98), S(0x1e99),
648 S(0x1e9a), S(0x1e9e), S(0x1f50), R(0x1f80,0x1f87), R(0x1f88,0x1f8f), R(0x1f90,0x1f97), R(0x1f98,0x1f9f),
649 R(0x1fa0,0x1fa7), R(0x1fa8,0x1faf), S(0x1fb2), S(0x1fb3), S(0x1fb4), S(0x1fb6), S(0x1fbc), S(0x1fc2),
650 S(0x1fc3), S(0x1fc4), S(0x1fc6), S(0x1fcc), S(0x1fd6), S(0x1fe4), S(0x1fe6), S(0x1ff2), S(0x1ff3),
651 S(0x1ff4), S(0x1ff6), S(0x1ffc), S(0xfb00), S(0xfb01), S(0xfb02), S(0xfb05), S(0xfb06), S(0xfb13),
652 S(0xfb14), S(0xfb15), S(0xfb16), S(0xfb17)
653 };
654 static const unsigned FOLD_MAP_2_DATA[] = {
655 0x0073,0x0073, 0x0069,0x0307, 0x02bc,0x006e, 0x006a,0x030c, 0x0565,0x0582, 0x0068,0x0331, 0x0074,0x0308,
656 0x0077,0x030a, 0x0079,0x030a, 0x0061,0x02be, 0x0073,0x0073, 0x03c5,0x0313, 0x1f00,0x03b9, 0x1f07,0x03b9,
657 0x1f00,0x03b9, 0x1f07,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f20,0x03b9, 0x1f27,0x03b9, 0x1f60,0x03b9,
658 0x1f67,0x03b9, 0x1f60,0x03b9, 0x1f67,0x03b9, 0x1f70,0x03b9, 0x03b1,0x03b9, 0x03ac,0x03b9, 0x03b1,0x0342,
659 0x03b1,0x03b9, 0x1f74,0x03b9, 0x03b7,0x03b9, 0x03ae,0x03b9, 0x03b7,0x0342, 0x03b7,0x03b9, 0x03b9,0x0342,
660 0x03c1,0x0313, 0x03c5,0x0342, 0x1f7c,0x03b9, 0x03c9,0x03b9, 0x03ce,0x03b9, 0x03c9,0x0342, 0x03c9,0x03b9,
661 0x0066,0x0066, 0x0066,0x0069, 0x0066,0x006c, 0x0073,0x0074, 0x0073,0x0074, 0x0574,0x0576, 0x0574,0x0565,
662 0x0574,0x056b, 0x057e,0x0576, 0x0574,0x056d
663 };
664 static const unsigned FOLD_MAP_3[] = {
665 S(0x0390), S(0x03b0), S(0x1f52), S(0x1f54), S(0x1f56), S(0x1fb7), S(0x1fc7), S(0x1fd2), S(0x1fd3),
666 S(0x1fd7), S(0x1fe2), S(0x1fe3), S(0x1fe7), S(0x1ff7), S(0xfb03), S(0xfb04)
667 };
668 static const unsigned FOLD_MAP_3_DATA[] = {
669 0x03b9,0x0308,0x0301, 0x03c5,0x0308,0x0301, 0x03c5,0x0313,0x0300, 0x03c5,0x0313,0x0301,
670 0x03c5,0x0313,0x0342, 0x03b1,0x0342,0x03b9, 0x03b7,0x0342,0x03b9, 0x03b9,0x0308,0x0300,
671 0x03b9,0x0308,0x0301, 0x03b9,0x0308,0x0342, 0x03c5,0x0308,0x0300, 0x03c5,0x0308,0x0301,
672 0x03c5,0x0308,0x0342, 0x03c9,0x0342,0x03b9, 0x0066,0x0066,0x0069, 0x0066,0x0066,0x006c
673 };
674 #undef R
675 #undef S
/* The maps in the order they are searched, each with its data array and the
 * number of codepoints every one of its records folds into. */
676 static const struct {
677 const unsigned* map;
678 const unsigned* data;
679 size_t map_size;
680 unsigned n_codepoints;
681 } FOLD_MAP_LIST[] = {
682 { FOLD_MAP_1, FOLD_MAP_1_DATA, SIZEOF_ARRAY(FOLD_MAP_1), 1 },
683 { FOLD_MAP_2, FOLD_MAP_2_DATA, SIZEOF_ARRAY(FOLD_MAP_2), 2 },
684 { FOLD_MAP_3, FOLD_MAP_3_DATA, SIZEOF_ARRAY(FOLD_MAP_3), 3 }
685 };
686 
687 int i;
688 
689 /* Fast path for ASCII characters. */
690 if(codepoint <= 0x7f) {
691 info->codepoints[0] = codepoint;
692 if(ISUPPER_(codepoint))
693 info->codepoints[0] += 'a' - 'A';
694 info->n_codepoints = 1;
695 return;
696 }
697 
698 /* Try to locate the codepoint in any of the maps. */
699 for(i = 0; i < (int) SIZEOF_ARRAY(FOLD_MAP_LIST); i++) {
700 int index;
701 
702 index = md_unicode_bsearch__(codepoint, FOLD_MAP_LIST[i].map, FOLD_MAP_LIST[i].map_size);
703 if(index >= 0) {
704 /* Found the mapping. */
705 unsigned n_codepoints = FOLD_MAP_LIST[i].n_codepoints;
706 const unsigned* map = FOLD_MAP_LIST[i].map;
707 const unsigned* codepoints = FOLD_MAP_LIST[i].data + (index * n_codepoints);
708 
709 memcpy(info->codepoints, codepoints, sizeof(unsigned) * n_codepoints);
710 info->n_codepoints = n_codepoints;
711 
/* A record of a range still carries its flag bits, so it never equals the
 * codepoint; this branch is therefore taken for every hit inside a range,
 * the first codepoint of the range included. */
712 if(FOLD_MAP_LIST[i].map[index] != codepoint) {
713 /* The found mapping maps whole range of codepoints,
714 * i.e. we have to offset info->codepoints[0] accordingly. */
715 if((map[index] & 0x00ffffff)+1 == codepoints[0]) {
716 /* Alternating type of the range. */
717 info->codepoints[0] = codepoint + ((codepoint & 0x1) == (map[index] & 0x1) ? 1 : 0);
718 } else {
719 /* Range to range kind of mapping. */
720 info->codepoints[0] += (codepoint - (map[index] & 0x00ffffff));
721 }
722 }
723 
724 return;
725 }
726 }
727 
728 /* No mapping found. Map the codepoint to itself. */
729 info->codepoints[0] = codepoint;
730 info->n_codepoints = 1;
731 }
732 #endif
733 
734 
735 #if defined MD4C_USE_UTF16
736  #define IS_UTF16_SURROGATE_HI(word) (((WORD)(word) & 0xfc00) == 0xd800)
737  #define IS_UTF16_SURROGATE_LO(word) (((WORD)(word) & 0xfc00) == 0xdc00)
738  #define UTF16_DECODE_SURROGATE(hi, lo) (0x10000 + ((((unsigned)(hi) & 0x3ff) << 10) | (((unsigned)(lo) & 0x3ff) << 0)))
739 
740  static unsigned
741  md_decode_utf16le__(const CHAR* str, SZ str_size, SZ* p_size)
742  {
743  if(IS_UTF16_SURROGATE_HI(str[0])) {
744  if(1 < str_size && IS_UTF16_SURROGATE_LO(str[1])) {
745  if(p_size != NULL)
746  *p_size = 2;
747  return UTF16_DECODE_SURROGATE(str[0], str[1]);
748  }
749  }
750 
751  if(p_size != NULL)
752  *p_size = 1;
753  return str[0];
754  }
755 
756  static unsigned
757  md_decode_utf16le_before__(MD_CTX* ctx, OFF off)
758  {
759  if(off > 2 && IS_UTF16_SURROGATE_HI(CH(off-2)) && IS_UTF16_SURROGATE_LO(CH(off-1)))
760  return UTF16_DECODE_SURROGATE(CH(off-2), CH(off-1));
761 
762  return CH(off);
763  }
764 
765  /* No whitespace uses surrogates, so no decoding needed here. */
766  #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
767  #define ISUNICODEWHITESPACE(off) md_is_unicode_whitespace__(CH(off))
768  #define ISUNICODEWHITESPACEBEFORE(off) md_is_unicode_whitespace__(CH((off)-1))
769 
770  #define ISUNICODEPUNCT(off) md_is_unicode_punct__(md_decode_utf16le__(STR(off), ctx->size - (off), NULL))
771  #define ISUNICODEPUNCTBEFORE(off) md_is_unicode_punct__(md_decode_utf16le_before__(ctx, off))
772 
773  static inline int
774  md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
775  {
776  return md_decode_utf16le__(str+off, str_size-off, p_char_size);
777  }
778 #elif defined MD4C_USE_UTF8
779  #define IS_UTF8_LEAD1(byte) ((unsigned char)(byte) <= 0x7f)
780  #define IS_UTF8_LEAD2(byte) (((unsigned char)(byte) & 0xe0) == 0xc0)
781  #define IS_UTF8_LEAD3(byte) (((unsigned char)(byte) & 0xf0) == 0xe0)
782  #define IS_UTF8_LEAD4(byte) (((unsigned char)(byte) & 0xf8) == 0xf0)
783  #define IS_UTF8_TAIL(byte) (((unsigned char)(byte) & 0xc0) == 0x80)
784 
785  static unsigned
786  md_decode_utf8__(const CHAR* str, SZ str_size, SZ* p_size)
787  {
788  if(!IS_UTF8_LEAD1(str[0])) {
789  if(IS_UTF8_LEAD2(str[0])) {
790  if(1 < str_size && IS_UTF8_TAIL(str[1])) {
791  if(p_size != NULL)
792  *p_size = 2;
793 
794  return (((unsigned int)str[0] & 0x1f) << 6) |
795  (((unsigned int)str[1] & 0x3f) << 0);
796  }
797  } else if(IS_UTF8_LEAD3(str[0])) {
798  if(2 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2])) {
799  if(p_size != NULL)
800  *p_size = 3;
801 
802  return (((unsigned int)str[0] & 0x0f) << 12) |
803  (((unsigned int)str[1] & 0x3f) << 6) |
804  (((unsigned int)str[2] & 0x3f) << 0);
805  }
806  } else if(IS_UTF8_LEAD4(str[0])) {
807  if(3 < str_size && IS_UTF8_TAIL(str[1]) && IS_UTF8_TAIL(str[2]) && IS_UTF8_TAIL(str[3])) {
808  if(p_size != NULL)
809  *p_size = 4;
810 
811  return (((unsigned int)str[0] & 0x07) << 18) |
812  (((unsigned int)str[1] & 0x3f) << 12) |
813  (((unsigned int)str[2] & 0x3f) << 6) |
814  (((unsigned int)str[3] & 0x3f) << 0);
815  }
816  }
817  }
818 
819  if(p_size != NULL)
820  *p_size = 1;
821  return (unsigned) str[0];
822  }
823 
824  static unsigned
825  md_decode_utf8_before__(MD_CTX* ctx, OFF off)
826  {
827  if(!IS_UTF8_LEAD1(CH(off-1))) {
828  if(off > 1 && IS_UTF8_LEAD2(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
829  return (((unsigned int)CH(off-2) & 0x1f) << 6) |
830  (((unsigned int)CH(off-1) & 0x3f) << 0);
831 
832  if(off > 2 && IS_UTF8_LEAD3(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
833  return (((unsigned int)CH(off-3) & 0x0f) << 12) |
834  (((unsigned int)CH(off-2) & 0x3f) << 6) |
835  (((unsigned int)CH(off-1) & 0x3f) << 0);
836 
837  if(off > 3 && IS_UTF8_LEAD4(CH(off-4)) && IS_UTF8_TAIL(CH(off-3)) && IS_UTF8_TAIL(CH(off-2)) && IS_UTF8_TAIL(CH(off-1)))
838  return (((unsigned int)CH(off-4) & 0x07) << 18) |
839  (((unsigned int)CH(off-3) & 0x3f) << 12) |
840  (((unsigned int)CH(off-2) & 0x3f) << 6) |
841  (((unsigned int)CH(off-1) & 0x3f) << 0);
842  }
843 
844  return (unsigned) CH(off-1);
845  }
846 
847  #define ISUNICODEWHITESPACE_(codepoint) md_is_unicode_whitespace__(codepoint)
848  #define ISUNICODEWHITESPACE(off) md_is_unicode_whitespace__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
849  #define ISUNICODEWHITESPACEBEFORE(off) md_is_unicode_whitespace__(md_decode_utf8_before__(ctx, off))
850 
851  #define ISUNICODEPUNCT(off) md_is_unicode_punct__(md_decode_utf8__(STR(off), ctx->size - (off), NULL))
852  #define ISUNICODEPUNCTBEFORE(off) md_is_unicode_punct__(md_decode_utf8_before__(ctx, off))
853 
854  static inline unsigned
855  md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_char_size)
856  {
857  return md_decode_utf8__(str+off, str_size-off, p_char_size);
858  }
859 #else
860  #define ISUNICODEWHITESPACE_(codepoint) ISWHITESPACE_(codepoint)
861  #define ISUNICODEWHITESPACE(off) ISWHITESPACE(off)
862  #define ISUNICODEWHITESPACEBEFORE(off) ISWHITESPACE((off)-1)
863 
864  #define ISUNICODEPUNCT(off) ISPUNCT(off)
865  #define ISUNICODEPUNCTBEFORE(off) ISPUNCT((off)-1)
866 
867  static inline void
868  md_get_unicode_fold_info(unsigned codepoint, MD_UNICODE_FOLD_INFO* info)
869  {
870  info->codepoints[0] = codepoint;
871  if(ISUPPER_(codepoint))
872  info->codepoints[0] += 'a' - 'A';
873  info->n_codepoints = 1;
874  }
875 
876  static inline unsigned
877  md_decode_unicode(const CHAR* str, OFF off, SZ str_size, SZ* p_size)
878  {
879  *p_size = 1;
880  return (unsigned) str[off];
881  }
882 #endif
883 
884 
885 /*************************************
886  *** Helper string manipulations ***
887  *************************************/
888 
889 /* Fill buffer with copy of the string between 'beg' and 'end' but replace any
890  * line breaks with given replacement character.
891  *
892  * NOTE: The caller is responsible for making sure the buffer is large enough.
893  * (Given that the output is never longer than the input, (end - beg) is a good
894  * size for the caller to allocate.)
895  */
896 static void
897 md_merge_lines(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
898  CHAR line_break_replacement_char, CHAR* buffer, SZ* p_size)
899 {
900  CHAR* ptr = buffer;
901  int line_index = 0;
902  OFF off = beg;
903 
904  MD_UNUSED(n_lines);
905 
906  while(1) {
907  const MD_LINE* line = &lines[line_index];
908  OFF line_end = line->end;
909  if(end < line_end)
910  line_end = end;
911 
912  while(off < line_end) {
913  *ptr = CH(off);
914  ptr++;
915  off++;
916  }
917 
918  if(off >= end) {
919  *p_size = (MD_SIZE)(ptr - buffer);
920  return;
921  }
922 
923  *ptr = line_break_replacement_char;
924  ptr++;
925 
926  line_index++;
927  off = lines[line_index].beg;
928  }
929 }
930 
931 /* Wrapper of md_merge_lines() which allocates new buffer for the output string.
932  */
933 static int
934 md_merge_lines_alloc(MD_CTX* ctx, OFF beg, OFF end, const MD_LINE* lines, int n_lines,
935  CHAR line_break_replacement_char, CHAR** p_str, SZ* p_size)
936 {
937  CHAR* buffer;
938 
939  buffer = (CHAR*) malloc(sizeof(CHAR) * (end - beg));
940  if(buffer == NULL) {
941  MD_LOG("malloc() failed.");
942  return -1;
943  }
944 
945  md_merge_lines(ctx, beg, end, lines, n_lines,
946  line_break_replacement_char, buffer, p_size);
947 
948  *p_str = buffer;
949  return 0;
950 }
951 
952 static OFF
953 md_skip_unicode_whitespace(const CHAR* label, OFF off, SZ size)
954 {
955  SZ char_size;
956  unsigned codepoint;
957 
958  while(off < size) {
959  codepoint = md_decode_unicode(label, off, size, &char_size);
960  if(!ISUNICODEWHITESPACE_(codepoint) && !ISNEWLINE_(label[off]))
961  break;
962  off += char_size;
963  }
964 
965  return off;
966 }
967 
968 
969 /******************************
970  *** Recognizing raw HTML ***
971  ******************************/
972 
973 /* md_is_html_tag() may be called when processing inlines (inline raw HTML)
974  * or when breaking document to blocks (checking for start of HTML block type 7).
975  *
976  * When breaking document to blocks, we do not yet know line boundaries, but
977  * in that case the whole tag has to live on a single line. We distinguish this
978  * by n_lines == 0.
979  */
980 static int
981 md_is_html_tag(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
982 {
983  int attr_state;
984  OFF off = beg;
985  OFF line_end = (n_lines > 0) ? lines[0].end : ctx->size;
986  int i = 0;
987 
988  MD_ASSERT(CH(beg) == _T('<'));
989 
990  if(off + 1 >= line_end)
991  return FALSE;
992  off++;
993 
994  /* For parsing attributes, we need a little state automaton below.
995  * State -1: no attributes are allowed.
996  * State 0: attribute could follow after some whitespace.
997  * State 1: after a whitespace (attribute name may follow).
998  * State 2: after attribute name ('=' MAY follow).
999  * State 3: after '=' (value specification MUST follow).
1000  * State 41: in middle of unquoted attribute value.
1001  * State 42: in middle of single-quoted attribute value.
1002  * State 43: in middle of double-quoted attribute value.
1003  */
1004  attr_state = 0;
1005 
1006  if(CH(off) == _T('/')) {
1007  /* Closer tag "</ ... >". No attributes may be present. */
1008  attr_state = -1;
1009  off++;
1010  }
1011 
1012  /* Tag name */
1013  if(off >= line_end || !ISALPHA(off))
1014  return FALSE;
1015  off++;
1016  while(off < line_end && (ISALNUM(off) || CH(off) == _T('-')))
1017  off++;
1018 
1019  /* (Optional) attributes (if not closer), (optional) '/' (if not closer)
1020  * and final '>'. */
1021  while(1) {
1022  while(off < line_end && !ISNEWLINE(off)) {
1023  if(attr_state > 40) {
1024  if(attr_state == 41 && (ISBLANK(off) || ISANYOF(off, _T("\"'=<>`")))) {
1025  attr_state = 0;
1026  off--; /* Put the char back for re-inspection in the new state. */
1027  } else if(attr_state == 42 && CH(off) == _T('\'')) {
1028  attr_state = 0;
1029  } else if(attr_state == 43 && CH(off) == _T('"')) {
1030  attr_state = 0;
1031  }
1032  off++;
1033  } else if(ISWHITESPACE(off)) {
1034  if(attr_state == 0)
1035  attr_state = 1;
1036  off++;
1037  } else if(attr_state <= 2 && CH(off) == _T('>')) {
1038  /* End. */
1039  goto done;
1040  } else if(attr_state <= 2 && CH(off) == _T('/') && off+1 < line_end && CH(off+1) == _T('>')) {
1041  /* End with digraph '/>' */
1042  off++;
1043  goto done;
1044  } else if((attr_state == 1 || attr_state == 2) && (ISALPHA(off) || CH(off) == _T('_') || CH(off) == _T(':'))) {
1045  off++;
1046  /* Attribute name */
1047  while(off < line_end && (ISALNUM(off) || ISANYOF(off, _T("_.:-"))))
1048  off++;
1049  attr_state = 2;
1050  } else if(attr_state == 2 && CH(off) == _T('=')) {
1051  /* Attribute assignment sign */
1052  off++;
1053  attr_state = 3;
1054  } else if(attr_state == 3) {
1055  /* Expecting start of attribute value. */
1056  if(CH(off) == _T('"'))
1057  attr_state = 43;
1058  else if(CH(off) == _T('\''))
1059  attr_state = 42;
1060  else if(!ISANYOF(off, _T("\"'=<>`")) && !ISNEWLINE(off))
1061  attr_state = 41;
1062  else
1063  return FALSE;
1064  off++;
1065  } else {
1066  /* Anything unexpected. */
1067  return FALSE;
1068  }
1069  }
1070 
1071  /* We have to be on a single line. See definition of start condition
1072  * of HTML block, type 7. */
1073  if(n_lines == 0)
1074  return FALSE;
1075 
1076  i++;
1077  if(i >= n_lines)
1078  return FALSE;
1079 
1080  off = lines[i].beg;
1081  line_end = lines[i].end;
1082 
1083  if(attr_state == 0 || attr_state == 41)
1084  attr_state = 1;
1085 
1086  if(off >= max_end)
1087  return FALSE;
1088  }
1089 
1090 done:
1091  if(off >= max_end)
1092  return FALSE;
1093 
1094  *p_end = off+1;
1095  return TRUE;
1096 }
1097 
1098 static int
1099 md_scan_for_html_closer(MD_CTX* ctx, const MD_CHAR* str, MD_SIZE len,
1100  const MD_LINE* lines, int n_lines,
1101  OFF beg, OFF max_end, OFF* p_end,
1102  OFF* p_scan_horizon)
1103 {
1104  OFF off = beg;
1105  int i = 0;
1106 
1107  if(off < *p_scan_horizon && *p_scan_horizon >= max_end - len) {
1108  /* We have already scanned the range up to the max_end so we know
1109  * there is nothing to see. */
1110  return FALSE;
1111  }
1112 
1113  while(TRUE) {
1114  while(off + len <= lines[i].end && off + len <= max_end) {
1115  if(md_ascii_eq(STR(off), str, len)) {
1116  /* Success. */
1117  *p_end = off + len;
1118  return TRUE;
1119  }
1120  off++;
1121  }
1122 
1123  i++;
1124  if(off >= max_end || i >= n_lines) {
1125  /* Failure. */
1126  *p_scan_horizon = off;
1127  return FALSE;
1128  }
1129 
1130  off = lines[i].beg;
1131  }
1132 }
1133 
1134 static int
1135 md_is_html_comment(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1136 {
1137  OFF off = beg;
1138 
1139  MD_ASSERT(CH(beg) == _T('<'));
1140 
1141  if(off + 4 >= lines[0].end)
1142  return FALSE;
1143  if(CH(off+1) != _T('!') || CH(off+2) != _T('-') || CH(off+3) != _T('-'))
1144  return FALSE;
1145  off += 4;
1146 
1147  /* ">" and "->" must not follow the opening. */
1148  if(off < lines[0].end && CH(off) == _T('>'))
1149  return FALSE;
1150  if(off+1 < lines[0].end && CH(off) == _T('-') && CH(off+1) == _T('>'))
1151  return FALSE;
1152 
1153  /* HTML comment must not contain "--", so we scan just for "--" instead
1154  * of "-->" and verify manually that '>' follows. */
1155  if(md_scan_for_html_closer(ctx, _T("--"), 2,
1156  lines, n_lines, off, max_end, p_end, &ctx->html_comment_horizon))
1157  {
1158  if(*p_end < max_end && CH(*p_end) == _T('>')) {
1159  *p_end = *p_end + 1;
1160  return TRUE;
1161  }
1162  }
1163 
1164  return FALSE;
1165 }
1166 
1167 static int
1168 md_is_html_processing_instruction(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1169 {
1170  OFF off = beg;
1171 
1172  if(off + 2 >= lines[0].end)
1173  return FALSE;
1174  if(CH(off+1) != _T('?'))
1175  return FALSE;
1176  off += 2;
1177 
1178  return md_scan_for_html_closer(ctx, _T("?>"), 2,
1179  lines, n_lines, off, max_end, p_end, &ctx->html_proc_instr_horizon);
1180 }
1181 
1182 static int
1183 md_is_html_declaration(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1184 {
1185  OFF off = beg;
1186 
1187  if(off + 2 >= lines[0].end)
1188  return FALSE;
1189  if(CH(off+1) != _T('!'))
1190  return FALSE;
1191  off += 2;
1192 
1193  /* Declaration name. */
1194  if(off >= lines[0].end || !ISALPHA(off))
1195  return FALSE;
1196  off++;
1197  while(off < lines[0].end && ISALPHA(off))
1198  off++;
1199  if(off < lines[0].end && !ISWHITESPACE(off))
1200  return FALSE;
1201 
1202  return md_scan_for_html_closer(ctx, _T(">"), 1,
1203  lines, n_lines, off, max_end, p_end, &ctx->html_decl_horizon);
1204 }
1205 
1206 static int
1207 md_is_html_cdata(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1208 {
1209  static const CHAR open_str[] = _T("<![CDATA[");
1210  static const SZ open_size = SIZEOF_ARRAY(open_str) - 1;
1211 
1212  OFF off = beg;
1213 
1214  if(off + open_size >= lines[0].end)
1215  return FALSE;
1216  if(memcmp(STR(off), open_str, open_size) != 0)
1217  return FALSE;
1218  off += open_size;
1219 
1220  if(lines[n_lines-1].end < max_end)
1221  max_end = lines[n_lines-1].end - 2;
1222 
1223  return md_scan_for_html_closer(ctx, _T("]]>"), 3,
1224  lines, n_lines, off, max_end, p_end, &ctx->html_cdata_horizon);
1225 }
1226 
1227 static int
1228 md_is_html_any(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg, OFF max_end, OFF* p_end)
1229 {
1230  MD_ASSERT(CH(beg) == _T('<'));
1231  return (md_is_html_tag(ctx, lines, n_lines, beg, max_end, p_end) ||
1232  md_is_html_comment(ctx, lines, n_lines, beg, max_end, p_end) ||
1233  md_is_html_processing_instruction(ctx, lines, n_lines, beg, max_end, p_end) ||
1234  md_is_html_declaration(ctx, lines, n_lines, beg, max_end, p_end) ||
1235  md_is_html_cdata(ctx, lines, n_lines, beg, max_end, p_end));
1236 }
1237 
1238 
1239 /****************************
1240  *** Recognizing Entity ***
1241  ****************************/
1242 
1243 static int
1244 md_is_hex_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1245 {
1246  OFF off = beg;
1247  MD_UNUSED(ctx);
1248 
1249  while(off < max_end && ISXDIGIT_(text[off]) && off - beg <= 8)
1250  off++;
1251 
1252  if(1 <= off - beg && off - beg <= 6) {
1253  *p_end = off;
1254  return TRUE;
1255  } else {
1256  return FALSE;
1257  }
1258 }
1259 
1260 static int
1261 md_is_dec_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1262 {
1263  OFF off = beg;
1264  MD_UNUSED(ctx);
1265 
1266  while(off < max_end && ISDIGIT_(text[off]) && off - beg <= 8)
1267  off++;
1268 
1269  if(1 <= off - beg && off - beg <= 7) {
1270  *p_end = off;
1271  return TRUE;
1272  } else {
1273  return FALSE;
1274  }
1275 }
1276 
1277 static int
1278 md_is_named_entity_contents(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1279 {
1280  OFF off = beg;
1281  MD_UNUSED(ctx);
1282 
1283  if(off < max_end && ISALPHA_(text[off]))
1284  off++;
1285  else
1286  return FALSE;
1287 
1288  while(off < max_end && ISALNUM_(text[off]) && off - beg <= 48)
1289  off++;
1290 
1291  if(2 <= off - beg && off - beg <= 48) {
1292  *p_end = off;
1293  return TRUE;
1294  } else {
1295  return FALSE;
1296  }
1297 }
1298 
1299 static int
1300 md_is_entity_str(MD_CTX* ctx, const CHAR* text, OFF beg, OFF max_end, OFF* p_end)
1301 {
1302  int is_contents;
1303  OFF off = beg;
1304 
1305  MD_ASSERT(text[off] == _T('&'));
1306  off++;
1307 
1308  if(off+2 < max_end && text[off] == _T('#') && (text[off+1] == _T('x') || text[off+1] == _T('X')))
1309  is_contents = md_is_hex_entity_contents(ctx, text, off+2, max_end, &off);
1310  else if(off+1 < max_end && text[off] == _T('#'))
1311  is_contents = md_is_dec_entity_contents(ctx, text, off+1, max_end, &off);
1312  else
1313  is_contents = md_is_named_entity_contents(ctx, text, off, max_end, &off);
1314 
1315  if(is_contents && off < max_end && text[off] == _T(';')) {
1316  *p_end = off+1;
1317  return TRUE;
1318  } else {
1319  return FALSE;
1320  }
1321 }
1322 
1323 static inline int
1324 md_is_entity(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
1325 {
1326  return md_is_entity_str(ctx, ctx->text, beg, max_end, p_end);
1327 }
1328 
1329 
1330 /******************************
1331  *** Attribute Management ***
1332  ******************************/
1333 
1343 };
1344 
1345 
1346 #define MD_BUILD_ATTR_NO_ESCAPES 0x0001
1347 
1348 static int
1349 md_build_attr_append_substr(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build,
1350  MD_TEXTTYPE type, OFF off)
1351 {
1352  if(build->substr_count >= build->substr_alloc) {
1353  MD_TEXTTYPE* new_substr_types;
1354  OFF* new_substr_offsets;
1355 
1356  build->substr_alloc = (build->substr_alloc > 0
1357  ? build->substr_alloc + build->substr_alloc / 2
1358  : 8);
1359  new_substr_types = (MD_TEXTTYPE*) realloc(build->substr_types,
1360  build->substr_alloc * sizeof(MD_TEXTTYPE));
1361  if(new_substr_types == NULL) {
1362  MD_LOG("realloc() failed.");
1363  return -1;
1364  }
1365  /* Note +1 to reserve space for final offset (== raw_size). */
1366  new_substr_offsets = (OFF*) realloc(build->substr_offsets,
1367  (build->substr_alloc+1) * sizeof(OFF));
1368  if(new_substr_offsets == NULL) {
1369  MD_LOG("realloc() failed.");
1370  free(new_substr_types);
1371  return -1;
1372  }
1373 
1374  build->substr_types = new_substr_types;
1375  build->substr_offsets = new_substr_offsets;
1376  }
1377 
1378  build->substr_types[build->substr_count] = type;
1379  build->substr_offsets[build->substr_count] = off;
1380  build->substr_count++;
1381  return 0;
1382 }
1383 
1384 static void
1385 md_free_attribute(MD_CTX* ctx, MD_ATTRIBUTE_BUILD* build)
1386 {
1387  MD_UNUSED(ctx);
1388 
1389  if(build->substr_alloc > 0) {
1390  free(build->text);
1391  free(build->substr_types);
1392  free(build->substr_offsets);
1393  }
1394 }
1395 
1396 static int
1397 md_build_attribute(MD_CTX* ctx, const CHAR* raw_text, SZ raw_size,
1399 {
1400  OFF raw_off, off;
1401  int is_trivial;
1402  int ret = 0;
1403 
1404  memset(build, 0, sizeof(MD_ATTRIBUTE_BUILD));
1405 
1406  /* If there is no backslash and no ampersand, build trivial attribute
1407  * without any malloc(). */
1408  is_trivial = TRUE;
1409  for(raw_off = 0; raw_off < raw_size; raw_off++) {
1410  if(ISANYOF3_(raw_text[raw_off], _T('\\'), _T('&'), _T('\0'))) {
1411  is_trivial = FALSE;
1412  break;
1413  }
1414  }
1415 
1416  if(is_trivial) {
1417  build->text = (CHAR*) (raw_size ? raw_text : NULL);
1418  build->substr_types = build->trivial_types;
1419  build->substr_offsets = build->trivial_offsets;
1420  build->substr_count = 1;
1421  build->substr_alloc = 0;
1422  build->trivial_types[0] = MD_TEXT_NORMAL;
1423  build->trivial_offsets[0] = 0;
1424  build->trivial_offsets[1] = raw_size;
1425  off = raw_size;
1426  } else {
1427  build->text = (CHAR*) malloc(raw_size * sizeof(CHAR));
1428  if(build->text == NULL) {
1429  MD_LOG("malloc() failed.");
1430  goto abort;
1431  }
1432 
1433  raw_off = 0;
1434  off = 0;
1435 
1436  while(raw_off < raw_size) {
1437  if(raw_text[raw_off] == _T('\0')) {
1438  MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NULLCHAR, off));
1439  memcpy(build->text + off, raw_text + raw_off, 1);
1440  off++;
1441  raw_off++;
1442  continue;
1443  }
1444 
1445  if(raw_text[raw_off] == _T('&')) {
1446  OFF ent_end;
1447 
1448  if(md_is_entity_str(ctx, raw_text, raw_off, raw_size, &ent_end)) {
1449  MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_ENTITY, off));
1450  memcpy(build->text + off, raw_text + raw_off, ent_end - raw_off);
1451  off += ent_end - raw_off;
1452  raw_off = ent_end;
1453  continue;
1454  }
1455  }
1456 
1457  if(build->substr_count == 0 || build->substr_types[build->substr_count-1] != MD_TEXT_NORMAL)
1458  MD_CHECK(md_build_attr_append_substr(ctx, build, MD_TEXT_NORMAL, off));
1459 
1460  if(!(flags & MD_BUILD_ATTR_NO_ESCAPES) &&
1461  raw_text[raw_off] == _T('\\') && raw_off+1 < raw_size &&
1462  (ISPUNCT_(raw_text[raw_off+1]) || ISNEWLINE_(raw_text[raw_off+1])))
1463  raw_off++;
1464 
1465  build->text[off++] = raw_text[raw_off++];
1466  }
1467  build->substr_offsets[build->substr_count] = off;
1468  }
1469 
1470  attr->text = build->text;
1471  attr->size = off;
1472  attr->substr_offsets = build->substr_offsets;
1473  attr->substr_types = build->substr_types;
1474  return 0;
1475 
1476 abort:
1477  md_free_attribute(ctx, build);
1478  return -1;
1479 }
1480 
1481 
1482 /*********************************************
1483  *** Dictionary of Reference Definitions ***
1484  *********************************************/
1485 
#define MD_FNV1A_BASE       2166136261U
#define MD_FNV1A_PRIME      16777619U

/* FNV-1a hash of 'n' bytes at 'data', continuing from the hash value
 * 'base'. Pass MD_FNV1A_BASE to start a new hash, or a previous result to
 * hash data supplied in several pieces. */
static inline unsigned
md_fnv1a(unsigned base, const void* data, size_t n)
{
    const unsigned char* cursor = (const unsigned char*) data;
    unsigned hash = base;
    size_t n_left;

    for(n_left = n; n_left > 0; n_left--) {
        hash = (hash ^ *cursor) * MD_FNV1A_PRIME;
        cursor++;
    }

    return hash;
}
1503 
1504 
1508  unsigned hash;
1513  unsigned char label_needs_free : 1;
1514  unsigned char title_needs_free : 1;
1515 };
1516 
1517 /* Label equivalence is quite complicated with regards to whitespace and case
1518  * folding. This complicates computing a hash of it as well as direct comparison
1519  * of two labels. */
1520 
1521 static unsigned
1522 md_link_label_hash(const CHAR* label, SZ size)
1523 {
1524  unsigned hash = MD_FNV1A_BASE;
1525  OFF off;
1526  unsigned codepoint;
1527  int is_whitespace = FALSE;
1528 
1529  off = md_skip_unicode_whitespace(label, 0, size);
1530  while(off < size) {
1531  SZ char_size;
1532 
1533  codepoint = md_decode_unicode(label, off, size, &char_size);
1534  is_whitespace = ISUNICODEWHITESPACE_(codepoint) || ISNEWLINE_(label[off]);
1535 
1536  if(is_whitespace) {
1537  codepoint = ' ';
1538  hash = md_fnv1a(hash, &codepoint, sizeof(unsigned));
1539  off = md_skip_unicode_whitespace(label, off, size);
1540  } else {
1541  MD_UNICODE_FOLD_INFO fold_info;
1542 
1543  md_get_unicode_fold_info(codepoint, &fold_info);
1544  hash = md_fnv1a(hash, fold_info.codepoints, fold_info.n_codepoints * sizeof(unsigned));
1545  off += char_size;
1546  }
1547  }
1548 
1549  return hash;
1550 }
1551 
1552 static OFF
1553 md_link_label_cmp_load_fold_info(const CHAR* label, OFF off, SZ size,
1554  MD_UNICODE_FOLD_INFO* fold_info)
1555 {
1556  unsigned codepoint;
1557  SZ char_size;
1558 
1559  if(off >= size) {
1560  /* Treat end of a link label as a whitespace. */
1561  goto whitespace;
1562  }
1563 
1564  codepoint = md_decode_unicode(label, off, size, &char_size);
1565  off += char_size;
1566  if(ISUNICODEWHITESPACE_(codepoint)) {
1567  /* Treat all whitespace as equivalent */
1568  goto whitespace;
1569  }
1570 
1571  /* Get real folding info. */
1572  md_get_unicode_fold_info(codepoint, fold_info);
1573  return off;
1574 
1575 whitespace:
1576  fold_info->codepoints[0] = _T(' ');
1577  fold_info->n_codepoints = 1;
1578  return md_skip_unicode_whitespace(label, off, size);
1579 }
1580 
1581 static int
1582 md_link_label_cmp(const CHAR* a_label, SZ a_size, const CHAR* b_label, SZ b_size)
1583 {
1584  OFF a_off;
1585  OFF b_off;
1586  MD_UNICODE_FOLD_INFO a_fi = { { 0 }, 0 };
1587  MD_UNICODE_FOLD_INFO b_fi = { { 0 }, 0 };
1588  OFF a_fi_off = 0;
1589  OFF b_fi_off = 0;
1590  int cmp;
1591 
1592  a_off = md_skip_unicode_whitespace(a_label, 0, a_size);
1593  b_off = md_skip_unicode_whitespace(b_label, 0, b_size);
1594  while(a_off < a_size || a_fi_off < a_fi.n_codepoints ||
1595  b_off < b_size || b_fi_off < b_fi.n_codepoints)
1596  {
1597  /* If needed, load fold info for next char. */
1598  if(a_fi_off >= a_fi.n_codepoints) {
1599  a_fi_off = 0;
1600  a_off = md_link_label_cmp_load_fold_info(a_label, a_off, a_size, &a_fi);
1601  }
1602  if(b_fi_off >= b_fi.n_codepoints) {
1603  b_fi_off = 0;
1604  b_off = md_link_label_cmp_load_fold_info(b_label, b_off, b_size, &b_fi);
1605  }
1606 
1607  cmp = b_fi.codepoints[b_fi_off] - a_fi.codepoints[a_fi_off];
1608  if(cmp != 0)
1609  return cmp;
1610 
1611  a_fi_off++;
1612  b_fi_off++;
1613  }
1614 
1615  return 0;
1616 }
1617 
1618 typedef struct MD_REF_DEF_LIST_tag MD_REF_DEF_LIST;
1622  MD_REF_DEF* ref_defs[]; /* Valid items always point into ctx->ref_defs[] */
1623 };
1624 
1625 static int
1626 md_ref_def_cmp(const void* a, const void* b)
1627 {
1628  const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a;
1629  const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b;
1630 
1631  if(a_ref->hash < b_ref->hash)
1632  return -1;
1633  else if(a_ref->hash > b_ref->hash)
1634  return +1;
1635  else
1636  return md_link_label_cmp(a_ref->label, a_ref->label_size, b_ref->label, b_ref->label_size);
1637 }
1638 
1639 static int
1640 md_ref_def_cmp_for_sort(const void* a, const void* b)
1641 {
1642  int cmp;
1643 
1644  cmp = md_ref_def_cmp(a, b);
1645 
1646  /* Ensure stability of the sorting. */
1647  if(cmp == 0) {
1648  const MD_REF_DEF* a_ref = *(const MD_REF_DEF**)a;
1649  const MD_REF_DEF* b_ref = *(const MD_REF_DEF**)b;
1650 
1651  if(a_ref < b_ref)
1652  cmp = -1;
1653  else if(a_ref > b_ref)
1654  cmp = +1;
1655  else
1656  cmp = 0;
1657  }
1658 
1659  return cmp;
1660 }
1661 
1662 static int
1663 md_build_ref_def_hashtable(MD_CTX* ctx)
1664 {
1665  int i, j;
1666 
1667  if(ctx->n_ref_defs == 0)
1668  return 0;
1669 
1670  ctx->ref_def_hashtable_size = (ctx->n_ref_defs * 5) / 4;
1671  ctx->ref_def_hashtable = malloc(ctx->ref_def_hashtable_size * sizeof(void*));
1672  if(ctx->ref_def_hashtable == NULL) {
1673  MD_LOG("malloc() failed.");
1674  goto abort;
1675  }
1676  memset(ctx->ref_def_hashtable, 0, ctx->ref_def_hashtable_size * sizeof(void*));
1677 
1678  /* Each member of ctx->ref_def_hashtable[] can be:
1679  * -- NULL,
1680  * -- pointer to the MD_REF_DEF in ctx->ref_defs[], or
1681  * -- pointer to a MD_REF_DEF_LIST, which holds multiple pointers to
1682  * such MD_REF_DEFs.
1683  */
1684  for(i = 0; i < ctx->n_ref_defs; i++) {
1685  MD_REF_DEF* def = &ctx->ref_defs[i];
1686  void* bucket;
1688 
1689  def->hash = md_link_label_hash(def->label, def->label_size);
1690  bucket = ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size];
1691 
1692  if(bucket == NULL) {
1693  /* The bucket is empty. Make it just point to the def. */
1694  ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = def;
1695  continue;
1696  }
1697 
1698  if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1699  /* The bucket already contains one ref. def. Lets see whether it
1700  * is the same label (ref. def. duplicate) or different one
1701  * (hash conflict). */
1702  MD_REF_DEF* old_def = (MD_REF_DEF*) bucket;
1703 
1704  if(md_link_label_cmp(def->label, def->label_size, old_def->label, old_def->label_size) == 0) {
1705  /* Duplicate label: Ignore this ref. def. */
1706  continue;
1707  }
1708 
1709  /* Make the bucket complex, i.e. able to hold more ref. defs. */
1710  list = (MD_REF_DEF_LIST*) malloc(sizeof(MD_REF_DEF_LIST) + 2 * sizeof(MD_REF_DEF*));
1711  if(list == NULL) {
1712  MD_LOG("malloc() failed.");
1713  goto abort;
1714  }
1715  list->ref_defs[0] = old_def;
1716  list->ref_defs[1] = def;
1717  list->n_ref_defs = 2;
1718  list->alloc_ref_defs = 2;
1719  ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
1720  continue;
1721  }
1722 
1723  /* Append the def to the complex bucket list.
1724  *
1725  * Note in this case we ignore potential duplicates to avoid expensive
1726  * iterating over the complex bucket. Below, we revisit all the complex
1727  * buckets and handle it more cheaply after the complex bucket contents
1728  * is sorted. */
1729  list = (MD_REF_DEF_LIST*) bucket;
1730  if(list->n_ref_defs >= list->alloc_ref_defs) {
1731  int alloc_ref_defs = list->alloc_ref_defs + list->alloc_ref_defs / 2;
1732  MD_REF_DEF_LIST* list_tmp = (MD_REF_DEF_LIST*) realloc(list,
1733  sizeof(MD_REF_DEF_LIST) + alloc_ref_defs * sizeof(MD_REF_DEF*));
1734  if(list_tmp == NULL) {
1735  MD_LOG("realloc() failed.");
1736  goto abort;
1737  }
1738  list = list_tmp;
1739  list->alloc_ref_defs = alloc_ref_defs;
1740  ctx->ref_def_hashtable[def->hash % ctx->ref_def_hashtable_size] = list;
1741  }
1742 
1743  list->ref_defs[list->n_ref_defs] = def;
1744  list->n_ref_defs++;
1745  }
1746 
1747  /* Sort the complex buckets so we can use bsearch() with them. */
1748  for(i = 0; i < ctx->ref_def_hashtable_size; i++) {
1749  void* bucket = ctx->ref_def_hashtable[i];
1751 
1752  if(bucket == NULL)
1753  continue;
1754  if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs)
1755  continue;
1756 
1757  list = (MD_REF_DEF_LIST*) bucket;
1758  qsort(list->ref_defs, list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp_for_sort);
1759 
1760  /* Disable all duplicates in the complex bucket by forcing all such
1761  * records to point to the 1st such ref. def. I.e. no matter which
1762  * record is found during the lookup, it will always point to the right
1763  * ref. def. in ctx->ref_defs[]. */
1764  for(j = 1; j < list->n_ref_defs; j++) {
1765  if(md_ref_def_cmp(&list->ref_defs[j-1], &list->ref_defs[j]) == 0)
1766  list->ref_defs[j] = list->ref_defs[j-1];
1767  }
1768  }
1769 
1770  return 0;
1771 
1772 abort:
1773  return -1;
1774 }
1775 
1776 static void
1777 md_free_ref_def_hashtable(MD_CTX* ctx)
1778 {
1779  if(ctx->ref_def_hashtable != NULL) {
1780  int i;
1781 
1782  for(i = 0; i < ctx->ref_def_hashtable_size; i++) {
1783  void* bucket = ctx->ref_def_hashtable[i];
1784  if(bucket == NULL)
1785  continue;
1786  if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs)
1787  continue;
1788  free(bucket);
1789  }
1790 
1791  free(ctx->ref_def_hashtable);
1792  }
1793 }
1794 
1795 static const MD_REF_DEF*
1796 md_lookup_ref_def(MD_CTX* ctx, const CHAR* label, SZ label_size)
1797 {
1798  unsigned hash;
1799  void* bucket;
1800 
1801  if(ctx->ref_def_hashtable_size == 0)
1802  return NULL;
1803 
1804  hash = md_link_label_hash(label, label_size);
1805  bucket = ctx->ref_def_hashtable[hash % ctx->ref_def_hashtable_size];
1806 
1807  if(bucket == NULL) {
1808  return NULL;
1809  } else if(ctx->ref_defs <= (MD_REF_DEF*) bucket && (MD_REF_DEF*) bucket < ctx->ref_defs + ctx->n_ref_defs) {
1810  const MD_REF_DEF* def = (MD_REF_DEF*) bucket;
1811 
1812  if(md_link_label_cmp(def->label, def->label_size, label, label_size) == 0)
1813  return def;
1814  else
1815  return NULL;
1816  } else {
1817  MD_REF_DEF_LIST* list = (MD_REF_DEF_LIST*) bucket;
1818  MD_REF_DEF key_buf;
1819  const MD_REF_DEF* key = &key_buf;
1820  const MD_REF_DEF** ret;
1821 
1822  key_buf.label = (CHAR*) label;
1823  key_buf.label_size = label_size;
1824  key_buf.hash = md_link_label_hash(key_buf.label, key_buf.label_size);
1825 
1826  ret = (const MD_REF_DEF**) bsearch(&key, list->ref_defs,
1827  list->n_ref_defs, sizeof(MD_REF_DEF*), md_ref_def_cmp);
1828  if(ret != NULL)
1829  return *ret;
1830  else
1831  return NULL;
1832  }
1833 }
1834 
1835 
1836 /***************************
1837  *** Recognizing Links ***
1838  ***************************/
1839 
1840 /* Note this code is partially shared between processing inlines and blocks
1841  * as reference definitions and links share some helper parser functions.
1842  */
1843 
1844 typedef struct MD_LINK_ATTR_tag MD_LINK_ATTR;
1848 
1852 };
1853 
1854 
1855 static int
1856 md_is_link_label(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
1857  OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
1858  OFF* p_contents_beg, OFF* p_contents_end)
1859 {
1860  OFF off = beg;
1861  OFF contents_beg = 0;
1862  OFF contents_end = 0;
1863  int line_index = 0;
1864  int len = 0;
1865 
1866  if(CH(off) != _T('['))
1867  return FALSE;
1868  off++;
1869 
1870  while(1) {
1871  OFF line_end = lines[line_index].end;
1872 
1873  while(off < line_end) {
1874  if(CH(off) == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
1875  if(contents_end == 0) {
1876  contents_beg = off;
1877  *p_beg_line_index = line_index;
1878  }
1879  contents_end = off + 2;
1880  off += 2;
1881  } else if(CH(off) == _T('[')) {
1882  return FALSE;
1883  } else if(CH(off) == _T(']')) {
1884  if(contents_beg < contents_end) {
1885  /* Success. */
1886  *p_contents_beg = contents_beg;
1887  *p_contents_end = contents_end;
1888  *p_end = off+1;
1889  *p_end_line_index = line_index;
1890  return TRUE;
1891  } else {
1892  /* Link label must have some non-whitespace contents. */
1893  return FALSE;
1894  }
1895  } else {
1896  unsigned codepoint;
1897  SZ char_size;
1898 
1899  codepoint = md_decode_unicode(ctx->text, off, ctx->size, &char_size);
1900  if(!ISUNICODEWHITESPACE_(codepoint)) {
1901  if(contents_end == 0) {
1902  contents_beg = off;
1903  *p_beg_line_index = line_index;
1904  }
1905  contents_end = off + char_size;
1906  }
1907 
1908  off += char_size;
1909  }
1910 
1911  len++;
1912  if(len > 999)
1913  return FALSE;
1914  }
1915 
1916  line_index++;
1917  len++;
1918  if(line_index < n_lines)
1919  off = lines[line_index].beg;
1920  else
1921  break;
1922  }
1923 
1924  return FALSE;
1925 }
1926 
1927 static int
1928 md_is_link_destination_A(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1929  OFF* p_contents_beg, OFF* p_contents_end)
1930 {
1931  OFF off = beg;
1932 
1933  if(off >= max_end || CH(off) != _T('<'))
1934  return FALSE;
1935  off++;
1936 
1937  while(off < max_end) {
1938  if(CH(off) == _T('\\') && off+1 < max_end && ISPUNCT(off+1)) {
1939  off += 2;
1940  continue;
1941  }
1942 
1943  if(ISNEWLINE(off) || CH(off) == _T('<'))
1944  return FALSE;
1945 
1946  if(CH(off) == _T('>')) {
1947  /* Success. */
1948  *p_contents_beg = beg+1;
1949  *p_contents_end = off;
1950  *p_end = off+1;
1951  return TRUE;
1952  }
1953 
1954  off++;
1955  }
1956 
1957  return FALSE;
1958 }
1959 
1960 static int
1961 md_is_link_destination_B(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
1962  OFF* p_contents_beg, OFF* p_contents_end)
1963 {
1964  OFF off = beg;
1965  int parenthesis_level = 0;
1966 
1967  while(off < max_end) {
1968  if(CH(off) == _T('\\') && off+1 < max_end && ISPUNCT(off+1)) {
1969  off += 2;
1970  continue;
1971  }
1972 
1973  if(ISWHITESPACE(off) || ISCNTRL(off))
1974  break;
1975 
1976  /* Link destination may include balanced pairs of unescaped '(' ')'.
1977  * Note we limit the maximal nesting level by 32 to protect us from
1978  * https://github.com/jgm/cmark/issues/214 */
1979  if(CH(off) == _T('(')) {
1980  parenthesis_level++;
1981  if(parenthesis_level > 32)
1982  return FALSE;
1983  } else if(CH(off) == _T(')')) {
1984  if(parenthesis_level == 0)
1985  break;
1986  parenthesis_level--;
1987  }
1988 
1989  off++;
1990  }
1991 
1992  if(parenthesis_level != 0 || off == beg)
1993  return FALSE;
1994 
1995  /* Success. */
1996  *p_contents_beg = beg;
1997  *p_contents_end = off;
1998  *p_end = off;
1999  return TRUE;
2000 }
2001 
2002 static inline int
2003 md_is_link_destination(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end,
2004  OFF* p_contents_beg, OFF* p_contents_end)
2005 {
2006  if(CH(beg) == _T('<'))
2007  return md_is_link_destination_A(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
2008  else
2009  return md_is_link_destination_B(ctx, beg, max_end, p_end, p_contents_beg, p_contents_end);
2010 }
2011 
2012 static int
2013 md_is_link_title(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
2014  OFF* p_end, int* p_beg_line_index, int* p_end_line_index,
2015  OFF* p_contents_beg, OFF* p_contents_end)
2016 {
2017  OFF off = beg;
2018  CHAR closer_char;
2019  int line_index = 0;
2020 
2021  /* White space with up to one line break. */
2022  while(off < lines[line_index].end && ISWHITESPACE(off))
2023  off++;
2024  if(off >= lines[line_index].end) {
2025  line_index++;
2026  if(line_index >= n_lines)
2027  return FALSE;
2028  off = lines[line_index].beg;
2029  }
2030  if(off == beg)
2031  return FALSE;
2032 
2033  *p_beg_line_index = line_index;
2034 
2035  /* First char determines how to detect end of it. */
2036  switch(CH(off)) {
2037  case _T('"'): closer_char = _T('"'); break;
2038  case _T('\''): closer_char = _T('\''); break;
2039  case _T('('): closer_char = _T(')'); break;
2040  default: return FALSE;
2041  }
2042  off++;
2043 
2044  *p_contents_beg = off;
2045 
2046  while(line_index < n_lines) {
2047  OFF line_end = lines[line_index].end;
2048 
2049  while(off < line_end) {
2050  if(CH(off) == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
2051  off++;
2052  } else if(CH(off) == closer_char) {
2053  /* Success. */
2054  *p_contents_end = off;
2055  *p_end = off+1;
2056  *p_end_line_index = line_index;
2057  return TRUE;
2058  } else if(closer_char == _T(')') && CH(off) == _T('(')) {
2059  /* ()-style title cannot contain (unescaped '(')) */
2060  return FALSE;
2061  }
2062 
2063  off++;
2064  }
2065 
2066  line_index++;
2067  }
2068 
2069  return FALSE;
2070 }
2071 
2072 /* Returns 0 if it is not a reference definition.
2073  *
2074  * Returns N > 0 if it is a reference definition. N then corresponds to the
2075  * number of lines forming it). In this case the definition is stored for
2076  * resolving any links referring to it.
2077  *
2078  * Returns -1 in case of an error (out of memory).
2079  */
2080 static int
2081 md_is_link_reference_definition(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
2082 {
2083  OFF label_contents_beg;
2084  OFF label_contents_end;
2085  int label_contents_line_index = -1;
2086  int label_is_multiline = FALSE;
2087  OFF dest_contents_beg;
2088  OFF dest_contents_end;
2089  OFF title_contents_beg;
2090  OFF title_contents_end;
2091  int title_contents_line_index;
2092  int title_is_multiline = FALSE;
2093  OFF off;
2094  int line_index = 0;
2095  int tmp_line_index;
2096  MD_REF_DEF* def = NULL;
2097  int ret = 0;
2098 
2099  /* Link label. */
2100  if(!md_is_link_label(ctx, lines, n_lines, lines[0].beg,
2101  &off, &label_contents_line_index, &line_index,
2102  &label_contents_beg, &label_contents_end))
2103  return FALSE;
2104  label_is_multiline = (label_contents_line_index != line_index);
2105 
2106  /* Colon. */
2107  if(off >= lines[line_index].end || CH(off) != _T(':'))
2108  return FALSE;
2109  off++;
2110 
2111  /* Optional white space with up to one line break. */
2112  while(off < lines[line_index].end && ISWHITESPACE(off))
2113  off++;
2114  if(off >= lines[line_index].end) {
2115  line_index++;
2116  if(line_index >= n_lines)
2117  return FALSE;
2118  off = lines[line_index].beg;
2119  }
2120 
2121  /* Link destination. */
2122  if(!md_is_link_destination(ctx, off, lines[line_index].end,
2123  &off, &dest_contents_beg, &dest_contents_end))
2124  return FALSE;
2125 
2126  /* (Optional) title. Note we interpret it as an title only if nothing
2127  * more follows on its last line. */
2128  if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
2129  &off, &title_contents_line_index, &tmp_line_index,
2130  &title_contents_beg, &title_contents_end)
2131  && off >= lines[line_index + tmp_line_index].end)
2132  {
2133  title_is_multiline = (tmp_line_index != title_contents_line_index);
2134  title_contents_line_index += line_index;
2135  line_index += tmp_line_index;
2136  } else {
2137  /* Not a title. */
2138  title_is_multiline = FALSE;
2139  title_contents_beg = off;
2140  title_contents_end = off;
2141  title_contents_line_index = 0;
2142  }
2143 
2144  /* Nothing more can follow on the last line. */
2145  if(off < lines[line_index].end)
2146  return FALSE;
2147 
2148  /* So, it _is_ a reference definition. Remember it. */
2149  if(ctx->n_ref_defs >= ctx->alloc_ref_defs) {
2150  MD_REF_DEF* new_defs;
2151 
2152  ctx->alloc_ref_defs = (ctx->alloc_ref_defs > 0
2153  ? ctx->alloc_ref_defs + ctx->alloc_ref_defs / 2
2154  : 16);
2155  new_defs = (MD_REF_DEF*) realloc(ctx->ref_defs, ctx->alloc_ref_defs * sizeof(MD_REF_DEF));
2156  if(new_defs == NULL) {
2157  MD_LOG("realloc() failed.");
2158  goto abort;
2159  }
2160 
2161  ctx->ref_defs = new_defs;
2162  }
2163  def = &ctx->ref_defs[ctx->n_ref_defs];
2164  memset(def, 0, sizeof(MD_REF_DEF));
2165 
2166  if(label_is_multiline) {
2167  MD_CHECK(md_merge_lines_alloc(ctx, label_contents_beg, label_contents_end,
2168  lines + label_contents_line_index, n_lines - label_contents_line_index,
2169  _T(' '), &def->label, &def->label_size));
2170  def->label_needs_free = TRUE;
2171  } else {
2172  def->label = (CHAR*) STR(label_contents_beg);
2173  def->label_size = label_contents_end - label_contents_beg;
2174  }
2175 
2176  if(title_is_multiline) {
2177  MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2178  lines + title_contents_line_index, n_lines - title_contents_line_index,
2179  _T('\n'), &def->title, &def->title_size));
2180  def->title_needs_free = TRUE;
2181  } else {
2182  def->title = (CHAR*) STR(title_contents_beg);
2183  def->title_size = title_contents_end - title_contents_beg;
2184  }
2185 
2186  def->dest_beg = dest_contents_beg;
2187  def->dest_end = dest_contents_end;
2188 
2189  /* Success. */
2190  ctx->n_ref_defs++;
2191  return line_index + 1;
2192 
2193 abort:
2194  /* Failure. */
2195  if(def != NULL && def->label_needs_free)
2196  free(def->label);
2197  if(def != NULL && def->title_needs_free)
2198  free(def->title);
2199  return ret;
2200 }
2201 
2202 static int
2203 md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
2204  OFF beg, OFF end, MD_LINK_ATTR* attr)
2205 {
2206  const MD_REF_DEF* def;
2207  const MD_LINE* beg_line;
2208  const MD_LINE* end_line;
2209  CHAR* label;
2210  SZ label_size;
2211  int ret;
2212 
2213  MD_ASSERT(CH(beg) == _T('[') || CH(beg) == _T('!'));
2214  MD_ASSERT(CH(end-1) == _T(']'));
2215 
2216  beg += (CH(beg) == _T('!') ? 2 : 1);
2217  end--;
2218 
2219  /* Find lines corresponding to the beg and end positions. */
2220  MD_ASSERT(lines[0].beg <= beg);
2221  beg_line = lines;
2222  while(beg >= beg_line->end)
2223  beg_line++;
2224 
2225  MD_ASSERT(end <= lines[n_lines-1].end);
2226  end_line = beg_line;
2227  while(end >= end_line->end)
2228  end_line++;
2229 
2230  if(beg_line != end_line) {
2231  MD_CHECK(md_merge_lines_alloc(ctx, beg, end, beg_line,
2232  (int)(n_lines - (beg_line - lines)), _T(' '), &label, &label_size));
2233  } else {
2234  label = (CHAR*) STR(beg);
2235  label_size = end - beg;
2236  }
2237 
2238  def = md_lookup_ref_def(ctx, label, label_size);
2239  if(def != NULL) {
2240  attr->dest_beg = def->dest_beg;
2241  attr->dest_end = def->dest_end;
2242  attr->title = def->title;
2243  attr->title_size = def->title_size;
2244  attr->title_needs_free = FALSE;
2245  }
2246 
2247  if(beg_line != end_line)
2248  free(label);
2249 
2250  ret = (def != NULL);
2251 
2252 abort:
2253  return ret;
2254 }
2255 
2256 static int
2257 md_is_inline_link_spec(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
2258  OFF beg, OFF* p_end, MD_LINK_ATTR* attr)
2259 {
2260  int line_index = 0;
2261  int tmp_line_index;
2262  OFF title_contents_beg;
2263  OFF title_contents_end;
2264  int title_contents_line_index;
2265  int title_is_multiline;
2266  OFF off = beg;
2267  int ret = FALSE;
2268 
2269  while(off >= lines[line_index].end)
2270  line_index++;
2271 
2272  MD_ASSERT(CH(off) == _T('('));
2273  off++;
2274 
2275  /* Optional white space with up to one line break. */
2276  while(off < lines[line_index].end && ISWHITESPACE(off))
2277  off++;
2278  if(off >= lines[line_index].end && ISNEWLINE(off)) {
2279  line_index++;
2280  if(line_index >= n_lines)
2281  return FALSE;
2282  off = lines[line_index].beg;
2283  }
2284 
2285  /* Link destination may be omitted, but only when not also having a title. */
2286  if(off < ctx->size && CH(off) == _T(')')) {
2287  attr->dest_beg = off;
2288  attr->dest_end = off;
2289  attr->title = NULL;
2290  attr->title_size = 0;
2291  attr->title_needs_free = FALSE;
2292  off++;
2293  *p_end = off;
2294  return TRUE;
2295  }
2296 
2297  /* Link destination. */
2298  if(!md_is_link_destination(ctx, off, lines[line_index].end,
2299  &off, &attr->dest_beg, &attr->dest_end))
2300  return FALSE;
2301 
2302  /* (Optional) title. */
2303  if(md_is_link_title(ctx, lines + line_index, n_lines - line_index, off,
2304  &off, &title_contents_line_index, &tmp_line_index,
2305  &title_contents_beg, &title_contents_end))
2306  {
2307  title_is_multiline = (tmp_line_index != title_contents_line_index);
2308  title_contents_line_index += line_index;
2309  line_index += tmp_line_index;
2310  } else {
2311  /* Not a title. */
2312  title_is_multiline = FALSE;
2313  title_contents_beg = off;
2314  title_contents_end = off;
2315  title_contents_line_index = 0;
2316  }
2317 
2318  /* Optional whitespace followed with final ')'. */
2319  while(off < lines[line_index].end && ISWHITESPACE(off))
2320  off++;
2321  if(off >= lines[line_index].end && ISNEWLINE(off)) {
2322  line_index++;
2323  if(line_index >= n_lines)
2324  return FALSE;
2325  off = lines[line_index].beg;
2326  }
2327  if(CH(off) != _T(')'))
2328  goto abort;
2329  off++;
2330 
2331  if(title_contents_beg >= title_contents_end) {
2332  attr->title = NULL;
2333  attr->title_size = 0;
2334  attr->title_needs_free = FALSE;
2335  } else if(!title_is_multiline) {
2336  attr->title = (CHAR*) STR(title_contents_beg);
2337  attr->title_size = title_contents_end - title_contents_beg;
2338  attr->title_needs_free = FALSE;
2339  } else {
2340  MD_CHECK(md_merge_lines_alloc(ctx, title_contents_beg, title_contents_end,
2341  lines + title_contents_line_index, n_lines - title_contents_line_index,
2342  _T('\n'), &attr->title, &attr->title_size));
2343  attr->title_needs_free = TRUE;
2344  }
2345 
2346  *p_end = off;
2347  ret = TRUE;
2348 
2349 abort:
2350  return ret;
2351 }
2352 
2353 static void
2354 md_free_ref_defs(MD_CTX* ctx)
2355 {
2356  int i;
2357 
2358  for(i = 0; i < ctx->n_ref_defs; i++) {
2359  MD_REF_DEF* def = &ctx->ref_defs[i];
2360 
2361  if(def->label_needs_free)
2362  free(def->label);
2363  if(def->title_needs_free)
2364  free(def->title);
2365  }
2366 
2367  free(ctx->ref_defs);
2368 }
2369 
2370 
2371 /******************************************
2372  *** Processing Inlines (a.k.a Spans) ***
2373  ******************************************/
2374 
/* We process inlines in a few phases:
 *
 * (1) We go through the block text and collect all significant characters
 *     which may start/end a span or some other significant position into
 *     ctx->marks[]. Core of this is what md_collect_marks() does.
 *
 *     We also do some very brief preliminary context-less analysis, whether
 *     it might be an opener or a closer (e.g. of an emphasis span).
 *
 *     This speeds up the other steps as we do not need to re-iterate over
 *     all characters anymore.
 *
 * (2) We analyze each potential mark type, in the order of their precedence.
 *
 *     In each md_analyze_XXX() function, we re-iterate the list of the marks,
 *     skipping already resolved regions (of preceding precedences) and try
 *     to resolve them.
 *
 * (2.1) For trivial marks, which are single (e.g. HTML entity), we just mark
 *       them as resolved.
 *
 * (2.2) For range-type marks, we analyze whether the mark could be a closer
 *       and, if yes, whether there is some preceding opener it could satisfy.
 *
 *       If not, we check whether it could really be an opener and, if yes,
 *       we remember it so subsequent closers may resolve it.
 *
 * (3) Finally, when all marks were analyzed, we render the block contents
 *     by calling MD_RENDERER::text() callback, interrupting by ::enter_span()
 *     or ::close_span() whenever we reach a resolved mark.
 */
2406 
2407 
2408 /* The mark structure.
2409  *
2410  * '\\': Maybe escape sequence.
2411  * '\0': NULL char.
2412  * '*': Maybe (strong) emphasis start/end.
2413  * '_': Maybe (strong) emphasis start/end.
2414  * '~': Maybe strikethrough start/end (needs MD_FLAG_STRIKETHROUGH).
2415  * '`': Maybe code span start/end.
2416  * '&': Maybe start of entity.
2417  * ';': Maybe end of entity.
2418  * '<': Maybe start of raw HTML or autolink.
2419  * '>': Maybe end of raw HTML or autolink.
2420  * '[': Maybe start of link label or link text.
2421  * '!': Equivalent of '[' for image.
2422  * ']': Maybe end of link label or link text.
2423  * '@': Maybe permissive e-mail auto-link (needs MD_FLAG_PERMISSIVEEMAILAUTOLINKS).
2424  * ':': Maybe permissive URL auto-link (needs MD_FLAG_PERMISSIVEURLAUTOLINKS).
2425  * '.': Maybe permissive WWW auto-link (needs MD_FLAG_PERMISSIVEWWWAUTOLINKS).
2426  * 'D': Dummy mark, it reserves a space for splitting a previous mark
2427  * (e.g. emphasis) or to make more space for storing some special data
2428  * related to the preceding mark (e.g. link).
2429  *
2430  * Note that not all instances of these chars in the text imply creation of the
2431  * structure. Only those which have (or may have, after we see more context)
2432  * the special meaning.
2433  *
2434  * (Keep this struct as small as possible to fit as much of them into CPU
2435  * cache line.)
2436  */
2437 struct MD_MARK_tag {
2440 
2441  /* For unresolved openers, 'prev' and 'next' form the chain of open openers
2442  * of given type 'ch'.
2443  *
2444  * During resolving, we disconnect from the chain and point to the
2445  * corresponding counterpart so opener points to its closer and vice versa.
2446  */
2447  int prev;
2448  int next;
2450  unsigned char flags;
2451 };
2452 
/* Mark flags (these apply to ALL mark types). */
#define MD_MARK_POTENTIAL_OPENER            0x01    /* Maybe opener. */
#define MD_MARK_POTENTIAL_CLOSER            0x02    /* Maybe closer. */
#define MD_MARK_OPENER                      0x04    /* Definitely opener. */
#define MD_MARK_CLOSER                      0x08    /* Definitely closer. */
#define MD_MARK_RESOLVED                    0x10    /* Resolved in any definite way. */

/* Mark flags specific for various mark types (so they can share bits). */

/* Emphasis marks: The two top-most bits encode a value of 0, 1 or 2; it is
 * used (together with the intra-word flag) for the "rule of 3". */
#define MD_MARK_EMPH_INTRAWORD              0x20
#define MD_MARK_EMPH_MOD3_0                 0x40
#define MD_MARK_EMPH_MOD3_1                 0x80
#define MD_MARK_EMPH_MOD3_2                 (MD_MARK_EMPH_MOD3_0 | MD_MARK_EMPH_MOD3_1)
#define MD_MARK_EMPH_MOD3_MASK              (MD_MARK_EMPH_MOD3_0 | MD_MARK_EMPH_MOD3_1)

/* Marks '<' and '>': Distinguisher between raw HTML and autolink. */
#define MD_MARK_AUTOLINK                    0x20

/* Permissive autolink marks. */
#define MD_MARK_VALIDPERMISSIVEAUTOLINK     0x20
2468 
/* Return the chain of openers an asterisk mark with the given flags belongs
 * to (md_mark_chain() passes the flags of a '*' mark here).
 *
 * NOTE(review): The listing jumps from original line 2471 directly to 2479
 * here; the switch statement header and all of its case labels appear to be
 * missing and only the default branch is visible. Presumably the switch
 * dispatches on the MD_MARK_EMPH_xxxx bits of flags -- restore the missing
 * lines from the upstream file and confirm. */
2469 static MD_MARKCHAIN*
2470 md_asterisk_chain(MD_CTX* ctx, unsigned flags)
2471 {
2479  default: MD_UNREACHABLE();
2480  }
2481  return NULL;
2482 }
2483 
2484 static MD_MARKCHAIN*
2485 md_mark_chain(MD_CTX* ctx, int mark_index)
2486 {
2487  MD_MARK* mark = &ctx->marks[mark_index];
2488 
2489  switch(mark->ch) {
2490  case _T('*'): return md_asterisk_chain(ctx, mark->flags);
2491  case _T('_'): return &UNDERSCORE_OPENERS;
2492  case _T('~'): return (mark->end - mark->beg == 1) ? &TILDE_OPENERS_1 : &TILDE_OPENERS_2;
2493  case _T('['): return &BRACKET_OPENERS;
2494  case _T('|'): return &TABLECELLBOUNDARIES;
2495  default: return NULL;
2496  }
2497 }
2498 
2499 static MD_MARK*
2500 md_push_mark(MD_CTX* ctx)
2501 {
2502  if(ctx->n_marks >= ctx->alloc_marks) {
2503  MD_MARK* new_marks;
2504 
2505  ctx->alloc_marks = (ctx->alloc_marks > 0
2506  ? ctx->alloc_marks + ctx->alloc_marks / 2
2507  : 64);
2508  new_marks = realloc(ctx->marks, ctx->alloc_marks * sizeof(MD_MARK));
2509  if(new_marks == NULL) {
2510  MD_LOG("realloc() failed.");
2511  return NULL;
2512  }
2513 
2514  ctx->marks = new_marks;
2515  }
2516 
2517  return &ctx->marks[ctx->n_marks++];
2518 }
2519 
/* Helpers for pushing a new mark. Both macros expect the caller to provide
 * the variables 'ctx', 'mark' and 'ret' and the label 'abort': If the mark
 * cannot be allocated, ret is set to -1 and the control jumps to the label.
 */

/* Push a new mark and leave all its members uninitialized. */
#define PUSH_MARK_()                                                    \
        do {                                                            \
            if((mark = md_push_mark(ctx)) == NULL) {                    \
                ret = -1;                                               \
                goto abort;                                             \
            }                                                           \
        } while(0)

/* Push a new mark of the type ch_ which covers the range beg_ ... end_, has
 * the given flags and is not connected to any other mark. */
#define PUSH_MARK(ch_, beg_, end_, flags_)                              \
        do {                                                            \
            PUSH_MARK_();                                               \
            mark->prev = -1;                                            \
            mark->next = -1;                                            \
            mark->beg = (beg_);                                         \
            mark->end = (end_);                                         \
            mark->ch = (char)(ch_);                                     \
            mark->flags = (flags_);                                     \
        } while(0)
2539 
2540 
2541 static void
2542 md_mark_chain_append(MD_CTX* ctx, MD_MARKCHAIN* chain, int mark_index)
2543 {
2544  if(chain->tail >= 0)
2545  ctx->marks[chain->tail].next = mark_index;
2546  else
2547  chain->head = mark_index;
2548 
2549  ctx->marks[mark_index].prev = chain->tail;
2550  ctx->marks[mark_index].next = -1;
2551  chain->tail = mark_index;
2552 }
2553 
2554 /* Sometimes, we need to store a pointer into the mark. It is quite rare
2555  * so we do not bother to make MD_MARK use union, and it can only happen
2556  * for dummy marks. */
2557 static inline void
2558 md_mark_store_ptr(MD_CTX* ctx, int mark_index, void* ptr)
2559 {
2560  MD_MARK* mark = &ctx->marks[mark_index];
2561  MD_ASSERT(mark->ch == 'D');
2562 
2563  /* Check only members beg and end are misused for this. */
2564  MD_ASSERT(sizeof(void*) <= 2 * sizeof(OFF));
2565  memcpy(mark, &ptr, sizeof(void*));
2566 }
2567 
2568 static inline void*
2569 md_mark_get_ptr(MD_CTX* ctx, int mark_index)
2570 {
2571  void* ptr;
2572  MD_MARK* mark = &ctx->marks[mark_index];
2573  MD_ASSERT(mark->ch == 'D');
2574  memcpy(&ptr, mark, sizeof(void*));
2575  return ptr;
2576 }
2577 
2578 static void
2579 md_resolve_range(MD_CTX* ctx, MD_MARKCHAIN* chain, int opener_index, int closer_index)
2580 {
2581  MD_MARK* opener = &ctx->marks[opener_index];
2582  MD_MARK* closer = &ctx->marks[closer_index];
2583 
2584  /* Remove opener from the list of openers. */
2585  if(chain != NULL) {
2586  if(opener->prev >= 0)
2587  ctx->marks[opener->prev].next = opener->next;
2588  else
2589  chain->head = opener->next;
2590 
2591  if(opener->next >= 0)
2592  ctx->marks[opener->next].prev = opener->prev;
2593  else
2594  chain->tail = opener->prev;
2595  }
2596 
2597  /* Interconnect opener and closer and mark both as resolved. */
2598  opener->next = closer_index;
2600  closer->prev = opener_index;
2602 }
2603 
2604 
2605 #define MD_ROLLBACK_ALL 0
2606 #define MD_ROLLBACK_CROSSING 1
2607 
2608 /* In the range ctx->marks[opener_index] ... [closer_index], undo some or all
2609  * resolvings accordingly to these rules:
2610  *
2611  * (1) All openers BEFORE the range corresponding to any closer inside the
2612  * range are un-resolved and they are re-added to their respective chains
2613  * of unresolved openers. This ensures we can reuse the opener for closers
2614  * AFTER the range.
2615  *
2616  * (2) If 'how' is MD_ROLLBACK_ALL, then ALL resolved marks inside the range
2617  * are discarded.
2618  *
2619  * (3) If 'how' is MD_ROLLBACK_CROSSING, only closers with openers handled
2620  * in (1) are discarded. I.e. pairs of openers and closers which are both
2621  * inside the range are retained as well as any unpaired marks.
2622  */
2623 static void
2624 md_rollback(MD_CTX* ctx, int opener_index, int closer_index, int how)
2625 {
2626  int i;
2627  int mark_index;
2628 
2629  /* Cut all unresolved openers at the mark index. */
2630  for(i = OPENERS_CHAIN_FIRST; i < OPENERS_CHAIN_LAST+1; i++) {
2631  MD_MARKCHAIN* chain = &ctx->mark_chains[i];
2632 
2633  while(chain->tail >= opener_index)
2634  chain->tail = ctx->marks[chain->tail].prev;
2635 
2636  if(chain->tail >= 0)
2637  ctx->marks[chain->tail].next = -1;
2638  else
2639  chain->head = -1;
2640  }
2641 
2642  /* Go backwards so that unresolved openers are re-added into their
2643  * respective chains, in the right order. */
2644  mark_index = closer_index - 1;
2645  while(mark_index > opener_index) {
2646  MD_MARK* mark = &ctx->marks[mark_index];
2647  int mark_flags = mark->flags;
2648  int discard_flag = (how == MD_ROLLBACK_ALL);
2649 
2650  if(mark->flags & MD_MARK_CLOSER) {
2651  int mark_opener_index = mark->prev;
2652 
2653  /* Undo opener BEFORE the range. */
2654  if(mark_opener_index < opener_index) {
2655  MD_MARK* mark_opener = &ctx->marks[mark_opener_index];
2656  MD_MARKCHAIN* chain;
2657 
2658  mark_opener->flags &= ~(MD_MARK_OPENER | MD_MARK_CLOSER | MD_MARK_RESOLVED);
2659  chain = md_mark_chain(ctx, opener_index);
2660  if(chain != NULL) {
2661  md_mark_chain_append(ctx, chain, mark_opener_index);
2662  discard_flag = 1;
2663  }
2664  }
2665  }
2666 
2667  /* And reset our flags. */
2668  if(discard_flag)
2670 
2671  /* Jump as far as we can over unresolved or non-interesting marks. */
2672  switch(how) {
2673  case MD_ROLLBACK_CROSSING:
2674  if((mark_flags & MD_MARK_CLOSER) && mark->prev > opener_index) {
2675  /* If we are closer with opener INSIDE the range, there may
2676  * not be any other crosser inside the subrange. */
2677  mark_index = mark->prev;
2678  break;
2679  }
2680  MD_FALLTHROUGH();
2681  default:
2682  mark_index--;
2683  break;
2684  }
2685  }
2686 }
2687 
2688 static void
2689 md_build_mark_char_map(MD_CTX* ctx)
2690 {
2691  memset(ctx->mark_char_map, 0, sizeof(ctx->mark_char_map));
2692 
2693  ctx->mark_char_map['\\'] = 1;
2694  ctx->mark_char_map['*'] = 1;
2695  ctx->mark_char_map['_'] = 1;
2696  ctx->mark_char_map['`'] = 1;
2697  ctx->mark_char_map['&'] = 1;
2698  ctx->mark_char_map[';'] = 1;
2699  ctx->mark_char_map['<'] = 1;
2700  ctx->mark_char_map['>'] = 1;
2701  ctx->mark_char_map['['] = 1;
2702  ctx->mark_char_map['!'] = 1;
2703  ctx->mark_char_map[']'] = 1;
2704  ctx->mark_char_map['\0'] = 1;
2705 
2707  ctx->mark_char_map['~'] = 1;
2708 
2710  ctx->mark_char_map['$'] = 1;
2711 
2713  ctx->mark_char_map['@'] = 1;
2714 
2716  ctx->mark_char_map[':'] = 1;
2717 
2719  ctx->mark_char_map['.'] = 1;
2720 
2721  if((ctx->parser.flags & MD_FLAG_TABLES) || (ctx->parser.flags & MD_FLAG_WIKILINKS))
2722  ctx->mark_char_map['|'] = 1;
2723 
2725  int i;
2726 
2727  for(i = 0; i < (int) sizeof(ctx->mark_char_map); i++) {
2728  if(ISWHITESPACE_(i))
2729  ctx->mark_char_map[i] = 1;
2730  }
2731  }
2732 }
2733 
2734 /* We limit code span marks to lower than 32 backticks. This solves the
2735  * pathologic case of too many openers, each of different length: Their
2736  * resolving would be then O(n^2). */
2737 #define CODESPAN_MARK_MAXLEN 32
2738 
2739 static int
2740 md_is_code_span(MD_CTX* ctx, const MD_LINE* lines, int n_lines, OFF beg,
2741  OFF* p_opener_beg, OFF* p_opener_end,
2742  OFF* p_closer_beg, OFF* p_closer_end,
2743  OFF last_potential_closers[CODESPAN_MARK_MAXLEN],
2744  int* p_reached_paragraph_end)
2745 {
2746  OFF opener_beg = beg;
2747  OFF opener_end;
2748  OFF closer_beg;
2749  OFF closer_end;
2750  SZ mark_len;
2751  OFF line_end;
2752  int has_space_after_opener = FALSE;
2753  int has_eol_after_opener = FALSE;
2754  int has_space_before_closer = FALSE;
2755  int has_eol_before_closer = FALSE;
2756  int has_only_space = TRUE;
2757  int line_index = 0;
2758 
2759  line_end = lines[0].end;
2760  opener_end = opener_beg;
2761  while(opener_end < line_end && CH(opener_end) == _T('`'))
2762  opener_end++;
2763  has_space_after_opener = (opener_end < line_end && CH(opener_end) == _T(' '));
2764  has_eol_after_opener = (opener_end == line_end);
2765 
2766  /* The caller needs to know end of the opening mark even if we fail. */
2767  *p_opener_end = opener_end;
2768 
2769  mark_len = opener_end - opener_beg;
2770  if(mark_len > CODESPAN_MARK_MAXLEN)
2771  return FALSE;
2772 
2773  /* Check whether we already know there is no closer of this length.
2774  * If so, re-scan does no sense. This fixes issue #59. */
2775  if(last_potential_closers[mark_len-1] >= lines[n_lines-1].end ||
2776  (*p_reached_paragraph_end && last_potential_closers[mark_len-1] < opener_end))
2777  return FALSE;
2778 
2779  closer_beg = opener_end;
2780  closer_end = opener_end;
2781 
2782  /* Find closer mark. */
2783  while(TRUE) {
2784  while(closer_beg < line_end && CH(closer_beg) != _T('`')) {
2785  if(CH(closer_beg) != _T(' '))
2786  has_only_space = FALSE;
2787  closer_beg++;
2788  }
2789  closer_end = closer_beg;
2790  while(closer_end < line_end && CH(closer_end) == _T('`'))
2791  closer_end++;
2792 
2793  if(closer_end - closer_beg == mark_len) {
2794  /* Success. */
2795  has_space_before_closer = (closer_beg > lines[line_index].beg && CH(closer_beg-1) == _T(' '));
2796  has_eol_before_closer = (closer_beg == lines[line_index].beg);
2797  break;
2798  }
2799 
2800  if(closer_end - closer_beg > 0) {
2801  /* We have found a back-tick which is not part of the closer. */
2802  has_only_space = FALSE;
2803 
2804  /* But if we eventually fail, remember it as a potential closer
2805  * of its own length for future attempts. This mitigates needs for
2806  * rescans. */
2807  if(closer_end - closer_beg < CODESPAN_MARK_MAXLEN) {
2808  if(closer_beg > last_potential_closers[closer_end - closer_beg - 1])
2809  last_potential_closers[closer_end - closer_beg - 1] = closer_beg;
2810  }
2811  }
2812 
2813  if(closer_end >= line_end) {
2814  line_index++;
2815  if(line_index >= n_lines) {
2816  /* Reached end of the paragraph and still nothing. */
2817  *p_reached_paragraph_end = TRUE;
2818  return FALSE;
2819  }
2820  /* Try on the next line. */
2821  line_end = lines[line_index].end;
2822  closer_beg = lines[line_index].beg;
2823  } else {
2824  closer_beg = closer_end;
2825  }
2826  }
2827 
2828  /* If there is a space or a new line both after and before the opener
2829  * (and if the code span is not made of spaces only), consume one initial
2830  * and one trailing space as part of the marks. */
2831  if(!has_only_space &&
2832  (has_space_after_opener || has_eol_after_opener) &&
2833  (has_space_before_closer || has_eol_before_closer))
2834  {
2835  if(has_space_after_opener)
2836  opener_end++;
2837  else
2838  opener_end = lines[1].beg;
2839 
2840  if(has_space_before_closer)
2841  closer_beg--;
2842  else {
2843  closer_beg = lines[line_index-1].end;
2844  /* We need to eat the preceding "\r\n" but not any line trailing
2845  * spaces. */
2846  while(closer_beg < ctx->size && ISBLANK(closer_beg))
2847  closer_beg++;
2848  }
2849  }
2850 
2851  *p_opener_beg = opener_beg;
2852  *p_opener_end = opener_end;
2853  *p_closer_beg = closer_beg;
2854  *p_closer_end = closer_end;
2855  return TRUE;
2856 }
2857 
2858 static int
2859 md_is_autolink_uri(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2860 {
2861  OFF off = beg+1;
2862 
2863  MD_ASSERT(CH(beg) == _T('<'));
2864 
2865  /* Check for scheme. */
2866  if(off >= max_end || !ISASCII(off))
2867  return FALSE;
2868  off++;
2869  while(1) {
2870  if(off >= max_end)
2871  return FALSE;
2872  if(off - beg > 32)
2873  return FALSE;
2874  if(CH(off) == _T(':') && off - beg >= 3)
2875  break;
2876  if(!ISALNUM(off) && CH(off) != _T('+') && CH(off) != _T('-') && CH(off) != _T('.'))
2877  return FALSE;
2878  off++;
2879  }
2880 
2881  /* Check the path after the scheme. */
2882  while(off < max_end && CH(off) != _T('>')) {
2883  if(ISWHITESPACE(off) || ISCNTRL(off) || CH(off) == _T('<'))
2884  return FALSE;
2885  off++;
2886  }
2887 
2888  if(off >= max_end)
2889  return FALSE;
2890 
2891  MD_ASSERT(CH(off) == _T('>'));
2892  *p_end = off+1;
2893  return TRUE;
2894 }
2895 
2896 static int
2897 md_is_autolink_email(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end)
2898 {
2899  OFF off = beg + 1;
2900  int label_len;
2901 
2902  MD_ASSERT(CH(beg) == _T('<'));
2903 
2904  /* The code should correspond to this regexp:
2905  /^[a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+
2906  @[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?
2907  (?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$/
2908  */
2909 
2910  /* Username (before '@'). */
2911  while(off < max_end && (ISALNUM(off) || ISANYOF(off, _T(".!#$%&'*+/=?^_`{|}~-"))))
2912  off++;
2913  if(off <= beg+1)
2914  return FALSE;
2915 
2916  /* '@' */
2917  if(off >= max_end || CH(off) != _T('@'))
2918  return FALSE;
2919  off++;
2920 
2921  /* Labels delimited with '.'; each label is sequence of 1 - 63 alnum
2922  * characters or '-', but '-' is not allowed as first or last char. */
2923  label_len = 0;
2924  while(off < max_end) {
2925  if(ISALNUM(off))
2926  label_len++;
2927  else if(CH(off) == _T('-') && label_len > 0)
2928  label_len++;
2929  else if(CH(off) == _T('.') && label_len > 0 && CH(off-1) != _T('-'))
2930  label_len = 0;
2931  else
2932  break;
2933 
2934  if(label_len > 63)
2935  return FALSE;
2936 
2937  off++;
2938  }
2939 
2940  if(label_len <= 0 || off >= max_end || CH(off) != _T('>') || CH(off-1) == _T('-'))
2941  return FALSE;
2942 
2943  *p_end = off+1;
2944  return TRUE;
2945 }
2946 
2947 static int
2948 md_is_autolink(MD_CTX* ctx, OFF beg, OFF max_end, OFF* p_end, int* p_missing_mailto)
2949 {
2950  if(md_is_autolink_uri(ctx, beg, max_end, p_end)) {
2951  *p_missing_mailto = FALSE;
2952  return TRUE;
2953  }
2954 
2955  if(md_is_autolink_email(ctx, beg, max_end, p_end)) {
2956  *p_missing_mailto = TRUE;
2957  return TRUE;
2958  }
2959 
2960  return FALSE;
2961 }
2962 
2963 static int
2964 md_collect_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
2965 {
2966  int i;
2967  int ret = 0;
2968  MD_MARK* mark;
2969  OFF codespan_last_potential_closers[CODESPAN_MARK_MAXLEN] = { 0 };
2970  int codespan_scanned_till_paragraph_end = FALSE;
2971 
2972  for(i = 0; i < n_lines; i++) {
2973  const MD_LINE* line = &lines[i];
2974  OFF off = line->beg;
2975  OFF line_end = line->end;
2976 
2977  while(TRUE) {
2978  CHAR ch;
2979 
2980 #ifdef MD4C_USE_UTF16
2981  /* For UTF-16, mark_char_map[] covers only ASCII. */
2982  #define IS_MARK_CHAR(off) ((CH(off) < SIZEOF_ARRAY(ctx->mark_char_map)) && \
2983  (ctx->mark_char_map[(unsigned char) CH(off)]))
2984 #else
2985  /* For 8-bit encodings, mark_char_map[] covers all 256 elements. */
2986  #define IS_MARK_CHAR(off) (ctx->mark_char_map[(unsigned char) CH(off)])
2987 #endif
2988 
2989  /* Optimization: Use some loop unrolling. */
2990  while(off + 3 < line_end && !IS_MARK_CHAR(off+0) && !IS_MARK_CHAR(off+1)
2991  && !IS_MARK_CHAR(off+2) && !IS_MARK_CHAR(off+3))
2992  off += 4;
2993  while(off < line_end && !IS_MARK_CHAR(off+0))
2994  off++;
2995 
2996  if(off >= line_end)
2997  break;
2998 
2999  ch = CH(off);
3000 
3001  /* A backslash escape.
3002  * It can go beyond line->end as it may involve escaped new
3003  * line to form a hard break. */
3004  if(ch == _T('\\') && off+1 < ctx->size && (ISPUNCT(off+1) || ISNEWLINE(off+1))) {
3005  /* Hard-break cannot be on the last line of the block. */
3006  if(!ISNEWLINE(off+1) || i+1 < n_lines)
3007  PUSH_MARK(ch, off, off+2, MD_MARK_RESOLVED);
3008  off += 2;
3009  continue;
3010  }
3011 
3012  /* A potential (string) emphasis start/end. */
3013  if(ch == _T('*') || ch == _T('_')) {
3014  OFF tmp = off+1;
3015  int left_level; /* What precedes: 0 = whitespace; 1 = punctuation; 2 = other char. */
3016  int right_level; /* What follows: 0 = whitespace; 1 = punctuation; 2 = other char. */
3017 
3018  while(tmp < line_end && CH(tmp) == ch)
3019  tmp++;
3020 
3021  if(off == line->beg || ISUNICODEWHITESPACEBEFORE(off))
3022  left_level = 0;
3023  else if(ISUNICODEPUNCTBEFORE(off))
3024  left_level = 1;
3025  else
3026  left_level = 2;
3027 
3028  if(tmp == line_end || ISUNICODEWHITESPACE(tmp))
3029  right_level = 0;
3030  else if(ISUNICODEPUNCT(tmp))
3031  right_level = 1;
3032  else
3033  right_level = 2;
3034 
3035  /* Intra-word underscore doesn't have special meaning. */
3036  if(ch == _T('_') && left_level == 2 && right_level == 2) {
3037  left_level = 0;
3038  right_level = 0;
3039  }
3040 
3041  if(left_level != 0 || right_level != 0) {
3042  unsigned flags = 0;
3043 
3044  if(left_level > 0 && left_level >= right_level)
3046  if(right_level > 0 && right_level >= left_level)
3048  if(left_level == 2 && right_level == 2)
3050 
3051  /* For "the rule of three" we need to remember the original
3052  * size of the mark (modulo three), before we potentially
3053  * split the mark when being later resolved partially by some
3054  * shorter closer. */
3055  switch((tmp - off) % 3) {
3056  case 0: flags |= MD_MARK_EMPH_MOD3_0; break;
3057  case 1: flags |= MD_MARK_EMPH_MOD3_1; break;
3058  case 2: flags |= MD_MARK_EMPH_MOD3_2; break;
3059  }
3060 
3061  PUSH_MARK(ch, off, tmp, flags);
3062 
3063  /* During resolving, multiple asterisks may have to be
3064  * split into independent span start/ends. Consider e.g.
3065  * "**foo* bar*". Therefore we push also some empty dummy
3066  * marks to have enough space for that. */
3067  off++;
3068  while(off < tmp) {
3069  PUSH_MARK('D', off, off, 0);
3070  off++;
3071  }
3072  continue;
3073  }
3074 
3075  off = tmp;
3076  continue;
3077  }
3078 
3079  /* A potential code span start/end. */
3080  if(ch == _T('`')) {
3081  OFF opener_beg, opener_end;
3082  OFF closer_beg, closer_end;
3083  int is_code_span;
3084 
3085  is_code_span = md_is_code_span(ctx, lines + i, n_lines - i, off,
3086  &opener_beg, &opener_end, &closer_beg, &closer_end,
3087  codespan_last_potential_closers,
3088  &codespan_scanned_till_paragraph_end);
3089  if(is_code_span) {
3090  PUSH_MARK(_T('`'), opener_beg, opener_end, MD_MARK_OPENER | MD_MARK_RESOLVED);
3091  PUSH_MARK(_T('`'), closer_beg, closer_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
3092  ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
3093  ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
3094 
3095  off = closer_end;
3096 
3097  /* Advance the current line accordingly. */
3098  while(off > line_end) {
3099  i++;
3100  line++;
3101  line_end = line->end;
3102  }
3103  continue;
3104  }
3105 
3106  off = opener_end;
3107  continue;
3108  }
3109 
3110  /* A potential entity start. */
3111  if(ch == _T('&')) {
3112  PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
3113  off++;
3114  continue;
3115  }
3116 
3117  /* A potential entity end. */
3118  if(ch == _T(';')) {
3119  /* We surely cannot be entity unless the previous mark is '&'. */
3120  if(ctx->n_marks > 0 && ctx->marks[ctx->n_marks-1].ch == _T('&'))
3121  PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
3122 
3123  off++;
3124  continue;
3125  }
3126 
3127  /* A potential autolink or raw HTML start/end. */
3128  if(ch == _T('<')) {
3129  int is_autolink;
3130  OFF autolink_end;
3131  int missing_mailto;
3132 
3133  if(!(ctx->parser.flags & MD_FLAG_NOHTMLSPANS)) {
3134  int is_html;
3135  OFF html_end;
3136 
3137  /* Given the nature of the raw HTML, we have to recognize
3138  * it here. Doing so later in md_analyze_lt_gt() could
3139  * open can of worms of quadratic complexity. */
3140  is_html = md_is_html_any(ctx, lines + i, n_lines - i, off,
3141  lines[n_lines-1].end, &html_end);
3142  if(is_html) {
3143  PUSH_MARK(_T('<'), off, off, MD_MARK_OPENER | MD_MARK_RESOLVED);
3144  PUSH_MARK(_T('>'), html_end, html_end, MD_MARK_CLOSER | MD_MARK_RESOLVED);
3145  ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
3146  ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
3147  off = html_end;
3148 
3149  /* Advance the current line accordingly. */
3150  while(off > line_end) {
3151  i++;
3152  line++;
3153  line_end = line->end;
3154  }
3155  continue;
3156  }
3157  }
3158 
3159  is_autolink = md_is_autolink(ctx, off, lines[n_lines-1].end,
3160  &autolink_end, &missing_mailto);
3161  if(is_autolink) {
3162  PUSH_MARK((missing_mailto ? _T('@') : _T('<')), off, off+1,
3164  PUSH_MARK(_T('>'), autolink_end-1, autolink_end,
3166  ctx->marks[ctx->n_marks-2].next = ctx->n_marks-1;
3167  ctx->marks[ctx->n_marks-1].prev = ctx->n_marks-2;
3168  off = autolink_end;
3169  continue;
3170  }
3171 
3172  off++;
3173  continue;
3174  }
3175 
3176  /* A potential link or its part. */
3177  if(ch == _T('[') || (ch == _T('!') && off+1 < line_end && CH(off+1) == _T('['))) {
3178  OFF tmp = (ch == _T('[') ? off+1 : off+2);
3180  off = tmp;
3181  /* Two dummies to make enough place for data we need if it is
3182  * a link. */
3183  PUSH_MARK('D', off, off, 0);
3184  PUSH_MARK('D', off, off, 0);
3185  continue;
3186  }
3187  if(ch == _T(']')) {
3188  PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_CLOSER);
3189  off++;
3190  continue;
3191  }
3192 
3193  /* A potential permissive e-mail autolink. */
3194  if(ch == _T('@')) {
3195  if(line->beg + 1 <= off && ISALNUM(off-1) &&
3196  off + 3 < line->end && ISALNUM(off+1))
3197  {
3198  PUSH_MARK(ch, off, off+1, MD_MARK_POTENTIAL_OPENER);
3199  /* Push a dummy as a reserve for a closer. */
3200  PUSH_MARK('D', off, off, 0);
3201  }
3202 
3203  off++;
3204  continue;
3205  }
3206 
3207  /* A potential permissive URL autolink. */
3208  if(ch == _T(':')) {
3209  static struct {
3210  const CHAR* scheme;
3211  SZ scheme_size;
3212  const CHAR* suffix;
3213  SZ suffix_size;
3214  } scheme_map[] = {
3215  /* In the order from the most frequently used, arguably. */
3216  { _T("http"), 4, _T("//"), 2 },
3217  { _T("https"), 5, _T("//"), 2 },
3218  { _T("ftp"), 3, _T("//"), 2 }
3219  };
3220  int scheme_index;
3221 
3222  for(scheme_index = 0; scheme_index < (int) SIZEOF_ARRAY(scheme_map); scheme_index++) {
3223  const CHAR* scheme = scheme_map[scheme_index].scheme;
3224  const SZ scheme_size = scheme_map[scheme_index].scheme_size;
3225  const CHAR* suffix = scheme_map[scheme_index].suffix;
3226  const SZ suffix_size = scheme_map[scheme_index].suffix_size;
3227 
3228  if(line->beg + scheme_size <= off && md_ascii_eq(STR(off-scheme_size), scheme, scheme_size) &&
3229  (line->beg + scheme_size == off || ISWHITESPACE(off-scheme_size-1) || ISANYOF(off-scheme_size-1, _T("*_~(["))) &&
3230  off + 1 + suffix_size < line->end && md_ascii_eq(STR(off+1), suffix, suffix_size))
3231  {
3232  PUSH_MARK(ch, off-scheme_size, off+1+suffix_size, MD_MARK_POTENTIAL_OPENER);
3233  /* Push a dummy as a reserve for a closer. */
3234  PUSH_MARK('D', off, off, 0);
3235  off += 1 + suffix_size;
3236  break;
3237  }
3238  }
3239 
3240  off++;
3241  continue;
3242  }
3243 
3244  /* A potential permissive WWW autolink. */
3245  if(ch == _T('.')) {
3246  if(line->beg + 3 <= off && md_ascii_eq(STR(off-3), _T("www"), 3) &&
3247  (line->beg + 3 == off || ISWHITESPACE(off-4) || ISANYOF(off-4, _T("*_~(["))) &&
3248  off + 1 < line_end)
3249  {
3250  PUSH_MARK(ch, off-3, off+1, MD_MARK_POTENTIAL_OPENER);
3251  /* Push a dummy as a reserve for a closer. */
3252  PUSH_MARK('D', off, off, 0);
3253  off++;
3254  continue;
3255  }
3256 
3257  off++;
3258  continue;
3259  }
3260 
3261  /* A potential table cell boundary or wiki link label delimiter. */
3262  if((table_mode || ctx->parser.flags & MD_FLAG_WIKILINKS) && ch == _T('|')) {
3263  PUSH_MARK(ch, off, off+1, 0);
3264  off++;
3265  continue;
3266  }
3267 
3268  /* A potential strikethrough start/end. */
3269  if(ch == _T('~')) {
3270  OFF tmp = off+1;
3271 
3272  while(tmp < line_end && CH(tmp) == _T('~'))
3273  tmp++;
3274 
3275  if(tmp - off < 3) {
3276  unsigned flags = 0;
3277 
3278  if(tmp < line_end && !ISUNICODEWHITESPACE(tmp))
3280  if(off > line->beg && !ISUNICODEWHITESPACEBEFORE(off))
3282  if(flags != 0)
3283  PUSH_MARK(ch, off, tmp, flags);
3284  }
3285 
3286  off = tmp;
3287  continue;
3288  }
3289 
3290  /* A potential equation start/end */
3291  if(ch == _T('$')) {
3292  /* We can have at most two consecutive $ signs,
3293  * where two dollar signs signify a display equation. */
3294  OFF tmp = off+1;
3295 
3296  while(tmp < line_end && CH(tmp) == _T('$'))
3297  tmp++;
3298 
3299  if (tmp - off <= 2)
3301  off = tmp;
3302  continue;
3303  }
3304 
3305  /* Turn non-trivial whitespace into single space. */
3306  if(ISWHITESPACE_(ch)) {
3307  OFF tmp = off+1;
3308 
3309  while(tmp < line_end && ISWHITESPACE(tmp))
3310  tmp++;
3311 
3312  if(tmp - off > 1 || ch != _T(' '))
3313  PUSH_MARK(ch, off, tmp, MD_MARK_RESOLVED);
3314 
3315  off = tmp;
3316  continue;
3317  }
3318 
3319  /* NULL character. */
3320  if(ch == _T('\0')) {
3321  PUSH_MARK(ch, off, off+1, MD_MARK_RESOLVED);
3322  off++;
3323  continue;
3324  }
3325 
3326  off++;
3327  }
3328  }
3329 
3330  /* Add a dummy mark at the end of the mark vector to simplify
3331  * process_inlines(). */
3332  PUSH_MARK(127, ctx->size, ctx->size, MD_MARK_RESOLVED);
3333 
3334 abort:
3335  return ret;
3336 }
3337 
3338 static void
3339 md_analyze_bracket(MD_CTX* ctx, int mark_index)
3340 {
3341  /* We cannot really resolve links here as for that we would need
3342  * more context. E.g. a following pair of brackets (reference link),
3343  * or enclosing pair of brackets (if the inner is the link, the outer
3344  * one cannot be.)
3345  *
3346  * Therefore we here only construct a list of resolved '[' ']' pairs
3347  * ordered by position of the closer. This allows ur to analyze what is
3348  * or is not link in the right order, from inside to outside in case
3349  * of nested brackets.
3350  *
3351  * The resolving itself is deferred into md_resolve_links().
3352  */
3353 
3354  MD_MARK* mark = &ctx->marks[mark_index];
3355 
3356  if(mark->flags & MD_MARK_POTENTIAL_OPENER) {
3357  md_mark_chain_append(ctx, &BRACKET_OPENERS, mark_index);
3358  return;
3359  }
3360 
3361  if(BRACKET_OPENERS.tail >= 0) {
3362  /* Pop the opener from the chain. */
3363  int opener_index = BRACKET_OPENERS.tail;
3364  MD_MARK* opener = &ctx->marks[opener_index];
3365  if(opener->prev >= 0)
3366  ctx->marks[opener->prev].next = -1;
3367  else
3368  BRACKET_OPENERS.head = -1;
3369  BRACKET_OPENERS.tail = opener->prev;
3370 
3371  /* Interconnect the opener and closer. */
3372  opener->next = mark_index;
3373  mark->prev = opener_index;
3374 
3375  /* Add the pair into chain of potential links for md_resolve_links().
3376  * Note we misuse opener->prev for this as opener->next points to its
3377  * closer. */
3378  if(ctx->unresolved_link_tail >= 0)
3379  ctx->marks[ctx->unresolved_link_tail].prev = opener_index;
3380  else
3381  ctx->unresolved_link_head = opener_index;
3382  ctx->unresolved_link_tail = opener_index;
3383  opener->prev = -1;
3384  }
3385 }
3386 
3387 /* Forward declaration. */
3388 static void md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
3389  int mark_beg, int mark_end);
3390 
3391 static int
3392 md_resolve_links(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
3393 {
3394  int opener_index = ctx->unresolved_link_head;
3395  OFF last_link_beg = 0;
3396  OFF last_link_end = 0;
3397  OFF last_img_beg = 0;
3398  OFF last_img_end = 0;
3399 
3400  while(opener_index >= 0) {
3401  MD_MARK* opener = &ctx->marks[opener_index];
3402  int closer_index = opener->next;
3403  MD_MARK* closer = &ctx->marks[closer_index];
3404  int next_index = opener->prev;
3405  MD_MARK* next_opener;
3406  MD_MARK* next_closer;
3408  int is_link = FALSE;
3409 
3410  if(next_index >= 0) {
3411  next_opener = &ctx->marks[next_index];
3412  next_closer = &ctx->marks[next_opener->next];
3413  } else {
3414  next_opener = NULL;
3415  next_closer = NULL;
3416  }
3417 
3418  /* If nested ("[ [ ] ]"), we need to make sure that:
3419  * - The outer does not end inside of (...) belonging to the inner.
3420  * - The outer cannot be link if the inner is link (i.e. not image).
3421  *
3422  * (Note we here analyze from inner to outer as the marks are ordered
3423  * by closer->beg.)
3424  */
3425  if((opener->beg < last_link_beg && closer->end < last_link_end) ||
3426  (opener->beg < last_img_beg && closer->end < last_img_end) ||
3427  (opener->beg < last_link_end && opener->ch == '['))
3428  {
3429  opener_index = next_index;
3430  continue;
3431  }
3432 
3433  /* Recognize and resolve wiki links.
3434  * Wiki-links maybe '[[destination]]' or '[[destination|label]]'.
3435  */
3436  if ((ctx->parser.flags & MD_FLAG_WIKILINKS) &&
3437  (opener->end - opener->beg == 1) && /* not image */
3438  next_opener != NULL && /* double '[' opener */
3439  next_opener->ch == '[' &&
3440  (next_opener->beg == opener->beg - 1) &&
3441  (next_opener->end - next_opener->beg == 1) &&
3442  next_closer != NULL && /* double ']' closer */
3443  next_closer->ch == ']' &&
3444  (next_closer->beg == closer->beg + 1) &&
3445  (next_closer->end - next_closer->beg == 1))
3446  {
3447  MD_MARK* delim = NULL;
3448  int delim_index;
3449  OFF dest_beg, dest_end;
3450 
3451  is_link = TRUE;
3452 
3453  /* We don't allow destination to be longer than 100 characters.
3454  * Lets scan to see whether there is '|'. (If not then the whole
3455  * wiki-link has to be below the 100 characters.) */
3456  delim_index = opener_index + 1;
3457  while(delim_index < closer_index) {
3458  MD_MARK* m = &ctx->marks[delim_index];
3459  if(m->ch == '|') {
3460  delim = m;
3461  break;
3462  }
3463  if(m->ch != 'D' && m->beg - opener->end > 100)
3464  break;
3465  delim_index++;
3466  }
3467  dest_beg = opener->end;
3468  dest_end = (delim != NULL) ? delim->beg : closer->beg;
3469  if(dest_end - dest_beg == 0 || dest_end - dest_beg > 100)
3470  is_link = FALSE;
3471 
3472  /* There may not be any new line in the destination. */
3473  if(is_link) {
3474  OFF off;
3475  for(off = dest_beg; off < dest_end; off++) {
3476  if(ISNEWLINE(off)) {
3477  is_link = FALSE;
3478  break;
3479  }
3480  }
3481  }
3482 
3483  if(is_link) {
3484  if(delim != NULL) {
3485  if(delim->end < closer->beg) {
3486  opener->end = delim->beg;
3487  } else {
3488  /* The pipe is just before the closer: [[foo|]] */
3489  closer->beg = delim->beg;
3490  delim = NULL;
3491  }
3492  }
3493 
3494  opener->beg = next_opener->beg;
3495  opener->next = closer_index;
3497 
3498  closer->end = next_closer->end;
3499  closer->prev = opener_index;
3501 
3502  last_link_beg = opener->beg;
3503  last_link_end = closer->end;
3504 
3505  if(delim != NULL) {
3506  delim->flags |= MD_MARK_RESOLVED;
3507  md_rollback(ctx, opener_index, delim_index, MD_ROLLBACK_ALL);
3508  md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index);
3509  } else {
3510  md_rollback(ctx, opener_index, closer_index, MD_ROLLBACK_ALL);
3511  }
3512 
3513  opener_index = next_opener->prev;
3514  continue;
3515  }
3516  }
3517 
3518  if(next_opener != NULL && next_opener->beg == closer->end) {
3519  if(next_closer->beg > closer->end + 1) {
3520  /* Might be full reference link. */
3521  is_link = md_is_link_reference(ctx, lines, n_lines, next_opener->beg, next_closer->end, &attr);
3522  } else {
3523  /* Might be shortcut reference link. */
3524  is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
3525  }
3526 
3527  if(is_link < 0)
3528  return -1;
3529 
3530  if(is_link) {
3531  /* Eat the 2nd "[...]". */
3532  closer->end = next_closer->end;
3533 
3534  /* Do not analyze the label as a standalone link in the next
3535  * iteration. */
3536  next_index = ctx->marks[next_index].prev;
3537  }
3538  } else {
3539  if(closer->end < ctx->size && CH(closer->end) == _T('(')) {
3540  /* Might be inline link. */
3541  OFF inline_link_end = UINT_MAX;
3542 
3543  is_link = md_is_inline_link_spec(ctx, lines, n_lines, closer->end, &inline_link_end, &attr);
3544  if(is_link < 0)
3545  return -1;
3546 
3547  /* Check the closing ')' is not inside an already resolved range
3548  * (i.e. a range with a higher priority), e.g. a code span. */
3549  if(is_link) {
3550  int i = closer_index + 1;
3551 
3552  while(i < ctx->n_marks) {
3553  MD_MARK* mark = &ctx->marks[i];
3554 
3555  if(mark->beg >= inline_link_end)
3556  break;
3558  if(ctx->marks[mark->next].beg >= inline_link_end) {
3559  /* Cancel the link status. */
3560  if(attr.title_needs_free)
3561  free(attr.title);
3562  is_link = FALSE;
3563  break;
3564  }
3565 
3566  i = mark->next + 1;
3567  } else {
3568  i++;
3569  }
3570  }
3571  }
3572 
3573  if(is_link) {
3574  /* Eat the "(...)" */
3575  closer->end = inline_link_end;
3576  }
3577  }
3578 
3579  if(!is_link) {
3580  /* Might be collapsed reference link. */
3581  is_link = md_is_link_reference(ctx, lines, n_lines, opener->beg, closer->end, &attr);
3582  if(is_link < 0)
3583  return -1;
3584  }
3585  }
3586 
3587  if(is_link) {
3588  /* Resolve the brackets as a link. */
3591 
3592  /* If it is a link, we store the destination and title in the two
3593  * dummy marks after the opener. */
3594  MD_ASSERT(ctx->marks[opener_index+1].ch == 'D');
3595  ctx->marks[opener_index+1].beg = attr.dest_beg;
3596  ctx->marks[opener_index+1].end = attr.dest_end;
3597 
3598  MD_ASSERT(ctx->marks[opener_index+2].ch == 'D');
3599  md_mark_store_ptr(ctx, opener_index+2, attr.title);
3600  /* The title might or might not have been allocated for us. */
3601  if(attr.title_needs_free)
3602  md_mark_chain_append(ctx, &PTR_CHAIN, opener_index+2);
3603  ctx->marks[opener_index+2].prev = attr.title_size;
3604 
3605  if(opener->ch == '[') {
3606  last_link_beg = opener->beg;
3607  last_link_end = closer->end;
3608  } else {
3609  last_img_beg = opener->beg;
3610  last_img_end = closer->end;
3611  }
3612 
3613  md_analyze_link_contents(ctx, lines, n_lines, opener_index+1, closer_index);
3614 
3615  /* If the link text is formed by nothing but permissive autolink,
3616  * suppress the autolink.
3617  * See https://github.com/mity/md4c/issues/152 for more info. */
3619  MD_MARK* first_nested;
3620  MD_MARK* last_nested;
3621 
3622  first_nested = opener + 1;
3623  while(first_nested->ch == _T('D') && first_nested < closer)
3624  first_nested++;
3625 
3626  last_nested = closer - 1;
3627  while(first_nested->ch == _T('D') && last_nested > opener)
3628  last_nested--;
3629 
3630  if((first_nested->flags & MD_MARK_RESOLVED) &&
3631  first_nested->beg == opener->end &&
3632  ISANYOF_(first_nested->ch, _T("@:.")) &&
3633  first_nested->next == (last_nested - ctx->marks) &&
3634  last_nested->end == closer->beg)
3635  {
3636  first_nested->ch = _T('D');
3637  first_nested->flags &= ~MD_MARK_RESOLVED;
3638  last_nested->ch = _T('D');
3639  last_nested->flags &= ~MD_MARK_RESOLVED;
3640  }
3641  }
3642  }
3643 
3644  opener_index = next_index;
3645  }
3646 
3647  return 0;
3648 }
3649 
3650 /* Analyze whether the mark '&' starts a HTML entity.
3651  * If so, update its flags as well as flags of corresponding closer ';'. */
3652 static void
3653 md_analyze_entity(MD_CTX* ctx, int mark_index)
3654 {
3655  MD_MARK* opener = &ctx->marks[mark_index];
3656  MD_MARK* closer;
3657  OFF off;
3658 
3659  /* Cannot be entity if there is no closer as the next mark.
3660  * (Any other mark between would mean strange character which cannot be
3661  * part of the entity.
3662  *
3663  * So we can do all the work on '&' and do not call this later for the
3664  * closing mark ';'.
3665  */
3666  if(mark_index + 1 >= ctx->n_marks)
3667  return;
3668  closer = &ctx->marks[mark_index+1];
3669  if(closer->ch != ';')
3670  return;
3671 
3672  if(md_is_entity(ctx, opener->beg, closer->end, &off)) {
3673  MD_ASSERT(off == closer->end);
3674 
3675  md_resolve_range(ctx, NULL, mark_index, mark_index+1);
3676  opener->end = closer->end;
3677  }
3678 }
3679 
3680 static void
3681 md_analyze_table_cell_boundary(MD_CTX* ctx, int mark_index)
3682 {
3683  MD_MARK* mark = &ctx->marks[mark_index];
3684  mark->flags |= MD_MARK_RESOLVED;
3685 
3686  md_mark_chain_append(ctx, &TABLECELLBOUNDARIES, mark_index);
3687  ctx->n_table_cell_boundaries++;
3688 }
3689 
3690 /* Split a longer mark into two. The new mark takes the given count of
3691  * characters. May only be called if an adequate number of dummy 'D' marks
3692  * follows.
3693  */
3694 static int
3695 md_split_emph_mark(MD_CTX* ctx, int mark_index, SZ n)
3696 {
3697  MD_MARK* mark = &ctx->marks[mark_index];
3698  int new_mark_index = mark_index + (mark->end - mark->beg - n);
3699  MD_MARK* dummy = &ctx->marks[new_mark_index];
3700 
3701  MD_ASSERT(mark->end - mark->beg > n);
3702  MD_ASSERT(dummy->ch == 'D');
3703 
3704  memcpy(dummy, mark, sizeof(MD_MARK));
3705  mark->end -= n;
3706  dummy->beg = mark->end;
3707 
3708  return new_mark_index;
3709 }
3710 
3711 static void
3712 md_analyze_emph(MD_CTX* ctx, int mark_index)
3713 {
3714  MD_MARK* mark = &ctx->marks[mark_index];
3715  MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
3716 
3717  /* If we can be a closer, try to resolve with the preceding opener. */
3718  if(mark->flags & MD_MARK_POTENTIAL_CLOSER) {
3719  MD_MARK* opener = NULL;
3720  int opener_index = 0;
3721 
3722  if(mark->ch == _T('*')) {
3723  MD_MARKCHAIN* opener_chains[6];
3724  int i, n_opener_chains;
3725  unsigned flags = mark->flags;
3726 
3727  /* Apply the "rule of three". */
3728  n_opener_chains = 0;
3729  opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_0;
3731  opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_1;
3733  opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_intraword_mod3_2;
3734  opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_0;
3736  opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_1;
3738  opener_chains[n_opener_chains++] = &ASTERISK_OPENERS_extraword_mod3_2;
3739 
3740  /* Opener is the most recent mark from the allowed chains. */
3741  for(i = 0; i < n_opener_chains; i++) {
3742  if(opener_chains[i]->tail >= 0) {
3743  int tmp_index = opener_chains[i]->tail;
3744  MD_MARK* tmp_mark = &ctx->marks[tmp_index];
3745  if(opener == NULL || tmp_mark->end > opener->end) {
3746  opener_index = tmp_index;
3747  opener = tmp_mark;
3748  }
3749  }
3750  }
3751  } else {
3752  /* Simple emph. mark */
3753  if(chain->tail >= 0) {
3754  opener_index = chain->tail;
3755  opener = &ctx->marks[opener_index];
3756  }
3757  }
3758 
3759  /* Resolve, if we have found matching opener. */
3760  if(opener != NULL) {
3761  SZ opener_size = opener->end - opener->beg;
3762  SZ closer_size = mark->end - mark->beg;
3763  MD_MARKCHAIN* opener_chain = md_mark_chain(ctx, opener_index);
3764 
3765  if(opener_size > closer_size) {
3766  opener_index = md_split_emph_mark(ctx, opener_index, closer_size);
3767  md_mark_chain_append(ctx, opener_chain, opener_index);
3768  } else if(opener_size < closer_size) {
3769  md_split_emph_mark(ctx, mark_index, closer_size - opener_size);
3770  }
3771 
3772  md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
3773  md_resolve_range(ctx, opener_chain, opener_index, mark_index);
3774  return;
3775  }
3776  }
3777 
3778  /* If we could not resolve as closer, we may be yet be an opener. */
3779  if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3780  md_mark_chain_append(ctx, chain, mark_index);
3781 }
3782 
3783 static void
3784 md_analyze_tilde(MD_CTX* ctx, int mark_index)
3785 {
3786  MD_MARK* mark = &ctx->marks[mark_index];
3787  MD_MARKCHAIN* chain = md_mark_chain(ctx, mark_index);
3788 
3789  /* We attempt to be Github Flavored Markdown compatible here. GFM accepts
3790  * only tildes sequences of length 1 and 2, and the length of the opener
3791  * and closer has to match. */
3792 
3793  if((mark->flags & MD_MARK_POTENTIAL_CLOSER) && chain->head >= 0) {
3794  int opener_index = chain->head;
3795 
3796  md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_CROSSING);
3797  md_resolve_range(ctx, chain, opener_index, mark_index);
3798  return;
3799  }
3800 
3801  if(mark->flags & MD_MARK_POTENTIAL_OPENER)
3802  md_mark_chain_append(ctx, chain, mark_index);
3803 }
3804 
3805 static void
3806 md_analyze_dollar(MD_CTX* ctx, int mark_index)
3807 {
3808  /* This should mimic the way inline equations work in LaTeX, so there
3809  * can only ever be one item in the chain (i.e. the dollars can't be
3810  * nested). This is basically the same as the md_analyze_tilde function,
3811  * except that we require matching openers and closers to be of the same
3812  * length.
3813  *
3814  * E.g.: $abc$$def$$ => abc (display equation) def (end equation) */
3815  if(DOLLAR_OPENERS.head >= 0) {
3816  /* If the potential closer has a non-matching number of $, discard */
3817  MD_MARK* open = &ctx->marks[DOLLAR_OPENERS.head];
3818  MD_MARK* close = &ctx->marks[mark_index];
3819 
3820  int opener_index = DOLLAR_OPENERS.head;
3821  md_rollback(ctx, opener_index, mark_index, MD_ROLLBACK_ALL);
3822  if (open->end - open->beg == close->end - close->beg) {
3823  /* We are the matching closer */
3824  md_resolve_range(ctx, &DOLLAR_OPENERS, opener_index, mark_index);
3825  } else {
3826  /* We don't match the opener, so discard old opener and insert as opener */
3827  md_mark_chain_append(ctx, &DOLLAR_OPENERS, mark_index);
3828  }
3829  } else {
3830  /* No unmatched openers, so we are opener */
3831  md_mark_chain_append(ctx, &DOLLAR_OPENERS, mark_index);
3832  }
3833 }
3834 
3835 static void
3836 md_analyze_permissive_url_autolink(MD_CTX* ctx, int mark_index)
3837 {
3838  MD_MARK* opener = &ctx->marks[mark_index];
3839  int closer_index = mark_index + 1;
3840  MD_MARK* closer = &ctx->marks[closer_index];
3841  MD_MARK* next_resolved_mark;
3842  OFF off = opener->end;
3843  int n_dots = FALSE;
3844  int has_underscore_in_last_seg = FALSE;
3845  int has_underscore_in_next_to_last_seg = FALSE;
3846  int n_opened_parenthesis = 0;
3847  int n_excess_parenthesis = 0;
3848 
3849  /* Check for domain. */
3850  while(off < ctx->size) {
3851  if(ISALNUM(off) || CH(off) == _T('-')) {
3852  off++;
3853  } else if(CH(off) == _T('.')) {
3854  /* We must see at least one period. */
3855  n_dots++;
3856  has_underscore_in_next_to_last_seg = has_underscore_in_last_seg;
3857  has_underscore_in_last_seg = FALSE;
3858  off++;
3859  } else if(CH(off) == _T('_')) {
3860  /* No underscore may be present in the last two domain segments. */
3861  has_underscore_in_last_seg = TRUE;
3862  off++;
3863  } else {
3864  break;
3865  }
3866  }
3867  if(off > opener->end && CH(off-1) == _T('.')) {
3868  off--;
3869  n_dots--;
3870  }
3871  if(off <= opener->end || n_dots == 0 || has_underscore_in_next_to_last_seg || has_underscore_in_last_seg)
3872  return;
3873 
3874  /* Check for path. */
3875  next_resolved_mark = closer + 1;
3876  while(next_resolved_mark->ch == 'D' || !(next_resolved_mark->flags & MD_MARK_RESOLVED))
3877  next_resolved_mark++;
3878  while(off < next_resolved_mark->beg && CH(off) != _T('<') && !ISWHITESPACE(off) && !ISNEWLINE(off)) {
3879  /* Parenthesis must be balanced. */
3880  if(CH(off) == _T('(')) {
3881  n_opened_parenthesis++;
3882  } else if(CH(off) == _T(')')) {
3883  if(n_opened_parenthesis > 0)
3884  n_opened_parenthesis--;
3885  else
3886  n_excess_parenthesis++;
3887  }
3888 
3889  off++;
3890  }
3891 
3892  /* Trim a trailing punctuation from the end. */
3893  while(TRUE) {
3894  if(ISANYOF(off-1, _T("?!.,:*_~"))) {
3895  off--;
3896  } else if(CH(off-1) == ')' && n_excess_parenthesis > 0) {
3897  /* Unmatched ')' can be in an interior of the path but not at the
3898  * of it, so the auto-link may be safely nested in a parenthesis
3899  * pair. */
3900  off--;
3901  n_excess_parenthesis--;
3902  } else {
3903  break;
3904  }
3905  }
3906 
3907  /* Ok. Lets call it an auto-link. Adapt opener and create closer to zero
3908  * length so all the contents becomes the link text. */
3909  MD_ASSERT(closer->ch == 'D');
3910  opener->end = opener->beg;
3911  closer->ch = opener->ch;
3912  closer->beg = off;
3913  closer->end = off;
3914  md_resolve_range(ctx, NULL, mark_index, closer_index);
3915 }
3916 
3917 /* The permissive autolinks do not have to be enclosed in '<' '>' but we
3918  * instead impose stricter rules what is understood as an e-mail address
3919  * here. Actually any non-alphanumeric characters with exception of '.'
3920  * are prohibited both in username and after '@'. */
3921 static void
3922 md_analyze_permissive_email_autolink(MD_CTX* ctx, int mark_index)
3923 {
3924  MD_MARK* opener = &ctx->marks[mark_index];
3925  int closer_index;
3926  MD_MARK* closer;
3927  OFF beg = opener->beg;
3928  OFF end = opener->end;
3929  int dot_count = 0;
3930 
3931  MD_ASSERT(CH(beg) == _T('@'));
3932 
3933  /* Scan for name before '@'. */
3934  while(beg > 0 && (ISALNUM(beg-1) || ISANYOF(beg-1, _T(".-_+"))))
3935  beg--;
3936 
3937  /* Scan for domain after '@'. */
3938  while(end < ctx->size && (ISALNUM(end) || ISANYOF(end, _T(".-_")))) {
3939  if(CH(end) == _T('.'))
3940  dot_count++;
3941  end++;
3942  }
3943  if(CH(end-1) == _T('.')) { /* Final '.' not part of it. */
3944  dot_count--;
3945  end--;
3946  }
3947  else if(ISANYOF2(end-1, _T('-'), _T('_'))) /* These are forbidden at the end. */
3948  return;
3949  if(CH(end-1) == _T('@') || dot_count == 0)
3950  return;
3951 
3952  /* Ok. Lets call it auto-link. Adapt opener and create closer to zero
3953  * length so all the contents becomes the link text. */
3954  closer_index = mark_index + 1;
3955  closer = &ctx->marks[closer_index];
3956  MD_ASSERT(closer->ch == 'D');
3957 
3958  opener->beg = beg;
3959  opener->end = beg;
3960  closer->ch = opener->ch;
3961  closer->beg = end;
3962  closer->end = end;
3963  md_resolve_range(ctx, NULL, mark_index, closer_index);
3964 }
3965 
3966 static inline void
3967 md_analyze_marks(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
3968  int mark_beg, int mark_end, const CHAR* mark_chars)
3969 {
3970  int i = mark_beg;
3971  MD_UNUSED(lines);
3972  MD_UNUSED(n_lines);
3973 
3974  while(i < mark_end) {
3975  MD_MARK* mark = &ctx->marks[i];
3976 
3977  /* Skip resolved spans. */
3978  if(mark->flags & MD_MARK_RESOLVED) {
3979  if(mark->flags & MD_MARK_OPENER) {
3980  MD_ASSERT(i < mark->next);
3981  i = mark->next + 1;
3982  } else {
3983  i++;
3984  }
3985  continue;
3986  }
3987 
3988  /* Skip marks we do not want to deal with. */
3989  if(!ISANYOF_(mark->ch, mark_chars)) {
3990  i++;
3991  continue;
3992  }
3993 
3994  /* Analyze the mark. */
3995  switch(mark->ch) {
3996  case '[': /* Pass through. */
3997  case '!': /* Pass through. */
3998  case ']': md_analyze_bracket(ctx, i); break;
3999  case '&': md_analyze_entity(ctx, i); break;
4000  case '|': md_analyze_table_cell_boundary(ctx, i); break;
4001  case '_': /* Pass through. */
4002  case '*': md_analyze_emph(ctx, i); break;
4003  case '~': md_analyze_tilde(ctx, i); break;
4004  case '$': md_analyze_dollar(ctx, i); break;
4005  case '.': /* Pass through. */
4006  case ':': md_analyze_permissive_url_autolink(ctx, i); break;
4007  case '@': md_analyze_permissive_email_autolink(ctx, i); break;
4008  }
4009 
4010  i++;
4011  }
4012 }
4013 
4014 /* Analyze marks (build ctx->marks). */
4015 static int
4016 md_analyze_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines, int table_mode)
4017 {
4018  int ret;
4019 
4020  /* Reset the previously collected stack of marks. */
4021  ctx->n_marks = 0;
4022 
4023  /* Collect all marks. */
4024  MD_CHECK(md_collect_marks(ctx, lines, n_lines, table_mode));
4025 
4026  /* We analyze marks in few groups to handle their precedence. */
4027  /* (1) Entities; code spans; autolinks; raw HTML. */
4028  md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("&"));
4029 
4030  /* (2) Links. */
4031  md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("[]!"));
4032  MD_CHECK(md_resolve_links(ctx, lines, n_lines));
4033  BRACKET_OPENERS.head = -1;
4034  BRACKET_OPENERS.tail = -1;
4035  ctx->unresolved_link_head = -1;
4036  ctx->unresolved_link_tail = -1;
4037 
4038  if(table_mode) {
4039  /* (3) Analyze table cell boundaries.
4040  * Note we reset TABLECELLBOUNDARIES chain prior to the call md_analyze_marks(),
4041  * not after, because caller may need it. */
4042  MD_ASSERT(n_lines == 1);
4043  TABLECELLBOUNDARIES.head = -1;
4044  TABLECELLBOUNDARIES.tail = -1;
4045  ctx->n_table_cell_boundaries = 0;
4046  md_analyze_marks(ctx, lines, n_lines, 0, ctx->n_marks, _T("|"));
4047  return ret;
4048  }
4049 
4050  /* (4) Emphasis and strong emphasis; permissive autolinks. */
4051  md_analyze_link_contents(ctx, lines, n_lines, 0, ctx->n_marks);
4052 
4053 abort:
4054  return ret;
4055 }
4056 
4057 static void
4058 md_analyze_link_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines,
4059  int mark_beg, int mark_end)
4060 {
4061  int i;
4062 
4063  md_analyze_marks(ctx, lines, n_lines, mark_beg, mark_end, _T("*_~$@:."));
4064 
4065  for(i = OPENERS_CHAIN_FIRST; i <= OPENERS_CHAIN_LAST; i++) {
4066  ctx->mark_chains[i].head = -1;
4067  ctx->mark_chains[i].tail = -1;
4068  }
4069 }
4070 
4071 static int
4072 md_enter_leave_span_a(MD_CTX* ctx, int enter, MD_SPANTYPE type,
4073  const CHAR* dest, SZ dest_size, int prohibit_escapes_in_dest,
4074  const CHAR* title, SZ title_size)
4075 {
4076  MD_ATTRIBUTE_BUILD href_build = { 0 };
4077  MD_ATTRIBUTE_BUILD title_build = { 0 };
4078  MD_SPAN_A_DETAIL det;
4079  int ret = 0;
4080 
4081  /* Note we here rely on fact that MD_SPAN_A_DETAIL and
4082  * MD_SPAN_IMG_DETAIL are binary-compatible. */
4083  memset(&det, 0, sizeof(MD_SPAN_A_DETAIL));
4084  MD_CHECK(md_build_attribute(ctx, dest, dest_size,
4085  (prohibit_escapes_in_dest ? MD_BUILD_ATTR_NO_ESCAPES : 0),
4086  &det.href, &href_build));
4087  MD_CHECK(md_build_attribute(ctx, title, title_size, 0, &det.title, &title_build));
4088 
4089  if(enter)
4090  MD_ENTER_SPAN(type, &det);
4091  else
4092  MD_LEAVE_SPAN(type, &det);
4093 
4094 abort:
4095  md_free_attribute(ctx, &href_build);
4096  md_free_attribute(ctx, &title_build);
4097  return ret;
4098 }
4099 
4100 static int
4101 md_enter_leave_span_wikilink(MD_CTX* ctx, int enter, const CHAR* target, SZ target_size)
4102 {
4103  MD_ATTRIBUTE_BUILD target_build = { 0 };
4105  int ret = 0;
4106 
4107  memset(&det, 0, sizeof(MD_SPAN_WIKILINK_DETAIL));
4108  MD_CHECK(md_build_attribute(ctx, target, target_size, 0, &det.target, &target_build));
4109 
4110  if (enter)
4112  else
4114 
4115 abort:
4116  md_free_attribute(ctx, &target_build);
4117  return ret;
4118 }
4119 
4120 
/* Render the output according to the analyzed ctx->marks.
 *
 * Walks the text of the given lines from lines[0].beg to lines[n_lines-1].end.
 * Text lying between resolved marks is emitted through MD_TEXT with the
 * current text type; whenever a resolved mark is reached, the code dispatches
 * on mark->ch. At each end of line it emits what the current text type calls
 * for (trailing blanks and, where applicable, a single space inside code or
 * LaTeX math spans; a verbatim newline inside raw HTML; otherwise a soft or
 * hard line break) and moves on to the next line.
 *
 * Returns ret, which starts as 0 and is handed back at the abort label.
 *
 * NOTE(review): this listing seems to have lost several lines inside the
 * function (its embedded numbering skips e.g. 4168, 4171, 4180, 4185, 4196,
 * 4217, 4224, 4308 and 4320), which leaves some branches with empty bodies.
 * Presumably those were the span enter/leave calls; confirm against the
 * upstream file before relying on this text.
 */
4122 static int
4123 md_process_inlines(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
4124 {
4125  MD_TEXTTYPE text_type;
4126  const MD_LINE* line = lines;
4127  MD_MARK* prev_mark = NULL;
4128  MD_MARK* mark;
4129  OFF off = lines[0].beg;
4130  OFF end = lines[n_lines-1].end;
4131  int enforce_hardbreak = 0;
4132  int ret = 0;
4133 
4134  /* Find first resolved mark. Note there is always at least one resolved
4135  * mark, the dummy last one after the end of the latest line we actually
4136  * never really reach. This saves us of a lot of special checks and cases
4137  * in this function. */
4138  mark = ctx->marks;
4139  while(!(mark->flags & MD_MARK_RESOLVED))
4140  mark++;
4141 
4142  text_type = MD_TEXT_NORMAL;
4143 
4144  while(1) {
4145  /* Process the text up to the next mark or end-of-line. */
4146  OFF tmp = (line->end < mark->beg ? line->end : mark->beg);
4147  if(tmp > off) {
4148  MD_TEXT(text_type, STR(off), tmp - off);
4149  off = tmp;
4150  }
4151 
4152  /* If reached the mark, process it and move to next one. */
4153  if(off >= mark->beg) {
4154  switch(mark->ch) {
4155  case '\\': /* Backslash escape. */
 /* A backslash right before a newline only requests a hard break
  * at the end of this line; otherwise the escaped character is
  * emitted as ordinary text. */
4156  if(ISNEWLINE(mark->beg+1))
4157  enforce_hardbreak = 1;
4158  else
4159  MD_TEXT(text_type, STR(mark->beg+1), 1);
4160  break;
4161 
4162  case ' ': /* Non-trivial space. */
4163  MD_TEXT(text_type, _T(" "), 1);
4164  break;
4165 
4166  case '`': /* Code span. */
4167  if(mark->flags & MD_MARK_OPENER) {
4169  text_type = MD_TEXT_CODE;
4170  } else {
4172  text_type = MD_TEXT_NORMAL;
4173  }
4174  break;
4175 
4176  case '_': /* Underline (or emphasis if we fall through). */
4177  if(ctx->parser.flags & MD_FLAG_UNDERLINE) {
4178  if(mark->flags & MD_MARK_OPENER) {
4179  while(off < mark->end) {
4181  off++;
4182  }
4183  } else {
4184  while(off < mark->end) {
4186  off++;
4187  }
4188  }
4189  break;
4190  }
4191  MD_FALLTHROUGH();
4192 
4193  case '*': /* Emphasis, strong emphasis. */
 /* The mark length is consumed in steps of two characters plus,
  * when the length is odd, one single character: first for an
  * opener, last for a closer. */
4194  if(mark->flags & MD_MARK_OPENER) {
4195  if((mark->end - off) % 2) {
4197  off++;
4198  }
4199  while(off + 1 < mark->end) {
4201  off += 2;
4202  }
4203  } else {
4204  while(off + 1 < mark->end) {
4206  off += 2;
4207  }
4208  if((mark->end - off) % 2) {
4210  off++;
4211  }
4212  }
4213  break;
4214 
4215  case '~':
4216  if(mark->flags & MD_MARK_OPENER)
4218  else
4220  break;
4221 
4222  case '$':
4223  if(mark->flags & MD_MARK_OPENER) {
4225  text_type = MD_TEXT_LATEXMATH;
4226  } else {
4228  text_type = MD_TEXT_NORMAL;
4229  }
4230  break;
4231 
4232  case '[': /* Link, wiki link, image. */
4233  case '!':
4234  case ']':
4235  {
4236  const MD_MARK* opener = (mark->ch != ']' ? mark : &ctx->marks[mark->prev]);
4237  const MD_MARK* closer = &ctx->marks[opener->next];
4238  const MD_MARK* dest_mark;
4239  const MD_MARK* title_mark;
4240 
 /* An opening '[' mark and a closing ']' mark that are both at
  * least two characters long are handled as a wiki link. When
  * the opener is longer than two characters, the target is
  * taken from inside the opener mark itself; otherwise it is
  * the text between opener and closer. */
4241  if ((opener->ch == '[' && closer->ch == ']') &&
4242  opener->end - opener->beg >= 2 &&
4243  closer->end - closer->beg >= 2)
4244  {
4245  int has_label = (opener->end - opener->beg > 2);
4246  SZ target_sz;
4247 
4248  if(has_label)
4249  target_sz = opener->end - (opener->beg+2);
4250  else
4251  target_sz = closer->beg - opener->end;
4252 
4253  MD_CHECK(md_enter_leave_span_wikilink(ctx, (mark->ch != ']'),
4254  has_label ? STR(opener->beg+2) : STR(opener->end),
4255  target_sz));
4256 
4257  break;
4258  }
4259 
 /* The two marks following the opener are expected to be dummy
  * 'D' marks describing the link destination and title. */
4260  dest_mark = opener+1;
4261  MD_ASSERT(dest_mark->ch == 'D');
4262  title_mark = opener+2;
4263  MD_ASSERT(title_mark->ch == 'D');
4264 
4265  MD_CHECK(md_enter_leave_span_a(ctx, (mark->ch != ']'),
4266  (opener->ch == '!' ? MD_SPAN_IMG : MD_SPAN_A),
4267  STR(dest_mark->beg), dest_mark->end - dest_mark->beg, FALSE,
4268  md_mark_get_ptr(ctx, (int)(title_mark - ctx->marks)), title_mark->prev));
4269 
4270  /* link/image closer may span multiple lines. */
4271  if(mark->ch == ']') {
4272  while(mark->end > line->end)
4273  line++;
4274  }
4275 
4276  break;
4277  }
4278 
4279  case '<':
4280  case '>': /* Autolink or raw HTML. */
4281  if(!(mark->flags & MD_MARK_AUTOLINK)) {
4282  /* Raw HTML. */
4283  if(mark->flags & MD_MARK_OPENER)
4284  text_type = MD_TEXT_HTML;
4285  else
4286  text_type = MD_TEXT_NORMAL;
4287  break;
4288  }
4289  /* Pass through, if auto-link. */
4290  MD_FALLTHROUGH();
4291 
4292  case '@': /* Permissive e-mail autolink. */
4293  case ':': /* Permissive URL autolink. */
4294  case '.': /* Permissive WWW autolink. */
4295  {
4296  MD_MARK* opener = ((mark->flags & MD_MARK_OPENER) ? mark : &ctx->marks[mark->prev]);
4297  MD_MARK* closer = &ctx->marks[opener->next];
4298  const CHAR* dest = STR(opener->end);
4299  SZ dest_size = closer->beg - opener->end;
4300 
4301  /* For permissive auto-links we do not know closer mark
4302  * position at the time of md_collect_marks(), therefore
4303  * it can be out-of-order in ctx->marks[].
4304  *
4305  * With this flag, we make sure that we output the closer
4306  * only if we processed the opener. */
4307  if(mark->flags & MD_MARK_OPENER)
4309 
 /* E-mail and WWW autolinks get a scheme prepended in the
  * temporary buffer; both prefixes used below ("mailto:" and
  * "http://") are 7 characters long. */
4310  if(opener->ch == '@' || opener->ch == '.') {
4311  dest_size += 7;
4312  MD_TEMP_BUFFER(dest_size * sizeof(CHAR));
4313  memcpy(ctx->buffer,
4314  (opener->ch == '@' ? _T("mailto:") : _T("http://")),
4315  7 * sizeof(CHAR));
4316  memcpy(ctx->buffer + 7, dest, (dest_size-7) * sizeof(CHAR));
4317  dest = ctx->buffer;
4318  }
4319 
4321  MD_CHECK(md_enter_leave_span_a(ctx, (mark->flags & MD_MARK_OPENER),
4322  MD_SPAN_A, dest, dest_size, TRUE, NULL, 0));
4323  break;
4324  }
4325 
4326  case '&': /* Entity. */
4327  MD_TEXT(MD_TEXT_ENTITY, STR(mark->beg), mark->end - mark->beg);
4328  break;
4329 
4330  case '\0':
4331  MD_TEXT(MD_TEXT_NULLCHAR, _T(""), 1);
4332  break;
4333 
 /* NOTE(review): presumably the dummy terminating mark described
  * at the top of this function; confirm. */
4334  case 127:
4335  goto abort;
4336  }
4337 
4338  off = mark->end;
4339 
4340  /* Move to next resolved mark. */
 /* Resolved marks starting before the current offset are skipped
  * as well. */
4341  prev_mark = mark;
4342  mark++;
4343  while(!(mark->flags & MD_MARK_RESOLVED) || mark->beg < off)
4344  mark++;
4345  }
4346 
4347  /* If reached end of line, move to next one. */
4348  if(off >= line->end) {
4349  /* If it is the last line, we are done. */
4350  if(off >= end)
4351  break;
4352 
4353  if(text_type == MD_TEXT_CODE || text_type == MD_TEXT_LATEXMATH) {
4354  OFF tmp;
4355 
4356  MD_ASSERT(prev_mark != NULL);
4357  MD_ASSERT(ISANYOF2_(prev_mark->ch, '`', '$') && (prev_mark->flags & MD_MARK_OPENER));
4358  MD_ASSERT(ISANYOF2_(mark->ch, '`', '$') && (mark->flags & MD_MARK_CLOSER));
4359 
4360  /* Inside a code span, trailing line whitespace has to be
4361  * outputted. */
4362  tmp = off;
4363  while(off < ctx->size && ISBLANK(off))
4364  off++;
4365  if(off > tmp)
4366  MD_TEXT(text_type, STR(tmp), off-tmp);
4367 
4368  /* and new lines are transformed into single spaces. */
4369  if(prev_mark->end < off && off < mark->beg)
4370  MD_TEXT(text_type, _T(" "), 1);
4371  } else if(text_type == MD_TEXT_HTML) {
4372  /* Inside raw HTML, we output the new line verbatim, including
4373  * any trailing spaces. */
4374  OFF tmp = off;
4375 
4376  while(tmp < end && ISBLANK(tmp))
4377  tmp++;
4378  if(tmp > off)
4379  MD_TEXT(MD_TEXT_HTML, STR(off), tmp - off);
4380  MD_TEXT(MD_TEXT_HTML, _T("\n"), 1);
4381  } else {
4382  /* Output soft or hard line break. */
4383  MD_TEXTTYPE break_type = MD_TEXT_SOFTBR;
4384 
 /* A hard break is produced either by a trailing backslash
  * (enforce_hardbreak) or by two spaces at the end of the line. */
4385  if(text_type == MD_TEXT_NORMAL) {
4386  if(enforce_hardbreak)
4387  break_type = MD_TEXT_BR;
4388  else if((CH(line->end) == _T(' ') && CH(line->end+1) == _T(' ')))
4389  break_type = MD_TEXT_BR;
4390  }
4391 
4392  MD_TEXT(break_type, _T("\n"), 1);
4393  }
4394 
4395  /* Move to the next line. */
4396  line++;
4397  off = line->beg;
4398 
4399  enforce_hardbreak = 0;
4400  }
4401  }
4402 
4403 abort:
4404  return ret;
4405 }
4406 
4407 
4408 /***************************
4409  *** Processing Tables ***
4410  ***************************/
4411 
4412 static void
4413 md_analyze_table_alignment(MD_CTX* ctx, OFF beg, OFF end, MD_ALIGN* align, int n_align)
4414 {
4415  static const MD_ALIGN align_map[] = { MD_ALIGN_DEFAULT, MD_ALIGN_LEFT, MD_ALIGN_RIGHT, MD_ALIGN_CENTER };
4416  OFF off = beg;
4417 
4418  while(n_align > 0) {
4419  int index = 0; /* index into align_map[] */
4420 
4421  while(CH(off) != _T('-'))
4422  off++;
4423  if(off > beg && CH(off-1) == _T(':'))
4424  index |= 1;
4425  while(off < end && CH(off) == _T('-'))
4426  off++;
4427  if(off < end && CH(off) == _T(':'))
4428  index |= 2;
4429 
4430  *align = align_map[index];
4431  align++;
4432  n_align--;
4433  }
4434 
4435 }
4436 
4437 /* Forward declaration. */
4438 static int md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines);
4439 
4440 static int
4441 md_process_table_cell(MD_CTX* ctx, MD_BLOCKTYPE cell_type, MD_ALIGN align, OFF beg, OFF end)
4442 {
4443  MD_LINE line;
4444  MD_BLOCK_TD_DETAIL det;
4445  int ret = 0;
4446 
4447  while(beg < end && ISWHITESPACE(beg))
4448  beg++;
4449  while(end > beg && ISWHITESPACE(end-1))
4450  end--;
4451 
4452  det.align = align;
4453  line.beg = beg;
4454  line.end = end;
4455 
4456  MD_ENTER_BLOCK(cell_type, &det);
4457  MD_CHECK(md_process_normal_block_contents(ctx, &line, 1));
4458  MD_LEAVE_BLOCK(cell_type, &det);
4459 
4460 abort:
4461  return ret;
4462 }
4463 
/* Process one table row located at [beg, end).
 *
 * The row is analyzed in table mode to find the pipe marks forming the cell
 * boundaries; their end offsets are copied into a local array (framed by beg
 * and end+1) because ctx->marks[] gets reused while the cells are processed.
 * At most col_count cells are then processed with cell_type and the matching
 * entry of align[]; boundary pairs with nothing in between are skipped. When
 * the row yields fewer than col_count cells, the remainder is processed as
 * empty cells (offsets 0, 0).
 *
 * Returns ret: 0 on success, -1 if malloc() fails, or whatever value a
 * checked step leaves in it when jumping to the abort label.
 *
 * NOTE(review): the embedded numbering of this listing skips 4498 and 4508,
 * right before and after the cell loop; presumably the calls entering and
 * leaving the row block were there. Confirm against the upstream file.
 */
4464 static int
4465 md_process_table_row(MD_CTX* ctx, MD_BLOCKTYPE cell_type, OFF beg, OFF end,
4466  const MD_ALIGN* align, int col_count)
4467 {
4468  MD_LINE line;
4469  OFF* pipe_offs = NULL;
4470  int i, j, k, n;
4471  int ret = 0;
4472 
4473  line.beg = beg;
4474  line.end = end;
4475 
4476  /* Break the line into table cells by identifying pipe characters who
4477  * form the cell boundary. */
4478  MD_CHECK(md_analyze_inlines(ctx, &line, 1, TRUE));
4479 
4480  /* We have to remember the cell boundaries in local buffer because
4481  * ctx->marks[] shall be reused during cell contents processing. */
 /* Two extra slots: one for the row start, one for the row end. */
4482  n = ctx->n_table_cell_boundaries + 2;
4483  pipe_offs = (OFF*) malloc(n * sizeof(OFF));
4484  if(pipe_offs == NULL) {
4485  MD_LOG("malloc() failed.");
4486  ret = -1;
4487  goto abort;
4488  }
4489  j = 0;
4490  pipe_offs[j++] = beg;
4491  for(i = TABLECELLBOUNDARIES.head; i >= 0; i = ctx->marks[i].next) {
4492  MD_MARK* mark = &ctx->marks[i];
4493  pipe_offs[j++] = mark->end;
4494  }
 /* Stored as end+1 so that the "next boundary minus one" arithmetic
  * below yields `end` for the last cell. */
4495  pipe_offs[j++] = end+1;
4496 
4497  /* Process cells. */
4499  k = 0;
4500  for(i = 0; i < j-1 && k < col_count; i++) {
4501  if(pipe_offs[i] < pipe_offs[i+1]-1)
4502  MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], pipe_offs[i], pipe_offs[i+1]-1));
4503  }
4504  /* Make sure we call enough table cells even if the current table contains
4505  * too few of them. */
4506  while(k < col_count)
4507  MD_CHECK(md_process_table_cell(ctx, cell_type, align[k++], 0, 0));
4509 
4510 abort:
4511  free(pipe_offs);
4512 
4513  /* Free any temporary memory blocks stored within some dummy marks. */
4514  for(i = PTR_CHAIN.head; i >= 0; i = ctx->marks[i].next)
4515  free(md_mark_get_ptr(ctx, i));
4516  PTR_CHAIN.head = -1;
4517  PTR_CHAIN.tail = -1;
4518 
4519  return ret;
4520 }
4521 
/* Process the contents of a table block made of n_lines lines with col_count
 * columns.
 *
 * lines[0] holds the column headers and lines[1] the underline from which
 * the per-column alignment is derived; every following line is processed as
 * a body row. The header row is processed with MD_BLOCK_TH cells, body rows
 * with MD_BLOCK_TD cells.
 *
 * Returns ret: 0 on success, -1 if malloc() fails, or whatever value a
 * checked step leaves in it when jumping to the abort label. The alignment
 * array is freed on every path.
 *
 * NOTE(review): the embedded numbering of this listing skips 4542, 4545,
 * 4548 and 4553, i.e. around the header row and around the body loop;
 * presumably the calls entering and leaving the table head and body blocks
 * were there. Confirm against the upstream file.
 */
4522 static int
4523 md_process_table_block_contents(MD_CTX* ctx, int col_count, const MD_LINE* lines, int n_lines)
4524 {
4525  MD_ALIGN* align;
4526  int i;
4527  int ret = 0;
4528 
4529  /* At least two lines have to be present: The column headers and the line
4530  * with the underlines. */
4531  MD_ASSERT(n_lines >= 2);
4532 
4533  align = malloc(col_count * sizeof(MD_ALIGN));
4534  if(align == NULL) {
4535  MD_LOG("malloc() failed.");
4536  ret = -1;
4537  goto abort;
4538  }
4539 
4540  md_analyze_table_alignment(ctx, lines[1].beg, lines[1].end, align, col_count);
4541 
4543  MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TH,
4544  lines[0].beg, lines[0].end, align, col_count));
4546 
4547  if(n_lines > 2) {
4549  for(i = 2; i < n_lines; i++) {
4550  MD_CHECK(md_process_table_row(ctx, MD_BLOCK_TD,
4551  lines[i].beg, lines[i].end, align, col_count));
4552  }
4554  }
4555 
4556 abort:
4557  free(align);
4558  return ret;
4559 }
4560 
4561 
4562 /**************************
4563  *** Processing Block ***
4564  **************************/
4565 
/* Bits tested against the `flags` member of a block record.
 *
 * OPENER/CLOSER mark a record that opens/closes a container block (their
 * union is MD_BLOCK_CONTAINER); LOOSE_LIST marks a list as loose, i.e. not
 * tight.
 * NOTE(review): MD_BLOCK_SETEXT_HEADER presumably marks a header built from
 * a setext underline; its use is not visible in this part of the file.
 */
4566 #define MD_BLOCK_CONTAINER_OPENER 0x01
4567 #define MD_BLOCK_CONTAINER_CLOSER 0x02
4568 #define MD_BLOCK_CONTAINER (MD_BLOCK_CONTAINER_OPENER | MD_BLOCK_CONTAINER_CLOSER)
4569 #define MD_BLOCK_LOOSE_LIST 0x04
4570 #define MD_BLOCK_SETEXT_HEADER 0x08
4571 
/* NOTE(review): the opening of this struct definition is absent from this
 * listing (its embedded numbering skips 4572-4573). Judging from how the
 * members are used later in this file, this is the block record that also
 * carries a `type` member; confirm against the upstream file.
 */
 /* Combination of the MD_BLOCK_* flag bits. */
4574  unsigned flags : 8;
4575 
4576  /* MD_BLOCK_H: Header level (1 - 6)
4577  * MD_BLOCK_CODE: Non-zero if fenced, zero if indented.
4578  * MD_BLOCK_LI: Task mark character (0 if not task list item, 'x', 'X' or ' ').
4579  * MD_BLOCK_TABLE: Column count (as determined by the table underline).
4580  */
4581  unsigned data : 16;
4582 
4583  /* Leaf blocks: Count of lines (MD_LINE or MD_VERBATIMLINE) on the block.
4584  * MD_BLOCK_LI: Task mark offset in the input doc.
4585  * MD_BLOCK_OL: Start item number.
4586  */
4587  unsigned n_lines;
4588 };
4589 
/* NOTE(review): this struct definition is incomplete in this listing (its
 * embedded numbering skips 4590-4591 and 4596-4598), so its opening line and
 * some members are missing. Presumably it is the element type of
 * ctx->containers[]; confirm against the upstream file.
 */
 /* Read and written through ctx->containers[] while blocks are processed,
  * to tell loose lists from tight ones. */
4592  unsigned is_loose : 8;
4593  unsigned is_task : 8;
4594  unsigned start;
4595  unsigned mark_indent;
4599 };
4600 
4601 
4602 static int
4603 md_process_normal_block_contents(MD_CTX* ctx, const MD_LINE* lines, int n_lines)
4604 {
4605  int i;
4606  int ret;
4607 
4608  MD_CHECK(md_analyze_inlines(ctx, lines, n_lines, FALSE));
4609  MD_CHECK(md_process_inlines(ctx, lines, n_lines));
4610 
4611 abort:
4612  /* Free any temporary memory blocks stored within some dummy marks. */
4613  for(i = PTR_CHAIN.head; i >= 0; i = ctx->marks[i].next)
4614  free(md_mark_get_ptr(ctx, i));
4615  PTR_CHAIN.head = -1;
4616  PTR_CHAIN.tail = -1;
4617 
4618  return ret;
4619 }
4620 
/* Emit the given verbatim lines as text of the given text_type.
 *
 * For each line this outputs its indentation (line->indent characters taken
 * from indent_chunk_str, in pieces of at most indent_chunk_size), then the
 * line contents [line->beg, line->end) through MD_TEXT_INSECURE, and finally
 * a "\n".
 *
 * Returns ret, which starts as 0 and is handed back at the abort label.
 *
 * NOTE(review): the chunk literal shows up as a single space in this
 * listing. The listing has its whitespace collapsed elsewhere, so the
 * literal may originally have been longer; confirm against the upstream
 * file.
 */
4621 static int
4622 md_process_verbatim_block_contents(MD_CTX* ctx, MD_TEXTTYPE text_type, const MD_VERBATIMLINE* lines, int n_lines)
4623 {
4624  static const CHAR indent_chunk_str[] = _T(" ");
 /* Length of the chunk without its terminating zero. */
4625  static const SZ indent_chunk_size = SIZEOF_ARRAY(indent_chunk_str) - 1;
4626 
4627  int i;
4628  int ret = 0;
4629 
4630  for(i = 0; i < n_lines; i++) {
4631  const MD_VERBATIMLINE* line = &lines[i];
4632  int indent = line->indent;
4633 
4634  MD_ASSERT(indent >= 0);
4635 
4636  /* Output code indentation. */
 /* Whole chunks first, then the remainder, which is at most one chunk
  * long. */
4637  while(indent > (int) indent_chunk_size) {
4638  MD_TEXT(text_type, indent_chunk_str, indent_chunk_size);
4639  indent -= indent_chunk_size;
4640  }
4641  if(indent > 0)
4642  MD_TEXT(text_type, indent_chunk_str, indent);
4643 
4644  /* Output the code line itself. */
4645  MD_TEXT_INSECURE(text_type, STR(line->beg), line->end - line->beg);
4646 
4647  /* Enforce end-of-line. */
4648  MD_TEXT(text_type, _T("\n"), 1);
4649  }
4650 
4651 abort:
4652  return ret;
4653 }
4654 
4655 static int
4656 md_process_code_block_contents(MD_CTX* ctx, int is_fenced, const MD_VERBATIMLINE* lines, int n_lines)
4657 {
4658  if(is_fenced) {
4659  /* Skip the first line in case of fenced code: It is the fence.
4660  * (Only the starting fence is present due to logic in md_analyze_line().) */
4661  lines++;
4662  n_lines--;
4663  } else {
4664  /* Ignore blank lines at start/end of indented code block. */
4665  while(n_lines > 0 && lines[0].beg == lines[0].end) {
4666  lines++;
4667  n_lines--;
4668  }
4669  while(n_lines > 0 && lines[n_lines-1].beg == lines[n_lines-1].end) {
4670  n_lines--;
4671  }
4672  }
4673 
4674  if(n_lines == 0)
4675  return 0;
4676 
4677  return md_process_verbatim_block_contents(ctx, MD_TEXT_CODE, lines, n_lines);
4678 }
4679 
4680 static int
4681 md_setup_fenced_code_detail(MD_CTX* ctx, const MD_BLOCK* block, MD_BLOCK_CODE_DETAIL* det,
4682  MD_ATTRIBUTE_BUILD* info_build, MD_ATTRIBUTE_BUILD* lang_build)
4683 {
4684  const MD_VERBATIMLINE* fence_line = (const MD_VERBATIMLINE*)(block + 1);
4685  OFF beg = fence_line->beg;
4686  OFF end = fence_line->end;
4687  OFF lang_end;
4688  CHAR fence_ch = CH(fence_line->beg);
4689  int ret = 0;
4690 
4691  /* Skip the fence itself. */
4692  while(beg < ctx->size && CH(beg) == fence_ch)
4693  beg++;
4694  /* Trim initial spaces. */
4695  while(beg < ctx->size && CH(beg) == _T(' '))
4696  beg++;
4697 
4698  /* Trim trailing spaces. */
4699  while(end > beg && CH(end-1) == _T(' '))
4700  end--;
4701 
4702  /* Build info string attribute. */
4703  MD_CHECK(md_build_attribute(ctx, STR(beg), end - beg, 0, &det->info, info_build));
4704 
4705  /* Build info string attribute. */
4706  lang_end = beg;
4707  while(lang_end < end && !ISWHITESPACE(lang_end))
4708  lang_end++;
4709  MD_CHECK(md_build_attribute(ctx, STR(beg), lang_end - beg, 0, &det->lang, lang_build));
4710 
4711  det->fence_char = fence_ch;
4712 
4713 abort:
4714  return ret;
4715 }
4716 
4717 static int
4718 md_process_leaf_block(MD_CTX* ctx, const MD_BLOCK* block)
4719 {
4720  union {
4724  } det;
4725  MD_ATTRIBUTE_BUILD info_build;
4726  MD_ATTRIBUTE_BUILD lang_build;
4727  int is_in_tight_list;
4728  int clean_fence_code_detail = FALSE;
4729  int ret = 0;
4730 
4731  memset(&det, 0, sizeof(det));
4732 
4733  if(ctx->n_containers == 0)
4734  is_in_tight_list = FALSE;
4735  else
4736  is_in_tight_list = !ctx->containers[ctx->n_containers-1].is_loose;
4737 
4738  switch(block->type) {
4739  case MD_BLOCK_H:
4740  det.header.level = block->data;
4741  break;
4742 
4743  case MD_BLOCK_CODE:
4744  /* For fenced code block, we may need to set the info string. */
4745  if(block->data != 0) {
4746  memset(&det.code, 0, sizeof(MD_BLOCK_CODE_DETAIL));
4747  clean_fence_code_detail = TRUE;
4748  MD_CHECK(md_setup_fenced_code_detail(ctx, block, &det.code, &info_build, &lang_build));
4749  }
4750  break;
4751 
4752  case MD_BLOCK_TABLE:
4753  det.table.col_count = block->data;
4754  det.table.head_row_count = 1;
4755  det.table.body_row_count = block->n_lines - 2;
4756  break;
4757 
4758  default:
4759  /* Noop. */
4760  break;
4761  }
4762 
4763  if(!is_in_tight_list || block->type != MD_BLOCK_P)
4764  MD_ENTER_BLOCK(block->type, (void*) &det);
4765 
4766  /* Process the block contents accordingly to is type. */
4767  switch(block->type) {
4768  case MD_BLOCK_HR:
4769  /* noop */
4770  break;
4771 
4772  case MD_BLOCK_CODE:
4773  MD_CHECK(md_process_code_block_contents(ctx, (block->data != 0),
4774  (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
4775  break;
4776 
4777  case MD_BLOCK_HTML:
4778  MD_CHECK(md_process_verbatim_block_contents(ctx, MD_TEXT_HTML,
4779  (const MD_VERBATIMLINE*)(block + 1), block->n_lines));
4780  break;
4781 
4782  case MD_BLOCK_TABLE:
4783  MD_CHECK(md_process_table_block_contents(ctx, block->data,
4784  (const MD_LINE*)(block + 1), block->n_lines));
4785  break;
4786 
4787  default:
4788  MD_CHECK(md_process_normal_block_contents(ctx,
4789  (const MD_LINE*)(block + 1), block->n_lines));
4790  break;
4791  }
4792 
4793  if(!is_in_tight_list || block->type != MD_BLOCK_P)
4794  MD_LEAVE_BLOCK(block->type, (void*) &det);
4795 
4796 abort:
4797  if(clean_fence_code_detail) {
4798  md_free_attribute(ctx, &info_build);
4799  md_free_attribute(ctx, &lang_build);
4800  }
4801  return ret;
4802 }
4803 
4804 static int
4805 md_process_all_blocks(MD_CTX* ctx)
4806 {
4807  int byte_off = 0;
4808  int ret = 0;
4809 
4810  /* ctx->containers now is not needed for detection of lists and list items
4811  * so we reuse it for tracking what lists are loose or tight. We rely
4812  * on the fact the vector is large enough to hold the deepest nesting
4813  * level of lists. */
4814  ctx->n_containers = 0;
4815 
4816  while(byte_off < ctx->n_block_bytes) {
4817  MD_BLOCK* block = (MD_BLOCK*)((char*)ctx->block_bytes + byte_off);
4818  union {
4819  MD_BLOCK_UL_DETAIL ul;
4820  MD_BLOCK_OL_DETAIL ol;
4821  MD_BLOCK_LI_DETAIL li;
4822  } det;
4823 
4824  switch(block->type) {
4825  case MD_BLOCK_UL:
4826  det.ul.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
4827  det.ul.mark = (CHAR) block->data;
4828  break;
4829 
4830  case MD_BLOCK_OL:
4831  det.ol.start = block->n_lines;
4832  det.ol.is_tight = (block->flags & MD_BLOCK_LOOSE_LIST) ? FALSE : TRUE;
4833  det.ol.mark_delimiter = (CHAR) block->data;
4834  break;
4835 
4836  case MD_BLOCK_LI:
4837  det.li.is_task = (block->data != 0);
4838  det.li.task_mark = (CHAR) block->data;
4839  det.li.task_mark_offset = (OFF) block->n_lines;
4840  break;
4841 
4842  default:
4843  /* noop */
4844  break;
4845  }
4846 
4847  if(block->flags & MD_BLOCK_CONTAINER) {
4848  if(block->flags & MD_BLOCK_CONTAINER_CLOSER) {
4849  MD_LEAVE_BLOCK(block->type, &det);
4850 
4851  if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL || block->type == MD_BLOCK_QUOTE)
4852  ctx->n_containers--;
4853  }
4854 
4855  if(block->flags & MD_BLOCK_CONTAINER_OPENER) {
4856  MD_ENTER_BLOCK(block->type, &det);
4857 
4858  if(block->type == MD_BLOCK_UL || block->type == MD_BLOCK_OL) {
4859  ctx->containers[ctx->n_containers].is_loose = (block->flags & MD_BLOCK_LOOSE_LIST);
4860  ctx->n_containers++;
4861  } else if(block->type == MD_BLOCK_QUOTE) {
4862  /* This causes that any text in a block quote, even if
4863  * nested inside a tight list item, is wrapped with
4864  * <p>...</p>. */
4865  ctx->containers[ctx->n_containers].is_loose = TRUE;
4866  ctx->n_containers++;
4867  }
4868  }
4869  } else {
4870  MD_CHECK(md_process_leaf_block(ctx, block));
4871 
4872  if(block->type == MD_BLOCK_CODE || block->type == MD_BLOCK_HTML)
4873  byte_off += block->n_lines * sizeof(MD_VERBATIMLINE);
4874  else
4875  byte_off += block->n_lines * sizeof(MD_LINE);
4876  }
4877 
4878  byte_off += sizeof(MD_BLOCK);
4879  }
4880 
4881  ctx->n_block_bytes = 0;
4882 
4883 abort:
4884  return ret;
4885 }
4886 
4887 
4888 /************************************
4889  *** Grouping Lines into Blocks ***
4890  ************************************/
4891 
4892 static void*
4893 md_push_block_bytes(MD_CTX* ctx, int n_bytes)
4894 {
4895  void* ptr;
4896 
4897  if(ctx->n_block_bytes + n_bytes > ctx->alloc_block_bytes) {
4898  void* new_block_bytes;
4899 
4900  ctx->alloc_block_bytes = (ctx->alloc_block_bytes > 0
4901  ? ctx->alloc_block_bytes + ctx->alloc_block_bytes / 2
4902  : 512);
4903  new_block_bytes = realloc(ctx->block_bytes, ctx->alloc_block_bytes);
4904  if(new_block_bytes == NULL) {
4905  MD_LOG("realloc() failed.");
4906  return NULL;
4907  }
4908 
4909  /* Fix the ->current_block after the reallocation. */
4910  if(ctx->current_block != NULL) {
4911  OFF off_current_block = (OFF)((char*) ctx->current_block - (char*) ctx->block_bytes);
4912  ctx->current_block = (MD_BLOCK*) ((char*) new_block_bytes + off_current_block);
4913  }
4914 
4915  ctx->block_bytes = new_block_bytes;
4916  }
4917 
4918  ptr = (char*)ctx->block_bytes + ctx->n_block_bytes;
4919  ctx->n_block_bytes += n_bytes;
4920  return ptr;
4921 }
4922 
4923 static int
4924 md_start_new_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* line)
4925 {
4926  MD_BLOCK* block;
4927 
4928  MD_ASSERT(ctx->current_block == NULL);
4929 
4930  block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK));
4931  if(block == NULL)
4932  return -1;
4933 
4934  switch(line->type) {
4935  case MD_LINE_HR:
4936  block->type = MD_BLOCK_HR;
4937  break;
4938 
4939  case MD_LINE_ATXHEADER:
4940  case MD_LINE_SETEXTHEADER:
4941  block->type = MD_BLOCK_H;
4942  break;
4943 
4944  case MD_LINE_FENCEDCODE:
4945  case MD_LINE_INDENTEDCODE:
4946  block->type = MD_BLOCK_CODE;
4947  break;
4948 
4949  case MD_LINE_TEXT:
4950  block->type = MD_BLOCK_P;
4951  break;
4952 
4953  case MD_LINE_HTML:
4954  block->type = MD_BLOCK_HTML;
4955  break;
4956 
4957  case MD_LINE_BLANK:
4960  default:
4961  MD_UNREACHABLE();
4962  break;
4963  }
4964 
4965  block->flags = 0;
4966  block->data = line->data;
4967  block->n_lines = 0;
4968 
4969  ctx->current_block = block;
4970  return 0;
4971 }
4972 
4973 /* Eat from start of current (textual) block any reference definitions and
4974  * remember them so we can resolve any links referring to them.
4975  *
4976  * (Reference definitions can only be at start of it as they cannot break
4977  * a paragraph.)
4978  */
4979 static int
4980 md_consume_link_reference_definitions(MD_CTX* ctx)
4981 {
4982  MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1);
4983  int n_lines = ctx->current_block->n_lines;
4984  int n = 0;
4985 
4986  /* Compute how many lines at the start of the block form one or more
4987  * reference definitions. */
4988  while(n < n_lines) {
4989  int n_link_ref_lines;
4990 
4991  n_link_ref_lines = md_is_link_reference_definition(ctx,
4992  lines + n, n_lines - n);
4993  /* Not a reference definition? */
4994  if(n_link_ref_lines == 0)
4995  break;
4996 
4997  /* We fail if it is the ref. def. but it could not be stored due
4998  * a memory allocation error. */
4999  if(n_link_ref_lines < 0)
5000  return -1;
5001 
5002  n += n_link_ref_lines;
5003  }
5004 
5005  /* If there was at least one reference definition, we need to remove
5006  * its lines from the block, or perhaps even the whole block. */
5007  if(n > 0) {
5008  if(n == n_lines) {
5009  /* Remove complete block. */
5010  ctx->n_block_bytes -= n * sizeof(MD_LINE);
5011  ctx->n_block_bytes -= sizeof(MD_BLOCK);
5012  ctx->current_block = NULL;
5013  } else {
5014  /* Remove just some initial lines from the block. */
5015  memmove(lines, lines + n, (n_lines - n) * sizeof(MD_LINE));
5016  ctx->current_block->n_lines -= n;
5017  ctx->n_block_bytes -= n * sizeof(MD_LINE);
5018  }
5019  }
5020 
5021  return 0;
5022 }
5023 
/* Finish the block currently being built, if there is one.
 *
 * When the first line of the block starts with '[', leading link reference
 * definitions are consumed first; if that removes the whole block, there is
 * nothing more to do. In the branch dealing with an underline, the underline
 * is dropped when the block has more than one line; when it is the only line
 * left, the block is turned into a paragraph and stays open. Otherwise
 * ctx->current_block is set to NULL to mark that no block is being built.
 *
 * Returns ret (0 unless the checked step jumps to the abort label with
 * another value).
 *
 * NOTE(review): the embedded numbering of this listing skips 5036 and 5046,
 * so the second half of the first condition and the whole condition opening
 * the underline branch are missing here. Presumably both involve setext
 * headers; confirm against the upstream file.
 */
5024 static int
5025 md_end_current_block(MD_CTX* ctx)
5026 {
5027  int ret = 0;
5028 
5029  if(ctx->current_block == NULL)
5030  return ret;
5031 
5032  /* Check whether there is a reference definition. (We do this here instead
5033  * of in md_analyze_line() because reference definition can take multiple
5034  * lines.) */
5035  if(ctx->current_block->type == MD_BLOCK_P ||
5037  {
5038  MD_LINE* lines = (MD_LINE*) (ctx->current_block + 1);
5039  if(CH(lines[0].beg) == _T('[')) {
5040  MD_CHECK(md_consume_link_reference_definitions(ctx));
5041  if(ctx->current_block == NULL)
5042  return ret;
5043  }
5044  }
5045 
5047  int n_lines = ctx->current_block->n_lines;
5048 
5049  if(n_lines > 1) {
5050  /* Get rid of the underline. */
5051  ctx->current_block->n_lines--;
5052  ctx->n_block_bytes -= sizeof(MD_LINE);
5053  } else {
5054  /* Only the underline has left after eating the ref. defs.
5055  * Keep the line as beginning of a new ordinary paragraph. */
5056  ctx->current_block->type = MD_BLOCK_P;
5057  return 0;
5058  }
5059  }
5060 
5061  /* Mark we are not building any block anymore. */
5062  ctx->current_block = NULL;
5063 
5064 abort:
5065  return ret;
5066 }
5067 
5068 static int
5069 md_add_line_into_current_block(MD_CTX* ctx, const MD_LINE_ANALYSIS* analysis)
5070 {
5071  MD_ASSERT(ctx->current_block != NULL);
5072 
5073  if(ctx->current_block->type == MD_BLOCK_CODE || ctx->current_block->type == MD_BLOCK_HTML) {
5075 
5076  line = (MD_VERBATIMLINE*) md_push_block_bytes(ctx, sizeof(MD_VERBATIMLINE));
5077  if(line == NULL)
5078  return -1;
5079 
5080  line->indent = analysis->indent;
5081  line->beg = analysis->beg;
5082  line->end = analysis->end;
5083  } else {
5084  MD_LINE* line;
5085 
5086  line = (MD_LINE*) md_push_block_bytes(ctx, sizeof(MD_LINE));
5087  if(line == NULL)
5088  return -1;
5089 
5090  line->beg = analysis->beg;
5091  line->end = analysis->end;
5092  }
5093  ctx->current_block->n_lines++;
5094 
5095  return 0;
5096 }
5097 
5098 static int
5099 md_push_container_bytes(MD_CTX* ctx, MD_BLOCKTYPE type, unsigned start,
5100  unsigned data, unsigned flags)
5101 {
5102  MD_BLOCK* block;
5103  int ret = 0;
5104 
5105  MD_CHECK(md_end_current_block(ctx));
5106 
5107  block = (MD_BLOCK*) md_push_block_bytes(ctx, sizeof(MD_BLOCK));
5108  if(block == NULL)
5109  return -1;
5110 
5111  block->type = type;
5112  block->flags = flags;
5113  block->data = data;
5114  block->n_lines = start;
5115 
5116 abort:
5117  return ret;
5118 }
5119 
5120 
5121 
5122 /***********************
5123  *** Line Analysis ***
5124  ***********************/
5125 
5126 static int
5127 md_is_hr_line(MD_CTX* ctx, OFF beg, OFF* p_end, OFF* p_killer)
5128 {
5129  OFF off = beg + 1;
5130  int n = 1;
5131 
5132  while(off < ctx->size && (CH(off) == CH(beg) || CH(off) == _T(' ') || CH(off) == _T('\t'))) {
5133  if(CH(off) == CH(beg))
5134  n++;
5135  off++;
5136  }
5137 
5138  if(n < 3) {
5139  *p_killer = off;
5140  return FALSE;
5141  }
5142 
5143  /* Nothing else can be present on the line. */
5144  if(off < ctx->size && !ISNEWLINE(off)) {
5145  *p_killer = off;
5146  return FALSE;
5147  }
5148 
5149  *p_end = off;
5150  return TRUE;
5151 }
5152 
5153 static int
5154 md_is_atxheader_line(MD_CTX* ctx, OFF beg, OFF* p_beg, OFF* p_end, unsigned* p_level)
5155 {
5156  int n;
5157  OFF off = beg + 1;
5158 
5159  while(off < ctx->size && CH(off) == _T('#') && off - beg < 7)
5160  off++;
5161  n = off - beg;
5162 
5163  if(n > 6)
5164  return FALSE;
5165  *p_level = n;
5166 
5167  if(!(ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS) && off < ctx->size &&
5168  CH(off) != _T(' ') && CH(off) != _T('\t') && !ISNEWLINE(off))
5169  return FALSE;
5170 
5171  while(off < ctx->size && CH(off) == _T(' '))
5172  off++;
5173  *p_beg = off;
5174  *p_end = off;
5175  return TRUE;
5176 }
5177 
5178 static int
5179 md_is_setext_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_level)
5180 {
5181  OFF off = beg + 1;
5182 
5183  while(off < ctx->size && CH(off) == CH(beg))
5184  off++;
5185 
5186  /* Optionally, space(s) can follow. */
5187  while(off < ctx->size && CH(off) == _T(' '))
5188  off++;
5189 
5190  /* But nothing more is allowed on the line. */
5191  if(off < ctx->size && !ISNEWLINE(off))
5192  return FALSE;
5193 
5194  *p_level = (CH(beg) == _T('=') ? 1 : 2);
5195  *p_end = off;
5196  return TRUE;
5197 }
5198 
5199 static int
5200 md_is_table_underline(MD_CTX* ctx, OFF beg, OFF* p_end, unsigned* p_col_count)
5201 {
5202  OFF off = beg;
5203  int found_pipe = FALSE;
5204  unsigned col_count = 0;
5205 
5206  if(off < ctx->size && CH(off) == _T('|')) {
5207  found_pipe = TRUE;
5208  off++;
5209  while(off < ctx->size && ISWHITESPACE(off))
5210  off++;
5211  }
5212 
5213  while(1) {
5214  OFF cell_beg;
5215  int delimited = FALSE;
5216 
5217  /* Cell underline ("-----", ":----", "----:" or ":----:") */
5218  cell_beg = off;
5219  if(off < ctx->size && CH(off) == _T(':'))
5220  off++;
5221  while(off < ctx->size && CH(off) == _T('-'))
5222  off++;
5223  if(off < ctx->size && CH(off) == _T(':'))
5224  off++;
5225  if(off - cell_beg < 3)
5226  return FALSE;
5227 
5228  col_count++;
5229 
5230  /* Pipe delimiter (optional at the end of line). */
5231  while(off < ctx->size && ISWHITESPACE(off))
5232  off++;
5233  if(off < ctx->size && CH(off) == _T('|')) {
5234  delimited = TRUE;
5235  found_pipe = TRUE;
5236  off++;
5237  while(off < ctx->size && ISWHITESPACE(off))
5238  off++;
5239  }
5240 
5241  /* Success, if we reach end of line. */
5242  if(off >= ctx->size || ISNEWLINE(off))
5243  break;
5244 
5245  if(!delimited)
5246  return FALSE;
5247  }
5248 
5249  if(!found_pipe)
5250  return FALSE;
5251 
5252  *p_end = off;
5253  *p_col_count = col_count;
5254  return TRUE;
5255 }
5256 
5257 static int
5258 md_is_opening_code_fence(MD_CTX* ctx, OFF beg, OFF* p_end)
5259 {
5260  OFF off = beg;
5261 
5262  while(off < ctx->size && CH(off) == CH(beg))
5263  off++;
5264 
5265  /* Fence must have at least three characters. */
5266  if(off - beg < 3)
5267  return FALSE;
5268 
5269  ctx->code_fence_length = off - beg;
5270 
5271  /* Optionally, space(s) can follow. */
5272  while(off < ctx->size && CH(off) == _T(' '))
5273  off++;
5274 
5275  /* Optionally, an info string can follow. */
5276  while(off < ctx->size && !ISNEWLINE(off)) {
5277  /* Backtick-based fence must not contain '`' in the info string. */
5278  if(CH(beg) == _T('`') && CH(off) == _T('`'))
5279  return FALSE;
5280  off++;
5281  }
5282 
5283  *p_end = off;
5284  return TRUE;
5285 }
5286 
5287 static int
5288 md_is_closing_code_fence(MD_CTX* ctx, CHAR ch, OFF beg, OFF* p_end)
5289 {
5290  OFF off = beg;
5291  int ret = FALSE;
5292 
5293  /* Closing fence must have at least the same length and use same char as
5294  * opening one. */
5295  while(off < ctx->size && CH(off) == ch)
5296  off++;
5297  if(off - beg < ctx->code_fence_length)
5298  goto out;
5299 
5300  /* Optionally, space(s) can follow */
5301  while(off < ctx->size && CH(off) == _T(' '))
5302  off++;
5303 
5304  /* But nothing more is allowed on the line. */
5305  if(off < ctx->size && !ISNEWLINE(off))
5306  goto out;
5307 
5308  ret = TRUE;
5309 
5310 out:
5311  /* Note we set *p_end even on failure: If we are not closing fence, caller
5312  * would eat the line anyway without any parsing. */
5313  *p_end = off;
5314  return ret;
5315 }
5316 
5317 /* Returns type of the raw HTML block, or FALSE if it is not HTML block.
5318  * (Refer to CommonMark specification for details about the types.)
5319  */
5320 static int
5321 md_is_html_block_start_condition(MD_CTX* ctx, OFF beg)
5322 {
5323  typedef struct TAG_tag TAG;
5324  struct TAG_tag {
5325  const CHAR* name;
5326  unsigned len : 8;
5327  };
5328 
5329  /* Type 6 is started by a long list of allowed tags. We use two-level
5330  * tree to speed-up the search. */
5331 #ifdef X
5332  #undef X
5333 #endif
5334 #define X(name) { _T(name), (sizeof(name)-1) / sizeof(CHAR) }
5335 #define Xend { NULL, 0 }
5336  static const TAG t1[] = { X("script"), X("pre"), X("style"), Xend };
5337 
5338  static const TAG a6[] = { X("address"), X("article"), X("aside"), Xend };
5339  static const TAG b6[] = { X("base"), X("basefont"), X("blockquote"), X("body"), Xend };
5340  static const TAG c6[] = { X("caption"), X("center"), X("col"), X("colgroup"), Xend };
5341  static const TAG d6[] = { X("dd"), X("details"), X("dialog"), X("dir"),
5342  X("div"), X("dl"), X("dt"), Xend };
5343  static const TAG f6[] = { X("fieldset"), X("figcaption"), X("figure"), X("footer"),
5344  X("form"), X("frame"), X("frameset"), Xend };
5345  static const TAG h6[] = { X("h1"), X("head"), X("header"), X("hr"), X("html"), Xend };
5346  static const TAG i6[] = { X("iframe"), Xend };
5347  static const TAG l6[] = { X("legend"), X("li"), X("link"), Xend };
5348  static const TAG m6[] = { X("main"), X("menu"), X("menuitem"), Xend };
5349  static const TAG n6[] = { X("nav"), X("noframes"), Xend };
5350  static const TAG o6[] = { X("ol"), X("optgroup"), X("option"), Xend };
5351  static const TAG p6[] = { X("p"), X("param"), Xend };
5352  static const TAG s6[] = { X("section"), X("source"), X("summary"), Xend };
5353  static const TAG t6[] = { X("table"), X("tbody"), X("td"), X("tfoot"), X("th"),
5354  X("thead"), X("title"), X("tr"), X("track"), Xend };
5355  static const TAG u6[] = { X("ul"), Xend };
5356  static const TAG xx[] = { Xend };
5357 #undef X
5358 
5359  static const TAG* map6[26] = {
5360  a6, b6, c6, d6, xx, f6, xx, h6, i6, xx, xx, l6, m6,
5361  n6, o6, p6, xx, xx, s6, t6, u6, xx, xx, xx, xx, xx
5362  };
5363  OFF off = beg + 1;
5364  int i;
5365 
5366  /* Check for type 1: <script, <pre, or <style */
5367  for(i = 0; t1[i].name != NULL; i++) {
5368  if(off + t1[i].len <= ctx->size) {
5369  if(md_ascii_case_eq(STR(off), t1[i].name, t1[i].len))
5370  return 1;
5371  }
5372  }
5373 
5374  /* Check for type 2: <!-- */
5375  if(off + 3 < ctx->size && CH(off) == _T('!') && CH(off+1) == _T('-') && CH(off+2) == _T('-'))
5376  return 2;
5377 
5378  /* Check for type 3: <? */
5379  if(off < ctx->size && CH(off) == _T('?'))
5380  return 3;
5381 
5382  /* Check for type 4 or 5: <! */
5383  if(off < ctx->size && CH(off) == _T('!')) {
5384  /* Check for type 4: <! followed by uppercase letter. */
5385  if(off + 1 < ctx->size && ISUPPER(off+1))
5386  return 4;
5387 
5388  /* Check for type 5: <![CDATA[ */
5389  if(off + 8 < ctx->size) {
5390  if(md_ascii_eq(STR(off), _T("![CDATA["), 8))
5391  return 5;
5392  }
5393  }
5394 
5395  /* Check for type 6: Many possible starting tags listed above. */
5396  if(off + 1 < ctx->size && (ISALPHA(off) || (CH(off) == _T('/') && ISALPHA(off+1)))) {
5397  int slot;
5398  const TAG* tags;
5399 
5400  if(CH(off) == _T('/'))
5401  off++;
5402 
5403  slot = (ISUPPER(off) ? CH(off) - 'A' : CH(off) - 'a');
5404  tags = map6[slot];
5405 
5406  for(i = 0; tags[i].name != NULL; i++) {
5407  if(off + tags[i].len <= ctx->size) {
5408  if(md_ascii_case_eq(STR(off), tags[i].name, tags[i].len)) {
5409  OFF tmp = off + tags[i].len;
5410  if(tmp >= ctx->size)
5411  return 6;
5412  if(ISBLANK(tmp) || ISNEWLINE(tmp) || CH(tmp) == _T('>'))
5413  return 6;
5414  if(tmp+1 < ctx->size && CH(tmp) == _T('/') && CH(tmp+1) == _T('>'))
5415  return 6;
5416  break;
5417  }
5418  }
5419  }
5420  }
5421 
5422  /* Check for type 7: any COMPLETE other opening or closing tag. */
5423  if(off + 1 < ctx->size) {
5424  OFF end;
5425 
5426  if(md_is_html_tag(ctx, NULL, 0, beg, ctx->size, &end)) {
5427  /* Only optional whitespace and new line may follow. */
5428  while(end < ctx->size && ISWHITESPACE(end))
5429  end++;
5430  if(end >= ctx->size || ISNEWLINE(end))
5431  return 7;
5432  }
5433  }
5434 
5435  return FALSE;
5436 }
5437 
5438 /* Case sensitive check whether there is a substring 'what' between 'beg'
5439  * and end of line. */
5440 static int
5441 md_line_contains(MD_CTX* ctx, OFF beg, const CHAR* what, SZ what_len, OFF* p_end)
5442 {
5443  OFF i;
5444  for(i = beg; i + what_len < ctx->size; i++) {
5445  if(ISNEWLINE(i))
5446  break;
5447  if(memcmp(STR(i), what, what_len * sizeof(CHAR)) == 0) {
5448  *p_end = i + what_len;
5449  return TRUE;
5450  }
5451  }
5452 
5453  *p_end = i;
5454  return FALSE;
5455 }
5456 
5457 /* Returns type of HTML block end condition or FALSE if not an end condition.
5458  *
5459  * Note it fills p_end even when it is not end condition as the caller
5460  * does not need to analyze contents of a raw HTML block.
5461  */
5462 static int
5463 md_is_html_block_end_condition(MD_CTX* ctx, OFF beg, OFF* p_end)
5464 {
5465  switch(ctx->html_block_type) {
5466  case 1:
5467  {
5468  OFF off = beg;
5469 
5470  while(off < ctx->size && !ISNEWLINE(off)) {
5471  if(CH(off) == _T('<')) {
5472  if(md_ascii_case_eq(STR(off), _T("</script>"), 9)) {
5473  *p_end = off + 9;
5474  return TRUE;
5475  }
5476 
5477  if(md_ascii_case_eq(STR(off), _T("</style>"), 8)) {
5478  *p_end = off + 8;
5479  return TRUE;
5480  }
5481 
5482  if(md_ascii_case_eq(STR(off), _T("</pre>"), 6)) {
5483  *p_end = off + 6;
5484  return TRUE;
5485  }
5486  }
5487 
5488  off++;
5489  }
5490  *p_end = off;
5491  return FALSE;
5492  }
5493 
5494  case 2:
5495  return (md_line_contains(ctx, beg, _T("-->"), 3, p_end) ? 2 : FALSE);
5496 
5497  case 3:
5498  return (md_line_contains(ctx, beg, _T("?>"), 2, p_end) ? 3 : FALSE);
5499 
5500  case 4:
5501  return (md_line_contains(ctx, beg, _T(">"), 1, p_end) ? 4 : FALSE);
5502 
5503  case 5:
5504  return (md_line_contains(ctx, beg, _T("]]>"), 3, p_end) ? 5 : FALSE);
5505 
5506  case 6: /* Pass through */
5507  case 7:
5508  *p_end = beg;
5509  return (ISNEWLINE(beg) ? ctx->html_block_type : FALSE);
5510 
5511  default:
5512  MD_UNREACHABLE();
5513  }
5514  return FALSE;
5515 }
5516 
5517 
5518 static int
5519 md_is_container_compatible(const MD_CONTAINER* pivot, const MD_CONTAINER* container)
5520 {
5521  /* Block quote has no "items" like lists. */
5522  if(container->ch == _T('>'))
5523  return FALSE;
5524 
5525  if(container->ch != pivot->ch)
5526  return FALSE;
5527  if(container->mark_indent > pivot->contents_indent)
5528  return FALSE;
5529 
5530  return TRUE;
5531 }
5532 
5533 static int
5534 md_push_container(MD_CTX* ctx, const MD_CONTAINER* container)
5535 {
5536  if(ctx->n_containers >= ctx->alloc_containers) {
5537  MD_CONTAINER* new_containers;
5538 
5539  ctx->alloc_containers = (ctx->alloc_containers > 0
5540  ? ctx->alloc_containers + ctx->alloc_containers / 2
5541  : 16);
5542  new_containers = realloc(ctx->containers, ctx->alloc_containers * sizeof(MD_CONTAINER));
5543  if(new_containers == NULL) {
5544  MD_LOG("realloc() failed.");
5545  return -1;
5546  }
5547 
5548  ctx->containers = new_containers;
5549  }
5550 
5551  memcpy(&ctx->containers[ctx->n_containers++], container, sizeof(MD_CONTAINER));
5552  return 0;
5553 }
5554 
5555 static int
5556 md_enter_child_containers(MD_CTX* ctx, int n_children)
5557 {
5558  int i;
5559  int ret = 0;
5560 
5561  for(i = ctx->n_containers - n_children; i < ctx->n_containers; i++) {
5562  MD_CONTAINER* c = &ctx->containers[i];
5563  int is_ordered_list = FALSE;
5564 
5565  switch(c->ch) {
5566  case _T(')'):
5567  case _T('.'):
5568  is_ordered_list = TRUE;
5569  MD_FALLTHROUGH();
5570 
5571  case _T('-'):
5572  case _T('+'):
5573  case _T('*'):
5574  /* Remember offset in ctx->block_bytes so we can revisit the
5575  * block if we detect it is a loose list. */
5576  md_end_current_block(ctx);
5577  c->block_byte_off = ctx->n_block_bytes;
5578 
5579  MD_CHECK(md_push_container_bytes(ctx,
5580  (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL),
5581  c->start, c->ch, MD_BLOCK_CONTAINER_OPENER));
5582  MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5583  c->task_mark_off,
5584  (c->is_task ? CH(c->task_mark_off) : 0),
5586  break;
5587 
5588  case _T('>'):
5589  MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0, 0, MD_BLOCK_CONTAINER_OPENER));
5590  break;
5591 
5592  default:
5593  MD_UNREACHABLE();
5594  break;
5595  }
5596  }
5597 
5598 abort:
5599  return ret;
5600 }
5601 
5602 static int
5603 md_leave_child_containers(MD_CTX* ctx, int n_keep)
5604 {
5605  int ret = 0;
5606 
5607  while(ctx->n_containers > n_keep) {
5608  MD_CONTAINER* c = &ctx->containers[ctx->n_containers-1];
5609  int is_ordered_list = FALSE;
5610 
5611  switch(c->ch) {
5612  case _T(')'):
5613  case _T('.'):
5614  is_ordered_list = TRUE;
5615  MD_FALLTHROUGH();
5616 
5617  case _T('-'):
5618  case _T('+'):
5619  case _T('*'):
5620  MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
5621  c->task_mark_off, (c->is_task ? CH(c->task_mark_off) : 0),
5623  MD_CHECK(md_push_container_bytes(ctx,
5624  (is_ordered_list ? MD_BLOCK_OL : MD_BLOCK_UL), 0,
5626  break;
5627 
5628  case _T('>'):
5629  MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_QUOTE, 0,
5631  break;
5632 
5633  default:
5634  MD_UNREACHABLE();
5635  break;
5636  }
5637 
5638  ctx->n_containers--;
5639  }
5640 
5641 abort:
5642  return ret;
5643 }
5644 
5645 static int
5646 md_is_container_mark(MD_CTX* ctx, unsigned indent, OFF beg, OFF* p_end, MD_CONTAINER* p_container)
5647 {
5648  OFF off = beg;
5649  OFF max_end;
5650 
5651  if(off >= ctx->size || indent >= ctx->code_indent_offset)
5652  return FALSE;
5653 
5654  /* Check for block quote mark. */
5655  if(CH(off) == _T('>')) {
5656  off++;
5657  p_container->ch = _T('>');
5658  p_container->is_loose = FALSE;
5659  p_container->is_task = FALSE;
5660  p_container->mark_indent = indent;
5661  p_container->contents_indent = indent + 1;
5662  *p_end = off;
5663  return TRUE;
5664  }
5665 
5666  /* Check for list item bullet mark. */
5667  if(ISANYOF(off, _T("-+*")) && (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1))) {
5668  p_container->ch = CH(off);
5669  p_container->is_loose = FALSE;
5670  p_container->is_task = FALSE;
5671  p_container->mark_indent = indent;
5672  p_container->contents_indent = indent + 1;
5673  *p_end = off+1;
5674  return TRUE;
5675  }
5676 
5677  /* Check for ordered list item marks. */
5678  max_end = off + 9;
5679  if(max_end > ctx->size)
5680  max_end = ctx->size;
5681  p_container->start = 0;
5682  while(off < max_end && ISDIGIT(off)) {
5683  p_container->start = p_container->start * 10 + CH(off) - _T('0');
5684  off++;
5685  }
5686  if(off > beg &&
5687  (CH(off) == _T('.') || CH(off) == _T(')')) &&
5688  (off+1 >= ctx->size || ISBLANK(off+1) || ISNEWLINE(off+1)))
5689  {
5690  p_container->ch = CH(off);
5691  p_container->is_loose = FALSE;
5692  p_container->is_task = FALSE;
5693  p_container->mark_indent = indent;
5694  p_container->contents_indent = indent + off - beg + 1;
5695  *p_end = off+1;
5696  return TRUE;
5697  }
5698 
5699  return FALSE;
5700 }
5701 
5702 static unsigned
5703 md_line_indentation(MD_CTX* ctx, unsigned total_indent, OFF beg, OFF* p_end)
5704 {
5705  OFF off = beg;
5706  unsigned indent = total_indent;
5707 
5708  while(off < ctx->size && ISBLANK(off)) {
5709  if(CH(off) == _T('\t'))
5710  indent = (indent + 4) & ~3;
5711  else
5712  indent++;
5713  off++;
5714  }
5715 
5716  *p_end = off;
5717  return indent - total_indent;
5718 }
5719 
/* Shared read-only line analysis of type MD_LINE_BLANK with all the other
 * members zeroed. It is substituted for the pivot line whenever the line
 * context is reset (e.g. after a blank line or after a new container mark). */
static const MD_LINE_ANALYSIS md_dummy_blank_line = { MD_LINE_BLANK, 0, 0, 0, 0 };
5721 
5722 /* Analyze type of the line and find some its properties. This serves as a
5723  * main input for determining type and boundaries of a block. */
5724 static int
5725 md_analyze_line(MD_CTX* ctx, OFF beg, OFF* p_end,
5726  const MD_LINE_ANALYSIS* pivot_line, MD_LINE_ANALYSIS* line)
5727 {
5728  unsigned total_indent = 0;
5729  int n_parents = 0;
5730  int n_brothers = 0;
5731  int n_children = 0;
5732  MD_CONTAINER container = { 0 };
5733  int prev_line_has_list_loosening_effect = ctx->last_line_has_list_loosening_effect;
5734  OFF off = beg;
5735  OFF hr_killer = 0;
5736  int ret = 0;
5737 
5738  line->indent = md_line_indentation(ctx, total_indent, off, &off);
5739  total_indent += line->indent;
5740  line->beg = off;
5741 
5742  /* Given the indentation and block quote marks '>', determine how many of
5743  * the current containers are our parents. */
5744  while(n_parents < ctx->n_containers) {
5745  MD_CONTAINER* c = &ctx->containers[n_parents];
5746 
5747  if(c->ch == _T('>') && line->indent < ctx->code_indent_offset &&
5748  off < ctx->size && CH(off) == _T('>'))
5749  {
5750  /* Block quote mark. */
5751  off++;
5752  total_indent++;
5753  line->indent = md_line_indentation(ctx, total_indent, off, &off);
5754  total_indent += line->indent;
5755 
5756  /* The optional 1st space after '>' is part of the block quote mark. */
5757  if(line->indent > 0)
5758  line->indent--;
5759 
5760  line->beg = off;
5761 
5762  } else if(c->ch != _T('>') && line->indent >= c->contents_indent) {
5763  /* List. */
5764  line->indent -= c->contents_indent;
5765  } else {
5766  break;
5767  }
5768 
5769  n_parents++;
5770  }
5771 
5772  if(off >= ctx->size || ISNEWLINE(off)) {
5773  /* Blank line does not need any real indentation to be nested inside
5774  * a list. */
5775  if(n_brothers + n_children == 0) {
5776  while(n_parents < ctx->n_containers && ctx->containers[n_parents].ch != _T('>'))
5777  n_parents++;
5778  }
5779  }
5780 
5781  while(TRUE) {
5782  /* Check whether we are fenced code continuation. */
5783  if(pivot_line->type == MD_LINE_FENCEDCODE) {
5784  line->beg = off;
5785 
5786  /* We are another MD_LINE_FENCEDCODE unless we are closing fence
5787  * which we transform into MD_LINE_BLANK. */
5788  if(line->indent < ctx->code_indent_offset) {
5789  if(md_is_closing_code_fence(ctx, CH(pivot_line->beg), off, &off)) {
5790  line->type = MD_LINE_BLANK;
5792  break;
5793  }
5794  }
5795 
5796  /* Change indentation accordingly to the initial code fence. */
5797  if(n_parents == ctx->n_containers) {
5798  if(line->indent > pivot_line->indent)
5799  line->indent -= pivot_line->indent;
5800  else
5801  line->indent = 0;
5802 
5803  line->type = MD_LINE_FENCEDCODE;
5804  break;
5805  }
5806  }
5807 
5808  /* Check whether we are HTML block continuation. */
5809  if(pivot_line->type == MD_LINE_HTML && ctx->html_block_type > 0) {
5810  if(n_parents < ctx->n_containers) {
5811  /* HTML block is implicitly ended if the enclosing container
5812  * block ends. */
5813  ctx->html_block_type = 0;
5814  } else {
5815  int html_block_type;
5816 
5817  html_block_type = md_is_html_block_end_condition(ctx, off, &off);
5818  if(html_block_type > 0) {
5819  MD_ASSERT(html_block_type == ctx->html_block_type);
5820 
5821  /* Make sure this is the last line of the block. */
5822  ctx->html_block_type = 0;
5823 
5824  /* Some end conditions serve as blank lines at the same time. */
5825  if(html_block_type == 6 || html_block_type == 7) {
5826  line->type = MD_LINE_BLANK;
5827  line->indent = 0;
5828  break;
5829  }
5830  }
5831 
5832  line->type = MD_LINE_HTML;
5833  n_parents = ctx->n_containers;
5834  break;
5835  }
5836  }
5837 
5838  /* Check for blank line. */
5839  if(off >= ctx->size || ISNEWLINE(off)) {
5840  if(pivot_line->type == MD_LINE_INDENTEDCODE && n_parents == ctx->n_containers) {
5841  line->type = MD_LINE_INDENTEDCODE;
5842  if(line->indent > ctx->code_indent_offset)
5843  line->indent -= ctx->code_indent_offset;
5844  else
5845  line->indent = 0;
5847  } else {
5848  line->type = MD_LINE_BLANK;
5849  ctx->last_line_has_list_loosening_effect = (n_parents > 0 &&
5850  n_brothers + n_children == 0 &&
5851  ctx->containers[n_parents-1].ch != _T('>'));
5852 
5853  #if 1
5854  /* See https://github.com/mity/md4c/issues/6
5855  *
5856  * This ugly checking tests we are in (yet empty) list item but
5857  * not its very first line (i.e. not the line with the list
5858  * item mark).
5859  *
5860  * If we are such a blank line, then any following non-blank
5861  * line which would be part of the list item actually has to
5862  * end the list because according to the specification, "a list
5863  * item can begin with at most one blank line."
5864  */
5865  if(n_parents > 0 && ctx->containers[n_parents-1].ch != _T('>') &&
5866  n_brothers + n_children == 0 && ctx->current_block == NULL &&
5867  ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
5868  {
5869  MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK));
5870  if(top_block->type == MD_BLOCK_LI)
5872  }
5873  #endif
5874  }
5875  break;
5876  } else {
5877  #if 1
5878  /* This is the 2nd half of the hack. If the flag is set (i.e. there
5879  * was a 2nd blank line at the beginning of the list item) and if
5880  * we would otherwise still belong to the list item, we enforce
5881  * the end of the list. */
5884  if(n_parents > 0 && ctx->containers[n_parents-1].ch != _T('>') &&
5885  n_brothers + n_children == 0 && ctx->current_block == NULL &&
5886  ctx->n_block_bytes > (int) sizeof(MD_BLOCK))
5887  {
5888  MD_BLOCK* top_block = (MD_BLOCK*) ((char*)ctx->block_bytes + ctx->n_block_bytes - sizeof(MD_BLOCK));
5889  if(top_block->type == MD_BLOCK_LI)
5890  n_parents--;
5891  }
5892 
5894  }
5895  #endif
5896  }
5897 
5898  /* Check whether we are Setext underline. */
5899  if(line->indent < ctx->code_indent_offset && pivot_line->type == MD_LINE_TEXT
5900  && off < ctx->size && ISANYOF2(off, _T('='), _T('-'))
5901  && (n_parents == ctx->n_containers))
5902  {
5903  unsigned level;
5904 
5905  if(md_is_setext_underline(ctx, off, &off, &level)) {
5906  line->type = MD_LINE_SETEXTUNDERLINE;
5907  line->data = level;
5908  break;
5909  }
5910  }
5911 
5912  /* Check for thematic break line. */
5913  if(line->indent < ctx->code_indent_offset
5914  && off < ctx->size && off >= hr_killer
5915  && ISANYOF(off, _T("-_*")))
5916  {
5917  if(md_is_hr_line(ctx, off, &off, &hr_killer)) {
5918  line->type = MD_LINE_HR;
5919  break;
5920  }
5921  }
5922 
5923  /* Check for "brother" container. I.e. whether we are another list item
5924  * in already started list. */
5925  if(n_parents < ctx->n_containers && n_brothers + n_children == 0) {
5926  OFF tmp;
5927 
5928  if(md_is_container_mark(ctx, line->indent, off, &tmp, &container) &&
5929  md_is_container_compatible(&ctx->containers[n_parents], &container))
5930  {
5931  pivot_line = &md_dummy_blank_line;
5932 
5933  off = tmp;
5934 
5935  total_indent += container.contents_indent - container.mark_indent;
5936  line->indent = md_line_indentation(ctx, total_indent, off, &off);
5937  total_indent += line->indent;
5938  line->beg = off;
5939 
5940  /* Some of the following whitespace actually still belongs to the mark. */
5941  if(off >= ctx->size || ISNEWLINE(off)) {
5942  container.contents_indent++;
5943  } else if(line->indent <= ctx->code_indent_offset) {
5944  container.contents_indent += line->indent;
5945  line->indent = 0;
5946  } else {
5947  container.contents_indent += 1;
5948  line->indent--;
5949  }
5950 
5951  ctx->containers[n_parents].mark_indent = container.mark_indent;
5952  ctx->containers[n_parents].contents_indent = container.contents_indent;
5953 
5954  n_brothers++;
5955  continue;
5956  }
5957  }
5958 
5959  /* Check for indented code.
5960  * Note indented code block cannot interrupt a paragraph. */
5961  if(line->indent >= ctx->code_indent_offset &&
5962  (pivot_line->type == MD_LINE_BLANK || pivot_line->type == MD_LINE_INDENTEDCODE))
5963  {
5964  line->type = MD_LINE_INDENTEDCODE;
5965  MD_ASSERT(line->indent >= ctx->code_indent_offset);
5966  line->indent -= ctx->code_indent_offset;
5967  line->data = 0;
5968  break;
5969  }
5970 
5971  /* Check for start of a new container block. */
5972  if(line->indent < ctx->code_indent_offset &&
5973  md_is_container_mark(ctx, line->indent, off, &off, &container))
5974  {
5975  if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers &&
5976  (off >= ctx->size || ISNEWLINE(off)) && container.ch != _T('>'))
5977  {
5978  /* Noop. List mark followed by a blank line cannot interrupt a paragraph. */
5979  } else if(pivot_line->type == MD_LINE_TEXT && n_parents == ctx->n_containers &&
5980  ISANYOF2_(container.ch, _T('.'), _T(')')) && container.start != 1)
5981  {
5982  /* Noop. Ordered list cannot interrupt a paragraph unless the start index is 1. */
5983  } else {
5984  total_indent += container.contents_indent - container.mark_indent;
5985  line->indent = md_line_indentation(ctx, total_indent, off, &off);
5986  total_indent += line->indent;
5987 
5988  line->beg = off;
5989  line->data = container.ch;
5990 
5991  /* Some of the following whitespace actually still belongs to the mark. */
5992  if(off >= ctx->size || ISNEWLINE(off)) {
5993  container.contents_indent++;
5994  } else if(line->indent <= ctx->code_indent_offset) {
5995  container.contents_indent += line->indent;
5996  line->indent = 0;
5997  } else {
5998  container.contents_indent += 1;
5999  line->indent--;
6000  }
6001 
6002  if(n_brothers + n_children == 0)
6003  pivot_line = &md_dummy_blank_line;
6004 
6005  if(n_children == 0)
6006  MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
6007 
6008  n_children++;
6009  MD_CHECK(md_push_container(ctx, &container));
6010  continue;
6011  }
6012  }
6013 
6014  /* Check whether we are table continuation. */
6015  if(pivot_line->type == MD_LINE_TABLE && n_parents == ctx->n_containers) {
6016  line->type = MD_LINE_TABLE;
6017  break;
6018  }
6019 
6020  /* Check for ATX header. */
6021  if(line->indent < ctx->code_indent_offset &&
6022  off < ctx->size && CH(off) == _T('#'))
6023  {
6024  unsigned level;
6025 
6026  if(md_is_atxheader_line(ctx, off, &line->beg, &off, &level)) {
6027  line->type = MD_LINE_ATXHEADER;
6028  line->data = level;
6029  break;
6030  }
6031  }
6032 
6033  /* Check whether we are starting code fence. */
6034  if(off < ctx->size && ISANYOF2(off, _T('`'), _T('~'))) {
6035  if(md_is_opening_code_fence(ctx, off, &off)) {
6036  line->type = MD_LINE_FENCEDCODE;
6037  line->data = 1;
6038  break;
6039  }
6040  }
6041 
6042  /* Check for start of raw HTML block. */
6043  if(off < ctx->size && CH(off) == _T('<')
6044  && !(ctx->parser.flags & MD_FLAG_NOHTMLBLOCKS))
6045  {
6046  ctx->html_block_type = md_is_html_block_start_condition(ctx, off);
6047 
6048  /* HTML block type 7 cannot interrupt paragraph. */
6049  if(ctx->html_block_type == 7 && pivot_line->type == MD_LINE_TEXT)
6050  ctx->html_block_type = 0;
6051 
6052  if(ctx->html_block_type > 0) {
6053  /* The line itself also may immediately close the block. */
6054  if(md_is_html_block_end_condition(ctx, off, &off) == ctx->html_block_type) {
6055  /* Make sure this is the last line of the block. */
6056  ctx->html_block_type = 0;
6057  }
6058 
6059  line->type = MD_LINE_HTML;
6060  break;
6061  }
6062  }
6063 
6064  /* Check for table underline. */
6065  if((ctx->parser.flags & MD_FLAG_TABLES) && pivot_line->type == MD_LINE_TEXT
6066  && off < ctx->size && ISANYOF3(off, _T('|'), _T('-'), _T(':'))
6067  && n_parents == ctx->n_containers)
6068  {
6069  unsigned col_count;
6070 
6071  if(ctx->current_block != NULL && ctx->current_block->n_lines == 1 &&
6072  md_is_table_underline(ctx, off, &off, &col_count))
6073  {
6074  line->data = col_count;
6075  line->type = MD_LINE_TABLEUNDERLINE;
6076  break;
6077  }
6078  }
6079 
6080  /* By default, we are normal text line. */
6081  line->type = MD_LINE_TEXT;
6082  if(pivot_line->type == MD_LINE_TEXT && n_brothers + n_children == 0) {
6083  /* Lazy continuation. */
6084  n_parents = ctx->n_containers;
6085  }
6086 
6087  /* Check for task mark. */
6088  if((ctx->parser.flags & MD_FLAG_TASKLISTS) && n_brothers + n_children > 0 &&
6089  ISANYOF_(ctx->containers[ctx->n_containers-1].ch, _T("-+*.)")))
6090  {
6091  OFF tmp = off;
6092 
6093  while(tmp < ctx->size && tmp < off + 3 && ISBLANK(tmp))
6094  tmp++;
6095  if(tmp + 2 < ctx->size && CH(tmp) == _T('[') &&
6096  ISANYOF(tmp+1, _T("xX ")) && CH(tmp+2) == _T(']') &&
6097  (tmp + 3 == ctx->size || ISBLANK(tmp+3) || ISNEWLINE(tmp+3)))
6098  {
6099  MD_CONTAINER* task_container = (n_children > 0 ? &ctx->containers[ctx->n_containers-1] : &container);
6100  task_container->is_task = TRUE;
6101  task_container->task_mark_off = tmp + 1;
6102  off = tmp + 3;
6103  while(ISWHITESPACE(off))
6104  off++;
6105  line->beg = off;
6106  }
6107  }
6108 
6109  break;
6110  }
6111 
6112  /* Scan for end of the line.
6113  *
6114  * Note this is quite a bottleneck of the parsing as we here iterate almost
6115  * over compete document.
6116  */
6117 #if defined __linux__ && !defined MD4C_USE_UTF16
6118  /* Recent glibc versions have superbly optimized strcspn(), even using
6119  * vectorization if available. */
6120  if(ctx->doc_ends_with_newline && off < ctx->size) {
6121  while(TRUE) {
6122  off += (OFF) strcspn(STR(off), "\r\n");
6123 
6124  /* strcspn() can stop on zero terminator; but that can appear
6125  * anywhere in the Markfown input... */
6126  if(CH(off) == _T('\0'))
6127  off++;
6128  else
6129  break;
6130  }
6131  } else
6132 #endif
6133  {
6134  /* Optimization: Use some loop unrolling. */
6135  while(off + 3 < ctx->size && !ISNEWLINE(off+0) && !ISNEWLINE(off+1)
6136  && !ISNEWLINE(off+2) && !ISNEWLINE(off+3))
6137  off += 4;
6138  while(off < ctx->size && !ISNEWLINE(off))
6139  off++;
6140  }
6141 
6142  /* Set end of the line. */
6143  line->end = off;
6144 
6145  /* But for ATX header, we should exclude the optional trailing mark. */
6146  if(line->type == MD_LINE_ATXHEADER) {
6147  OFF tmp = line->end;
6148  while(tmp > line->beg && CH(tmp-1) == _T(' '))
6149  tmp--;
6150  while(tmp > line->beg && CH(tmp-1) == _T('#'))
6151  tmp--;
6152  if(tmp == line->beg || CH(tmp-1) == _T(' ') || (ctx->parser.flags & MD_FLAG_PERMISSIVEATXHEADERS))
6153  line->end = tmp;
6154  }
6155 
6156  /* Trim trailing spaces. */
6157  if(line->type != MD_LINE_INDENTEDCODE && line->type != MD_LINE_FENCEDCODE) {
6158  while(line->end > line->beg && CH(line->end-1) == _T(' '))
6159  line->end--;
6160  }
6161 
6162  /* Eat also the new line. */
6163  if(off < ctx->size && CH(off) == _T('\r'))
6164  off++;
6165  if(off < ctx->size && CH(off) == _T('\n'))
6166  off++;
6167 
6168  *p_end = off;
6169 
6170  /* If we belong to a list after seeing a blank line, the list is loose. */
6171  if(prev_line_has_list_loosening_effect && line->type != MD_LINE_BLANK && n_parents + n_brothers > 0) {
6172  MD_CONTAINER* c = &ctx->containers[n_parents + n_brothers - 1];
6173  if(c->ch != _T('>')) {
6174  MD_BLOCK* block = (MD_BLOCK*) (((char*)ctx->block_bytes) + c->block_byte_off);
6175  block->flags |= MD_BLOCK_LOOSE_LIST;
6176  }
6177  }
6178 
6179  /* Leave any containers we are not part of anymore. */
6180  if(n_children == 0 && n_parents + n_brothers < ctx->n_containers)
6181  MD_CHECK(md_leave_child_containers(ctx, n_parents + n_brothers));
6182 
6183  /* Enter any container we found a mark for. */
6184  if(n_brothers > 0) {
6185  MD_ASSERT(n_brothers == 1);
6186  MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
6187  ctx->containers[n_parents].task_mark_off,
6188  (ctx->containers[n_parents].is_task ? CH(ctx->containers[n_parents].task_mark_off) : 0),
6190  MD_CHECK(md_push_container_bytes(ctx, MD_BLOCK_LI,
6191  container.task_mark_off,
6192  (container.is_task ? CH(container.task_mark_off) : 0),
6194  ctx->containers[n_parents].is_task = container.is_task;
6195  ctx->containers[n_parents].task_mark_off = container.task_mark_off;
6196  }
6197 
6198  if(n_children > 0)
6199  MD_CHECK(md_enter_child_containers(ctx, n_children));
6200 
6201 abort:
6202  return ret;
6203 }
6204 
6205 static int
6206 md_process_line(MD_CTX* ctx, const MD_LINE_ANALYSIS** p_pivot_line, MD_LINE_ANALYSIS* line)
6207 {
6208  const MD_LINE_ANALYSIS* pivot_line = *p_pivot_line;
6209  int ret = 0;
6210 
6211  /* Blank line ends current leaf block. */
6212  if(line->type == MD_LINE_BLANK) {
6213  MD_CHECK(md_end_current_block(ctx));
6214  *p_pivot_line = &md_dummy_blank_line;
6215  return 0;
6216  }
6217 
6218  /* Some line types form block on their own. */
6219  if(line->type == MD_LINE_HR || line->type == MD_LINE_ATXHEADER) {
6220  MD_CHECK(md_end_current_block(ctx));
6221 
6222  /* Add our single-line block. */
6223  MD_CHECK(md_start_new_block(ctx, line));
6224  MD_CHECK(md_add_line_into_current_block(ctx, line));
6225  MD_CHECK(md_end_current_block(ctx));
6226  *p_pivot_line = &md_dummy_blank_line;
6227  return 0;
6228  }
6229 
6230  /* MD_LINE_SETEXTUNDERLINE changes meaning of the current block and ends it. */
6231  if(line->type == MD_LINE_SETEXTUNDERLINE) {
6232  MD_ASSERT(ctx->current_block != NULL);
6233  ctx->current_block->type = MD_BLOCK_H;
6234  ctx->current_block->data = line->data;
6236  MD_CHECK(md_add_line_into_current_block(ctx, line));
6237  MD_CHECK(md_end_current_block(ctx));
6238  if(ctx->current_block == NULL) {
6239  *p_pivot_line = &md_dummy_blank_line;
6240  } else {
6241  /* This happens if we have consumed all the body as link ref. defs.
6242  * and downgraded the underline into start of a new paragraph block. */
6243  line->type = MD_LINE_TEXT;
6244  *p_pivot_line = line;
6245  }
6246  return 0;
6247  }
6248 
6249  /* MD_LINE_TABLEUNDERLINE changes meaning of the current block. */
6250  if(line->type == MD_LINE_TABLEUNDERLINE) {
6251  MD_ASSERT(ctx->current_block != NULL);
6252  MD_ASSERT(ctx->current_block->n_lines == 1);
6254  ctx->current_block->data = line->data;
6255  MD_ASSERT(pivot_line != &md_dummy_blank_line);
6256  ((MD_LINE_ANALYSIS*)pivot_line)->type = MD_LINE_TABLE;
6257  MD_CHECK(md_add_line_into_current_block(ctx, line));
6258  return 0;
6259  }
6260 
6261  /* The current block also ends if the line has different type. */
6262  if(line->type != pivot_line->type)
6263  MD_CHECK(md_end_current_block(ctx));
6264 
6265  /* The current line may start a new block. */
6266  if(ctx->current_block == NULL) {
6267  MD_CHECK(md_start_new_block(ctx, line));
6268  *p_pivot_line = line;
6269  }
6270 
6271  /* In all other cases the line is just a continuation of the current block. */
6272  MD_CHECK(md_add_line_into_current_block(ctx, line));
6273 
6274 abort:
6275  return ret;
6276 }
6277 
6278 static int
6279 md_process_doc(MD_CTX *ctx)
6280 {
6281  const MD_LINE_ANALYSIS* pivot_line = &md_dummy_blank_line;
6282  MD_LINE_ANALYSIS line_buf[2];
6283  MD_LINE_ANALYSIS* line = &line_buf[0];
6284  OFF off = 0;
6285  int ret = 0;
6286 
6288 
6289  while(off < ctx->size) {
6290  if(line == pivot_line)
6291  line = (line == &line_buf[0] ? &line_buf[1] : &line_buf[0]);
6292 
6293  MD_CHECK(md_analyze_line(ctx, off, &off, pivot_line, line));
6294  MD_CHECK(md_process_line(ctx, &pivot_line, line));
6295  }
6296 
6297  md_end_current_block(ctx);
6298 
6299  MD_CHECK(md_build_ref_def_hashtable(ctx));
6300 
6301  /* Process all blocks. */
6302  MD_CHECK(md_leave_child_containers(ctx, 0));
6303  MD_CHECK(md_process_all_blocks(ctx));
6304 
6306 
6307 abort:
6308 
6309 #if 0
6310  /* Output some memory consumption statistics. */
6311  {
6312  char buffer[256];
6313  sprintf(buffer, "Alloced %u bytes for block buffer.",
6314  (unsigned)(ctx->alloc_block_bytes));
6315  MD_LOG(buffer);
6316 
6317  sprintf(buffer, "Alloced %u bytes for containers buffer.",
6318  (unsigned)(ctx->alloc_containers * sizeof(MD_CONTAINER)));
6319  MD_LOG(buffer);
6320 
6321  sprintf(buffer, "Alloced %u bytes for marks buffer.",
6322  (unsigned)(ctx->alloc_marks * sizeof(MD_MARK)));
6323  MD_LOG(buffer);
6324 
6325  sprintf(buffer, "Alloced %u bytes for aux. buffer.",
6326  (unsigned)(ctx->alloc_buffer * sizeof(MD_CHAR)));
6327  MD_LOG(buffer);
6328  }
6329 #endif
6330 
6331  return ret;
6332 }
6333 
6334 
6335 /********************
6336  *** Public API ***
6337  ********************/
6338 
6339 int
6340 md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userdata)
6341 {
6342  MD_CTX ctx;
6343  int i;
6344  int ret;
6345 
6346  if(parser->abi_version != 0) {
6347  if(parser->debug_log != NULL)
6348  parser->debug_log("Unsupported abi_version.", userdata);
6349  return -1;
6350  }
6351 
6352  /* Setup context structure. */
6353  memset(&ctx, 0, sizeof(MD_CTX));
6354  ctx.text = text;
6355  ctx.size = size;
6356  memcpy(&ctx.parser, parser, sizeof(MD_PARSER));
6357  ctx.userdata = userdata;
6359  md_build_mark_char_map(&ctx);
6360  ctx.doc_ends_with_newline = (size > 0 && ISNEWLINE_(text[size-1]));
6361 
6362  /* Reset all unresolved opener mark chains. */
6363  for(i = 0; i < (int) SIZEOF_ARRAY(ctx.mark_chains); i++) {
6364  ctx.mark_chains[i].head = -1;
6365  ctx.mark_chains[i].tail = -1;
6366  }
6367  ctx.unresolved_link_head = -1;
6368  ctx.unresolved_link_tail = -1;
6369 
6370  /* All the work. */
6371  ret = md_process_doc(&ctx);
6372 
6373  /* Clean-up. */
6374  md_free_ref_defs(&ctx);
6375  md_free_ref_def_hashtable(&ctx);
6376  free(ctx.buffer);
6377  free(ctx.marks);
6378  free(ctx.block_bytes);
6379  free(ctx.containers);
6380 
6381  return ret;
6382 }
small capitals from c petite p scientific i
[1]
Definition: afcover.h:80
char * data()
iterator end()
Definition: qstring.h:1339
QChar * data()
Definition: qstring.h:1228
Definition: base.h:37
QHash< int, QWidget * > hash
[35multi]
QMap< QString, QString > map
[6]
QString str
[2]
QString text
[meta data]
#define NULL
Definition: ftobjs.h:61
backing_store_ptr info
[4]
Definition: jmemsys.h:161
JOCTET JCOEFPTR block
Definition: jsimd.h:109
short next
Definition: keywords.cpp:454
#define MD_MARK_VALIDPERMISSIVEAUTOLINK
Definition: md4c.c:2467
#define MD_TEMP_BUFFER(sz)
Definition: md4c.c:390
#define R(cp_min, cp_max)
#define MD_MARK_EMPH_MOD3_MASK
Definition: md4c.c:2465
#define ISANYOF3(off, ch1, ch2, ch3)
Definition: md4c.c:304
#define MD_MARK_EMPH_MOD3_1
Definition: md4c.c:2463
#define SZ
Definition: md4c.c:124
#define MD_TEXT(type, str, size)
Definition: md4c.c:445
#define MD_ROLLBACK_CROSSING
Definition: md4c.c:2606
#define ISNEWLINE_(ch)
Definition: md4c.c:291
#define ASTERISK_OPENERS_intraword_mod3_2
Definition: md4c.c:189
#define STR(off)
Definition: md4c.c:281
#define ISXDIGIT_(ch)
Definition: md4c.c:299
#define MD_BLOCK_LOOSE_LIST
Definition: md4c.c:4569
#define MD_LOG(msg)
Definition: md4c.c:75
#define IS_MARK_CHAR(off)
#define OFF
Definition: md4c.c:125
#define ISANYOF2(off, ch1, ch2)
Definition: md4c.c:303
#define MD_LEAVE_BLOCK(type, arg)
Definition: md4c.c:418
#define CHAR
Definition: md4c.c:123
enum MD_LINETYPE_tag MD_LINETYPE
Definition: md4c.c:250
#define MD_FNV1A_BASE
Definition: md4c.c:1486
#define MD_MARK_POTENTIAL_CLOSER
Definition: md4c.c:2455
#define MD_MARK_EMPH_MOD3_0
Definition: md4c.c:2462
#define MD_BLOCK_CONTAINER
Definition: md4c.c:4568
#define MD_FALLTHROUGH()
Definition: md4c.c:111
#define MD_LEAVE_SPAN(type, arg)
Definition: md4c.c:436
#define ISANYOF_(ch, palette)
Definition: md4c.c:286
struct MD_LINE_tag MD_LINE
Definition: md4c.c:261
#define ISALPHA_(ch)
Definition: md4c.c:297
struct MD_VERBATIMLINE_tag MD_VERBATIMLINE
Definition: md4c.c:267
#define Xend
#define MD_MARK_RESOLVED
Definition: md4c.c:2458
#define CODESPAN_MARK_MAXLEN
Definition: md4c.c:2737
#define ISUPPER_(ch)
Definition: md4c.c:295
#define MD_BUILD_ATTR_NO_ESCAPES
Definition: md4c.c:1346
#define MD_UNREACHABLE()
Definition: md4c.c:101
#define MD_BLOCK_CONTAINER_CLOSER
Definition: md4c.c:4567
#define MD_MARK_EMPH_MOD3_2
Definition: md4c.c:2464
#define ISDIGIT(off)
Definition: md4c.c:314
#define TABLECELLBOUNDARIES
Definition: md4c.c:183
#define ASTERISK_OPENERS_extraword_mod3_0
Definition: md4c.c:184
int md_parse(const MD_CHAR *text, MD_SIZE size, const MD_PARSER *parser, void *userdata)
Definition: md4c.c:6340
#define ISALNUM(off)
Definition: md4c.c:316
#define TILDE_OPENERS_2
Definition: md4c.c:192
#define MD_MARK_CLOSER
Definition: md4c.c:2457
#define PUSH_MARK(ch_, beg_, end_, flags_)
Definition: md4c.c:2529
#define ISALPHA(off)
Definition: md4c.c:313
#define IS_UTF8_LEAD3(byte)
Definition: md4c.c:781
#define X(name)
#define MD_CHECK(func)
Definition: md4c.c:382
#define ISUNICODEPUNCT(off)
Definition: md4c.c:851
#define MD_TEXT_INSECURE(type, str, size)
Definition: md4c.c:456
#define TILDE_OPENERS_1
Definition: md4c.c:191
#define IS_UTF8_LEAD1(byte)
Definition: md4c.c:779
#define ISLOWER_(ch)
Definition: md4c.c:296
#define ISWHITESPACE_(ch)
Definition: md4c.c:292
#define ISDIGIT_(ch)
Definition: md4c.c:298
#define MD_FNV1A_PRIME
Definition: md4c.c:1487
#define ISUNICODEPUNCTBEFORE(off)
Definition: md4c.c:852
#define ISWHITESPACE(off)
Definition: md4c.c:308
#define ISUNICODEWHITESPACEBEFORE(off)
Definition: md4c.c:849
#define ASTERISK_OPENERS_intraword_mod3_0
Definition: md4c.c:187
#define OPENERS_CHAIN_FIRST
Definition: md4c.c:195
#define OPENERS_CHAIN_LAST
Definition: md4c.c:196
#define ISANYOF3_(ch, ch1, ch2, ch3)
Definition: md4c.c:288
#define DOLLAR_OPENERS
Definition: md4c.c:194
#define TRUE
Definition: md4c.c:71
#define FALSE
Definition: md4c.c:72
#define ISANYOF2_(ch, ch1, ch2)
Definition: md4c.c:287
#define ISANYOF(off, palette)
Definition: md4c.c:302
#define MD_MARK_AUTOLINK
Definition: md4c.c:2466
#define CH(off)
Definition: md4c.c:280
#define ISBLANK(off)
Definition: md4c.c:306
#define IS_UTF8_TAIL(byte)
Definition: md4c.c:783
#define MD_ENTER_SPAN(type, arg)
Definition: md4c.c:427
#define MD_MARK_POTENTIAL_OPENER
Definition: md4c.c:2454
#define BRACKET_OPENERS
Definition: md4c.c:193
#define ISCNTRL(off)
Definition: md4c.c:309
#define ISASCII(off)
Definition: md4c.c:305
#define MD_ENTER_BLOCK(type, arg)
Definition: md4c.c:409
#define MD_ASSERT(cond)
Definition: md4c.c:100
#define S(cp)
#define ISUNICODEWHITESPACE_(codepoint)
Definition: md4c.c:847
#define PTR_CHAIN
Definition: md4c.c:182
#define IS_UTF8_LEAD2(byte)
Definition: md4c.c:780
#define ISNEWLINE(off)
Definition: md4c.c:307
#define MD_MARK_EMPH_INTRAWORD
Definition: md4c.c:2461
#define MD_BLOCK_CONTAINER_OPENER
Definition: md4c.c:4566
#define MD_MARK_OPENER
Definition: md4c.c:2456
#define ASTERISK_OPENERS_extraword_mod3_2
Definition: md4c.c:186
#define ASTERISK_OPENERS_extraword_mod3_1
Definition: md4c.c:185
#define ISALNUM_(ch)
Definition: md4c.c:300
#define ASTERISK_OPENERS_intraword_mod3_1
Definition: md4c.c:188
#define ISPUNCT(off)
Definition: md4c.c:310
#define MD_UNUSED(x)
Definition: md4c.c:115
#define _T(x)
Definition: md4c.c:61
#define MD_BLOCK_SETEXT_HEADER
Definition: md4c.c:4570
#define MD_ROLLBACK_ALL
Definition: md4c.c:2605
MD_LINETYPE_tag
Definition: md4c.c:237
@ MD_LINE_SETEXTUNDERLINE
Definition: md4c.c:242
@ MD_LINE_TEXT
Definition: md4c.c:246
@ MD_LINE_INDENTEDCODE
Definition: md4c.c:243
@ MD_LINE_FENCEDCODE
Definition: md4c.c:244
@ MD_LINE_SETEXTHEADER
Definition: md4c.c:241
@ MD_LINE_HTML
Definition: md4c.c:245
@ MD_LINE_ATXHEADER
Definition: md4c.c:240
@ MD_LINE_BLANK
Definition: md4c.c:238
@ MD_LINE_HR
Definition: md4c.c:239
@ MD_LINE_TABLE
Definition: md4c.c:247
@ MD_LINE_TABLEUNDERLINE
Definition: md4c.c:248
#define ISUNICODEWHITESPACE(off)
Definition: md4c.c:848
#define ISPUNCT_(ch)
Definition: md4c.c:294
#define ISUPPER(off)
Definition: md4c.c:311
#define IS_UTF8_LEAD4(byte)
Definition: md4c.c:782
#define UNDERSCORE_OPENERS
Definition: md4c.c:190
struct MD_BLOCK_tag MD_BLOCK
Definition: md4c.c:128
#define SIZEOF_ARRAY(a)
Definition: md4c.c:65
#define MD_FLAG_TASKLISTS
Definition: md4c.h:315
#define MD_FLAG_NOHTMLBLOCKS
Definition: md4c.h:310
MD_BLOCKTYPE
Definition: md4c.h:54
@ MD_BLOCK_UL
Definition: md4c.h:63
@ MD_BLOCK_QUOTE
Definition: md4c.h:59
@ MD_BLOCK_DOC
Definition: md4c.h:56
@ MD_BLOCK_TABLE
Definition: md4c.h:97
@ MD_BLOCK_TH
Definition: md4c.h:101
@ MD_BLOCK_OL
Definition: md4c.h:67
@ MD_BLOCK_P
Definition: md4c.h:91
@ MD_BLOCK_HR
Definition: md4c.h:74
@ MD_BLOCK_H
Definition: md4c.h:78
@ MD_BLOCK_TBODY
Definition: md4c.h:99
@ MD_BLOCK_CODE
Definition: md4c.h:83
@ MD_BLOCK_LI
Definition: md4c.h:71
@ MD_BLOCK_HTML
Definition: md4c.h:88
@ MD_BLOCK_TR
Definition: md4c.h:100
@ MD_BLOCK_TD
Definition: md4c.h:102
@ MD_BLOCK_THEAD
Definition: md4c.h:98
char MD_CHAR
Definition: md4c.h:44
unsigned MD_SIZE
Definition: md4c.h:47
#define MD_FLAG_NOINDENTEDCODEBLOCKS
Definition: md4c.h:309
#define MD_FLAG_PERMISSIVEURLAUTOLINKS
Definition: md4c.h:307
#define MD_FLAG_PERMISSIVEWWWAUTOLINKS
Definition: md4c.h:314
#define MD_FLAG_UNDERLINE
Definition: md4c.h:318
MD_ALIGN
Definition: md4c.h:196
@ MD_ALIGN_DEFAULT
Definition: md4c.h:197
@ MD_ALIGN_RIGHT
Definition: md4c.h:200
@ MD_ALIGN_CENTER
Definition: md4c.h:199
@ MD_ALIGN_LEFT
Definition: md4c.h:198
#define MD_FLAG_PERMISSIVEEMAILAUTOLINKS
Definition: md4c.h:308
MD_SPANTYPE
Definition: md4c.h:108
@ MD_SPAN_LATEXMATH_DISPLAY
Definition: md4c.h:139
@ MD_SPAN_DEL
Definition: md4c.h:133
@ MD_SPAN_LATEXMATH
Definition: md4c.h:138
@ MD_SPAN_EM
Definition: md4c.h:110
@ MD_SPAN_U
Definition: md4c.h:148
@ MD_SPAN_STRONG
Definition: md4c.h:113
@ MD_SPAN_CODE
Definition: md4c.h:128
@ MD_SPAN_IMG
Definition: md4c.h:125
@ MD_SPAN_A
Definition: md4c.h:117
#define MD_FLAG_PERMISSIVEATXHEADERS
Definition: md4c.h:306
#define MD_FLAG_WIKILINKS
Definition: md4c.h:317
#define MD_FLAG_PERMISSIVEAUTOLINKS
Definition: md4c.h:320
MD_TEXTTYPE
Definition: md4c.h:152
@ MD_TEXT_HTML
Definition: md4c.h:187
@ MD_TEXT_ENTITY
Definition: md4c.h:176
@ MD_TEXT_NORMAL
Definition: md4c.h:154
@ MD_TEXT_SOFTBR
Definition: md4c.h:164
@ MD_TEXT_BR
Definition: md4c.h:163
@ MD_TEXT_LATEXMATH
Definition: md4c.h:191
@ MD_TEXT_NULLCHAR
Definition: md4c.h:158
@ MD_TEXT_CODE
Definition: md4c.h:182
#define MD_FLAG_NOHTMLSPANS
Definition: md4c.h:311
#define MD_FLAG_COLLAPSEWHITESPACE
Definition: md4c.h:305
#define MD_FLAG_TABLES
Definition: md4c.h:312
#define MD_FLAG_LATEXMATHSPANS
Definition: md4c.h:316
#define MD_FLAG_STRIKETHROUGH
Definition: md4c.h:313
parser
Definition: devices.py:74
void *PRIV() memmove(void *d, const void *s, size_t n)
QT_BEGIN_NAMESPACE bool done
#define TAG(x)
GLenum type
Definition: qopengl.h:270
GLboolean GLboolean GLboolean b
const GLfloat * m
GLenum GLuint GLint level
GLuint64 key
GLboolean GLboolean GLboolean GLboolean a
[7]
GLenum GLuint GLintptr GLsizeiptr size
[1]
GLuint index
[2]
GLuint GLuint end
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat s1
GLuint GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat GLfloat t1
[4]
GLenum GLuint buffer
GLuint GLsizei const GLchar * label
[43]
GLenum GLuint GLenum GLsizei const GLchar * buf
GLenum target
GLbitfield flags
GLint GLsizei GLsizei GLenum GLenum GLsizei void * data
GLuint start
GLuint name
GLfloat n
const GLubyte * c
Definition: qopenglext.h:12701
GLenum GLsizei len
Definition: qopenglext.h:3292
GLenum GLenum GLsizei void * table
Definition: qopenglext.h:2745
#define s6
#define t6
#define s2
char closer(int i)
Definition: object2.h:55
char opener(int i)
Definition: object2.h:43
QT_BEGIN_NAMESPACE bool is_whitespace(char s)
Definition: utils.h:36
file open(QIODevice::ReadOnly)
QString base
QTextStream out(stdout)
[7]
QString title
[35]
QHttpRequestHeader header("GET", QUrl::toPercentEncoding("/index.html"))
[1]
QStringList list
[0]
MD_TEXTTYPE * substr_types
Definition: md4c.c:1337
OFF trivial_offsets[2]
Definition: md4c.c:1342
MD_TEXTTYPE trivial_types[1]
Definition: md4c.c:1341
MD_CHAR fence_char
Definition: md4c.h:268
MD_ATTRIBUTE info
Definition: md4c.h:266
MD_ATTRIBUTE lang
Definition: md4c.h:267
MD_ALIGN align
Definition: md4c.h:280
unsigned data
Definition: md4c.c:4581
unsigned flags
Definition: md4c.c:4574
MD_BLOCKTYPE type
Definition: md4c.c:4573
unsigned n_lines
Definition: md4c.c:4587
unsigned is_loose
Definition: md4c.c:4592
OFF task_mark_off
Definition: md4c.c:4598
unsigned is_task
Definition: md4c.c:4593
unsigned start
Definition: md4c.c:4594
OFF block_byte_off
Definition: md4c.c:4597
unsigned mark_indent
Definition: md4c.c:4595
unsigned contents_indent
Definition: md4c.c:4596
MD_MARK * marks
Definition: md4c.c:170
int last_list_item_starts_with_two_blank_lines
Definition: md4c.c:234
MD_MARKCHAIN mark_chains[13]
Definition: md4c.c:181
OFF html_comment_horizon
Definition: md4c.c:205
int alloc_marks
Definition: md4c.c:172
int alloc_block_bytes
Definition: md4c.c:220
int n_block_bytes
Definition: md4c.c:219
MD_CONTAINER * containers
Definition: md4c.c:223
int alloc_ref_defs
Definition: md4c.c:162
int html_block_type
Definition: md4c.c:232
CHAR * buffer
Definition: md4c.c:156
int n_table_cell_boundaries
Definition: md4c.c:198
OFF html_proc_instr_horizon
Definition: md4c.c:206
MD_PARSER parser
Definition: md4c.c:149
int n_ref_defs
Definition: md4c.c:161
int alloc_containers
Definition: md4c.c:225
void * block_bytes
Definition: md4c.c:217
int n_containers
Definition: md4c.c:224
int unresolved_link_tail
Definition: md4c.c:202
int n_marks
Definition: md4c.c:171
void * userdata
Definition: md4c.c:150
unsigned code_indent_offset
Definition: md4c.c:228
SZ size
Definition: md4c.c:148
SZ code_fence_length
Definition: md4c.c:231
MD_REF_DEF * ref_defs
Definition: md4c.c:160
char mark_char_map[256]
Definition: md4c.c:177
int last_line_has_list_loosening_effect
Definition: md4c.c:233
unsigned alloc_buffer
Definition: md4c.c:157
OFF html_decl_horizon
Definition: md4c.c:207
int unresolved_link_head
Definition: md4c.c:201
MD_BLOCK * current_block
Definition: md4c.c:218
int doc_ends_with_newline
Definition: md4c.c:153
int ref_def_hashtable_size
Definition: md4c.c:164
OFF html_cdata_horizon
Definition: md4c.c:208
void ** ref_def_hashtable
Definition: md4c.c:163
const CHAR * text
Definition: md4c.c:147
unsigned data
Definition: md4c.c:255
unsigned indent
Definition: md4c.c:258
MD_LINETYPE type
Definition: md4c.c:254
OFF end
Definition: md4c.c:264
OFF beg
Definition: md4c.c:263
CHAR * title
Definition: md4c.c:1849
int title_needs_free
Definition: md4c.c:1851
unsigned char flags
Definition: md4c.c:2450
int next
Definition: md4c.c:2448
OFF beg
Definition: md4c.c:2438
CHAR ch
Definition: md4c.c:2449
OFF end
Definition: md4c.c:2439
int prev
Definition: md4c.c:2447
int(* text)(MD_TEXTTYPE, const MD_CHAR *, MD_SIZE, void *)
Definition: md4c.h:367
unsigned flags
Definition: md4c.h:344
MD_REF_DEF * ref_defs[]
Definition: md4c.c:1622
unsigned char title_needs_free
Definition: md4c.c:1514
SZ label_size
Definition: md4c.c:1509
CHAR * label
Definition: md4c.c:1506
OFF dest_end
Definition: md4c.c:1512
OFF dest_beg
Definition: md4c.c:1511
SZ title_size
Definition: md4c.c:1510
CHAR * title
Definition: md4c.c:1507
unsigned char label_needs_free
Definition: md4c.c:1513
unsigned hash
Definition: md4c.c:1508
MD_ATTRIBUTE href
Definition: md4c.h:285
MD_ATTRIBUTE title
Definition: md4c.h:286
MD_ATTRIBUTE target
Definition: md4c.h:297
unsigned codepoints[3]
Definition: md4c.c:475
unsigned n_codepoints
Definition: md4c.c:476
QThreadStorage< int * > dummy[8]
void build(const QString &name)
XmlOutput::xml_output attr(const QString &name, const QString &value)
Definition: xmloutput.h:202