QtBase  v6.3.1
main.cpp
Go to the documentation of this file.
1 /****************************************************************************
2 **
3 ** Copyright (C) 2020 The Qt Company Ltd.
4 ** Contact: https://www.qt.io/licensing/
5 **
6 ** This file is part of the utils of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:GPL-EXCEPT$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see https://www.qt.io/terms-conditions. For further
15 ** information use the contact form at https://www.qt.io/contact-us.
16 **
17 ** GNU General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
21 ** included in the packaging of this file. Please review the following
22 ** information to ensure the GNU General Public License requirements will
23 ** be met: https://www.gnu.org/licenses/gpl-3.0.html.
24 **
25 ** $QT_END_LICENSE$
26 **
27 ****************************************************************************/
28 
29 #include <qbytearray.h>
30 #include <qchar.h>
31 #include <qdebug.h>
32 #include <qfile.h>
33 #include <qhash.h>
34 #include <qlist.h>
35 #include <qstring.h>
36 #include <qbitarray.h>
37 #include <private/qstringiterator_p.h>
38 #if 0
39 #include <private/qunicodetables_p.h>
40 #endif
41 
42 #define DATA_VERSION_S "14.0"
43 #define DATA_VERSION_STR "QChar::Unicode_14_0"
44 
45 
47 
48 static void initAgeMap()
49 {
50  struct AgeMap {
52  const char *age;
53  } ageMap[] = {
54  { QChar::Unicode_1_1, "1.1" },
55  { QChar::Unicode_2_0, "2.0" },
56  { QChar::Unicode_2_1_2, "2.1" },
57  { QChar::Unicode_3_0, "3.0" },
58  { QChar::Unicode_3_1, "3.1" },
59  { QChar::Unicode_3_2, "3.2" },
60  { QChar::Unicode_4_0, "4.0" },
61  { QChar::Unicode_4_1, "4.1" },
62  { QChar::Unicode_5_0, "5.0" },
63  { QChar::Unicode_5_1, "5.1" },
64  { QChar::Unicode_5_2, "5.2" },
65  { QChar::Unicode_6_0, "6.0" },
66  { QChar::Unicode_6_1, "6.1" },
67  { QChar::Unicode_6_2, "6.2" },
68  { QChar::Unicode_6_3, "6.3" },
69  { QChar::Unicode_7_0, "7.0" },
70  { QChar::Unicode_8_0, "8.0" },
71  { QChar::Unicode_9_0, "9.0" },
72  { QChar::Unicode_10_0, "10.0" },
73  { QChar::Unicode_11_0, "11.0" },
74  { QChar::Unicode_12_0, "12.0" },
75  { QChar::Unicode_12_1, "12.1" }, // UCD Revision 24
76  { QChar::Unicode_13_0, "13.0" }, // UCD Revision 26
77  { QChar::Unicode_14_0, "14.0" }, // UCD Revision 28
79  };
80  AgeMap *d = ageMap;
81  while (d->age) {
82  age_map.insert(d->age, d->version);
83  ++d;
84  }
85 }
86 
// Source text of the EastAsianWidth enum declaration, held as a string
// constant. It mirrors the EastAsianWidth enum declared in this file.
static const char *east_asian_width_string =
    "enum class EastAsianWidth : unsigned int {\n"
    " A,\n"
    " F,\n"
    " H,\n"
    " N,\n"
    " Na,\n"
    " W,\n"
    "};\n"
    "\n";
98 
// East Asian width classes handled by this tool. The numeric values follow the
// declaration order (A == 0 ... W == 5); east_asian_width_string holds the same
// declaration as text.
enum class EastAsianWidth : unsigned int {
    A = 0,
    F,
    H,
    N,
    Na,
    W,
};
107 
108 static QHash<QByteArray, EastAsianWidth> eastAsianWidthMap;
109 
110 static void initEastAsianWidthMap()
111 {
112  constexpr struct W {
114  const char *name;
115  } widths[] = {
116  { EastAsianWidth::A, "A" },
117  { EastAsianWidth::F, "F" },
118  { EastAsianWidth::H, "H" },
119  { EastAsianWidth::N, "N" },
120  { EastAsianWidth::Na, "Na" },
121  { EastAsianWidth::W, "W" },
122  };
123 
124  for (auto &w : widths)
125  eastAsianWidthMap.insert(w.name, w.width);
126 }
127 
128 static QHash<QByteArray, QChar::Category> categoryMap;
129 
130 static void initCategoryMap()
131 {
132  struct Cat {
133  QChar::Category cat;
134  const char *name;
135  } categories[] = {
136  { QChar::Mark_NonSpacing, "Mn" },
138  { QChar::Mark_Enclosing, "Me" },
139 
140  { QChar::Number_DecimalDigit, "Nd" },
141  { QChar::Number_Letter, "Nl" },
142  { QChar::Number_Other, "No" },
143 
144  { QChar::Separator_Space, "Zs" },
145  { QChar::Separator_Line, "Zl" },
146  { QChar::Separator_Paragraph, "Zp" },
147 
148  { QChar::Other_Control, "Cc" },
149  { QChar::Other_Format, "Cf" },
150  { QChar::Other_Surrogate, "Cs" },
151  { QChar::Other_PrivateUse, "Co" },
152  { QChar::Other_NotAssigned, "Cn" },
153 
154  { QChar::Letter_Uppercase, "Lu" },
155  { QChar::Letter_Lowercase, "Ll" },
156  { QChar::Letter_Titlecase, "Lt" },
157  { QChar::Letter_Modifier, "Lm" },
158  { QChar::Letter_Other, "Lo" },
159 
161  { QChar::Punctuation_Dash, "Pd" },
162  { QChar::Punctuation_Open, "Ps" },
163  { QChar::Punctuation_Close, "Pe" },
166  { QChar::Punctuation_Other, "Po" },
167 
168  { QChar::Symbol_Math, "Sm" },
169  { QChar::Symbol_Currency, "Sc" },
170  { QChar::Symbol_Modifier, "Sk" },
171  { QChar::Symbol_Other, "So" },
173  };
174  Cat *c = categories;
175  while (c->name) {
176  categoryMap.insert(c->name, c->cat);
177  ++c;
178  }
179 }
180 
181 
182 static QHash<QByteArray, QChar::Decomposition> decompositionMap;
183 
184 static void initDecompositionMap()
185 {
186  struct Dec {
188  const char *name;
189  } decompositions[] = {
190  { QChar::Canonical, "<canonical>" },
191  { QChar::Font, "<font>" },
192  { QChar::NoBreak, "<noBreak>" },
193  { QChar::Initial, "<initial>" },
194  { QChar::Medial, "<medial>" },
195  { QChar::Final, "<final>" },
196  { QChar::Isolated, "<isolated>" },
197  { QChar::Circle, "<circle>" },
198  { QChar::Super, "<super>" },
199  { QChar::Sub, "<sub>" },
200  { QChar::Vertical, "<vertical>" },
201  { QChar::Wide, "<wide>" },
202  { QChar::Narrow, "<narrow>" },
203  { QChar::Small, "<small>" },
204  { QChar::Square, "<square>" },
205  { QChar::Compat, "<compat>" },
206  { QChar::Fraction, "<fraction>" },
208  };
209  Dec *d = decompositions;
210  while (d->name) {
211  decompositionMap.insert(d->name, d->dec);
212  ++d;
213  }
214 }
215 
216 
217 enum Direction {
241 
243 };
244 
245 static QHash<QByteArray, Direction> directionMap;
246 
247 static void initDirectionMap()
248 {
249  struct Dir {
250  Direction dir;
251  const char *name;
252  } directions[] = {
253  { DirL, "L" },
254  { DirR, "R" },
255  { DirEN, "EN" },
256  { DirES, "ES" },
257  { DirET, "ET" },
258  { DirAN, "AN" },
259  { DirCS, "CS" },
260  { DirB, "B" },
261  { DirS, "S" },
262  { DirWS, "WS" },
263  { DirON, "ON" },
264  { DirLRE, "LRE" },
265  { DirLRO, "LRO" },
266  { DirAL, "AL" },
267  { DirRLE, "RLE" },
268  { DirRLO, "RLO" },
269  { DirPDF, "PDF" },
270  { DirNSM, "NSM" },
271  { DirBN, "BN" },
272  { DirLRI, "LRI" },
273  { DirRLI, "RLI" },
274  { DirFSI, "FSI" },
275  { DirPDI, "PDI" },
276  { Dir_Unassigned, 0 }
277  };
278  Dir *d = directions;
279  while (d->name) {
280  directionMap.insert(d->name, d->dir);
281  ++d;
282  }
283 }
284 
285 
293 
295 };
296 
297 static QHash<QByteArray, JoiningType> joining_map;
298 
299 static void initJoiningMap()
300 {
301  struct JoiningList {
302  JoiningType joining;
303  const char *name;
304  } joinings[] = {
305  { Joining_None, "U" },
306  { Joining_Causing, "C" },
307  { Joining_Dual, "D" },
308  { Joining_Right, "R" },
309  { Joining_Left, "L" },
310  { Joining_Transparent, "T" },
311  { Joining_Unassigned, 0 }
312  };
313  JoiningList *d = joinings;
314  while (d->name) {
315  joining_map.insert(d->name, d->joining);
316  ++d;
317  }
318 }
319 
320 
// Source text of the GraphemeBreakClass enum declaration, held as a string.
static const char *grapheme_break_class_string =
R"cpp(enum GraphemeBreakClass {
 GraphemeBreak_Any,
 GraphemeBreak_CR,
 GraphemeBreak_LF,
 GraphemeBreak_Control,
 GraphemeBreak_Extend,
 GraphemeBreak_ZWJ,
 GraphemeBreak_RegionalIndicator,
 GraphemeBreak_Prepend,
 GraphemeBreak_SpacingMark,
 GraphemeBreak_L,
 GraphemeBreak_V,
 GraphemeBreak_T,
 GraphemeBreak_LV,
 GraphemeBreak_LVT,
 GraphemeBreak_Extended_Pictographic,

 NumGraphemeBreakClasses
};

)cpp";
341 
358 
360 };
361 
362 static QHash<QByteArray, GraphemeBreakClass> grapheme_break_map;
363 
364 static void initGraphemeBreak()
365 {
366  struct GraphemeBreakList {
367  GraphemeBreakClass brk;
368  const char *name;
369  } breaks[] = {
370  { GraphemeBreak_Any, "Any" },
371  { GraphemeBreak_CR, "CR" },
372  { GraphemeBreak_LF, "LF" },
373  { GraphemeBreak_Control, "Control" },
374  { GraphemeBreak_Extend, "Extend" },
375  { GraphemeBreak_ZWJ, "ZWJ" },
376  { GraphemeBreak_RegionalIndicator, "Regional_Indicator" },
377  { GraphemeBreak_Prepend, "Prepend" },
378  { GraphemeBreak_SpacingMark, "SpacingMark" },
379  { GraphemeBreak_L, "L" },
380  { GraphemeBreak_V, "V" },
381  { GraphemeBreak_T, "T" },
382  { GraphemeBreak_LV, "LV" },
383  { GraphemeBreak_LVT, "LVT" },
384  { GraphemeBreak_Extended_Pictographic, "Extended_Pictographic" },
385  { GraphemeBreak_Unassigned, nullptr }
386  };
387  GraphemeBreakList *d = breaks;
388  while (d->name) {
389  grapheme_break_map.insert(d->name, d->brk);
390  ++d;
391  }
392 }
393 
394 
// Source text of the WordBreakClass enum declaration, held as a string.
static const char *word_break_class_string =
R"cpp(enum WordBreakClass {
 WordBreak_Any,
 WordBreak_CR,
 WordBreak_LF,
 WordBreak_Newline,
 WordBreak_Extend,
 WordBreak_ZWJ,
 WordBreak_Format,
 WordBreak_RegionalIndicator,
 WordBreak_Katakana,
 WordBreak_HebrewLetter,
 WordBreak_ALetter,
 WordBreak_SingleQuote,
 WordBreak_DoubleQuote,
 WordBreak_MidNumLet,
 WordBreak_MidLetter,
 WordBreak_MidNum,
 WordBreak_Numeric,
 WordBreak_ExtendNumLet,
 WordBreak_WSegSpace,

 NumWordBreakClasses
};

)cpp";
419 
440 
442 };
443 
444 static QHash<QByteArray, WordBreakClass> word_break_map;
445 
446 static void initWordBreak()
447 {
448  struct WordBreakList {
449  WordBreakClass brk;
450  const char *name;
451  } breaks[] = {
452  { WordBreak_Any, "Any" },
453  { WordBreak_CR, "CR" },
454  { WordBreak_LF, "LF" },
455  { WordBreak_Newline, "Newline" },
456  { WordBreak_Extend, "Extend" },
457  { WordBreak_ZWJ, "ZWJ" },
458  { WordBreak_Format, "Format" },
459  { WordBreak_RegionalIndicator, "Regional_Indicator" },
460  { WordBreak_Katakana, "Katakana" },
461  { WordBreak_HebrewLetter, "Hebrew_Letter" },
462  { WordBreak_ALetter, "ALetter" },
463  { WordBreak_SingleQuote, "Single_Quote" },
464  { WordBreak_DoubleQuote, "Double_Quote" },
465  { WordBreak_MidNumLet, "MidNumLet" },
466  { WordBreak_MidLetter, "MidLetter" },
467  { WordBreak_MidNum, "MidNum" },
468  { WordBreak_Numeric, "Numeric" },
469  { WordBreak_ExtendNumLet, "ExtendNumLet" },
470  { WordBreak_WSegSpace, "WSegSpace" },
471  { WordBreak_Unassigned, 0 }
472  };
473  WordBreakList *d = breaks;
474  while (d->name) {
475  word_break_map.insert(d->name, d->brk);
476  ++d;
477  }
478 }
479 
480 
// Source text of the SentenceBreakClass enum declaration, held as a string.
static const char *sentence_break_class_string =
R"cpp(enum SentenceBreakClass {
 SentenceBreak_Any,
 SentenceBreak_CR,
 SentenceBreak_LF,
 SentenceBreak_Sep,
 SentenceBreak_Extend,
 SentenceBreak_Sp,
 SentenceBreak_Lower,
 SentenceBreak_Upper,
 SentenceBreak_OLetter,
 SentenceBreak_Numeric,
 SentenceBreak_ATerm,
 SentenceBreak_SContinue,
 SentenceBreak_STerm,
 SentenceBreak_Close,

 NumSentenceBreakClasses
};

)cpp";
500 
516 
518 };
519 
520 static QHash<QByteArray, SentenceBreakClass> sentence_break_map;
521 
522 static void initSentenceBreak()
523 {
524  struct SentenceBreakList {
525  SentenceBreakClass brk;
526  const char *name;
527  } breaks[] = {
528  { SentenceBreak_Any, "Any" },
529  { SentenceBreak_CR, "CR" },
530  { SentenceBreak_LF, "LF" },
531  { SentenceBreak_Sep, "Sep" },
532  { SentenceBreak_Extend, "Extend" },
533  { SentenceBreak_Extend, "Format" },
534  { SentenceBreak_Sp, "Sp" },
535  { SentenceBreak_Lower, "Lower" },
536  { SentenceBreak_Upper, "Upper" },
537  { SentenceBreak_OLetter, "OLetter" },
538  { SentenceBreak_Numeric, "Numeric" },
539  { SentenceBreak_ATerm, "ATerm" },
540  { SentenceBreak_SContinue, "SContinue" },
541  { SentenceBreak_STerm, "STerm" },
542  { SentenceBreak_Close, "Close" },
544  };
545  SentenceBreakList *d = breaks;
546  while (d->name) {
547  sentence_break_map.insert(d->name, d->brk);
548  ++d;
549  }
550 }
551 
552 
// Source text of the LineBreakClass enum declaration (including its two
// leading comment lines), held as a string.
static const char *line_break_class_string =
R"cpp(// see http://www.unicode.org/reports/tr14/tr14-30.html
// we don't use the XX and AI classes and map them to AL instead.
enum LineBreakClass {
 LineBreak_OP, LineBreak_CL, LineBreak_CP, LineBreak_QU, LineBreak_GL,
 LineBreak_NS, LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR,
 LineBreak_PO, LineBreak_NU, LineBreak_AL, LineBreak_HL, LineBreak_ID,
 LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,
 LineBreak_ZW, LineBreak_CM, LineBreak_WJ, LineBreak_H2, LineBreak_H3,
 LineBreak_JL, LineBreak_JV, LineBreak_JT, LineBreak_RI, LineBreak_CB,
 LineBreak_EB, LineBreak_EM, LineBreak_ZWJ,
 LineBreak_SA, LineBreak_SG, LineBreak_SP,
 LineBreak_CR, LineBreak_LF, LineBreak_BK,

 NumLineBreakClasses
};

)cpp";
569 
580 
582 };
583 
584 static QHash<QByteArray, LineBreakClass> line_break_map;
585 
586 static void initLineBreak()
587 {
588  // ### Classes XX and AI are left out and mapped to AL for now.
589  // ### Class NL is mapped to BK.
590  // ### Treating characters of class CJ as class NS will give CSS strict line breaking;
591  // treating them as class ID will give CSS normal breaking.
592  struct LineBreakList {
593  LineBreakClass brk;
594  const char *name;
595  } breaks[] = {
596  { LineBreak_BK, "BK" },
597  { LineBreak_CR, "CR" },
598  { LineBreak_LF, "LF" },
599  { LineBreak_CM, "CM" },
600  { LineBreak_BK, "NL" },
601  { LineBreak_SG, "SG" },
602  { LineBreak_WJ, "WJ" },
603  { LineBreak_ZW, "ZW" },
604  { LineBreak_GL, "GL" },
605  { LineBreak_SP, "SP" },
606  { LineBreak_B2, "B2" },
607  { LineBreak_BA, "BA" },
608  { LineBreak_BB, "BB" },
609  { LineBreak_HY, "HY" },
610  { LineBreak_CB, "CB" },
611  { LineBreak_NS, "CJ" },
612  { LineBreak_CL, "CL" },
613  { LineBreak_CP, "CP" },
614  { LineBreak_EX, "EX" },
615  { LineBreak_IN, "IN" },
616  { LineBreak_NS, "NS" },
617  { LineBreak_OP, "OP" },
618  { LineBreak_QU, "QU" },
619  { LineBreak_IS, "IS" },
620  { LineBreak_NU, "NU" },
621  { LineBreak_PO, "PO" },
622  { LineBreak_PR, "PR" },
623  { LineBreak_SY, "SY" },
624  { LineBreak_AL, "AI" },
625  { LineBreak_AL, "AL" },
626  { LineBreak_HL, "HL" },
627  { LineBreak_H2, "H2" },
628  { LineBreak_H3, "H3" },
629  { LineBreak_ID, "ID" },
630  { LineBreak_JL, "JL" },
631  { LineBreak_JV, "JV" },
632  { LineBreak_JT, "JT" },
633  { LineBreak_RI, "RI" },
634  { LineBreak_SA, "SA" },
635  { LineBreak_AL, "XX" },
636  { LineBreak_EB, "EB" },
637  { LineBreak_EM, "EM" },
638  { LineBreak_ZWJ, "ZWJ" },
639  { LineBreak_Unassigned, 0 }
640  };
641  LineBreakList *d = breaks;
642  while (d->name) {
643  line_break_map.insert(d->name, d->brk);
644  ++d;
645  }
646 }
647 
648 
649 static QHash<QByteArray, QChar::Script> scriptMap;
650 
651 static void initScriptMap()
652 {
653  struct Scrpt {
655  const char *name;
656  } scripts[] = {
657  // general
658  { QChar::Script_Unknown, "Unknown" },
659  { QChar::Script_Inherited, "Inherited" },
660  { QChar::Script_Common, "Common" },
661  // pre-4.0
662  { QChar::Script_Latin, "Latin" },
663  { QChar::Script_Greek, "Greek" },
664  { QChar::Script_Cyrillic, "Cyrillic" },
665  { QChar::Script_Armenian, "Armenian" },
666  { QChar::Script_Hebrew, "Hebrew" },
667  { QChar::Script_Arabic, "Arabic" },
668  { QChar::Script_Syriac, "Syriac" },
669  { QChar::Script_Thaana, "Thaana" },
670  { QChar::Script_Devanagari, "Devanagari" },
671  { QChar::Script_Bengali, "Bengali" },
672  { QChar::Script_Gurmukhi, "Gurmukhi" },
673  { QChar::Script_Gujarati, "Gujarati" },
674  { QChar::Script_Oriya, "Oriya" },
675  { QChar::Script_Tamil, "Tamil" },
676  { QChar::Script_Telugu, "Telugu" },
677  { QChar::Script_Kannada, "Kannada" },
678  { QChar::Script_Malayalam, "Malayalam" },
679  { QChar::Script_Sinhala, "Sinhala" },
680  { QChar::Script_Thai, "Thai" },
681  { QChar::Script_Lao, "Lao" },
682  { QChar::Script_Tibetan, "Tibetan" },
683  { QChar::Script_Myanmar, "Myanmar" },
684  { QChar::Script_Georgian, "Georgian" },
685  { QChar::Script_Hangul, "Hangul" },
686  { QChar::Script_Ethiopic, "Ethiopic" },
687  { QChar::Script_Cherokee, "Cherokee" },
688  { QChar::Script_CanadianAboriginal, "CanadianAboriginal" },
689  { QChar::Script_Ogham, "Ogham" },
690  { QChar::Script_Runic, "Runic" },
691  { QChar::Script_Khmer, "Khmer" },
692  { QChar::Script_Mongolian, "Mongolian" },
693  { QChar::Script_Hiragana, "Hiragana" },
694  { QChar::Script_Katakana, "Katakana" },
695  { QChar::Script_Bopomofo, "Bopomofo" },
696  { QChar::Script_Han, "Han" },
697  { QChar::Script_Yi, "Yi" },
698  { QChar::Script_OldItalic, "OldItalic" },
699  { QChar::Script_Gothic, "Gothic" },
700  { QChar::Script_Deseret, "Deseret" },
701  { QChar::Script_Tagalog, "Tagalog" },
702  { QChar::Script_Hanunoo, "Hanunoo" },
703  { QChar::Script_Buhid, "Buhid" },
704  { QChar::Script_Tagbanwa, "Tagbanwa" },
705  { QChar::Script_Coptic, "Coptic" },
706  // 4.0
707  { QChar::Script_Limbu, "Limbu" },
708  { QChar::Script_TaiLe, "TaiLe" },
709  { QChar::Script_LinearB, "LinearB" },
710  { QChar::Script_Ugaritic, "Ugaritic" },
711  { QChar::Script_Shavian, "Shavian" },
712  { QChar::Script_Osmanya, "Osmanya" },
713  { QChar::Script_Cypriot, "Cypriot" },
714  { QChar::Script_Braille, "Braille" },
715  // 4.1
716  { QChar::Script_Buginese, "Buginese" },
717  { QChar::Script_NewTaiLue, "NewTaiLue" },
718  { QChar::Script_Glagolitic, "Glagolitic" },
719  { QChar::Script_Tifinagh, "Tifinagh" },
720  { QChar::Script_SylotiNagri, "SylotiNagri" },
721  { QChar::Script_OldPersian, "OldPersian" },
722  { QChar::Script_Kharoshthi, "Kharoshthi" },
723  // 5.0
724  { QChar::Script_Balinese, "Balinese" },
725  { QChar::Script_Cuneiform, "Cuneiform" },
726  { QChar::Script_Phoenician, "Phoenician" },
727  { QChar::Script_PhagsPa, "PhagsPa" },
728  { QChar::Script_Nko, "Nko" },
729  // 5.1
730  { QChar::Script_Sundanese, "Sundanese" },
731  { QChar::Script_Lepcha, "Lepcha" },
732  { QChar::Script_OlChiki, "OlChiki" },
733  { QChar::Script_Vai, "Vai" },
734  { QChar::Script_Saurashtra, "Saurashtra" },
735  { QChar::Script_KayahLi, "KayahLi" },
736  { QChar::Script_Rejang, "Rejang" },
737  { QChar::Script_Lycian, "Lycian" },
738  { QChar::Script_Carian, "Carian" },
739  { QChar::Script_Lydian, "Lydian" },
740  { QChar::Script_Cham, "Cham" },
741  // 5.2
742  { QChar::Script_TaiTham, "TaiTham" },
743  { QChar::Script_TaiViet, "TaiViet" },
744  { QChar::Script_Avestan, "Avestan" },
745  { QChar::Script_EgyptianHieroglyphs, "EgyptianHieroglyphs" },
746  { QChar::Script_Samaritan, "Samaritan" },
747  { QChar::Script_Lisu, "Lisu" },
748  { QChar::Script_Bamum, "Bamum" },
749  { QChar::Script_Javanese, "Javanese" },
750  { QChar::Script_MeeteiMayek, "MeeteiMayek" },
751  { QChar::Script_ImperialAramaic, "ImperialAramaic" },
752  { QChar::Script_OldSouthArabian, "OldSouthArabian" },
753  { QChar::Script_InscriptionalParthian, "InscriptionalParthian" },
754  { QChar::Script_InscriptionalPahlavi, "InscriptionalPahlavi" },
755  { QChar::Script_OldTurkic, "OldTurkic" },
756  { QChar::Script_Kaithi, "Kaithi" },
757  // 6.0
758  { QChar::Script_Batak, "Batak" },
759  { QChar::Script_Brahmi, "Brahmi" },
760  { QChar::Script_Mandaic, "Mandaic" },
761  // 6.1
762  { QChar::Script_Chakma, "Chakma" },
763  { QChar::Script_MeroiticCursive, "MeroiticCursive" },
764  { QChar::Script_MeroiticHieroglyphs, "MeroiticHieroglyphs" },
765  { QChar::Script_Miao, "Miao" },
766  { QChar::Script_Sharada, "Sharada" },
767  { QChar::Script_SoraSompeng, "SoraSompeng" },
768  { QChar::Script_Takri, "Takri" },
769  // 7.0
770  { QChar::Script_CaucasianAlbanian, "CaucasianAlbanian" },
771  { QChar::Script_BassaVah, "BassaVah" },
772  { QChar::Script_Duployan, "Duployan" },
773  { QChar::Script_Elbasan, "Elbasan" },
774  { QChar::Script_Grantha, "Grantha" },
775  { QChar::Script_PahawhHmong, "PahawhHmong" },
776  { QChar::Script_Khojki, "Khojki" },
777  { QChar::Script_LinearA, "LinearA" },
778  { QChar::Script_Mahajani, "Mahajani" },
779  { QChar::Script_Manichaean, "Manichaean" },
780  { QChar::Script_MendeKikakui, "MendeKikakui" },
781  { QChar::Script_Modi, "Modi" },
782  { QChar::Script_Mro, "Mro" },
783  { QChar::Script_OldNorthArabian, "OldNorthArabian" },
784  { QChar::Script_Nabataean, "Nabataean" },
785  { QChar::Script_Palmyrene, "Palmyrene" },
786  { QChar::Script_PauCinHau, "PauCinHau" },
787  { QChar::Script_OldPermic, "OldPermic" },
788  { QChar::Script_PsalterPahlavi, "PsalterPahlavi" },
789  { QChar::Script_Siddham, "Siddham" },
790  { QChar::Script_Khudawadi, "Khudawadi" },
791  { QChar::Script_Tirhuta, "Tirhuta" },
792  { QChar::Script_WarangCiti, "WarangCiti" },
793  // 8.0
794  { QChar::Script_Ahom, "Ahom" },
795  { QChar::Script_AnatolianHieroglyphs, "AnatolianHieroglyphs" },
796  { QChar::Script_Hatran, "Hatran" },
797  { QChar::Script_Multani, "Multani" },
798  { QChar::Script_OldHungarian, "OldHungarian" },
799  { QChar::Script_SignWriting, "SignWriting" },
800  // 9.0
801  { QChar::Script_Adlam, "Adlam" },
802  { QChar::Script_Bhaiksuki, "Bhaiksuki" },
803  { QChar::Script_Marchen, "Marchen" },
804  { QChar::Script_Newa, "Newa" },
805  { QChar::Script_Osage, "Osage" },
806  { QChar::Script_Tangut, "Tangut" },
807  // 10.0
808  { QChar::Script_MasaramGondi, "MasaramGondi" },
809  { QChar::Script_Nushu, "Nushu" },
810  { QChar::Script_Soyombo, "Soyombo" },
811  { QChar::Script_ZanabazarSquare, "ZanabazarSquare" },
812  // 12.1
813  { QChar::Script_Dogra, "Dogra" },
814  { QChar::Script_GunjalaGondi, "GunjalaGondi" },
815  { QChar::Script_HanifiRohingya, "HanifiRohingya" },
816  { QChar::Script_Makasar, "Makasar" },
817  { QChar::Script_Medefaidrin, "Medefaidrin" },
818  { QChar::Script_OldSogdian, "OldSogdian" },
819  { QChar::Script_Sogdian, "Sogdian" },
820  { QChar::Script_Elymaic, "Elymaic" },
821  { QChar::Script_Nandinagari, "Nandinagari" },
822  { QChar::Script_NyiakengPuachueHmong, "NyiakengPuachueHmong" },
823  { QChar::Script_Wancho, "Wancho" },
824  // 13.0
825  { QChar::Script_Chorasmian, "Chorasmian" },
826  { QChar::Script_DivesAkuru, "DivesAkuru" },
827  { QChar::Script_KhitanSmallScript, "KhitanSmallScript" },
828  { QChar::Script_Yezidi, "Yezidi" },
829 
830  // 14.0
831  { QChar::Script_CyproMinoan, "CyproMinoan"},
832  { QChar::Script_OldUyghur, "OldUyghur"},
833  { QChar::Script_Tangsa, "Tangsa"},
834  { QChar::Script_Toto, "Toto"},
835  { QChar::Script_Vithkuqi, "Vithkuqi"},
836 
837  // unhandled
838  { QChar::Script_Unknown, 0 }
839  };
840  Scrpt *p = scripts;
841  while (p->name) {
842  scriptMap.insert(p->name, p->script);
843  ++p;
844  }
845 }
846 
// IDNA status as present in the data file.
// The first five enumerators must keep the same order and values as
// IdnaStatus; the two STD3 variants come after them because initIdnaStatusMap
// refers to them by name.
enum class IdnaRawStatus : unsigned int {
    Disallowed,
    Valid,
    Ignored,
    Mapped,
    Deviation,
    DisallowedStd3Valid,
    DisallowedStd3Mapped,
};
857 
858 static QHash<QByteArray, IdnaRawStatus> idnaStatusMap;
859 
860 static void initIdnaStatusMap()
861 {
862  struct {
863  IdnaRawStatus status;
864  const char *name;
865  } data[] = {
866  {IdnaRawStatus::Disallowed, "disallowed"},
867  {IdnaRawStatus::Valid, "valid"},
868  {IdnaRawStatus::Ignored, "ignored"},
869  {IdnaRawStatus::Mapped, "mapped"},
870  {IdnaRawStatus::Deviation, "deviation"},
871  {IdnaRawStatus::DisallowedStd3Valid, "disallowed_STD3_valid"},
872  {IdnaRawStatus::DisallowedStd3Mapped, "disallowed_STD3_mapped"},
873  };
874 
875  for (const auto &entry : data)
876  idnaStatusMap[entry.name] = entry.status;
877 }
878 
// Source text of the IdnaStatus enum declaration, held as a string.
static const char *idna_status_string =
R"cpp(enum class IdnaStatus : unsigned int {
 Disallowed,
 Valid,
 Ignored,
 Mapped,
 Deviation
};

)cpp";
887 
// IDNA status after resolution, i.e. in the form stored in the database.
// Qt relaxes the host name validity rules so that underscores are allowed.
// NOTE: keep the enumerators in the same order, with the same values, as the
// corresponding ones in IdnaRawStatus.
enum class IdnaStatus : unsigned int {
    Disallowed = 0,
    Valid,
    Ignored,
    Mapped,
    Deviation,
};
899 
// Keep this one in sync with the code in createPropertyInfo
// Source text of the Case enum, the Properties struct and the two
// properties() declarations, held as a string.
// (The nfQuickCheck field could be narrowed.)
static const char *property_string =
R"cpp(enum Case {
 LowerCase,
 UpperCase,
 TitleCase,
 CaseFold,

 NumCases
};

struct Properties {
 ushort category : 8; /* 5 used */
 ushort direction : 8; /* 5 used */
 ushort combiningClass : 8;
 ushort joining : 3;
 signed short digitValue : 5;
 signed short mirrorDiff : 16;
 ushort unicodeVersion : 5; /* 5 used */
 ushort eastAsianWidth : 3; /* 3 used */
 ushort nfQuickCheck : 8;
#ifdef Q_OS_WASM
 unsigned char : 0; //wasm 64 packing trick
#endif
 struct {
 ushort special : 1;
 signed short diff : 15;
 } cases[NumCases];
#ifdef Q_OS_WASM
 unsigned char : 0; //wasm 64 packing trick
#endif
 ushort graphemeBreakClass : 5; /* 5 used */
 ushort wordBreakClass : 5; /* 5 used */
 ushort lineBreakClass : 6; /* 6 used */
 ushort sentenceBreakClass : 4; /* 4 used */
 ushort idnaStatus : 4; /* 3 used */
 ushort script : 8;
};

Q_CORE_EXPORT const Properties * QT_FASTCALL properties(char32_t ucs4) noexcept;
Q_CORE_EXPORT const Properties * QT_FASTCALL properties(char16_t ucs2) noexcept;

)cpp";
941 
// Source text of the per-property accessor declarations (an exported
// char32_t overload plus an inline QChar wrapper for each), held as a string.
static const char *methods =
R"cpp(Q_CORE_EXPORT GraphemeBreakClass QT_FASTCALL graphemeBreakClass(char32_t ucs4) noexcept;
inline GraphemeBreakClass graphemeBreakClass(QChar ch) noexcept
{ return graphemeBreakClass(ch.unicode()); }

Q_CORE_EXPORT WordBreakClass QT_FASTCALL wordBreakClass(char32_t ucs4) noexcept;
inline WordBreakClass wordBreakClass(QChar ch) noexcept
{ return wordBreakClass(ch.unicode()); }

Q_CORE_EXPORT SentenceBreakClass QT_FASTCALL sentenceBreakClass(char32_t ucs4) noexcept;
inline SentenceBreakClass sentenceBreakClass(QChar ch) noexcept
{ return sentenceBreakClass(ch.unicode()); }

Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(char32_t ucs4) noexcept;
inline LineBreakClass lineBreakClass(QChar ch) noexcept
{ return lineBreakClass(ch.unicode()); }

Q_CORE_EXPORT IdnaStatus QT_FASTCALL idnaStatus(char32_t ucs4) noexcept;
inline IdnaStatus idnaStatus(QChar ch) noexcept
{ return idnaStatus(ch.unicode()); }

Q_CORE_EXPORT QStringView QT_FASTCALL idnaMapping(char32_t usc4) noexcept;
inline QStringView idnaMapping(QChar ch) noexcept
{ return idnaMapping(ch.unicode()); }

Q_CORE_EXPORT EastAsianWidth QT_FASTCALL eastAsianWidth(char32_t ucs4) noexcept;
inline EastAsianWidth eastAsianWidth(QChar ch) noexcept
{ return eastAsianWidth(ch.unicode()); }

)cpp";
971 
972 static const int SizeOfPropertiesStruct = 20;
973 
974 static const QByteArray sizeOfPropertiesStructCheck =
975  "static_assert(sizeof(Properties) == " + QByteArray::number(SizeOfPropertiesStruct) + ");\n\n";
976 
979  : combiningClass(0)
980  , category(QChar::Other_NotAssigned) // Cn
981  , direction(QChar::DirL)
983  , age(QChar::Unicode_Unassigned)
984  , mirrorDiff(0) {}
985 
986  bool operator==(const PropertyFlags &o) const {
987  return (combiningClass == o.combiningClass
988  && category == o.category
989  && direction == o.direction
990  && joining == o.joining
991  && age == o.age
992  && eastAsianWidth == o.eastAsianWidth
993  && digitValue == o.digitValue
994  && mirrorDiff == o.mirrorDiff
995  && lowerCaseDiff == o.lowerCaseDiff
996  && upperCaseDiff == o.upperCaseDiff
997  && titleCaseDiff == o.titleCaseDiff
998  && caseFoldDiff == o.caseFoldDiff
999  && lowerCaseSpecial == o.lowerCaseSpecial
1000  && upperCaseSpecial == o.upperCaseSpecial
1001  && titleCaseSpecial == o.titleCaseSpecial
1002  && caseFoldSpecial == o.caseFoldSpecial
1003  && graphemeBreakClass == o.graphemeBreakClass
1004  && wordBreakClass == o.wordBreakClass
1005  && sentenceBreakClass == o.sentenceBreakClass
1006  && lineBreakClass == o.lineBreakClass
1007  && script == o.script
1008  && nfQuickCheck == o.nfQuickCheck
1009  && idnaStatus == o.idnaStatus
1010  );
1011  }
1012  // from UnicodeData.txt
1016  // from ArabicShaping.txt
1018  // from DerivedAge.txt
1020  // From EastAsianWidth.txt
1022  int digitValue = -1;
1023 
1024  int mirrorDiff : 16;
1025 
1026  int lowerCaseDiff = 0;
1027  int upperCaseDiff = 0;
1028  int titleCaseDiff = 0;
1029  int caseFoldDiff = 0;
1039  // from DerivedNormalizationProps.txt
1042 };
1043 
1044 
1045 static QList<int> specialCaseMap;
1046 
1047 static int appendToSpecialCaseMap(const QList<int> &map)
1048 {
1049  QList<int> utf16map;
1050  for (int i = 0; i < map.size(); ++i) {
1051  uint codepoint = map.at(i);
1052  // if the condition below doesn't hold anymore we need to modify our special case mapping code
1053  Q_ASSERT(!QChar::requiresSurrogates(codepoint));
1054  if (QChar::requiresSurrogates(codepoint)) {
1055  utf16map << QChar::highSurrogate(codepoint);
1056  utf16map << QChar::lowSurrogate(codepoint);
1057  } else {
1058  utf16map << codepoint;
1059  }
1060  }
1061  int length = utf16map.size();
1062  utf16map.prepend(length);
1063 
1064  if (specialCaseMap.isEmpty())
1065  specialCaseMap << 0; // placeholder
1066 
1067  int i = 1;
1068  while (i < specialCaseMap.size()) {
1069  int n = specialCaseMap.at(i);
1070  if (n == length) {
1071  int j;
1072  for (j = 1; j <= n; ++j) {
1073  if (specialCaseMap.at(i+j) != utf16map.at(j))
1074  break;
1075  }
1076  if (j > n)
1077  return i;
1078  }
1079  i += n + 1;
1080  }
1081 
1082  int pos = specialCaseMap.size();
1083  specialCaseMap << utf16map;
1084  return pos;
1085 }
1086 
// DerivedCoreProperties.txt
// Returns true if ucs4 is one of the code points or ranges listed below as
// Default_Ignorable_Code_Point, false otherwise.
static inline bool isDefaultIgnorable(uint ucs4)
{
    // Default_Ignorable_Code_Point:
    //  Generated from
    //    Other_Default_Ignorable_Code_Point + Cf + Variation_Selector
    //    - White_Space - FFF9..FFFB (Annotation Characters)
    //    - 0600..0604, 06DD, 070F, 110BD (exceptional Cf characters that should be visible)
    if (ucs4 <= 0xff)
        return ucs4 == 0xad;

    return ucs4 == 0x034f
            || ucs4 == 0x061c
            || (ucs4 >= 0x115f && ucs4 <= 0x1160)
            || (ucs4 >= 0x17b4 && ucs4 <= 0x17b5)
            // 180B..180D, 180E and 180F (the latter added in Unicode 14.0)
            || (ucs4 >= 0x180b && ucs4 <= 0x180f)
            || (ucs4 >= 0x200b && ucs4 <= 0x200f)
            || (ucs4 >= 0x202a && ucs4 <= 0x202e)
            || (ucs4 >= 0x2060 && ucs4 <= 0x206f)
            || ucs4 == 0x3164
            || (ucs4 >= 0xfe00 && ucs4 <= 0xfe0f)
            || ucs4 == 0xfeff
            || ucs4 == 0xffa0
            || (ucs4 >= 0xfff0 && ucs4 <= 0xfff8)
            || (ucs4 >= 0x1bca0 && ucs4 <= 0x1bca3)
            || (ucs4 >= 0x1d173 && ucs4 <= 0x1d17a)
            || (ucs4 >= 0xe0000 && ucs4 <= 0xe0fff);
}
1116 
// Per-code-point record that the read*() passes of this tool fill in.
// The constructor seeds the values used for code points the data files do not
// list explicitly: the bidi direction (rules quoted from DerivedBidiClass.txt)
// and the line break class (rules quoted from LineBreak.txt), both stored in
// the member `p`.
// NOTE(review): this listing skips original lines 1180, 1183-1184, 1186 and
// 1197, so part of the member list (including `p`, which the constructor
// writes) is not visible here; consult the full source before editing it.
1117 struct UnicodeData {
1118  UnicodeData(int codepoint = 0) {
// Default direction is L; the branches below override it for specific ranges.
1119  p.direction = QChar::DirL;
1120  // DerivedBidiClass.txt
1121  // The unassigned code points that default to AL are in the ranges:
1122  // [U+0600..U+07BF, U+08A0..U+08FF, U+FB50..U+FDCF, U+FDF0..U+FDFF, U+FE70..U+FEFF, U+1EE00..U+1EEFF]
1123  if ((codepoint >= 0x0600 && codepoint <= 0x07BF)
1124  || (codepoint >= 0x08A0 && codepoint <= 0x08FF)
1125  || (codepoint >= 0xFB50 && codepoint <= 0xFDCF)
1126  || (codepoint >= 0xFDF0 && codepoint <= 0xFDFF)
1127  || (codepoint >= 0xFE70 && codepoint <= 0xFEFF)
1128  || (codepoint >= 0x1EE00 && codepoint <= 0x1EEFF)) {
1129  p.direction = QChar::DirAL;
1130  }
1131  // The unassigned code points that default to R are in the ranges:
1132  // [U+0590..U+05FF, U+07C0..U+089F, U+FB1D..U+FB4F, U+10800..U+10FFF, U+1E800..U+1EDFF, U+1EF00..U+1EFFF]
1133  else if ((codepoint >= 0x0590 && codepoint <= 0x05FF)
1134  || (codepoint >= 0x07C0 && codepoint <= 0x089F)
1135  || (codepoint >= 0xFB1D && codepoint <= 0xFB4F)
1136  || (codepoint >= 0x10800 && codepoint <= 0x10FFF)
1137  || (codepoint >= 0x1E800 && codepoint <= 0x1EDFF)
1138  || (codepoint >= 0x1EF00 && codepoint <= 0x1EFFF)) {
1139  p.direction = QChar::DirR;
1140  }
1141  // The unassigned code points that default to ET are in the range:
1142  // [U+20A0..U+20CF]
1143  else if (codepoint >= 0x20A0 && codepoint <= 0x20CF) {
1144  p.direction = QChar::DirET;
1145  }
1146  // The unassigned code points that default to BN have one of the following properties:
1147  // Default_Ignorable_Code_Point
1148  // Noncharacter_Code_Point
1149  else if (QChar::isNonCharacter(codepoint) || isDefaultIgnorable(codepoint)) {
1150  p.direction = QChar::DirBN;
1151  }
1152 
// Default line break class is AL; the branches below override it for specific ranges.
1153  p.lineBreakClass = LineBreak_AL; // XX -> AL
1154  // LineBreak.txt
1155  // The unassigned code points that default to "ID" include ranges in the following blocks:
1156  // [U+3400..U+4DBF, U+4E00..U+9FFF, U+F900..U+FAFF, U+20000..U+2A6DF, U+2A700..U+2B73F, U+2B740..U+2B81F, U+2B820..U+2CEAF, U+2F800..U+2FA1F]
1157  // and any other reserved code points on
1158  // [U+20000..U+2FFFD, U+30000..U+3FFFD]
// The plane-2 sub-ranges tested first are all contained in 0x20000..0x2FFFD,
// which is tested as well; they mirror the block list quoted above.
1159  if ((codepoint >= 0x3400 && codepoint <= 0x4DBF)
1160  || (codepoint >= 0x4E00 && codepoint <= 0x9FFF)
1161  || (codepoint >= 0xF900 && codepoint <= 0xFAFF)
1162  || (codepoint >= 0x20000 && codepoint <= 0x2A6DF)
1163  || (codepoint >= 0x2A700 && codepoint <= 0x2B73F)
1164  || (codepoint >= 0x2B740 && codepoint <= 0x2B81F)
1165  || (codepoint >= 0x2B820 && codepoint <= 0x2CEAF)
1166  || (codepoint >= 0x2F800 && codepoint <= 0x2FA1F)
1167  || (codepoint >= 0x20000 && codepoint <= 0x2FFFD)
1168  || (codepoint >= 0x30000 && codepoint <= 0x3FFFD)) {
1169  p.lineBreakClass = LineBreak_ID;
1170  }
1171  // The unassigned code points that default to "PR" comprise a range in the following block:
1172  // [U+20A0..U+20CF]
1173  else if (codepoint >= 0x20A0 && codepoint <= 0x20CF) {
1174  p.lineBreakClass = LineBreak_PR;
1175  }
1176  }
1177 
// Accessor returning the record for one code point; its definition follows the struct.
1178  static UnicodeData &valueRef(int codepoint);
1179 
1181 
1182  // from UnicodeData.txt
1185 
1187 
1188  // from BidiMirroring.txt
1189  int mirroredChar = 0;
1190 
1191  // DerivedNormalizationProps.txt
1192  bool excludedComposition = false;
1193 
1194  // computed position of unicode property set
1195  int propertyIndex = -1;
1196 
1198 };
1199 
// Table of per-code-point records, indexed by code point.
1200 static QList<UnicodeData> unicodeData;
1201 
// Function body that hands out a mutable reference to the record of
// `codepoint`. On the first call it fills `unicodeData` with one
// UnicodeData(uc) for every uc in 0..QChar::LastValidCodePoint, so each record
// starts with the constructor's per-code-point defaults.
// NOTE(review): the signature (original line 1202) is missing from this
// listing; presumably it is UnicodeData::valueRef(int codepoint) as declared
// in the struct — confirm against the full source.
// NOTE(review): the assertion only checks the upper bound; a negative
// codepoint is not rejected here.
1203 {
1204  static bool initialized = false;
1205  if (!initialized) {
1206  unicodeData.reserve(QChar::LastValidCodePoint + 1);
1207  for (int uc = 0; uc <= QChar::LastValidCodePoint; ++uc)
1208  unicodeData.append(UnicodeData(uc));
1209  initialized = true;
1210  }
1211 
1212  Q_ASSERT(codepoint <= 0x10ffff);
1213  return unicodeData[codepoint];
1214 }
1215 
1216 
// Statistics collected while the data files are read.
// Histogram: number of code points in a decomposition -> how many entries have that length.
1217 static QHash<int, int> decompositionLength;
// Highest code point seen with a non-empty decomposition field in UnicodeData.txt.
1218 static int highestComposedCharacter = 0;
// Number of canonical two-element compositions recorded by readDerivedNormalizationProps().
1219 static int numLigatures = 0;
// Highest first component among the recorded compositions.
1220 static int highestLigature = 0;
1221 
// One canonical composition as recorded by readDerivedNormalizationProps():
// u1 and u2 are the first and second code point of a two-element decomposition.
// NOTE(review): original line 1225 is absent from this listing; the aggregate
// initialiser `{ part1, part2, codepoint }` used later suggests a third member
// holding the composed code point — confirm against the full source.
1222 struct Ligature {
1223  int u1;
1224  int u2;
1226 };
1227 // we need them sorted after the first component for fast lookup
1228 bool operator < (const Ligature &l1, const Ligature &l2)
1229 { return l1.u1 < l2.u1; }
1230 
// Recorded compositions, keyed by the second component of the pair.
1231 static QHash<int, QList<Ligature> > ligatureHashes;
1232 
// Combining class -> number of UnicodeData.txt entries that use it.
1233 static QHash<int, int> combiningClassUsage;
1234 
// Largest absolute simple case-mapping offsets seen in UnicodeData.txt
// (offsets stored through the special-case map are not counted).
1235 static int maxLowerCaseDiff = 0;
1236 static int maxUpperCaseDiff = 0;
1237 static int maxTitleCaseDiff = 0;
1238 
// Parses data/UnicodeData.txt and fills the per-code-point records: general
// category, combining class, bidi direction, simple upper/lower/title case
// mappings, digit value and decomposition. A "<..., First>" line is paired
// with the following "<..., Last>" line; the record parsed from the First
// line is copied to every code point of that range.
// NOTE(review): this listing skips original lines 1279, 1316, 1334, 1336-1337
// and 1355, so the declaration of `properties` and some statements of the
// case-mapping branches are not visible here.
1239 static void readUnicodeData()
1240 {
1241  qDebug("Reading UnicodeData.txt");
1242 
// Indexes of the ';'-separated fields of one UnicodeData.txt line.
1243  enum UniDataFields {
1244  UD_Value,
1245  UD_Name,
1246  UD_Category,
1247  UD_CombiningClass,
1248  UD_BidiCategory,
1249  UD_Decomposition,
1250  UD_DecimalDigitValue,
1251  UD_DigitValue,
1252  UD_NumericValue,
1253  UD_Mirrored,
1254  UD_OldName,
1255  UD_Comment,
1256  UD_UpperCase,
1257  UD_LowerCase,
1258  UD_TitleCase
1259  };
1260 
1261  QFile f("data/UnicodeData.txt");
1262  if (!f.exists())
1263  qFatal("Couldn't find UnicodeData.txt");
1264 
// NOTE(review): the result of open() is not checked.
1265  f.open(QFile::ReadOnly);
1266 
1267  while (!f.atEnd()) {
1268  QByteArray line;
1269  line.resize(1024);
1270  int len = f.readLine(line.data(), 1024);
// Drops the last byte that was read; this assumes the line ended in '\n'.
1271  line.truncate(len-1);
1272 
1273  int comment = line.indexOf('#');
1274  if (comment >= 0)
1275  line = line.left(comment);
1276  if (line.isEmpty())
1277  continue;
1278 
1280  bool ok;
1281  int codepoint = properties[UD_Value].toInt(&ok, 16);
1282  Q_ASSERT(ok);
1283  Q_ASSERT(codepoint <= QChar::LastValidCodePoint);
1284  int lastCodepoint = codepoint;
1285 
// Range entry: the next line must be the matching "<..., Last>" line and
// supplies the end of the range.
1286  QByteArray name = properties[UD_Name];
1287  if (name.startsWith('<') && name.contains("First")) {
1288  QByteArray nextLine;
1289  nextLine.resize(1024);
1290  f.readLine(nextLine.data(), 1024);
1291  QList<QByteArray> properties = nextLine.split(';');
1292  Q_ASSERT(properties[UD_Name].startsWith('<') && properties[UD_Name].contains("Last"));
1293  lastCodepoint = properties[UD_Value].toInt(&ok, 16);
1294  Q_ASSERT(ok);
1295  Q_ASSERT(lastCodepoint <= QChar::LastValidCodePoint);
1296  }
1297 
1298  UnicodeData &data = UnicodeData::valueRef(codepoint);
1299  data.p.category = categoryMap.value(properties[UD_Category], QChar::Other_NotAssigned);
1300  data.p.combiningClass = properties[UD_CombiningClass].toInt();
// Count how often each combining class occurs.
1301  if (!combiningClassUsage.contains(data.p.combiningClass))
1302  combiningClassUsage[data.p.combiningClass] = 1;
1303  else
1304  ++combiningClassUsage[data.p.combiningClass];
1305 
1306  Direction dir = directionMap.value(properties[UD_BidiCategory], Dir_Unassigned);
1307  if (dir == Dir_Unassigned)
1308  qFatal("unhandled direction value: %s", properties[UD_BidiCategory].constData());
1309  data.p.direction = QChar::Direction(dir);
1310 
// Simple case mappings are stored as an offset from the code point; offsets
// whose magnitude is 1<<13 or more are stored through the special-case map
// instead and flagged with the matching *Special member.
1311  if (!properties[UD_UpperCase].isEmpty()) {
1312  int upperCase = properties[UD_UpperCase].toInt(&ok, 16);
1313  Q_ASSERT(ok);
1314  int diff = upperCase - codepoint;
1315  // if the conditions below don't hold anymore we need to modify our upper casing code
1317  if (QChar::requiresSurrogates(codepoint)) {
1318  Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(upperCase));
1319  Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(upperCase));
1320  }
1321  if (qAbs(diff) >= (1<<13)) {
1322  data.p.upperCaseSpecial = true;
1323  data.p.upperCaseDiff = appendToSpecialCaseMap(QList<int>() << upperCase);
1324  } else {
1325  data.p.upperCaseDiff = diff;
1326  maxUpperCaseDiff = qMax(maxUpperCaseDiff, qAbs(diff));
1327  }
1328  }
1329  if (!properties[UD_LowerCase].isEmpty()) {
1330  int lowerCase = properties[UD_LowerCase].toInt(&ok, 16);
1331  Q_ASSERT(ok);
1332  int diff = lowerCase - codepoint;
1333  // if the conditions below don't hold anymore we need to modify our lower casing code
// NOTE(review): the body of this if (original lines 1336-1337) is missing from the listing.
1335  if (QChar::requiresSurrogates(codepoint)) {
1338  }
1339  if (qAbs(diff) >= (1<<13)) {
1340  data.p.lowerCaseSpecial = true;
1341  data.p.lowerCaseDiff = appendToSpecialCaseMap(QList<int>() << lowerCase);
1342  } else {
1343  data.p.lowerCaseDiff = diff;
1344  maxLowerCaseDiff = qMax(maxLowerCaseDiff, qAbs(diff));
1345  }
1346  }
1347  // we want toTitleCase to map to ToUpper in case we don't have any titlecase.
1348  if (properties[UD_TitleCase].isEmpty())
1349  properties[UD_TitleCase] = properties[UD_UpperCase];
1350  if (!properties[UD_TitleCase].isEmpty()) {
1351  int titleCase = properties[UD_TitleCase].toInt(&ok, 16);
1352  Q_ASSERT(ok);
1353  int diff = titleCase - codepoint;
1354  // if the conditions below don't hold anymore we need to modify our title casing code
1356  if (QChar::requiresSurrogates(codepoint)) {
1357  Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(titleCase));
1358  Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(titleCase));
1359  }
1360  if (qAbs(diff) >= (1<<13)) {
1361  data.p.titleCaseSpecial = true;
1362  data.p.titleCaseDiff = appendToSpecialCaseMap(QList<int>() << titleCase);
1363  } else {
1364  data.p.titleCaseDiff = diff;
1365  maxTitleCaseDiff = qMax(maxTitleCaseDiff, qAbs(diff));
1366  }
1367  }
1368 
1369  if (!properties[UD_DigitValue].isEmpty())
1370  data.p.digitValue = properties[UD_DigitValue].toInt();
1371 
1372  // decomposition
// A leading "<tag>" selects the decomposition type; without a tag the
// decomposition is canonical. The remaining items are hex code points.
1373  QByteArray decomposition = properties[UD_Decomposition];
1374  if (!decomposition.isEmpty()) {
1375  highestComposedCharacter = qMax(highestComposedCharacter, codepoint);
1376  QList<QByteArray> d = decomposition.split(' ');
1377  if (d[0].contains('<')) {
1378  data.decompositionType = decompositionMap.value(d[0], QChar::NoDecomposition);
1379  if (data.decompositionType == QChar::NoDecomposition)
1380  qFatal("unhandled decomposition type: %s", d[0].constData());
1381  d.takeFirst();
1382  } else {
1383  data.decompositionType = QChar::Canonical;
1384  }
1385  for (int i = 0; i < d.size(); ++i) {
1386  data.decomposition.append(d[i].toInt(&ok, 16));
1387  Q_ASSERT(ok);
1388  }
1389  ++decompositionLength[data.decomposition.size()];
1390  }
1391 
// Copy the parsed record to every code point of the range (a single code
// point unless a First/Last pair was read).
1392  for (int i = codepoint; i <= lastCodepoint; ++i)
1393  unicodeData[i] = data;
1394  }
1395 }
1396 
1397 static int maxMirroredDiff = 0;
1398 
1399 static void readBidiMirroring()
1400 {
1401  qDebug("Reading BidiMirroring.txt");
1402 
1403  QFile f("data/BidiMirroring.txt");
1404  if (!f.exists())
1405  qFatal("Couldn't find BidiMirroring.txt");
1406 
1407  f.open(QFile::ReadOnly);
1408 
1409  while (!f.atEnd()) {
1410  QByteArray line;
1411  line.resize(1024);
1412  int len = f.readLine(line.data(), 1024);
1413  line.resize(len-1);
1414 
1415  int comment = line.indexOf('#');
1416  if (comment >= 0)
1417  line = line.left(comment);
1418 
1419  if (line.isEmpty())
1420  continue;
1421  line = line.replace(" ", "");
1422 
1423  QList<QByteArray> pair = line.split(';');
1424  Q_ASSERT(pair.size() == 2);
1425 
1426  bool ok;
1427  int codepoint = pair[0].toInt(&ok, 16);
1428  Q_ASSERT(ok);
1429  int mirror = pair[1].toInt(&ok, 16);
1430  Q_ASSERT(ok);
1431 
1432  UnicodeData &d = UnicodeData::valueRef(codepoint);
1433  d.mirroredChar = mirror;
1434  d.p.mirrorDiff = d.mirroredChar - codepoint;
1435  maxMirroredDiff = qMax(maxMirroredDiff, qAbs(d.p.mirrorDiff));
1436  }
1437 }
1438 
// Assigns the joining type of every code point: first the defaults for code
// points that data/ArabicShaping.txt does not list, then the explicit values
// parsed from that file (four ';'-separated fields per line, joining type in
// the third).
// NOTE(review): this listing skips original lines 1491-1492, so the list of
// categories accepted for joining type T is incomplete here.
1439 static void readArabicShaping()
1440 {
1441  qDebug("Reading ArabicShaping.txt");
1442 
1443  // Initialize defaults:
1444  // Code points that are not explicitly listed in ArabicShaping.txt are either of joining type T or U:
1445  // - Those that are not explicitly listed and are of General Category Mn, Me, or Cf have joining type T.
1446  // - All others not explicitly listed have joining type U.
1447  for (int codepoint = 0; codepoint <= QChar::LastValidCodePoint; ++codepoint) {
1448  UnicodeData &d = UnicodeData::valueRef(codepoint);
1449  if (d.p.joining == QChar::Joining_None) {
1450  if (d.p.category == QChar::Mark_NonSpacing || d.p.category == QChar::Mark_Enclosing || d.p.category == QChar::Other_Format)
1451  d.p.joining = QChar::Joining_Transparent;
1452  }
1453  }
1454 
1455  QFile f("data/ArabicShaping.txt");
1456  if (!f.exists())
1457  qFatal("Couldn't find ArabicShaping.txt");
1458 
// NOTE(review): the result of open() is not checked.
1459  f.open(QFile::ReadOnly);
1460 
1461  while (!f.atEnd()) {
1462  QByteArray line;
1463  line.resize(1024);
1464  int len = f.readLine(line.data(), 1024);
// Drops the last byte that was read; this assumes the line ended in '\n'.
1465  line.resize(len-1);
1466 
1467  int comment = line.indexOf('#');
1468  if (comment >= 0)
1469  line = line.left(comment);
1470  line = line.trimmed();
1471 
1472  if (line.isEmpty())
1473  continue;
1474 
1475  QList<QByteArray> l = line.split(';');
1476  Q_ASSERT(l.size() == 4);
1477 
1478  bool ok;
1479  int codepoint = l[0].toInt(&ok, 16);
1480  Q_ASSERT(ok);
1481 
1482  UnicodeData &d = UnicodeData::valueRef(codepoint);
1483  JoiningType joining = joining_map.value(l[2].trimmed(), Joining_Unassigned);
1484  switch (joining) {
1485  case Joining_Unassigned:
1486  qFatal("%x: unassigned or unhandled joining type: %s", codepoint, l[2].constData());
1487  break;
// Joining type T is only accepted for the categories listed in the inner
// switch; any other category is reported as fatal. Accepted entries fall
// through to the common assignment below.
1488  case Joining_Transparent:
1489  switch (d.p.category) {
1490  case QChar::Mark_Enclosing:
1493  case QChar::Other_Format:
1494  break;
1495  default:
1496  qFatal("%x: joining type '%s' was met (category: %d); "
1497  "the current implementation needs to be revised!",
1498  codepoint, l[2].constData(), d.p.category);
1499  }
1500  Q_FALLTHROUGH();
1501  default:
1502  d.p.joining = QChar::JoiningType(joining);
1503  break;
1504  }
1505  }
1506 }
1507 
1508 static void readDerivedAge()
1509 {
1510  qDebug("Reading DerivedAge.txt");
1511 
1512  QFile f("data/DerivedAge.txt");
1513  if (!f.exists())
1514  qFatal("Couldn't find DerivedAge.txt");
1515 
1516  f.open(QFile::ReadOnly);
1517 
1518  while (!f.atEnd()) {
1519  QByteArray line;
1520  line.resize(1024);
1521  int len = f.readLine(line.data(), 1024);
1522  line.resize(len-1);
1523 
1524  int comment = line.indexOf('#');
1525  if (comment >= 0)
1526  line = line.left(comment);
1527  line.replace(" ", "");
1528 
1529  if (line.isEmpty())
1530  continue;
1531 
1532  QList<QByteArray> l = line.split(';');
1533  Q_ASSERT(l.size() == 2);
1534 
1535  QByteArray codes = l[0];
1536  codes.replace("..", ".");
1537  QList<QByteArray> cl = codes.split('.');
1538 
1539  bool ok;
1540  int from = cl[0].toInt(&ok, 16);
1541  Q_ASSERT(ok);
1542  int to = from;
1543  if (cl.size() == 2) {
1544  to = cl[1].toInt(&ok, 16);
1545  Q_ASSERT(ok);
1546  }
1547 
1548  QChar::UnicodeVersion age = age_map.value(l[1].trimmed(), QChar::Unicode_Unassigned);
1549  //qDebug() << Qt::hex << from << ".." << to << ba << age;
1550  if (age == QChar::Unicode_Unassigned)
1551  qFatal("unassigned or unhandled age value: %s", l[1].constData());
1552 
1553  for (int codepoint = from; codepoint <= to; ++codepoint) {
1554  UnicodeData &d = UnicodeData::valueRef(codepoint);
1555  d.p.age = age;
1556  }
1557  }
1558 }
1559 
// Parses data/EastAsianWidth.txt ("<code>[..<code>] ; <width> # comment") and
// stores the East Asian width of every listed code point. Aborts through
// qFatal() if the file cannot be found or opened, or if a width value is not
// present in eastAsianWidthMap.
// NOTE(review): original line 1600 (directly after the "ranges don't overlap"
// comment) is missing from this listing.
1560 static void readEastAsianWidth()
1561 {
1562  qDebug("Reading EastAsianWidth.txt");
1563 
1564  QFile f("data/EastAsianWidth.txt");
1565  if (!f.exists() || !f.open(QFile::ReadOnly))
1566  qFatal("Couldn't find or read EastAsianWidth.txt");
1567 
1568  while (!f.atEnd()) {
1569  QByteArray line = f.readLine().trimmed();
1570 
// Cut a trailing comment, then collapse the remaining whitespace.
1571  int comment = line.indexOf('#');
1572  line = (comment < 0 ? line : line.left(comment)).simplified();
1573 
1574  if (line.isEmpty())
1575  continue;
1576 
1577  QList<QByteArray> fields = line.split(';');
1578  Q_ASSERT(fields.size() == 2);
1579 
1580  // That would be split(".."), but that API does not exist.
1581  const QByteArray codePoints = fields[0].trimmed().replace("..", ".");
1582  QList<QByteArray> cl = codePoints.split('.');
1583  Q_ASSERT(cl.size() >= 1 && cl.size() <= 2);
1584 
1585  const QByteArray widthString = fields[1].trimmed();
1586  if (!eastAsianWidthMap.contains(widthString)) {
1587  qFatal("Unhandled EastAsianWidth property value for %s: %s",
1588  qPrintable(codePoints), qPrintable(widthString));
1589  }
1590  auto width = eastAsianWidthMap.value(widthString);
1591 
// A single code point is treated as a range of length one.
1592  bool ok;
1593  const int first = cl[0].toInt(&ok, 16);
1594  const int last = ok && cl.size() == 2 ? cl[1].toInt(&ok, 16) : first;
1595  Q_ASSERT(ok);
1596 
1597  for (int codepoint = first; codepoint <= last; ++codepoint) {
1598  UnicodeData &ud = UnicodeData::valueRef(codepoint);
1599  // Ensure that ranges don't overlap.
1601  ud.p.eastAsianWidth = width;
1602  }
1603  }
1604 }
1605 
// Parses data/DerivedNormalizationProps.txt. Only the properties
// Full_Composition_Exclusion, NFD_QC, NFC_QC, NFKD_QC and NFKC_QC are used:
// the first marks a code point as excluded from composition, the others are
// packed two bits per normalization form into p.nfQuickCheck.
// Afterwards every non-excluded canonical decomposition with more than one
// element is recorded as a composition pair in ligatureHashes.
// NOTE(review): this listing skips original lines 1663, 1665, 1667, 1669,
// 1671 and 1680, so the declaration and assignments of `form` and the body of
// the NFQC_MAYBE check are not visible here.
1606 static void readDerivedNormalizationProps()
1607 {
1608  qDebug("Reading DerivedNormalizationProps.txt");
1609 
1610  QFile f("data/DerivedNormalizationProps.txt");
1611  if (!f.exists())
1612  qFatal("Couldn't find DerivedNormalizationProps.txt");
1613 
// NOTE(review): the result of open() is not checked.
1614  f.open(QFile::ReadOnly);
1615 
1616  while (!f.atEnd()) {
1617  QByteArray line;
1618  line.resize(1024);
1619  int len = f.readLine(line.data(), 1024);
// Drops the last byte that was read; this assumes the line ended in '\n'.
1620  line.resize(len-1);
1621 
1622  int comment = line.indexOf('#');
1623  if (comment >= 0)
1624  line = line.left(comment);
1625 
1626  if (line.trimmed().isEmpty())
1627  continue;
1628 
1629  QList<QByteArray> l = line.split(';');
1630  Q_ASSERT(l.size() >= 2);
1631 
// Lines for any other property are skipped.
1632  QByteArray propName = l[1].trimmed();
1633  if (propName != "Full_Composition_Exclusion" &&
1634  propName != "NFD_QC" && propName != "NFC_QC" &&
1635  propName != "NFKD_QC" && propName != "NFKC_QC") {
1636  // ###
1637  continue;
1638  }
1639 
1640  QByteArray codes = l[0].trimmed();
1641  codes.replace("..", ".");
1642  QList<QByteArray> cl = codes.split('.');
1643 
1644  bool ok;
1645  int from = cl[0].toInt(&ok, 16);
1646  Q_ASSERT(ok);
1647  int to = from;
1648  if (cl.size() == 2) {
1649  to = cl[1].toInt(&ok, 16);
1650  Q_ASSERT(ok);
1651  }
1652 
1653  for (int codepoint = from; codepoint <= to; ++codepoint) {
1654  UnicodeData &d = UnicodeData::valueRef(codepoint);
1655  if (propName == "Full_Composition_Exclusion") {
1656  d.excludedComposition = true;
1657  } else {
// The bit layout below depends on these enum values.
1658  static_assert(QString::NormalizationForm_D == 0);
1659  static_assert(QString::NormalizationForm_C == 1);
1660  static_assert(QString::NormalizationForm_KD == 2);
1661  static_assert(QString::NormalizationForm_KC == 3);
1662 
1664  if (propName == "NFD_QC")
1666  else if (propName == "NFC_QC")
1668  else if (propName == "NFKD_QC")
1670  else// if (propName == "NFKC_QC")
1672 
1673  Q_ASSERT(l.size() == 3);
1674  l[2] = l[2].trimmed();
1675 
// Third field: "N" -> NFQC_NO, "M" -> NFQC_MAYBE, anything else -> NFQC_YES.
1676  enum { NFQC_YES = 0, NFQC_NO = 1, NFQC_MAYBE = 3 };
1677  uchar ynm = (l[2] == "N" ? NFQC_NO : l[2] == "M" ? NFQC_MAYBE : NFQC_YES);
1678  if (ynm == NFQC_MAYBE) {
1679  // if this changes, we need to revise the normalizationQuickCheckHelper() implementation
1681  }
1682  d.p.nfQuickCheck |= (ynm << (form << 1)); // 2 bits per NF
1683  }
1684  }
1685  }
1686 
// Second pass: collect the composition pairs.
1687  for (int codepoint = 0; codepoint <= QChar::LastValidCodePoint; ++codepoint) {
1688  UnicodeData &d = UnicodeData::valueRef(codepoint);
1689  if (!d.excludedComposition
1690  && d.decompositionType == QChar::Canonical
1691  && d.decomposition.size() > 1) {
1692  Q_ASSERT(d.decomposition.size() == 2);
1693 
1694  int part1 = d.decomposition.at(0);
1695  int part2 = d.decomposition.at(1);
1696 
1697  // all non-starters are listed in DerivedNormalizationProps.txt
1698  // and already excluded from composition
1699  Q_ASSERT(UnicodeData::valueRef(part1).p.combiningClass == 0);
1700 
1701  ++numLigatures;
1702  highestLigature = qMax(highestLigature, part1);
1703  Ligature l = { part1, part2, codepoint };
1704  ligatureHashes[part2].append(l);
1705  }
1706  }
1707 }
1708 
1709 
1713  int version;
1714 };
1715 
1716 static QByteArray createNormalizationCorrections()
1717 {
1718  qDebug("Reading NormalizationCorrections.txt");
1719 
1720  QFile f("data/NormalizationCorrections.txt");
1721  if (!f.exists())
1722  qFatal("Couldn't find NormalizationCorrections.txt");
1723 
1724  f.open(QFile::ReadOnly);
1725 
1726  QByteArray out
1727  = "struct NormalizationCorrection {\n"
1728  " uint ucs4;\n"
1729  " uint old_mapping;\n"
1730  " int version;\n"
1731  "};\n\n"
1732 
1733  "static constexpr NormalizationCorrection uc_normalization_corrections[] = {\n";
1734 
1735  int maxVersion = 0;
1736  int numCorrections = 0;
1737  while (!f.atEnd()) {
1738  QByteArray line;
1739  line.resize(1024);
1740  int len = f.readLine(line.data(), 1024);
1741  line.resize(len-1);
1742 
1743  int comment = line.indexOf('#');
1744  if (comment >= 0)
1745  line = line.left(comment);
1746  line.replace(" ", "");
1747 
1748  if (line.isEmpty())
1749  continue;
1750 
1751  Q_ASSERT(!line.contains(".."));
1752 
1753  QList<QByteArray> fields = line.split(';');
1754  Q_ASSERT(fields.size() == 4);
1755 
1756  NormalizationCorrection c = { 0, 0, 0 };
1757  bool ok;
1758  c.codepoint = fields.at(0).toInt(&ok, 16);
1759  Q_ASSERT(ok);
1760  c.mapped = fields.at(1).toInt(&ok, 16);
1761  Q_ASSERT(ok);
1762  if (fields.at(3) == "3.2.0")
1763  c.version = QChar::Unicode_3_2;
1764  else if (fields.at(3) == "4.0.0")
1765  c.version = QChar::Unicode_4_0;
1766  else
1767  qFatal("unknown unicode version in NormalizationCorrection.txt");
1768 
1769  out += " { 0x" + QByteArray::number(c.codepoint, 16) + ", 0x"
1770  + QByteArray::number(c.mapped, 16) + ", "
1771  + QByteArray::number(c.version) + " },\n";
1772  ++numCorrections;
1773  maxVersion = qMax(c.version, maxVersion);
1774  }
1775  if (out.endsWith(",\n"))
1776  out.chop(2);
1777 
1778  out += "\n};\n\n"
1779 
1780  "enum { NumNormalizationCorrections = " + QByteArray::number(numCorrections) + " };\n"
1781  "enum { NormalizationCorrectionsVersionMax = " + QByteArray::number(maxVersion) + " };\n\n";
1782 
1783  return out;
1784 }
1785 
1786 static void readLineBreak()
1787 {
1788  qDebug("Reading LineBreak.txt");
1789 
1790  QFile f("data/LineBreak.txt");
1791  if (!f.exists())
1792  qFatal("Couldn't find LineBreak.txt");
1793 
1794  f.open(QFile::ReadOnly);
1795 
1796  while (!f.atEnd()) {
1797  QByteArray line;
1798  line.resize(1024);
1799  int len = f.readLine(line.data(), 1024);
1800  line.resize(len-1);
1801 
1802  int comment = line.indexOf('#');
1803  if (comment >= 0)
1804  line = line.left(comment);
1805  line.replace(" ", "");
1806 
1807  if (line.isEmpty())
1808  continue;
1809 
1810  QList<QByteArray> l = line.split(';');
1811  Q_ASSERT(l.size() == 2);
1812 
1813  QByteArray codes = l[0];
1814  codes.replace("..", ".");
1815  QList<QByteArray> cl = codes.split('.');
1816 
1817  bool ok;
1818  int from = cl[0].toInt(&ok, 16);
1819  Q_ASSERT(ok);
1820  int to = from;
1821  if (cl.size() == 2) {
1822  to = cl[1].toInt(&ok, 16);
1823  Q_ASSERT(ok);
1824  }
1825 
1826  LineBreakClass lb = line_break_map.value(l[1], LineBreak_Unassigned);
1827  if (lb == LineBreak_Unassigned)
1828  qFatal("unassigned line break class: %s", l[1].constData());
1829 
1830  for (int codepoint = from; codepoint <= to; ++codepoint) {
1831  UnicodeData &d = UnicodeData::valueRef(codepoint);
1832  d.p.lineBreakClass = lb;
1833  }
1834  }
1835 }
1836 
1837 static void readSpecialCasing()
1838 {
1839  qDebug("Reading SpecialCasing.txt");
1840 
1841  QFile f("data/SpecialCasing.txt");
1842  if (!f.exists())
1843  qFatal("Couldn't find SpecialCasing.txt");
1844 
1845  f.open(QFile::ReadOnly);
1846 
1847  while (!f.atEnd()) {
1848  QByteArray line;
1849  line.resize(1024);
1850  int len = f.readLine(line.data(), 1024);
1851  line.resize(len-1);
1852 
1853  int comment = line.indexOf('#');
1854  if (comment >= 0)
1855  line = line.left(comment);
1856 
1857  if (line.isEmpty())
1858  continue;
1859 
1860  QList<QByteArray> l = line.split(';');
1861 
1862  QByteArray condition = l.size() < 5 ? QByteArray() : l[4].trimmed();
1863  if (!condition.isEmpty())
1864  // #####
1865  continue;
1866 
1867  bool ok;
1868  int codepoint = l[0].trimmed().toInt(&ok, 16);
1869  Q_ASSERT(ok);
1870 
1871  // if the condition below doesn't hold anymore we need to modify our
1872  // lower/upper/title casing code and case folding code
1873  Q_ASSERT(!QChar::requiresSurrogates(codepoint));
1874 
1875 // qDebug() << "codepoint" << Qt::hex << codepoint;
1876 // qDebug() << line;
1877 
1878  QList<QByteArray> lower = l[1].trimmed().split(' ');
1879  QList<int> lowerMap;
1880  for (int i = 0; i < lower.size(); ++i) {
1881  bool ok;
1882  lowerMap.append(lower.at(i).toInt(&ok, 16));
1883  Q_ASSERT(ok);
1884  }
1885 
1886  QList<QByteArray> title = l[2].trimmed().split(' ');
1887  QList<int> titleMap;
1888  for (int i = 0; i < title.size(); ++i) {
1889  bool ok;
1890  titleMap.append(title.at(i).toInt(&ok, 16));
1891  Q_ASSERT(ok);
1892  }
1893 
1894  QList<QByteArray> upper = l[3].trimmed().split(' ');
1895  QList<int> upperMap;
1896  for (int i = 0; i < upper.size(); ++i) {
1897  bool ok;
1898  upperMap.append(upper.at(i).toInt(&ok, 16));
1899  Q_ASSERT(ok);
1900  }
1901 
1902 
1903  UnicodeData &ud = UnicodeData::valueRef(codepoint);
1904  Q_ASSERT(lowerMap.size() > 1 || lowerMap.at(0) == codepoint + ud.p.lowerCaseDiff);
1905  Q_ASSERT(titleMap.size() > 1 || titleMap.at(0) == codepoint + ud.p.titleCaseDiff);
1906  Q_ASSERT(upperMap.size() > 1 || upperMap.at(0) == codepoint + ud.p.upperCaseDiff);
1907 
1908  if (lowerMap.size() > 1) {
1909  ud.p.lowerCaseSpecial = true;
1910  ud.p.lowerCaseDiff = appendToSpecialCaseMap(lowerMap);
1911  }
1912  if (titleMap.size() > 1) {
1913  ud.p.titleCaseSpecial = true;
1914  ud.p.titleCaseDiff = appendToSpecialCaseMap(titleMap);
1915  }
1916  if (upperMap.size() > 1) {
1917  ud.p.upperCaseSpecial = true;
1918  ud.p.upperCaseDiff = appendToSpecialCaseMap(upperMap);
1919  }
1920  }
1921 }
1922 
// Largest absolute simple case-folding offset seen in CaseFolding.txt.
1923 static int maxCaseFoldDiff = 0;
1924 
// Parses data/CaseFolding.txt ("<code>; <status>; <mapping>; # comment") and
// stores the case folding of every listed code point. Entries with status
// "F" or "T" are skipped; a remaining entry that maps to more than one code
// point is reported as fatal.
// NOTE(review): original line 1974 is missing from this listing.
1925 static void readCaseFolding()
1926 {
1927  qDebug("Reading CaseFolding.txt");
1928 
1929  QFile f("data/CaseFolding.txt");
1930  if (!f.exists())
1931  qFatal("Couldn't find CaseFolding.txt");
1932 
// NOTE(review): the result of open() is not checked.
1933  f.open(QFile::ReadOnly);
1934 
1935  while (!f.atEnd()) {
1936  QByteArray line;
1937  line.resize(1024);
1938  int len = f.readLine(line.data(), 1024);
// Drops the last byte that was read; this assumes the line ended in '\n'.
1939  line.resize(len-1);
1940 
1941  int comment = line.indexOf('#');
1942  if (comment >= 0)
1943  line = line.left(comment);
1944 
1945  if (line.isEmpty())
1946  continue;
1947 
1948  QList<QByteArray> l = line.split(';');
1949 
1950  bool ok;
1951  int codepoint = l[0].trimmed().toInt(&ok, 16);
1952  Q_ASSERT(ok);
1953 
1954 
1955  l[1] = l[1].trimmed();
1956  if (l[1] == "F" || l[1] == "T")
1957  continue;
1958 
1959 // qDebug() << "codepoint" << Qt::hex << codepoint;
1960 // qDebug() << line;
1961  QList<QByteArray> fold = l[2].trimmed().split(' ');
1962  QList<int> foldMap;
1963  for (int i = 0; i < fold.size(); ++i) {
// This `ok` shadows the one declared above.
1964  bool ok;
1965  foldMap.append(fold.at(i).toInt(&ok, 16));
1966  Q_ASSERT(ok);
1967  }
1968 
// A single-code-point folding is stored as an offset; offsets whose magnitude
// is 1<<13 or more go through the special-case map instead.
1969  UnicodeData &ud = UnicodeData::valueRef(codepoint);
1970  if (foldMap.size() == 1) {
1971  int caseFolded = foldMap.at(0);
1972  int diff = caseFolded - codepoint;
1973  // if the conditions below don't hold anymore we need to modify our case folding code
1975  if (QChar::requiresSurrogates(codepoint)) {
1976  Q_ASSERT(QChar::highSurrogate(codepoint) == QChar::highSurrogate(caseFolded));
1977  Q_ASSERT(QChar::lowSurrogate(codepoint) + diff == QChar::lowSurrogate(caseFolded));
1978  }
1979  if (qAbs(diff) >= (1<<13)) {
1980  ud.p.caseFoldSpecial = true;
1981  ud.p.caseFoldDiff = appendToSpecialCaseMap(foldMap);
1982  } else {
1983  ud.p.caseFoldDiff = diff;
1984  maxCaseFoldDiff = qMax(maxCaseFoldDiff, qAbs(diff));
1985  }
1986  } else {
// NOTE(review): qFatal() normally terminates the program, so the two
// assignments after it are not expected to run.
1987  qFatal("we currently don't support full case foldings");
1988 // qDebug() << "special" << Qt::hex << foldMap;
1989  ud.p.caseFoldSpecial = true;
1990  ud.p.caseFoldDiff = appendToSpecialCaseMap(foldMap);
1991  }
1992  }
1993 }
1994 
1995 static void readGraphemeBreak()
1996 {
1997  qDebug("Reading GraphemeBreakProperty.txt");
1998 
1999  QFile f("data/GraphemeBreakProperty.txt");
2000  if (!f.exists())
2001  qFatal("Couldn't find GraphemeBreakProperty.txt");
2002 
2003  f.open(QFile::ReadOnly);
2004 
2005  while (!f.atEnd()) {
2006  QByteArray line;
2007  line.resize(1024);
2008  int len = f.readLine(line.data(), 1024);
2009  line.resize(len-1);
2010 
2011  int comment = line.indexOf('#');
2012  if (comment >= 0)
2013  line = line.left(comment);
2014  line.replace(" ", "");
2015 
2016  if (line.isEmpty())
2017  continue;
2018 
2019  QList<QByteArray> l = line.split(';');
2020  Q_ASSERT(l.size() == 2);
2021 
2022  QByteArray codes = l[0];
2023  codes.replace("..", ".");
2024  QList<QByteArray> cl = codes.split('.');
2025 
2026  bool ok;
2027  int from = cl[0].toInt(&ok, 16);
2028  Q_ASSERT(ok);
2029  int to = from;
2030  if (cl.size() == 2) {
2031  to = cl[1].toInt(&ok, 16);
2032  Q_ASSERT(ok);
2033  }
2034 
2035  GraphemeBreakClass brk = grapheme_break_map.value(l[1], GraphemeBreak_Unassigned);
2036  if (brk == GraphemeBreak_Unassigned)
2037  qFatal("unassigned grapheme break class: %s", l[1].constData());
2038 
2039  for (int codepoint = from; codepoint <= to; ++codepoint) {
2040  UnicodeData &ud = UnicodeData::valueRef(codepoint);
2041  ud.p.graphemeBreakClass = brk;
2042  }
2043  }
2044 }
2045 
// Parses data/emoji-data.txt ("<code>[..<code>] ; <property> # comment").
// Only lines whose property is Extended_Pictographic are processed.
// NOTE(review): original lines 2093-2094 are missing from this listing, so
// what is stored for each matching code point is not visible here.
2046 static void readEmojiData()
2047 {
2048  qDebug("Reading emoji-data.txt");
2049 
2050  QFile f("data/emoji-data.txt");
2051  if (!f.open(QFile::ReadOnly))
2052  qFatal("Couldn't find emoji-data.txt");
2053 
2054  while (!f.atEnd()) {
2055  QByteArray line;
2056  line.resize(1024);
2057  int len = f.readLine(line.data(), 1024);
// Drops the last byte that was read; this assumes the line ended in '\n'.
2058  line.resize(len-1);
2059 
2060  int comment = line.indexOf('#');
2061  if (comment >= 0)
2062  line = line.left(comment);
2063  line.replace(" ", "");
2064 
2065  if (line.isEmpty())
2066  continue;
2067 
2068  QList<QByteArray> l = line.split(';');
2069  Q_ASSERT(l.size() == 2);
2070 
2071  // NOTE: for the moment we process emoji_data only to extract
2072  // the code points with Extended_Pictographic. This is needed by
2073  // extended grapheme clustering (cf. the GB11 rule in UAX #29).
2074  if (l[1] != "Extended_Pictographic")
2075  continue;
2076 
// "A..B" becomes "A.B" so that a single-character split yields the one or two
// bounds of the range.
2077  QByteArray codes = l[0];
2078  codes.replace("..", ".");
2079  QList<QByteArray> cl = codes.split('.');
2080 
2081  bool ok;
2082  int from = cl[0].toInt(&ok, 16);
2083  Q_ASSERT(ok);
2084  int to = from;
2085  if (cl.size() == 2) {
2086  to = cl[1].toInt(&ok, 16);
2087  Q_ASSERT(ok);
2088  }
2089 
2090  for (int codepoint = from; codepoint <= to; ++codepoint) {
2091  UnicodeData &ud = UnicodeData::valueRef(codepoint);
2092  // Check we're not overwriting the data from GraphemeBreakProperty.txt...
2095  }
2096  }
2097 }
2098 
2099 static void readWordBreak()
2100 {
2101  qDebug("Reading WordBreakProperty.txt");
2102 
2103  QFile f("data/WordBreakProperty.txt");
2104  if (!f.exists())
2105  qFatal("Couldn't find WordBreakProperty.txt");
2106 
2107  f.open(QFile::ReadOnly);
2108 
2109  while (!f.atEnd()) {
2110  QByteArray line;
2111  line.resize(1024);
2112  int len = f.readLine(line.data(), 1024);
2113  line.resize(len-1);
2114 
2115  int comment = line.indexOf('#');
2116  if (comment >= 0)
2117  line = line.left(comment);
2118  line.replace(" ", "");
2119 
2120  if (line.isEmpty())
2121  continue;
2122 
2123  QList<QByteArray> l = line.split(';');
2124  Q_ASSERT(l.size() == 2);
2125 
2126  QByteArray codes = l[0];
2127  codes.replace("..", ".");
2128  QList<QByteArray> cl = codes.split('.');
2129 
2130  bool ok;
2131  int from = cl[0].toInt(&ok, 16);
2132  Q_ASSERT(ok);
2133  int to = from;
2134  if (cl.size() == 2) {
2135  to = cl[1].toInt(&ok, 16);
2136  Q_ASSERT(ok);
2137  }
2138 
2139  WordBreakClass brk = word_break_map.value(l[1], WordBreak_Unassigned);
2140  if (brk == WordBreak_Unassigned)
2141  qFatal("unassigned word break class: %s", l[1].constData());
2142 
2143  for (int codepoint = from; codepoint <= to; ++codepoint) {
2144  // ### [
2145  // as of Unicode 5.1, some punctuation marks were mapped to MidLetter and MidNumLet
2146  // which caused "hi.there" to be treated like if it were just a single word;
2147  // until we have a tailoring mechanism, retain the old behavior by remapping those characters here.
2148  if (codepoint == 0x002E) // FULL STOP
2149  brk = WordBreak_MidNum;
2150  else if (codepoint == 0x003A) // COLON
2151  brk = WordBreak_Any;
2152  // ] ###
2153  UnicodeData &ud = UnicodeData::valueRef(codepoint);
2154  ud.p.wordBreakClass = brk;
2155  }
2156  }
2157 }
2158 
2159 static void readSentenceBreak()
2160 {
2161  qDebug("Reading SentenceBreakProperty.txt");
2162 
2163  QFile f("data/SentenceBreakProperty.txt");
2164  if (!f.exists())
2165  qFatal("Couldn't find SentenceBreakProperty.txt");
2166 
2167  f.open(QFile::ReadOnly);
2168 
2169  while (!f.atEnd()) {
2170  QByteArray line;
2171  line.resize(1024);
2172  int len = f.readLine(line.data(), 1024);
2173  line.resize(len-1);
2174 
2175  int comment = line.indexOf('#');
2176  if (comment >= 0)
2177  line = line.left(comment);
2178  line.replace(" ", "");
2179 
2180  if (line.isEmpty())
2181  continue;
2182 
2183  QList<QByteArray> l = line.split(';');
2184  Q_ASSERT(l.size() == 2);
2185 
2186  QByteArray codes = l[0];
2187  codes.replace("..", ".");
2188  QList<QByteArray> cl = codes.split('.');
2189 
2190  bool ok;
2191  int from = cl[0].toInt(&ok, 16);
2192  Q_ASSERT(ok);
2193  int to = from;
2194  if (cl.size() == 2) {
2195  to = cl[1].toInt(&ok, 16);
2196  Q_ASSERT(ok);
2197  }
2198 
2199  SentenceBreakClass brk = sentence_break_map.value(l[1], SentenceBreak_Unassigned);
2200  if (brk == SentenceBreak_Unassigned)
2201  qFatal("unassigned sentence break class: %s", l[1].constData());
2202 
2203  for (int codepoint = from; codepoint <= to; ++codepoint) {
2204  UnicodeData &ud = UnicodeData::valueRef(codepoint);
2205  ud.p.sentenceBreakClass = brk;
2206  }
2207  }
2208 }
2209 
2210 #if 0
2211 // This piece of code does full case folding and comparison. We currently
2212 // don't use it, since this gives lots of issues with things such as case
2213 // insensitive search and replace.
2214 static inline void foldCase(uint ch, ushort *out)
2215 {
2216  const QUnicodeTables::Properties *p = qGetProp(ch);
2217  if (!p->caseFoldSpecial) {
2218  *(out++) = ch + p->caseFoldDiff;
2219  } else {
2220  const ushort *folded = specialCaseMap + p->caseFoldDiff;
2221  ushort length = *folded++;
2222  while (length--)
2223  *out++ = *folded++;
2224  }
2225  *out = 0;
2226 }
2227 
2228 static int ucstricmp(const ushort *a, const ushort *ae, const ushort *b, const ushort *be)
2229 {
2230  if (a == b)
2231  return 0;
2232  if (a == 0)
2233  return 1;
2234  if (b == 0)
2235  return -1;
2236 
2237  while (a != ae && b != be) {
2238  const QUnicodeTables::Properties *pa = qGetProp(*a);
2239  const QUnicodeTables::Properties *pb = qGetProp(*b);
2240  if (pa->caseFoldSpecial | pb->caseFoldSpecial)
2241  goto special;
2242  int diff = (int)(*a + pa->caseFoldDiff) - (int)(*b + pb->caseFoldDiff);
2243  if ((diff))
2244  return diff;
2245  ++a;
2246  ++b;
2247  }
2248  }
2249  if (a == ae) {
2250  if (b == be)
2251  return 0;
2252  return -1;
2253  }
2254  return 1;
2255 special:
2256  ushort abuf[SPECIAL_CASE_MAX_LEN + 1];
2257  ushort bbuf[SPECIAL_CASE_MAX_LEN + 1];
2258  abuf[0] = bbuf[0] = 0;
2259  ushort *ap = abuf;
2260  ushort *bp = bbuf;
2261  while (1) {
2262  if (!*ap) {
2263  if (a == ae) {
2264  if (!*bp && b == be)
2265  return 0;
2266  return -1;
2267  }
2268  foldCase(*(a++), abuf);
2269  ap = abuf;
2270  }
2271  if (!*bp) {
2272  if (b == be)
2273  return 1;
2274  foldCase(*(b++), bbuf);
2275  bp = bbuf;
2276  }
2277  if (*ap != *bp)
2278  return (int)*ap - (int)*bp;
2279  ++ap;
2280  ++bp;
2281  }
2282 }
2283 
2284 
2285 static int ucstricmp(const ushort *a, const ushort *ae, const uchar *b)
2286 {
2287  if (a == 0)
2288  return 1;
2289  if (b == 0)
2290  return -1;
2291 
2292  while (a != ae && *b) {
2293  const QUnicodeTables::Properties *pa = qGetProp(*a);
2294  const QUnicodeTables::Properties *pb = qGetProp((ushort)*b);
2295  if (pa->caseFoldSpecial | pb->caseFoldSpecial)
2296  goto special;
2297  int diff = (int)(*a + pa->caseFoldDiff) - (int)(*b + pb->caseFoldDiff);
2298  if ((diff))
2299  return diff;
2300  ++a;
2301  ++b;
2302  }
2303  if (a == ae) {
2304  if (!*b)
2305  return 0;
2306  return -1;
2307  }
2308  return 1;
2309 
2310 special:
2311  ushort abuf[SPECIAL_CASE_MAX_LEN + 1];
2312  ushort bbuf[SPECIAL_CASE_MAX_LEN + 1];
2313  abuf[0] = bbuf[0] = 0;
2314  ushort *ap = abuf;
2315  ushort *bp = bbuf;
2316  while (1) {
2317  if (!*ap) {
2318  if (a == ae) {
2319  if (!*bp && !*b)
2320  return 0;
2321  return -1;
2322  }
2323  foldCase(*(a++), abuf);
2324  ap = abuf;
2325  }
2326  if (!*bp) {
2327  if (!*b)
2328  return 1;
2329  foldCase(*(b++), bbuf);
2330  bp = bbuf;
2331  }
2332  if (*ap != *bp)
2333  return (int)*ap - (int)*bp;
2334  ++ap;
2335  ++bp;
2336  }
2337 }
2338 #endif
2339 
2340 #if 0
2341 static QList<QByteArray> blockNames;
2342 struct BlockInfo
2343 {
2344  int blockIndex;
2345  int firstCodePoint;
2346  int lastCodePoint;
2347 };
2348 static QList<BlockInfo> blockInfoList;
2349 
2350 static void readBlocks()
2351 {
2352  qDebug("Reading Blocks.txt");
2353 
2354  QFile f("data/Blocks.txt");
2355  if (!f.exists())
2356  qFatal("Couldn't find Blocks.txt");
2357 
2358  f.open(QFile::ReadOnly);
2359 
2360  while (!f.atEnd()) {
2361  QByteArray line = f.readLine();
2362  line.resize(line.size() - 1);
2363 
2364  int comment = line.indexOf("#");
2365  if (comment >= 0)
2366  line = line.left(comment);
2367 
2368  line.replace(" ", "");
2369 
2370  if (line.isEmpty())
2371  continue;
2372 
2373  int semicolon = line.indexOf(';');
2374  Q_ASSERT(semicolon >= 0);
2375  QByteArray codePoints = line.left(semicolon);
2376  QByteArray blockName = line.mid(semicolon + 1);
2377 
2378  int blockIndex = blockNames.indexOf(blockName);
2379  if (blockIndex == -1) {
2380  blockIndex = blockNames.size();
2381  blockNames.append(blockName);
2382  }
2383 
2384  codePoints.replace("..", ".");
2385  QList<QByteArray> cl = codePoints.split('.');
2386 
2387  bool ok;
2388  int first = cl[0].toInt(&ok, 16);
2389  Q_ASSERT(ok);
2390  int last = first;
2391  if (cl.size() == 2) {
2392  last = cl[1].toInt(&ok, 16);
2393  Q_ASSERT(ok);
2394  }
2395 
2396  BlockInfo blockInfo = { blockIndex, first, last };
2397  blockInfoList.append(blockInfo);
2398  }
2399 }
2400 #endif
2401 
2402 static void readScripts()
2403 {
2404  qDebug("Reading Scripts.txt");
2405 
2406  QFile f("data/Scripts.txt");
2407  if (!f.exists())
2408  qFatal("Couldn't find Scripts.txt");
2409 
2410  f.open(QFile::ReadOnly);
2411 
2412  while (!f.atEnd()) {
2413  QByteArray line = f.readLine();
2414  line.resize(line.size() - 1);
2415 
2416  int comment = line.indexOf("#");
2417  if (comment >= 0)
2418  line = line.left(comment);
2419 
2420  line.replace(" ", "");
2421  line.replace("_", "");
2422 
2423  if (line.isEmpty())
2424  continue;
2425 
2426  int semicolon = line.indexOf(';');
2427  Q_ASSERT(semicolon >= 0);
2428  QByteArray codePoints = line.left(semicolon);
2429  QByteArray scriptName = line.mid(semicolon + 1);
2430 
2431  codePoints.replace("..", ".");
2432  QList<QByteArray> cl = codePoints.split('.');
2433 
2434  bool ok;
2435  int first = cl[0].toInt(&ok, 16);
2436  Q_ASSERT(ok);
2437  int last = first;
2438  if (cl.size() == 2) {
2439  last = cl[1].toInt(&ok, 16);
2440  Q_ASSERT(ok);
2441  }
2442 
2443  if (!scriptMap.contains(scriptName))
2444  qFatal("Unhandled script property value: %s", scriptName.constData());
2445  QChar::Script script = scriptMap.value(scriptName, QChar::Script_Unknown);
2446 
2447  for (int codepoint = first; codepoint <= last; ++codepoint) {
2448  UnicodeData &ud = UnicodeData::valueRef(codepoint);
2449  ud.p.script = script;
2450  }
2451  }
2452 }
2453 
// Code point -> IDNA mapping string. readIdnaMappingTable() fills it for
// code points >= 0x80 that have a non-empty mapping.
2454 static QMap<char32_t, QString> idnaMappingTable;
2455 
// Parses data/IdnaMappingTable.txt. For every code point of each listed range
// it stores the raw status in UnicodeData::idnaRawStatus and, when the line
// yields a non-empty mapping, records that mapping in idnaMappingTable.
// Aborts via qFatal() if the file cannot be found or opened, or if a status
// string is not present in idnaStatusMap.
// NOTE(review): the embedded line numbers skip 2495, 2497-2498, 2502-2503 and
// 2528, so some case labels of the switch and the check announced by the
// "Ensure that ranges don't overlap" comment are presumably missing from this
// listing -- confirm against the upstream file.
2456 static void readIdnaMappingTable()
2457 {
2458  qDebug("Reading IdnaMappingTable.txt");
2459 
2460  QFile f("data/IdnaMappingTable.txt");
2461  if (!f.exists() || !f.open(QFile::ReadOnly))
2462  qFatal("Couldn't find or read IdnaMappingTable.txt");
2463 
2464  while (!f.atEnd()) {
2465  QByteArray line = f.readLine().trimmed();
2466 
// Cut off the trailing comment, then normalize the remaining whitespace.
2467  int comment = line.indexOf('#');
2468  line = (comment < 0 ? line : line.left(comment)).simplified();
2469 
2470  if (line.isEmpty())
2471  continue;
2472 
// fields[0] is the code point (range), fields[1] the status string and
// fields[2], when present, the list of mapped code points.
2473  QList<QByteArray> fields = line.split(';');
2474  Q_ASSERT(fields.size() >= 2);
2475 
2476  // That would be split(".."), but that API does not exist.
2477  const QByteArray codePoints = fields[0].trimmed().replace("..", ".");
2478  QList<QByteArray> cl = codePoints.split('.');
2479  Q_ASSERT(cl.size() >= 1 && cl.size() <= 2);
2480 
2481  const QByteArray statusString = fields[1].trimmed();
2482  if (!idnaStatusMap.contains(statusString))
2483  qFatal("Unhandled IDNA status property value for %s: %s",
2484  qPrintable(codePoints), qPrintable(statusString));
2485  IdnaRawStatus rawStatus = idnaStatusMap.value(statusString);
2486 
// A single code point yields last == first; the second number is only parsed
// when the first one was parsed successfully.
2487  bool ok;
2488  const int first = cl[0].toInt(&ok, 16);
2489  const int last = ok && cl.size() == 2 ? cl[1].toInt(&ok, 16) : first;
2490  Q_ASSERT(ok);
2491 
2492  QString mapping;
2493 
2494  switch (rawStatus) {
2496  case IdnaRawStatus::Valid:
2499  break;
2500 
2501  case IdnaRawStatus::Mapped:
2504  Q_ASSERT(fields.size() >= 3);
2505 
// Each space-separated hexadecimal value is appended to the mapping as
// UTF-16 (QChar::fromUcs4() may produce more than one code unit).
2506  for (const auto &s : fields[2].trimmed().split(' ')) {
2507  if (!s.isEmpty()) {
// This flag shadows the `ok` declared before the switch.
2508  bool ok;
2509  int val = s.toInt(&ok, 16);
2510  Q_ASSERT_X(ok, "readIdnaMappingTable", qPrintable(line));
2511  for (auto c : QChar::fromUcs4(val))
2512  mapping.append(c);
2513  }
2514  }
2515 
2516  // Some deviations have empty mappings, others should not...
2517  if (mapping.isEmpty()) {
2518  Q_ASSERT(rawStatus == IdnaRawStatus::Deviation);
2519  qDebug() << " Empty IDNA mapping for" << codePoints;
2520  }
2521 
2522  break;
2523  }
2524 
2525  for (int codepoint = first; codepoint <= last; ++codepoint) {
2526  UnicodeData &ud = UnicodeData::valueRef(codepoint);
2527  // Ensure that ranges don't overlap.
2529  ud.idnaRawStatus = rawStatus;
2530 
2531  // ASCII codepoints are skipped here because they are processed in separate
2532  // optimized code paths that do not use this mapping table.
2533  if (codepoint >= 0x80 && !mapping.isEmpty())
2534  idnaMappingTable[codepoint] = mapping;
2535  }
2536  }
2537 }
2538 
2539 /*
2540  Resolve IDNA status by deciding whether to allow STD3 violations
2541 
2542  Underscores are normally prohibited by STD3 rules but Qt allows underscores
2543  to be used inside URLs (see QTBUG-7434 for example). This code changes the
2544  underscore status to Valid. The same is done to mapped codepoints that
2545  map to underscores combined with other Valid codepoints.
2546 
2547  Underscores in domain names are required when using DNS-SD protocol and they
2548  are also allowed by the SMB protocol.
2549 */
// NOTE(review): this listing of resolveIdnaStatus() is incomplete -- the
// embedded line numbers skip 2554, 2559, 2561-2562, 2566-2567, 2569, 2575,
// 2583 and 2585, so several case labels and statements are not visible here.
// The comments below only describe what the visible lines do; confirm the
// rest against the upstream file.
2550 static void resolveIdnaStatus()
2551 {
2552  qDebug("resolveIdnaStatus:");
2553 
2555 
// Visit every code point up to QChar::LastValidCodePoint and derive its
// final idnaStatus from the raw status read from the data file.
2556  for (int codepoint = 0; codepoint <= QChar::LastValidCodePoint; ++codepoint) {
2557  UnicodeData &ud = UnicodeData::valueRef(codepoint);
2558  switch (ud.idnaRawStatus) {
// For the cases visible here the raw status is carried over by a cast.
2560  case IdnaRawStatus::Valid:
2563  case IdnaRawStatus::Mapped:
2564  ud.p.idnaStatus = static_cast<IdnaStatus>(ud.idnaRawStatus);
2565  break;
2568  break;
// A code point handled by this branch is expected to have a mapping entry.
2570  Q_ASSERT(idnaMappingTable.contains(codepoint));
2571  const auto &mapping = idnaMappingTable[codepoint];
2572 
// `allow` is cleared as soon as one element of the mapping fails the
// (not visible) check inside the loop.
2573  bool allow = true;
2574  for (QStringIterator iter(mapping); iter.hasNext();) {
2576  allow = false;
2577  break;
2578  }
2579  }
2580 
2581  if (allow) {
2582  qDebug() << " Allowing" << Qt::hex << codepoint;
2584  } else {
// The mapping of a code point that is not allowed is dropped from the table.
2586  idnaMappingTable.remove(codepoint);
2587  }
2588  break;
2589  }
2590  }
2591  }
2592 }
2593 
2594 /*
2595  Return maximum overlap for strings left and right in this order.
2596 
2597  The input strings should not be substrings of each other.
2598 */
2599 static qsizetype overlap(const QString &left, const QString &right)
2600 {
2601  for (qsizetype n = std::min(left.size(), right.size()) - 1; n > 0; n--) {
2602  if (left.last(n) == right.first(n))
2603  return n;
2604  }
2605  return 0;
2606 }
2607 
// Node identifier of the overlap graph; buildSuperstring() uses values of
// this type to index its list of input strings.
2608 using GraphNode = unsigned int;
2609 
// NOTE(review): the declaration header and the members of this aggregate are
// missing from this listing (embedded line numbers 2610 and 2612-2614 are
// skipped). Judging by its uses in buildSuperstring() it is presumably
// `struct OverlapGraphEdge` with members start, end and overlap -- confirm
// against the upstream file.
2611 {
2615 };
2616 
2617 /*
2618  Returns a common superstring of all inputs.
2619 
2620  Ideally this function would return the superstring of the smallest
2621  possible size, but the shortest common superstring problem is known to be
2622  NP-hard so an approximation must be used here.
2623 
2624  This function implements the greedy algorithm for building the superstring.
2625 
2626  As an optimization this function is allowed to destroy its inputs.
2627 */
// Greedy construction of a common superstring.
// `inputs` is sorted and pruned in place; the returned string contains every
// remaining input, with consecutive inputs on the chosen path merged along
// their overlap.
// NOTE(review): the substring-removal loop starts at inputs.begin() + 1, so
// this presumably expects a non-empty list -- confirm against the callers.
// NOTE(review): the declaration of `pathEdges` (embedded line 2665) is missing
// from this listing; from its uses it is presumably an associative container
// from GraphNode to OverlapGraphEdge -- confirm against the upstream file.
2628 static QString buildSuperstring(QList<QString> &inputs)
2629 {
2630  // Ensure that the inputs don't contain substrings.
2631  // First, sort the array by length to make substring removal easier.
// Longer strings come first; strings of equal length are in descending order.
2632  std::sort(inputs.begin(), inputs.end(), [](const QString &a, const QString &b) {
2633  return a.size() == b.size() ? a > b : a.size() > b.size();
2634  });
2635 
2636  // Remove duplicates and other substrings
// An element is erased when any element before it contains it.
2637  for (auto i = inputs.begin() + 1; i != inputs.end();) {
2638  bool isSubstring = std::any_of(inputs.begin(), i, [i](const QString &s) {
2639  return s.contains(*i);
2640  });
2641  i = isSubstring ? inputs.erase(i) : i + 1;
2642  }
2643 
2644  // Build overlap graph for the remaining inputs. It is fully-connected.
// One directed edge per ordered pair (i, j), i != j, hence n * (n - 1) edges.
2645  QList<OverlapGraphEdge> graphEdges;
2646  graphEdges.reserve(inputs.size() * (inputs.size() - 1));
2647 
2648  for (GraphNode i = 0; i < inputs.size(); i++) {
2649  for (GraphNode j = 0; j < inputs.size(); j++) {
2650  if (i != j)
2651  graphEdges.append(OverlapGraphEdge {i, j, overlap(inputs[i], inputs[j])});
2652  }
2653  }
2654 
2655  // Build a Hamiltonian path through the overlap graph, taking nodes with highest overlap
2656  // first.
// Edges are ordered by descending overlap; ties are broken by start, then end.
2657  std::sort(graphEdges.begin(), graphEdges.end(), [](const auto &a, const auto &b) {
2658  return a.overlap == b.overlap
2659  ? a.start == b.start ? a.end < b.end : a.start < b.start
2660  : a.overlap > b.overlap;
2661  });
2662 
// starts[n] / ends[n]: node n already is the start / the end of a chosen edge.
2663  QBitArray starts(inputs.size());
2664  QBitArray ends(inputs.size());
2666 
// Follows the chosen edges from edge.end; the candidate edge would close a
// cycle if that walk finishes at edge.start.
2667  auto createsCycle = [&](const OverlapGraphEdge &edge) {
2668  if (!starts[edge.end] || !ends[edge.start])
2669  return false;
2670  Q_ASSERT(!pathEdges.contains(edge.start)); // Caller checks it's not yet a start.
2671 
2672  GraphNode node = edge.end;
2673  while (pathEdges.contains(node))
2674  node = pathEdges[node].end;
2675 
2676  return node == edge.start;
2677  };
2678 
// Take edges in sorted order as long as each node keeps at most one outgoing
// and one incoming edge and no cycle appears; stop after n - 1 edges.
2679  for (const auto &edge : graphEdges) {
2680  if (!starts[edge.start] && !ends[edge.end] && !createsCycle(edge)) {
2681  starts.setBit(edge.start);
2682  ends.setBit(edge.end);
2683  pathEdges[edge.start] = edge;
2684  if (pathEdges.size() == inputs.size() - 1)
2685  break;
2686  }
2687  }
2688 
2689  Q_ASSERT(ends.count(false) == 1);
2690  Q_ASSERT(starts.count(false) == 1);
2691 
2692  // Find the start node of the path.
// It is the first node that is not the end of any chosen edge.
2693  GraphNode node = 0;
2694  while (node < ends.size() && ends[node])
2695  node++;
2696  Q_ASSERT(node < ends.size());
2697 
2698  QString superstring = inputs[node];
2699  qsizetype pathNodes = 1; // Count path nodes for sanity check
2700 
2701  while (pathEdges.contains(node)) {
2702  const auto &edge = pathEdges[node];
2703  Q_ASSERT(edge.start == node);
2704 
// Append the next input without the first edge.overlap characters.
2705  superstring.append(QStringView { inputs[edge.end] }.sliced(edge.overlap));
2706 
2707  node = edge.end;
2708  pathNodes++;
2709  }
2710  Q_ASSERT(pathNodes == inputs.size());
2711 
2712  return superstring;
2713 }
2714 
2715 /*
2716  Stores IDNA mapping information.
2717 
2718  The mapping table is an array of IdnaMapEntry instances sorted
2719  by codePoint. For mapping resulting in a single QChar, that character
2720  is stored inside the entry in charOrOffset. Otherwise the entry contains
2721  offset inside idnaMappingData array.
2722 
2723  It should be possible to find all mapped strings with size > 1 inside
2724  idnaMappingData, otherwise the construction of this array should be optimized
2725  to take advantage of common substrings and minimize the data size.
2726 */
// Generates the C++ source for the IDNA mapping data: the idnaMappingData
// array, the IdnaMapEntry struct, the idnaMap table (one entry per key of
// idnaMappingTable) and the idnaMapping() lookup function. Returns that
// source text.
// NOTE(review): the declaration of `values` (embedded line 2731) is missing
// from this listing; it is passed to buildSuperstring(), so it is presumably
// a QList<QString> -- confirm against the upstream file.
2727 static QByteArray createIdnaMapping()
2728 {
2729  qDebug("createIdnaMapping:");
2730 
2732  values.reserve(idnaMappingTable.size());
2733  qsizetype uncompressedSize = 0;
2734 
// Only mappings longer than two code units go into the shared data string;
// shorter ones are stored inside the map entry itself (see below).
2735  for (const auto &v : idnaMappingTable.values()) {
2736  if (v.size() > 2) {
2737  values.append(v);
2738  uncompressedSize += v.size();
2739  }
2740  }
2741 
2742  QString idnaMappingData = buildSuperstring(values);
2743  qDebug() << " uncompressed size:" << uncompressedSize << "characters";
2744  qDebug() << " consolidated size:" << idnaMappingData.size() << "characters";
2745 
2746  qsizetype memoryUsage = 0;
2747 
2748  QByteArray out =
2749  "static constexpr char16_t idnaMappingData[] = {";
2750 
// Emit the data string as hexadecimal literals, twelve per line, counting
// two bytes per code unit.
2751  int col = 0;
2752  for (auto c : idnaMappingData) {
2753  if (col == 0)
2754  out += "\n ";
2755  out += " 0x" + QByteArray::number(c.unicode(), 16) + ",";
2756  col = (col + 1) % 12;
2757  memoryUsage += 2;
2758  }
2759  out += "\n};\n\n";
2760 
2761  // Check if the values fit into IdnaMapEntry below.
2762  Q_ASSERT(idnaMappingData.size() < (1 << 16));
2763 
2764  // This could be written more elegantly with a union and designated initializers,
2765  // but designated initializers are a C++20 feature
2766  out +=
2767  "struct IdnaMapEntry {\n"
2768  " // 21 bits suffice for any valid code-point (LastValidCodePoint = 0x10ffff)\n"
2769  " unsigned codePoint : 24;\n"
2770  " unsigned size : 8;\n"
2771  " char16_t ucs[2]; // ucs[0] is offset if size > 2\n"
2772  "};\n"
2773  "static_assert(sizeof(IdnaMapEntry) == 8);\n\n"
2774  "static constexpr IdnaMapEntry idnaMap[] = {\n";
2775 
// One entry per mapped code point, in the iteration order of idnaMappingTable.
2776  for (auto i = idnaMappingTable.keyValueBegin(); i != idnaMappingTable.keyValueEnd(); i++) {
2777  const QString &mapping = i->second;
2778  Q_ASSERT(!mapping.isEmpty());
2779 
// Mappings longer than two code units must be found in the data string.
2780  qsizetype mappingIndex = idnaMappingData.indexOf(mapping);
2781  Q_ASSERT(mappingIndex >= 0 || mapping.size() <= 2);
2782 
2783  out += " { 0x" + QByteArray::number(i->first, 16) +
2784  ", " + QByteArray::number(mapping.size());
2785  if (mapping.size() <= 2) {
// Short mapping: the code units are stored inline, padded with 0.
2786  out += ", { 0x" + QByteArray::number(mapping[0].unicode(), 16);
2787  if (mapping.size() == 2)
2788  out += ", 0x" + QByteArray::number(mapping[1].unicode(), 16);
2789  else
2790  out += ", 0";
2791  } else {
// Long mapping: the first slot holds the offset into idnaMappingData.
2792  out += ", { " + QByteArray::number(mappingIndex);
2793  out += ", 0";
2794  }
2795  out += " } },\n";
2796  memoryUsage += 8;
2797  }
2798 
2799  qDebug() << " memory usage:" << memoryUsage << "bytes";
2800 
2801  out +=
2802  "};\n\n"
2803  "Q_CORE_EXPORT QStringView QT_FASTCALL idnaMapping(char32_t ucs4) noexcept\n"
2804  "{\n"
2805  " auto i = std::lower_bound(std::begin(idnaMap), std::end(idnaMap), ucs4,\n"
2806  " [](const auto &p, char32_t c) { return p.codePoint < c; });\n"
2807  " if (i == std::end(idnaMap) || i->codePoint != ucs4)\n"
2808  " return {};\n\n"
2809  " return QStringView(i->size > 2 ? idnaMappingData + i->ucs[0] : i->ucs, i->size);\n"
2810  "}\n\n";
2811 
2812  return out;
2813 }
2814 
2815 #if 0
// Debug helper (disabled by the surrounding #if 0): prints selected
// properties, and the decomposition if there is one, for every code point in
// the inclusive range [from, to].
// NOTE(review): `d` is not declared in the visible lines (embedded line 2819
// is missing from this listing); presumably it is the UnicodeData of code
// point i -- confirm against the upstream file.
2816 static void dump(int from, int to)
2817 {
2818  for (int i = from; i <= to; ++i) {
2820  qDebug("0x%04x: cat=%d combining=%d dir=%d case=%x mirror=%x joining=%d age=%d",
2821  i, d.p.category, d.p.combiningClass, d.p.direction, d.otherCase, d.mirroredChar, d.p.joining, d.p.age);
2822  if (d.decompositionType != QChar::NoDecomposition) {
2823  qDebug(" decomposition: type=%d, length=%d, first=%x", d.decompositionType, d.decomposition.size(),
2824  d.decomposition[0]);
2825  }
2826  }
2827  qDebug(" ");
2828 }
2829 #endif
2830 
2831 static QList<PropertyFlags> uniqueProperties;
2832 
2833 static void computeUniqueProperties()
2834 {
2835  qDebug("computeUniqueProperties:");
2836  for (int codepoint = 0; codepoint <= QChar::LastValidCodePoint; ++codepoint) {
2837  UnicodeData &d = UnicodeData::valueRef(codepoint);
2838  int index = uniqueProperties.indexOf(d.p);
2839  if (index == -1) {
2840  index = uniqueProperties.size();
2841  uniqueProperties.append(d.p);
2842  }
2843  d.propertyIndex = index;
2844  }
2845  qDebug(" %zd unique unicode properties found", ssize_t(uniqueProperties.size()));
2846 }
2847 
// A block of table values as grouped by the table generators below.
// `index` starts out as -1; the generators set it to the running offset of
// the block among the unique blocks when the block is first stored.
// Two blocks compare equal when their `values` are equal; `index` is ignored.
// NOTE(review): the declaration of the `values` member (embedded line 2855)
// is missing from this listing; the generators call reserve(), append() and
// at() on it -- confirm its type against the upstream file.
2848 struct UniqueBlock {
2849  inline UniqueBlock() : index(-1) {}
2850 
2851  inline bool operator==(const UniqueBlock &other) const
2852  { return values == other.values; }
2853 
2854  int index;
2856 };
2857 
// Generates the C++ source for the property lookup tables and returns it:
//  - uc_property_trie: first the map from block id to the position of the
//    block's data, then the data of all unique blocks (property indices);
//  - uc_properties: one initializer per entry of uniqueProperties;
//  - the qGetProp()/properties() lookup functions and the exported per-class
//    accessors built on them.
// Code points below BMP_END are grouped in blocks of BMP_BLOCKSIZE, the rest
// up to SMP_END in blocks of SMP_BLOCKSIZE.
// NOTE(review): `d` is not declared in the visible lines of the two grouping
// loops (embedded lines 2887 and 2908 are missing from this listing);
// presumably it is the UnicodeData of code point uc -- confirm against the
// upstream file.
2858 static QByteArray createPropertyInfo()
2859 {
2860  qDebug("createPropertyInfo:");
2861 
2862  // we reserve one bit more than in the assert below for the sign
2863  Q_ASSERT(maxMirroredDiff < (1<<12));
2864  Q_ASSERT(maxLowerCaseDiff < (1<<13));
2865  Q_ASSERT(maxUpperCaseDiff < (1<<13));
2866  Q_ASSERT(maxTitleCaseDiff < (1<<13));
2867  Q_ASSERT(maxCaseFoldDiff < (1<<13));
2868 
2869  const int BMP_BLOCKSIZE = 32;
2870  const int BMP_SHIFT = 5;
2871  const int BMP_END = 0x11000;
2872  const int SMP_END = 0x110000;
2873  const int SMP_BLOCKSIZE = 256;
2874  const int SMP_SHIFT = 8;
2875 
// uniqueBlocks holds each distinct block once; blockMap maps every block id
// to the offset (UniqueBlock::index) of its data; `used` is the running
// offset for the next new block.
2876  QList<UniqueBlock> uniqueBlocks;
2877  QList<int> blockMap;
2878  int used = 0;
2879 
2880  // Group BMP data into blocks indexed by their 12 most significant bits
2881  // (blockId = ucs >> 5):
2882  for (int block = 0; block < BMP_END/BMP_BLOCKSIZE; ++block) {
2883  UniqueBlock b;
2884  b.values.reserve(BMP_BLOCKSIZE);
2885  for (int i = 0; i < BMP_BLOCKSIZE; ++i) {
2886  int uc = block*BMP_BLOCKSIZE + i;
2888  b.values.append(d.propertyIndex);
2889  }
// Reuse an identical block if one was stored before.
2890  int index = uniqueBlocks.indexOf(b);
2891  if (index == -1) {
2892  index = uniqueBlocks.size();
2893  b.index = used;
2894  used += BMP_BLOCKSIZE;
2895  uniqueBlocks.append(b);
2896  }
2897  blockMap.append(uniqueBlocks.at(index).index);
2898  }
2899  int bmp_blocks = uniqueBlocks.size();
2900 
2901  // Group SMP data into blocks indexed by their 9 most significant bits, plus
2902  // an offset to put them after the BMP blocks (blockId = (ucs >> 8) + 0x880):
2903  for (int block = BMP_END/SMP_BLOCKSIZE; block < SMP_END/SMP_BLOCKSIZE; ++block) {
2904  UniqueBlock b;
2905  b.values.reserve(SMP_BLOCKSIZE);
2906  for (int i = 0; i < SMP_BLOCKSIZE; ++i) {
2907  int uc = block*SMP_BLOCKSIZE + i;
2909  b.values.append(d.propertyIndex);
2910  }
2911  int index = uniqueBlocks.indexOf(b);
2912  if (index == -1) {
2913  index = uniqueBlocks.size();
2914  b.index = used;
2915  used += SMP_BLOCKSIZE;
2916  uniqueBlocks.append(b);
2917  }
2918  blockMap.append(uniqueBlocks.at(index).index);
2919  }
2920  int smp_blocks = uniqueBlocks.size() - bmp_blocks;
2921 
// Size statistics for the debug output; table entries are unsigned short.
2922  int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*sizeof(unsigned short);
2923  int bmp_trie = BMP_END/BMP_BLOCKSIZE*sizeof(unsigned short);
2924  int bmp_mem = bmp_block_data + bmp_trie;
2925  qDebug(" %d unique blocks in BMP.", bmp_blocks);
2926  qDebug(" block data uses: %d bytes", bmp_block_data);
2927  qDebug(" trie data uses : %d bytes", bmp_trie);
2928 
2929  int smp_block_data = smp_blocks*SMP_BLOCKSIZE*sizeof(unsigned short);
2930  int smp_trie = (SMP_END-BMP_END)/SMP_BLOCKSIZE*sizeof(unsigned short);
2931  int smp_mem = smp_block_data + smp_trie;
2932  qDebug(" %d unique blocks in SMP.", smp_blocks);
2933  qDebug(" block data uses: %d bytes", smp_block_data);
2934  qDebug(" trie data uses : %d bytes", smp_trie);
2935 
2936  int prop_data = uniqueProperties.size() * SizeOfPropertiesStruct;
2937  qDebug("\n properties data uses : %d bytes", prop_data);
2938  qDebug(" memory usage: %d bytes", bmp_mem + smp_mem + prop_data);
2939 
// The emitted indices (block offset + size of the map) must fit into the
// unsigned short entries of the table.
2940  Q_ASSERT(blockMap.size() == BMP_END/BMP_BLOCKSIZE +(SMP_END-BMP_END)/SMP_BLOCKSIZE); // 0x1870
2941  Q_ASSERT(blockMap.last() + blockMap.size() < (1<<(sizeof(unsigned short)*8)));
2942 
2943  QByteArray out = "static constexpr unsigned short uc_property_trie[] = {\n";
2944  // First write the map from blockId to indices of unique blocks:
2945  out += " // [0x0..0x" + QByteArray::number(BMP_END, 16) + ")";
2946  for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) {
2947  if (!(i % 8)) {
2948  if (out.endsWith(' '))
2949  out.chop(1);
2950  if (!((i*BMP_BLOCKSIZE) % 0x1000))
2951  out += "\n";
2952  out += "\n ";
2953  }
// The block data follows the map in the same array, hence the added map size.
2954  out += QByteArray::number(blockMap.at(i) + blockMap.size());
2955  out += ", ";
2956  }
2957  if (out.endsWith(' '))
2958  out.chop(1);
2959  out += "\n\n // [0x" + QByteArray::number(BMP_END, 16) + "..0x" + QByteArray::number(SMP_END, 16) + ")\n";
2960  for (int i = BMP_END/BMP_BLOCKSIZE; i < blockMap.size(); ++i) {
2961  if (!(i % 8)) {
2962  if (out.endsWith(' '))
2963  out.chop(1);
2964  if (!(i % (0x10000/SMP_BLOCKSIZE)))
2965  out += "\n";
2966  out += "\n ";
2967  }
2968  out += QByteArray::number(blockMap.at(i) + blockMap.size());
2969  out += ", ";
2970  }
2971  if (out.endsWith(' '))
2972  out.chop(1);
2973  out += "\n";
2974  // Then write the contents of the unique blocks, at the anticipated indices.
2975  // Each unique block is a list of UnicodeData::propertyIndex values, which
2976  // are indices into the uc_properties table.
2977  for (int i = 0; i < uniqueBlocks.size(); ++i) {
2978  if (out.endsWith(' '))
2979  out.chop(1);
2980  out += "\n";
2981  const UniqueBlock &b = uniqueBlocks.at(i);
2982  for (int j = 0; j < b.values.size(); ++j) {
2983  if (!(j % 8)) {
2984  if (out.endsWith(' '))
2985  out.chop(1);
2986  out += "\n ";
2987  }
2988  out += QByteArray::number(b.values.at(j));
2989  out += ", ";
2990  }
2991  }
2992  if (out.endsWith(", "))
2993  out.chop(2);
2994  out += "\n};\n\n";
2995 
2996  out += "static constexpr Properties uc_properties[] = {";
2997  // keep in sync with the property declaration
2998  for (int i = 0; i < uniqueProperties.size(); ++i) {
2999  const PropertyFlags &p = uniqueProperties.at(i);
3000  out += "\n { ";
3001 // " ushort category : 8; /* 5 used */\n"
3002  out += QByteArray::number( p.category );
3003  out += ", ";
3004 // " ushort direction : 8; /* 5 used */\n"
3005  out += QByteArray::number( p.direction );
3006  out += ", ";
3007 // " ushort combiningClass : 8;\n"
3008  out += QByteArray::number( p.combiningClass );
3009  out += ", ";
3010 // " ushort joining : 3;\n"
3011  out += QByteArray::number( p.joining );
3012  out += ", ";
3013 // " signed short digitValue : 5;\n"
3014  out += QByteArray::number( p.digitValue );
3015  out += ", ";
3016 // " signed short mirrorDiff : 16;\n"
3017  out += QByteArray::number( p.mirrorDiff );
3018  out += ", ";
3019 // " ushort unicodeVersion : 5; /* 5 used */\n"
3020  out += QByteArray::number( p.age );
3021  out += ", ";
3022 // " ushort eastAsianWidth : 3;" /* 3 used */\n"
3023  out += QByteArray::number( static_cast<unsigned int>(p.eastAsianWidth) );
3024  out += ", ";
3025 // " ushort nfQuickCheck : 8;\n"
3026  out += QByteArray::number( p.nfQuickCheck );
3027  out += ", ";
3028 // " struct {\n"
3029 // " ushort special : 1;\n"
3030 // " signed short diff : 15;\n"
3031 // " } cases[NumCases];\n"
3032  out += " { {";
3033  out += QByteArray::number( p.lowerCaseSpecial );
3034  out += ", ";
3035  out += QByteArray::number( p.lowerCaseDiff );
3036  out += "}, {";
3037  out += QByteArray::number( p.upperCaseSpecial );
3038  out += ", ";
3039  out += QByteArray::number( p.upperCaseDiff );
3040  out += "}, {";
3041  out += QByteArray::number( p.titleCaseSpecial );
3042  out += ", ";
3043  out += QByteArray::number( p.titleCaseDiff );
3044  out += "}, {";
3045  out += QByteArray::number( p.caseFoldSpecial );
3046  out += ", ";
3047  out += QByteArray::number( p.caseFoldDiff );
3048  out += "} }, ";
3049 // " ushort graphemeBreakClass : 5; /* 5 used */\n"
3050 // " ushort wordBreakClass : 5; /* 5 used */\n"
3051 // " ushort lineBreakClass : 6; /* 6 used */\n"
3052  out += QByteArray::number( p.graphemeBreakClass );
3053  out += ", ";
3054  out += QByteArray::number( p.wordBreakClass );
3055  out += ", ";
3056  out += QByteArray::number( p.lineBreakClass );
3057  out += ", ";
3058 // " ushort sentenceBreakClass : 4; /* 4 used */\n"
3059  out += QByteArray::number( p.sentenceBreakClass );
3060  out += ", ";
3061 // " ushort idnaStatus : 4; /* 3 used */\n"
3062  out += QByteArray::number( static_cast<unsigned int>(p.idnaStatus) );
3063  out += ", ";
3064 // " ushort script : 8;\n"
3065  out += QByteArray::number( p.script );
3066  out += " },";
3067  }
3068  if (out.endsWith(','))
3069  out.chop(1);
3070  out += "\n};\n\n";
3071 
// The generated lookup functions mirror the grouping above: the first level
// is indexed by block id, the second by the low bits of the code point.
3072  out += "Q_DECL_CONST_FUNCTION static inline const Properties *qGetProp(char32_t ucs4) noexcept\n"
3073  "{\n"
3074  " Q_ASSERT(ucs4 <= QChar::LastValidCodePoint);\n"
3075  " if (ucs4 < 0x" + QByteArray::number(BMP_END, 16) + ")\n"
3076  " return uc_properties + uc_property_trie[uc_property_trie[ucs4 >> "
3077  + QByteArray::number(BMP_SHIFT) + "] + (ucs4 & 0x"
3078  + QByteArray::number(BMP_BLOCKSIZE - 1, 16)+ ")];\n"
3079  "\n"
3080  " return uc_properties\n"
3081  " + uc_property_trie[uc_property_trie[((ucs4 - 0x"
3082  + QByteArray::number(BMP_END, 16) + ") >> "
3083  + QByteArray::number(SMP_SHIFT) + ") + 0x"
3084  + QByteArray::number(BMP_END / BMP_BLOCKSIZE, 16) + "] + (ucs4 & 0x"
3085  + QByteArray::number(SMP_BLOCKSIZE - 1, 16) + ")];\n"
3086  "}\n"
3087  "\n"
3088  "Q_DECL_CONST_FUNCTION static inline const Properties *qGetProp(char16_t ucs2) noexcept\n"
3089  "{\n"
3090  " return uc_properties + uc_property_trie[uc_property_trie[ucs2 >> "
3091  + QByteArray::number(BMP_SHIFT) + "] + (ucs2 & 0x"
3092  + QByteArray::number(BMP_BLOCKSIZE - 1, 16) + ")];\n"
3093  "}\n"
3094  "\n"
3095  "Q_DECL_CONST_FUNCTION Q_CORE_EXPORT const Properties * QT_FASTCALL properties(char32_t ucs4) noexcept\n"
3096  "{\n"
3097  " return qGetProp(ucs4);\n"
3098  "}\n"
3099  "\n"
3100  "Q_DECL_CONST_FUNCTION Q_CORE_EXPORT const Properties * QT_FASTCALL properties(char16_t ucs2) noexcept\n"
3101  "{\n"
3102  " return qGetProp(ucs2);\n"
3103  "}\n\n";
3104 
3105  out += "Q_CORE_EXPORT GraphemeBreakClass QT_FASTCALL graphemeBreakClass(char32_t ucs4) noexcept\n"
3106  "{\n"
3107  " return static_cast<GraphemeBreakClass>(qGetProp(ucs4)->graphemeBreakClass);\n"
3108  "}\n"
3109  "\n"
3110  "Q_CORE_EXPORT WordBreakClass QT_FASTCALL wordBreakClass(char32_t ucs4) noexcept\n"
3111  "{\n"
3112  " return static_cast<WordBreakClass>(qGetProp(ucs4)->wordBreakClass);\n"
3113  "}\n"
3114  "\n"
3115  "Q_CORE_EXPORT SentenceBreakClass QT_FASTCALL sentenceBreakClass(char32_t ucs4) noexcept\n"
3116  "{\n"
3117  " return static_cast<SentenceBreakClass>(qGetProp(ucs4)->sentenceBreakClass);\n"
3118  "}\n"
3119  "\n"
3120  "Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(char32_t ucs4) noexcept\n"
3121  "{\n"
3122  " return static_cast<LineBreakClass>(qGetProp(ucs4)->lineBreakClass);\n"
3123  "}\n"
3124  "\n"
3125  "Q_CORE_EXPORT IdnaStatus QT_FASTCALL idnaStatus(char32_t ucs4) noexcept\n"
3126  "{\n"
3127  " return static_cast<IdnaStatus>(qGetProp(ucs4)->idnaStatus);\n"
3128  "}\n"
3129  "\n"
3130  "Q_CORE_EXPORT EastAsianWidth QT_FASTCALL eastAsianWidth(char32_t ucs4) noexcept\n"
3131  "{\n"
3132  " return static_cast<EastAsianWidth>(qGetProp(ucs4)->eastAsianWidth);\n"
3133  "}\n"
3134  "\n";
3135 
3136  return out;
3137 }
3138 
3139 static QByteArray createSpecialCaseMap()
3140 {
3141  qDebug("createSpecialCaseMap:");
3142 
3143  QByteArray out
3144  = "static constexpr unsigned short specialCaseMap[] = {\n"
3145  " 0x0, // placeholder";
3146 
3147  int i = 1;
3148  int maxN = 0;
3149  while (i < specialCaseMap.size()) {
3150  out += "\n ";
3151  int n = specialCaseMap.at(i);
3152  for (int j = 0; j <= n; ++j) {
3153  out += QByteArray(" 0x") + QByteArray::number(specialCaseMap.at(i+j), 16);
3154  out += ",";
3155  }
3156  i += n + 1;
3157  maxN = std::max(maxN, n);
3158  }
3159  out.chop(1);
3160  out += "\n};\n\nconstexpr unsigned int MaxSpecialCaseLength = ";
3161  out += QByteArray::number(maxN);
3162  out += ";\n\n";
3163 
3164  qDebug(" memory usage: %zd bytes", ssize_t(specialCaseMap.size() * sizeof(unsigned short)));
3165 
3166  return out;
3167 }
3168 
3169 
// Generates the C++ source text for the decomposition lookup tables:
// the `uc_decomposition_trie` array, the `GET_DECOMPOSITION_INDEX` macro and
// the `uc_decomposition_map` array. Returns that text as a QByteArray.
// Calls qFatal() when highestComposedCharacter does not fit below SMP_END.
3170 static QByteArray createCompositionInfo()
3171 {
3172  qDebug("createCompositionInfo: highestComposedCharacter=0x%x", highestComposedCharacter);
3173 
// Two ranges with different block sizes: [0, BMP_END) uses blocks of
// BMP_BLOCKSIZE code points, [BMP_END, SMP_END) uses blocks of SMP_BLOCKSIZE.
3174  const int BMP_BLOCKSIZE = 16;
3175  const int BMP_SHIFT = 4;
3176  const int BMP_END = 0x3400; // start of Han
3177  const int SMP_END = 0x30000;
3178  const int SMP_BLOCKSIZE = 256;
3179  const int SMP_SHIFT = 8;
3180 
3181  if (SMP_END <= highestComposedCharacter)
3182  qFatal("end of table smaller than highest composed character 0x%x", highestComposedCharacter);
3183 
// Flat list of decomposition records; tableIndex is the offset at which the
// next record starts.
3184  QList<unsigned short> decompositions;
3185  int tableIndex = 0;
3186 
// uniqueBlocks holds each distinct block once; blockMap stores, per block
// position, the data offset (`index`) of the unique block used there.
3187  QList<UniqueBlock> uniqueBlocks;
3188  QList<int> blockMap;
3189  int used = 0;
3190 
// First pass: code points below BMP_END.
3191  for (int block = 0; block < BMP_END/BMP_BLOCKSIZE; ++block) {
3192  UniqueBlock b;
3193  b.values.reserve(BMP_BLOCKSIZE);
3194  for (int i = 0; i < BMP_BLOCKSIZE; ++i) {
3195  int uc = block*BMP_BLOCKSIZE + i;
// NOTE(review): the declaration of `d` is not visible in this listing;
// presumably it is the per-code-point data for `uc` - confirm against the real source.
3197  if (!d.decomposition.isEmpty()) {
3198  int utf16Length = 0;
// Reserve the record's header slot; it is filled in once the length is known.
3199  decompositions.append(0);
3200  for (int j = 0; j < d.decomposition.size(); ++j) {
3201  int code = d.decomposition.at(j);
// NOTE(review): the condition that opens this branch is not visible in this listing.
3203  // save as surrogate pair
3204  decompositions.append(QChar::highSurrogate(code));
3205  decompositions.append(QChar::lowSurrogate(code));
3206  utf16Length += 2;
3207  } else {
3208  decompositions.append(code);
3209  utf16Length++;
3210  }
3211  }
// Header word: decomposition type plus the UTF-16 length shifted left by 8.
3212  decompositions[tableIndex] = d.decompositionType + (utf16Length<<8);
3213  b.values.append(tableIndex);
3214  tableIndex += utf16Length + 1;
3215  } else {
// 0xffff marks a code point that has no decomposition record.
3216  b.values.append(0xffff);
3217  }
3218  }
// Reuse an equal block if one was stored already; otherwise store this one
// at data offset `used`.
3219  int index = uniqueBlocks.indexOf(b);
3220  if (index == -1) {
3221  index = uniqueBlocks.size();
3222  b.index = used;
3223  used += BMP_BLOCKSIZE;
3224  uniqueBlocks.append(b);
3225  }
3226  blockMap.append(uniqueBlocks.at(index).index);
3227  }
3228  int bmp_blocks = uniqueBlocks.size();
3229 
// Second pass: code points from BMP_END up to SMP_END, in larger blocks.
3230  for (int block = BMP_END/SMP_BLOCKSIZE; block < SMP_END/SMP_BLOCKSIZE; ++block) {
3231  UniqueBlock b;
3232  b.values.reserve(SMP_BLOCKSIZE);
3233  for (int i = 0; i < SMP_BLOCKSIZE; ++i) {
3234  int uc = block*SMP_BLOCKSIZE + i;
// NOTE(review): as in the first pass, the declaration of `d` is not visible here.
3236  if (!d.decomposition.isEmpty()) {
3237  int utf16Length = 0;
3238  decompositions.append(0);
3239  for (int j = 0; j < d.decomposition.size(); ++j) {
3240  int code = d.decomposition.at(j);
// NOTE(review): the condition that opens this branch is not visible in this listing.
3242  // save as surrogate pair
3243  decompositions.append(QChar::highSurrogate(code));
3244  decompositions.append(QChar::lowSurrogate(code));
3245  utf16Length += 2;
3246  } else {
3247  decompositions.append(code);
3248  utf16Length++;
3249  }
3250  }
3251  decompositions[tableIndex] = d.decompositionType + (utf16Length<<8);
3252  b.values.append(tableIndex);
3253  tableIndex += utf16Length + 1;
3254  } else {
3255  b.values.append(0xffff);
3256  }
3257  }
3258  int index = uniqueBlocks.indexOf(b);
3259  if (index == -1) {
3260  index = uniqueBlocks.size();
3261  b.index = used;
3262  used += SMP_BLOCKSIZE;
3263  uniqueBlocks.append(b);
3264  }
3265  blockMap.append(uniqueBlocks.at(index).index);
3266  }
3267  int smp_blocks = uniqueBlocks.size() - bmp_blocks;
3268 
3269  // if the condition below doesn't hold anymore we need to modify our decomposition code
3270  Q_ASSERT(tableIndex < 0xffff);
3271 
// Size statistics, reported through qDebug only.
3272  int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*sizeof(unsigned short);
3273  int bmp_trie = BMP_END/BMP_BLOCKSIZE*sizeof(unsigned short);
3274  int bmp_mem = bmp_block_data + bmp_trie;
3275  qDebug(" %d unique blocks in BMP.", bmp_blocks);
3276  qDebug(" block data uses: %d bytes", bmp_block_data);
3277  qDebug(" trie data uses : %d bytes", bmp_trie);
3278 
3279  int smp_block_data = smp_blocks*SMP_BLOCKSIZE*sizeof(unsigned short);
3280  int smp_trie = (SMP_END-BMP_END)/SMP_BLOCKSIZE*sizeof(unsigned short);
3281  int smp_mem = smp_block_data + smp_trie;
3282  qDebug(" %d unique blocks in SMP.", smp_blocks);
3283  qDebug(" block data uses: %d bytes", smp_block_data);
3284  qDebug(" trie data uses : %d bytes", smp_trie);
3285 
3286  int decomposition_data = decompositions.size() * 2;
3287  qDebug("\n decomposition data uses : %d bytes", decomposition_data);
3288  qDebug(" memory usage: %d bytes", bmp_mem + smp_mem + decomposition_data);
3289 
// The emitted map entries (block offset + map size) must fit in an unsigned short.
3290  Q_ASSERT(blockMap.last() + blockMap.size() < (1<<(sizeof(unsigned short)*8)));
3291 
3292  QByteArray out = "static constexpr unsigned short uc_decomposition_trie[] = {\n";
3293  // first write the map
// Each map entry is offset by blockMap.size() because the block data is
// written after the map inside the same array.
3294  out += " // 0 - 0x" + QByteArray::number(BMP_END, 16);
3295  for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) {
3296  if (!(i % 8)) {
3297  if (out.endsWith(' '))
3298  out.chop(1);
3299  if (!((i*BMP_BLOCKSIZE) % 0x1000))
3300  out += "\n";
3301  out += "\n ";
3302  }
3303  out += QByteArray::number(blockMap.at(i) + blockMap.size());
3304  out += ", ";
3305  }
3306  if (out.endsWith(' '))
3307  out.chop(1);
3308  out += "\n\n // 0x" + QByteArray::number(BMP_END, 16) + " - 0x" + QByteArray::number(SMP_END, 16) + "\n";
3309  for (int i = BMP_END/BMP_BLOCKSIZE; i < blockMap.size(); ++i) {
3310  if (!(i % 8)) {
3311  if (out.endsWith(' '))
3312  out.chop(1);
3313  if (!(i % (0x10000/SMP_BLOCKSIZE)))
3314  out += "\n";
3315  out += "\n ";
3316  }
3317  out += QByteArray::number(blockMap.at(i) + blockMap.size());
3318  out += ", ";
3319  }
3320  if (out.endsWith(' '))
3321  out.chop(1);
3322  out += "\n";
3323  // write the data
3324  for (int i = 0; i < uniqueBlocks.size(); ++i) {
3325  if (out.endsWith(' '))
3326  out.chop(1);
3327  out += "\n";
3328  const UniqueBlock &b = uniqueBlocks.at(i);
3329  for (int j = 0; j < b.values.size(); ++j) {
3330  if (!(j % 8)) {
3331  if (out.endsWith(' '))
3332  out.chop(1);
3333  out += "\n ";
3334  }
3335  out += "0x" + QByteArray::number(b.values.at(j), 16);
3336  out += ", ";
3337  }
3338  }
// Strip the trailing ", " left behind by the last value.
3339  if (out.endsWith(' '))
3340  out.chop(2);
3341  out += "\n};\n\n";
3342 
// Emit the lookup macro; its constants are taken from the values above so it
// stays in step with the generated array.
3343  out += "#define GET_DECOMPOSITION_INDEX(ucs4) \\\n"
3344  " (ucs4 < 0x" + QByteArray::number(BMP_END, 16) + " \\\n"
3345  " ? (uc_decomposition_trie[uc_decomposition_trie[ucs4 >> "
3346  + QByteArray::number(BMP_SHIFT) + "] + (ucs4 & 0x"
3347  + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")]) \\\n"
3348  " : ucs4 < 0x" + QByteArray::number(SMP_END, 16) + " \\\n"
3349  " ? uc_decomposition_trie[uc_decomposition_trie[((ucs4 - 0x"
3350  + QByteArray::number(BMP_END, 16) + ") >> "
3351  + QByteArray::number(SMP_SHIFT) + ") + 0x"
3352  + QByteArray::number(BMP_END/BMP_BLOCKSIZE, 16) + "] + (ucs4 & 0x"
3353  + QByteArray::number(SMP_BLOCKSIZE-1, 16) + ")] \\\n"
3354  " : 0xffff)\n\n";
3355 
// Finally dump the decomposition records, eight values per row.
3356  out += "static constexpr unsigned short uc_decomposition_map[] = {";
3357  for (int i = 0; i < decompositions.size(); ++i) {
3358  if (!(i % 8)) {
3359  if (out.endsWith(' '))
3360  out.chop(1);
3361  out += "\n ";
3362  }
3363  out += "0x" + QByteArray::number(decompositions.at(i), 16);
3364  out += ", ";
3365  }
3366  if (out.endsWith(' '))
3367  out.chop(2);
3368  out += "\n};\n\n";
3369 
3370  return out;
3371 }
3372 
// Generates the C++ source text for the ligature lookup tables: the
// `uc_ligature_trie` array, the `GET_LIGATURE_INDEX` macro and the
// `uc_ligature_map` array. Returns that text as a QByteArray.
// Calls qFatal() when highestLigature does not fit below SMP_END.
3373 static QByteArray createLigatureInfo()
3374 {
3375  qDebug("createLigatureInfo: numLigatures=%d, highestLigature=0x%x", numLigatures, highestLigature);
3376 
// NOTE(review): this loop looks entries up with value(i) for i in
// [0, size()); the later passes look them up by code point (value(uc)). If
// ligatureHashes is keyed by code point, this check does not visit every
// entry - confirm.
3377  for (int i = 0; i < ligatureHashes.size(); ++i) {
3378  const QList<Ligature> &l = ligatureHashes.value(i);
3379  for (int j = 0; j < l.size(); ++j) {
3380  // if the condition below doesn't hold anymore we need to modify our ligatureHelper code
// NOTE(review): the assertion below is cut off in this listing; its second
// operand is not visible.
3381  Q_ASSERT(QChar::requiresSurrogates(l.at(j).u2) == QChar::requiresSurrogates(l.at(j).ligature) &&
3383  }
3384  }
3385 
// Two ranges with different block sizes: [0, BMP_END) uses blocks of
// BMP_BLOCKSIZE code points, [BMP_END, SMP_END) uses blocks of SMP_BLOCKSIZE.
3386  const int BMP_BLOCKSIZE = 32;
3387  const int BMP_SHIFT = 5;
3388  const int BMP_END = 0x3100;
3389  const int SMP_END = 0x12000;
3390  const int SMP_BLOCKSIZE = 256;
3391  const int SMP_SHIFT = 8;
3392 
3393  if (SMP_END <= highestLigature)
3394  qFatal("end of table smaller than highest ligature character 0x%x", highestLigature);
3395 
// Flat list of ligature records; tableIndex is the offset at which the next
// record starts.
3396  QList<unsigned short> ligatures;
3397  int tableIndex = 0;
3398 
// uniqueBlocks holds each distinct block once; blockMap stores, per block
// position, the data offset (`index`) of the unique block used there.
3399  QList<UniqueBlock> uniqueBlocks;
3400  QList<int> blockMap;
3401  int used = 0;
3402 
// First pass: code points below BMP_END. A record is the entry count followed
// by (u1, ligature) pairs, one 16-bit unit each.
3403  for (int block = 0; block < BMP_END/BMP_BLOCKSIZE; ++block) {
3404  UniqueBlock b;
3405  b.values.reserve(BMP_BLOCKSIZE);
3406  for (int i = 0; i < BMP_BLOCKSIZE; ++i) {
3407  int uc = block*BMP_BLOCKSIZE + i;
3408  QList<Ligature> l = ligatureHashes.value(uc);
3409  if (!l.isEmpty()) {
3411  std::sort(l.begin(), l.end()); // needed for bsearch in ligatureHelper code
3412 
3413  ligatures.append(l.size());
3414  for (int j = 0; j < l.size(); ++j) {
3415  ligatures.append(l.at(j).u1);
3416  ligatures.append(l.at(j).ligature);
3417  }
3418  b.values.append(tableIndex);
3419  tableIndex += 2*l.size() + 1;
3420  } else {
// 0xffff marks a code point that has no ligature record.
3421  b.values.append(0xffff);
3422  }
3423  }
// Reuse an equal block if one was stored already; otherwise store this one
// at data offset `used`.
3424  int index = uniqueBlocks.indexOf(b);
3425  if (index == -1) {
3426  index = uniqueBlocks.size();
3427  b.index = used;
3428  used += BMP_BLOCKSIZE;
3429  uniqueBlocks.append(b);
3430  }
3431  blockMap.append(uniqueBlocks.at(index).index);
3432  }
3433  int bmp_blocks = uniqueBlocks.size();
3434 
// Second pass: code points from BMP_END up to SMP_END. Here u1 and the
// ligature are each stored as a surrogate pair, so an entry takes four units.
3435  for (int block = BMP_END/SMP_BLOCKSIZE; block < SMP_END/SMP_BLOCKSIZE; ++block) {
3436  UniqueBlock b;
3437  b.values.reserve(SMP_BLOCKSIZE);
3438  for (int i = 0; i < SMP_BLOCKSIZE; ++i) {
3439  int uc = block*SMP_BLOCKSIZE + i;
3440  QList<Ligature> l = ligatureHashes.value(uc);
3441  if (!l.isEmpty()) {
3443  std::sort(l.begin(), l.end()); // needed for bsearch in ligatureHelper code
3444 
3445  ligatures.append(l.size());
3446  for (int j = 0; j < l.size(); ++j) {
3447  ligatures.append(QChar::highSurrogate(l.at(j).u1));
3448  ligatures.append(QChar::lowSurrogate(l.at(j).u1));
3449  ligatures.append(QChar::highSurrogate(l.at(j).ligature));
3450  ligatures.append(QChar::lowSurrogate(l.at(j).ligature));
3451  }
3452  b.values.append(tableIndex);
3453  tableIndex += 4*l.size() + 1;
3454  } else {
3455  b.values.append(0xffff);
3456  }
3457  }
3458  int index = uniqueBlocks.indexOf(b);
3459  if (index == -1) {
3460  index = uniqueBlocks.size();
3461  b.index = used;
3462  used += SMP_BLOCKSIZE;
3463  uniqueBlocks.append(b);
3464  }
3465  blockMap.append(uniqueBlocks.at(index).index);
3466  }
3467  int smp_blocks = uniqueBlocks.size() - bmp_blocks;
3468 
3469  // if the condition below doesn't hold anymore we need to modify our composition code
3470  Q_ASSERT(tableIndex < 0xffff);
3471 
// Size statistics, reported through qDebug only.
3472  int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*sizeof(unsigned short);
3473  int bmp_trie = BMP_END/BMP_BLOCKSIZE*sizeof(unsigned short);
3474  int bmp_mem = bmp_block_data + bmp_trie;
3475  qDebug(" %d unique blocks in BMP.", bmp_blocks);
3476  qDebug(" block data uses: %d bytes", bmp_block_data);
3477  qDebug(" trie data uses : %d bytes", bmp_trie);
3478 
3479  int smp_block_data = smp_blocks*SMP_BLOCKSIZE*sizeof(unsigned short);
3480  int smp_trie = (SMP_END-BMP_END)/SMP_BLOCKSIZE*sizeof(unsigned short);
3481  int smp_mem = smp_block_data + smp_trie;
3482  qDebug(" %d unique blocks in SMP.", smp_blocks);
3483  qDebug(" block data uses: %d bytes", smp_block_data);
3484  qDebug(" trie data uses : %d bytes", smp_trie);
3485 
3486  int ligature_data = ligatures.size() * 2;
3487  qDebug("\n ligature data uses : %d bytes", ligature_data);
3488  qDebug(" memory usage: %d bytes", bmp_mem + smp_mem + ligature_data);
3489 
// The emitted map entries (block offset + map size) must fit in an unsigned short.
3490  Q_ASSERT(blockMap.last() + blockMap.size() < (1<<(sizeof(unsigned short)*8)));
3491 
3492  QByteArray out = "static constexpr unsigned short uc_ligature_trie[] = {\n";
3493  // first write the map
// Each map entry is offset by blockMap.size() because the block data is
// written after the map inside the same array.
3494  out += " // 0 - 0x" + QByteArray::number(BMP_END, 16);
3495  for (int i = 0; i < BMP_END/BMP_BLOCKSIZE; ++i) {
3496  if (!(i % 8)) {
3497  if (out.endsWith(' '))
3498  out.chop(1);
3499  if (!((i*BMP_BLOCKSIZE) % 0x1000))
3500  out += "\n";
3501  out += "\n ";
3502  }
3503  out += QByteArray::number(blockMap.at(i) + blockMap.size());
3504  out += ", ";
3505  }
3506  if (out.endsWith(' '))
3507  out.chop(1);
3508  out += "\n\n // 0x" + QByteArray::number(BMP_END, 16) + " - 0x" + QByteArray::number(SMP_END, 16) + "\n";
3509  for (int i = BMP_END/BMP_BLOCKSIZE; i < blockMap.size(); ++i) {
3510  if (!(i % 8)) {
3511  if (out.endsWith(' '))
3512  out.chop(1);
3513  if (!(i % (0x10000/SMP_BLOCKSIZE)))
3514  out += "\n";
3515  out += "\n ";
3516  }
3517  out += QByteArray::number(blockMap.at(i) + blockMap.size());
3518  out += ", ";
3519  }
3520  if (out.endsWith(' '))
3521  out.chop(1);
3522  out += "\n";
3523  // write the data
3524  for (int i = 0; i < uniqueBlocks.size(); ++i) {
3525  if (out.endsWith(' '))
3526  out.chop(1);
3527  out += "\n";
3528  const UniqueBlock &b = uniqueBlocks.at(i);
3529  for (int j = 0; j < b.values.size(); ++j) {
3530  if (!(j % 8)) {
3531  if (out.endsWith(' '))
3532  out.chop(1);
3533  out += "\n ";
3534  }
3535  out += "0x" + QByteArray::number(b.values.at(j), 16);
3536  out += ", ";
3537  }
3538  }
// Strip the trailing ", " left behind by the last value.
3539  if (out.endsWith(' '))
3540  out.chop(2);
3541  out += "\n};\n\n";
3542 
// Emit the lookup macro; its constants are taken from the values above so it
// stays in step with the generated array.
3543  out += "#define GET_LIGATURE_INDEX(ucs4) \\\n"
3544  " (ucs4 < 0x" + QByteArray::number(BMP_END, 16) + " \\\n"
3545  " ? (uc_ligature_trie[uc_ligature_trie[ucs4 >> "
3546  + QByteArray::number(BMP_SHIFT) + "] + (ucs4 & 0x"
3547  + QByteArray::number(BMP_BLOCKSIZE-1, 16)+ ")]) \\\n"
3548  " : ucs4 < 0x" + QByteArray::number(SMP_END, 16) + " \\\n"
3549  " ? uc_ligature_trie[uc_ligature_trie[((ucs4 - 0x"
3550  + QByteArray::number(BMP_END, 16) + ") >> "
3551  + QByteArray::number(SMP_SHIFT) + ") + 0x"
3552  + QByteArray::number(BMP_END/BMP_BLOCKSIZE, 16) + "]" " + (ucs4 & 0x"
3553  + QByteArray::number(SMP_BLOCKSIZE-1, 16) + ")] \\\n"
3554  " : 0xffff)\n\n";
3555 
// Finally dump the ligature records, eight values per row.
3556  out += "static constexpr unsigned short uc_ligature_map[] = {";
3557  for (int i = 0; i < ligatures.size(); ++i) {
3558  if (!(i % 8)) {
3559  if (out.endsWith(' '))
3560  out.chop(1);
3561  out += "\n ";
3562  }
3563  out += "0x" + QByteArray::number(ligatures.at(i), 16);
3564  out += ", ";
3565  }
3566  if (out.endsWith(' '))
3567  out.chop(2);
3568  out += "\n};\n";
3569 
3570  return out;
3571 }
3572 
3574 {
// Returns the source text of a `CasingInfo` struct declaration with three
// bit-fields: codePoint (16 bits), flags (8 bits) and offset (8 bits).
// NOTE(review): this function's signature line is not visible in this listing.
3575  QByteArray out
3576  = "struct CasingInfo {\n"
3577  " uint codePoint : 16;\n"
3578  " uint flags : 8;\n"
3579  " uint offset : 8;\n"
3580  "};\n\n";
3581 
3582  return out;
3583 }
3584 
3585 
// Entry point of the table generator. Runs the init*/read* steps, builds the
// generated source fragments, and writes them to
// ../../src/corelib/text/qunicodetables.cpp and
// ../../src/corelib/text/qunicodetables_p.h (paths relative to the working
// directory). Command-line arguments are ignored.
3586 int main(int, char **)
3587 {
// Judging by their names, the init* calls set up lookup maps used while
// parsing - confirm against their definitions.
3588  initAgeMap();
3589  initEastAsianWidthMap();
3590  initCategoryMap();
3591  initDecompositionMap();
3592  initDirectionMap();
3593  initJoiningMap();
3594  initGraphemeBreak();
3595  initWordBreak();
3596  initSentenceBreak();
3597  initLineBreak();
3598  initScriptMap();
3599  initIdnaStatusMap();
3600 
// Judging by their names, the read* calls load the individual Unicode data
// files - confirm against their definitions.
3601  readUnicodeData();
3602  readBidiMirroring();
3603  readArabicShaping();
3604  readDerivedAge();
3605  readEastAsianWidth();
3606  readDerivedNormalizationProps();
3607  readSpecialCasing();
3608  readCaseFolding();
3609  // readBlocks();
3610  readScripts();
3611  readGraphemeBreak();
3612  readEmojiData();
3613  readWordBreak();
3614  readSentenceBreak();
3615  readLineBreak();
3616  readIdnaMappingTable();
3617 
3618  resolveIdnaStatus();
3619 
// Build every generated source fragment before any file is touched.
3620  computeUniqueProperties();
3621  QByteArray properties = createPropertyInfo();
3622  QByteArray specialCases = createSpecialCaseMap();
3623  QByteArray compositions = createCompositionInfo();
3624  QByteArray ligatures = createLigatureInfo();
3625  QByteArray normalizationCorrections = createNormalizationCorrections();
3626  QByteArray idnaMapping = createIdnaMapping();
3627 
// License banner placed at the top of both generated files.
3628  QByteArray header =
3629  "/****************************************************************************\n"
3630  "**\n"
3631  "** Copyright (C) 2020 The Qt Company Ltd.\n"
3632  "** Contact: https://www.qt.io/licensing/\n"
3633  "**\n"
3634  "** This file is part of the QtCore module of the Qt Toolkit.\n"
3635  "**\n"
3636  "** $QT_BEGIN_LICENSE:LGPL$\n"
3637  "** Commercial License Usage\n"
3638  "** Licensees holding valid commercial Qt licenses may use this file in\n"
3639  "** accordance with the commercial license agreement provided with the\n"
3640  "** Software or, alternatively, in accordance with the terms contained in\n"
3641  "** a written agreement between you and The Qt Company. For licensing terms\n"
3642  "** and conditions see https://www.qt.io/terms-conditions. For further\n"
3643  "** information use the contact form at https://www.qt.io/contact-us.\n"
3644  "**\n"
3645  "** GNU Lesser General Public License Usage\n"
3646  "** Alternatively, this file may be used under the terms of the GNU Lesser\n"
3647  "** General Public License version 3 as published by the Free Software\n"
3648  "** Foundation and appearing in the file LICENSE.LGPL3 included in the\n"
3649  "** packaging of this file. Please review the following information to\n"
3650  "** ensure the GNU Lesser General Public License version 3 requirements\n"
3651  "** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.\n"
3652  "**\n"
3653  "** GNU General Public License Usage\n"
3654  "** Alternatively, this file may be used under the terms of the GNU\n"
3655  "** General Public License version 2.0 or (at your option) the GNU General\n"
3656  "** Public license version 3 or any later version approved by the KDE Free\n"
3657  "** Qt Foundation. The licenses are as published by the Free Software\n"
3658  "** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3\n"
3659  "** included in the packaging of this file. Please review the following\n"
3660  "** information to ensure the GNU General Public License requirements will\n"
3661  "** be met: https://www.gnu.org/licenses/gpl-2.0.html and\n"
3662  "** https://www.gnu.org/licenses/gpl-3.0.html.\n"
3663  "**\n"
3664  "** $QT_END_LICENSE$\n"
3665  "**\n"
3666  "****************************************************************************/\n\n";
3667 
3668  QByteArray note =
3669  "/* This file is autogenerated from the Unicode " DATA_VERSION_S " database. Do not edit */\n\n";
3670 
// "Not part of the Qt API" notice; written only to the private header below.
3671  QByteArray warning =
3672  "//\n"
3673  "// W A R N I N G\n"
3674  "// -------------\n"
3675  "//\n"
3676  "// This file is not part of the Qt API. It exists for the convenience\n"
3677  "// of internal files. This header file may change from version to version\n"
3678  "// without notice, or even be removed.\n"
3679  "//\n"
3680  "// We mean it.\n"
3681  "//\n\n";
3682 
// First output file: the table definitions.
// NOTE(review): no call that opens the file is visible in this listing before
// the writes, and the results of write() are not checked - confirm against
// the real source.
3683  QFile f("../../src/corelib/text/qunicodetables.cpp");
3685  f.write(header);
3686  f.write(note);
3687  f.write("#include \"qunicodetables_p.h\"\n\n");
3688  f.write("QT_BEGIN_NAMESPACE\n\n");
3689  f.write("namespace QUnicodeTables {\n\n");
3690  f.write(properties);
3691  f.write(specialCases);
3692  f.write(compositions);
3693  f.write(ligatures);
3694  f.write("\n");
3695  f.write(normalizationCorrections);
3696  f.write(idnaMapping);
3697  f.write("} // namespace QUnicodeTables\n\n");
3698  f.write("using namespace QUnicodeTables;\n\n");
3699  f.write("QT_END_NAMESPACE\n");
3700  f.close();
3701 
// Second output file: the private header, reusing the same QFile object.
// NOTE(review): as above, no open call is visible in this listing.
3702  f.setFileName("../../src/corelib/text/qunicodetables_p.h");
3704  f.write(header);
3705  f.write(note);
3706  f.write(warning);
3707  f.write("#ifndef QUNICODETABLES_P_H\n"
3708  "#define QUNICODETABLES_P_H\n\n"
3709  "#include <QtCore/private/qglobal_p.h>\n\n"
3710  "#include <QtCore/qchar.h>\n\n"
3711  "QT_BEGIN_NAMESPACE\n\n");
3712  f.write("#define UNICODE_DATA_VERSION " DATA_VERSION_STR "\n\n");
3713  f.write("namespace QUnicodeTables {\n\n");
3714  f.write(property_string);
3715  f.write(sizeOfPropertiesStructCheck);
3716  f.write(east_asian_width_string);
3717  f.write(grapheme_break_class_string);
3718  f.write(word_break_class_string);
3719  f.write(sentence_break_class_string);
3720  f.write(line_break_class_string);
3721  f.write(idna_status_string);
3722  f.write(methods);
3723  f.write("} // namespace QUnicodeTables\n\n"
3724  "QT_END_NAMESPACE\n\n"
3725  "#endif // QUNICODETABLES_P_H\n");
3726  f.close();
3727 
// Report the largest differences recorded while processing, in hex.
3728  qDebug() << "maxMirroredDiff = " << Qt::hex << maxMirroredDiff;
3729  qDebug() << "maxLowerCaseDiff = " << Qt::hex << maxLowerCaseDiff;
3730  qDebug() << "maxUpperCaseDiff = " << Qt::hex << maxUpperCaseDiff;
3731  qDebug() << "maxTitleCaseDiff = " << Qt::hex << maxTitleCaseDiff;
3732  qDebug() << "maxCaseFoldDiff = " << Qt::hex << maxCaseFoldDiff;
// Everything up to the matching #endif is compiled out (debug dumps).
3733 #if 0
3734 // dump(0, 0x7f);
3735 // dump(0x620, 0x640);
3736 // dump(0x10000, 0x10020);
3737 // dump(0x10800, 0x10820);
3738 
3739  qDebug("decompositionLength used:");
3740  int totalcompositions = 0;
3741  int sum = 0;
3742  for (int i = 1; i < 20; ++i) {
3743  qDebug(" length %d used %d times", i, decompositionLength.value(i, 0));
3744  totalcompositions += i*decompositionLength.value(i, 0);
3745  sum += decompositionLength.value(i, 0);
3746  }
3747  qDebug(" len decomposition map %d, average length %f, num composed chars %d",
3748  totalcompositions, (float)totalcompositions/(float)sum, sum);
3749  qDebug("highest composed character %x", highestComposedCharacter);
3750  qDebug("num ligatures = %d highest=%x, maxLength=%d", numLigatures, highestLigature, longestLigature);
3751 
3752  qBubbleSort(ligatures);
3753  for (int i = 0; i < ligatures.size(); ++i)
3754  qDebug("%s", ligatures.at(i).data());
3755 
3756 // qDebug("combiningClass usage:");
3757 // int numClasses = 0;
3758 // for (int i = 0; i < 255; ++i) {
3759 // int num = combiningClassUsage.value(i, 0);
3760 // if (num) {
3761 // ++numClasses;
3762 // qDebug(" combiningClass %d used %d times", i, num);
3763 // }
3764 // }
3765 // qDebug("total of %d combining classes used", numClasses);
3766 
3767 #endif
3768 }
small capitals from c petite p scientific f u
Definition: afcover.h:88
small capitals from c petite p scientific i
[1]
Definition: afcover.h:80
xD9 x84 xD8 xAD xD9 x80 xF0 x90 xAC x9A xE0 xA7 xA6 xE0 xA7 xAA xF0 x91 x84 xA4 xF0 x91 x84 x89 xF0 x91 x84 x9B xF0 x90 x8A xAB xF0 x90 x8B x89 xE2 xB2 x9E xE2 xB2 x9F xD0 xBE xD0 x9E xF0 x90 x90 x84 xF0 x90 x90 xAC xE1 x83 x98 xE1 x83 x94 xE1 x83 x90 xE1 xB2 xBF xE2 xB0 x95 xE2 xB1 x85 xCE xBF xCE x9F xE0 xA8 xA0 xE0 xA8 xB0 xE0 xA9 xA6 Kayah xEA xA4 x8D xEA xA4 x80 Khmer xE1 xA7 xA1 xE1 xA7 xAA xE0 xBB x90 Latin Subscript xE2 x82 x92 xE2 x82 x80 xEA x93 xB3 xF0 x96 xB9 xA1 xF0 x96 xB9 x9B xF0 x96 xB9 xAF xE1 x80 x9D xE1 x80 x84 xE1 x80 x82 no script
Definition: afscript.h:271
The QBitArray class provides an array of bits.
Definition: qbitarray.h:49
The QByteArray class provides an array of bytes.
Definition: qbytearray.h:85
char * data()
Definition: qbytearray.h:516
qsizetype size() const noexcept
Definition: qbytearray.h:470
const char * constData() const noexcept
Definition: qbytearray.h:144
int toInt(bool *ok=nullptr, int base=10) const
QList< QByteArray > split(char sep) const
char at(qsizetype i) const
Definition: qbytearray.h:505
bool isEmpty() const noexcept
Definition: qbytearray.h:129
static QByteArray number(int, int base=10)
void resize(qsizetype size)
QByteArray & replace(qsizetype index, qsizetype len, const char *s, qsizetype alen)
Definition: qbytearray.h:278
The QChar class provides a 16-bit Unicode character.
Definition: qchar.h:84
constexpr bool isNonCharacter() const noexcept
Definition: qchar.h:509
@ LastValidCodePoint
Definition: qchar.h:102
static constexpr bool requiresSurrogates(char32_t ucs4) noexcept
Definition: qchar.h:535
Decomposition
Definition: qchar.h:384
@ Narrow
Definition: qchar.h:398
@ NoDecomposition
Definition: qchar.h:385
@ Square
Definition: qchar.h:400
@ Final
Definition: qchar.h:391
@ Fraction
Definition: qchar.h:402
@ Medial
Definition: qchar.h:390
@ Sub
Definition: qchar.h:395
@ Super
Definition: qchar.h:394
@ NoBreak
Definition: qchar.h:388
@ Compat
Definition: qchar.h:401
@ Circle
Definition: qchar.h:393
@ Small
Definition: qchar.h:399
@ Isolated
Definition: qchar.h:392
@ Font
Definition: qchar.h:387
@ Vertical
Definition: qchar.h:396
@ Wide
Definition: qchar.h:397
@ Canonical
Definition: qchar.h:386
@ Initial
Definition: qchar.h:389
static constexpr auto fromUcs4(char32_t c) noexcept
Definition: qstringview.h:485
Direction
Definition: qchar.h:377
@ DirFSI
Definition: qchar.h:380
@ DirNSM
Definition: qchar.h:379
@ DirPDF
Definition: qchar.h:379
@ DirON
Definition: qchar.h:378
@ DirLRO
Definition: qchar.h:379
@ DirWS
Definition: qchar.h:378
@ DirL
Definition: qchar.h:378
@ DirEN
Definition: qchar.h:378
@ DirCS
Definition: qchar.h:378
@ DirRLO
Definition: qchar.h:379
@ DirAN
Definition: qchar.h:378
@ DirAL
Definition: qchar.h:379
@ DirB
Definition: qchar.h:378
@ DirRLI
Definition: qchar.h:380
@ DirS
Definition: qchar.h:378
@ DirPDI
Definition: qchar.h:380
@ DirLRE
Definition: qchar.h:379
@ DirLRI
Definition: qchar.h:380
@ DirBN
Definition: qchar.h:379
@ DirRLE
Definition: qchar.h:379
@ DirET
Definition: qchar.h:378
@ DirR
Definition: qchar.h:378
@ DirES
Definition: qchar.h:378
UnicodeVersion
Definition: qchar.h:439
@ Unicode_6_3
Definition: qchar.h:455
@ Unicode_3_0
Definition: qchar.h:444
@ Unicode_6_1
Definition: qchar.h:453
@ Unicode_2_1_2
Definition: qchar.h:443
@ Unicode_6_2
Definition: qchar.h:454
@ Unicode_11_0
Definition: qchar.h:460
@ Unicode_Unassigned
Definition: qchar.h:440
@ Unicode_1_1
Definition: qchar.h:441
@ Unicode_14_0
Definition: qchar.h:464
@ Unicode_4_1
Definition: qchar.h:448
@ Unicode_13_0
Definition: qchar.h:463
@ Unicode_2_0
Definition: qchar.h:442
@ Unicode_8_0
Definition: qchar.h:457
@ Unicode_12_0
Definition: qchar.h:461
@ Unicode_5_2
Definition: qchar.h:451
@ Unicode_12_1
Definition: qchar.h:462
@ Unicode_6_0
Definition: qchar.h:452
@ Unicode_5_1
Definition: qchar.h:450
@ Unicode_4_0
Definition: qchar.h:447
@ Unicode_5_0
Definition: qchar.h:449
@ Unicode_9_0
Definition: qchar.h:458
@ Unicode_3_1
Definition: qchar.h:445
@ Unicode_7_0
Definition: qchar.h:456
@ Unicode_3_2
Definition: qchar.h:446
@ Unicode_10_0
Definition: qchar.h:459
static constexpr char16_t highSurrogate(char32_t ucs4) noexcept
Definition: qchar.h:549
Category
Definition: qchar.h:140
@ Mark_SpacingCombining
Definition: qchar.h:142
@ Separator_Paragraph
Definition: qchar.h:151
@ Punctuation_Connector
Definition: qchar.h:165
@ Other_Control
Definition: qchar.h:153
@ Punctuation_FinalQuote
Definition: qchar.h:170
@ Other_Surrogate
Definition: qchar.h:155
@ Symbol_Math
Definition: qchar.h:173
@ Number_Letter
Definition: qchar.h:146
@ Letter_Lowercase
Definition: qchar.h:160
@ Number_DecimalDigit
Definition: qchar.h:145
@ Letter_Titlecase
Definition: qchar.h:161
@ Letter_Modifier
Definition: qchar.h:162
@ Punctuation_Close
Definition: qchar.h:168
@ Other_PrivateUse
Definition: qchar.h:156
@ Separator_Space
Definition: qchar.h:149
@ Symbol_Currency
Definition: qchar.h:174
@ Other_Format
Definition: qchar.h:154
@ Number_Other
Definition: qchar.h:147
@ Punctuation_InitialQuote
Definition: qchar.h:169
@ Letter_Other
Definition: qchar.h:163
@ Letter_Uppercase
Definition: qchar.h:159
@ Symbol_Modifier
Definition: qchar.h:175
@ Symbol_Other
Definition: qchar.h:176
@ Separator_Line
Definition: qchar.h:150
@ Mark_NonSpacing
Definition: qchar.h:141
@ Punctuation_Other
Definition: qchar.h:171
@ Mark_Enclosing
Definition: qchar.h:143
@ Punctuation_Dash
Definition: qchar.h:166
@ Punctuation_Open
Definition: qchar.h:167
@ Other_NotAssigned
Definition: qchar.h:157
static constexpr char16_t lowSurrogate(char32_t ucs4) noexcept
Definition: qchar.h:553
JoiningType
Definition: qchar.h:405
@ Joining_None
Definition: qchar.h:406
@ Joining_Transparent
Definition: qchar.h:411
Script
Definition: qchar.h:180
@ Script_Kharoshthi
Definition: qchar.h:247
@ Script_OldTurkic
Definition: qchar.h:283
@ Script_Balinese
Definition: qchar.h:250
@ Script_OldItalic
Definition: qchar.h:221
@ Script_OldHungarian
Definition: qchar.h:330
@ Script_Newa
Definition: qchar.h:337
@ Script_Batak
Definition: qchar.h:287
@ Script_Tamil
Definition: qchar.h:198
@ Script_Thai
Definition: qchar.h:203
@ Script_PahawhHmong
Definition: qchar.h:306
@ Script_Armenian
Definition: qchar.h:188
@ Script_Marchen
Definition: qchar.h:336
@ Script_MendeKikakui
Definition: qchar.h:311
@ Script_Tangut
Definition: qchar.h:339
@ Script_Gurmukhi
Definition: qchar.h:195
@ Script_Buginese
Definition: qchar.h:241
@ Script_Ethiopic
Definition: qchar.h:209
@ Script_Vai
Definition: qchar.h:260
@ Script_Mahajani
Definition: qchar.h:309
@ Script_Arabic
Definition: qchar.h:190
@ Script_Ugaritic
Definition: qchar.h:234
@ Script_Siddham
Definition: qchar.h:320
@ Script_Gujarati
Definition: qchar.h:196
@ Script_Nabataean
Definition: qchar.h:315
@ Script_Duployan
Definition: qchar.h:303
@ Script_HanifiRohingya
Definition: qchar.h:350
@ Script_Bopomofo
Definition: qchar.h:218
@ Script_Cyrillic
Definition: qchar.h:187
@ Script_Bhaiksuki
Definition: qchar.h:335
@ Script_CyproMinoan
Definition: qchar.h:367
@ Script_Hebrew
Definition: qchar.h:189
@ Script_Samaritan
Definition: qchar.h:274
@ Script_Adlam
Definition: qchar.h:334
@ Script_Sundanese
Definition: qchar.h:257
@ Script_Takri
Definition: qchar.h:298
@ Script_Ogham
Definition: qchar.h:212
@ Script_Hatran
Definition: qchar.h:328
@ Script_Palmyrene
Definition: qchar.h:316
@ Script_EgyptianHieroglyphs
Definition: qchar.h:273
@ Script_Han
Definition: qchar.h:219
@ Script_PhagsPa
Definition: qchar.h:253
@ Script_Miao
Definition: qchar.h:295
@ Script_Telugu
Definition: qchar.h:199
@ Script_Oriya
Definition: qchar.h:197
@ Script_Kannada
Definition: qchar.h:200
@ Script_PsalterPahlavi
Definition: qchar.h:319
@ Script_Tirhuta
Definition: qchar.h:322
@ Script_Lisu
Definition: qchar.h:275
@ Script_Avestan
Definition: qchar.h:272
@ Script_Yi
Definition: qchar.h:220
@ Script_Carian
Definition: qchar.h:265
@ Script_Deseret
Definition: qchar.h:223
@ Script_Khudawadi
Definition: qchar.h:321
@ Script_Kaithi
Definition: qchar.h:284
@ Script_Cham
Definition: qchar.h:267
@ Script_InscriptionalParthian
Definition: qchar.h:281
@ Script_Lydian
Definition: qchar.h:266
@ Script_Sogdian
Definition: qchar.h:354
@ Script_Phoenician
Definition: qchar.h:252
@ Script_Devanagari
Definition: qchar.h:193
@ Script_Georgian
Definition: qchar.h:207
@ Script_InscriptionalPahlavi
Definition: qchar.h:282
@ Script_WarangCiti
Definition: qchar.h:323
@ Script_Runic
Definition: qchar.h:213
@ Script_Hiragana
Definition: qchar.h:216
@ Script_Mandaic
Definition: qchar.h:289
@ Script_Myanmar
Definition: qchar.h:206
@ Script_MasaramGondi
Definition: qchar.h:342
@ Script_OldSouthArabian
Definition: qchar.h:280
@ Script_AnatolianHieroglyphs
Definition: qchar.h:327
@ Script_Wancho
Definition: qchar.h:358
@ Script_Osage
Definition: qchar.h:338
@ Script_LinearA
Definition: qchar.h:308
@ Script_Common
Definition: qchar.h:183
@ Script_Hangul
Definition: qchar.h:208
@ Script_OlChiki
Definition: qchar.h:259
@ Script_Greek
Definition: qchar.h:186
@ Script_Malayalam
Definition: qchar.h:201
@ Script_Tangsa
Definition: qchar.h:369
@ Script_Mongolian
Definition: qchar.h:215
@ Script_Javanese
Definition: qchar.h:277
@ Script_Grantha
Definition: qchar.h:305
@ Script_Tifinagh
Definition: qchar.h:244
@ Script_PauCinHau
Definition: qchar.h:317
@ Script_Hanunoo
Definition: qchar.h:225
@ Script_Bengali
Definition: qchar.h:194
@ Script_Inherited
Definition: qchar.h:182
@ Script_ImperialAramaic
Definition: qchar.h:279
@ Script_Buhid
Definition: qchar.h:226
@ Script_DivesAkuru
Definition: qchar.h:362
@ Script_Tibetan
Definition: qchar.h:205
@ Script_Toto
Definition: qchar.h:370
@ Script_Manichaean
Definition: qchar.h:310
@ Script_Nushu
Definition: qchar.h:343
@ Script_OldPersian
Definition: qchar.h:246
@ Script_MeroiticCursive
Definition: qchar.h:293
@ Script_TaiViet
Definition: qchar.h:271
@ Script_Cuneiform
Definition: qchar.h:251
@ Script_Lao
Definition: qchar.h:204
@ Script_Khmer
Definition: qchar.h:214
@ Script_Modi
Definition: qchar.h:312
@ Script_Vithkuqi
Definition: qchar.h:371
@ Script_Lycian
Definition: qchar.h:264
@ Script_OldNorthArabian
Definition: qchar.h:314
@ Script_Thaana
Definition: qchar.h:192
@ Script_ZanabazarSquare
Definition: qchar.h:345
@ Script_TaiTham
Definition: qchar.h:270
@ Script_Saurashtra
Definition: qchar.h:261
@ Script_CaucasianAlbanian
Definition: qchar.h:301
@ Script_Lepcha
Definition: qchar.h:258
@ Script_KhitanSmallScript
Definition: qchar.h:363
@ Script_Katakana
Definition: qchar.h:217
@ Script_CanadianAboriginal
Definition: qchar.h:211
@ Script_MeeteiMayek
Definition: qchar.h:278
@ Script_Chorasmian
Definition: qchar.h:361
@ Script_OldPermic
Definition: qchar.h:318
@ Script_OldUyghur
Definition: qchar.h:368
@ Script_SoraSompeng
Definition: qchar.h:297
@ Script_Rejang
Definition: qchar.h:263
@ Script_Tagbanwa
Definition: qchar.h:227
@ Script_Ahom
Definition: qchar.h:326
@ Script_BassaVah
Definition: qchar.h:302
@ Script_SylotiNagri
Definition: qchar.h:245
@ Script_Brahmi
Definition: qchar.h:288
@ Script_Nko
Definition: qchar.h:254
@ Script_Braille
Definition: qchar.h:238
@ Script_MeroiticHieroglyphs
Definition: qchar.h:294
@ Script_Multani
Definition: qchar.h:329
@ Script_KayahLi
Definition: qchar.h:262
@ Script_SignWriting
Definition: qchar.h:331
@ Script_Limbu
Definition: qchar.h:231
@ Script_LinearB
Definition: qchar.h:233
@ Script_TaiLe
Definition: qchar.h:232
@ Script_Coptic
Definition: qchar.h:228
@ Script_Syriac
Definition: qchar.h:191
@ Script_Gothic
Definition: qchar.h:222
@ Script_NewTaiLue
Definition: qchar.h:242
@ Script_Glagolitic
Definition: qchar.h:243
@ Script_Latin
Definition: qchar.h:185
@ Script_Cypriot
Definition: qchar.h:237
@ Script_OldSogdian
Definition: qchar.h:353
@ Script_Tagalog
Definition: qchar.h:224
@ Script_Sinhala
Definition: qchar.h:202
@ Script_GunjalaGondi
Definition: qchar.h:349
@ Script_Sharada
Definition: qchar.h:296
@ Script_Elbasan
Definition: qchar.h:304
@ Script_Makasar
Definition: qchar.h:351
@ Script_Elymaic
Definition: qchar.h:355
@ Script_Osmanya
Definition: qchar.h:236
@ Script_Unknown
Definition: qchar.h:181
@ Script_Dogra
Definition: qchar.h:348
@ Script_Cherokee
Definition: qchar.h:210
@ Script_Medefaidrin
Definition: qchar.h:352
@ Script_Nandinagari
Definition: qchar.h:356
@ Script_Soyombo
Definition: qchar.h:344
@ Script_Khojki
Definition: qchar.h:307
@ Script_Yezidi
Definition: qchar.h:364
@ Script_Shavian
Definition: qchar.h:235
@ Script_Chakma
Definition: qchar.h:292
@ Script_NyiakengPuachueHmong
Definition: qchar.h:357
@ Script_Mro
Definition: qchar.h:313
@ Script_Bamum
Definition: qchar.h:276
bool operator<(const QElapsedTimer &lhs, const QElapsedTimer &rhs) noexcept
The QFile class provides an interface for reading from and writing to files.
Definition: qfile.h:94
The QHash class is a template class that provides a hash-table-based dictionary.
Definition: qhash.h:773
bool contains(const Key &key) const noexcept
Definition: qhash.h:944
T value(const Key &key) const noexcept
Definition: qhash.h:997
qsizetype size() const noexcept
Definition: qlist.h:414
bool isEmpty() const noexcept
Definition: qlist.h:418
iterator erase(const_iterator begin, const_iterator end)
Definition: qlist.h:893
iterator end()
Definition: qlist.h:624
QList< T > sliced(qsizetype pos) const
Definition: qlist.h:663
const_reference at(qsizetype i) const noexcept
Definition: qlist.h:457
T & last()
Definition: qlist.h:646
void prepend(rvalue_ref t)
Definition: qlist.h:484
iterator begin()
Definition: qlist.h:623
void reserve(qsizetype size)
Definition: qlist.h:757
void replace(qsizetype i, parameter_type t)
Definition: qlist.h:541
void append(parameter_type t)
Definition: qlist.h:469
Definition: qmap.h:222
bool contains(const Key &key) const
Definition: qmap.h:376
iterator end()
Definition: qmap.h:637
size_type size() const
Definition: qmap.h:302
The QString class provides a Unicode character string.
Definition: qstring.h:388
QString & replace(qsizetype i, qsizetype len, QChar after)
Definition: qstring.cpp:3450
QStringList split(const QString &sep, Qt::SplitBehavior behavior=Qt::KeepEmptyParts, Qt::CaseSensitivity cs=Qt::CaseSensitive) const
Definition: qstring.cpp:7672
void truncate(qsizetype pos)
Definition: qstring.cpp:5934
qsizetype size() const
Definition: qstring.h:413
QString mid(qsizetype position, qsizetype n=-1) const
Definition: qstring.cpp:4994
const QChar at(qsizetype i) const
Definition: qstring.h:1212
bool isEmpty() const
Definition: qstring.h:1216
QChar * data()
Definition: qstring.h:1228
bool contains(QChar c, Qt::CaseSensitivity cs=Qt::CaseSensitive) const
Definition: qstring.h:1353
QString & append(QChar c)
Definition: qstring.cpp:3152
QString left(qsizetype n) const
Definition: qstring.cpp:4951
qsizetype indexOf(QChar c, qsizetype from=0, Qt::CaseSensitivity cs=Qt::CaseSensitive) const
Definition: qstring.cpp:4197
QString trimmed() const &
Definition: qstring.h:623
NormalizationForm
Definition: qstring.h:732
@ NormalizationForm_KC
Definition: qstring.h:736
@ NormalizationForm_KD
Definition: qstring.h:735
@ NormalizationForm_C
Definition: qstring.h:734
@ NormalizationForm_D
Definition: qstring.h:733
void resize(qsizetype size)
Definition: qstring.cpp:2670
The QStringView class provides a unified view on UTF-16 strings with a read-only subset of the QString API.
Definition: qstringview.h:122
int main(int argc, char **argv)
Definition: main.cpp:1
QMap< QString, QString > map
[6]
set contains("Julia")
int const char * version
Definition: zlib.h:814
JOCTET JCOEFPTR block
Definition: jsimd.h:109
QPainterPath node()
Definition: paths.cpp:574
Q_DECL_CONST_FUNCTION Q_CORE_EXPORT const Properties *QT_FASTCALL properties(char32_t ucs4) noexcept
Q_CORE_EXPORT QStringView QT_FASTCALL idnaMapping(char32_t ucs4) noexcept
QTextStream & hex(QTextStream &stream)
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION QByteArrayView trimmed(QByteArrayView s) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool startsWith(QByteArrayView haystack, QByteArrayView needle) noexcept
constexpr struct q20::ranges::@309 any_of
#define Q_FALLTHROUGH()
DBusConnection const char DBusError DBusBusType DBusError return DBusConnection DBusHandleMessageFunction void DBusFreeFunction return DBusConnection return DBusConnection return const char DBusError return DBusConnection DBusMessage dbus_uint32_t return DBusConnection dbus_bool_t DBusConnection DBusAddWatchFunction DBusRemoveWatchFunction DBusWatchToggledFunction void DBusFreeFunction return DBusConnection DBusDispatchStatusFunction void DBusFreeFunction DBusTimeout return DBusTimeout return DBusWatch return DBusWatch unsigned int return DBusError const DBusError return const DBusMessage return DBusMessage return DBusMessage return DBusMessage return DBusMessage return DBusMessage return DBusMessageIter * iter
typedef QByteArray(EGLAPIENTRYP PFNQGSGETDISPLAYSPROC)()
QT_BEGIN_INCLUDE_NAMESPACE typedef unsigned char uchar
Definition: qglobal.h:332
ptrdiff_t qsizetype
Definition: qglobal.h:308
unsigned int uint
Definition: qglobal.h:334
unsigned short ushort
Definition: qglobal.h:333
#define qDebug
[1]
Definition: qlogging.h:177
#define qFatal
Definition: qlogging.h:181
GLenum GLuint GLenum GLsizei length
Definition: qopengl.h:270
GLenum GLsizei GLsizei GLint * values
[16]
GLboolean GLboolean GLboolean b
GLsizei const GLfloat * v
[13]
GLfloat GLfloat GLfloat w
[0]
GLboolean GLboolean GLboolean GLboolean a
[7]
GLuint index
[2]
GLenum condition
GLdouble GLdouble right
GLfloat GLfloat f
GLint GLsizei width
GLint left
GLint GLsizei GLsizei GLenum GLenum GLsizei void * data
GLuint name
GLint first
GLfloat n
GLsizei GLenum * categories
Definition: qopenglext.h:5440
const GLubyte * c
Definition: qopenglext.h:12701
GLuint GLfloat * val
Definition: qopenglext.h:1513
GLuint entry
Definition: qopenglext.h:11002
GLenum GLsizei len
Definition: qopenglext.h:3292
GLdouble s
[6]
Definition: qopenglext.h:235
GLenum GLenum GLenum GLenum mapping
Definition: qopenglext.h:10816
GLfloat GLfloat p
[1]
Definition: qopenglext.h:12698
#define Q_ASSERT(cond)
Definition: qrandom.cpp:84
#define Q_ASSERT_X(cond, x, msg)
Definition: qrandom.cpp:85
QPointF qAbs(const QPointF &p)
Definition: qscroller.cpp:119
QFuture< int > sum
QFuture< QSet< QString > > lowerCase
QTextStream out(stdout)
[7]
QString title
[35]
QSharedPointer< T > other(t)
[5]
QString dir
[11]
QGraphicsWidget * form
QHttpRequestHeader header("GET", QUrl::toPercentEncoding("/index.html"))
[1]
int u1
Definition: main.cpp:1223
int ligature
Definition: main.cpp:1225
int u2
Definition: main.cpp:1224
qsizetype overlap
Definition: main.cpp:2614
GraphNode end
Definition: main.cpp:2613
GraphNode start
Definition: main.cpp:2612
uchar nfQuickCheck
Definition: main.cpp:1040
IdnaStatus idnaStatus
Definition: main.cpp:1041
bool upperCaseSpecial
Definition: main.cpp:1031
QChar::Category category
Definition: main.cpp:1014
int upperCaseDiff
Definition: main.cpp:1027
bool operator==(const PropertyFlags &o) const
Definition: main.cpp:986
bool caseFoldSpecial
Definition: main.cpp:1033
int mirrorDiff
Definition: main.cpp:1024
int digitValue
Definition: main.cpp:1022
bool titleCaseSpecial
Definition: main.cpp:1032
uchar combiningClass
Definition: main.cpp:1013
QChar::UnicodeVersion age
Definition: main.cpp:1019
LineBreakClass lineBreakClass
Definition: main.cpp:1037
int caseFoldDiff
Definition: main.cpp:1029
int titleCaseDiff
Definition: main.cpp:1028
PropertyFlags()
Definition: main.cpp:978
bool lowerCaseSpecial
Definition: main.cpp:1030
int lowerCaseDiff
Definition: main.cpp:1026
QChar::Direction direction
Definition: main.cpp:1015
WordBreakClass wordBreakClass
Definition: main.cpp:1035
GraphemeBreakClass graphemeBreakClass
Definition: main.cpp:1034
SentenceBreakClass sentenceBreakClass
Definition: main.cpp:1036
QChar::JoiningType joining
Definition: main.cpp:1017
EastAsianWidth eastAsianWidth
Definition: main.cpp:1021
qsizetype indexOf(const AT &t, qsizetype from=0) const noexcept
Definition: qlist.h:966
int mirroredChar
Definition: main.cpp:1189
QList< int > decomposition
Definition: main.cpp:1184
IdnaRawStatus idnaRawStatus
Definition: main.cpp:1197
bool excludedComposition
Definition: main.cpp:1192
PropertyFlags p
Definition: main.cpp:1180
UnicodeData(int codepoint=0)
Definition: main.cpp:1118
QList< int > specialFolding
Definition: main.cpp:1186
QChar::Decomposition decompositionType
Definition: main.cpp:1183
static UnicodeData & valueRef(int codepoint)
Definition: main.cpp:1202
int propertyIndex
Definition: main.cpp:1195
int index
Definition: main.cpp:2854
bool operator==(const UniqueBlock &other) const
Definition: main.cpp:2851
QList< int > values
Definition: main.cpp:2855
UniqueBlock()
Definition: main.cpp:2849
Definition: inftrees.h:24
const char * name
Definition: main.cpp:59
QWidget * c
Definition: main.cpp:45
QCommandLinkButton * pb
void dump(QAbstractItemModel *model, QString const &indent=" - ", QModelIndex const &parent={})
unsigned int GraphNode
Definition: main.cpp:2608
Direction
Definition: main.cpp:217
@ DirB
Definition: main.cpp:225
@ DirBN
Definition: main.cpp:236
@ DirLRO
Definition: main.cpp:230
@ DirLRE
Definition: main.cpp:229
@ DirON
Definition: main.cpp:228
@ DirAN
Definition: main.cpp:223
@ Dir_Unassigned
Definition: main.cpp:242
@ DirR
Definition: main.cpp:219
@ DirRLE
Definition: main.cpp:232
@ DirET
Definition: main.cpp:222
@ DirWS
Definition: main.cpp:227
@ DirL
Definition: main.cpp:218
@ DirRLI
Definition: main.cpp:238
@ DirES
Definition: main.cpp:221
@ DirLRI
Definition: main.cpp:237
@ DirS
Definition: main.cpp:226
@ DirNSM
Definition: main.cpp:235
@ DirAL
Definition: main.cpp:231
@ DirCS
Definition: main.cpp:224
@ DirPDF
Definition: main.cpp:234
@ DirPDI
Definition: main.cpp:240
@ DirRLO
Definition: main.cpp:233
@ DirFSI
Definition: main.cpp:239
@ DirEN
Definition: main.cpp:220
#define DATA_VERSION_S
Definition: main.cpp:42
JoiningType
Definition: main.cpp:286
@ Joining_None
Definition: main.cpp:287
@ Joining_Right
Definition: main.cpp:290
@ Joining_Left
Definition: main.cpp:291
@ Joining_Unassigned
Definition: main.cpp:294
@ Joining_Causing
Definition: main.cpp:288
@ Joining_Dual
Definition: main.cpp:289
@ Joining_Transparent
Definition: main.cpp:292
GraphemeBreakClass
Definition: main.cpp:342
@ GraphemeBreak_LF
Definition: main.cpp:345
@ GraphemeBreak_RegionalIndicator
Definition: main.cpp:349
@ GraphemeBreak_L
Definition: main.cpp:352
@ GraphemeBreak_Unassigned
Definition: main.cpp:359
@ GraphemeBreak_ZWJ
Definition: main.cpp:348
@ GraphemeBreak_Extend
Definition: main.cpp:347
@ GraphemeBreak_CR
Definition: main.cpp:344
@ GraphemeBreak_T
Definition: main.cpp:354
@ GraphemeBreak_SpacingMark
Definition: main.cpp:351
@ GraphemeBreak_Prepend
Definition: main.cpp:350
@ GraphemeBreak_Extended_Pictographic
Definition: main.cpp:357
@ GraphemeBreak_LV
Definition: main.cpp:355
@ GraphemeBreak_Control
Definition: main.cpp:346
@ GraphemeBreak_LVT
Definition: main.cpp:356
@ GraphemeBreak_V
Definition: main.cpp:353
@ GraphemeBreak_Any
Definition: main.cpp:343
IdnaRawStatus
Definition: main.cpp:848
WordBreakClass
Definition: main.cpp:420
@ WordBreak_Any
Definition: main.cpp:421
@ WordBreak_RegionalIndicator
Definition: main.cpp:428
@ WordBreak_WSegSpace
Definition: main.cpp:439
@ WordBreak_LF
Definition: main.cpp:423
@ WordBreak_MidNumLet
Definition: main.cpp:434
@ WordBreak_SingleQuote
Definition: main.cpp:432
@ WordBreak_Unassigned
Definition: main.cpp:441
@ WordBreak_MidNum
Definition: main.cpp:436
@ WordBreak_Newline
Definition: main.cpp:424
@ WordBreak_DoubleQuote
Definition: main.cpp:433
@ WordBreak_Numeric
Definition: main.cpp:437
@ WordBreak_Extend
Definition: main.cpp:425
@ WordBreak_ALetter
Definition: main.cpp:431
@ WordBreak_Format
Definition: main.cpp:427
@ WordBreak_ZWJ
Definition: main.cpp:426
@ WordBreak_Katakana
Definition: main.cpp:429
@ WordBreak_HebrewLetter
Definition: main.cpp:430
@ WordBreak_ExtendNumLet
Definition: main.cpp:438
@ WordBreak_CR
Definition: main.cpp:422
@ WordBreak_MidLetter
Definition: main.cpp:435
QByteArray createCasingInfo()
Definition: main.cpp:3573
SentenceBreakClass
Definition: main.cpp:501
@ SentenceBreak_ATerm
Definition: main.cpp:512
@ SentenceBreak_Unassigned
Definition: main.cpp:517
@ SentenceBreak_Numeric
Definition: main.cpp:511
@ SentenceBreak_OLetter
Definition: main.cpp:510
@ SentenceBreak_Upper
Definition: main.cpp:509
@ SentenceBreak_Extend
Definition: main.cpp:506
@ SentenceBreak_Sep
Definition: main.cpp:505
@ SentenceBreak_LF
Definition: main.cpp:504
@ SentenceBreak_Close
Definition: main.cpp:515
@ SentenceBreak_STerm
Definition: main.cpp:514
@ SentenceBreak_CR
Definition: main.cpp:503
@ SentenceBreak_SContinue
Definition: main.cpp:513
@ SentenceBreak_Sp
Definition: main.cpp:507
@ SentenceBreak_Lower
Definition: main.cpp:508
@ SentenceBreak_Any
Definition: main.cpp:502
LineBreakClass
Definition: main.cpp:570
@ LineBreak_HL
Definition: main.cpp:573
@ LineBreak_CR
Definition: main.cpp:579
@ LineBreak_BA
Definition: main.cpp:574
@ LineBreak_QU
Definition: main.cpp:571
@ LineBreak_SA
Definition: main.cpp:578
@ LineBreak_NU
Definition: main.cpp:573
@ LineBreak_CP
Definition: main.cpp:571
@ LineBreak_ID
Definition: main.cpp:573
@ LineBreak_JV
Definition: main.cpp:576
@ LineBreak_GL
Definition: main.cpp:571
@ LineBreak_NS
Definition: main.cpp:572
@ LineBreak_SY
Definition: main.cpp:572
@ LineBreak_RI
Definition: main.cpp:576
@ LineBreak_Unassigned
Definition: main.cpp:581
@ LineBreak_JL
Definition: main.cpp:576
@ LineBreak_ZWJ
Definition: main.cpp:577
@ LineBreak_H3
Definition: main.cpp:575
@ LineBreak_EX
Definition: main.cpp:572
@ LineBreak_B2
Definition: main.cpp:574
@ LineBreak_BK
Definition: main.cpp:579
@ LineBreak_BB
Definition: main.cpp:574
@ LineBreak_SG
Definition: main.cpp:578
@ LineBreak_EB
Definition: main.cpp:577
@ LineBreak_OP
Definition: main.cpp:571
@ LineBreak_CB
Definition: main.cpp:576
@ LineBreak_ZW
Definition: main.cpp:575
@ LineBreak_PO
Definition: main.cpp:573
@ LineBreak_IS
Definition: main.cpp:572
@ LineBreak_WJ
Definition: main.cpp:575
@ LineBreak_EM
Definition: main.cpp:577
@ LineBreak_CM
Definition: main.cpp:575
@ LineBreak_SP
Definition: main.cpp:578
@ LineBreak_AL
Definition: main.cpp:573
@ LineBreak_CL
Definition: main.cpp:571
@ LineBreak_JT
Definition: main.cpp:576
@ LineBreak_HY
Definition: main.cpp:574
@ LineBreak_LF
Definition: main.cpp:579
@ LineBreak_PR
Definition: main.cpp:572
@ LineBreak_IN
Definition: main.cpp:574
@ LineBreak_H2
Definition: main.cpp:575
EastAsianWidth
Definition: main.cpp:99
#define DATA_VERSION_STR
Definition: main.cpp:43
IdnaStatus
Definition: main.cpp:892
XmlOutput::xml_output comment(const QString &text)
Definition: xmloutput.h:219