37 #include <private/qstringiterator_p.h>
39 #include <private/qunicodetables_p.h>
42 #define DATA_VERSION_S "14.0"
43 #define DATA_VERSION_STR "QChar::Unicode_14_0"
48 static void initAgeMap()
82 age_map.insert(
d->age,
d->version);
87 static const char *east_asian_width_string =
88 R
"(enum class EastAsianWidth : unsigned int {
110 static void initEastAsianWidthMap()
124 for (
auto &
w : widths)
125 eastAsianWidthMap.insert(
w.name,
w.width);
130 static void initCategoryMap()
176 categoryMap.insert(
c->name,
c->cat);
184 static void initDecompositionMap()
189 } decompositions[] = {
209 Dec *
d = decompositions;
211 decompositionMap.insert(
d->name,
d->dec);
247 static void initDirectionMap()
280 directionMap.insert(
d->name,
d->dir);
299 static void initJoiningMap()
313 JoiningList *
d = joinings;
315 joining_map.insert(
d->name,
d->joining);
321 static const char *grapheme_break_class_string =
322 "enum GraphemeBreakClass {\n"
323 " GraphemeBreak_Any,\n"
324 " GraphemeBreak_CR,\n"
325 " GraphemeBreak_LF,\n"
326 " GraphemeBreak_Control,\n"
327 " GraphemeBreak_Extend,\n"
328 " GraphemeBreak_ZWJ,\n"
329 " GraphemeBreak_RegionalIndicator,\n"
330 " GraphemeBreak_Prepend,\n"
331 " GraphemeBreak_SpacingMark,\n"
332 " GraphemeBreak_L,\n"
333 " GraphemeBreak_V,\n"
334 " GraphemeBreak_T,\n"
335 " GraphemeBreak_LV,\n"
336 " GraphemeBreak_LVT,\n"
337 " GraphemeBreak_Extended_Pictographic,\n"
339 " NumGraphemeBreakClasses\n"
364 static void initGraphemeBreak()
366 struct GraphemeBreakList {
387 GraphemeBreakList *
d = breaks;
389 grapheme_break_map.insert(
d->name,
d->brk);
395 static const char *word_break_class_string =
396 "enum WordBreakClass {\n"
400 " WordBreak_Newline,\n"
401 " WordBreak_Extend,\n"
403 " WordBreak_Format,\n"
404 " WordBreak_RegionalIndicator,\n"
405 " WordBreak_Katakana,\n"
406 " WordBreak_HebrewLetter,\n"
407 " WordBreak_ALetter,\n"
408 " WordBreak_SingleQuote,\n"
409 " WordBreak_DoubleQuote,\n"
410 " WordBreak_MidNumLet,\n"
411 " WordBreak_MidLetter,\n"
412 " WordBreak_MidNum,\n"
413 " WordBreak_Numeric,\n"
414 " WordBreak_ExtendNumLet,\n"
415 " WordBreak_WSegSpace,\n"
417 " NumWordBreakClasses\n"
446 static void initWordBreak()
448 struct WordBreakList {
473 WordBreakList *
d = breaks;
475 word_break_map.insert(
d->name,
d->brk);
481 static const char *sentence_break_class_string =
482 "enum SentenceBreakClass {\n"
483 " SentenceBreak_Any,\n"
484 " SentenceBreak_CR,\n"
485 " SentenceBreak_LF,\n"
486 " SentenceBreak_Sep,\n"
487 " SentenceBreak_Extend,\n"
488 " SentenceBreak_Sp,\n"
489 " SentenceBreak_Lower,\n"
490 " SentenceBreak_Upper,\n"
491 " SentenceBreak_OLetter,\n"
492 " SentenceBreak_Numeric,\n"
493 " SentenceBreak_ATerm,\n"
494 " SentenceBreak_SContinue,\n"
495 " SentenceBreak_STerm,\n"
496 " SentenceBreak_Close,\n"
498 " NumSentenceBreakClasses\n"
522 static void initSentenceBreak()
524 struct SentenceBreakList {
545 SentenceBreakList *
d = breaks;
547 sentence_break_map.insert(
d->name,
d->brk);
553 static const char *line_break_class_string =
554 "// see http://www.unicode.org/reports/tr14/tr14-30.html\n"
555 "// we don't use the XX and AI classes and map them to AL instead.\n"
556 "enum LineBreakClass {\n"
557 " LineBreak_OP, LineBreak_CL, LineBreak_CP, LineBreak_QU, LineBreak_GL,\n"
558 " LineBreak_NS, LineBreak_EX, LineBreak_SY, LineBreak_IS, LineBreak_PR,\n"
559 " LineBreak_PO, LineBreak_NU, LineBreak_AL, LineBreak_HL, LineBreak_ID,\n"
560 " LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,\n"
561 " LineBreak_ZW, LineBreak_CM, LineBreak_WJ, LineBreak_H2, LineBreak_H3,\n"
562 " LineBreak_JL, LineBreak_JV, LineBreak_JT, LineBreak_RI, LineBreak_CB,\n"
563 " LineBreak_EB, LineBreak_EM, LineBreak_ZWJ,\n"
564 " LineBreak_SA, LineBreak_SG, LineBreak_SP,\n"
565 " LineBreak_CR, LineBreak_LF, LineBreak_BK,\n"
567 " NumLineBreakClasses\n"
586 static void initLineBreak()
592 struct LineBreakList {
641 LineBreakList *
d = breaks;
643 line_break_map.insert(
d->name,
d->brk);
651 static void initScriptMap()
842 scriptMap.insert(
p->name,
p->script);
860 static void initIdnaStatusMap()
879 static const char *idna_status_string =
880 "enum class IdnaStatus : unsigned int {\n"
901 static const char *property_string =
911 "struct Properties {\n"
912 " ushort category : 8; /* 5 used */\n"
913 " ushort direction : 8; /* 5 used */\n"
914 " ushort combiningClass : 8;\n"
915 " ushort joining : 3;\n"
916 " signed short digitValue : 5;\n"
917 " signed short mirrorDiff : 16;\n"
918 " ushort unicodeVersion : 5; /* 5 used */\n"
919 " ushort eastAsianWidth : 3; /* 3 used */\n"
920 " ushort nfQuickCheck : 8;\n"
922 " unsigned char : 0; //wasm 64 packing trick\n"
925 " ushort special : 1;\n"
926 " signed short diff : 15;\n"
927 " } cases[NumCases];\n"
929 " unsigned char : 0; //wasm 64 packing trick\n"
931 " ushort graphemeBreakClass : 5; /* 5 used */\n"
932 " ushort wordBreakClass : 5; /* 5 used */\n"
933 " ushort lineBreakClass : 6; /* 6 used */\n"
934 " ushort sentenceBreakClass : 4; /* 4 used */\n"
935 " ushort idnaStatus : 4; /* 3 used */\n"
936 " ushort script : 8;\n"
938 "Q_CORE_EXPORT const Properties * QT_FASTCALL properties(char32_t ucs4) noexcept;\n"
939 "Q_CORE_EXPORT const Properties * QT_FASTCALL properties(char16_t ucs2) noexcept;\n"
942 static const char *methods =
943 "Q_CORE_EXPORT GraphemeBreakClass QT_FASTCALL graphemeBreakClass(char32_t ucs4) noexcept;\n"
944 "inline GraphemeBreakClass graphemeBreakClass(QChar ch) noexcept\n"
945 "{ return graphemeBreakClass(ch.unicode()); }\n"
947 "Q_CORE_EXPORT WordBreakClass QT_FASTCALL wordBreakClass(char32_t ucs4) noexcept;\n"
948 "inline WordBreakClass wordBreakClass(QChar ch) noexcept\n"
949 "{ return wordBreakClass(ch.unicode()); }\n"
951 "Q_CORE_EXPORT SentenceBreakClass QT_FASTCALL sentenceBreakClass(char32_t ucs4) noexcept;\n"
952 "inline SentenceBreakClass sentenceBreakClass(QChar ch) noexcept\n"
953 "{ return sentenceBreakClass(ch.unicode()); }\n"
955 "Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(char32_t ucs4) noexcept;\n"
956 "inline LineBreakClass lineBreakClass(QChar ch) noexcept\n"
957 "{ return lineBreakClass(ch.unicode()); }\n"
959 "Q_CORE_EXPORT IdnaStatus QT_FASTCALL idnaStatus(char32_t ucs4) noexcept;\n"
960 "inline IdnaStatus idnaStatus(QChar ch) noexcept\n"
961 "{ return idnaStatus(ch.unicode()); }\n"
963 "Q_CORE_EXPORT QStringView QT_FASTCALL idnaMapping(char32_t usc4) noexcept;\n"
964 "inline QStringView idnaMapping(QChar ch) noexcept\n"
965 "{ return idnaMapping(ch.unicode()); }\n"
967 "Q_CORE_EXPORT EastAsianWidth QT_FASTCALL eastAsianWidth(char32_t ucs4) noexcept;\n"
968 "inline EastAsianWidth eastAsianWidth(QChar ch) noexcept\n"
969 "{ return eastAsianWidth(ch.unicode()); }\n"
// Size, in bytes, that the Properties struct is expected to have. It is also
// multiplied by the number of unique properties elsewhere in this file to
// report the "properties data uses" byte estimate.
972 static const int SizeOfPropertiesStruct = 20;
// C++ source text of a static_assert that compares sizeof(Properties) with
// SizeOfPropertiesStruct. NOTE(review): presumably written into the generated
// output; the place where it is emitted is not visible here - confirm.
974 static const QByteArray sizeOfPropertiesStructCheck =
975 "static_assert(sizeof(Properties) == " +
QByteArray::number(SizeOfPropertiesStruct) +
");\n\n";
1058 utf16map << codepoint;
1065 specialCaseMap << 0;
1068 while (i < specialCaseMap.
size()) {
1069 int n = specialCaseMap.
at(i);
1072 for (
j = 1;
j <=
n; ++
j) {
1073 if (specialCaseMap.
at(i+
j) != utf16map.
at(
j))
1082 int pos = specialCaseMap.
size();
1083 specialCaseMap << utf16map;
1088 static inline bool isDefaultIgnorable(
uint ucs4)
1096 return ucs4 == 0xad;
1098 return ucs4 == 0x034f
1100 || (ucs4 >= 0x115f && ucs4 <= 0x1160)
1101 || (ucs4 >= 0x17b4 && ucs4 <= 0x17b5)
1102 || (ucs4 >= 0x180b && ucs4 <= 0x180d)
1104 || (ucs4 >= 0x200b && ucs4 <= 0x200f)
1105 || (ucs4 >= 0x202a && ucs4 <= 0x202e)
1106 || (ucs4 >= 0x2060 && ucs4 <= 0x206f)
1108 || (ucs4 >= 0xfe00 && ucs4 <= 0xfe0f)
1111 || (ucs4 >= 0xfff0 && ucs4 <= 0xfff8)
1112 || (ucs4 >= 0x1bca0 && ucs4 <= 0x1bca3)
1113 || (ucs4 >= 0x1d173 && ucs4 <= 0x1d17a)
1114 || (ucs4 >= 0xe0000 && ucs4 <= 0xe0fff);
1123 if ((codepoint >= 0x0600 && codepoint <= 0x07BF)
1124 || (codepoint >= 0x08A0 && codepoint <= 0x08FF)
1125 || (codepoint >= 0xFB50 && codepoint <= 0xFDCF)
1126 || (codepoint >= 0xFDF0 && codepoint <= 0xFDFF)
1127 || (codepoint >= 0xFE70 && codepoint <= 0xFEFF)
1128 || (codepoint >= 0x1EE00 && codepoint <= 0x1EEFF)) {
1133 else if ((codepoint >= 0x0590 && codepoint <= 0x05FF)
1134 || (codepoint >= 0x07C0 && codepoint <= 0x089F)
1135 || (codepoint >= 0xFB1D && codepoint <= 0xFB4F)
1136 || (codepoint >= 0x10800 && codepoint <= 0x10FFF)
1137 || (codepoint >= 0x1E800 && codepoint <= 0x1EDFF)
1138 || (codepoint >= 0x1EF00 && codepoint <= 0x1EFFF)) {
1143 else if (codepoint >= 0x20A0 && codepoint <= 0x20CF) {
1159 if ((codepoint >= 0x3400 && codepoint <= 0x4DBF)
1160 || (codepoint >= 0x4E00 && codepoint <= 0x9FFF)
1161 || (codepoint >= 0xF900 && codepoint <= 0xFAFF)
1162 || (codepoint >= 0x20000 && codepoint <= 0x2A6DF)
1163 || (codepoint >= 0x2A700 && codepoint <= 0x2B73F)
1164 || (codepoint >= 0x2B740 && codepoint <= 0x2B81F)
1165 || (codepoint >= 0x2B820 && codepoint <= 0x2CEAF)
1166 || (codepoint >= 0x2F800 && codepoint <= 0x2FA1F)
1167 || (codepoint >= 0x20000 && codepoint <= 0x2FFFD)
1168 || (codepoint >= 0x30000 && codepoint <= 0x3FFFD)) {
1173 else if (codepoint >= 0x20A0 && codepoint <= 0x20CF) {
1204 static bool initialized =
false;
1213 return unicodeData[codepoint];
// Running maximum (via qMax) of the code points whose decomposition field is
// non-empty. Compared against SMP_END with a qFatal elsewhere in this file.
1218 static int highestComposedCharacter = 0;
// NOTE(review): no update of this counter is visible in this excerpt - confirm
// where (and whether) it is incremented.
1219 static int numLigatures = 0;
// Running maximum (via qMax) of part1, the first decomposition element, taken
// where a Ligature entry is built.
1220 static int highestLigature = 0;
1229 {
return l1.
u1 < l2.
u1; }
// Largest absolute case-mapping offsets seen so far. Each one is raised with
// qMax(..., qAbs(diff)) right after the matching lowerCaseDiff / upperCaseDiff /
// titleCaseDiff field is assigned, and each is checked by a Q_ASSERT against
// (1<<13) elsewhere in this file.
1235 static int maxLowerCaseDiff = 0;
1236 static int maxUpperCaseDiff = 0;
1237 static int maxTitleCaseDiff = 0;
1239 static void readUnicodeData()
1241 qDebug(
"Reading UnicodeData.txt");
1243 enum UniDataFields {
1250 UD_DecimalDigitValue,
1261 QFile f(
"data/UnicodeData.txt");
1263 qFatal(
"Couldn't find UnicodeData.txt");
1267 while (!
f.atEnd()) {
1284 int lastCodepoint = codepoint;
1287 if (
name.startsWith(
'<') &&
name.contains(
"First")) {
1290 f.readLine(nextLine.
data(), 1024);
1301 if (!combiningClassUsage.
contains(
data.p.combiningClass))
1302 combiningClassUsage[
data.p.combiningClass] = 1;
1304 ++combiningClassUsage[
data.p.combiningClass];
1308 qFatal(
"unhandled direction value: %s", properties[UD_BidiCategory].constData());
1311 if (!properties[UD_UpperCase].isEmpty()) {
1312 int upperCase =
properties[UD_UpperCase].toInt(&
ok, 16);
1314 int diff = upperCase - codepoint;
1321 if (
qAbs(diff) >= (1<<13)) {
1322 data.p.upperCaseSpecial =
true;
1323 data.p.upperCaseDiff = appendToSpecialCaseMap(
QList<int>() << upperCase);
1325 data.p.upperCaseDiff = diff;
1326 maxUpperCaseDiff = qMax(maxUpperCaseDiff,
qAbs(diff));
1329 if (!properties[UD_LowerCase].isEmpty()) {
1339 if (
qAbs(diff) >= (1<<13)) {
1340 data.p.lowerCaseSpecial =
true;
1343 data.p.lowerCaseDiff = diff;
1344 maxLowerCaseDiff = qMax(maxLowerCaseDiff,
qAbs(diff));
1348 if (properties[UD_TitleCase].isEmpty())
1350 if (!properties[UD_TitleCase].isEmpty()) {
1351 int titleCase =
properties[UD_TitleCase].toInt(&
ok, 16);
1353 int diff = titleCase - codepoint;
1360 if (
qAbs(diff) >= (1<<13)) {
1361 data.p.titleCaseSpecial =
true;
1362 data.p.titleCaseDiff = appendToSpecialCaseMap(
QList<int>() << titleCase);
1364 data.p.titleCaseDiff = diff;
1365 maxTitleCaseDiff = qMax(maxTitleCaseDiff,
qAbs(diff));
1369 if (!properties[UD_DigitValue].isEmpty())
1374 if (!decomposition.
isEmpty()) {
1375 highestComposedCharacter = qMax(highestComposedCharacter, codepoint);
1380 qFatal(
"unhandled decomposition type: %s",
d[0].constData());
1385 for (
int i = 0;
i <
d.size(); ++
i) {
1386 data.decomposition.append(
d[i].toInt(&
ok, 16));
1389 ++decompositionLength[
data.decomposition.size()];
1392 for (
int i = codepoint;
i <= lastCodepoint; ++
i)
1393 unicodeData[i] =
data;
// Largest absolute mirrorDiff seen so far, where mirrorDiff is assigned as
// mirroredChar - codepoint. Checked by a Q_ASSERT against (1<<12) elsewhere in
// this file.
1397 static int maxMirroredDiff = 0;
1399 static void readBidiMirroring()
1401 qDebug(
"Reading BidiMirroring.txt");
1403 QFile f(
"data/BidiMirroring.txt");
1405 qFatal(
"Couldn't find BidiMirroring.txt");
1409 while (!
f.atEnd()) {
1427 int codepoint = pair[0].toInt(&
ok, 16);
1429 int mirror = pair[1].toInt(&
ok, 16);
1433 d.mirroredChar = mirror;
1434 d.p.mirrorDiff =
d.mirroredChar - codepoint;
1435 maxMirroredDiff = qMax(maxMirroredDiff,
qAbs(
d.p.mirrorDiff));
1439 static void readArabicShaping()
1441 qDebug(
"Reading ArabicShaping.txt");
1455 QFile f(
"data/ArabicShaping.txt");
1457 qFatal(
"Couldn't find ArabicShaping.txt");
1461 while (!
f.atEnd()) {
1479 int codepoint =
l[0].toInt(&
ok, 16);
1486 qFatal(
"%x: unassigned or unhandled joining type: %s", codepoint,
l[2].constData());
1489 switch (
d.p.category) {
1496 qFatal(
"%x: joining type '%s' was met (category: %d); "
1497 "the current implementation needs to be revised!",
1498 codepoint,
l[2].constData(),
d.p.category);
1508 static void readDerivedAge()
1510 qDebug(
"Reading DerivedAge.txt");
1512 QFile f(
"data/DerivedAge.txt");
1514 qFatal(
"Couldn't find DerivedAge.txt");
1518 while (!
f.atEnd()) {
1540 int from = cl[0].toInt(&
ok, 16);
1543 if (cl.
size() == 2) {
1544 to = cl[1].toInt(&
ok, 16);
1551 qFatal(
"unassigned or unhandled age value: %s",
l[1].constData());
1553 for (
int codepoint = from; codepoint <= to; ++codepoint) {
1560 static void readEastAsianWidth()
1562 qDebug(
"Reading EastAsianWidth.txt");
1564 QFile f(
"data/EastAsianWidth.txt");
1566 qFatal(
"Couldn't find or read EastAsianWidth.txt");
1568 while (!
f.atEnd()) {
1585 const QByteArray widthString = fields[1].trimmed();
1586 if (!eastAsianWidthMap.contains(widthString)) {
1587 qFatal(
"Unhandled EastAsianWidth property value for %s: %s",
1588 qPrintable(codePoints), qPrintable(widthString));
1590 auto width = eastAsianWidthMap.value(widthString);
1593 const int first = cl[0].toInt(&
ok, 16);
1594 const int last =
ok && cl.
size() == 2 ? cl[1].toInt(&
ok, 16) :
first;
1597 for (
int codepoint =
first; codepoint <= last; ++codepoint) {
1606 static void readDerivedNormalizationProps()
1608 qDebug(
"Reading DerivedNormalizationProps.txt");
1610 QFile f(
"data/DerivedNormalizationProps.txt");
1612 qFatal(
"Couldn't find DerivedNormalizationProps.txt");
1616 while (!
f.atEnd()) {
1633 if (propName !=
"Full_Composition_Exclusion" &&
1634 propName !=
"NFD_QC" && propName !=
"NFC_QC" &&
1635 propName !=
"NFKD_QC" && propName !=
"NFKC_QC") {
1645 int from = cl[0].toInt(&
ok, 16);
1648 if (cl.
size() == 2) {
1649 to = cl[1].toInt(&
ok, 16);
1653 for (
int codepoint = from; codepoint <= to; ++codepoint) {
1655 if (propName ==
"Full_Composition_Exclusion") {
1656 d.excludedComposition =
true;
1664 if (propName ==
"NFD_QC")
1666 else if (propName ==
"NFC_QC")
1668 else if (propName ==
"NFKD_QC")
1674 l[2] =
l[2].trimmed();
1676 enum { NFQC_YES = 0, NFQC_NO = 1, NFQC_MAYBE = 3 };
1677 uchar ynm = (
l[2] ==
"N" ? NFQC_NO :
l[2] ==
"M" ? NFQC_MAYBE : NFQC_YES);
1678 if (ynm == NFQC_MAYBE) {
1682 d.p.nfQuickCheck |= (ynm << (
form << 1));
1689 if (!
d.excludedComposition
1691 &&
d.decomposition.size() > 1) {
1694 int part1 =
d.decomposition.at(0);
1695 int part2 =
d.decomposition.at(1);
1702 highestLigature = qMax(highestLigature, part1);
1703 Ligature l = { part1, part2, codepoint };
1704 ligatureHashes[part2].append(
l);
1716 static QByteArray createNormalizationCorrections()
1718 qDebug(
"Reading NormalizationCorrections.txt");
1720 QFile f(
"data/NormalizationCorrections.txt");
1722 qFatal(
"Couldn't find NormalizationCorrections.txt");
1727 =
"struct NormalizationCorrection {\n"
1729 " uint old_mapping;\n"
1733 "static constexpr NormalizationCorrection uc_normalization_corrections[] = {\n";
1736 int numCorrections = 0;
1737 while (!
f.atEnd()) {
1762 if (fields.
at(3) ==
"3.2.0")
1764 else if (fields.
at(3) ==
"4.0.0")
1767 qFatal(
"unknown unicode version in NormalizationCorrection.txt");
1773 maxVersion = qMax(
c.version, maxVersion);
1775 if (
out.endsWith(
",\n"))
1780 "enum { NumNormalizationCorrections = " +
QByteArray::number(numCorrections) +
" };\n"
1781 "enum { NormalizationCorrectionsVersionMax = " +
QByteArray::number(maxVersion) +
" };\n\n";
1786 static void readLineBreak()
1788 qDebug(
"Reading LineBreak.txt");
1790 QFile f(
"data/LineBreak.txt");
1792 qFatal(
"Couldn't find LineBreak.txt");
1796 while (!
f.atEnd()) {
1818 int from = cl[0].toInt(&
ok, 16);
1821 if (cl.
size() == 2) {
1822 to = cl[1].toInt(&
ok, 16);
1828 qFatal(
"unassigned line break class: %s",
l[1].constData());
1830 for (
int codepoint = from; codepoint <= to; ++codepoint) {
1832 d.p.lineBreakClass = lb;
1837 static void readSpecialCasing()
1839 qDebug(
"Reading SpecialCasing.txt");
1841 QFile f(
"data/SpecialCasing.txt");
1843 qFatal(
"Couldn't find SpecialCasing.txt");
1847 while (!
f.atEnd()) {
1868 int codepoint =
l[0].trimmed().toInt(&
ok, 16);
1880 for (
int i = 0;
i < lower.
size(); ++
i) {
1896 for (
int i = 0;
i < upper.
size(); ++
i) {
1908 if (lowerMap.
size() > 1) {
1912 if (titleMap.
size() > 1) {
1916 if (upperMap.
size() > 1) {
// Largest absolute case-folding offset seen so far, raised with
// qMax(..., qAbs(diff)) where diff = caseFolded - codepoint. Checked by a
// Q_ASSERT against (1<<13) elsewhere in this file.
1923 static int maxCaseFoldDiff = 0;
1925 static void readCaseFolding()
1927 qDebug(
"Reading CaseFolding.txt");
1929 QFile f(
"data/CaseFolding.txt");
1931 qFatal(
"Couldn't find CaseFolding.txt");
1935 while (!
f.atEnd()) {
1951 int codepoint =
l[0].trimmed().toInt(&
ok, 16);
1955 l[1] =
l[1].trimmed();
1956 if (
l[1] ==
"F" ||
l[1] ==
"T")
1963 for (
int i = 0;
i < fold.
size(); ++
i) {
1970 if (foldMap.
size() == 1) {
1971 int caseFolded = foldMap.
at(0);
1972 int diff = caseFolded - codepoint;
1979 if (
qAbs(diff) >= (1<<13)) {
1984 maxCaseFoldDiff = qMax(maxCaseFoldDiff,
qAbs(diff));
1987 qFatal(
"we currently don't support full case foldings");
1995 static void readGraphemeBreak()
1997 qDebug(
"Reading GraphemeBreakProperty.txt");
1999 QFile f(
"data/GraphemeBreakProperty.txt");
2001 qFatal(
"Couldn't find GraphemeBreakProperty.txt");
2005 while (!
f.atEnd()) {
2027 int from = cl[0].toInt(&
ok, 16);
2030 if (cl.
size() == 2) {
2031 to = cl[1].toInt(&
ok, 16);
2037 qFatal(
"unassigned grapheme break class: %s",
l[1].constData());
2039 for (
int codepoint = from; codepoint <= to; ++codepoint) {
2046 static void readEmojiData()
2048 qDebug(
"Reading emoji-data.txt");
2050 QFile f(
"data/emoji-data.txt");
2052 qFatal(
"Couldn't find emoji-data.txt");
2054 while (!
f.atEnd()) {
2074 if (
l[1] !=
"Extended_Pictographic")
2082 int from = cl[0].toInt(&
ok, 16);
2085 if (cl.
size() == 2) {
2086 to = cl[1].toInt(&
ok, 16);
2090 for (
int codepoint = from; codepoint <= to; ++codepoint) {
2099 static void readWordBreak()
2101 qDebug(
"Reading WordBreakProperty.txt");
2103 QFile f(
"data/WordBreakProperty.txt");
2105 qFatal(
"Couldn't find WordBreakProperty.txt");
2109 while (!
f.atEnd()) {
2131 int from = cl[0].toInt(&
ok, 16);
2134 if (cl.
size() == 2) {
2135 to = cl[1].toInt(&
ok, 16);
2141 qFatal(
"unassigned word break class: %s",
l[1].constData());
2143 for (
int codepoint = from; codepoint <= to; ++codepoint) {
2148 if (codepoint == 0x002E)
2150 else if (codepoint == 0x003A)
2159 static void readSentenceBreak()
2161 qDebug(
"Reading SentenceBreakProperty.txt");
2163 QFile f(
"data/SentenceBreakProperty.txt");
2165 qFatal(
"Couldn't find SentenceBreakProperty.txt");
2169 while (!
f.atEnd()) {
2191 int from = cl[0].toInt(&
ok, 16);
2194 if (cl.
size() == 2) {
2195 to = cl[1].toInt(&
ok, 16);
2201 qFatal(
"unassigned sentence break class: %s",
l[1].constData());
2203 for (
int codepoint = from; codepoint <= to; ++codepoint) {
2217 if (!
p->caseFoldSpecial) {
2218 *(
out++) =
ch +
p->caseFoldDiff;
2220 const ushort *folded = specialCaseMap +
p->caseFoldDiff;
2237 while (
a != ae &&
b != be) {
2240 if (pa->caseFoldSpecial |
pb->caseFoldSpecial)
2242 int diff = (int)(*
a + pa->caseFoldDiff) - (int)(*
b +
pb->caseFoldDiff);
2256 ushort abuf[SPECIAL_CASE_MAX_LEN + 1];
2257 ushort bbuf[SPECIAL_CASE_MAX_LEN + 1];
2258 abuf[0] = bbuf[0] = 0;
2264 if (!*bp &&
b == be)
2268 foldCase(*(
a++), abuf);
2274 foldCase(*(
b++), bbuf);
2278 return (
int)*ap - (int)*bp;
2292 while (
a != ae && *
b) {
2295 if (pa->caseFoldSpecial |
pb->caseFoldSpecial)
2297 int diff = (int)(*
a + pa->caseFoldDiff) - (int)(*
b +
pb->caseFoldDiff);
2311 ushort abuf[SPECIAL_CASE_MAX_LEN + 1];
2312 ushort bbuf[SPECIAL_CASE_MAX_LEN + 1];
2313 abuf[0] = bbuf[0] = 0;
2323 foldCase(*(
a++), abuf);
2329 foldCase(*(
b++), bbuf);
2333 return (
int)*ap - (int)*bp;
2350 static void readBlocks()
2352 qDebug(
"Reading Blocks.txt");
2354 QFile f(
"data/Blocks.txt");
2356 qFatal(
"Couldn't find Blocks.txt");
2360 while (!
f.atEnd()) {
2378 int blockIndex = blockNames.
indexOf(blockName);
2379 if (blockIndex == -1) {
2380 blockIndex = blockNames.
size();
2381 blockNames.
append(blockName);
2384 codePoints.
replace(
"..",
".");
2388 int first = cl[0].toInt(&
ok, 16);
2391 if (cl.
size() == 2) {
2392 last = cl[1].toInt(&
ok, 16);
2396 BlockInfo blockInfo = { blockIndex,
first, last };
2397 blockInfoList.
append(blockInfo);
2402 static void readScripts()
2404 qDebug(
"Reading Scripts.txt");
2406 QFile f(
"data/Scripts.txt");
2408 qFatal(
"Couldn't find Scripts.txt");
2412 while (!
f.atEnd()) {
2431 codePoints.
replace(
"..",
".");
2435 int first = cl[0].toInt(&
ok, 16);
2438 if (cl.
size() == 2) {
2439 last = cl[1].toInt(&
ok, 16);
2443 if (!scriptMap.contains(scriptName))
2444 qFatal(
"Unhandled script property value: %s", scriptName.
constData());
2447 for (
int codepoint =
first; codepoint <= last; ++codepoint) {
2456 static void readIdnaMappingTable()
2458 qDebug(
"Reading IdnaMappingTable.txt");
2460 QFile f(
"data/IdnaMappingTable.txt");
2462 qFatal(
"Couldn't find or read IdnaMappingTable.txt");
2464 while (!
f.atEnd()) {
2481 const QByteArray statusString = fields[1].trimmed();
2482 if (!idnaStatusMap.contains(statusString))
2483 qFatal(
"Unhandled IDNA status property value for %s: %s",
2484 qPrintable(codePoints), qPrintable(statusString));
2485 IdnaRawStatus rawStatus = idnaStatusMap.value(statusString);
2488 const int first = cl[0].toInt(&
ok, 16);
2489 const int last =
ok && cl.
size() == 2 ? cl[1].toInt(&
ok, 16) :
first;
2494 switch (rawStatus) {
2506 for (
const auto &
s : fields[2].
trimmed().split(
' ')) {
2509 int val =
s.toInt(&
ok, 16);
2519 qDebug() <<
" Empty IDNA mapping for" << codePoints;
2525 for (
int codepoint =
first; codepoint <= last; ++codepoint) {
2533 if (codepoint >= 0x80 && !
mapping.isEmpty())
2534 idnaMappingTable[codepoint] =
mapping;
2550 static void resolveIdnaStatus()
2552 qDebug(
"resolveIdnaStatus:");
2570 Q_ASSERT(idnaMappingTable.contains(codepoint));
2571 const auto &
mapping = idnaMappingTable[codepoint];
2586 idnaMappingTable.remove(codepoint);
2633 return a.size() == b.size() ? a > b : a.size() > b.size();
2637 for (
auto i = inputs.
begin() + 1;
i != inputs.
end();) {
2639 return s.contains(*i);
2641 i = isSubstring ? inputs.
erase(
i) :
i + 1;
2657 std::sort(graphEdges.
begin(), graphEdges.
end(), [](
const auto &
a,
const auto &
b) {
2658 return a.overlap == b.overlap
2659 ? a.start == b.start ? a.end < b.end : a.start < b.start
2660 : a.overlap > b.overlap;
2668 if (!starts[edge.end] || !ends[edge.start])
2676 return node == edge.start;
2679 for (
const auto &edge : graphEdges) {
2680 if (!starts[edge.start] && !ends[edge.end] && !createsCycle(edge)) {
2681 starts.setBit(edge.start);
2682 ends.setBit(edge.end);
2683 pathEdges[edge.start] = edge;
2684 if (pathEdges.
size() == inputs.
size() - 1)
2690 Q_ASSERT(starts.count(
false) == 1);
2694 while (
node < ends.size() && ends[
node])
2702 const auto &edge = pathEdges[
node];
2729 qDebug(
"createIdnaMapping:");
2732 values.reserve(idnaMappingTable.size());
2735 for (
const auto &
v : idnaMappingTable.values()) {
2738 uncompressedSize +=
v.size();
2743 qDebug() <<
" uncompressed size:" << uncompressedSize <<
"characters";
2744 qDebug() <<
" consolidated size:" << idnaMappingData.
size() <<
"characters";
2749 "static constexpr char16_t idnaMappingData[] = {";
2752 for (
auto c : idnaMappingData) {
2756 col = (col + 1) % 12;
2762 Q_ASSERT(idnaMappingData.size() < (1 << 16));
2767 "struct IdnaMapEntry {\n"
2768 " // 21 bits suffice for any valid code-point (LastValidCodePoint = 0x10ffff)\n"
2769 " unsigned codePoint : 24;\n"
2770 " unsigned size : 8;\n"
2771 " char16_t ucs[2]; // ucs[0] is offset if size > 2\n"
2773 "static_assert(sizeof(IdnaMapEntry) == 8);\n\n"
2774 "static constexpr IdnaMapEntry idnaMap[] = {\n";
2776 for (
auto i = idnaMappingTable.keyValueBegin(); i != idnaMappingTable.keyValueEnd(); i++) {
2799 qDebug() <<
" memory usage:" << memoryUsage <<
"bytes";
2803 "Q_CORE_EXPORT QStringView QT_FASTCALL idnaMapping(char32_t ucs4) noexcept\n"
2805 " auto i = std::lower_bound(std::begin(idnaMap), std::end(idnaMap), ucs4,\n"
2806 " [](const auto &p, char32_t c) { return p.codePoint < c; });\n"
2807 " if (i == std::end(idnaMap) || i->codePoint != ucs4)\n"
2809 " return QStringView(i->size > 2 ? idnaMappingData + i->ucs[0] : i->ucs, i->size);\n"
2816 static void dump(
int from,
int to)
2818 for (
int i = from;
i <= to; ++
i) {
2820 qDebug(
"0x%04x: cat=%d combining=%d dir=%d case=%x mirror=%x joining=%d age=%d",
2821 i,
d.p.category,
d.p.combiningClass,
d.p.direction,
d.otherCase,
d.mirroredChar,
d.p.joining,
d.p.age);
2823 qDebug(
" decomposition: type=%d, length=%d, first=%x",
d.decompositionType,
d.decomposition.size(),
2824 d.decomposition[0]);
2833 static void computeUniqueProperties()
2835 qDebug(
"computeUniqueProperties:");
2838 int index = uniqueProperties.indexOf(
d.p);
2840 index = uniqueProperties.size();
2841 uniqueProperties.append(
d.p);
2845 qDebug(
" %zd unique unicode properties found", ssize_t(uniqueProperties.size()));
2860 qDebug(
"createPropertyInfo:");
2863 Q_ASSERT(maxMirroredDiff < (1<<12));
2864 Q_ASSERT(maxLowerCaseDiff < (1<<13));
2865 Q_ASSERT(maxUpperCaseDiff < (1<<13));
2866 Q_ASSERT(maxTitleCaseDiff < (1<<13));
2867 Q_ASSERT(maxCaseFoldDiff < (1<<13));
2869 const int BMP_BLOCKSIZE = 32;
2870 const int BMP_SHIFT = 5;
2871 const int BMP_END = 0x11000;
2872 const int SMP_END = 0x110000;
2873 const int SMP_BLOCKSIZE = 256;
2874 const int SMP_SHIFT = 8;
2884 b.values.reserve(BMP_BLOCKSIZE);
2885 for (
int i = 0;
i < BMP_BLOCKSIZE; ++
i) {
2886 int uc =
block*BMP_BLOCKSIZE +
i;
2888 b.values.append(
d.propertyIndex);
2894 used += BMP_BLOCKSIZE;
2899 int bmp_blocks = uniqueBlocks.
size();
2903 for (
int block = BMP_END/SMP_BLOCKSIZE;
block < SMP_END/SMP_BLOCKSIZE; ++
block) {
2905 b.values.reserve(SMP_BLOCKSIZE);
2906 for (
int i = 0;
i < SMP_BLOCKSIZE; ++
i) {
2907 int uc =
block*SMP_BLOCKSIZE +
i;
2909 b.values.append(
d.propertyIndex);
2915 used += SMP_BLOCKSIZE;
2920 int smp_blocks = uniqueBlocks.
size() - bmp_blocks;
2922 int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*
sizeof(
unsigned short);
2923 int bmp_trie = BMP_END/BMP_BLOCKSIZE*
sizeof(
unsigned short);
2924 int bmp_mem = bmp_block_data + bmp_trie;
2925 qDebug(
" %d unique blocks in BMP.", bmp_blocks);
2926 qDebug(
" block data uses: %d bytes", bmp_block_data);
2927 qDebug(
" trie data uses : %d bytes", bmp_trie);
2929 int smp_block_data = smp_blocks*SMP_BLOCKSIZE*
sizeof(
unsigned short);
2930 int smp_trie = (SMP_END-BMP_END)/SMP_BLOCKSIZE*
sizeof(
unsigned short);
2931 int smp_mem = smp_block_data + smp_trie;
2932 qDebug(
" %d unique blocks in SMP.", smp_blocks);
2933 qDebug(
" block data uses: %d bytes", smp_block_data);
2934 qDebug(
" trie data uses : %d bytes", smp_trie);
2936 int prop_data = uniqueProperties.size() * SizeOfPropertiesStruct;
2937 qDebug(
"\n properties data uses : %d bytes", prop_data);
2938 qDebug(
" memory usage: %d bytes", bmp_mem + smp_mem + prop_data);
2940 Q_ASSERT(blockMap.
size() == BMP_END/BMP_BLOCKSIZE +(SMP_END-BMP_END)/SMP_BLOCKSIZE);
2941 Q_ASSERT(blockMap.
last() + blockMap.
size() < (1<<(
sizeof(
unsigned short)*8)));
2943 QByteArray out =
"static constexpr unsigned short uc_property_trie[] = {\n";
2946 for (
int i = 0;
i < BMP_END/BMP_BLOCKSIZE; ++
i) {
2948 if (
out.endsWith(
' '))
2950 if (!((i*BMP_BLOCKSIZE) % 0x1000))
2957 if (
out.endsWith(
' '))
2960 for (
int i = BMP_END/BMP_BLOCKSIZE;
i < blockMap.
size(); ++
i) {
2962 if (
out.endsWith(
' '))
2964 if (!(i % (0x10000/SMP_BLOCKSIZE)))
2971 if (
out.endsWith(
' '))
2977 for (
int i = 0;
i < uniqueBlocks.
size(); ++
i) {
2978 if (
out.endsWith(
' '))
2982 for (
int j = 0;
j <
b.values.size(); ++
j) {
2984 if (
out.endsWith(
' '))
2992 if (
out.endsWith(
", "))
2996 out +=
"static constexpr Properties uc_properties[] = {";
2998 for (
int i = 0;
i < uniqueProperties.size(); ++
i) {
3068 if (
out.endsWith(
','))
3072 out +=
"Q_DECL_CONST_FUNCTION static inline const Properties *qGetProp(char32_t ucs4) noexcept\n"
3074 " Q_ASSERT(ucs4 <= QChar::LastValidCodePoint);\n"
3076 " return uc_properties + uc_property_trie[uc_property_trie[ucs4 >> "
3080 " return uc_properties\n"
3081 " + uc_property_trie[uc_property_trie[((ucs4 - 0x"
3088 "Q_DECL_CONST_FUNCTION static inline const Properties *qGetProp(char16_t ucs2) noexcept\n"
3090 " return uc_properties + uc_property_trie[uc_property_trie[ucs2 >> "
3095 "Q_DECL_CONST_FUNCTION Q_CORE_EXPORT const Properties * QT_FASTCALL properties(char32_t ucs4) noexcept\n"
3097 " return qGetProp(ucs4);\n"
3100 "Q_DECL_CONST_FUNCTION Q_CORE_EXPORT const Properties * QT_FASTCALL properties(char16_t ucs2) noexcept\n"
3102 " return qGetProp(ucs2);\n"
3105 out +=
"Q_CORE_EXPORT GraphemeBreakClass QT_FASTCALL graphemeBreakClass(char32_t ucs4) noexcept\n"
3107 " return static_cast<GraphemeBreakClass>(qGetProp(ucs4)->graphemeBreakClass);\n"
3110 "Q_CORE_EXPORT WordBreakClass QT_FASTCALL wordBreakClass(char32_t ucs4) noexcept\n"
3112 " return static_cast<WordBreakClass>(qGetProp(ucs4)->wordBreakClass);\n"
3115 "Q_CORE_EXPORT SentenceBreakClass QT_FASTCALL sentenceBreakClass(char32_t ucs4) noexcept\n"
3117 " return static_cast<SentenceBreakClass>(qGetProp(ucs4)->sentenceBreakClass);\n"
3120 "Q_CORE_EXPORT LineBreakClass QT_FASTCALL lineBreakClass(char32_t ucs4) noexcept\n"
3122 " return static_cast<LineBreakClass>(qGetProp(ucs4)->lineBreakClass);\n"
3125 "Q_CORE_EXPORT IdnaStatus QT_FASTCALL idnaStatus(char32_t ucs4) noexcept\n"
3127 " return static_cast<IdnaStatus>(qGetProp(ucs4)->idnaStatus);\n"
3130 "Q_CORE_EXPORT EastAsianWidth QT_FASTCALL eastAsianWidth(char32_t ucs4) noexcept\n"
3132 " return static_cast<EastAsianWidth>(qGetProp(ucs4)->eastAsianWidth);\n"
3141 qDebug(
"createSpecialCaseMap:");
3144 =
"static constexpr unsigned short specialCaseMap[] = {\n"
3145 " 0x0, // placeholder";
3149 while (i < specialCaseMap.
size()) {
3151 int n = specialCaseMap.
at(i);
3152 for (
int j = 0;
j <=
n; ++
j) {
3157 maxN = std::max(maxN,
n);
3160 out +=
"\n};\n\nconstexpr unsigned int MaxSpecialCaseLength = ";
3164 qDebug(
" memory usage: %zd bytes", ssize_t(specialCaseMap.
size() *
sizeof(
unsigned short)));
3172 qDebug(
"createCompositionInfo: highestComposedCharacter=0x%x", highestComposedCharacter);
3174 const int BMP_BLOCKSIZE = 16;
3175 const int BMP_SHIFT = 4;
3176 const int BMP_END = 0x3400;
3177 const int SMP_END = 0x30000;
3178 const int SMP_BLOCKSIZE = 256;
3179 const int SMP_SHIFT = 8;
3181 if (SMP_END <= highestComposedCharacter)
3182 qFatal(
"end of table smaller than highest composed character 0x%x", highestComposedCharacter);
3193 b.values.reserve(BMP_BLOCKSIZE);
3194 for (
int i = 0;
i < BMP_BLOCKSIZE; ++
i) {
3195 int uc =
block*BMP_BLOCKSIZE +
i;
3197 if (!
d.decomposition.isEmpty()) {
3198 int utf16Length = 0;
3199 decompositions.
append(0);
3200 for (
int j = 0;
j <
d.decomposition.size(); ++
j) {
3201 int code =
d.decomposition.at(
j);
3212 decompositions[tableIndex] =
d.decompositionType + (utf16Length<<8);
3213 b.values.append(tableIndex);
3214 tableIndex += utf16Length + 1;
3216 b.values.append(0xffff);
3223 used += BMP_BLOCKSIZE;
3228 int bmp_blocks = uniqueBlocks.
size();
3230 for (
int block = BMP_END/SMP_BLOCKSIZE;
block < SMP_END/SMP_BLOCKSIZE; ++
block) {
3232 b.values.reserve(SMP_BLOCKSIZE);
3233 for (
int i = 0;
i < SMP_BLOCKSIZE; ++
i) {
3234 int uc =
block*SMP_BLOCKSIZE +
i;
3236 if (!
d.decomposition.isEmpty()) {
3237 int utf16Length = 0;
3238 decompositions.
append(0);
3239 for (
int j = 0;
j <
d.decomposition.size(); ++
j) {
3240 int code =
d.decomposition.at(
j);
3251 decompositions[tableIndex] =
d.decompositionType + (utf16Length<<8);
3252 b.values.append(tableIndex);
3253 tableIndex += utf16Length + 1;
3255 b.values.append(0xffff);
3262 used += SMP_BLOCKSIZE;
3267 int smp_blocks = uniqueBlocks.
size() - bmp_blocks;
3272 int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*
sizeof(
unsigned short);
3273 int bmp_trie = BMP_END/BMP_BLOCKSIZE*
sizeof(
unsigned short);
3274 int bmp_mem = bmp_block_data + bmp_trie;
3275 qDebug(
" %d unique blocks in BMP.", bmp_blocks);
3276 qDebug(
" block data uses: %d bytes", bmp_block_data);
3277 qDebug(
" trie data uses : %d bytes", bmp_trie);
3279 int smp_block_data = smp_blocks*SMP_BLOCKSIZE*
sizeof(
unsigned short);
3280 int smp_trie = (SMP_END-BMP_END)/SMP_BLOCKSIZE*
sizeof(
unsigned short);
3281 int smp_mem = smp_block_data + smp_trie;
3282 qDebug(
" %d unique blocks in SMP.", smp_blocks);
3283 qDebug(
" block data uses: %d bytes", smp_block_data);
3284 qDebug(
" trie data uses : %d bytes", smp_trie);
3286 int decomposition_data = decompositions.
size() * 2;
3287 qDebug(
"\n decomposition data uses : %d bytes", decomposition_data);
3288 qDebug(
" memory usage: %d bytes", bmp_mem + smp_mem + decomposition_data);
3290 Q_ASSERT(blockMap.
last() + blockMap.
size() < (1<<(
sizeof(
unsigned short)*8)));
3292 QByteArray out =
"static constexpr unsigned short uc_decomposition_trie[] = {\n";
3295 for (
int i = 0;
i < BMP_END/BMP_BLOCKSIZE; ++
i) {
3297 if (
out.endsWith(
' '))
3299 if (!((i*BMP_BLOCKSIZE) % 0x1000))
3306 if (
out.endsWith(
' '))
3309 for (
int i = BMP_END/BMP_BLOCKSIZE;
i < blockMap.
size(); ++
i) {
3311 if (
out.endsWith(
' '))
3313 if (!(i % (0x10000/SMP_BLOCKSIZE)))
3320 if (
out.endsWith(
' '))
3324 for (
int i = 0;
i < uniqueBlocks.
size(); ++
i) {
3325 if (
out.endsWith(
' '))
3329 for (
int j = 0;
j <
b.values.size(); ++
j) {
3331 if (
out.endsWith(
' '))
3339 if (
out.endsWith(
' '))
3343 out +=
"#define GET_DECOMPOSITION_INDEX(ucs4) \\\n"
3345 " ? (uc_decomposition_trie[uc_decomposition_trie[ucs4 >> "
3349 " ? uc_decomposition_trie[uc_decomposition_trie[((ucs4 - 0x"
3356 out +=
"static constexpr unsigned short uc_decomposition_map[] = {";
3357 for (
int i = 0;
i < decompositions.
size(); ++
i) {
3359 if (
out.endsWith(
' '))
3366 if (
out.endsWith(
' '))
3375 qDebug(
"createLigatureInfo: numLigatures=%d, highestLigature=0x%x", numLigatures, highestLigature);
3377 for (
int i = 0;
i < ligatureHashes.size(); ++
i) {
3379 for (
int j = 0;
j <
l.size(); ++
j) {
3386 const int BMP_BLOCKSIZE = 32;
3387 const int BMP_SHIFT = 5;
3388 const int BMP_END = 0x3100;
3389 const int SMP_END = 0x12000;
3390 const int SMP_BLOCKSIZE = 256;
3391 const int SMP_SHIFT = 8;
3393 if (SMP_END <= highestLigature)
3394 qFatal(
"end of table smaller than highest ligature character 0x%x", highestLigature);
3405 b.values.reserve(BMP_BLOCKSIZE);
3406 for (
int i = 0;
i < BMP_BLOCKSIZE; ++
i) {
3407 int uc =
block*BMP_BLOCKSIZE +
i;
3411 std::sort(
l.begin(),
l.end());
3414 for (
int j = 0;
j <
l.size(); ++
j) {
3416 ligatures.
append(
l.at(
j).ligature);
3418 b.values.append(tableIndex);
3419 tableIndex += 2*
l.size() + 1;
3421 b.values.append(0xffff);
3428 used += BMP_BLOCKSIZE;
3433 int bmp_blocks = uniqueBlocks.
size();
3435 for (
int block = BMP_END/SMP_BLOCKSIZE;
block < SMP_END/SMP_BLOCKSIZE; ++
block) {
3437 b.values.reserve(SMP_BLOCKSIZE);
3438 for (
int i = 0;
i < SMP_BLOCKSIZE; ++
i) {
3439 int uc =
block*SMP_BLOCKSIZE +
i;
3443 std::sort(
l.begin(),
l.end());
3446 for (
int j = 0;
j <
l.size(); ++
j) {
3452 b.values.append(tableIndex);
3453 tableIndex += 4*
l.size() + 1;
3455 b.values.append(0xffff);
3462 used += SMP_BLOCKSIZE;
3467 int smp_blocks = uniqueBlocks.
size() - bmp_blocks;
3472 int bmp_block_data = bmp_blocks*BMP_BLOCKSIZE*
sizeof(
unsigned short);
3473 int bmp_trie = BMP_END/BMP_BLOCKSIZE*
sizeof(
unsigned short);
3474 int bmp_mem = bmp_block_data + bmp_trie;
3475 qDebug(
" %d unique blocks in BMP.", bmp_blocks);
3476 qDebug(
" block data uses: %d bytes", bmp_block_data);
3477 qDebug(
" trie data uses : %d bytes", bmp_trie);
3479 int smp_block_data = smp_blocks*SMP_BLOCKSIZE*
sizeof(
unsigned short);
3480 int smp_trie = (SMP_END-BMP_END)/SMP_BLOCKSIZE*
sizeof(
unsigned short);
3481 int smp_mem = smp_block_data + smp_trie;
3482 qDebug(
" %d unique blocks in SMP.", smp_blocks);
3483 qDebug(
" block data uses: %d bytes", smp_block_data);
3484 qDebug(
" trie data uses : %d bytes", smp_trie);
3486 int ligature_data = ligatures.
size() * 2;
3487 qDebug(
"\n ligature data uses : %d bytes", ligature_data);
3488 qDebug(
" memory usage: %d bytes", bmp_mem + smp_mem + ligature_data);
3490 Q_ASSERT(blockMap.
last() + blockMap.
size() < (1<<(
sizeof(
unsigned short)*8)));
3492 QByteArray out =
"static constexpr unsigned short uc_ligature_trie[] = {\n";
3495 for (
int i = 0;
i < BMP_END/BMP_BLOCKSIZE; ++
i) {
3497 if (
out.endsWith(
' '))
3499 if (!((i*BMP_BLOCKSIZE) % 0x1000))
3506 if (
out.endsWith(
' '))
3509 for (
int i = BMP_END/BMP_BLOCKSIZE;
i < blockMap.
size(); ++
i) {
3511 if (
out.endsWith(
' '))
3513 if (!(i % (0x10000/SMP_BLOCKSIZE)))
3520 if (
out.endsWith(
' '))
3524 for (
int i = 0;
i < uniqueBlocks.
size(); ++
i) {
3525 if (
out.endsWith(
' '))
3529 for (
int j = 0;
j <
b.values.size(); ++
j) {
3531 if (
out.endsWith(
' '))
3539 if (
out.endsWith(
' '))
3543 out +=
"#define GET_LIGATURE_INDEX(ucs4) \\\n"
3545 " ? (uc_ligature_trie[uc_ligature_trie[ucs4 >> "
3549 " ? uc_ligature_trie[uc_ligature_trie[((ucs4 - 0x"
3556 out +=
"static constexpr unsigned short uc_ligature_map[] = {";
3557 for (
int i = 0;
i < ligatures.
size(); ++
i) {
3559 if (
out.endsWith(
' '))
3566 if (
out.endsWith(
' '))
3576 =
"struct CasingInfo {\n"
3577 " uint codePoint : 16;\n"
3578 " uint flags : 8;\n"
3579 " uint offset : 8;\n"
3589 initEastAsianWidthMap();
3591 initDecompositionMap();
3594 initGraphemeBreak();
3596 initSentenceBreak();
3599 initIdnaStatusMap();
3602 readBidiMirroring();
3603 readArabicShaping();
3605 readEastAsianWidth();
3606 readDerivedNormalizationProps();
3607 readSpecialCasing();
3611 readGraphemeBreak();
3614 readSentenceBreak();
3616 readIdnaMappingTable();
3618 resolveIdnaStatus();
3620 computeUniqueProperties();
3621 QByteArray properties = createPropertyInfo();
3622 QByteArray specialCases = createSpecialCaseMap();
3623 QByteArray compositions = createCompositionInfo();
3625 QByteArray normalizationCorrections = createNormalizationCorrections();
3629 "/****************************************************************************\n"
3631 "** Copyright (C) 2020 The Qt Company Ltd.\n"
3632 "** Contact: https://www.qt.io/licensing/\n"
3634 "** This file is part of the QtCore module of the Qt Toolkit.\n"
3636 "** $QT_BEGIN_LICENSE:LGPL$\n"
3637 "** Commercial License Usage\n"
3638 "** Licensees holding valid commercial Qt licenses may use this file in\n"
3639 "** accordance with the commercial license agreement provided with the\n"
3640 "** Software or, alternatively, in accordance with the terms contained in\n"
3641 "** a written agreement between you and The Qt Company. For licensing terms\n"
3642 "** and conditions see https://www.qt.io/terms-conditions. For further\n"
3643 "** information use the contact form at https://www.qt.io/contact-us.\n"
3645 "** GNU Lesser General Public License Usage\n"
3646 "** Alternatively, this file may be used under the terms of the GNU Lesser\n"
3647 "** General Public License version 3 as published by the Free Software\n"
3648 "** Foundation and appearing in the file LICENSE.LGPL3 included in the\n"
3649 "** packaging of this file. Please review the following information to\n"
3650 "** ensure the GNU Lesser General Public License version 3 requirements\n"
3651 "** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.\n"
3653 "** GNU General Public License Usage\n"
3654 "** Alternatively, this file may be used under the terms of the GNU\n"
3655 "** General Public License version 2.0 or (at your option) the GNU General\n"
3656 "** Public license version 3 or any later version approved by the KDE Free\n"
3657 "** Qt Foundation. The licenses are as published by the Free Software\n"
3658 "** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3\n"
3659 "** included in the packaging of this file. Please review the following\n"
3660 "** information to ensure the GNU General Public License requirements will\n"
3661 "** be met: https://www.gnu.org/licenses/gpl-2.0.html and\n"
3662 "** https://www.gnu.org/licenses/gpl-3.0.html.\n"
3664 "** $QT_END_LICENSE$\n"
3666 "****************************************************************************/\n\n";
3669 "/* This file is autogenerated from the Unicode " DATA_VERSION_S " database. Do not edit */\n\n";
3673 "// W A R N I N G\n"
3674 "// -------------\n"
3676 "// This file is not part of the Qt API. It exists for the convenience\n"
3677 "// of internal files. This header file may change from version to version\n"
3678 "// without notice, or even be removed.\n"
3683 QFile f(
"../../src/corelib/text/qunicodetables.cpp");
3687 f.write(
"#include \"qunicodetables_p.h\"\n\n");
3688 f.write(
"QT_BEGIN_NAMESPACE\n\n");
3689 f.write(
"namespace QUnicodeTables {\n\n");
3690 f.write(properties);
3691 f.write(specialCases);
3692 f.write(compositions);
3695 f.write(normalizationCorrections);
3697 f.write(
"} // namespace QUnicodeTables\n\n");
3698 f.write(
"using namespace QUnicodeTables;\n\n");
3699 f.write(
"QT_END_NAMESPACE\n");
3702 f.setFileName(
"../../src/corelib/text/qunicodetables_p.h");
3707 f.write(
"#ifndef QUNICODETABLES_P_H\n"
3708 "#define QUNICODETABLES_P_H\n\n"
3709 "#include <QtCore/private/qglobal_p.h>\n\n"
3710 "#include <QtCore/qchar.h>\n\n"
3711 "QT_BEGIN_NAMESPACE\n\n");
3713 f.write(
"namespace QUnicodeTables {\n\n");
3714 f.write(property_string);
3715 f.write(sizeOfPropertiesStructCheck);
3716 f.write(east_asian_width_string);
3717 f.write(grapheme_break_class_string);
3718 f.write(word_break_class_string);
3719 f.write(sentence_break_class_string);
3720 f.write(line_break_class_string);
3721 f.write(idna_status_string);
3723 f.write(
"} // namespace QUnicodeTables\n\n"
3724 "QT_END_NAMESPACE\n\n"
3725 "#endif // QUNICODETABLES_P_H\n");
3728 qDebug() <<
"maxMirroredDiff = " <<
Qt::hex << maxMirroredDiff;
3729 qDebug() <<
"maxLowerCaseDiff = " <<
Qt::hex << maxLowerCaseDiff;
3730 qDebug() <<
"maxUpperCaseDiff = " <<
Qt::hex << maxUpperCaseDiff;
3731 qDebug() <<
"maxTitleCaseDiff = " <<
Qt::hex << maxTitleCaseDiff;
3732 qDebug() <<
"maxCaseFoldDiff = " <<
Qt::hex << maxCaseFoldDiff;
3739 qDebug(
"decompositionLength used:");
3740 int totalcompositions = 0;
3742 for (
int i = 1;
i < 20; ++
i) {
3743 qDebug(
" length %d used %d times",
i, decompositionLength.
value(
i, 0));
3744 totalcompositions +=
i*decompositionLength.
value(
i, 0);
3747 qDebug(
" len decomposition map %d, average length %f, num composed chars %d",
3748 totalcompositions, (
float)totalcompositions/(
float)
sum,
sum);
3749 qDebug(
"highest composed character %x", highestComposedCharacter);
3750 qDebug(
"num ligatures = %d highest=%x, maxLength=%d", numLigatures, highestLigature, longestLigature);
3752 qBubbleSort(ligatures);
3753 for (
int i = 0;
i < ligatures.
size(); ++
i)
small capitals from c petite p scientific f u
small capitals from c petite p scientific i
[1]
xD9 x84 xD8 xAD xD9 x80 xF0 x90 xAC x9A xE0 xA7 xA6 xE0 xA7 xAA xF0 x91 x84 xA4 xF0 x91 x84 x89 xF0 x91 x84 x9B xF0 x90 x8A xAB xF0 x90 x8B x89 xE2 xB2 x9E xE2 xB2 x9F xD0 xBE xD0 x9E xF0 x90 x90 x84 xF0 x90 x90 xAC xE1 x83 x98 xE1 x83 x94 xE1 x83 x90 xE1 xB2 xBF xE2 xB0 x95 xE2 xB1 x85 xCE xBF xCE x9F xE0 xA8 xA0 xE0 xA8 xB0 xE0 xA9 xA6 Kayah xEA xA4 x8D xEA xA4 x80 Khmer xE1 xA7 xA1 xE1 xA7 xAA xE0 xBB x90 Latin Subscript xE2 x82 x92 xE2 x82 x80 xEA x93 xB3 xF0 x96 xB9 xA1 xF0 x96 xB9 x9B xF0 x96 xB9 xAF xE1 x80 x9D xE1 x80 x84 xE1 x80 x82 no script
The QBitArray class provides an array of bits.
The QByteArray class provides an array of bytes.
qsizetype size() const noexcept
const char * constData() const noexcept
int toInt(bool *ok=nullptr, int base=10) const
QList< QByteArray > split(char sep) const
char at(qsizetype i) const
bool isEmpty() const noexcept
static QByteArray number(int, int base=10)
void resize(qsizetype size)
QByteArray & replace(qsizetype index, qsizetype len, const char *s, qsizetype alen)
The QChar class provides a 16-bit Unicode character.
constexpr bool isNonCharacter() const noexcept
static constexpr bool requiresSurrogates(char32_t ucs4) noexcept
static constexpr auto fromUcs4(char32_t c) noexcept
static constexpr char16_t highSurrogate(char32_t ucs4) noexcept
@ Punctuation_InitialQuote
static constexpr char16_t lowSurrogate(char32_t ucs4) noexcept
@ Script_EgyptianHieroglyphs
@ Script_InscriptionalParthian
@ Script_InscriptionalPahlavi
@ Script_AnatolianHieroglyphs
@ Script_CaucasianAlbanian
@ Script_KhitanSmallScript
@ Script_CanadianAboriginal
@ Script_MeroiticHieroglyphs
@ Script_NyiakengPuachueHmong
bool operator<(const QElapsedTimer &lhs, const QElapsedTimer &rhs) noexcept
The QFile class provides an interface for reading from and writing to files.
The QHash class is a template class that provides a hash-table-based dictionary.
bool contains(const Key &key) const noexcept
T value(const Key &key) const noexcept
qsizetype size() const noexcept
bool isEmpty() const noexcept
iterator erase(const_iterator begin, const_iterator end)
QList< T > sliced(qsizetype pos) const
const_reference at(qsizetype i) const noexcept
void prepend(rvalue_ref t)
void reserve(qsizetype size)
void replace(qsizetype i, parameter_type t)
void append(parameter_type t)
bool contains(const Key &key) const
The QString class provides a Unicode character string.
QString & replace(qsizetype i, qsizetype len, QChar after)
QStringList split(const QString &sep, Qt::SplitBehavior behavior=Qt::KeepEmptyParts, Qt::CaseSensitivity cs=Qt::CaseSensitive) const
void truncate(qsizetype pos)
QString mid(qsizetype position, qsizetype n=-1) const
const QChar at(qsizetype i) const
bool contains(QChar c, Qt::CaseSensitivity cs=Qt::CaseSensitive) const
QString & append(QChar c)
QString left(qsizetype n) const
static QString static QString qsizetype indexOf(QChar c, qsizetype from=0, Qt::CaseSensitivity cs=Qt::CaseSensitive) const
QString trimmed() const &
void resize(qsizetype size)
The QStringView class provides a unified view on UTF-16 strings with a read-only subset of the QStrin...
int main(int argc, char **argv)
QMap< QString, QString > map
[6]
Q_DECL_CONST_FUNCTION Q_CORE_EXPORT const Properties *QT_FASTCALL properties(char32_t ucs4) noexcept
Q_CORE_EXPORT QStringView QT_FASTCALL idnaMapping(char32_t ucs4) noexcept
QTextStream & hex(QTextStream &stream)
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION QByteArrayView trimmed(QByteArrayView s) noexcept
Q_CORE_EXPORT Q_DECL_PURE_FUNCTION bool startsWith(QByteArrayView haystack, QByteArrayView needle) noexcept
constexpr struct q20::ranges::@309 any_of
DBusConnection const char DBusError DBusBusType DBusError return DBusConnection DBusHandleMessageFunction void DBusFreeFunction return DBusConnection return DBusConnection return const char DBusError return DBusConnection DBusMessage dbus_uint32_t return DBusConnection dbus_bool_t DBusConnection DBusAddWatchFunction DBusRemoveWatchFunction DBusWatchToggledFunction void DBusFreeFunction return DBusConnection DBusDispatchStatusFunction void DBusFreeFunction DBusTimeout return DBusTimeout return DBusWatch return DBusWatch unsigned int return DBusError const DBusError return const DBusMessage return DBusMessage return DBusMessage return DBusMessage return DBusMessage return DBusMessage return DBusMessageIter * iter
typedef QByteArray(EGLAPIENTRYP PFNQGSGETDISPLAYSPROC)()
QT_BEGIN_INCLUDE_NAMESPACE typedef unsigned char uchar
GLenum GLuint GLenum GLsizei length
GLenum GLsizei GLsizei GLint * values
[16]
GLboolean GLboolean GLboolean b
GLsizei const GLfloat * v
[13]
GLfloat GLfloat GLfloat w
[0]
GLboolean GLboolean GLboolean GLboolean a
[7]
GLint GLsizei GLsizei GLenum GLenum GLsizei void * data
GLsizei GLenum * categories
GLenum GLenum GLenum GLenum mapping
#define Q_ASSERT_X(cond, x, msg)
QFuture< QSet< QString > > lowerCase
QTextStream out(stdout)
[7]
QHttpRequestHeader header("GET", QUrl::toPercentEncoding("/index.html"))
[1]
bool operator==(const PropertyFlags &o) const
QChar::UnicodeVersion age
LineBreakClass lineBreakClass
QChar::Direction direction
WordBreakClass wordBreakClass
GraphemeBreakClass graphemeBreakClass
SentenceBreakClass sentenceBreakClass
QChar::JoiningType joining
EastAsianWidth eastAsianWidth
qsizetype indexOf(const AT &t, qsizetype from=0) const noexcept
QList< int > decomposition
IdnaRawStatus idnaRawStatus
UnicodeData(int codepoint=0)
QList< int > specialFolding
QChar::Decomposition decompositionType
static UnicodeData & valueRef(int codepoint)
bool operator==(const UniqueBlock &other) const
void dump(QAbstractItemModel *model, QString const &indent=" - ", QModelIndex const &parent={})
@ GraphemeBreak_RegionalIndicator
@ GraphemeBreak_Unassigned
@ GraphemeBreak_SpacingMark
@ GraphemeBreak_Extended_Pictographic
@ WordBreak_RegionalIndicator
QByteArray createCasingInfo()
@ SentenceBreak_Unassigned
@ SentenceBreak_SContinue
XmlOutput::xml_output comment(const QString &text)