QtBase  v6.3.1
hb-ot-shape-complex-arabic.cc
Go to the documentation of this file.
1 /*
2  * Copyright © 2010,2012 Google, Inc.
3  *
4  * This is part of HarfBuzz, a text shaping library.
5  *
6  * Permission is hereby granted, without written agreement and without
7  * license or royalty fees, to use, copy, modify, and distribute this
8  * software and its documentation for any purpose, provided that the
9  * above copyright notice and the following two paragraphs appear in
10  * all copies of this software.
11  *
12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16  * DAMAGE.
17  *
18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20  * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23  *
24  * Google Author(s): Behdad Esfahbod
25  */
26 
27 #include "hb.hh"
28 
29 #ifndef HB_NO_OT_SHAPE
30 
32 #include "hb-ot-shape.hh"
33 
34 
35 /* buffer var allocations */
36 #define arabic_shaping_action() complex_var_u8_auxiliary() /* arabic shaping action */
37 
38 #define HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH HB_BUFFER_SCRATCH_FLAG_COMPLEX0
39 
/* Evaluates to non-zero when the flag for general category `gc` is in the
 * set listed below: unassigned and private-use code points, modifier and
 * other letters, all three mark categories, all three number categories and
 * all four symbol categories.  The cased-letter categories (lowercase,
 * titlecase, uppercase) are not part of the set.
 *
 * apply_stch() uses this to decide how far back the stretch context extends.
 *
 * Background discussion:
 * https://github.com/harfbuzz/harfbuzz/commit/6e6f82b6f3dde0fc6c3c7d991d9ec6cfff57823d#commitcomment-14248516 */
#define HB_ARABIC_GENERAL_CATEGORY_IS_WORD(gc) \
  (FLAG_UNSAFE (gc) & \
   ( /* Unassigned / private use. */ \
     FLAG (HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED) | \
     FLAG (HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE) | \
     /* Letters (uncased only). */ \
     FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER) | \
     FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER) | \
     /* Marks. */ \
     FLAG (HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK) | \
     FLAG (HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK) | \
     FLAG (HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) | \
     /* Numbers. */ \
     FLAG (HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER) | \
     FLAG (HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER) | \
     FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER) | \
     /* Symbols. */ \
     FLAG (HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL) | \
     FLAG (HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL) | \
     FLAG (HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL) | \
     FLAG (HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL)))
61 
62 
63 /*
64  * Joining types:
65  */
66 
67 /*
68  * Bits used in the joining tables
69  */
79 
81  JOINING_TYPE_X = 8 /* means: use general-category to choose between U or T. */
82 };
83 
85 
86 static unsigned int get_joining_type (hb_codepoint_t u, hb_unicode_general_category_t gen_cat)
87 {
88  unsigned int j_type = joining_type(u);
89  if (likely (j_type != JOINING_TYPE_X))
90  return j_type;
91 
92  return (FLAG_UNSAFE(gen_cat) &
97 }
98 
/* Non-zero when `feature_tag`, narrowed to unsigned char, lies in '2'..'3'.
 * Among the entries of arabic_features[] that matches the tags ending in a
 * digit (fin2, fin3, med2) -- presumably because HB_TAG keeps the last
 * character in the low byte; confirm against HB_TAG's definition. */
#define FEATURE_IS_SYRIAC(feature_tag) \
  hb_in_range<unsigned char> ((unsigned char) (feature_tag), '2', '3')
100 
101 static const hb_tag_t arabic_features[] =
102 {
103  HB_TAG('i','s','o','l'),
104  HB_TAG('f','i','n','a'),
105  HB_TAG('f','i','n','2'),
106  HB_TAG('f','i','n','3'),
107  HB_TAG('m','e','d','i'),
108  HB_TAG('m','e','d','2'),
109  HB_TAG('i','n','i','t'),
111 };
112 
113 
114 /* Same order as the feature array */
123 
125 
127 
128  /* We abuse the same byte for other things... */
131 };
132 
133 static const struct arabic_state_table_entry {
134  uint8_t prev_action;
135  uint8_t curr_action;
136  uint16_t next_state;
137 } arabic_state_table[][NUM_STATE_MACHINE_COLS] =
138 {
139  /* jt_U, jt_L, jt_R, jt_D, jg_ALAPH, jg_DALATH_RISH */
140 
141  /* State 0: prev was U, not willing to join. */
142  { {NONE,NONE,0}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,6}, },
143 
144  /* State 1: prev was R or ISOL/ALAPH, not willing to join. */
145  { {NONE,NONE,0}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,2}, {NONE,FIN2,5}, {NONE,ISOL,6}, },
146 
147  /* State 2: prev was D/L in ISOL form, willing to join. */
148  { {NONE,NONE,0}, {NONE,ISOL,2}, {INIT,FINA,1}, {INIT,FINA,3}, {INIT,FINA,4}, {INIT,FINA,6}, },
149 
150  /* State 3: prev was D in FINA form, willing to join. */
151  { {NONE,NONE,0}, {NONE,ISOL,2}, {MEDI,FINA,1}, {MEDI,FINA,3}, {MEDI,FINA,4}, {MEDI,FINA,6}, },
152 
153  /* State 4: prev was FINA ALAPH, not willing to join. */
154  { {NONE,NONE,0}, {NONE,ISOL,2}, {MED2,ISOL,1}, {MED2,ISOL,2}, {MED2,FIN2,5}, {MED2,ISOL,6}, },
155 
156  /* State 5: prev was FIN2/FIN3 ALAPH, not willing to join. */
157  { {NONE,NONE,0}, {NONE,ISOL,2}, {ISOL,ISOL,1}, {ISOL,ISOL,2}, {ISOL,FIN2,5}, {ISOL,ISOL,6}, },
158 
159  /* State 6: prev was DALATH/RISH, not willing to join. */
160  { {NONE,NONE,0}, {NONE,ISOL,2}, {NONE,ISOL,1}, {NONE,ISOL,2}, {NONE,FIN3,5}, {NONE,ISOL,6}, }
161 };
162 
163 
164 static void
165 arabic_fallback_shape (const hb_ot_shape_plan_t *plan,
166  hb_font_t *font,
168 
169 static void
170 record_stch (const hb_ot_shape_plan_t *plan,
171  hb_font_t *font,
173 
174 static void
175 collect_features_arabic (hb_ot_shape_planner_t *plan)
176 {
177  hb_ot_map_builder_t *map = &plan->map;
178 
179  /* We apply features according to the Arabic spec, with pauses
180  * in between most.
181  *
182  * The pause between init/medi/... and rlig is required. See eg:
183  * https://bugzilla.mozilla.org/show_bug.cgi?id=644184
184  *
185  * The pauses between init/medi/... themselves are not necessarily
186  * needed as only one of those features is applied to any character.
187  * The only difference it makes is when fonts have contextual
188  * substitutions. We now follow the order of the spec, which makes
189  * for better experience if that's what Uniscribe is doing.
190  *
191  * At least for Arabic, looks like Uniscribe has a pause between
192  * rlig and calt. Otherwise the IranNastaliq's ALLAH ligature won't
193  * work. However, testing shows that rlig and calt are applied
194  * together for Mongolian in Uniscribe. As such, we only add a
195  * pause for Arabic, not other scripts.
196  *
197  * A pause after calt is required to make KFGQPC Uthmanic Script HAFS
198  * work correctly. See https://github.com/harfbuzz/harfbuzz/issues/505
199  */
200 
201 
202  map->enable_feature (HB_TAG('s','t','c','h'));
203  map->add_gsub_pause (record_stch);
204 
205  map->enable_feature (HB_TAG('c','c','m','p'));
206  map->enable_feature (HB_TAG('l','o','c','l'));
207 
208  map->add_gsub_pause (nullptr);
209 
210  for (unsigned int i = 0; i < ARABIC_NUM_FEATURES; i++)
211  {
212  bool has_fallback = plan->props.script == HB_SCRIPT_ARABIC && !FEATURE_IS_SYRIAC (arabic_features[i]);
213  map->add_feature (arabic_features[i], has_fallback ? F_HAS_FALLBACK : F_NONE);
214  map->add_gsub_pause (nullptr);
215  }
216 
217  /* Normally, Unicode says a ZWNJ means "don't ligate". In Arabic script
218  * however, it says a ZWJ should also mean "don't ligate". So we run
219  * the main ligating features as MANUAL_ZWJ. */
220 
221  map->enable_feature (HB_TAG('r','l','i','g'), F_MANUAL_ZWJ | F_HAS_FALLBACK);
222 
223  if (plan->props.script == HB_SCRIPT_ARABIC)
224  map->add_gsub_pause (arabic_fallback_shape);
225 
226  /* No pause after rclt. See 98460779bae19e4d64d29461ff154b3527bf8420. */
227  map->enable_feature (HB_TAG('r','c','l','t'), F_MANUAL_ZWJ);
228  map->enable_feature (HB_TAG('c','a','l','t'), F_MANUAL_ZWJ);
229  map->add_gsub_pause (nullptr);
230 
231  /* The spec includes 'cswh'. Earlier versions of Windows
232  * used to enable this by default, but testing suggests
233  * that Windows 8 and later do not enable it by default,
234  * and spec now says 'Off by default'.
235  * We disabled this in ae23c24c32.
236  * Note that IranNastaliq uses this feature extensively
237  * to fixup broken glyph sequences. Oh well...
238  * Test case: U+0643,U+0640,U+0631. */
239  //map->enable_feature (HB_TAG('c','s','w','h'));
240  map->enable_feature (HB_TAG('m','s','e','t'));
241 }
242 
244 
246 {
247  /* The "+ 1" in the next array is to accommodate for the "NONE" command,
248  * which is not an OpenType feature, but this simplifies the code by not
249  * having to do a "if (... < NONE) ..." and just rely on the fact that
250  * mask_array[NONE] == 0. */
252 
254 
255  unsigned int do_fallback : 1;
256  unsigned int has_stch : 1;
257 };
258 
259 void *
261 {
262  arabic_shape_plan_t *arabic_plan = (arabic_shape_plan_t *) hb_calloc (1, sizeof (arabic_shape_plan_t));
263  if (unlikely (!arabic_plan))
264  return nullptr;
265 
266  arabic_plan->do_fallback = plan->props.script == HB_SCRIPT_ARABIC;
267  arabic_plan->has_stch = !!plan->map.get_1_mask (HB_TAG ('s','t','c','h'));
268  for (unsigned int i = 0; i < ARABIC_NUM_FEATURES; i++) {
269  arabic_plan->mask_array[i] = plan->map.get_1_mask (arabic_features[i]);
270  arabic_plan->do_fallback = arabic_plan->do_fallback &&
271  (FEATURE_IS_SYRIAC (arabic_features[i]) ||
272  plan->map.needs_fallback (arabic_features[i]));
273  }
274 
275  return arabic_plan;
276 }
277 
278 void
280 {
281  arabic_shape_plan_t *arabic_plan = (arabic_shape_plan_t *) data;
282 
283  arabic_fallback_plan_destroy (arabic_plan->fallback_plan);
284 
285  hb_free (data);
286 }
287 
288 static void
289 arabic_joining (hb_buffer_t *buffer)
290 {
291  unsigned int count = buffer->len;
292  hb_glyph_info_t *info = buffer->info;
293  unsigned int prev = UINT_MAX, state = 0;
294 
295  /* Check pre-context */
296  for (unsigned int i = 0; i < buffer->context_len[0]; i++)
297  {
298  unsigned int this_type = get_joining_type (buffer->context[0][i], buffer->unicode->general_category (buffer->context[0][i]));
299 
300  if (unlikely (this_type == JOINING_TYPE_T))
301  continue;
302 
303  const arabic_state_table_entry *entry = &arabic_state_table[state][this_type];
304  state = entry->next_state;
305  break;
306  }
307 
308  for (unsigned int i = 0; i < count; i++)
309  {
310  unsigned int this_type = get_joining_type (info[i].codepoint, _hb_glyph_info_get_general_category (&info[i]));
311 
312  if (unlikely (this_type == JOINING_TYPE_T)) {
313  info[i].arabic_shaping_action() = NONE;
314  continue;
315  }
316 
317  const arabic_state_table_entry *entry = &arabic_state_table[state][this_type];
318 
319  if (entry->prev_action != NONE && prev != UINT_MAX)
320  {
321  info[prev].arabic_shaping_action() = entry->prev_action;
322  buffer->unsafe_to_break (prev, i + 1);
323  }
324  else
325  {
326  if (prev == UINT_MAX)
327  {
328  if (this_type >= JOINING_TYPE_R)
329  buffer->unsafe_to_concat_from_outbuffer (0, i + 1);
330  }
331  else
332  {
333  if (this_type >= JOINING_TYPE_R ||
334  (2 <= state && state <= 5) /* States that have a possible prev_action. */)
335  buffer->unsafe_to_concat (prev, i + 1);
336  }
337  }
338 
339  info[i].arabic_shaping_action() = entry->curr_action;
340 
341  prev = i;
342  state = entry->next_state;
343  }
344 
345  for (unsigned int i = 0; i < buffer->context_len[1]; i++)
346  {
347  unsigned int this_type = get_joining_type (buffer->context[1][i], buffer->unicode->general_category (buffer->context[1][i]));
348 
349  if (unlikely (this_type == JOINING_TYPE_T))
350  continue;
351 
352  const arabic_state_table_entry *entry = &arabic_state_table[state][this_type];
353  if (entry->prev_action != NONE && prev != UINT_MAX)
354  {
355  info[prev].arabic_shaping_action() = entry->prev_action;
356  buffer->unsafe_to_break (prev, buffer->len);
357  }
358  else if (2 <= state && state <= 5) /* States that have a possible prev_action. */
359  {
360  buffer->unsafe_to_concat (prev, buffer->len);
361  }
362  break;
363  }
364 }
365 
366 static void
367 mongolian_variation_selectors (hb_buffer_t *buffer)
368 {
369  /* Copy arabic_shaping_action() from base to Mongolian variation selectors. */
370  unsigned int count = buffer->len;
371  hb_glyph_info_t *info = buffer->info;
372  for (unsigned int i = 1; i < count; i++)
373  if (unlikely (hb_in_ranges<hb_codepoint_t> (info[i].codepoint, 0x180Bu, 0x180Du, 0x180Fu, 0x180Fu)))
374  info[i].arabic_shaping_action() = info[i - 1].arabic_shaping_action();
375 }
376 
377 void
381 {
383 
384  arabic_joining (buffer);
386  mongolian_variation_selectors (buffer);
387 
388  unsigned int count = buffer->len;
389  hb_glyph_info_t *info = buffer->info;
390  for (unsigned int i = 0; i < count; i++)
391  info[i].mask |= arabic_plan->mask_array[info[i].arabic_shaping_action()];
392 }
393 
394 static void
395 setup_masks_arabic (const hb_ot_shape_plan_t *plan,
398 {
399  const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data;
400  setup_masks_arabic_plan (arabic_plan, buffer, plan->props.script);
401 }
402 
403 static void
404 arabic_fallback_shape (const hb_ot_shape_plan_t *plan,
405  hb_font_t *font,
407 {
408 #ifdef HB_NO_OT_SHAPE_COMPLEX_ARABIC_FALLBACK
409  return;
410 #endif
411 
412  const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data;
413 
414  if (!arabic_plan->do_fallback)
415  return;
416 
417 retry:
418  arabic_fallback_plan_t *fallback_plan = arabic_plan->fallback_plan;
419  if (unlikely (!fallback_plan))
420  {
421  /* This sucks. We need a font to build the fallback plan... */
422  fallback_plan = arabic_fallback_plan_create (plan, font);
423  if (unlikely (!arabic_plan->fallback_plan.cmpexch (nullptr, fallback_plan)))
424  {
425  arabic_fallback_plan_destroy (fallback_plan);
426  goto retry;
427  }
428  }
429 
430  arabic_fallback_plan_shape (fallback_plan, font, buffer);
431 }
432 
433 /*
434  * Stretch feature: "stch".
435  * See example here:
436  * https://docs.microsoft.com/en-us/typography/script-development/syriac
437  * We implement this in a generic way, such that the Arabic subtending
438  * marks can use it as well.
439  */
440 
441 static void
442 record_stch (const hb_ot_shape_plan_t *plan,
445 {
446  const arabic_shape_plan_t *arabic_plan = (const arabic_shape_plan_t *) plan->data;
447  if (!arabic_plan->has_stch)
448  return;
449 
450  /* 'stch' feature was just applied. Look for anything that multiplied,
451  * and record it for stch treatment later. Note that rtlm, frac, etc
452  * are applied before stch, but we assume that they didn't result in
453  * anything multiplying into 5 pieces, so it's safe-ish... */
454 
455  unsigned int count = buffer->len;
456  hb_glyph_info_t *info = buffer->info;
457  for (unsigned int i = 0; i < count; i++)
458  if (unlikely (_hb_glyph_info_multiplied (&info[i])))
459  {
460  unsigned int comp = _hb_glyph_info_get_lig_comp (&info[i]);
461  info[i].arabic_shaping_action() = comp % 2 ? STCH_REPEATING : STCH_FIXED;
463  }
464 }
465 
466 static void
467 apply_stch (const hb_ot_shape_plan_t *plan HB_UNUSED,
469  hb_font_t *font)
470 {
471  if (likely (!(buffer->scratch_flags & HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH)))
472  return;
473 
474  /* The Arabic shaper currently always processes in RTL mode, so we should
475  * stretch / position the stretched pieces to the left / preceding glyphs. */
476 
477  /* We do a two pass implementation:
478  * First pass calculates the exact number of extra glyphs we need,
479  * We then enlarge buffer to have that much room,
480  * Second pass applies the stretch, copying things to the end of buffer.
481  */
482 
483  int sign = font->x_scale < 0 ? -1 : +1;
484  unsigned int extra_glyphs_needed = 0; // Set during MEASURE, used during CUT
485  enum { MEASURE, CUT } /* step_t */;
486 
487  for (unsigned int step = MEASURE; step <= CUT; step = step + 1)
488  {
489  unsigned int count = buffer->len;
490  hb_glyph_info_t *info = buffer->info;
492  unsigned int new_len = count + extra_glyphs_needed; // write head during CUT
493  unsigned int j = new_len;
494  for (unsigned int i = count; i; i--)
495  {
496  if (!hb_in_range<uint8_t> (info[i - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING))
497  {
498  if (step == CUT)
499  {
500  --j;
501  info[j] = info[i - 1];
502  pos[j] = pos[i - 1];
503  }
504  continue;
505  }
506 
507  /* Yay, justification! */
508 
509  hb_position_t w_total = 0; // Total to be filled
510  hb_position_t w_fixed = 0; // Sum of fixed tiles
511  hb_position_t w_repeating = 0; // Sum of repeating tiles
512  int n_fixed = 0;
513  int n_repeating = 0;
514 
515  unsigned int end = i;
516  while (i &&
517  hb_in_range<uint8_t> (info[i - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING))
518  {
519  i--;
520  hb_position_t width = font->get_glyph_h_advance (info[i].codepoint);
522  {
523  w_fixed += width;
524  n_fixed++;
525  }
526  else
527  {
528  w_repeating += width;
529  n_repeating++;
530  }
531  }
532  unsigned int start = i;
533  unsigned int context = i;
534  while (context &&
535  !hb_in_range<uint8_t> (info[context - 1].arabic_shaping_action(), STCH_FIXED, STCH_REPEATING) &&
536  (_hb_glyph_info_is_default_ignorable (&info[context - 1]) ||
537  HB_ARABIC_GENERAL_CATEGORY_IS_WORD (_hb_glyph_info_get_general_category (&info[context - 1]))))
538  {
539  context--;
540  w_total += pos[context].x_advance;
541  }
542  i++; // Don't touch i again.
543 
544  DEBUG_MSG (ARABIC, nullptr, "%s stretch at (%d,%d,%d)",
545  step == MEASURE ? "measuring" : "cutting", context, start, end);
546  DEBUG_MSG (ARABIC, nullptr, "rest of word: count=%d width %d", start - context, w_total);
547  DEBUG_MSG (ARABIC, nullptr, "fixed tiles: count=%d width=%d", n_fixed, w_fixed);
548  DEBUG_MSG (ARABIC, nullptr, "repeating tiles: count=%d width=%d", n_repeating, w_repeating);
549 
550  /* Number of additional times to repeat each repeating tile. */
551  int n_copies = 0;
552 
553  hb_position_t w_remaining = w_total - w_fixed;
554  if (sign * w_remaining > sign * w_repeating && sign * w_repeating > 0)
555  n_copies = (sign * w_remaining) / (sign * w_repeating) - 1;
556 
557  /* See if we can improve the fit by adding an extra repeat and squeezing them together a bit. */
558  hb_position_t extra_repeat_overlap = 0;
559  hb_position_t shortfall = sign * w_remaining - sign * w_repeating * (n_copies + 1);
560  if (shortfall > 0 && n_repeating > 0)
561  {
562  ++n_copies;
563  hb_position_t excess = (n_copies + 1) * sign * w_repeating - sign * w_remaining;
564  if (excess > 0)
565  extra_repeat_overlap = excess / (n_copies * n_repeating);
566  }
567 
568  if (step == MEASURE)
569  {
570  extra_glyphs_needed += n_copies * n_repeating;
571  DEBUG_MSG (ARABIC, nullptr, "will add extra %d copies of repeating tiles", n_copies);
572  }
573  else
574  {
575  buffer->unsafe_to_break (context, end);
576  hb_position_t x_offset = 0;
577  for (unsigned int k = end; k > start; k--)
578  {
579  hb_position_t width = font->get_glyph_h_advance (info[k - 1].codepoint);
580 
581  unsigned int repeat = 1;
582  if (info[k - 1].arabic_shaping_action() == STCH_REPEATING)
583  repeat += n_copies;
584 
585  DEBUG_MSG (ARABIC, nullptr, "appending %d copies of glyph %d; j=%d",
586  repeat, info[k - 1].codepoint, j);
587  for (unsigned int n = 0; n < repeat; n++)
588  {
589  x_offset -= width;
590  if (n > 0)
591  x_offset += extra_repeat_overlap;
592  pos[k - 1].x_offset = x_offset;
593  /* Append copy. */
594  --j;
595  info[j] = info[k - 1];
596  pos[j] = pos[k - 1];
597  }
598  }
599  }
600  }
601 
602  if (step == MEASURE)
603  {
604  if (unlikely (!buffer->ensure (count + extra_glyphs_needed)))
605  break;
606  }
607  else
608  {
609  assert (j == 0);
610  buffer->len = new_len;
611  }
612  }
613 }
614 
615 
616 static void
617 postprocess_glyphs_arabic (const hb_ot_shape_plan_t *plan,
619  hb_font_t *font)
620 {
621  apply_stch (plan, buffer, font);
622 
624 }
625 
626 /* https://www.unicode.org/reports/tr53/ */
627 
628 static hb_codepoint_t
629 modifier_combining_marks[] =
630 {
631  0x0654u, /* ARABIC HAMZA ABOVE */
632  0x0655u, /* ARABIC HAMZA BELOW */
633  0x0658u, /* ARABIC MARK NOON GHUNNA */
634  0x06DCu, /* ARABIC SMALL HIGH SEEN */
635  0x06E3u, /* ARABIC SMALL LOW SEEN */
636  0x06E7u, /* ARABIC SMALL HIGH YEH */
637  0x06E8u, /* ARABIC SMALL HIGH NOON */
638  0x08CAu, /* ARABIC SMALL HIGH FARSI YEH */
639  0x08CBu, /* ARABIC SMALL HIGH YEH BARREE WITH TWO DOTS BELOW */
640  0x08CDu, /* ARABIC SMALL HIGH ZAH */
641  0x08CEu, /* ARABIC LARGE ROUND DOT ABOVE */
642  0x08CFu, /* ARABIC LARGE ROUND DOT BELOW */
643  0x08D3u, /* ARABIC SMALL LOW WAW */
644  0x08F3u, /* ARABIC SMALL HIGH WAW */
645 };
646 
647 static inline bool
648 info_is_mcm (const hb_glyph_info_t &info)
649 {
650  hb_codepoint_t u = info.codepoint;
651  for (unsigned int i = 0; i < ARRAY_LENGTH (modifier_combining_marks); i++)
652  if (u == modifier_combining_marks[i])
653  return true;
654  return false;
655 }
656 
657 static void
658 reorder_marks_arabic (const hb_ot_shape_plan_t *plan HB_UNUSED,
660  unsigned int start,
661  unsigned int end)
662 {
663  hb_glyph_info_t *info = buffer->info;
664 
665  DEBUG_MSG (ARABIC, buffer, "Reordering marks from %d to %d", start, end);
666 
667  unsigned int i = start;
668  for (unsigned int cc = 220; cc <= 230; cc += 10)
669  {
670  DEBUG_MSG (ARABIC, buffer, "Looking for %d's starting at %d", cc, i);
671  while (i < end && info_cc(info[i]) < cc)
672  i++;
673  DEBUG_MSG (ARABIC, buffer, "Looking for %d's stopped at %d", cc, i);
674 
675  if (i == end)
676  break;
677 
678  if (info_cc(info[i]) > cc)
679  continue;
680 
681  unsigned int j = i;
682  while (j < end && info_cc(info[j]) == cc && info_is_mcm (info[j]))
683  j++;
684 
685  if (i == j)
686  continue;
687 
688  DEBUG_MSG (ARABIC, buffer, "Found %d's from %d to %d", cc, i, j);
689 
690  /* Shift it! */
691  DEBUG_MSG (ARABIC, buffer, "Shifting %d's: %d %d", cc, i, j);
693  assert (j - i <= ARRAY_LENGTH (temp));
694  buffer->merge_clusters (start, j);
695  memmove (temp, &info[i], (j - i) * sizeof (hb_glyph_info_t));
696  memmove (&info[start + j - i], &info[start], (i - start) * sizeof (hb_glyph_info_t));
697  memmove (&info[start], temp, (j - i) * sizeof (hb_glyph_info_t));
698 
699  /* Renumber CC such that the reordered sequence is still sorted.
700  * 22 and 26 are chosen because they are smaller than all Arabic categories,
701  * and are folded back to 220/230 respectively during fallback mark positioning.
702  *
703  * We do this because the CGJ-handling logic in the normalizer relies on
704  * mark sequences having an increasing order even after this reordering.
705  * https://github.com/harfbuzz/harfbuzz/issues/554
706  * This, however, does break some obscure sequences, where the normalizer
707  * might compose a sequence that it should not. For example, in the sequence
708  * ALEF, HAMZAH, MADDAH, we should NOT try to compose ALEF+MADDAH, but with this
709  * renumbering, we will.
710  */
711  unsigned int new_start = start + j - i;
712  unsigned int new_cc = cc == 220 ? HB_MODIFIED_COMBINING_CLASS_CCC22 : HB_MODIFIED_COMBINING_CLASS_CCC26;
713  while (start < new_start)
714  {
715  _hb_glyph_info_set_modified_combining_class (&info[start], new_cc);
716  start++;
717  }
718 
719  i = j;
720  }
721 }
722 
724 {
725  collect_features_arabic,
726  nullptr, /* override_features */
729  nullptr, /* preprocess_text */
730  postprocess_glyphs_arabic,
732  nullptr, /* decompose */
733  nullptr, /* compose */
734  setup_masks_arabic,
735  HB_TAG_NONE, /* gpos_tag */
736  reorder_marks_arabic,
738  true, /* fallback_position */
739 };
740 
741 
742 #endif
small capitals from c petite p scientific f u
Definition: afcover.h:88
small capitals from c petite p scientific i
[1]
Definition: afcover.h:80
xD9 x84 xD8 xAD xD9 x80 xF0 x90 xAC x9A xE0 xA7 xA6 xE0 xA7 xAA xF0 x91 x84 xA4 xF0 x91 x84 x89 xF0 x91 x84 x9B xF0 x90 x8A xAB xF0 x90 x8B x89 xE2 xB2 x9E xE2 xB2 x9F xD0 xBE xD0 x9E xF0 x90 x90 x84 xF0 x90 x90 xAC xE1 x83 x98 xE1 x83 x94 xE1 x83 x90 xE1 xB2 xBF xE2 xB0 x95 xE2 xB1 x85 xCE xBF xCE x9F xE0 xA8 xA0 xE0 xA8 xB0 xE0 xA9 xA6 Kayah xEA xA4 x8D xEA xA4 x80 Khmer xE1 xA7 xA1 xE1 xA7 xAA xE0 xBB x90 Latin Subscript xE2 x82 x92 xE2 x82 x80 xEA x93 xB3 xF0 x96 xB9 xA1 xF0 x96 xB9 x9B xF0 x96 xB9 xAF xE1 x80 x9D xE1 x80 x84 xE1 x80 x82 no script
Definition: afscript.h:271
HB_SCRIPT_ARABIC
Definition: afscript.h:41
float step
QMap< QString, QString > map
[6]
else opt state
[0]
@ FLAG
Definition: inflate.c:14
#define FLAG_UNSAFE(x)
Definition: hb-algs.hh:74
#define HB_BUFFER_DEALLOCATE_VAR(b, var)
Definition: hb-buffer.hh:623
#define HB_BUFFER_ALLOCATE_VAR(b, var)
Definition: hb-buffer.hh:622
#define DEBUG_MSG(WHAT, OBJ,...)
void const void *obj HB_UNUSED
Definition: hb-debug.hh:180
#define info_cc(info)
@ F_HAS_FALLBACK
Definition: hb-ot-map.hh:181
@ F_MANUAL_ZWJ
Definition: hb-ot-map.hh:183
@ F_NONE
Definition: hb-ot-map.hh:179
#define FEATURE_IS_SYRIAC(tag)
#define HB_BUFFER_SCRATCH_FLAG_ARABIC_HAS_STCH
void data_destroy_arabic(void *data)
#define HB_ARABIC_GENERAL_CATEGORY_IS_WORD(gen_cat)
void setup_masks_arabic_plan(const arabic_shape_plan_t *arabic_plan, hb_buffer_t *buffer, hb_script_t script)
#define arabic_shaping_action()
void * data_create_arabic(const hb_ot_shape_plan_t *plan)
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_arabic
@ JOINING_GROUP_DALATH_RISH
@ NUM_STATE_MACHINE_COLS
@ HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE
#define HB_OT_SHAPE_COMPLEX_MAX_COMBINING_MARKS
@ HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT
#define HB_MODIFIED_COMBINING_CLASS_CCC26
Definition: hb-unicode.hh:301
#define HB_MODIFIED_COMBINING_CLASS_CCC22
Definition: hb-unicode.hh:297
#define hb_calloc
Definition: hb.hh:236
#define likely(expr)
Definition: hb.hh:250
#define unlikely(expr)
Definition: hb.hh:251
#define hb_free
Definition: hb.hh:238
backing_store_ptr info
[4]
Definition: jmemsys.h:161
GeneratorWrapper< T > repeat(size_t repeats, GeneratorWrapper< T > &&generator)
Definition: catch_p_p.h:4251
void *PRIV() memmove(void *d, const void *s, size_t n)
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld if[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1(dst_w_bpp<=(lowbit *8)) &&((lowbit *8)<(pixblock_size *dst_w_bpp)) .if lowbit< 16 tst DST_R
[3]
#define assert
Definition: qcborcommon_p.h:63
#define ARRAY_LENGTH(a)
Definition: qkmsdevice.cpp:52
GLuint GLuint end
GLenum GLenum GLsizei count
GLenum GLuint buffer
GLint GLsizei width
GLint GLsizei GLsizei GLenum GLenum GLsizei void * data
GLuint start
GLint GLint GLint GLint GLint GLint GLint GLbitfield mask
GLfloat n
GLuint entry
Definition: qopenglext.h:11002
uint32_t hb_codepoint_t
Definition: hb-common.h:106
#define HB_TAG(c1, c2, c3, c4)
Definition: hb-common.h:169
uint32_t hb_mask_t
Definition: hb-common.h:122
int32_t hb_position_t
Definition: hb-common.h:115
hb_script_t
Definition: hb-common.h:506
@ HB_SCRIPT_MONGOLIAN
Definition: hb-common.h:542
uint32_t hb_tag_t
Definition: hb-common.h:157
#define HB_TAG_NONE
Definition: hb-common.h:187
hb_unicode_general_category_t
Definition: hb-unicode.h:92
@ HB_UNICODE_GENERAL_CATEGORY_FORMAT
Definition: hb-unicode.h:94
@ HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK
Definition: hb-unicode.h:105
@ HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK
Definition: hb-unicode.h:104
hb_mask_t mask_array[ARABIC_NUM_FEATURES+1]
hb_atomic_ptr_t< arabic_fallback_plan_t > fallback_plan
bool cmpexch(const T *old, T *new_) const
Definition: hb-atomic.hh:171
hb_mask_t get_1_mask(hb_tag_t feature_tag) const
Definition: hb-ot-map.hh:124
bool needs_fallback(hb_tag_t feature_tag) const
Definition: hb-ot-map.hh:118
const void * data
Definition: hb-ot-shape.hh:67
hb_ot_map_t map
Definition: hb-ot-shape.hh:65
hb_segment_properties_t props
Definition: hb-ot-shape.hh:63
hb_segment_properties_t props
Definition: hb-ot-shape.hh:151
hb_ot_map_builder_t map
Definition: hb-ot-shape.hh:152