QtBase  v6.3.1
pixman-arm-neon-asm.h
Go to the documentation of this file.
1 /*
2  * Copyright © 2009 Nokia Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  * Author: Siarhei Siamashka (siarhei.siamashka@nokia.com)
24  */
25 
26 /*
27  * This file contains a macro ('generate_composite_function') which can
28  * construct 2D image processing functions, based on a common template.
29  * Any combinations of source, destination and mask images with 8bpp,
30  * 16bpp, 24bpp, 32bpp color formats are supported.
31  *
32  * This macro takes care of:
33  * - handling of leading and trailing unaligned pixels
34  * - doing most of the work related to L2 cache preload
35  * - encourages the use of software pipelining for better instructions
36  * scheduling
37  *
38  * The user of this macro has to provide some configuration parameters
39  * (bit depths for the images, prefetch distance, etc.) and a set of
40  * macros, which should implement basic code chunks responsible for
41  * pixels processing. See 'pixman-arm-neon-asm.S' file for the usage
42  * examples.
43  *
44  * TODO:
45  * - try overlapped pixel method (from Ian Rickards) when processing
46  * exactly two blocks of pixels
47  * - maybe add an option to do reverse scanline processing
48  */
49 
50 /*
51  * Bit flags for 'generate_composite_function' macro which are used
52  * to tune generated functions behavior.
53  */
57 
58 /*
59  * Offset in stack where mask and source pointer/stride can be accessed
60  * from 'init' macro. This is useful for doing special handling for solid mask.
61  */
63 
64 /*
65  * Constants for selecting preferable prefetch type.
66  */
67 .set PREFETCH_TYPE_NONE, 0 /* No prefetch at all */
68 .set PREFETCH_TYPE_SIMPLE, 1 /* A simple, fixed-distance-ahead prefetch */
69 .set PREFETCH_TYPE_ADVANCED, 2 /* Advanced fine-grained prefetch */
70 
71 /*
72  * Definitions of supplementary pixld/pixst macros (for partial load/store of
73  * pixel data).
74  */
75 
76 .macro pixldst1 op, elem_size, reg1, mem_operand, abits
77 .if abits > 0
78  op&.&elem_size {d&reg1}, [&mem_operand&, :&abits&]!
79 .else
80  op&.&elem_size {d&reg1}, [&mem_operand&]!
81 .endif
82 .endm
83 
84 .macro pixldst2 op, elem_size, reg1, reg2, mem_operand, abits
85 .if abits > 0
86  op&.&elem_size {d&reg1, d&reg2}, [&mem_operand&, :&abits&]!
87 .else
88  op&.&elem_size {d&reg1, d&reg2}, [&mem_operand&]!
89 .endif
90 .endm
91 
92 .macro pixldst4 op, elem_size, reg1, reg2, reg3, reg4, mem_operand, abits
93 .if abits > 0
94  op&.&elem_size {d&reg1, d&reg2, d&reg3, d&reg4}, [&mem_operand&, :&abits&]!
95 .else
96  op&.&elem_size {d&reg1, d&reg2, d&reg3, d&reg4}, [&mem_operand&]!
97 .endif
98 .endm
99 
100 .macro pixldst0 op, elem_size, reg1, idx, mem_operand, abits
101  op&.&elem_size {d&reg1[idx]}, [&mem_operand&]!
102 .endm
103 
104 .macro pixldst3 op, elem_size, reg1, reg2, reg3, mem_operand
105  op&.&elem_size {d&reg1, d&reg2, d&reg3}, [&mem_operand&]!
106 .endm
107 
108 .macro pixldst30 op, elem_size, reg1, reg2, reg3, idx, mem_operand
109  op&.&elem_size {d&reg1[idx], d&reg2[idx], d&reg3[idx]}, [&mem_operand&]!
110 .endm
111 
113 .if numbytes == 32
114  pixldst4 op, elem_size, %(basereg+4), %(basereg+5), \
115  %(basereg+6), %(basereg+7), mem_operand, abits
116 .elseif numbytes == 16
117  pixldst2 op, elem_size, %(basereg+2), %(basereg+3), mem_operand, abits
118 .elseif numbytes == 8
119  pixldst1 op, elem_size, %(basereg+1), mem_operand, abits
120 .elseif numbytes == 4
121  .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 32)
122  pixldst0 op, 32, %(basereg+0), 1, mem_operand, abits
123  .elseif elem_size == 16
124  pixldst0 op, 16, %(basereg+0), 2, mem_operand, abits
125  pixldst0 op, 16, %(basereg+0), 3, mem_operand, abits
126  .else
127  pixldst0 op, 8, %(basereg+0), 4, mem_operand, abits
128  pixldst0 op, 8, %(basereg+0), 5, mem_operand, abits
129  pixldst0 op, 8, %(basereg+0), 6, mem_operand, abits
130  pixldst0 op, 8, %(basereg+0), 7, mem_operand, abits
131  .endif
132 .elseif numbytes == 2
133  .if !RESPECT_STRICT_ALIGNMENT || (elem_size == 16)
134  pixldst0 op, 16, %(basereg+0), 1, mem_operand, abits
135  .else
136  pixldst0 op, 8, %(basereg+0), 2, mem_operand, abits
137  pixldst0 op, 8, %(basereg+0), 3, mem_operand, abits
138  .endif
139 .elseif numbytes == 1
140  pixldst0 op, 8, %(basereg+0), 1, mem_operand, abits
141 .else
142  .error "unsupported size: numbytes"
143 .endif
144 .endm
145 
147 .if bpp > 0
148 .if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
149  pixldst4 vld4, 8, %(basereg+4), %(basereg+5), \
150  %(basereg+6), %(basereg+7), mem_operand, abits
151 .elseif (bpp == 24) && (numpix == 8)
152  pixldst3 vld3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
153 .elseif (bpp == 24) && (numpix == 4)
154  pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
155  pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
156  pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
157  pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
158 .elseif (bpp == 24) && (numpix == 2)
159  pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
160  pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
161 .elseif (bpp == 24) && (numpix == 1)
162  pixldst30 vld3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
163 .else
164  pixldst %(numpix * bpp / 8), vld1, %(bpp), basereg, mem_operand, abits
165 .endif
166 .endif
167 .endm
168 
169 .macro pixst numpix, bpp, basereg, mem_operand, abits=0
170 .if bpp > 0
171 .if (bpp == 32) && (numpix == 8) && (DEINTERLEAVE_32BPP_ENABLED != 0)
172  pixldst4 vst4, 8, %(basereg+4), %(basereg+5), \
173  %(basereg+6), %(basereg+7), mem_operand, abits
174 .elseif (bpp == 24) && (numpix == 8)
175  pixldst3 vst3, 8, %(basereg+3), %(basereg+4), %(basereg+5), mem_operand
176 .elseif (bpp == 24) && (numpix == 4)
177  pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 4, mem_operand
178  pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 5, mem_operand
179  pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 6, mem_operand
180  pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 7, mem_operand
181 .elseif (bpp == 24) && (numpix == 2)
182  pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 2, mem_operand
183  pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 3, mem_operand
184 .elseif (bpp == 24) && (numpix == 1)
185  pixldst30 vst3, 8, %(basereg+0), %(basereg+1), %(basereg+2), 1, mem_operand
186 .else
187  pixldst %(numpix * bpp / 8), vst1, %(bpp), basereg, mem_operand, abits
188 .endif
189 .endif
190 .endm
191 
193 .if (bpp * numpix) <= 128
195 .else
197 .endif
198 .endm
199 
201 .if (bpp * numpix) <= 128
202  pixst numpix, bpp, basereg, mem_operand, %(bpp * numpix)
203 .else
205 .endif
206 .endm
207 
209  vuzp.8 d&reg1, d&reg2
210 .endm
211 
212 .macro vzip8 reg1, reg2
213  vzip.8 d&reg1, d&reg2
214 .endm
215 
216 /* deinterleave B, G, R, A channels for eight 32bpp pixels in 4 registers */
217 .macro pixdeinterleave bpp, basereg
218 .if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
219  vuzp8 %(basereg+0), %(basereg+1)
220  vuzp8 %(basereg+2), %(basereg+3)
221  vuzp8 %(basereg+1), %(basereg+3)
222  vuzp8 %(basereg+0), %(basereg+2)
223 .endif
224 .endm
225 
226 /* interleave B, G, R, A channels for eight 32bpp pixels in 4 registers */
227 .macro pixinterleave bpp, basereg
228 .if (bpp == 32) && (DEINTERLEAVE_32BPP_ENABLED != 0)
229  vzip8 %(basereg+0), %(basereg+2)
230  vzip8 %(basereg+1), %(basereg+3)
231  vzip8 %(basereg+2), %(basereg+3)
232  vzip8 %(basereg+0), %(basereg+1)
233 .endif
234 .endm
235 
236 /*
237  * This is a macro for implementing cache preload. The main idea is that
238  * cache preload logic is mostly independent from the rest of pixels
239  * processing code. It starts at the top left pixel and moves forward
240  * across pixels and can jump across scanlines. Prefetch distance is
241  * handled in an 'incremental' way: it starts from 0 and advances to the
242  * optimal distance over time. After reaching optimal prefetch distance,
243  * it is kept constant. There are some checks which prevent prefetching
244  * unneeded pixel lines below the image (but it still can prefetch a bit
245  * more data on the right side of the image - not a big issue and may
246  * be actually helpful when rendering text glyphs). Additional trick is
247  * the use of LDR instruction for prefetch instead of PLD when moving to
248  * the next line, the point is that we have a high chance of getting TLB
249  * miss in this case, and PLD would be useless.
250  *
251  * This sounds like it may introduce a noticeable overhead (when working with
252  * fully cached data). But in reality, due to having a separate pipeline and
253  * instruction queue for NEON unit in ARM Cortex-A8, normal ARM code can
254  * execute simultaneously with NEON and be completely shadowed by it. Thus
255  * we get no performance overhead at all (*). This looks like a very nice
256  * feature of Cortex-A8, if used wisely. We don't have a hardware prefetcher,
257  * but still can implement some rather advanced prefetch logic in sofware
258  * for almost zero cost!
259  *
260  * (*) The overhead of the prefetcher is visible when running some trivial
261  * pixels processing like simple copy. Anyway, having prefetch is a must
262  * when working with the graphics data.
263  */
264 .macro PF a, x:vararg
265 .if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_ADVANCED)
266  a x
267 .endif
268 .endm
269 
270 .macro cache_preload std_increment, boost_increment
271 .if (src_bpp_shift >= 0) || (dst_r_bpp != 0) || (mask_bpp_shift >= 0)
272 .if regs_shortage
273  PF ldr ORIG_W, [sp] /* If we are short on regs, ORIG_W is kept on stack */
274 .endif
277 .endif
278  PF tst PF_CTL, #0xF
279  PF addne PF_X, PF_X, #boost_increment
280  PF subne PF_CTL, PF_CTL, #1
281  PF cmp PF_X, ORIG_W
283  PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
284 .endif
285 .if dst_r_bpp != 0
286  PF pld, [PF_DST, PF_X, lsl #dst_bpp_shift]
287 .endif
289  PF pld, [PF_MASK, PF_X, lsl #mask_bpp_shift]
290 .endif
291  PF subge PF_X, PF_X, ORIG_W
292  PF subges PF_CTL, PF_CTL, #0x10
293 .if src_bpp_shift >= 0
294  PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
295 .endif
296 .if dst_r_bpp != 0
297  PF ldrgeb DUMMY, [PF_DST, DST_STRIDE, lsl #dst_bpp_shift]!
298 .endif
299 .if mask_bpp_shift >= 0
300  PF ldrgeb DUMMY, [PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift]!
301 .endif
302 .endif
303 .endm
304 
305 .macro cache_preload_simple
306 .if (PREFETCH_TYPE_CURRENT == PREFETCH_TYPE_SIMPLE)
307 .if src_bpp > 0
308  pld [SRC, #(PREFETCH_DISTANCE_SIMPLE * src_bpp / 8)]
309 .endif
310 .if dst_r_bpp > 0
311  pld [DST_R, #(PREFETCH_DISTANCE_SIMPLE * dst_r_bpp / 8)]
312 .endif
313 .if mask_bpp > 0
314  pld [MASK, #(PREFETCH_DISTANCE_SIMPLE * mask_bpp / 8)]
315 .endif
316 .endif
317 .endm
318 
319 /*
320  * Macro which is used to process leading pixels until destination
321  * pointer is properly aligned (at 16 bytes boundary). When destination
322  * buffer uses 16bpp format, this is unnecessary, or even pointless.
323  */
324 .macro ensure_destination_ptr_alignment process_pixblock_head, \
327 .if dst_w_bpp != 24
328  tst DST_R, #0xF
329  beq 2f
330 
331 .irp lowbit, 1, 2, 4, 8, 16
332 local skip1
333 .if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
334 .if lowbit < 16 /* we don't need more than 16-byte alignment */
335  tst DST_R, #lowbit
336  beq 1f
337 .endif
340 .if dst_r_bpp > 0
342 .else
344 .endif
345  PF add PF_X, PF_X, #(lowbit * 8 / dst_w_bpp)
346  sub W, W, #(lowbit * 8 / dst_w_bpp)
347 1:
348 .endif
349 .endr
350  pixdeinterleave src_bpp, src_basereg
351  pixdeinterleave mask_bpp, mask_basereg
352  pixdeinterleave dst_r_bpp, dst_r_basereg
353 
355  cache_preload 0, pixblock_size
356  cache_preload_simple
358 
359  pixinterleave dst_w_bpp, dst_w_basereg
360 .irp lowbit, 1, 2, 4, 8, 16
361 .if (dst_w_bpp <= (lowbit * 8)) && ((lowbit * 8) < (pixblock_size * dst_w_bpp))
362 .if lowbit < 16 /* we don't need more than 16-byte alignment */
363  tst DST_W, #lowbit
364  beq 1f
365 .endif
367 1:
368 .endif
369 .endr
370 .endif
371 2:
372 .endm
373 
374 /*
375  * Special code for processing up to (pixblock_size - 1) remaining
376  * trailing pixels. As SIMD processing performs operation on
377  * pixblock_size pixels, anything smaller than this has to be loaded
378  * and stored in a special way. Loading and storing of pixel data is
379  * performed in such a way that we fill some 'slots' in the NEON
380  * registers (some slots naturally are unused), then perform compositing
381  * operation as usual. In the end, the data is taken from these 'slots'
382  * and saved to memory.
383  *
384  * cache_preload_flag - allows to suppress prefetch if
385  * set to 0
386  * dst_aligned_flag - selects whether destination buffer
387  * is aligned
388  */
389 .macro process_trailing_pixels cache_preload_flag, \
390  dst_aligned_flag, \
391  process_pixblock_head, \
392  process_pixblock_tail, \
393  process_pixblock_tail_head
394  tst W, #(pixblock_size - 1)
395  beq 2f
396 .irp chunk_size, 16, 8, 4, 2, 1
397 .if pixblock_size > chunk_size
398  tst W, #chunk_size
399  beq 1f
402 .if dst_aligned_flag != 0
404 .else
406 .endif
408  PF add PF_X, PF_X, #chunk_size
410 1:
411 .endif
412 .endr
413  pixdeinterleave src_bpp, src_basereg
414  pixdeinterleave mask_bpp, mask_basereg
415  pixdeinterleave dst_r_bpp, dst_r_basereg
416 
418 .if cache_preload_flag != 0
419  cache_preload 0, pixblock_size
420  cache_preload_simple
421 .endif
423  pixinterleave dst_w_bpp, dst_w_basereg
424 .irp chunk_size, 16, 8, 4, 2, 1
425 .if pixblock_size > chunk_size
426  tst W, #chunk_size
427  beq 1f
428 .if dst_aligned_flag != 0
430 .else
432 .endif
433 1:
434 .endif
435 .endr
436 2:
437 .endm
438 
439 /*
440  * Macro, which performs all the needed operations to switch to the next
441  * scanline and start the next loop iteration unless all the scanlines
442  * are already processed.
443  */
444 .macro advance_to_next_scanline start_of_loop_label
445 .if regs_shortage
446  ldrd W, [sp] /* load W and H (width and height) from stack */
447 .else
448  mov W, ORIG_W
449 .endif
450  add DST_W, DST_W, DST_STRIDE, lsl #dst_bpp_shift
451 .if src_bpp != 0
453 .endif
454 .if mask_bpp != 0
456 .endif
457 .if (dst_w_bpp != 24)
458  sub DST_W, DST_W, W, lsl #dst_bpp_shift
459 .endif
460 .if (src_bpp != 24) && (src_bpp != 0)
461  sub SRC, SRC, W, lsl #src_bpp_shift
462 .endif
463 .if (mask_bpp != 24) && (mask_bpp != 0)
464  sub MASK, MASK, W, lsl #mask_bpp_shift
465 .endif
466  subs H, H, #1
467  mov DST_R, DST_W
468 .if regs_shortage
469  str H, [sp, #4] /* save updated height to stack */
470 .endif
471  bge start_of_loop_label
472 .endm
473 
474 /*
475  * Registers are allocated in the following way by default:
476  * d0, d1, d2, d3 - reserved for loading source pixel data
477  * d4, d5, d6, d7 - reserved for loading destination pixel data
478  * d24, d25, d26, d27 - reserved for loading mask pixel data
479  * d28, d29, d30, d31 - final destination pixel data for writeback to memory
480  */
481 .macro generate_composite_function fname, \
485  flags, \
488  init, \
497 
498  .func fname
499  .global fname
500  /* For ELF format also set function visibility to hidden */
501 #ifdef __ELF__
502  .hidden fname
503  .type fname, %function
504 #endif
505 fname:
506  push {r4-r12, lr} /* save all registers */
507 
508 /*
509  * Select prefetch type for this function. If prefetch distance is
510  * set to 0 or one of the color formats is 24bpp, SIMPLE prefetch
511  * has to be used instead of ADVANCED.
512  */
513  .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_DEFAULT
514 .if prefetch_distance == 0
515  .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
516 .elseif (PREFETCH_TYPE_CURRENT > PREFETCH_TYPE_SIMPLE) && \
517  ((src_bpp_ == 24) || (mask_bpp_ == 24) || (dst_w_bpp_ == 24))
518  .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_SIMPLE
519 .endif
520 
521 /*
522  * Make some macro arguments globally visible and accessible
523  * from other macros
524  */
525  .set src_bpp, src_bpp_
526  .set mask_bpp, mask_bpp_
527  .set dst_w_bpp, dst_w_bpp_
528  .set pixblock_size, pixblock_size_
533 
534 /*
535  * Assign symbolic names to registers
536  */
537  W .req r0 /* width (is updated during processing) */
538  H .req r1 /* height (is updated during processing) */
539  DST_W .req r2 /* destination buffer pointer for writes */
540  DST_STRIDE .req r3 /* destination image stride */
541  SRC .req r4 /* source buffer pointer */
542  SRC_STRIDE .req r5 /* source image stride */
543  DST_R .req r6 /* destination buffer pointer for reads */
544 
545  MASK .req r7 /* mask pointer */
546  MASK_STRIDE .req r8 /* mask stride */
547 
548  PF_CTL .req r9 /* combined lines counter and prefetch */
549  /* distance increment counter */
550  PF_X .req r10 /* pixel index in a scanline for current */
551  /* pretetch position */
552  PF_SRC .req r11 /* pointer to source scanline start */
553  /* for prefetch purposes */
554  PF_DST .req r12 /* pointer to destination scanline start */
555  /* for prefetch purposes */
556  PF_MASK .req r14 /* pointer to mask scanline start */
557  /* for prefetch purposes */
558 /*
559  * Check whether we have enough registers for all the local variables.
560  * If we don't have enough registers, original width and height are
561  * kept on top of stack (and 'regs_shortage' variable is set to indicate
562  * this for the rest of code). Even if there are enough registers, the
563  * allocation scheme may be a bit different depending on whether source
564  * or mask is not used.
565  */
566 .if (PREFETCH_TYPE_CURRENT < PREFETCH_TYPE_ADVANCED)
567  ORIG_W .req r10 /* saved original width */
568  DUMMY .req r12 /* temporary register */
569  .set regs_shortage, 0
570 .elseif mask_bpp == 0
571  ORIG_W .req r7 /* saved original width */
572  DUMMY .req r8 /* temporary register */
573  .set regs_shortage, 0
574 .elseif src_bpp == 0
575  ORIG_W .req r4 /* saved original width */
576  DUMMY .req r5 /* temporary register */
577  .set regs_shortage, 0
578 .else
579  ORIG_W .req r1 /* saved original width */
580  DUMMY .req r1 /* temporary register */
581  .set regs_shortage, 1
582 .endif
583 
584  .set mask_bpp_shift, -1
585 .if src_bpp == 32
586  .set src_bpp_shift, 2
587 .elseif src_bpp == 24
588  .set src_bpp_shift, 0
589 .elseif src_bpp == 16
590  .set src_bpp_shift, 1
591 .elseif src_bpp == 8
592  .set src_bpp_shift, 0
593 .elseif src_bpp == 0
594  .set src_bpp_shift, -1
595 .else
596  .error "requested src bpp (src_bpp) is not supported"
597 .endif
598 .if mask_bpp == 32
599  .set mask_bpp_shift, 2
600 .elseif mask_bpp == 24
601  .set mask_bpp_shift, 0
602 .elseif mask_bpp == 8
603  .set mask_bpp_shift, 0
604 .elseif mask_bpp == 0
605  .set mask_bpp_shift, -1
606 .else
607  .error "requested mask bpp (mask_bpp) is not supported"
608 .endif
609 .if dst_w_bpp == 32
610  .set dst_bpp_shift, 2
611 .elseif dst_w_bpp == 24
612  .set dst_bpp_shift, 0
613 .elseif dst_w_bpp == 16
614  .set dst_bpp_shift, 1
615 .elseif dst_w_bpp == 8
616  .set dst_bpp_shift, 0
617 .else
618  .error "requested dst bpp (dst_w_bpp) is not supported"
619 .endif
620 
621 .if (((flags) & FLAG_DST_READWRITE) != 0)
622  .set dst_r_bpp, dst_w_bpp
623 .else
624  .set dst_r_bpp, 0
625 .endif
626 .if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
627  .set DEINTERLEAVE_32BPP_ENABLED, 1
628 .else
629  .set DEINTERLEAVE_32BPP_ENABLED, 0
630 .endif
631 
632 .if prefetch_distance < 0 || prefetch_distance > 15
633  .error "invalid prefetch distance (prefetch_distance)"
634 .endif
635 
636 .if src_bpp > 0
637  ldr SRC, [sp, #40]
638 .endif
639 .if mask_bpp > 0
640  ldr MASK, [sp, #48]
641 .endif
642  PF mov PF_X, #0
643 .if src_bpp > 0
644  ldr SRC_STRIDE, [sp, #44]
645 .endif
646 .if mask_bpp > 0
647  ldr MASK_STRIDE, [sp, #52]
648 .endif
649  mov DST_R, DST_W
650 
651 .if src_bpp == 24
653  sub SRC_STRIDE, SRC_STRIDE, W, lsl #1
654 .endif
655 .if mask_bpp == 24
657  sub MASK_STRIDE, MASK_STRIDE, W, lsl #1
658 .endif
659 .if dst_w_bpp == 24
661  sub DST_STRIDE, DST_STRIDE, W, lsl #1
662 .endif
663 
664 /*
665  * Setup advanced prefetcher initial state
666  */
667  PF mov PF_SRC, SRC
668  PF mov PF_DST, DST_R
669  PF mov PF_MASK, MASK
670  /* PF_CTL = prefetch_distance | ((h - 1) << 4) */
671  PF mov PF_CTL, H, lsl #4
672  PF add PF_CTL, #(prefetch_distance - 0x10)
673 
674  init
675 .if regs_shortage
676  push {r0, r1}
677 .endif
678  subs H, H, #1
679 .if regs_shortage
680  str H, [sp, #4] /* save updated height to stack */
681 .else
682  mov ORIG_W, W
683 .endif
684  blt 9f
685  cmp W, #(pixblock_size * 2)
686  blt 8f
687 /*
688  * This is the start of the pipelined loop, which if optimized for
689  * long scanlines
690  */
691 0:
692  ensure_destination_ptr_alignment process_pixblock_head, \
695 
696  /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
697  pixld_a pixblock_size, dst_r_bpp, \
698  (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
699  pixld pixblock_size, src_bpp, \
700  (src_basereg - pixblock_size * src_bpp / 64), SRC
701  pixld pixblock_size, mask_bpp, \
702  (mask_basereg - pixblock_size * mask_bpp / 64), MASK
703  PF add PF_X, PF_X, #pixblock_size
705  cache_preload 0, pixblock_size
706  cache_preload_simple
707  subs W, W, #(pixblock_size * 2)
708  blt 2f
709 1:
711  cache_preload_simple
712  subs W, W, #pixblock_size
713  bge 1b
714 2:
716  pixst_a pixblock_size, dst_w_bpp, \
717  (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
718 
719  /* Process the remaining trailing pixels in the scanline */
720  process_trailing_pixels 1, 1, \
721  process_pixblock_head, \
722  process_pixblock_tail, \
723  process_pixblock_tail_head
724  advance_to_next_scanline 0b
725 
726 .if regs_shortage
727  pop {r0, r1}
728 .endif
729  cleanup
730  pop {r4-r12, pc} /* exit */
731 /*
732  * This is the start of the loop, designed to process images with small width
733  * (less than pixblock_size * 2 pixels). In this case neither pipelining
734  * nor prefetch are used.
735  */
736 8:
737  /* Process exactly pixblock_size pixels if needed */
738  tst W, #pixblock_size
739  beq 1f
740  pixld pixblock_size, dst_r_bpp, \
741  (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
742  pixld pixblock_size, src_bpp, \
743  (src_basereg - pixblock_size * src_bpp / 64), SRC
744  pixld pixblock_size, mask_bpp, \
745  (mask_basereg - pixblock_size * mask_bpp / 64), MASK
748  pixst pixblock_size, dst_w_bpp, \
749  (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
750 1:
751  /* Process the remaining trailing pixels in the scanline */
752  process_trailing_pixels 0, 0, \
753  process_pixblock_head, \
754  process_pixblock_tail, \
755  process_pixblock_tail_head
756  advance_to_next_scanline 8b
757 9:
758 .if regs_shortage
759  pop {r0, r1}
760 .endif
761  cleanup
762  pop {r4-r12, pc} /* exit */
763 
764  .unreq SRC
765  .unreq MASK
766  .unreq DST_R
767  .unreq DST_W
768  .unreq ORIG_W
769  .unreq W
770  .unreq H
771  .unreq SRC_STRIDE
772  .unreq DST_STRIDE
773  .unreq MASK_STRIDE
774  .unreq PF_CTL
775  .unreq PF_X
776  .unreq PF_SRC
777  .unreq PF_DST
778  .unreq PF_MASK
779  .unreq DUMMY
780  .endfunc
781 .endm
782 
783 /*
784  * A simplified variant of function generation template for a single
785  * scanline processing (for implementing pixman combine functions)
786  */
787 .macro generate_composite_function_single_scanline fname, \
788  src_bpp_, \
789  mask_bpp_, \
790  dst_w_bpp_, \
791  flags, \
792  pixblock_size_, \
793  init, \
794  cleanup, \
795  process_pixblock_head, \
796  process_pixblock_tail, \
797  process_pixblock_tail_head, \
798  dst_w_basereg_ = 28, \
799  dst_r_basereg_ = 4, \
800  src_basereg_ = 0, \
801  mask_basereg_ = 24
802 
803  .func fname
804  .global fname
805  /* For ELF format also set function visibility to hidden */
806 #ifdef __ELF__
807  .hidden fname
808  .type fname, %function
809 #endif
810 fname:
811  .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
812 /*
813  * Make some macro arguments globally visible and accessible
814  * from other macros
815  */
816  .set src_bpp, src_bpp_
817  .set mask_bpp, mask_bpp_
818  .set dst_w_bpp, dst_w_bpp_
819  .set pixblock_size, pixblock_size_
824 /*
825  * Assign symbolic names to registers
826  */
827  W .req r0 /* width (is updated during processing) */
828  DST_W .req r1 /* destination buffer pointer for writes */
829  SRC .req r2 /* source buffer pointer */
830  DST_R .req ip /* destination buffer pointer for reads */
831  MASK .req r3 /* mask pointer */
832 
833 .if (((flags) & FLAG_DST_READWRITE) != 0)
834  .set dst_r_bpp, dst_w_bpp
835 .else
836  .set dst_r_bpp, 0
837 .endif
838 .if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
839  .set DEINTERLEAVE_32BPP_ENABLED, 1
840 .else
841  .set DEINTERLEAVE_32BPP_ENABLED, 0
842 .endif
843 
844  init
845  mov DST_R, DST_W
846 
847  cmp W, #pixblock_size
848  blt 8f
849 
850  ensure_destination_ptr_alignment process_pixblock_head, \
851  process_pixblock_tail, \
852  process_pixblock_tail_head
853 
854  subs W, W, #pixblock_size
855  blt 7f
856 
857  /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
858  pixld_a pixblock_size, dst_r_bpp, \
859  (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
860  pixld pixblock_size, src_bpp, \
861  (src_basereg - pixblock_size * src_bpp / 64), SRC
862  pixld pixblock_size, mask_bpp, \
863  (mask_basereg - pixblock_size * mask_bpp / 64), MASK
865  subs W, W, #pixblock_size
866  blt 2f
867 1:
869  subs W, W, #pixblock_size
870  bge 1b
871 2:
873  pixst_a pixblock_size, dst_w_bpp, \
874  (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
875 7:
876  /* Process the remaining trailing pixels in the scanline (dst aligned) */
877  process_trailing_pixels 0, 1, \
878  process_pixblock_head, \
879  process_pixblock_tail, \
880  process_pixblock_tail_head
881 
882  cleanup
883  bx lr /* exit */
884 8:
885  /* Process the remaining trailing pixels in the scanline (dst unaligned) */
886  process_trailing_pixels 0, 0, \
887  process_pixblock_head, \
888  process_pixblock_tail, \
889  process_pixblock_tail_head
890 
891  cleanup
892  bx lr /* exit */
893 
894  .unreq SRC
895  .unreq MASK
896  .unreq DST_R
897  .unreq DST_W
898  .unreq W
899  .endfunc
900 .endm
901 
902 .macro default_init
903 .endm
904 
905 .macro default_cleanup
906 .endm
QString str
[2]
#define macro
Definition: dollars.h:38
#define local
Definition: zutil.h:30
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld DST_W[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if src_bpp_shift
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst basereg
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld mask_basereg_[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld src_bpp[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld endif[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld src_basereg[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if PF_X
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld src_basereg_[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld MASK[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst numbytes
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld dst_aligned_flag[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst numpix
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld mask_bpp_[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld chunk_size[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF_CTL
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld pixblock_size_[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld dst_w_bpp[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if
set set set set set set set macro pixldst1 reg1
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld process_pixblock_tail[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if mask_bpp_shift
set set set set set set set macro pixldst1 elem_size
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp
set set set set set PREFETCH_TYPE_NONE
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld DST_R[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld dst_r_basereg[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp
set set FLAG_DST_READWRITE
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld pixld_a[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp(lowbit *8/dst_r_bpp)
set set set set set set PREFETCH_TYPE_SIMPLE
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 reg3
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld src_bpp_[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld cache_preload[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 reg2
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld process_pixblock_tail_head[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 idx
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if dst_r_bpp
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld mask_bpp[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld DST_STRIDE[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld dst_r_basereg_[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld fname[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg vzip8(basereg+1)
set FLAG_DST_WRITEONLY
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld H[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld mask_basereg[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 reg4
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld cache_preload_flag[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if std_increment
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported abits
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg vuzp8(basereg+2)
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld init[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld dst_w_bpp_[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 mem_operand
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld SRC[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld process_pixblock_head[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld pixld[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif(lowbit *8/dst_w_bpp)
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld cleanup[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set FLAG_DEINTERLEAVE_32BPP
set set set set set set set PREFETCH_TYPE_ADVANCED
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld dst_w_basereg_[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld pixst_a[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif(lowbit *8/dst_w_bpp)
set set set set set set set macro pixldst1 op
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld dst_w_basereg[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld prefetch_distance[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if lsl endif lsl endif lsl endif lsl endif subs mov DST_W if regs_shortage str endif bge start_of_loop_label endm macro generate_composite_function
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld MASK_STRIDE[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if lsl endif if
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld W[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub
set set set set ARGS_STACK_OFFSET
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld SRC_STRIDE[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp skip1 beq endif SRC MASK if dst_r_bpp DST_R else add endif PF add sub src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head pixblock_size cache_preload_simple process_pixblock_tail pixinterleave dst_w_basereg irp beq endif process_pixblock_tail_head tst beq irp if pixblock_size chunk_size tst beq pixld SRC pixld MASK if DST_R else pixld DST_R endif if src_basereg pixdeinterleave mask_basereg pixdeinterleave dst_r_basereg process_pixblock_head if pixblock_size cache_preload_simple endif process_pixblock_tail pixinterleave dst_w_basereg irp if pixblock_size chunk_size tst beq if DST_W else pixst DST_W else mov ORIG_W endif add lsl if
set set set set set set set macro pixldst1 abits if abits op else op endif endm macro pixldst2 abits if abits op else op endif endm macro pixldst4 abits if abits op else op endif endm macro pixldst0 abits op endm macro pixldst3 mem_operand op endm macro pixldst30 mem_operand op endm macro pixldst abits if abits elseif abits elseif abits elseif abits elseif abits pixldst0 abits else pixldst0 abits pixldst0 abits pixldst0 abits pixldst0 abits endif elseif abits else pixldst0 abits pixldst0 abits endif elseif abits else error unsupported bpp *numpix else pixst endif endm macro vuzp8 reg2 vuzp d d &reg2 endm macro vzip8 reg2 vzip d d &reg2 endm macro pixdeinterleave basereg basereg basereg basereg basereg endif endm macro pixinterleave basereg basereg basereg basereg basereg endif endm macro PF boost_increment endif if endif PF tst PF addne PF subne PF cmp ORIG_W if endif if endif if endif PF subge ORIG_W PF subges if endif if endif if endif endif endm macro cache_preload_simple endif if dst_r_bpp pld[DST_R, #(PREFETCH_DISTANCE_SIMPLE *dst_r_bpp/8)] endif if mask_bpp pld lowbit[MASK, #(PREFETCH_DISTANCE_SIMPLE *mask_bpp/8)] endif endif endm macro ensure_destination_ptr_alignment process_pixblock_tail_head if beq irp
DBusConnection const char DBusError DBusBusType DBusError return DBusConnection DBusHandleMessageFunction void DBusFreeFunction return DBusConnection return DBusConnection return const char DBusError return DBusConnection DBusMessage dbus_uint32_t return DBusConnection dbus_bool_t DBusConnection DBusAddWatchFunction DBusRemoveWatchFunction DBusWatchToggledFunction void DBusFreeFunction return DBusConnection DBusDispatchStatusFunction void DBusFreeFunction DBusTimeout return DBusTimeout return DBusWatch return DBusWatch unsigned int return DBusError const DBusError return const DBusMessage return DBusMessage return DBusMessage return DBusMessage return DBusMessage return DBusMessage return DBusMessageIter int const void return DBusMessageIter DBusMessageIter * sub
GLenum type
Definition: qopengl.h:270
GLboolean GLboolean GLboolean b
GLint GLint GLint GLint GLint x
[0]
GLboolean GLboolean GLboolean GLboolean a
[7]
GLfloat GLfloat f
GLbitfield flags
GLenum func
Definition: qopenglext.h:663
#define sp
QRect r1(100, 200, 11, 16)
[0]
QRect r2(QPoint(100, 200), QSize(11, 16))
stack push(command1)
Global global
Definition: main.cpp:114
void add(int &result, const int &sum)