QtBase  v6.3.1
qendian.cpp
Go to the documentation of this file.
1 /****************************************************************************
2 **
3 ** Copyright (C) 2016 The Qt Company Ltd.
4 ** Copyright (C) 2018 Intel Corporation.
5 ** Contact: https://www.qt.io/licensing/
6 **
7 ** This file is part of the QtCore module of the Qt Toolkit.
8 **
9 ** $QT_BEGIN_LICENSE:LGPL$
10 ** Commercial License Usage
11 ** Licensees holding valid commercial Qt licenses may use this file in
12 ** accordance with the commercial license agreement provided with the
13 ** Software or, alternatively, in accordance with the terms contained in
14 ** a written agreement between you and The Qt Company. For licensing terms
15 ** and conditions see https://www.qt.io/terms-conditions. For further
16 ** information use the contact form at https://www.qt.io/contact-us.
17 **
18 ** GNU Lesser General Public License Usage
19 ** Alternatively, this file may be used under the terms of the GNU Lesser
20 ** General Public License version 3 as published by the Free Software
21 ** Foundation and appearing in the file LICENSE.LGPL3 included in the
22 ** packaging of this file. Please review the following information to
23 ** ensure the GNU Lesser General Public License version 3 requirements
24 ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
25 **
26 ** GNU General Public License Usage
27 ** Alternatively, this file may be used under the terms of the GNU
28 ** General Public License version 2.0 or (at your option) the GNU General
29 ** Public license version 3 or any later version approved by the KDE Free
30 ** Qt Foundation. The licenses are as published by the Free Software
31 ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
32 ** included in the packaging of this file. Please review the following
33 ** information to ensure the GNU General Public License requirements will
34 ** be met: https://www.gnu.org/licenses/gpl-2.0.html and
35 ** https://www.gnu.org/licenses/gpl-3.0.html.
36 **
37 ** $QT_END_LICENSE$
38 **
39 ****************************************************************************/
40 
41 #include "qendian.h"
42 
43 #include "qalgorithms.h"
44 #include <private/qsimd_p.h>
45 
47 
766 #if defined(__SSSE3__)
767 using ShuffleMask = uchar[16];
768 alignas(16) static const ShuffleMask shuffleMasks[3] = {
769  // 16-bit
770  {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
771  // 32-bit
772  {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12},
773  // 64-bit
774  {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8}
775 };
776 
777 static size_t sseSwapLoop(const uchar *src, size_t bytes, uchar *dst,
778  const __m128i *shuffleMaskPtr) noexcept
779 {
780  size_t i = 0;
781  const __m128i shuffleMask = _mm_load_si128(shuffleMaskPtr);
782 
783 # ifdef __AVX2__
784  const __m256i shuffleMask256 = _mm256_inserti128_si256(_mm256_castsi128_si256(shuffleMask), shuffleMask, 1);
785  for ( ; i + sizeof(__m256i) <= bytes; i += sizeof(__m256i)) {
786  __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src + i));
787  data = _mm256_shuffle_epi8(data, shuffleMask256);
788  _mm256_storeu_si256(reinterpret_cast<__m256i *>(dst + i), data);
789  }
790 # else
791  for ( ; i + 2 * sizeof(__m128i) <= bytes; i += 2 * sizeof(__m128i)) {
792  __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + i));
793  __m128i data2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + i) + 1);
794  data1 = _mm_shuffle_epi8(data1, shuffleMask);
795  data2 = _mm_shuffle_epi8(data2, shuffleMask);
796  _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + i), data1);
797  _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + i) + 1, data2);
798  }
799 # endif
800 
801  if (i + sizeof(__m128i) <= bytes) {
802  __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + i));
803  data = _mm_shuffle_epi8(data, shuffleMask);
804  _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + i), data);
805  i += sizeof(__m128i);
806  }
807 
808  return i;
809 }
810 
811 template <typename T> static Q_ALWAYS_INLINE
812 size_t simdSwapLoop(const uchar *src, size_t bytes, uchar *dst) noexcept
813 {
814  auto shuffleMaskPtr = reinterpret_cast<const __m128i *>(shuffleMasks[0]);
815  shuffleMaskPtr += qCountTrailingZeroBits(sizeof(T)) - 1;
816  size_t i = sseSwapLoop(src, bytes, dst, shuffleMaskPtr);
817 
818  // epilogue
819  for (size_t _i = 0; i < bytes && _i < sizeof(__m128i); i += sizeof(T), _i += sizeof(T))
820  qbswap(qFromUnaligned<T>(src + i), dst + i);
821 
822  // return the total, so the bswapLoop below does nothing
823  return bytes;
824 }
825 #elif defined(__SSE2__)
826 template <typename T> static
827 size_t simdSwapLoop(const uchar *, size_t, uchar *) noexcept
828 {
829  // no generic version: we can't do 32- and 64-bit swaps easily,
830  // so we won't try
831  return 0;
832 }
833 
834 template <> size_t simdSwapLoop<quint16>(const uchar *src, size_t bytes, uchar *dst) noexcept
835 {
836  auto swapEndian = [](__m128i &data) {
837  __m128i lows = _mm_srli_epi16(data, 8);
838  __m128i highs = _mm_slli_epi16(data, 8);
839  data = _mm_xor_si128(lows, highs);
840  };
841 
842  size_t i = 0;
843  for ( ; i + 2 * sizeof(__m128i) <= bytes; i += 2 * sizeof(__m128i)) {
844  __m128i data1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + i));
845  __m128i data2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + i) + 1);
846  swapEndian(data1);
847  swapEndian(data2);
848  _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + i), data1);
849  _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + i) + 1, data2);
850  }
851 
852  if (i + sizeof(__m128i) <= bytes) {
853  __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src + i));
854  swapEndian(data);
855  _mm_storeu_si128(reinterpret_cast<__m128i *>(dst + i), data);
856  i += sizeof(__m128i);
857  }
858 
859  // epilogue
860  for (size_t _i = 0 ; i < bytes && _i < sizeof(__m128i); i += sizeof(quint16), _i += sizeof(quint16))
861  qbswap(qFromUnaligned<quint16>(src + i), dst + i);
862 
863  // return the total, so the bswapLoop below does nothing
864  return bytes;
865 }
866 #else
867 template <typename T> static Q_ALWAYS_INLINE
868 size_t simdSwapLoop(const uchar *, size_t, uchar *) noexcept
869 {
870  return 0;
871 }
872 #endif
873 
874 template <typename T> static Q_ALWAYS_INLINE
875 void *bswapLoop(const uchar *src, size_t n, uchar *dst) noexcept
876 {
877  // Buffers cannot partially overlap: either they're identical or totally
878  // disjoint (note: they can be adjacent).
879  if (src != dst) {
880  quintptr s = quintptr(src);
881  quintptr d = quintptr(dst);
882  if (s < d)
883  Q_ASSERT(s + n <= d);
884  else
885  Q_ASSERT(d + n <= s);
886  }
887 
888  size_t i = simdSwapLoop<T>(src, n, dst);
889 
890  for (; i < n; i += sizeof(T))
891  qbswap(qFromUnaligned<T>(src + i), dst + i);
892  return dst + i;
893 }
894 
895 template<>
896 void *qbswap<2>(const void *source, qsizetype n, void *dest) noexcept
897 {
898  const uchar *src = reinterpret_cast<const uchar *>(source);
899  uchar *dst = reinterpret_cast<uchar *>(dest);
900 
901  return bswapLoop<quint16>(src, n << 1, dst);
902 }
903 
904 template<>
905 void *qbswap<4>(const void *source, qsizetype n, void *dest) noexcept
906 {
907  const uchar *src = reinterpret_cast<const uchar *>(source);
908  uchar *dst = reinterpret_cast<uchar *>(dest);
909 
910  return bswapLoop<quint32>(src, n << 2, dst);
911 }
912 
913 template<>
914 void *qbswap<8>(const void *source, qsizetype n, void *dest) noexcept
915 {
916  const uchar *src = reinterpret_cast<const uchar *>(source);
917  uchar *dst = reinterpret_cast<uchar *>(dest);
918 
919  return bswapLoop<quint64>(src, n << 3, dst);
920 }
921 
small capitals from c petite p scientific i
[1]
Definition: afcover.h:80
char * data()
#define T(x)
Definition: main.cpp:42
constexpr uint qCountTrailingZeroBits(quint32 v) noexcept
Definition: qalgorithms.h:362
void * qbswap< 8 >(const void *source, qsizetype n, void *dest) noexcept
Definition: qendian.cpp:914
void * qbswap< 2 >(const void *source, qsizetype n, void *dest) noexcept
Definition: qendian.cpp:896
void * qbswap< 4 >(const void *source, qsizetype n, void *dest) noexcept
Definition: qendian.cpp:905
constexpr T qbswap(T source)
Definition: qendian.h:135
QT_BEGIN_INCLUDE_NAMESPACE typedef unsigned char uchar
Definition: qglobal.h:332
unsigned short quint16
Definition: qglobal.h:286
size_t quintptr
Definition: qglobal.h:310
ptrdiff_t qsizetype
Definition: qglobal.h:308
GLenum src
GLenum GLenum dst
GLint GLsizei GLsizei GLenum GLenum GLsizei void * data
GLfloat n
GLsizei GLsizei GLchar * source
GLdouble s
[6]
Definition: qopenglext.h:235
#define Q_ASSERT(cond)
Definition: qrandom.cpp:84
Definition: main.cpp:38