mirror of https://github.com/qt/qtbase.git
590 lines
17 KiB
C++
590 lines
17 KiB
C++
/****************************************************************************
|
|
**
|
|
** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies).
|
|
** Copyright (C) 2012 Intel Corporation.
|
|
** Contact: http://www.qt-project.org/legal
|
|
**
|
|
** This file is part of the QtCore module of the Qt Toolkit.
|
|
**
|
|
** $QT_BEGIN_LICENSE:LGPL$
|
|
** Commercial License Usage
|
|
** Licensees holding valid commercial Qt licenses may use this file in
|
|
** accordance with the commercial license agreement provided with the
|
|
** Software or, alternatively, in accordance with the terms contained in
|
|
** a written agreement between you and Digia. For licensing terms and
|
|
** conditions see http://qt.digia.com/licensing. For further information
|
|
** use the contact form at http://qt.digia.com/contact-us.
|
|
**
|
|
** GNU Lesser General Public License Usage
|
|
** Alternatively, this file may be used under the terms of the GNU Lesser
|
|
** General Public License version 2.1 as published by the Free Software
|
|
** Foundation and appearing in the file LICENSE.LGPL included in the
|
|
** packaging of this file. Please review the following information to
|
|
** ensure the GNU Lesser General Public License version 2.1 requirements
|
|
** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
|
|
**
|
|
** In addition, as a special exception, Digia gives you certain additional
|
|
** rights. These rights are described in the Digia Qt LGPL Exception
|
|
** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
|
|
**
|
|
** GNU General Public License Usage
|
|
** Alternatively, this file may be used under the terms of the GNU
|
|
** General Public License version 3.0 as published by the Free Software
|
|
** Foundation and appearing in the file LICENSE.GPL included in the
|
|
** packaging of this file. Please review the following information to
|
|
** ensure the GNU General Public License version 3.0 requirements will be
|
|
** met: http://www.gnu.org/copyleft/gpl.html.
|
|
**
|
|
**
|
|
** $QT_END_LICENSE$
|
|
**
|
|
****************************************************************************/
|
|
|
|
#include "qsimd_p.h"
|
|
#include <QByteArray>
|
|
#include <stdio.h>
|
|
|
|
#if defined(Q_OS_WIN)
|
|
# if defined(Q_OS_WINCE)
|
|
# include <qt_windows.h>
|
|
# include <cmnintrin.h>
|
|
# endif
|
|
# if !defined(Q_CC_GNU)
|
|
# ifndef Q_OS_WINCE
|
|
# include <intrin.h>
|
|
# endif
|
|
# endif
|
|
#elif defined(Q_OS_LINUX) && (defined(Q_PROCESSOR_ARM) || defined(Q_PROCESSOR_MIPS_32))
|
|
#include "private/qcore_unix_p.h"
|
|
|
|
// the kernel header definitions for HWCAP_*
|
|
// (the ones we need/may need anyway)
|
|
|
|
// copied from <asm/hwcap.h> (ARM)
|
|
#define HWCAP_CRUNCH 1024
|
|
#define HWCAP_THUMBEE 2048
|
|
#define HWCAP_NEON 4096
|
|
#define HWCAP_VFPv3 8192
|
|
#define HWCAP_VFPv3D16 16384
|
|
|
|
// copied from <linux/auxvec.h>
|
|
#define AT_HWCAP 16 /* arch dependent hints at CPU capabilities */
|
|
|
|
#endif
|
|
|
|
QT_BEGIN_NAMESPACE
|
|
|
|
#if defined (Q_OS_NACL)
|
|
// Native Client build: no run-time probing is done, so no optional CPU
// feature is ever reported.
static inline uint detectProcessorFeatures()
{
    const uint noFeatures = 0;
    return noFeatures;
}
|
|
#elif defined (Q_OS_WINCE)
|
|
// Windows CE build: asks the OS, through IsProcessorFeaturePresent(), which
// SIMD extensions are available and returns them as a feature bit mask.
static inline uint detectProcessorFeatures()
{
    uint detected = 0;

#if defined (ARM)
# ifdef PF_ARM_NEON
    if (IsProcessorFeaturePresent(PF_ARM_NEON)) {
        detected |= ARM_NEON;
    }
# endif
#elif defined(_X86_)
    if (IsProcessorFeaturePresent(PF_XMMI64_INSTRUCTIONS_AVAILABLE)) {
        detected |= SSE2;
    }
    if (IsProcessorFeaturePresent(PF_SSE3_INSTRUCTIONS_AVAILABLE)) {
        detected |= SSE3;
    }
#endif
    return detected;
}
|
|
|
|
#elif defined(Q_PROCESSOR_ARM)
|
|
// ARM build: returns the detected feature bit mask (only NEON is probed).
//
// On Linux the AT_HWCAP entry of /proc/self/auxv is consulted; if that file
// can be opened its answer is final. Otherwise (or on other systems) NEON is
// reported only when the compiler itself was targeting it (__ARM_NEON__).
static inline uint detectProcessorFeatures()
{
    uint features = 0;

#if defined(Q_OS_LINUX)
    int auxv = qt_safe_open("/proc/self/auxv", O_RDONLY);
    if (auxv != -1) {
        // the file is scanned as (type, value) pairs of unsigned long
        unsigned long vector[64];
        bool hwcapSeen = false;
        while (!hwcapSeen) {
            int nread = qt_safe_read(auxv, (char *)vector, sizeof vector);
            if (nread <= 0) {
                // EOF or error
                break;
            }

            int max = nread / (sizeof vector[0]);
            // Only look at complete pairs: after a short read the word at
            // vector[i+1] may not have been filled in.
            for (int i = 0; i + 1 < max; i += 2) {
                if (vector[i] == AT_HWCAP) {
                    if (vector[i+1] & HWCAP_NEON)
                        features |= NEON;
                    // the entry was found: no need to read any further,
                    // whether or not NEON is present
                    hwcapSeen = true;
                    break;
                }
            }
        }

        qt_safe_close(auxv);
        return features;
    }
    // fall back if /proc/self/auxv wasn't found
#endif

#if defined(__ARM_NEON__)
    features = NEON;
#endif

    return features;
}
|
|
|
|
#elif defined(Q_PROCESSOR_X86)
|
|
|
|
#ifdef Q_PROCESSOR_X86_32
|
|
# define PICreg "%%ebx"
|
|
#else
|
|
# define PICreg "%%rbx"
|
|
#endif
|
|
|
|
// Returns the value CPUID leaf 0 leaves in EAX (the highest basic leaf
// number), or 0 when the CPUID instruction cannot be used on this
// compiler/processor combination.
static int maxBasicCpuidSupported()
{
#if defined(Q_CC_GNU)
    // receives the PIC register while CPUID overwrites it
    qregisterint scratch;

# if Q_PROCESSOR_X86 < 5
    // check if the CPUID instruction is supported: try to flip bit
    // 0x00200000 of the flags register and see whether the change sticks
    long flagChanged;
    asm ("pushf\n"
         "pop %0\n"
         "mov %0, %1\n"
         "xor $0x00200000, %0\n"
         "push %0\n"
         "popf\n"
         "pushf\n"
         "pop %0\n"
         "xor %1, %0\n" // %eax is now 0 if CPUID is not supported
         : "=a" (flagChanged), "=r" (scratch)
         );
    if (flagChanged == 0)
        return 0;
# endif

    int highestLeaf;
    asm ("xchg " PICreg", %1\n"
         "cpuid\n"
         "xchg " PICreg", %1\n"
        : "=&a" (highestLeaf), "=&r" (scratch)
        : "0" (0)
        : "ecx", "edx");
    return highestLeaf;
#elif defined(Q_OS_WIN)
    // Use the __cpuid function; if the CPUID instruction isn't supported, it will return 0
    int regs[4];
    __cpuid(regs, 0);
    return regs[0];
#else
    return 0;
#endif
}
|
|
|
|
// Executes CPUID leaf 1 and stores the resulting ECX and EDX register values
// in "ecx" and "edx". On compilers/platforms handled by neither branch the
// arguments are left untouched.
static void cpuidFeatures01(uint &ecx, uint &edx)
{
#if defined(Q_CC_GNU)
    qregisterint tmp1;  // receives the PIC register while CPUID overwrites it
    // CPUID overwrites EAX as well, so the leaf number must be an in/out
    // operand: GCC assumes that input-only operands are left unmodified.
    uint eax = 1;
    asm ("xchg " PICreg", %2\n"
         "cpuid\n"
         "xchg " PICreg", %2\n"
        : "=&c" (ecx), "=&d" (edx), "=&r" (tmp1), "+a" (eax));
#elif defined(Q_OS_WIN)
    int info[4];
    __cpuid(info, 1);
    ecx = info[2];
    edx = info[3];
#endif
}
|
|
|
|
#ifdef Q_OS_WIN
|
|
// Stand-in overload with a (int[4], int, __int64) signature that just zeroes
// the four result words. Presumably the same trick as the _xgetbv fallback in
// this file, i.e. only chosen when the compiler offers no real __cpuidex
// intrinsic - TODO confirm.
inline void __cpuidex(int info[4], int, __int64)
{
    memset(info, 0, 4 * sizeof(int));
}
|
|
#endif
|
|
|
|
// Executes CPUID leaf 7, sub-leaf 0 and stores the resulting EBX register
// value in "ebx". On compilers/platforms handled by neither branch the
// argument is left untouched.
static void cpuidFeatures07_00(uint &ebx)
{
#if defined(Q_CC_GNU)
    qregisteruint rbx; // in case it's 64-bit
    // CPUID overwrites EAX and ECX too, so leaf and sub-leaf must be in/out
    // operands: GCC assumes that input-only operands are left unmodified.
    uint eax = 7;
    uint ecx = 0;
    asm ("xchg " PICreg", %0\n"
         "cpuid\n"
         "xchg " PICreg", %0\n"
        : "=&r" (rbx), "+a" (eax), "+c" (ecx)
        :
        : "%edx");
    ebx = rbx;
#elif defined(Q_OS_WIN)
    int info[4];
    __cpuidex(info, 7, 0);
    ebx = info[1];
#endif
}
|
|
|
|
#ifdef Q_OS_WIN
|
|
// fallback overload in case this intrinsic does not exist: unsigned __int64 _xgetbv(unsigned int);
|
|
inline quint64 _xgetbv(__int64) { return 0; }
|
|
#endif
|
|
// Executes XGETBV with "in" in ECX and returns the 64-bit result split into
// its low ("eax") and high ("edx") 32-bit halves. On compilers/platforms
// handled by neither branch the output arguments are left untouched.
static void xgetbv(uint in, uint &eax, uint &edx)
{
#if defined(Q_CC_GNU)
    asm (".byte 0x0F, 0x01, 0xD0" // xgetbv instruction
        : "=a" (eax), "=d" (edx)
        : "c" (in));
#elif defined(Q_OS_WIN)
    const quint64 xcr = _xgetbv(in);
    eax = uint(xcr);
    edx = uint(xcr >> 32);
#endif
}
|
|
|
|
// x86 build: combines CPUID leaf 1, CPUID leaf 7 (when available) and XGETBV
// into the feature bit mask. Returns 0 when CPUID leaf 1 is unavailable.
static inline uint detectProcessorFeatures()
{
    // Flags from the CR0 / XCR0 state register
    enum XCR0Flags {
        X87             = 1 << 0,
        XMM0_15         = 1 << 1,
        YMM0_15Hi128    = 1 << 2,
        BNDRegs         = 1 << 3,
        BNDCSR          = 1 << 4,
        OpMask          = 1 << 5,
        ZMM0_15Hi256    = 1 << 6,
        ZMM16_31        = 1 << 7,

        SSEState        = XMM0_15,
        AVXState        = XMM0_15 | YMM0_15Hi128,
        AVX512State     = AVXState | OpMask | ZMM0_15Hi256 | ZMM16_31
    };

    const int highestLeaf = maxBasicCpuidSupported();
    if (highestLeaf < 1)
        return 0;

    uint leaf1Ecx = 0;
    uint leaf1Edx = 0;
    cpuidFeatures01(leaf1Ecx, leaf1Edx);

    uint found = 0;
#if defined(Q_PROCESSOR_X86_32)
    // x86 might not have SSE2 support
    if (leaf1Edx & (1u << 26))
        found |= SSE2;
    // we should verify that the OS enabled saving of the SSE state...
#else
    // x86-64 or x32
    found = SSE2;
#endif

    // common part between 32- and 64-bit
    if (leaf1Ecx & (1u << 0))
        found |= SSE3;
    if (leaf1Ecx & (1u << 9))
        found |= SSSE3;
    if (leaf1Ecx & (1u << 19))
        found |= SSE4_1;
    if (leaf1Ecx & (1u << 20))
        found |= SSE4_2;
    // bit 25 is AES: nothing is recorded for it, enable if needed

    // XGETBV may only be executed when leaf 1 advertises it (bit 27)
    uint xcr0Low = 0;
    uint xcr0High = 0;
    if (leaf1Ecx & (1u << 27))
        xgetbv(0, xcr0Low, xcr0High);

    uint leaf7Ebx = 0;
    if (highestLeaf >= 7)
        cpuidFeatures07_00(leaf7Ebx);

    // AVX and AVX2 additionally need the XMM and YMM state bits set in XCR0
    const bool ymmStateEnabled = (xcr0Low & AVXState) == AVXState;
    if (ymmStateEnabled && (leaf1Ecx & (1u << 28)))
        found |= AVX;
    if (ymmStateEnabled && (leaf7Ebx & (1u << 5)))
        found |= AVX2;

    if (leaf7Ebx & (1u << 4))
        found |= HLE; // Hardware Lock Elision
    if (leaf7Ebx & (1u << 11))
        found |= RTM; // Restricted Transactional Memory

    return found;
}
|
|
|
|
#elif defined(Q_PROCESSOR_MIPS_32)
|
|
|
|
#if defined(Q_OS_LINUX)
|
|
//
|
|
// Do not use QByteArray: it could use SIMD instructions itself at
|
|
// some point, thus creating a recursive dependency. Instead, use a
|
|
// QSimpleBuffer, which has the bare minimum needed to use memory
|
|
// dynamically and read lines from /proc/cpuinfo of arbitrary sizes.
|
|
//
|
|
// Minimal growable byte buffer. The storage is obtained with realloc() and
// released by the destructor; the object is not meant to be copied (a copy
// would free the same storage twice).
struct QSimpleBuffer {
    static const int chunk_size = 256;  // growth granularity, in bytes
    char *data;         // storage (null until the first growth)
    unsigned alloc;     // number of bytes allocated at "data"
    unsigned size;      // number of bytes in use, never more than "alloc"

    QSimpleBuffer(): data(0), alloc(0), size(0) {}
    ~QSimpleBuffer() { ::free(data); }

    // Sets the logical size to "newsize", growing the storage when needed.
    // Existing contents are kept; new bytes are uninitialized. The storage
    // never shrinks. Running out of memory aborts the process.
    void resize(unsigned newsize) {
        if (newsize > alloc) {
            // round up to the next multiple of chunk_size above newsize
            unsigned newalloc = chunk_size * ((newsize / chunk_size) + 1);
            if (newalloc < newsize) newalloc = newsize;     // the rounding wrapped around
            // keep the old pointer until realloc() is known to have succeeded
            char *grown = static_cast<char*>(::realloc(data, newalloc));
            if (!grown)
                ::abort();
            data = grown;
            alloc = newalloc;
        }
        size = newsize;
    }

    // Appends the first "appendsize" bytes of "other" to this buffer.
    void append(const QSimpleBuffer &other, unsigned appendsize) {
        if (!appendsize)
            return;     // nothing to copy; also keeps null pointers away from memcpy()
        unsigned oldsize = size;
        resize(oldsize + appendsize);
        ::memcpy(data + oldsize, other.data, appendsize);
    }

    // Removes the first "amount" bytes, moving the remainder to the front.
    void popleft(unsigned amount) {
        if (amount >= size) {
            resize(0);
            return;
        }
        size -= amount;
        ::memmove(data, data + amount, size);
    }

    // Returns the contents as a NUL-terminated string. The terminator is
    // stored just past the logical size, which is left unchanged.
    char* cString() {
        if (size >= alloc) {
            // no spare byte for the terminator yet: grow, then restore the size
            unsigned keep = size;
            resize(keep + 1);
            size = keep;
        }
        data[size] = '\0';
        return data;
    }
};
|
|
|
|
//
|
|
// Uses a scratch "buffer" (which must be used for all reads done in the
|
|
// same file descriptor) to read chunks of data from a file, to read
|
|
// one line at a time. Lines include the trailing newline character ('\n').
|
|
// On EOF, line.size is zero.
|
|
//
|
|
static void bufReadLine(int fd, QSimpleBuffer &line, QSimpleBuffer &buffer)
|
|
{
|
|
for (;;) {
|
|
char *newline = static_cast<char*>(::memchr(buffer.data, '\n', buffer.size));
|
|
if (newline) {
|
|
unsigned piece_size = newline - buffer.data + 1;
|
|
line.append(buffer, piece_size);
|
|
buffer.popleft(piece_size);
|
|
line.resize(line.size - 1);
|
|
return;
|
|
}
|
|
if (buffer.size + QSimpleBuffer::chunk_size > buffer.alloc) {
|
|
int oldsize = buffer.size;
|
|
buffer.resize(buffer.size + QSimpleBuffer::chunk_size);
|
|
buffer.size = oldsize;
|
|
}
|
|
ssize_t read_bytes = ::qt_safe_read(fd, buffer.data + buffer.size, QSimpleBuffer::chunk_size);
|
|
if (read_bytes > 0) buffer.size += read_bytes;
|
|
else return;
|
|
}
|
|
}
|
|
|
|
//
|
|
// Checks if any line with a given prefix from /proc/cpuinfo contains
|
|
// a certain string, surrounded by spaces.
|
|
//
|
|
static bool procCpuinfoContains(const char *prefix, const char *string)
|
|
{
|
|
int cpuinfo_fd = ::qt_safe_open("/proc/cpuinfo", O_RDONLY);
|
|
if (cpuinfo_fd == -1)
|
|
return false;
|
|
|
|
unsigned string_len = ::strlen(string);
|
|
unsigned prefix_len = ::strlen(prefix);
|
|
QSimpleBuffer line, buffer;
|
|
bool present = false;
|
|
do {
|
|
line.resize(0);
|
|
bufReadLine(cpuinfo_fd, line, buffer);
|
|
char *colon = static_cast<char*>(::memchr(line.data, ':', line.size));
|
|
if (colon && line.size > prefix_len + string_len) {
|
|
if (!::strncmp(prefix, line.data, prefix_len)) {
|
|
// prefix matches, next character must be ':' or space
|
|
if (line.data[prefix_len] == ':' || ::isspace(line.data[prefix_len])) {
|
|
// Does it contain the string?
|
|
char *found = ::strstr(line.cString(), string);
|
|
if (found && ::isspace(found[-1]) &&
|
|
(::isspace(found[string_len]) || found[string_len] == '\0')) {
|
|
present = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} while (line.size);
|
|
|
|
::qt_safe_close(cpuinfo_fd);
|
|
return present;
|
|
}
|
|
#endif
|
|
|
|
// MIPS build: reports DSP and DSPR2, taken from the compiler's target
// macros where they are defined and from /proc/cpuinfo (Linux) otherwise.
static inline uint detectProcessorFeatures()
{
    // NOTE: MIPS 74K cores are the only ones supporting DSPr2.
    uint detected = 0;

#if defined __mips_dsp
    detected |= DSP;
# if defined __mips_dsp_rev && __mips_dsp_rev >= 2
    detected |= DSPR2;
# elif defined(Q_OS_LINUX)
    const bool is74K = procCpuinfoContains("cpu model", "MIPS 74Kc")
                       || procCpuinfoContains("cpu model", "MIPS 74Kf");
    if (is74K)
        detected |= DSPR2;
# endif
#elif defined(Q_OS_LINUX)
    // without DSP there is no point in looking for DSPr2
    if (!procCpuinfoContains("ASEs implemented", "dsp"))
        return detected;

    detected |= DSP;
    const bool is74K = procCpuinfoContains("cpu model", "MIPS 74Kc")
                       || procCpuinfoContains("cpu model", "MIPS 74Kf");
    if (is74K)
        detected |= DSPR2;
#endif

    return detected;
}
|
|
|
|
#else
|
|
// Architectures without a dedicated detection routine above: no optional
// CPU feature is reported.
static inline uint detectProcessorFeatures()
{
    const uint noFeatures = 0;
    return noFeatures;
}
|
|
#endif
|
|
|
|
/*
|
|
* Use kdesdk/scripts/generate_string_table.pl to update the table below.
|
|
* Here's the data (don't forget the ONE leading space):
|
|
|
|
neon
|
|
sse2
|
|
sse3
|
|
ssse3
|
|
sse4.1
|
|
sse4.2
|
|
avx
|
|
avx2
|
|
hle
|
|
rtm
|
|
dsp
|
|
dspr2
|
|
*/
|
|
|
|
// begin generated
|
|
static const char features_string[] =
    "\0"
    " neon\0"
    " sse2\0"
    " sse3\0"
    " ssse3\0"
    " sse4.1\0"
    " sse4.2\0"
    " avx\0"
    " avx2\0"
    " hle\0"
    " rtm\0"
    " dsp\0"
    " dspr2\0"
    "\0";

static const int features_indices[] = {
    0,    1,    7,   13,   19,   26,   34,   42,
   47,   53,   58,   63,   68,   -1
};
// end generated

// Number of names in the table: every entry of features_indices except the
// terminating -1. Entry i is the offset in features_string of the name
// (with its leading space) that belongs to feature bit i.
static const int features_count = (sizeof features_indices / sizeof features_indices[0]) - 1;
|
|
|
|
// record what CPU features were enabled by default in this Qt build
|
|
// don't define for HLE, since the HLE prefix can be run on older CPUs
|
|
static const uint minFeature = qCompilerCpuFeatures & ~HLE;
|
|
|
|
#ifdef Q_OS_WIN
|
|
#if defined(Q_CC_GNU)
|
|
# define ffs __builtin_ffs
|
|
#else
|
|
int ffs(int i)
|
|
{
|
|
#ifndef Q_OS_WINCE
|
|
unsigned long result;
|
|
return _BitScanForward(&result, i) ? result : 0;
|
|
#else
|
|
return 0;
|
|
#endif
|
|
}
|
|
#endif
|
|
#endif // Q_OS_WIN
|
|
|
|
// Result of the CPU feature detection. Starts out as 0; qDetectCpuFeatures()
// stores the feature bit mask combined with QSimdInitialized.
QBasicAtomicInt qt_cpu_features = Q_BASIC_ATOMIC_INITIALIZER(0);
|
|
|
|
void qDetectCpuFeatures()
|
|
{
|
|
#if defined(Q_CC_GNU) && !defined(Q_CC_CLANG) && !defined(Q_CC_INTEL)
|
|
# if (__GNUC__ * 100 + __GNUC_MINOR__) < 403
|
|
// GCC 4.2 (at least the one that comes with Apple's XCode, on Mac) is
|
|
// known to be broken beyond repair in dealing with the inline assembly
|
|
// above. It will generate bad code that could corrupt important registers
|
|
// like the PIC register. The behaviour of code after this function would
|
|
// be totally unpredictable.
|
|
//
|
|
// For that reason, simply forego the CPUID check at all and return the set
|
|
// of features that we found at compile time, through the #defines from the
|
|
// compiler. This should at least allow code to execute, even if none of
|
|
// the specialized code found in Qt GUI and elsewhere will ever be enabled
|
|
// (it's the user's fault for using a broken compiler).
|
|
//
|
|
// This also disables the runtime checking that the processor actually
|
|
// contains all the features that the code required. Qt 4 ran for years
|
|
// like that, so it shouldn't be a problem.
|
|
|
|
qt_cpu_features.store(minFeature | QSimdInitialized);
|
|
return;
|
|
# endif
|
|
#endif
|
|
uint f = detectProcessorFeatures();
|
|
QByteArray disable = qgetenv("QT_NO_CPU_FEATURE");
|
|
if (!disable.isEmpty()) {
|
|
disable.prepend(' ');
|
|
for (int i = 0; i < features_count; ++i) {
|
|
if (disable.contains(features_string + features_indices[i]))
|
|
f &= ~(1 << i);
|
|
}
|
|
}
|
|
|
|
if (minFeature != 0 && (f & minFeature) != minFeature) {
|
|
uint missing = minFeature & ~f;
|
|
fprintf(stderr, "Incompatible processor. This Qt build requires the following features:\n ");
|
|
for (int i = 0; i < features_count; ++i) {
|
|
if (missing & (1 << i))
|
|
fprintf(stderr, "%s", features_string + features_indices[i]);
|
|
}
|
|
fprintf(stderr, "\n");
|
|
fflush(stderr);
|
|
qFatal("Aborted. Incompatible processor: missing feature 0x%x -%s.", missing,
|
|
features_string + features_indices[ffs(missing) - 1]);
|
|
}
|
|
|
|
qt_cpu_features.store(f | QSimdInitialized);
|
|
}
|
|
|
|
void qDumpCPUFeatures()
|
|
{
|
|
uint features = qCpuFeatures();
|
|
printf("Processor features: ");
|
|
for (int i = 0; i < features_count; ++i) {
|
|
if (features & (1 << i))
|
|
printf("%s%s", features_string + features_indices[i],
|
|
minFeature & (1 << i) ? "[required]" : "");
|
|
}
|
|
puts("");
|
|
}
|
|
|
|
QT_END_NAMESPACE
|