2 * Copyright (C) 2007-2016 Paul Davis <paul@linuxaudiosystems.com>
3 * Copyright (C) 2009-2012 David Robillard <d@drobilla.net>
4 * Copyright (C) 2013-2015 John Emmas <john@creativepost.co.uk>
5 * Copyright (C) 2015-2019 Robin Gareus <robin@gareus.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
22 #include "libpbd-config.h"
24 #define _XOPEN_SOURCE 600
25 #include <cstring> // for memset
30 #ifdef PLATFORM_WINDOWS
34 #include "pbd/compose.h"
36 #include "pbd/error.h"
43 FPU* FPU::_instance (0);
45 #if ( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // ARCH_X86
46 #ifndef PLATFORM_WINDOWS
48 /* use __cpuid() as the name to match the MSVC/mingw intrinsic */
51 __cpuid(int regs[4], int cpuid_leaf)
58 "movl %%eax, (%1);\n\t"
59 "movl %%ebx, 4(%1);\n\t"
60 "movl %%ecx, 8(%1);\n\t"
61 "movl %%edx, 12(%1);\n\t"
65 :"=a" (cpuid_leaf) /* %eax clobbered by CPUID */
66 :"S" (regs), "a" (cpuid_leaf)
68 #if !defined(__i386__)
71 "%ecx", "%edx", "memory");
74 #endif /* !PLATFORM_WINDOWS */
76 #ifndef HAVE_XGETBV // Allow definition by build system
77 #if defined(__MINGW32__) && defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR >= 5
79 #elif defined(_MSC_VER) && _MSC_VER >= 1600
80 // '_xgetbv()' was only available from VC10 onwards
89 // '_xgetbv()' was only available from VC10 onwards, so a local definition is supplied here
90 __declspec(noinline) static uint64_t
91 _xgetbv (uint32_t xcr)
95 // N.B. The following would probably work for a pre-VC10 build,
96 // although it might suffer from optimization issues. We'd need
97 // to place this function into its own (unoptimized) source file.
100 __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 /*xgetbv*/
107 _xgetbv (uint32_t xcr)
110 /* it would be nice to make this work on OS X, but as long as we use veclib,
111 we don't really need to know about SSE/AVX on that platform.
116 __asm__ volatile ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (xcr));
117 return (static_cast<uint64_t>(edx) << 32) | eax;
121 #endif /* !COMPILER_MSVC */
122 #endif /* !HAVE_XGETBV */
123 #endif /* ARCH_X86 */
125 #ifndef _XCR_XFEATURE_ENABLED_MASK
126 #define _XCR_XFEATURE_ENABLED_MASK 0
150 error << _("FPU object instantiated more than once") << endmsg;
153 if (getenv("ARDOUR_FPU_FLAGS")) {
154 _flags = Flags (atoi (getenv("ARDOUR_FPU_FLAGS")));
158 #if !( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // !ARCH_X86
159 /* Non-x86 architecture, nothing to do here */
163 /* Get the CPU vendor just for kicks
165 * __cpuid with an InfoType argument of 0 returns the number of
166 * valid Ids in CPUInfo[0] and the CPU identification string in
167 * the other three array elements. The CPU identification string is
168 * not in linear order. The code below arranges the information
169 * in a human readable form. The human readable order is CPUInfo[1] |
170 * CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped
171 * before using memcpy to copy these three array elements to cpu_string.
178 __cpuid (cpu_info, 0);
180 int num_ids = cpu_info[0];
181 std::swap(cpu_info[2], cpu_info[3]);
182 memcpy(cpu_string, &cpu_info[1], 3 * sizeof(cpu_info[1]));
183 cpu_vendor.assign(cpu_string, 3 * sizeof(cpu_info[1]));
185 info << string_compose (_("CPU vendor: %1"), cpu_vendor) << endmsg;
189 /* Now get CPU/FPU flags */
191 __cpuid (cpu_info, 1);
193 if ((cpu_info[2] & (1<<27)) /* OSXSAVE */ &&
194 (cpu_info[2] & (1<<28) /* AVX */) &&
195 ((_xgetbv (_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6)) { /* OS really supports XSAVE */
196 info << _("AVX-capable processor") << endmsg;
197 _flags = Flags (_flags | (HasAVX) );
200 if (cpu_info[3] & (1<<25)) {
201 _flags = Flags (_flags | (HasSSE|HasFlushToZero));
204 if (cpu_info[3] & (1<<26)) {
205 _flags = Flags (_flags | HasSSE2);
208 /* Figure out CPU/FPU denormal handling capabilities */
210 if (cpu_info[3] & (1 << 24)) {
214 /* DAZ wasn't available in the first version of SSE. Since
215 setting a reserved bit in MXCSR causes a general protection
216 fault, we need to be able to check the availability of this
217 feature without causing problems. To do this, one needs to
218 set up a 512-byte area of memory to save the SSE state to,
219 using fxsave, and then one needs to inspect bytes 28 through
220 31 for the MXCSR_MASK value. If bit 6 is set, DAZ is
221 supported; otherwise, it isn't.
224 #ifndef HAVE_POSIX_MEMALIGN
225 # ifdef PLATFORM_WINDOWS
226 fxbuf = (char **) _aligned_malloc (sizeof (char *), 16);
228 *fxbuf = (char *) _aligned_malloc (512, 16);
231 # warning using default malloc for aligned memory
232 fxbuf = (char **) malloc (sizeof (char *));
234 *fxbuf = (char *) malloc (512);
238 (void) posix_memalign ((void **) &fxbuf, 16, sizeof (char *));
240 (void) posix_memalign ((void **) fxbuf, 16, 512);
244 memset (*fxbuf, 0, 512);
261 uint32_t mxcsr_mask = *((uint32_t*) &((*fxbuf)[28]));
263 /* if the mask is zero, set its default value (from Intel specs) */
265 if (mxcsr_mask == 0) {
269 if (mxcsr_mask & (1<<6)) {
270 _flags = Flags (_flags | HasDenormalsAreZero);
273 #if !defined HAVE_POSIX_MEMALIGN && defined PLATFORM_WINDOWS
274 _aligned_free (*fxbuf);
275 _aligned_free (fxbuf);
282 /* finally get the CPU brand */
284 __cpuid (cpu_info, 0x80000000);
286 const int parameter_end = 0x80000004;
289 if (cpu_info[0] >= parameter_end) {
290 char* cpu_string_ptr = cpu_string;
292 for (int parameter = 0x80000002; parameter <= parameter_end &&
293 cpu_string_ptr < &cpu_string[sizeof(cpu_string)]; parameter++) {
294 __cpuid(cpu_info, parameter);
295 memcpy(cpu_string_ptr, cpu_info, sizeof(cpu_info));
296 cpu_string_ptr += sizeof(cpu_info);
298 cpu_brand.assign(cpu_string, cpu_string_ptr - cpu_string);
299 info << string_compose (_("CPU brand: %1"), cpu_brand) << endmsg;
302 #endif /* !ARCH_X86 */