2 Copyright (C) 2012 Paul Davis
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 #include "libpbd-config.h"
22 #define _XOPEN_SOURCE 600
23 #include <cstring> // for memset
28 #ifdef PLATFORM_WINDOWS
32 #include "pbd/compose.h"
34 #include "pbd/error.h"
41 FPU* FPU::_instance (0);
43 #if ( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // ARCH_X86
44 #ifndef PLATFORM_WINDOWS
46 /* use __cpuid() as the name to match the MSVC/mingw intrinsic */
49 __cpuid(int regs[4], int cpuid_leaf)
51 int eax, ebx, ecx, edx;
65 :"=m" (eax), "=m" (ebx), "=m" (ecx), "=m" (edx)
68 #if !defined(__i386__)
79 #endif /* !PLATFORM_WINDOWS */
84 _xgetbv (uint32_t xcr)
87 /* it would be nice to make this work on OS X but as long as we use veclib,
88 we don't really need to know about SSE/AVX on that platform.
93 __asm__ volatile ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (xcr));
94 return (static_cast<uint64_t>(edx) << 32) | eax;
98 #endif /* !COMPILER_MSVC */
101 #ifndef _XCR_XFEATURE_ENABLED_MASK
102 #define _XCR_XFEATURE_ENABLED_MASK 0
119 error << _("FPU object instantiated more than once") << endmsg;
122 #if !( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // !ARCH_X86
123 /* Non-Intel architecture, nothing to do here */
127 /* Get the CPU vendor just for kicks */
129 // __cpuid with an InfoType argument of 0 returns the number of
130 // valid Ids in CPUInfo[0] and the CPU identification string in
131 // the other three array elements. The CPU identification string is
132 // not in linear order. The code below arranges the information
133 // in a human readable form. The human readable order is CPUInfo[1] |
134 // CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped
135 // before using memcpy to copy these three array elements to cpu_string.
141 __cpuid (cpu_info, 0);
143 int num_ids = cpu_info[0];
144 std::swap(cpu_info[2], cpu_info[3]);
145 memcpy(cpu_string, &cpu_info[1], 3 * sizeof(cpu_info[1]));
146 cpu_vendor.assign(cpu_string, 3 * sizeof(cpu_info[1]));
148 info << string_compose (_("CPU vendor: %1"), cpu_vendor) << endmsg;
152 /* Now get CPU/FPU flags */
154 __cpuid (cpu_info, 1);
/* AVX may only be used when all three conditions hold (Intel SDM,
   "Detection of AVX instructions"):
     - CPUID.1:ECX bit 27 (OSXSAVE): the OS has enabled XSAVE/XGETBV,
       so XGETBV can be executed;
     - CPUID.1:ECX bit 28 (AVX): the processor implements AVX;
     - XCR0 bits 1 and 2 are BOTH set: the OS preserves XMM and YMM state.
   The last test therefore has to compare against 0x6; a plain "& 0x6"
   would also accept the case where only one of the two bits is set.
   The && short-circuit keeps _xgetbv() from being called unless the
   OSXSAVE bit is set. */
156 if ((cpu_info[2] & (1<<27)) /* OSXSAVE */ &&
157 (cpu_info[2] & (1<<28) /* AVX */) &&
158 ((_xgetbv (_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6)) { /* OS saves both XMM and YMM state */
159 info << _("AVX-capable processor") << endmsg;
160 _flags = Flags (_flags | (HasAVX) );
/* Feature bits taken from the __cpuid (cpu_info, 1) query above:
   cpu_info[3] bit 25 adds HasSSE together with HasFlushToZero,
   cpu_info[3] bit 26 adds HasSSE2. */
163 if (cpu_info[3] & (1<<25)) {
164 _flags = Flags (_flags | (HasSSE|HasFlushToZero));
167 if (cpu_info[3] & (1<<26)) {
168 _flags = Flags (_flags | HasSSE2);
171 /* Figure out CPU/FPU denormal handling capabilities */
173 if (cpu_info[3] & (1 << 24)) {
177 /* DAZ wasn't available in the first version of SSE. Since
178 setting a reserved bit in MXCSR causes a general protection
179 fault, we need to be able to check the availability of this
180 feature without causing problems. To do this, one needs to
181 set up a 512-byte area of memory to save the SSE state to,
182 using fxsave, and then one needs to inspect bytes 28 through
183 31 for the MXCSR_MASK value. If bit 6 is set, DAZ is
184 supported, otherwise, it isn't.
187 #ifndef HAVE_POSIX_MEMALIGN
188 # ifdef PLATFORM_WINDOWS
189 fxbuf = (char **) _aligned_malloc (sizeof (char *), 16);
191 *fxbuf = (char *) _aligned_malloc (512, 16);
194 # warning using default malloc for aligned memory
195 fxbuf = (char **) malloc (sizeof (char *));
197 *fxbuf = (char *) malloc (512);
201 (void) posix_memalign ((void **) &fxbuf, 16, sizeof (char *));
203 (void) posix_memalign ((void **) fxbuf, 16, 512);
207 memset (*fxbuf, 0, 512);
224 uint32_t mxcsr_mask = *((uint32_t*) &((*fxbuf)[28]));
226 /* if the mask is zero, set its default value (from intel specs) */
228 if (mxcsr_mask == 0) {
232 if (mxcsr_mask & (1<<6)) {
233 _flags = Flags (_flags | HasDenormalsAreZero);
236 #if !defined HAVE_POSIX_MEMALIGN && defined PLATFORM_WINDOWS
237 _aligned_free (*fxbuf);
238 _aligned_free (fxbuf);
245 /* finally get the CPU brand */
/* Leaf 0x80000000 is queried first; cpu_info[0] from that call is compared
   against 0x80000004 before any of the brand leaves are read.
   NOTE(review): both sides of that comparison are int. Assuming a 32-bit
   int, 0x80000004 does not fit and parameter_end ends up negative, so a
   small positive cpu_info[0] would also pass the check -- confirm whether
   an unsigned comparison is wanted here. */
247 __cpuid (cpu_info, 0x80000000);
249 const int parameter_end = 0x80000004;
252 if (cpu_info[0] >= parameter_end) {
253 char* cpu_string_ptr = cpu_string;
/* Leaves 0x80000002..0x80000004 each contribute sizeof(cpu_info) bytes,
   appended back to back into cpu_string; the bytes copied are then
   assigned to cpu_brand and reported through info. The pointer test ends
   the loop once the write position reaches the end of cpu_string;
   presumably cpu_string is a multiple of sizeof(cpu_info) bytes long,
   since a whole chunk is copied per pass -- its declaration is not
   visible here, verify. */
255 for (int parameter = 0x80000002; parameter <= parameter_end &&
256 cpu_string_ptr < &cpu_string[sizeof(cpu_string)]; parameter++) {
257 __cpuid(cpu_info, parameter);
258 memcpy(cpu_string_ptr, cpu_info, sizeof(cpu_info));
259 cpu_string_ptr += sizeof(cpu_info);
261 cpu_brand.assign(cpu_string, cpu_string_ptr - cpu_string);
262 info << string_compose (_("CPU brand: %1"), cpu_brand) << endmsg;
265 #endif /* !ARCH_X86 */