X-Git-Url: https://main.carlh.net/gitweb/?a=blobdiff_plain;f=libs%2Fpbd%2Ffpu.cc;h=7c3e0978812e28affd3cb68e661def7adf401484;hb=cf52d6e4b40111eb04b244ec054055a4ec15dbe0;hp=c297d2dd0734547dd4fe926f99c7506d20a1f11e;hpb=a5d7e8446bdc488be9d52cede7f49ad9910cc127;p=ardour.git diff --git a/libs/pbd/fpu.cc b/libs/pbd/fpu.cc index c297d2dd07..7c3e097881 100644 --- a/libs/pbd/fpu.cc +++ b/libs/pbd/fpu.cc @@ -1,5 +1,5 @@ /* - Copyright (C) 2012 Paul Davis + Copyright (C) 2012 Paul Davis This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,142 +25,267 @@ #include #include +#ifdef PLATFORM_WINDOWS +#include +#endif + +#include "pbd/compose.h" #include "pbd/fpu.h" #include "pbd/error.h" -#include "i18n.h" +#include "pbd/i18n.h" using namespace PBD; using namespace std; -FPU::FPU () +FPU* FPU::_instance (0); + +#if ( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // ARCH_X86 +#ifndef PLATFORM_WINDOWS + +/* use __cpuid() as the name to match the MSVC/mingw intrinsic */ + +static void +__cpuid(int regs[4], int cpuid_leaf) { - unsigned long cpuflags = 0; + asm volatile ( +#if defined(__i386__) + "pushl %%ebx;\n\t" +#endif + "cpuid;\n\t" + "movl %%eax, (%1);\n\t" + "movl %%ebx, 4(%1);\n\t" + "movl %%ecx, 8(%1);\n\t" + "movl %%edx, 12(%1);\n\t" +#if defined(__i386__) + "popl %%ebx;\n\t" +#endif + :"=a" (cpuid_leaf) /* %eax clobbered by CPUID */ + :"S" (regs), "a" (cpuid_leaf) + : +#if !defined(__i386__) + "%ebx", +#endif + "%ecx", "%edx", "memory"); +} - _flags = Flags (0); +#endif /* !PLATFORM_WINDOWS */ -#if !( (defined __x86_64__) || (defined __i386__) ) // !ARCH_X86 - return; +#ifndef COMPILER_MSVC + +static uint64_t +_xgetbv (uint32_t xcr) +{ +#ifdef __APPLE__ + /* it would be nice to make this work on OS X but as long we use veclib, + we don't really need to know about SSE/AVX on that platform. + */ + return 0; #else + uint32_t eax, edx; + __asm__ volatile ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (xcr)); + return (static_cast(edx) << 32) | eax; +#endif +} -#ifdef PLATFORM_WINDOWS +#elif _MSC_VER < 1600 -#ifndef USE_X86_64_ASM - /* no 32 bit version of assembler for windows */ - return; -#else - // Get CPU flags using Microsoft function - // It works for both 64 and 32 bit systems - // no need to use assembler for getting info from register, this function does this for us - int cpuInfo[4]; - __cpuid (cpuInfo, 1); - cpuflags = cpuInfo[3]; +// '_xgetbv()' was only available from VC10 onwards +__declspec(noinline) static uint64_t +_xgetbv (uint32_t xcr) +{ + return 0; + + // N.B. The following would probably work for a pre-VC10 build, + // although it might suffer from optimization issues. We'd need + // to place this function into its own (unoptimized) source file. + __asm { + mov ecx, [xcr] + __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 /*xgetbv*/ + } +} + +#endif /* !COMPILER_MSVC */ +#endif /* ARCH_X86 */ + +#ifndef _XCR_XFEATURE_ENABLED_MASK +#define _XCR_XFEATURE_ENABLED_MASK 0 #endif -#else - -#ifndef USE_X86_64_ASM /* *nix; 32 bit version */ - - asm volatile ( - "mov $1, %%eax\n" - "pushl %%ebx\n" - "cpuid\n" - "movl %%edx, %0\n" - "popl %%ebx\n" - : "=r" (cpuflags) - : - : "%eax", "%ecx", "%edx" - ); - -#else /* *nix; 64 bit version */ - - /* asm notes: although we explicitly save&restore rbx, we must tell - gcc that ebx,rbx is clobbered so that it doesn't try to use it as an intermediate - register when storing rbx. gcc 4.3 didn't make this "mistake", but gcc 4.4 - does, at least on x86_64. - */ - - asm volatile ( - "pushq %%rbx\n" - "movq $1, %%rax\n" - "cpuid\n" - "movq %%rdx, %0\n" - "popq %%rbx\n" - : "=r" (cpuflags) - : - : "%rax", "%rbx", "%rcx", "%rdx" - ); - -#endif /* USE_X86_64_ASM */ -#endif /* PLATFORM_WINDOWS */ - - if (cpuflags & (1<<25)) { - _flags = Flags (_flags | (HasSSE|HasFlushToZero)); +FPU* +FPU::instance() +{ + if (!_instance) { + _instance = new FPU; + } + + return _instance; +} + +void +FPU::destroy () +{ + delete _instance; + _instance = 0; +} + +FPU::FPU () + : _flags ((Flags) 0) +{ + if (_instance) { + error << _("FPU object instantiated more than once") << endmsg; } - if (cpuflags & (1<<26)) { - _flags = Flags (_flags | HasSSE2); + if (getenv("ARDOUR_FPU_FLAGS")) { + _flags = Flags (atoi (getenv("ARDOUR_FPU_FLAGS"))); + return; } - if (cpuflags & (1 << 24)) { - - char** fxbuf = 0; - - /* DAZ wasn't available in the first version of SSE. Since - setting a reserved bit in MXCSR causes a general protection - fault, we need to be able to check the availability of this - feature without causing problems. To do this, one needs to - set up a 512-byte area of memory to save the SSE state to, - using fxsave, and then one needs to inspect bytes 28 through - 31 for the MXCSR_MASK value. If bit 6 is set, DAZ is - supported, otherwise, it isn't. - */ - +#if !( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // !ARCH_X86 + /* Non-Intel architecture, nothing to do here */ + return; +#else + + /* Get the CPU vendor just for kicks */ + + // __cpuid with an InfoType argument of 0 returns the number of + // valid Ids in CPUInfo[0] and the CPU identification string in + // the other three array elements. The CPU identification string is + // not in linear order. The code below arranges the information + // in a human readable form. The human readable order is CPUInfo[1] | + // CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped + // before using memcpy to copy these three array elements to cpu_string. + + int cpu_info[4]; + char cpu_string[48]; + string cpu_vendor; + + __cpuid (cpu_info, 0); + + int num_ids = cpu_info[0]; + std::swap(cpu_info[2], cpu_info[3]); + memcpy(cpu_string, &cpu_info[1], 3 * sizeof(cpu_info[1])); + cpu_vendor.assign(cpu_string, 3 * sizeof(cpu_info[1])); + + info << string_compose (_("CPU vendor: %1"), cpu_vendor) << endmsg; + + if (num_ids > 0) { + + /* Now get CPU/FPU flags */ + + __cpuid (cpu_info, 1); + + if ((cpu_info[2] & (1<<27)) /* OSXSAVE */ && + (cpu_info[2] & (1<<28) /* AVX */) && + ((_xgetbv (_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6)) { /* OS really supports XSAVE */ + info << _("AVX-capable processor") << endmsg; + _flags = Flags (_flags | (HasAVX) ); + } + + if (cpu_info[3] & (1<<25)) { + _flags = Flags (_flags | (HasSSE|HasFlushToZero)); + } + + if (cpu_info[3] & (1<<26)) { + _flags = Flags (_flags | HasSSE2); + } + + /* Figure out CPU/FPU denormal handling capabilities */ + + if (cpu_info[3] & (1 << 24)) { + + char** fxbuf = 0; + + /* DAZ wasn't available in the first version of SSE. Since + setting a reserved bit in MXCSR causes a general protection + fault, we need to be able to check the availability of this + feature without causing problems. To do this, one needs to + set up a 512-byte area of memory to save the SSE state to, + using fxsave, and then one needs to inspect bytes 28 through + 31 for the MXCSR_MASK value. If bit 6 is set, DAZ is + supported, otherwise, it isn't. + */ + #ifndef HAVE_POSIX_MEMALIGN - fxbuf = (char **) malloc (sizeof (char *)); - assert (fxbuf); - *fxbuf = (char *) malloc (512); - assert (*fxbuf); +# ifdef PLATFORM_WINDOWS + fxbuf = (char **) _aligned_malloc (sizeof (char *), 16); + assert (fxbuf); + *fxbuf = (char *) _aligned_malloc (512, 16); + assert (*fxbuf); +# else +# warning using default malloc for aligned memory + fxbuf = (char **) malloc (sizeof (char *)); + assert (fxbuf); + *fxbuf = (char *) malloc (512); + assert (*fxbuf); +# endif #else - (void) posix_memalign ((void **) &fxbuf, 16, sizeof (char *)); - assert (fxbuf); - (void) posix_memalign ((void **) fxbuf, 16, 512); - assert (*fxbuf); -#endif - - memset (*fxbuf, 0, 512); - + (void) posix_memalign ((void **) &fxbuf, 16, sizeof (char *)); + assert (fxbuf); + (void) posix_memalign ((void **) fxbuf, 16, 512); + assert (*fxbuf); +#endif + + memset (*fxbuf, 0, 512); + #ifdef COMPILER_MSVC - __asm { - mov eax, fxbuf - fxsave [eax] - }; + char *buf = *fxbuf; + __asm { + mov eax, buf + fxsave [eax] + }; +#else + asm volatile ( + "fxsave (%0)" + : + : "r" (*fxbuf) + : "memory" + ); +#endif + + uint32_t mxcsr_mask = *((uint32_t*) &((*fxbuf)[28])); + + /* if the mask is zero, set its default value (from intel specs) */ + + if (mxcsr_mask == 0) { + mxcsr_mask = 0xffbf; + } + + if (mxcsr_mask & (1<<6)) { + _flags = Flags (_flags | HasDenormalsAreZero); + } + +#if !defined HAVE_POSIX_MEMALIGN && defined PLATFORM_WINDOWS + _aligned_free (*fxbuf); + _aligned_free (fxbuf); #else - asm volatile ( - "fxsave (%0)" - : - : "r" (*fxbuf) - : "memory" - ); + free (*fxbuf); + free (fxbuf); #endif - - uint32_t mxcsr_mask = *((uint32_t*) &((*fxbuf)[28])); - - /* if the mask is zero, set its default value (from intel specs) */ - - if (mxcsr_mask == 0) { - mxcsr_mask = 0xffbf; } - - if (mxcsr_mask & (1<<6)) { - _flags = Flags (_flags | HasDenormalsAreZero); - } - - free (*fxbuf); - free (fxbuf); + + /* finally get the CPU brand */ + + __cpuid (cpu_info, 0x80000000); + + const int parameter_end = 0x80000004; + string cpu_brand; + + if (cpu_info[0] >= parameter_end) { + char* cpu_string_ptr = cpu_string; + + for (int parameter = 0x80000002; parameter <= parameter_end && + cpu_string_ptr < &cpu_string[sizeof(cpu_string)]; parameter++) { + __cpuid(cpu_info, parameter); + memcpy(cpu_string_ptr, cpu_info, sizeof(cpu_info)); + cpu_string_ptr += sizeof(cpu_info); + } + cpu_brand.assign(cpu_string, cpu_string_ptr - cpu_string); + info << string_compose (_("CPU brand: %1"), cpu_brand) << endmsg; + } } -#endif -} +#endif /* !ARCH_X86 */ +} FPU::~FPU () {