X-Git-Url: https://main.carlh.net/gitweb/?a=blobdiff_plain;f=libs%2Fpbd%2Ffpu.cc;h=7c3e0978812e28affd3cb68e661def7adf401484;hb=cf52d6e4b40111eb04b244ec054055a4ec15dbe0;hp=7450b0d451da629c56693cbb22cbe6891950e18c;hpb=b6f9bbeb2fc3b31e612a8a8ba5a0c91f9f5642b4;p=ardour.git diff --git a/libs/pbd/fpu.cc b/libs/pbd/fpu.cc index 7450b0d451..7c3e097881 100644 --- a/libs/pbd/fpu.cc +++ b/libs/pbd/fpu.cc @@ -1,5 +1,5 @@ /* - Copyright (C) 2012 Paul Davis + Copyright (C) 2012 Paul Davis This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,48 +33,46 @@ #include "pbd/fpu.h" #include "pbd/error.h" -#include "i18n.h" +#include "pbd/i18n.h" using namespace PBD; using namespace std; FPU* FPU::_instance (0); -#ifndef COMPILER_MSVC +#if ( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // ARCH_X86 +#ifndef PLATFORM_WINDOWS -/* use __cpuid() as the name to match the MSVC intrinsic */ +/* use __cpuid() as the name to match the MSVC/mingw intrinsic */ static void __cpuid(int regs[4], int cpuid_leaf) { - int eax, ebx, ecx, edx; asm volatile ( #if defined(__i386__) "pushl %%ebx;\n\t" #endif - "movl %4, %%eax;\n\t" "cpuid;\n\t" - "movl %%eax, %0;\n\t" - "movl %%ebx, %1;\n\t" - "movl %%ecx, %2;\n\t" - "movl %%edx, %3;\n\t" + "movl %%eax, (%1);\n\t" + "movl %%ebx, 4(%1);\n\t" + "movl %%ecx, 8(%1);\n\t" + "movl %%edx, 12(%1);\n\t" #if defined(__i386__) "popl %%ebx;\n\t" #endif - :"=m" (eax), "=m" (ebx), "=m" (ecx), "=m" (edx) - :"r" (cpuid_leaf) - :"%eax", + :"=a" (cpuid_leaf) /* %eax clobbered by CPUID */ + :"S" (regs), "a" (cpuid_leaf) + : #if !defined(__i386__) "%ebx", #endif - "%ecx", "%edx"); - - regs[0] = eax; - regs[1] = ebx; - regs[2] = ecx; - regs[3] = edx; + "%ecx", "%edx", "memory"); } +#endif /* !PLATFORM_WINDOWS */ + +#ifndef COMPILER_MSVC + static uint64_t _xgetbv (uint32_t xcr) { @@ -90,9 +88,29 @@ _xgetbv (uint32_t xcr) #endif } -#define _XCR_XFEATURE_ENABLED_MASK 0 +#elif _MSC_VER < 1600 + +// '_xgetbv()' was only available from VC10 onwards +__declspec(noinline) static uint64_t +_xgetbv (uint32_t xcr) +{ + return 0; + + // N.B. The following would probably work for a pre-VC10 build, + // although it might suffer from optimization issues. We'd need + // to place this function into its own (unoptimized) source file. + __asm { + mov ecx, [xcr] + __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 /*xgetbv*/ + } +} #endif /* !COMPILER_MSVC */ +#endif /* ARCH_X86 */ + +#ifndef _XCR_XFEATURE_ENABLED_MASK +#define _XCR_XFEATURE_ENABLED_MASK 0 +#endif FPU* FPU::instance() @@ -104,6 +122,13 @@ FPU::instance() return _instance; } +void +FPU::destroy () +{ + delete _instance; + _instance = 0; +} + FPU::FPU () : _flags ((Flags) 0) { @@ -111,6 +136,11 @@ FPU::FPU () error << _("FPU object instantiated more than once") << endmsg; } + if (getenv("ARDOUR_FPU_FLAGS")) { + _flags = Flags (atoi (getenv("ARDOUR_FPU_FLAGS"))); + return; + } + #if !( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // !ARCH_X86 /* Non-Intel architecture, nothing to do here */ return; @@ -140,14 +170,14 @@ FPU::FPU () info << string_compose (_("CPU vendor: %1"), cpu_vendor) << endmsg; if (num_ids > 0) { - + /* Now get CPU/FPU flags */ - + __cpuid (cpu_info, 1); - if ((cpu_info[2] & (1<<27)) /* AVX */ && - (cpu_info[2] & (1<<28) /* (OS)XSAVE */) && - (_xgetbv (_XCR_XFEATURE_ENABLED_MASK) & 0x6)) { /* OS really supports XSAVE */ + if ((cpu_info[2] & (1<<27)) /* OSXSAVE */ && + (cpu_info[2] & (1<<28) /* AVX */) && + ((_xgetbv (_XCR_XFEATURE_ENABLED_MASK) & 0x6) == 0x6)) { /* OS really supports XSAVE */ info << _("AVX-capable processor") << endmsg; _flags = Flags (_flags | (HasAVX) ); } @@ -161,11 +191,11 @@ FPU::FPU () } /* Figure out CPU/FPU denormal handling capabilities */ - + if (cpu_info[3] & (1 << 24)) { - + char** fxbuf = 0; - + /* DAZ wasn't available in the first version of SSE. Since setting a reserved bit in MXCSR causes a general protection fault, we need to be able to check the availability of this @@ -194,10 +224,10 @@ FPU::FPU () assert (fxbuf); (void) posix_memalign ((void **) fxbuf, 16, 512); assert (*fxbuf); -#endif - +#endif + memset (*fxbuf, 0, 512); - + #ifdef COMPILER_MSVC char *buf = *fxbuf; __asm { @@ -212,19 +242,19 @@ FPU::FPU () : "memory" ); #endif - + uint32_t mxcsr_mask = *((uint32_t*) &((*fxbuf)[28])); - + /* if the mask is zero, set its default value (from intel specs) */ - + if (mxcsr_mask == 0) { mxcsr_mask = 0xffbf; } - + if (mxcsr_mask & (1<<6)) { _flags = Flags (_flags | HasDenormalsAreZero); - } - + } + #if !defined HAVE_POSIX_MEMALIGN && defined PLATFORM_WINDOWS _aligned_free (*fxbuf); _aligned_free (fxbuf); @@ -233,7 +263,6 @@ FPU::FPU () free (fxbuf); #endif } -#endif /* finally get the CPU brand */ @@ -241,10 +270,10 @@ FPU::FPU () const int parameter_end = 0x80000004; string cpu_brand; - + if (cpu_info[0] >= parameter_end) { char* cpu_string_ptr = cpu_string; - + for (int parameter = 0x80000002; parameter <= parameter_end && cpu_string_ptr < &cpu_string[sizeof(cpu_string)]; parameter++) { __cpuid(cpu_info, parameter); @@ -253,9 +282,10 @@ FPU::FPU () } cpu_brand.assign(cpu_string, cpu_string_ptr - cpu_string); info << string_compose (_("CPU brand: %1"), cpu_brand) << endmsg; - } + } } -} +#endif /* !ARCH_X86 */ +} FPU::~FPU () {