X-Git-Url: https://main.carlh.net/gitweb/?a=blobdiff_plain;ds=sidebyside;f=libs%2Fpbd%2Ffpu.cc;h=78bcd3d8121f0f71e79cd3ebcc700124ae7f8eb9;hb=659701c59b76bdcbfcf5c1b8ce8962eb3f6a1ca2;hp=9838b4565beb7b354e2d8b18ff054331983a66b8;hpb=22b07e0233a29d9633ffa825a79503befaf2e16e;p=ardour.git diff --git a/libs/pbd/fpu.cc b/libs/pbd/fpu.cc index 9838b4565b..78bcd3d812 100644 --- a/libs/pbd/fpu.cc +++ b/libs/pbd/fpu.cc @@ -33,7 +33,7 @@ #include "pbd/fpu.h" #include "pbd/error.h" -#include "i18n.h" +#include "pbd/i18n.h" using namespace PBD; using namespace std; @@ -48,54 +48,41 @@ FPU* FPU::_instance (0); static void __cpuid(int regs[4], int cpuid_leaf) { - int eax, ebx, ecx, edx; asm volatile ( #if defined(__i386__) "pushl %%ebx;\n\t" #endif - "movl %4, %%eax;\n\t" "cpuid;\n\t" - "movl %%eax, %0;\n\t" - "movl %%ebx, %1;\n\t" - "movl %%ecx, %2;\n\t" - "movl %%edx, %3;\n\t" + "movl %%eax, (%1);\n\t" + "movl %%ebx, 4(%1);\n\t" + "movl %%ecx, 8(%1);\n\t" + "movl %%edx, 12(%1);\n\t" #if defined(__i386__) "popl %%ebx;\n\t" #endif - :"=m" (eax), "=m" (ebx), "=m" (ecx), "=m" (edx) - :"r" (cpuid_leaf) - :"%eax", + :"=a" (cpuid_leaf) /* %eax clobbered by CPUID */ + :"S" (regs), "a" (cpuid_leaf) + : #if !defined(__i386__) "%ebx", #endif - "%ecx", "%edx"); - - regs[0] = eax; - regs[1] = ebx; - regs[2] = ecx; - regs[3] = edx; + "%ecx", "%edx", "memory"); } #endif /* !PLATFORM_WINDOWS */ -#ifndef COMPILER_MSVC - -static uint64_t -_xgetbv (uint32_t xcr) -{ -#ifdef __APPLE__ - /* it would be nice to make this work on OS X but as long we use veclib, - we don't really need to know about SSE/AVX on that platform. - */ - return 0; -#else - uint32_t eax, edx; - __asm__ volatile ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (xcr)); - return (static_cast(edx) << 32) | eax; +#ifndef HAVE_XGETBV // Allow definition by build system + #if defined(__MINGW32__) && defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR >= 5 + #define HAVE_XGETBV + #elif defined(_MSC_VER) && _MSC_VER >= 1600 + // '_xgetbv()' was only available from VC10 onwards + #define HAVE_XGETBV + #endif #endif -} -#elif _MSC_VER < 1600 +#ifndef HAVE_XGETBV + +#ifdef COMPILER_MSVC // '_xgetbv()' was only available from VC10 onwards __declspec(noinline) static uint64_t @@ -108,11 +95,29 @@ _xgetbv (uint32_t xcr) // to place this function into its own (unoptimized) source file. __asm { mov ecx, [xcr] - __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 /*xgetbv*/ - } + __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 /*xgetbv*/ + } +} + +#else + +static uint64_t +_xgetbv (uint32_t xcr) +{ +#ifdef __APPLE__ + /* it would be nice to make this work on OS X but as long we use veclib, + we don't really need to know about SSE/AVX on that platform. + */ + return 0; +#else + uint32_t eax, edx; + __asm__ volatile ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (xcr)); + return (static_cast(edx) << 32) | eax; +#endif } #endif /* !COMPILER_MSVC */ +#endif /* !HAVE_XGETBV */ #endif /* ARCH_X86 */ #ifndef _XCR_XFEATURE_ENABLED_MASK @@ -143,6 +148,11 @@ FPU::FPU () error << _("FPU object instantiated more than once") << endmsg; } + if (getenv("ARDOUR_FPU_FLAGS")) { + _flags = Flags (atoi (getenv("ARDOUR_FPU_FLAGS"))); + return; + } + #if !( (defined __x86_64__) || (defined __i386__) || (defined _M_X64) || (defined _M_IX86) ) // !ARCH_X86 /* Non-Intel architecture, nothing to do here */ return;