X-Git-Url: https://main.carlh.net/gitweb/?a=blobdiff_plain;f=libs%2Fpbd%2Ffpu.cc;h=40f8cca9d6e9371b125d0fce45bd2df0e9bd26d2;hb=cd28d62b26214770cfc269aafe666ec2fb3f2607;hp=5f9d4c2778f0cdee74707c4e1d33db6351a08f1b;hpb=e0aaed6d65f160c328cb8b56d7c6552ee15d65e2;p=ardour.git diff --git a/libs/pbd/fpu.cc b/libs/pbd/fpu.cc index 5f9d4c2778..40f8cca9d6 100644 --- a/libs/pbd/fpu.cc +++ b/libs/pbd/fpu.cc @@ -1,6 +1,29 @@ +/* + Copyright (C) 2012 Paul Davis + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + +*/ + +#include "libpbd-config.h" + #define _XOPEN_SOURCE 600 +#include // for memset #include #include +#include #include "pbd/fpu.h" #include "pbd/error.h" @@ -16,11 +39,12 @@ FPU::FPU () _flags = Flags (0); -#ifndef ARCH_X86 +#if !( (defined __x86_64__) || (defined __i386__) ) // !ARCH_X86 return; -#endif +#else + -#ifndef USE_X86_64_ASM +#ifndef _LP64 //USE_X86_64_ASM asm volatile ( "mov $1, %%eax\n" "pushl %%ebx\n" @@ -29,11 +53,17 @@ FPU::FPU () "popl %%ebx\n" : "=r" (cpuflags) : - : "%eax", "%ecx", "%edx", "memory" + : "%eax", "%ecx", "%edx" ); #else + /* asm notes: although we explicitly save&restore rbx, we must tell + gcc that ebx,rbx is clobbered so that it doesn't try to use it as an intermediate + register when storing rbx. gcc 4.3 didn't make this "mistake", but gcc 4.4 + does, at least on x86_64. + */ + asm volatile ( "pushq %%rbx\n" "movq $1, %%rax\n" @@ -42,11 +72,11 @@ FPU::FPU () "popq %%rbx\n" : "=r" (cpuflags) : - : "%rax", "%rcx", "%rdx", "memory" + : "%rax", "%rbx", "%rcx", "%rdx" ); #endif /* USE_X86_64_ASM */ - + if (cpuflags & (1<<25)) { _flags = Flags (_flags | (HasSSE|HasFlushToZero)); } @@ -57,39 +87,55 @@ FPU::FPU () if (cpuflags & (1 << 24)) { - char* fxbuf = 0; + char** fxbuf = 0; + + /* DAZ wasn't available in the first version of SSE. Since + setting a reserved bit in MXCSR causes a general protection + fault, we need to be able to check the availability of this + feature without causing problems. To do this, one needs to + set up a 512-byte area of memory to save the SSE state to, + using fxsave, and then one needs to inspect bytes 28 through + 31 for the MXCSR_MASK value. If bit 6 is set, DAZ is + supported, otherwise, it isn't. + */ -#ifdef NO_POSIX_MEMALIGN - if ((fxbuf = (char *) malloc(512)) == 0) +#ifndef HAVE_POSIX_MEMALIGN + fxbuf = (char **) malloc (sizeof (char *)); + assert (fxbuf); + *fxbuf = (char *) malloc (512); + assert (*fxbuf); #else - if (posix_memalign ((void**)&fxbuf, 16, 512)) + posix_memalign ((void **) &fxbuf, 16, sizeof (char *)); + assert (fxbuf); + posix_memalign ((void **) fxbuf, 16, 512); + assert (*fxbuf); #endif - { - error << _("cannot allocate 16 byte aligned buffer for h/w feature detection") << endmsg; - } else { - - asm volatile ( - "fxsave (%0)" - : - : "r" (fxbuf) - : "memory" - ); - - uint32_t mxcsr_mask = *((uint32_t*) &fxbuf[28]); - - /* if the mask is zero, set its default value (from intel specs) */ - - if (mxcsr_mask == 0) { - mxcsr_mask = 0xffbf; - } - - if (mxcsr_mask & (1<<6)) { - _flags = Flags (_flags | HasDenormalsAreZero); - } - - free (fxbuf); + + memset (*fxbuf, 0, 512); + + asm volatile ( + "fxsave (%0)" + : + : "r" (*fxbuf) + : "memory" + ); + + uint32_t mxcsr_mask = *((uint32_t*) &((*fxbuf)[28])); + + /* if the mask is zero, set its default value (from intel specs) */ + + if (mxcsr_mask == 0) { + mxcsr_mask = 0xffbf; } + + if (mxcsr_mask & (1<<6)) { + _flags = Flags (_flags | HasDenormalsAreZero); + } + + free (*fxbuf); + free (fxbuf); } +#endif } FPU::~FPU ()