X-Git-Url: https://main.carlh.net/gitweb/?a=blobdiff_plain;f=libs%2Fardour%2Fsse_functions_xmm.cc;h=6eac488a253a3af89df5f7044a1e60d07ed126b2;hb=f169ff3db3943b9992042e71048cade2ca1fe39d;hp=5554462132d09275bcfb862d0ac562d13f328b05;hpb=45d3ec1437cf661533bc7750c623865def4424df;p=ardour.git diff --git a/libs/ardour/sse_functions_xmm.cc b/libs/ardour/sse_functions_xmm.cc index 5554462132..6eac488a25 100644 --- a/libs/ardour/sse_functions_xmm.cc +++ b/libs/ardour/sse_functions_xmm.cc @@ -1,6 +1,6 @@ /* Copyright (C) 2007 Paul sDavis - Written by Sampo Savolainen + Written by Sampo Savolainen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,10 +19,10 @@ */ #include -#include +#include "ardour/types.h" void -x86_sse_find_peaks(float *buf, nframes_t nframes, float *min, float *max) +x86_sse_find_peaks(const ARDOUR::Sample* buf, ARDOUR::pframes_t nframes, float *min, float *max) { __m128 current_max, current_min, work; @@ -31,22 +31,25 @@ x86_sse_find_peaks(float *buf, nframes_t nframes, float *min, float *max) current_max = _mm_set1_ps(*max); // Work input until "buf" reaches 16 byte alignment - while ( ((unsigned long)buf) % 16 != 0 && nframes > 0) { + while ( ((intptr_t)buf) % 16 != 0 && nframes > 0) { // Load the next float into the work buffer work = _mm_set1_ps(*buf); current_min = _mm_min_ps(current_min, work); current_max = _mm_max_ps(current_max, work); - + buf++; nframes--; } // use 64 byte prefetch for quadruple quads while (nframes >= 16) { +#ifdef COMPILER_MSVC + _mm_prefetch(((char*)buf+64), 0); // A total guess! Assumed to be eqivalent to +#else // the line below but waiting to be tested !! __builtin_prefetch(buf+64,0,0); - +#endif work = _mm_load_ps(buf); current_min = _mm_min_ps(current_min, work); current_max = _mm_max_ps(current_max, work); @@ -77,7 +80,7 @@ x86_sse_find_peaks(float *buf, nframes_t nframes, float *min, float *max) buf+=4; nframes-=4; } - + // work through the rest < 4 samples while ( nframes > 0) { @@ -86,7 +89,7 @@ x86_sse_find_peaks(float *buf, nframes_t nframes, float *min, float *max) current_min = _mm_min_ps(current_min, work); current_max = _mm_max_ps(current_max, work); - + buf++; nframes--; }