/*
Copyright (C) 2007 Paul Davis
- Written by Sampo Savolainen
+ Written by Sampo Savolainen
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
#include "ardour/types.h"
void
-x86_sse_find_peaks(const ARDOUR::Sample* buf, nframes_t nframes, float *min, float *max)
+x86_sse_find_peaks(const ARDOUR::Sample* buf, ARDOUR::pframes_t nframes, float *min, float *max)
{
__m128 current_max, current_min, work;
current_max = _mm_set1_ps(*max);
// Work input until "buf" reaches 16 byte alignment
- while ( ((unsigned long)buf) % 16 != 0 && nframes > 0) {
+ while ( ((intptr_t)buf) % 16 != 0 && nframes > 0) {
// Load the next float into the work buffer
work = _mm_set1_ps(*buf);
current_min = _mm_min_ps(current_min, work);
current_max = _mm_max_ps(current_max, work);
-
+
buf++;
nframes--;
}
// use 64 byte prefetch for quadruple quads
while (nframes >= 16) {
+#ifdef COMPILER_MSVC
+ _mm_prefetch(((char*)buf+64), 0); // A total guess! Assumed to be equivalent to
+#else // the line below but waiting to be tested !!
__builtin_prefetch(buf+64,0,0);
-
+#endif
work = _mm_load_ps(buf);
current_min = _mm_min_ps(current_min, work);
current_max = _mm_max_ps(current_max, work);
buf+=4;
nframes-=4;
}
-
+
// work through the rest < 4 samples
while ( nframes > 0) {
current_min = _mm_min_ps(current_min, work);
current_max = _mm_max_ps(current_max, work);
-
+
buf++;
nframes--;
}