/*
- Copyright (C) 2007 Paul Davis
- Written by Sampo Savolainen
+ Copyright (C) 2007 Paul Davis
+ Written by Sampo Savolainen
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
*/
#include <xmmintrin.h>
-#include <ardour/types.h>
+#include "ardour/types.h"
void
-x86_sse_find_peaks(float *buf, nframes_t nframes, float *min, float *max)
+x86_sse_find_peaks(const ARDOUR::Sample* buf, ARDOUR::pframes_t nframes, float *min, float *max)
{
__m128 current_max, current_min, work;
current_max = _mm_set1_ps(*max);
// Work input until "buf" reaches 16 byte alignment
- while ( ((unsigned long)buf) % 16 != 0 && nframes > 0) {
+ while ( ((intptr_t)buf) % 16 != 0 && nframes > 0) {
// Load the next float into the work buffer
work = _mm_set1_ps(*buf);
current_min = _mm_min_ps(current_min, work);
current_max = _mm_max_ps(current_max, work);
-
+
buf++;
nframes--;
}
+ // use 64 byte prefetch for quadruple quads
+ while (nframes >= 16) {
+ __builtin_prefetch(buf+64,0,0);
+
+ work = _mm_load_ps(buf);
+ current_min = _mm_min_ps(current_min, work);
+ current_max = _mm_max_ps(current_max, work);
+ buf+=4;
+ work = _mm_load_ps(buf);
+ current_min = _mm_min_ps(current_min, work);
+ current_max = _mm_max_ps(current_max, work);
+ buf+=4;
+ work = _mm_load_ps(buf);
+ current_min = _mm_min_ps(current_min, work);
+ current_max = _mm_max_ps(current_max, work);
+ buf+=4;
+ work = _mm_load_ps(buf);
+ current_min = _mm_min_ps(current_min, work);
+ current_max = _mm_max_ps(current_max, work);
+ buf+=4;
+ nframes-=16;
+ }
+
// work through aligned buffers
while (nframes >= 4) {
buf+=4;
nframes-=4;
}
-
+
// work through the rest < 4 samples
while ( nframes > 0) {
current_min = _mm_min_ps(current_min, work);
current_max = _mm_max_ps(current_max, work);
-
+
buf++;
nframes--;
}