fix crash when copy'ing latent plugins
[ardour.git] / libs / ardour / sse_functions_xmm.cc
1 /*
2     Copyright (C) 2007 Paul Davis
3     Written by Sampo Savolainen
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
19 */
20
21 #include <xmmintrin.h>
22 #include "ardour/types.h"
23
24 void
25 x86_sse_find_peaks(const ARDOUR::Sample* buf, ARDOUR::pframes_t nframes, float *min, float *max)
26 {
27         __m128 current_max, current_min, work;
28
29         // Load max and min values into all four slots of the XMM registers
30         current_min = _mm_set1_ps(*min);
31         current_max = _mm_set1_ps(*max);
32
33         // Work input until "buf" reaches 16 byte alignment
34         while ( ((intptr_t)buf) % 16 != 0 && nframes > 0) {
35
36                 // Load the next float into the work buffer
37                 work = _mm_set1_ps(*buf);
38
39                 current_min = _mm_min_ps(current_min, work);
40                 current_max = _mm_max_ps(current_max, work);
41
42                 buf++;
43                 nframes--;
44         }
45
46         // use 64 byte prefetch for quadruple quads
47         while (nframes >= 16) {
48 #ifdef COMPILER_MSVC
49                                 _mm_prefetch(((char*)buf+64), 0);  // A total guess! Assumed to be eqivalent to
50 #else                                              // the line below but waiting to be tested !!
51                 __builtin_prefetch(buf+64,0,0);
52 #endif
53                 work = _mm_load_ps(buf);
54                 current_min = _mm_min_ps(current_min, work);
55                 current_max = _mm_max_ps(current_max, work);
56                 buf+=4;
57                 work = _mm_load_ps(buf);
58                 current_min = _mm_min_ps(current_min, work);
59                 current_max = _mm_max_ps(current_max, work);
60                 buf+=4;
61                 work = _mm_load_ps(buf);
62                 current_min = _mm_min_ps(current_min, work);
63                 current_max = _mm_max_ps(current_max, work);
64                 buf+=4;
65                 work = _mm_load_ps(buf);
66                 current_min = _mm_min_ps(current_min, work);
67                 current_max = _mm_max_ps(current_max, work);
68                 buf+=4;
69                 nframes-=16;
70         }
71
72         // work through aligned buffers
73         while (nframes >= 4) {
74
75                 work = _mm_load_ps(buf);
76
77                 current_min = _mm_min_ps(current_min, work);
78                 current_max = _mm_max_ps(current_max, work);
79
80                 buf+=4;
81                 nframes-=4;
82         }
83
84         // work through the rest < 4 samples
85         while ( nframes > 0) {
86
87                 // Load the next float into the work buffer
88                 work = _mm_set1_ps(*buf);
89
90                 current_min = _mm_min_ps(current_min, work);
91                 current_max = _mm_max_ps(current_max, work);
92
93                 buf++;
94                 nframes--;
95         }
96
97         // Find min & max value in current_max through shuffle tricks
98
99         work = current_min;
100         work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(2, 3, 0, 1));
101         work = _mm_min_ps (work, current_min);
102         current_min = work;
103         work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(1, 0, 3, 2));
104         work = _mm_min_ps (work, current_min);
105
106         _mm_store_ss(min, work);
107
108         work = current_max;
109         work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(2, 3, 0, 1));
110         work = _mm_max_ps (work, current_max);
111         current_max = work;
112         work = _mm_shuffle_ps(work, work, _MM_SHUFFLE(1, 0, 3, 2));
113         work = _mm_max_ps (work, current_max);
114
115         _mm_store_ss(max, work);
116 }
117
118
119