From fa7c141813844ce5b4c93ca126ed84ab568a2892 Mon Sep 17 00:00:00 2001 From: Taybin Rutkin Date: Wed, 18 May 2005 16:08:37 +0000 Subject: [PATCH] Updated to soundtouch-1.3 (plus modifications) git-svn-id: svn://localhost/trunk/ardour2@13 d708f5d6-7413-0410-9779-e7cbd77b26cf --- libs/soundtouch/3dnow_win.cpp | 350 ++++++++++ libs/soundtouch/AAFilter.cpp | 184 +++++ libs/soundtouch/AAFilter.h | 91 +++ libs/soundtouch/BPMDetect.h | 159 +++++ libs/soundtouch/COPYING | 340 +++++++++ libs/soundtouch/FIFOSampleBuffer.cpp | 252 +++++++ libs/soundtouch/FIFOSampleBuffer.h | 174 +++++ libs/soundtouch/FIFOSamplePipe.h | 217 ++++++ libs/soundtouch/FIRFilter.cpp | 254 +++++++ libs/soundtouch/FIRFilter.h | 160 +++++ libs/soundtouch/README | 191 +++++ libs/soundtouch/RateTransposer.cpp | 611 ++++++++++++++++ libs/soundtouch/RateTransposer.h | 158 +++++ libs/soundtouch/SConscript | 23 + libs/soundtouch/STTypes.h | 110 +++ libs/soundtouch/SoundTouch.cpp | 472 +++++++++++++ libs/soundtouch/SoundTouch.h | 252 +++++++ libs/soundtouch/TDStretch.cpp | 923 +++++++++++++++++++++++++ libs/soundtouch/TDStretch.h | 253 +++++++ libs/soundtouch/cpu_detect.h | 62 ++ libs/soundtouch/cpu_detect_x86_gcc.cpp | 138 ++++ libs/soundtouch/cpu_detect_x86_win.cpp | 126 ++++ libs/soundtouch/mmx_gcc.cpp | 534 ++++++++++++++ libs/soundtouch/mmx_win.cpp | 487 +++++++++++++ libs/soundtouch/sse_win.cpp | 367 ++++++++++ 25 files changed, 6888 insertions(+) create mode 100644 libs/soundtouch/3dnow_win.cpp create mode 100644 libs/soundtouch/AAFilter.cpp create mode 100644 libs/soundtouch/AAFilter.h create mode 100644 libs/soundtouch/BPMDetect.h create mode 100644 libs/soundtouch/COPYING create mode 100644 libs/soundtouch/FIFOSampleBuffer.cpp create mode 100644 libs/soundtouch/FIFOSampleBuffer.h create mode 100644 libs/soundtouch/FIFOSamplePipe.h create mode 100644 libs/soundtouch/FIRFilter.cpp create mode 100644 libs/soundtouch/FIRFilter.h create mode 100644 libs/soundtouch/README create mode 100644 
libs/soundtouch/RateTransposer.cpp create mode 100644 libs/soundtouch/RateTransposer.h create mode 100644 libs/soundtouch/SConscript create mode 100644 libs/soundtouch/STTypes.h create mode 100644 libs/soundtouch/SoundTouch.cpp create mode 100644 libs/soundtouch/SoundTouch.h create mode 100644 libs/soundtouch/TDStretch.cpp create mode 100644 libs/soundtouch/TDStretch.h create mode 100644 libs/soundtouch/cpu_detect.h create mode 100644 libs/soundtouch/cpu_detect_x86_gcc.cpp create mode 100644 libs/soundtouch/cpu_detect_x86_win.cpp create mode 100644 libs/soundtouch/mmx_gcc.cpp create mode 100644 libs/soundtouch/mmx_win.cpp create mode 100644 libs/soundtouch/sse_win.cpp diff --git a/libs/soundtouch/3dnow_win.cpp b/libs/soundtouch/3dnow_win.cpp new file mode 100644 index 0000000000..0d593214b7 --- /dev/null +++ b/libs/soundtouch/3dnow_win.cpp @@ -0,0 +1,350 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// Win32 version of the AMD 3DNow! optimized routines for AMD K6-2/Athlon +/// processors. All 3DNow! optimized functions have been gathered into this +/// single source code file, regardless to their class or original source code +/// file, in order to ease porting the library to other compiler and processor +/// platforms. +/// +/// By the way; the performance gain depends heavily on the CPU generation: On +/// K6-2 these routines provided speed-up of even 2.4 times, while on Athlon the +/// difference to the original routines stayed at unremarkable 8%! Such a small +/// improvement on Athlon is due to 3DNow can perform only two operations in +/// parallel, and obviously also the Athlon FPU is doing a very good job with +/// the standard C floating point routines! Here these routines are anyway, +/// although it might not be worth the effort to convert these to GCC platform, +/// for Athlon CPU at least. 
The situation is different regarding the SSE +/// optimizations though, thanks to the four parallel operations of SSE that +/// already make a difference. +/// +/// This file is to be compiled in Windows platform with Microsoft Visual C++ +/// Compiler. Please see '3dnow_gcc.cpp' for the gcc compiler version for all +/// GNU platforms (if file supplied). +/// +/// NOTICE: If using Visual Studio 6.0, you'll need to install the "Visual C++ +/// 6.0 processor pack" update to support 3DNow! instruction set. The update is +/// available for download at Microsoft Developers Network, see here: +/// http://msdn.microsoft.com/vstudio/downloads/tools/ppack/default.aspx +/// +/// If the above URL is expired or removed, go to "http://msdn.microsoft.com" and +/// perform a search with keywords "processor pack". +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai @ iki.fi +/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date$ +// File revision : $Revision$ +// +// $Id$ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#include "cpu_detect.h" +#include "STTypes.h" + +#ifndef WIN32 +#error "wrong platform - this source code file is exclusively for Win32 platform" +#endif + +using namespace soundtouch; + +#ifdef ALLOW_3DNOW +// 3DNow! routines available only with float sample type + +////////////////////////////////////////////////////////////////////////////// +// +// implementation of 3DNow! optimized functions of class 'TDStretch3DNow' +// +////////////////////////////////////////////////////////////////////////////// + +#include "TDStretch.h" +#include + +// these are declared in 'TDStretch.cpp' +extern int scanOffsets[4][24]; + + +// Calculates cross correlation of two buffers +double TDStretch3DNow::calcCrossCorrStereo(const float *pV1, const float *pV2) const +{ + uint overlapLengthLocal = overlapLength; + float corr; + + // Calculates the cross-correlation value between 'pV1' and 'pV2' vectors + /* + c-pseudocode: + + corr = 0; + for (i = 0; i < overlapLength / 4; i ++) + { + corr += pV1[0] * pV2[0]; + pV1[1] * pV2[1]; + pV1[2] * pV2[2]; + pV1[3] * pV2[3]; + pV1[4] * pV2[4]; + pV1[5] * pV2[5]; + pV1[6] * pV2[6]; + pV1[7] * pV2[7]; + + pV1 += 8; + pV2 += 8; + } + */ + + _asm + { + // give prefetch hints to CPU of what data are to be needed soonish. + // give more aggressive hints on pV1 as that changes more between different calls + // while pV2 stays the same. 
+ prefetch [pV1] + prefetch [pV2] + prefetch [pV1 + 32] + + mov eax, dword ptr pV2 + mov ebx, dword ptr pV1 + + pxor mm0, mm0 + + mov ecx, overlapLengthLocal + shr ecx, 2 // div by four + + loop1: + movq mm1, [eax] + prefetch [eax + 32] // give a prefetch hint to CPU what data are to be needed soonish + pfmul mm1, [ebx] + prefetch [ebx + 64] // give a prefetch hint to CPU what data are to be needed soonish + + movq mm2, [eax + 8] + pfadd mm0, mm1 + pfmul mm2, [ebx + 8] + + movq mm3, [eax + 16] + pfadd mm0, mm2 + pfmul mm3, [ebx + 16] + + movq mm4, [eax + 24] + pfadd mm0, mm3 + pfmul mm4, [ebx + 24] + + add eax, 32 + pfadd mm0, mm4 + add ebx, 32 + + dec ecx + jnz loop1 + + // add halfs of mm0 together and return the result. + // note: mm1 is used as a dummy parameter only, we actually don't care about it's value + pfacc mm0, mm1 + movd corr, mm0 + femms + } + + return corr; +} + + + + +////////////////////////////////////////////////////////////////////////////// +// +// implementation of 3DNow! optimized functions of class 'FIRFilter' +// +////////////////////////////////////////////////////////////////////////////// + +#include "FIRFilter.h" + +FIRFilter3DNow::FIRFilter3DNow() : FIRFilter() +{ + filterCoeffsUnalign = NULL; +} + + +FIRFilter3DNow::~FIRFilter3DNow() +{ + delete[] filterCoeffsUnalign; +} + + +// (overloaded) Calculates filter coefficients for 3DNow! routine +void FIRFilter3DNow::setCoefficients(const float *coeffs, uint newLength, uint uResultDivFactor) +{ + uint i; + float fDivider; + + FIRFilter::setCoefficients(coeffs, newLength, uResultDivFactor); + + // Scale the filter coefficients so that it won't be necessary to scale the filtering result + // also rearrange coefficients suitably for 3DNow! 
+ // Ensure that filter coeffs array is aligned to 16-byte boundary + delete[] filterCoeffsUnalign; + filterCoeffsUnalign = new float[2 * newLength + 4]; + filterCoeffsAlign = (float *)(((uint)filterCoeffsUnalign + 15) & -16); + + fDivider = (float)resultDivider; + + // duplicate each scaled coefficient for the left and the right channel + for (i = 0; i < newLength; i ++) + { + filterCoeffsAlign[2 * i + 0] = + filterCoeffsAlign[2 * i + 1] = coeffs[i + 0] / fDivider; + } +} + + +// 3DNow!-optimized version of the filter routine for stereo sound. +// Returns the number of stereo output samples written to 'dest' (always even), +// or 0 without touching 'dest' when the input is too short to produce any. +uint FIRFilter3DNow::evaluateFilterStereo(float *dest, const float *src, const uint numSamples) const +{ + float *filterCoeffsLocal = filterCoeffsAlign; + uint count = (numSamples - length) & -2; + uint lengthLocal = length / 4; + + assert(length != 0); + assert(count % 2 == 0); + + // The asm loops of this routine test their counters only after the first pass + // (dec/jnz), so a zero trip count would wrap around and overrun the buffers. + // Bail out when there is nothing to compute; this also covers the case + // 'numSamples < length', where the unsigned subtraction above wraps. + if (lengthLocal == 0 || numSamples < length + 2) return 0; + + /* original code: + + double suml1, suml2; + double sumr1, sumr2; + uint i, j; + + for (j = 0; j < count; j += 2) + { + const float *ptr; + + suml1 = sumr1 = 0.0; + suml2 = sumr2 = 0.0; + ptr = src; + filterCoeffsLocal = filterCoeffs; + for (i = 0; i < lengthLocal; i ++) + { + // unroll loop for efficiency.
+ + suml1 += ptr[0] * filterCoeffsLocal[0] + + ptr[2] * filterCoeffsLocal[2] + + ptr[4] * filterCoeffsLocal[4] + + ptr[6] * filterCoeffsLocal[6]; + + sumr1 += ptr[1] * filterCoeffsLocal[1] + + ptr[3] * filterCoeffsLocal[3] + + ptr[5] * filterCoeffsLocal[5] + + ptr[7] * filterCoeffsLocal[7]; + + suml2 += ptr[8] * filterCoeffsLocal[0] + + ptr[10] * filterCoeffsLocal[2] + + ptr[12] * filterCoeffsLocal[4] + + ptr[14] * filterCoeffsLocal[6]; + + sumr2 += ptr[9] * filterCoeffsLocal[1] + + ptr[11] * filterCoeffsLocal[3] + + ptr[13] * filterCoeffsLocal[5] + + ptr[15] * filterCoeffsLocal[7]; + + ptr += 16; + filterCoeffsLocal += 8; + } + dest[0] = (float)suml1; + dest[1] = (float)sumr1; + dest[2] = (float)suml2; + dest[3] = (float)sumr2; + + src += 4; + dest += 4; + } + + */ + _asm + { + mov eax, dword ptr dest + mov ebx, dword ptr src + mov edx, count + shr edx, 1 + + loop1: + // "outer loop" : during each round 2*2 output samples are calculated + prefetch [ebx] // give a prefetch hint to CPU what data are to be needed soonish + prefetch [filterCoeffsLocal] // give a prefetch hint to CPU what data are to be needed soonish + + mov esi, ebx + mov edi, filterCoeffsLocal + pxor mm0, mm0 + pxor mm1, mm1 + mov ecx, lengthLocal + + loop2: + // "inner loop" : during each round four FIR filter taps are evaluated for 2*2 output samples + movq mm2, [edi] + movq mm3, mm2 + prefetch [edi + 32] // give a prefetch hint to CPU what data are to be needed soonish + pfmul mm2, [esi] + prefetch [esi + 32] // give a prefetch hint to CPU what data are to be needed soonish + pfmul mm3, [esi + 8] + + movq mm4, [edi + 8] + movq mm5, mm4 + pfadd mm0, mm2 + pfmul mm4, [esi + 8] + pfadd mm1, mm3 + pfmul mm5, [esi + 16] + + movq mm2, [edi + 16] + movq mm6, mm2 + pfadd mm0, mm4 + pfmul mm2, [esi + 16] + pfadd mm1, mm5 + pfmul mm6, [esi + 24] + + movq mm3, [edi + 24] + movq mm7, mm3 + pfadd mm0, mm2 + pfmul mm3, [esi + 24] + pfadd mm1, mm6 + pfmul mm7, [esi + 32] + add esi, 32 + pfadd mm0, mm3 + add 
edi, 32 + pfadd mm1, mm7 + + dec ecx + jnz loop2 + + movq [eax], mm0 + add ebx, 16 + movq [eax + 8], mm1 + add eax, 16 + + dec edx + jnz loop1 + + femms + } + + return count; +} + + +#endif // ALLOW_3DNOW diff --git a/libs/soundtouch/AAFilter.cpp b/libs/soundtouch/AAFilter.cpp new file mode 100644 index 0000000000..d135218c54 --- /dev/null +++ b/libs/soundtouch/AAFilter.cpp @@ -0,0 +1,184 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// FIR low-pass (anti-alias) filter with filter coefficient design routine and +/// MMX optimization. +/// +/// Anti-alias filter is used to prevent folding of high frequencies when +/// transposing the sample rate with interpolation. +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai @ iki.fi +/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date$ +// File revision : $Revision$ +// +// $Id$ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#include <memory.h> +#include <assert.h> +#include <math.h> +#include <stdlib.h> +#include "AAFilter.h" +#include "FIRFilter.h" + +using namespace soundtouch; + +#define PI 3.14159265358979323846 +#define TWOPI (2 * PI) + +/***************************************************************************** + * + * Implementation of the class 'AAFilter' + * + *****************************************************************************/ +// Creates the FIR filter object and designs 'length' taps at the default cutoff 0.5. +AAFilter::AAFilter(const uint length) +{ + pFIR = FIRFilter::newInstance(); + cutoffFreq = 0.5; + setLength(length); +} + + +// Releases the FIR filter object created by the constructor. +AAFilter::~AAFilter() +{ + delete pFIR; +} + + + +// Sets new anti-alias filter cut-off edge frequency, scaled to +// sampling frequency (nyquist frequency = 0.5). +// The filter will cut frequencies higher than the given frequency.
+void AAFilter::setCutoffFreq(const double newCutoffFreq) +{ + cutoffFreq = newCutoffFreq; + calculateCoeffs(); +} + + + +// Sets number of FIR filter taps and redesigns the coefficients +void AAFilter::setLength(const uint newLength) +{ + length = newLength; + calculateCoeffs(); +} + + + +// Calculates coefficients for a low-pass FIR filter using Hamming window +void AAFilter::calculateCoeffs() +{ + uint i; + double cntTemp, temp, tempCoeff,h, w; + double fc2, wc; + double scaleCoeff, sum; + double *work; + SAMPLETYPE *coeffs; + + assert(length > 0); + assert(length % 4 == 0); + assert(cutoffFreq >= 0); + assert(cutoffFreq <= 0.5); + + work = new double[length]; + coeffs = new SAMPLETYPE[length]; + + fc2 = 2.0 * cutoffFreq; + wc = PI * fc2; + tempCoeff = TWOPI / (double)length; + + sum = 0; + for (i = 0; i < length; i ++) + { + cntTemp = (double)i - (double)(length / 2); + + temp = cntTemp * wc; + if (temp != 0) + { + h = sin(temp) / temp; // sinc function; overall gain is normalized by 'sum' below + } + else + { + h = 1.0; // limit of sin(x)/x at x = 0, consistent with the other taps + } + w = 0.54 + 0.46 * cos(tempCoeff * cntTemp); // hamming window + + temp = w * h; + work[i] = temp; + + // calc net sum of coefficients + sum += temp; + } + + // ensure the sum of coefficients is larger than zero + assert(sum > 0); + + // ensure we've really designed a lowpass filter... + assert(work[length/2] > 0); + assert(work[length/2 + 1] > -1e-6); + assert(work[length/2 - 1] > -1e-6); + + // Calculate a scaling coefficient in such a way that the result can be + // divided by 16384 + scaleCoeff = 16384.0f / sum; + + for (i = 0; i < length; i ++) + { + // scale & round to nearest integer + temp = work[i] * scaleCoeff; + temp += (temp >= 0) ? 0.5 : -0.5; + // ensure no overflows + assert(temp >= -32768 && temp <= 32767); + coeffs[i] = (SAMPLETYPE)temp; + } + + // Set coefficients. Use divide factor 14 => divide result by 2^14 = 16384 + pFIR->setCoefficients(coeffs, length, 14); + + delete[] work; + delete[] coeffs; +} + + +// Applies the filter to the given sequence of samples.
+// Note : The number of output samples is smaller than the number of input +// samples by the value of 'filter length'. +uint AAFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels) const +{ + return pFIR->evaluate(dest, src, numSamples, numChannels); +} + +// Returns the length reported by the underlying FIR filter object. +uint AAFilter::getLength() const +{ + return pFIR->getLength(); +} diff --git a/libs/soundtouch/AAFilter.h b/libs/soundtouch/AAFilter.h new file mode 100644 index 0000000000..9bd4a8bbce --- /dev/null +++ b/libs/soundtouch/AAFilter.h @@ -0,0 +1,91 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// FIR low-pass (anti-alias) filter with filter coefficient design routine. +/// Declares the class 'AAFilter', which designs the filter coefficients and +/// applies them through a 'FIRFilter' object. +/// +/// Anti-alias filter is used to prevent folding of high frequencies when +/// transposing the sample rate with interpolation. +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai @ iki.fi +/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date$ +// File revision : $Revision$ +// +// $Id$ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef AAFilter_H +#define AAFilter_H + +#include "STTypes.h" + +namespace soundtouch +{ + +class AAFilter +{ +protected: + class FIRFilter *pFIR; + + /// Low-pass filter cut-off frequency scaled to sampling frequency, negative = invalid + double cutoffFreq; + + /// Number of filter taps + uint length; + + /// Calculate the FIR coefficients realizing the given cutoff-frequency + void calculateCoeffs(); +public: + AAFilter(uint length); + + ~AAFilter(); + + /// Sets new anti-alias filter cut-off edge frequency, scaled to sampling + /// frequency (nyquist frequency = 0.5). The filter will cut off the + /// frequencies higher than that. + void setCutoffFreq(double newCutoffFreq); + + /// Sets number of FIR filter taps, i.e. ~filter complexity + void setLength(uint newLength); + + uint getLength() const; + + /// Applies the filter to the given sequence of samples. + /// Note : The number of output samples is smaller than the number of + /// input samples by the value of 'filter length'. + uint evaluate(SAMPLETYPE *dest, + const SAMPLETYPE *src, + uint numSamples, + uint numChannels) const; +}; + +} + +#endif diff --git a/libs/soundtouch/BPMDetect.h b/libs/soundtouch/BPMDetect.h new file mode 100644 index 0000000000..8cdd4df184 --- /dev/null +++ b/libs/soundtouch/BPMDetect.h @@ -0,0 +1,159 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// Beats-per-minute (BPM) detection routine. +/// +/// The beat detection algorithm works as follows: +/// - Use function 'inputSamples' to input chunks of samples to the class for +/// analysis.
It's a good idea to enter a large sound file or stream in smallish +/// chunks of around a few kilosamples in order not to consume too much RAM. +/// - Input sound data is decimated to approx 500 Hz to reduce calculation burden, +/// which is basically ok as low (bass) frequencies mostly determine the beat rate. +/// Simple averaging is used for anti-alias filtering because the resulting signal +/// quality isn't of that high importance. +/// - Decimated sound data is enveloped, i.e. the amplitude shape is detected by +/// taking the absolute value, which is smoothed by a sliding average. Signal levels +/// that are below a couple of times the general RMS amplitude level are cut away to +/// leave only notable peaks there. +/// - Repeating sound patterns (e.g. beats) are detected by calculating a short-term +/// autocorrelation function of the enveloped signal. +/// - After the whole sound data file has been analyzed as above, the bpm level is +/// detected by function 'getBpm' that finds the highest peak of the autocorrelation +/// function, calculates its precise location and converts this reading to BPM. +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai @ iki.fi +/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date$ +// File revision : $Revision$ +// +// $Id$ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version.
+// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef _BPMDetect_H_ +#define _BPMDetect_H_ + +#include "STTypes.h" +#include "FIFOSampleBuffer.h" + +/// Minimum allowed BPM rate. Used to restrict accepted result above a reasonable limit. +#define MIN_BPM 45 + +/// Maximum allowed BPM rate. Used to restrict accepted result below a reasonable limit. +#define MAX_BPM 230 + + +/// Class for calculating BPM rate for audio data. +class BPMDetect +{ +protected: + /// Auto-correlation accumulator bins. + float *xcorr; + + /// Amplitude envelope sliding average approximation level accumulator + float envelopeAccu; + + /// RMS volume sliding average approximation level accumulator + float RMSVolumeAccu; + + /// Sample average counter. + int decimateCount; + + /// Sample average accumulator for FIFO-like decimation. + soundtouch::LONG_SAMPLETYPE decimateSum; + + /// Decimate sound by this coefficient to reach approx. 500 Hz. + int decimateBy; + + /// Auto-correlation window length + int windowLen; + + /// Number of channels (1 = mono, 2 = stereo) + int channels; + + /// Sample rate in Hz + int sampleRate; + + /// Beginning of auto-correlation window: Autocorrelation isn't being updated for + /// this many of the first correlation bins. + int windowStart; + + /// FIFO-buffer for decimated processing samples. + soundtouch::FIFOSampleBuffer *buffer; + + /// Initialize the class for processing.
+ void init(int numChannels, int sampleRate); + + /// Updates auto-correlation function for given number of decimated samples that + /// are read from the internal 'buffer' pipe (samples aren't removed from the pipe + /// though). + void updateXCorr(int process_samples ///< How many samples are processed. + ); + + /// Decimates samples to approx. 500 Hz. + /// + /// \return Number of output samples. + int decimate(soundtouch::SAMPLETYPE *dest, ///< Destination buffer + const soundtouch::SAMPLETYPE *src, ///< Source sample buffer + int numsamples ///< Number of source samples. + ); + + /// Calculates amplitude envelope for the buffer of samples. + /// Result is output to 'samples'. + void calcEnvelope(soundtouch::SAMPLETYPE *samples, ///< Pointer to input/output data buffer + int numsamples ///< Number of samples in buffer + ); + +public: + /// Constructor. + BPMDetect(int numChannels, ///< Number of channels in sample data. + int sampleRate ///< Sample rate in Hz. + ); + + /// Destructor. + virtual ~BPMDetect(); + + /// Inputs a block of samples for analyzing: Envelopes the samples and then + /// updates the autocorrelation estimation. When the whole song data has been input + /// in smaller blocks using this function, read the resulting bpm with the 'getBpm' + /// function. + /// + /// Notice that data in the 'samples' array may be modified during processing. + void inputSamples(soundtouch::SAMPLETYPE *samples, ///< Pointer to input/working data buffer + int numSamples ///< Number of samples in buffer + ); + + + /// Analyzes the results and returns the BPM rate. Use this function to read the result + /// after the whole song data has been input to the class by consecutive calls of + /// the 'inputSamples' function. + /// + /// \return Beats-per-minute rate, or zero if detection failed.
+ float getBpm(); +}; + +#endif // _BPMDetect_H_ diff --git a/libs/soundtouch/COPYING b/libs/soundtouch/COPYING new file mode 100644 index 0000000000..60549be514 --- /dev/null +++ b/libs/soundtouch/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. 
You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) 19yy + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. 
diff --git a/libs/soundtouch/FIFOSampleBuffer.cpp b/libs/soundtouch/FIFOSampleBuffer.cpp new file mode 100644 index 0000000000..f158ee7949 --- /dev/null +++ b/libs/soundtouch/FIFOSampleBuffer.cpp @@ -0,0 +1,252 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// A buffer class for temporarily storaging sound samples, operates as a +/// first-in-first-out pipe. +/// +/// Samples are added to the end of the sample buffer with the 'putSamples' +/// function, and are received from the beginning of the buffer by calling +/// the 'receiveSamples' function. The class automatically removes the +/// outputted samples from the buffer, as well as grows the buffer size +/// whenever necessary. +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai @ iki.fi +/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date$ +// File revision : $Revision$ +// +// $Id$ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include + +#include "FIFOSampleBuffer.h" + +using namespace soundtouch; + +// Constructor +FIFOSampleBuffer::FIFOSampleBuffer(uint numChannels) +{ + sizeInBytes = 0; // reasonable initial value + buffer = NULL; //new SAMPLETYPE[sizeInBytes / sizeof(SAMPLETYPE)]; + bufferUnaligned = NULL; + samplesInBuffer = 0; + bufferPos = 0; + channels = numChannels; +} + + +// destructor +FIFOSampleBuffer::~FIFOSampleBuffer() +{ + delete[] bufferUnaligned; +} + + +// Sets number of channels, 1 = mono, 2 = stereo +void FIFOSampleBuffer::setChannels(const uint numChannels) +{ + uint usedBytes; + + usedBytes = channels * samplesInBuffer; + channels = numChannels; + samplesInBuffer = usedBytes / channels; +} + + +// if output location pointer 'bufferPos' isn't zero, 'rewinds' the buffer and +// zeroes this pointer by copying samples from the 'bufferPos' pointer +// location on to the beginning of the buffer. +void FIFOSampleBuffer::rewind() +{ + if (bufferPos) + { + memmove(buffer, ptrBegin(), sizeof(SAMPLETYPE) * channels * samplesInBuffer); + bufferPos = 0; + } +} + + +// Adds 'numSamples' pcs of samples from the 'samples' memory position to +// the sample buffer. +void FIFOSampleBuffer::putSamples(const SAMPLETYPE *samples, uint numSamples) +{ + memcpy(ptrEnd(numSamples), samples, sizeof(SAMPLETYPE) * numSamples * channels); + samplesInBuffer += numSamples; +} + + +// Increases the number of samples in the buffer without copying any actual +// samples. +// +// This function is used to update the number of samples in the sample buffer +// when accessing the buffer directly with 'ptrEnd' function. Please be +// careful though! 
+void FIFOSampleBuffer::putSamples(uint numSamples)
+{
+    // make room for the enlarged sample count before updating the
+    // book-keeping
+    ensureCapacity(samplesInBuffer + numSamples);
+    samplesInBuffer += numSamples;
+}
+
+
+// Returns a pointer to the end of the used part of the sample buffer, i.e.
+// to the place where new samples are to be inserted. May be used for writing
+// new samples into the buffer directly. Please be careful!
+//
+// 'slackCapacity' is the free capacity (in samples) that must _at least_ be
+// available so that the caller can successfully insert all its samples. The
+// buffer is grown when necessary to meet this requirement.
+//
+// After inserting samples this way, remember to increase the sample count by
+// calling the 'putSamples(numSamples)' function.
+SAMPLETYPE *FIFOSampleBuffer::ptrEnd(uint slackCapacity)
+{
+    SAMPLETYPE *writePos;
+
+    // must come first: it may replace 'buffer' with a new allocation
+    ensureCapacity(samplesInBuffer + slackCapacity);
+    writePos = buffer + samplesInBuffer * channels;
+    return writePos;
+}
+
+
+// Returns a pointer to the first sample that has not been output yet.
+// Provided for accessing the output samples directly. Please be careful!
+//
+// After reading samples this way, remember to 'remove' them from the buffer
+// by calling the 'receiveSamples(numSamples)' function.
+SAMPLETYPE *FIFOSampleBuffer::ptrBegin() const
+{
+    const uint readOffset = bufferPos * channels;
+
+    return buffer + readOffset;
+}
+
+
+// Ensures that the buffer has enough capacity, i.e. space for _at least_
+// 'capacityRequirement' number of samples. The buffer is grown in steps of
+// 4 kilobytes to eliminate the need for frequently growing up the buffer,
+// as well as to round the buffer size up to the virtual memory page size.
+void FIFOSampleBuffer::ensureCapacity(uint capacityRequirement)
+{
+    SAMPLETYPE *tempUnaligned, *temp;
+    uint newSizeInBytes;
+    size_t misalignment;
+
+    if (capacityRequirement > getCapacity())
+    {
+        // enlarge the buffer in 4kbyte steps (round up to next 4k boundary)
+        newSizeInBytes = (capacityRequirement * channels * sizeof(SAMPLETYPE) + 4095) & -4096;
+        assert(newSizeInBytes % 2 == 0);
+        // 16 extra bytes leave room for aligning the start of the data
+        tempUnaligned = new SAMPLETYPE[newSizeInBytes / sizeof(SAMPLETYPE) + 16 / sizeof(SAMPLETYPE)];
+        // A conforming 'new' throws std::bad_alloc instead of returning NULL;
+        // the check is kept for compilers that still return NULL.
+        if (tempUnaligned == NULL)
+        {
+            throw std::runtime_error("Couldn't allocate memory!\n");
+        }
+        // Move the start up to the next 16-byte boundary. Only the low four
+        // address bits are examined and the adjustment is done with pointer
+        // arithmetic, so this does not rely on 'ulong' being wide enough to
+        // hold a pointer.
+        misalignment = (size_t)tempUnaligned & 15;
+        temp = (SAMPLETYPE *)((char *)tempUnaligned + ((16 - misalignment) & 15));
+        // Nothing to copy on the first allocation, when 'buffer' is still
+        // NULL; memcpy must not be given a null pointer even for zero bytes.
+        if (samplesInBuffer)
+        {
+            memcpy(temp, ptrBegin(), samplesInBuffer * channels * sizeof(SAMPLETYPE));
+        }
+        delete[] bufferUnaligned;
+        buffer = temp;
+        bufferUnaligned = tempUnaligned;
+        // commit the new size only now, so that a failed allocation leaves
+        // the recorded capacity matching the memory actually owned
+        sizeInBytes = newSizeInBytes;
+        bufferPos = 0;
+    }
+    else
+    {
+        // simply rewind the buffer (if necessary)
+        rewind();
+    }
+}
+
+
+// Returns the current buffer capacity in terms of samples
+uint FIFOSampleBuffer::getCapacity() const
+{
+    return sizeInBytes / (channels * sizeof(SAMPLETYPE));
+}
+
+
+// Returns the number of samples currently in the buffer
+uint FIFOSampleBuffer::numSamples() const
+{
+    return samplesInBuffer;
+}
+
+
+// Output samples from beginning of the sample buffer. Copies demanded number
+// of samples to output and removes them from the sample buffer. If there
+// are less than 'maxSamples' samples in the buffer, returns all available.
+//
+// Returns number of samples copied.
+uint FIFOSampleBuffer::receiveSamples(SAMPLETYPE *output, uint maxSamples)
+{
+    uint num;
+
+    // never hand out more than the buffer holds
+    num = (maxSamples > samplesInBuffer) ? samplesInBuffer : maxSamples;
+
+    memcpy(output, ptrBegin(), channels * sizeof(SAMPLETYPE) * num);
+    return receiveSamples(num);
+}
+
+
+// Removes samples from the beginning of the sample buffer without copying them
+// anywhere. Used to reduce the number of samples in the buffer, when accessing
+// the sample buffer with the 'ptrBegin' function.
+uint FIFOSampleBuffer::receiveSamples(uint maxSamples) +{ + if (maxSamples >= samplesInBuffer) + { + uint temp; + + temp = samplesInBuffer; + samplesInBuffer = 0; + return temp; + } + + samplesInBuffer -= maxSamples; + bufferPos += maxSamples; + + return maxSamples; +} + + +// Returns nonzero if the sample buffer is empty +int FIFOSampleBuffer::isEmpty() const +{ + return (samplesInBuffer == 0) ? 1 : 0; +} + + +// Clears the sample buffer +void FIFOSampleBuffer::clear() +{ + samplesInBuffer = 0; + bufferPos = 0; +} diff --git a/libs/soundtouch/FIFOSampleBuffer.h b/libs/soundtouch/FIFOSampleBuffer.h new file mode 100644 index 0000000000..7edbd27c36 --- /dev/null +++ b/libs/soundtouch/FIFOSampleBuffer.h @@ -0,0 +1,174 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// A buffer class for temporarily storaging sound samples, operates as a +/// first-in-first-out pipe. +/// +/// Samples are added to the end of the sample buffer with the 'putSamples' +/// function, and are received from the beginning of the buffer by calling +/// the 'receiveSamples' function. The class automatically removes the +/// output samples from the buffer as well as grows the storage size +/// whenever necessary. 
+/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai @ iki.fi +/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date$ +// File revision : $Revision$ +// +// $Id$ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef FIFOSampleBuffer_H +#define FIFOSampleBuffer_H + +#include "FIFOSamplePipe.h" + +namespace soundtouch +{ + +/// Sample buffer working in FIFO (first-in-first-out) principle. The class takes +/// care of storage size adjustment and data moving during input/output operations. +/// +/// Notice that in case of stereo audio, one sample is considered to consist of +/// both channel data. +class FIFOSampleBuffer : public FIFOSamplePipe +{ +private: + /// Sample buffer. + SAMPLETYPE *buffer; + + // Raw unaligned buffer memory. 
'buffer' is made aligned by pointing it to first + // 16-byte aligned location of this buffer + SAMPLETYPE *bufferUnaligned; + + /// Sample buffer size in bytes + uint sizeInBytes; + + /// How many samples are currently in buffer. + uint samplesInBuffer; + + /// Channels, 1=mono, 2=stereo. + uint channels; + + /// Current position pointer to the buffer. This pointer is increased when samples are + /// removed from the pipe so that it's necessary to actually rewind buffer (move data) + /// only new data when is put to the pipe. + uint bufferPos; + + /// Rewind the buffer by moving data from position pointed by 'bufferPos' to real + /// beginning of the buffer. + void rewind(); + + /// Ensures that the buffer has capacity for at least this many samples. + void ensureCapacity(const uint capacityRequirement); + + /// Returns current capacity. + uint getCapacity() const; + +public: + + /// Constructor + FIFOSampleBuffer(uint numChannels = 2 ///< Number of channels, 1=mono, 2=stereo. + ///< Default is stereo. + ); + + /// destructor + virtual ~FIFOSampleBuffer(); + + /// Returns a pointer to the beginning of the output samples. + /// This function is provided for accessing the output samples directly. + /// Please be careful for not to corrupt the book-keeping! + /// + /// When using this function to output samples, also remember to 'remove' the + /// output samples from the buffer by calling the + /// 'receiveSamples(numSamples)' function + virtual SAMPLETYPE *ptrBegin() const; + + /// Returns a pointer to the end of the used part of the sample buffer (i.e. + /// where the new samples are to be inserted). This function may be used for + /// inserting new samples into the sample buffer directly. Please be careful + /// not corrupt the book-keeping! + /// + /// When using this function as means for inserting new samples, also remember + /// to increase the sample count afterwards, by calling the + /// 'putSamples(numSamples)' function. 
+ SAMPLETYPE *ptrEnd( + uint slackCapacity ///< How much free capacity (in samples) there _at least_ + ///< should be so that the caller can succesfully insert the + ///< desired samples to the buffer. If necessary, the function + ///< grows the buffer size to comply with this requirement. + ); + + /// Adds 'numSamples' pcs of samples from the 'samples' memory position to + /// the sample buffer. + virtual void putSamples(const SAMPLETYPE *samples, ///< Pointer to samples. + uint numSamples ///< Number of samples to insert. + ); + + /// Adjusts the book-keeping to increase number of samples in the buffer without + /// copying any actual samples. + /// + /// This function is used to update the number of samples in the sample buffer + /// when accessing the buffer directly with 'ptrEnd' function. Please be + /// careful though! + virtual void putSamples(uint numSamples ///< Number of samples been inserted. + ); + + /// Output samples from beginning of the sample buffer. Copies requested samples to + /// output buffer and removes them from the sample buffer. If there are less than + /// 'numsample' samples in the buffer, returns all that available. + /// + /// \return Number of samples returned. + virtual uint receiveSamples(SAMPLETYPE *output, ///< Buffer where to copy output samples. + uint maxSamples ///< How many samples to receive at max. + ); + + /// Adjusts book-keeping so that given number of samples are removed from beginning of the + /// sample buffer without copying them anywhere. + /// + /// Used to reduce the number of samples in the buffer when accessing the sample buffer directly + /// with 'ptrBegin' function. + virtual uint receiveSamples(uint maxSamples ///< Remove this many samples from the beginning of pipe. + ); + + /// Returns number of samples currently available. + virtual uint numSamples() const; + + /// Sets number of channels, 1 = mono, 2 = stereo. 
+ void setChannels(uint numChannels); + + /// Returns nonzero if there aren't any samples available for outputting. + virtual int isEmpty() const; + + /// Clears all the samples. + virtual void clear(); +}; + +} + +#endif diff --git a/libs/soundtouch/FIFOSamplePipe.h b/libs/soundtouch/FIFOSamplePipe.h new file mode 100644 index 0000000000..9e33363b00 --- /dev/null +++ b/libs/soundtouch/FIFOSamplePipe.h @@ -0,0 +1,217 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// 'FIFOSamplePipe' : An abstract base class for classes that manipulate sound +/// samples by operating like a first-in-first-out pipe: New samples are fed +/// into one end of the pipe with the 'putSamples' function, and the processed +/// samples are received from the other end with the 'receiveSamples' function. +/// +/// 'FIFOProcessor' : A base class for classes the do signal processing with +/// the samples while operating like a first-in-first-out pipe. When samples +/// are input with the 'putSamples' function, the class processes them +/// and moves the processed samples to the given 'output' pipe object, which +/// may be either another processing stage, or a fifo sample buffer object. +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai @ iki.fi +/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date$ +// File revision : $Revision$ +// +// $Id$ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. 
+// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef FIFOSamplePipe_H +#define FIFOSamplePipe_H + +#include +#include +#include "STTypes.h" + +namespace soundtouch +{ + +/// Abstract base class for FIFO (first-in-first-out) sample processing classes. +class FIFOSamplePipe +{ +public: + /// Returns a pointer to the beginning of the output samples. + /// This function is provided for accessing the output samples directly. + /// Please be careful for not to corrupt the book-keeping! + /// + /// When using this function to output samples, also remember to 'remove' the + /// output samples from the buffer by calling the + /// 'receiveSamples(numSamples)' function + virtual SAMPLETYPE *ptrBegin() const = 0; + + /// Adds 'numSamples' pcs of samples from the 'samples' memory position to + /// the sample buffer. + virtual void putSamples(const SAMPLETYPE *samples, ///< Pointer to samples. + uint numSamples ///< Number of samples to insert. + ) = 0; + + + // Moves samples from the 'other' pipe instance to this instance. + void moveSamples(FIFOSamplePipe &other ///< Other pipe instance where from the receive the data. + ) + { + int oNumSamples = other.numSamples(); + + putSamples(other.ptrBegin(), oNumSamples); + other.receiveSamples(oNumSamples); + }; + + /// Output samples from beginning of the sample buffer. Copies requested samples to + /// output buffer and removes them from the sample buffer. 
If there are less than + /// 'numsample' samples in the buffer, returns all that available. + /// + /// \return Number of samples returned. + virtual uint receiveSamples(SAMPLETYPE *output, ///< Buffer where to copy output samples. + uint maxSamples ///< How many samples to receive at max. + ) = 0; + + /// Adjusts book-keeping so that given number of samples are removed from beginning of the + /// sample buffer without copying them anywhere. + /// + /// Used to reduce the number of samples in the buffer when accessing the sample buffer directly + /// with 'ptrBegin' function. + virtual uint receiveSamples(uint maxSamples ///< Remove this many samples from the beginning of pipe. + ) = 0; + + /// Returns number of samples currently available. + virtual uint numSamples() const = 0; + + // Returns nonzero if there aren't any samples available for outputting. + virtual int isEmpty() const = 0; + + /// Clears all the samples. + virtual void clear() = 0; +}; + + + +/// Base-class for sound processing routines working in FIFO principle. With this base +/// class it's easy to implement sound processing stages that can be chained together, +/// so that samples that are fed into beginning of the pipe automatically go through +/// all the processing stages. +/// +/// When samples are input to this class, they're first processed and then put to +/// the FIFO pipe that's defined as output of this class. This output pipe can be +/// either other processing stage or a FIFO sample buffer. +class FIFOProcessor :public FIFOSamplePipe +{ +protected: + /// Internal pipe where processed samples are put. + FIFOSamplePipe *output; + + /// Sets output pipe. + void setOutPipe(FIFOSamplePipe *pOutput) + { + assert(output == NULL); + assert(pOutput != NULL); + output = pOutput; + } + + + /// Constructor. Doesn't define output pipe; it has to be set be + /// 'setOutPipe' function. + FIFOProcessor() + { + output = NULL; + } + + + /// Constructor. Configures output pipe. 
+ FIFOProcessor(FIFOSamplePipe *pOutput ///< Output pipe. + ) + { + output = pOutput; + } + + + /// Destructor. + virtual ~FIFOProcessor() + { + } + + + /// Returns a pointer to the beginning of the output samples. + /// This function is provided for accessing the output samples directly. + /// Please be careful for not to corrupt the book-keeping! + /// + /// When using this function to output samples, also remember to 'remove' the + /// output samples from the buffer by calling the + /// 'receiveSamples(numSamples)' function + virtual SAMPLETYPE *ptrBegin() const + { + return output->ptrBegin(); + } + +public: + + /// Output samples from beginning of the sample buffer. Copies requested samples to + /// output buffer and removes them from the sample buffer. If there are less than + /// 'numsample' samples in the buffer, returns all that available. + /// + /// \return Number of samples returned. + virtual uint receiveSamples(SAMPLETYPE *outBuffer, ///< Buffer where to copy output samples. + uint maxSamples ///< How many samples to receive at max. + ) + { + return output->receiveSamples(outBuffer, maxSamples); + } + + + /// Adjusts book-keeping so that given number of samples are removed from beginning of the + /// sample buffer without copying them anywhere. + /// + /// Used to reduce the number of samples in the buffer when accessing the sample buffer directly + /// with 'ptrBegin' function. + virtual uint receiveSamples(uint maxSamples ///< Remove this many samples from the beginning of pipe. + ) + { + return output->receiveSamples(maxSamples); + } + + + /// Returns number of samples currently available. + virtual uint numSamples() const + { + return output->numSamples(); + } + + + /// Returns nonzero if there aren't any samples available for outputting. 
+ virtual int isEmpty() const + { + return output->isEmpty(); + } +}; + +} + +#endif diff --git a/libs/soundtouch/FIRFilter.cpp b/libs/soundtouch/FIRFilter.cpp new file mode 100644 index 0000000000..cc9c40d883 --- /dev/null +++ b/libs/soundtouch/FIRFilter.cpp @@ -0,0 +1,254 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// General FIR digital filter routines with MMX optimization. +/// +/// Note : MMX optimized functions reside in a separate, platform-specific file, +/// e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp' +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai @ iki.fi +/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date$ +// File revision : $Revision$ +// +// $Id$ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <assert.h>
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdexcept>
+#include "FIRFilter.h"
+#include "cpu_detect.h"
+
+using namespace soundtouch;
+
+/*****************************************************************************
+ *
+ * Implementation of the class 'FIRFilter'
+ *
+ *****************************************************************************/
+
+// Constructor. Creates an empty filter; coefficients have to be given with
+// 'setCoefficients' before the filter can be evaluated.
+FIRFilter::FIRFilter()
+{
+    resultDivFactor = 0;
+    resultDivider = 0;      // set properly in 'setCoefficients'
+    length = 0;
+    lengthDiv8 = 0;
+    filterCoeffs = NULL;
+}
+
+
+FIRFilter::~FIRFilter()
+{
+    delete[] filterCoeffs;
+}
+
+// Usual C-version of the filter routine for stereo sound.
+//
+// 'src' holds 'numSamples' interleaved left/right sample pairs. Writes
+// 'numSamples - length' filtered sample pairs into 'dest' and returns that
+// count; returns 0 if there are fewer input samples than filter taps.
+uint FIRFilter::evaluateFilterStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
+{
+    uint i, j, end;
+    LONG_SAMPLETYPE suml, sumr;
+#ifdef FLOAT_SAMPLES
+    // when using floating point samples, use a scaler instead of a divider
+    // because division is much slower operation than multiplying.
+    double dScaler = 1.0 / (double)resultDivider;
+#endif
+
+    assert(length != 0);
+
+    // 'numSamples - length' is unsigned arithmetic: without this check a too
+    // short input would wrap around to a huge loop count.
+    if (numSamples < length) return 0;
+
+    end = 2 * (numSamples - length);
+
+    for (j = 0; j < end; j += 2)
+    {
+        const SAMPLETYPE *ptr;
+
+        suml = sumr = 0;
+        ptr = src + j;
+
+        for (i = 0; i < length; i += 4)
+        {
+            // loop is unrolled by factor of 4 here for efficiency
+            suml += ptr[2 * i + 0] * filterCoeffs[i + 0] +
+                    ptr[2 * i + 2] * filterCoeffs[i + 1] +
+                    ptr[2 * i + 4] * filterCoeffs[i + 2] +
+                    ptr[2 * i + 6] * filterCoeffs[i + 3];
+            sumr += ptr[2 * i + 1] * filterCoeffs[i + 0] +
+                    ptr[2 * i + 3] * filterCoeffs[i + 1] +
+                    ptr[2 * i + 5] * filterCoeffs[i + 2] +
+                    ptr[2 * i + 7] * filterCoeffs[i + 3];
+        }
+
+#ifdef INTEGER_SAMPLES
+        suml >>= resultDivFactor;
+        sumr >>= resultDivFactor;
+        // saturate to 16 bit integer limits
+        suml = (suml < -32768) ? -32768 : (suml > 32767) ? 32767 : suml;
+        // saturate to 16 bit integer limits
+        sumr = (sumr < -32768) ? -32768 : (sumr > 32767) ? 32767 : sumr;
+#else
+        suml *= dScaler;
+        sumr *= dScaler;
+#endif // INTEGER_SAMPLES
+        dest[j] = (SAMPLETYPE)suml;
+        dest[j + 1] = (SAMPLETYPE)sumr;
+    }
+    return numSamples - length;
+}
+
+
+
+
+// Usual C-version of the filter routine for mono sound.
+//
+// Writes 'numSamples - length' filtered samples into 'dest' and returns that
+// count; returns 0 if there are fewer input samples than filter taps.
+uint FIRFilter::evaluateFilterMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) const
+{
+    uint i, j, end;
+    LONG_SAMPLETYPE sum;
+#ifdef FLOAT_SAMPLES
+    // when using floating point samples, use a scaler instead of a divider
+    // because division is much slower operation than multiplying.
+    double dScaler = 1.0 / (double)resultDivider;
+#endif
+
+
+    assert(length != 0);
+
+    // 'numSamples - length' is unsigned arithmetic: without this check a too
+    // short input would wrap around to a huge loop count.
+    if (numSamples < length) return 0;
+
+    end = numSamples - length;
+    for (j = 0; j < end; j ++)
+    {
+        sum = 0;
+        for (i = 0; i < length; i += 4)
+        {
+            // loop is unrolled by factor of 4 here for efficiency
+            sum += src[i + 0] * filterCoeffs[i + 0] +
+                   src[i + 1] * filterCoeffs[i + 1] +
+                   src[i + 2] * filterCoeffs[i + 2] +
+                   src[i + 3] * filterCoeffs[i + 3];
+        }
+#ifdef INTEGER_SAMPLES
+        sum >>= resultDivFactor;
+        // saturate to 16 bit integer limits
+        sum = (sum < -32768) ? -32768 : (sum > 32767) ? 32767 : sum;
+#else
+        sum *= dScaler;
+#endif // INTEGER_SAMPLES
+        dest[j] = (SAMPLETYPE)sum;
+        // advance the input window by one sample for the next output sample
+        src ++;
+    }
+    return end;
+}
+
+
+// Set filter coefficients and length.
+//
+// 'coeffs' points to 'newLength' coefficients, which are copied into storage
+// owned by this object. 'uResultDivFactor' is the result scaling expressed as
+// a power of two: results are divided by 2^uResultDivFactor.
+//
+// Throws std::runtime_error if filter length isn't divisible by 8. If that or
+// the memory allocation throws, the previous filter setup is left intact.
+void FIRFilter::setCoefficients(const SAMPLETYPE *coeffs, uint newLength, uint uResultDivFactor)
+{
+    SAMPLETYPE *newCoeffs;
+
+    assert(newLength > 0);
+    if (newLength % 8) throw std::runtime_error("FIR filter length not divisible by 8");
+
+    // Allocate and fill the new array before releasing the old one: were
+    // 'new' to throw after the old array had been deleted, 'filterCoeffs'
+    // would be left dangling and get deleted again in the destructor.
+    newCoeffs = new SAMPLETYPE[newLength];
+    memcpy(newCoeffs, coeffs, newLength * sizeof(SAMPLETYPE));
+
+    delete[] filterCoeffs;
+    filterCoeffs = newCoeffs;
+
+    lengthDiv8 = newLength / 8;
+    length = lengthDiv8 * 8;
+    assert(length == newLength);
+
+    resultDivFactor = uResultDivFactor;
+    // explicit 'double' arguments: pow(int, uint) is an ambiguous overload
+    // on some compilers
+    resultDivider = (uint)pow(2.0, (double)resultDivFactor);
+}
+
+
+// Returns the number of filter taps.
+uint FIRFilter::getLength() const
+{
+    return length;
+}
+
+
+
+// Applies the filter to the given sequence of samples.
+//
+// Note : The amount of outputted samples is by value of 'filter_length'
+// smaller than the amount of input samples.
+uint FIRFilter::evaluate(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples, uint numChannels) const +{ + assert(numChannels == 1 || numChannels == 2); + + assert(length > 0); + assert(lengthDiv8 * 8 == length); + if (numSamples < length) return 0; + assert(resultDivFactor >= 0); + if (numChannels == 2) + { + return evaluateFilterStereo(dest, src, numSamples); + } else { + return evaluateFilterMono(dest, src, numSamples); + } +} + +FIRFilter * FIRFilter::newInstance() +{ + uint uExtensions; + + uExtensions = detectCPUextensions(); + + // Check if MMX/SSE/3DNow! instruction set extensions supported by CPU + +#ifdef ALLOW_MMX + // MMX routines available only with integer sample types + if (uExtensions & SUPPORT_MMX) + { + return ::new FIRFilterMMX; + } + else +#endif // ALLOW_MMX + +#ifdef ALLOW_SSE + if (uExtensions & SUPPORT_SSE) + { + // SSE support + return ::new FIRFilterSSE; + } + else +#endif // ALLOW_SSE + +#ifdef ALLOW_3DNOW + if (uExtensions & SUPPORT_3DNOW) + { + // 3DNow! support + return ::new FIRFilter3DNow; + } + else +#endif // ALLOW_3DNOW + + { + // ISA optimizations not supported, use plain C version + return ::new FIRFilter; + } +} diff --git a/libs/soundtouch/FIRFilter.h b/libs/soundtouch/FIRFilter.h new file mode 100644 index 0000000000..7cd265d592 --- /dev/null +++ b/libs/soundtouch/FIRFilter.h @@ -0,0 +1,160 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// General FIR digital filter routines with MMX optimization. +/// +/// Note : MMX optimized functions reside in a separate, platform-specific file, +/// e.g. 
'mmx_win.cpp' or 'mmx_gcc.cpp'
+///
+/// Author : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai @ iki.fi
+/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed : $Date$
+// File revision : $Revision$
+//
+// $Id$
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+// SoundTouch audio processing library
+// Copyright (c) Olli Parviainen
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef FIRFilter_H
+#define FIRFilter_H
+
+#include "STTypes.h"
+
+namespace soundtouch
+{
+
+/// Plain C++ implementation of a FIR filter for mono or stereo samples.
+///
+/// The constructor is protected: objects are created with 'newInstance',
+/// which may return one of the CPU-specific subclasses declared below.
+/// The coefficients have to be given with 'setCoefficients' before calling
+/// 'evaluate'.
+class FIRFilter
+{
+protected:
+    // Number of FIR filter taps
+    uint length;
+    // Number of FIR filter taps divided by 8
+    uint lengthDiv8;
+
+    // Result divider factor in 2^k format
+    uint resultDivFactor;
+
+    // Result divider value, i.e. 2^resultDivFactor.
+    SAMPLETYPE resultDivider;
+
+    // Memory for filter coefficients; allocated in 'setCoefficients' and
+    // released in the destructor.
+    SAMPLETYPE *filterCoeffs;
+
+    // Filters interleaved stereo samples; called by 'evaluate'.
+    virtual uint evaluateFilterStereo(SAMPLETYPE *dest,
+                                      const SAMPLETYPE *src,
+                                      uint numSamples) const;
+    // Filters mono samples; called by 'evaluate'.
+    virtual uint evaluateFilterMono(SAMPLETYPE *dest,
+                                    const SAMPLETYPE *src,
+                                    uint numSamples) const;
+
+    FIRFilter();
+
+public:
+    virtual ~FIRFilter();
+
+    /// Creates a new filter object; the caller owns the returned object.
+    static FIRFilter *newInstance();
+
+    /// Applies the filter to the given sequence of samples.
+    /// Note : The amount of outputted samples is by value of 'filter_length'
+    /// smaller than the amount of input samples.
+    ///
+    /// 'numChannels' has to be 1 (mono) or 2 (stereo).
+    ///
+    /// \return Number of samples copied to 'dest'; zero if 'numSamples' is
+    /// smaller than the filter length.
+    uint evaluate(SAMPLETYPE *dest,
+                  const SAMPLETYPE *src,
+                  uint numSamples,
+                  uint numChannels) const;
+
+    /// Returns the number of filter taps.
+    uint getLength() const;
+
+    /// Sets the filter coefficients (copied from 'coeffs'), the filter length
+    /// and the result divider factor. Throws std::runtime_error if
+    /// 'newLength' isn't divisible by 8.
+    virtual void setCoefficients(const SAMPLETYPE *coeffs,
+                                 uint newLength,
+                                 uint uResultDivFactor);
+};
+
+
+// Optional subclasses that implement CPU-specific optimizations:
+
+#ifdef ALLOW_MMX
+
+    /// Class that implements MMX optimized functions exclusive for 16bit integer samples type.
+    ///
+    /// NOTE(review): the member functions below take 'short' where the base
+    /// class takes SAMPLETYPE, so they override the base versions only when
+    /// SAMPLETYPE is 'short' -- confirm against STTypes.h.
+    class FIRFilterMMX : public FIRFilter
+    {
+    protected:
+        // Presumably the raw allocation and an aligned pointer into it --
+        // the implementation is in a platform-specific file; TODO confirm.
+        short *filterCoeffsUnalign;
+        short *filterCoeffsAlign;
+
+        virtual uint evaluateFilterStereo(short *dest, const short *src, uint numSamples) const;
+    public:
+        FIRFilterMMX();
+        ~FIRFilterMMX();
+
+        virtual void setCoefficients(const short *coeffs, uint newLength, uint uResultDivFactor);
+    };
+
+#endif // ALLOW_MMX
+
+
+#ifdef ALLOW_3DNOW
+
+    /// Class that implements 3DNow! optimized functions exclusive for floating point samples type.
+    class FIRFilter3DNow : public FIRFilter
+    {
+    protected:
+        // Presumably the raw allocation and an aligned pointer into it --
+        // the implementation is in a platform-specific file; TODO confirm.
+        float *filterCoeffsUnalign;
+        float *filterCoeffsAlign;
+
+        // NOTE(review): overrides the base version only when SAMPLETYPE is
+        // 'float' -- confirm against STTypes.h.
+        virtual uint evaluateFilterStereo(float *dest, const float *src, uint numSamples) const;
+    public:
+        FIRFilter3DNow();
+        ~FIRFilter3DNow();
+        virtual void setCoefficients(const float *coeffs, uint newLength, uint uResultDivFactor);
+    };
+
+#endif // ALLOW_3DNOW
+
+
+#ifdef ALLOW_SSE
+    /// Class that implements SSE optimized functions exclusive for floating point samples type.
+    class FIRFilterSSE : public FIRFilter
+    {
+    protected:
+        // Presumably the raw allocation and an aligned pointer into it --
+        // the implementation is in a platform-specific file; TODO confirm.
+        float *filterCoeffsUnalign;
+        float *filterCoeffsAlign;
+
+        // NOTE(review): overrides the base version only when SAMPLETYPE is
+        // 'float' -- confirm against STTypes.h.
+        virtual uint evaluateFilterStereo(float *dest, const float *src, uint numSamples) const;
+    public:
+        FIRFilterSSE();
+        ~FIRFilterSSE();
+
+        virtual void setCoefficients(const float *coeffs, uint newLength, uint uResultDivFactor);
+    };
+
+#endif // ALLOW_SSE
+
+}
+
+#endif // FIRFilter_H
diff --git a/libs/soundtouch/README b/libs/soundtouch/README
new file mode 100644
index 0000000000..d639041083
--- /dev/null
+++ b/libs/soundtouch/README
@@ -0,0 +1,191 @@
+SoundTouch sound processing library v1.01
+=========================================
+Copyright (c) Olli Parviainen 2002
+
+A library for changing tempo, pitch and playback rate of digital sound.
+
+
+SoundStretch sound processing application v1.1
+==============================================
+Copyright (c) Olli Parviainen 2002-2003
+
+A command-line application for changing tempo, pitch and playback rates
+of WAV sound files. This program also demonstrates how the "SoundTouch"
+library can be used to process sound in own programs.
+
+
+SoundStretch Usage Instructions
+===============================
+
+SoundStretch Usage syntax:
+    soundstretch infile.wav outfile.wav [switches]
+
+Where:
+
+    "infile.wav" is the name of the input sound data file (in .WAV audio
+    file format).
+
+    "outfile.wav" is the name of the output sound file where the resulting
+    sound is saved (in .WAV audio file format).
+ + [switches] are one or more control switches. + +Available control switches are: + + -tempo=n : Change sound tempo by n percent (n = -95.0 .. +5000.0 %) + + -pitch=n : Change sound pitch by n semitones (n = -60.0 .. + 60.0 semitones) + + -rate=n : Change sound playback rate by n percent (n = -95.0 .. +5000.0 %) + + -bpm=n : Detect the Beats-Per-Minute (BPM) rate of the sound and adjust the + tempo to meet 'n' BPMs. If this switch is defined, the "-tempo=n" + switch value is ignored. + + If "=n" is omitted, i.e. switch "-bpm" is used alone, the + program just calculates and displays the BPM rate but doesn't + adjust tempo according to the BPM value. + + -quick : Use quicker tempo change algorithm. Gains speed but loses sound + quality. + + -naa : Don't use anti-alias filtering in samplerate transposing. Gains + speed but loses sound quality. + + -license : Displays the program license text (GPL) + +Notes: + * The numerical switch values can be entered using either integer (e.g. + "-tempo=123") or decimal (e.g. "-tempo=123.45") numbers. + + * The "-naa" and/or "-quick" switches can be used to reduce CPU usage + while compromising some sound quality. + + * The BPM detection algorithm works by detecting repeating low-frequency + (<250Hz) sound patterns and thus works best with rock/pop music + with bass or drum beat. The BPM detection doesn't work on pieces such + as classical music without distinct, repeating bass frequency patterns. + Also pieces with varying tempo, varying bass patterns or very complex + bass patterns (jazz, hiphop) may produce odd BPM readings. + + In cases when the bass pattern drifts a bit around a nominal beat rate + (e.g. drummer is again drunken :), the BPM algorithm may report an incorrect + harmonic, one-half or one-third of the correct BPM value; in such case + the system could for example report BPM value of 50 or 100 instead of + correct BPM value of 150. 
+ + +Usage examples: +=============== + + Example 1 + ========= + + The following command increases tempo of the sound file "originalfile.wav" + by 12.5% and saves result to file "destinationfile.wav": + + soundstretch originalfile.wav destinationfile.wav -tempo=12.5 + + + Example 2 + ========= + + The following command decreases the sound pitch (key) of the sound file + "orig.wav" by two semitones and saves the result to file "dest.wav": + + soundstretch orig.wav dest.wav -pitch=-2 + + + Example 3 + ========= + + The following command processes the file "orig.wav" by decreasing the + sound tempo by 25.3% and increasing the sound pitch (key) by 1.5 semitones. + Result is saved to file "dest.wav": + + soundstretch orig.wav dest.wav -tempo=-25.3 -pitch=1.5 + + + Example 4 + ========= + + The following command detects the BPM rate of the file "orig.wav" and + adjusts the tempo to match 100 beats per minute. Result is saved to + file "dest.wav": + + soundstretch orig.wav dest.wav -bpm=100 + + + +Building Instructions +===================== + +The package contains executable binaries for Win32 platform in the "bin" +directory. + +To build the library and application executable for other platforms or to +re-build the delivered binaries, run either of the scripts in the package +root directory: + +"make-win.bat" for Microsoft Windows environment, or +"make-gcc" for GNU/Linux or Unix environment with a gcc compiler. + + + +Change History +============== + + + SoundTouch library Change History + ================================= + + v1.01: + - "mmx_gcc.cpp": Added "using namespace std" and removed "return 0" from a + function with void return value to fix compiler errors when compiling + the library in Solaris environment. + + - Moved file "FIFOSampleBuffer.h" to "include" directory to allow accessing + the FIFOSampleBuffer class from external files. 
+ + v1.0: Initial release + + + SoundStretch application Change History + ======================================= + + v1.1: + - Fixed "Release" settings in Microsoft Visual C++ project file (.dsp) + + - Added beats-per-minute (BPM) detection routine and command-line switch + "-bpm" + + v1.01: Initial release + + +Acknowledgements +================ + +Many thanks to Stuart Lamble for translating the MMX optimizations from +MS Visual C++ syntax into gcc syntax for the joy of all Linux users. + +Thanks also to Manish Bajpai, whose WAV file reading routines I've used +as the base of the WavInFile & WavOutFile classes that are being used in +the soundstretch program for accessing WAV audio files. + + +LICENSE: +======== + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2 of the License, or (at your +option) any later version. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., 59 +Temple Place, Suite 330, Boston, MA 02111-1307 USA diff --git a/libs/soundtouch/RateTransposer.cpp b/libs/soundtouch/RateTransposer.cpp new file mode 100644 index 0000000000..740d099239 --- /dev/null +++ b/libs/soundtouch/RateTransposer.cpp @@ -0,0 +1,611 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// Sample rate transposer. 
Changes sample rate by using linear interpolation +/// together with anti-alias filtering (first order interpolation with anti- +/// alias filtering should be quite adequate for this application) +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai @ iki.fi +/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date$ +// File revision : $Revision$ +// +// $Id$ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include "RateTransposer.h" +#include "AAFilter.h" + +using namespace soundtouch; + + +/// A linear samplerate transposer class that uses integer arithmetics. +/// for the transposing. 
+class RateTransposerInteger : public RateTransposer +{ +protected: + int iSlopeCount; + uint uRate; + SAMPLETYPE sPrevSampleL, sPrevSampleR; + + virtual void resetRegisters(); + + virtual uint transposeStereo(SAMPLETYPE *dest, + const SAMPLETYPE *src, + uint numSamples); + virtual uint transposeMono(SAMPLETYPE *dest, + const SAMPLETYPE *src, + uint numSamples); + +public: + RateTransposerInteger(); + virtual ~RateTransposerInteger(); + + /// Sets new target rate. Normal rate = 1.0, smaller values represent slower + /// rate, larger faster rates. + virtual void setRate(float newRate); + +}; + + +/// A linear samplerate transposer class that uses floating point arithmetics +/// for the transposing. +class RateTransposerFloat : public RateTransposer +{ +protected: + float fSlopeCount; + float fRateStep; + SAMPLETYPE sPrevSampleL, sPrevSampleR; + + virtual void resetRegisters(); + + virtual uint transposeStereo(SAMPLETYPE *dest, + const SAMPLETYPE *src, + uint numSamples); + virtual uint transposeMono(SAMPLETYPE *dest, + const SAMPLETYPE *src, + uint numSamples); + +public: + RateTransposerFloat(); + virtual ~RateTransposerFloat(); +}; + + + +#ifndef min +#define min(a,b) ((a > b) ? b : a) +#define max(a,b) ((a < b) ? b : a) +#endif + +RateTransposer *RateTransposer::newInstance() +{ +#ifdef INTEGER_SAMPLES + return ::new RateTransposerInteger; +#else + return ::new RateTransposerFloat; +#endif +} + + +// Constructor +RateTransposer::RateTransposer() : FIFOProcessor(&outputBuffer) +{ + uChannels = 2; + bUseAAFilter = TRUE; + + // Instantiates the anti-alias filter with default tap length + // of 32 + pAAFilter = new AAFilter(32); +} + + + +RateTransposer::~RateTransposer() +{ + delete pAAFilter; +} + + + +/// Enables/disables the anti-alias filter. Zero to disable, nonzero to enable +void RateTransposer::enableAAFilter(const BOOL newMode) +{ + bUseAAFilter = newMode; +} + + +/// Returns nonzero if anti-alias filter is enabled. 
+BOOL RateTransposer::isAAFilterEnabled() const
+{
+    return bUseAAFilter;
+}
+
+
+// Returns the anti-alias filter object. The object stays owned by this
+// class (it is deleted in the destructor).
+AAFilter *RateTransposer::getAAFilter() const
+{
+    return pAAFilter;
+}
+
+
+
+// Sets new target rate. Normal rate = 1.0, smaller values represent slower
+// rate, larger faster rates.
+//
+// Also retunes the anti-alias filter: the cutoff is 0.5 / rate for rates
+// above 1.0, and 0.5 * rate otherwise.
+void RateTransposer::setRate(float newRate)
+{
+    float fCutoff;
+
+    fRate = newRate;
+
+    // design a new anti-alias filter
+    if (newRate > 1.0f)
+    {
+        fCutoff = 0.5f / newRate;
+    }
+    else
+    {
+        fCutoff = 0.5f * newRate;
+    }
+    pAAFilter->setCutoffFreq(fCutoff);
+}
+
+
+// Moves all the samples that are waiting in 'storeBuffer' into
+// 'outputBuffer'. Does nothing if 'storeBuffer' is empty.
+void RateTransposer::flushStoreBuffer()
+{
+    if (storeBuffer.isEmpty()) return;
+
+    outputBuffer.moveSamples(storeBuffer);
+}
+
+
+// Adds 'numSamples' pcs of samples from the 'samples' memory position into
+// the input of the object.
+void RateTransposer::putSamples(const SAMPLETYPE *samples, uint numSamples)
+{
+    processSamples(samples, numSamples);
+}
+
+
+
+// Transposes up the sample rate, causing the observed playback 'rate' of the
+// sound to decrease
+void RateTransposer::upsample(const SAMPLETYPE *src, uint numSamples)
+{
+    int count, sizeTemp, num;
+
+    // This routine is used when the rate is below 1.0 (see 'processSamples'):
+    // first transpose the samples and then apply the anti-alias filter to
+    // remove aliasing.
+
+    // Estimate how much room the transposed samples need in 'storeBuffer'
+    // (+16 is to reserve some slack in the destination buffer)
+    sizeTemp = (int)((float)numSamples / fRate + 16.0f);
+
+    // Transpose the samples, store the result into the end of "storeBuffer"
+    // (presumably 'ptrEnd(n)' makes sure there's room for n more samples --
+    // confirm in FIFOSampleBuffer)
+    count = transpose(storeBuffer.ptrEnd(sizeTemp), src, numSamples);
+    storeBuffer.putSamples(count);
+
+    // Apply the anti-alias filter to the samples in "storeBuffer", output the
+    // result to the end of "outputBuffer"
+    num = storeBuffer.numSamples();
+    count = pAAFilter->evaluate(outputBuffer.ptrEnd(num),
+        storeBuffer.ptrBegin(), num, uChannels);
+    outputBuffer.putSamples(count);
+
+    // Remove the processed samples from "storeBuffer"
+    storeBuffer.receiveSamples(count);
+}
+
+
+// Transposes down the sample rate, causing the observed playback 'rate' of the
+// sound to increase
+void RateTransposer::downsample(const SAMPLETYPE *src, uint numSamples)
+{
+    int count, sizeTemp;
+
+    // This routine is used when the rate is 1.0 or above (see
+    // 'processSamples'): first apply the anti-alias filter to remove high
+    // frequencies (prevent them from folding over the lower frequencies),
+    // then transpose.
+
+    // Add the new samples to the end of the storeBuffer
+    storeBuffer.putSamples(src, numSamples);
+
+    // Anti-alias filter the samples to prevent folding and output the filtered
+    // data to tempBuffer. Note : because of the FIR filter length, the
+    // filtering routine takes in 'filter_length' more samples than it outputs.
+    //
+    // 'tempBuffer' is used as plain scratch memory here: 'putSamples' is never
+    // called on it, so its own sample count stays at zero.
+    assert(tempBuffer.isEmpty());
+    sizeTemp = storeBuffer.numSamples();
+
+    count = pAAFilter->evaluate(tempBuffer.ptrEnd(sizeTemp),
+        storeBuffer.ptrBegin(), sizeTemp, uChannels);
+
+    // Remove the filtered samples from 'storeBuffer'
+    storeBuffer.receiveSamples(count);
+
+    // Transpose the samples (+16 is to reserve some slack in the destination buffer)
+    // NOTE(review): the room is estimated from 'numSamples' although 'count'
+    // samples get transposed -- confirm that 'count' can't exceed 'numSamples'.
+    sizeTemp = (int)((float)numSamples / fRate + 16.0f);
+    count = transpose(outputBuffer.ptrEnd(sizeTemp), tempBuffer.ptrBegin(), count);
+    outputBuffer.putSamples(count);
+}
+
+
+// Transposes the sample rate of the 'numSamples' samples in 'src' and appends
+// the result to 'outputBuffer'. Unless the anti-alias filter is disabled, the
+// filter is applied as well to prevent folding: rates below 1.0 are handled by
+// 'upsample', other rates by 'downsample'.
+void RateTransposer::processSamples(const SAMPLETYPE *src, uint numSamples)
+{
+    uint count;
+    uint sizeReq;
+
+    if (numSamples == 0) return;
+    assert(pAAFilter);
+
+    // If anti-alias filter is turned off, simply transpose without applying
+    // the filter
+    if (bUseAAFilter == FALSE)
+    {
+        sizeReq = (int)((float)numSamples / fRate + 1.0f);
+        count = transpose(outputBuffer.ptrEnd(sizeReq), src, numSamples);
+        outputBuffer.putSamples(count);
+        return;
+    }
+
+    // Transpose with anti-alias filter
+    if (fRate < 1.0f)
+    {
+        upsample(src, numSamples);
+    }
+    else
+    {
+        downsample(src, numSamples);
+    }
+}
+
+
+// Transposes the sample rate of the given samples using linear interpolation.
+// Returns the number of samples returned in the "dest" buffer +inline uint RateTransposer::transpose(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples) +{ + if (uChannels == 2) + { + return transposeStereo(dest, src, numSamples); + } + else + { + return transposeMono(dest, src, numSamples); + } +} + + +// Sets the number of channels, 1 = mono, 2 = stereo +void RateTransposer::setChannels(const uint numchannels) +{ + if (uChannels == numchannels) return; + + assert(numchannels == 1 || numchannels == 2); + uChannels = numchannels; + + storeBuffer.setChannels(uChannels); + tempBuffer.setChannels(uChannels); + outputBuffer.setChannels(uChannels); + + // Inits the linear interpolation registers + resetRegisters(); +} + + +// Clears all the samples in the object +void RateTransposer::clear() +{ + outputBuffer.clear(); + storeBuffer.clear(); +} + + +// Returns nonzero if there aren't any samples available for outputting. +uint RateTransposer::isEmpty() +{ + int res; + + res = FIFOProcessor::isEmpty(); + if (res == 0) return 0; + return storeBuffer.isEmpty(); +} + + +////////////////////////////////////////////////////////////////////////////// +// +// RateTransposerInteger - integer arithmetic implementation +// + +/// fixed-point interpolation routine precision +#define SCALE 65536 + +// Constructor +RateTransposerInteger::RateTransposerInteger() : RateTransposer() +{ + // call these here as these are virtual functions; calling these + // from the base class constructor wouldn't execute the overloaded + // versions (peculiar C++ can be). + resetRegisters(); + setRate(1.0f); +} + + +RateTransposerInteger::~RateTransposerInteger() +{ +} + + +void RateTransposerInteger::resetRegisters() +{ + iSlopeCount = 0; + sPrevSampleL = + sPrevSampleR = 0; +} + + + +// Transposes the sample rate of the given samples using linear interpolation. +// 'Mono' version of the routine. 
Returns the number of samples returned in
+// the "dest" buffer
+uint RateTransposerInteger::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples)
+{
+    unsigned int i = 0, used = 0;
+    LONG_SAMPLETYPE temp, vol1;
+
+    // No input: nothing to interpolate, and 'numSamples - 1' below would wrap (unsigned)
+    if (numSamples == 0) return 0;  // same guard as transposeStereo
+
+    // Process the last sample saved from the previous call first...
+    while (iSlopeCount <= SCALE)
+    {
+        vol1 = (LONG_SAMPLETYPE)(SCALE - iSlopeCount);
+        temp = vol1 * sPrevSampleL + iSlopeCount * src[0];
+        dest[i] = (SAMPLETYPE)(temp / SCALE);
+        i++;
+        iSlopeCount += uRate;
+    }
+    // now always (iSlopeCount > SCALE)
+    iSlopeCount -= SCALE;
+
+    while (1)
+    {
+        while (iSlopeCount > SCALE)
+        {
+            iSlopeCount -= SCALE;
+            used ++;
+            if (used >= numSamples - 1) goto end;
+        }
+        vol1 = (LONG_SAMPLETYPE)(SCALE - iSlopeCount);
+        temp = src[used] * vol1 + iSlopeCount * src[used + 1];
+        dest[i] = (SAMPLETYPE)(temp / SCALE);
+
+        i++;
+        iSlopeCount += uRate;
+    }
+end:
+    // Store the last sample for the next round
+    sPrevSampleL = src[numSamples - 1];
+
+    return i;
+}
+
+
+// Transposes the sample rate of the given samples using linear interpolation.
+// 'Stereo' version of the routine. Returns the number of samples returned in
+// the "dest" buffer
+uint RateTransposerInteger::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples)
+{
+    unsigned int srcPos, i, used;
+    LONG_SAMPLETYPE temp, vol1;
+
+    if (numSamples == 0) return 0;  // no samples, no work
+
+    used = 0;
+    i = 0;
+
+    // Process the last sample saved from the previous call first...
+    while (iSlopeCount <= SCALE)
+    {
+        vol1 = (LONG_SAMPLETYPE)(SCALE - iSlopeCount);
+        temp = vol1 * sPrevSampleL + iSlopeCount * src[0];
+        dest[2 * i] = (SAMPLETYPE)(temp / SCALE);
+        temp = vol1 * sPrevSampleR + iSlopeCount * src[1];
+        dest[2 * i + 1] = (SAMPLETYPE)(temp / SCALE);
+        i++;
+        iSlopeCount += uRate;
+    }
+    // now always (iSlopeCount > SCALE)
+    iSlopeCount -= SCALE;
+
+    while (1)
+    {
+        while (iSlopeCount > SCALE)
+        {
+            iSlopeCount -= SCALE;
+            used ++;
+            if (used >= numSamples - 1) goto end;
+        }
+        srcPos = 2 * used;
+        vol1 = (LONG_SAMPLETYPE)(SCALE - iSlopeCount);
+        temp = src[srcPos] * vol1 + iSlopeCount * src[srcPos + 2];
+        dest[2 * i] = (SAMPLETYPE)(temp / SCALE);
+        temp = src[srcPos + 1] * vol1 + iSlopeCount * src[srcPos + 3];
+        dest[2 * i + 1] = (SAMPLETYPE)(temp / SCALE);
+
+        i++;
+        iSlopeCount += uRate;
+    }
+end:
+    // Store the last sample for the next round
+    sPrevSampleL = src[2 * numSamples - 2];
+    sPrevSampleR = src[2 * numSamples - 1];
+
+    return i;
+}
+
+
+// Sets new target rate. Normal rate = 1.0, smaller values represent slower
+// rate, larger faster rates. Stored in 'uRate' as an integer scaled by SCALE.
+void RateTransposerInteger::setRate(float newRate)
+{
+    uRate = (int)(newRate * SCALE + 0.5f);
+    RateTransposer::setRate(newRate);
+}
+
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// RateTransposerFloat - floating point arithmetic implementation
+//
+//////////////////////////////////////////////////////////////////////////////
+
+// Constructor
+RateTransposerFloat::RateTransposerFloat() : RateTransposer()
+{
+    // call these here as these are virtual functions; calling these
+    // from the base class constructor wouldn't execute the overloaded
+    // versions (peculiar C++ can be).
+    resetRegisters();
+    setRate(1.0f);
+}
+
+
+RateTransposerFloat::~RateTransposerFloat()
+{
+}
+
+
+void RateTransposerFloat::resetRegisters()
+{
+    fSlopeCount = 0;
+    sPrevSampleL =
+    sPrevSampleR = 0;
+}
+
+
+
+// Transposes the sample rate of the given samples using linear interpolation.
+// 'Mono' version of the routine. Returns the number of samples returned in
+// the "dest" buffer
+uint RateTransposerFloat::transposeMono(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples)
+{
+    unsigned int i = 0, used = 0;
+
+    // No input: nothing to interpolate, and 'numSamples - 1' below would wrap (unsigned)
+    if (numSamples == 0) return 0;  // same guard as transposeStereo
+
+    // Process the last sample saved from the previous call first...
+    while (fSlopeCount <= 1.0f)
+    {
+        dest[i] = (SAMPLETYPE)((1.0f - fSlopeCount) * sPrevSampleL + fSlopeCount * src[0]);
+        i++;
+        fSlopeCount += fRate;
+    }
+    fSlopeCount -= 1.0f;
+
+    while (1)
+    {
+        while (fSlopeCount > 1.0f)
+        {
+            fSlopeCount -= 1.0f;
+            used ++;
+            if (used >= numSamples - 1) goto end;
+        }
+        dest[i] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[used] + fSlopeCount * src[used + 1]);
+        i++;
+        fSlopeCount += fRate;
+    }
+end:
+    // Store the last sample for the next round
+    sPrevSampleL = src[numSamples - 1];
+
+    return i;
+}
+
+
+// Transposes the sample rate of the given samples using linear interpolation.
+// 'Stereo' version of the routine. Returns the number of samples returned in
+// the "dest" buffer
+uint RateTransposerFloat::transposeStereo(SAMPLETYPE *dest, const SAMPLETYPE *src, uint numSamples)
+{
+    unsigned int srcPos, i, used;
+
+    if (numSamples == 0) return 0;  // no samples, no work
+
+    used = 0;
+    i = 0;
+
+    // Process the last sample saved from the previous call first...
+    while (fSlopeCount <= 1.0f)
+    {
+        dest[2 * i] = (SAMPLETYPE)((1.0f - fSlopeCount) * sPrevSampleL + fSlopeCount * src[0]);
+        dest[2 * i + 1] = (SAMPLETYPE)((1.0f - fSlopeCount) * sPrevSampleR + fSlopeCount * src[1]);
+        i++;
+        fSlopeCount += fRate;
+    }
+    // now always (fSlopeCount > 1.0f)
+    fSlopeCount -= 1.0f;
+
+    while (1)
+    {
+        while (fSlopeCount > 1.0f)
+        {
+            fSlopeCount -= 1.0f;
+            used ++;
+            if (used >= numSamples - 1) goto end;
+        }
+        srcPos = 2 * used;
+
+        dest[2 * i] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[srcPos]
+            + fSlopeCount * src[srcPos + 2]);
+        dest[2 * i + 1] = (SAMPLETYPE)((1.0f - fSlopeCount) * src[srcPos + 1]
+            + fSlopeCount * src[srcPos + 3]);
+
+        i++;
+        fSlopeCount += fRate;
+    }
+end:
+    // Store the last sample for the next round
+    sPrevSampleL = src[2 * numSamples - 2];
+    sPrevSampleR = src[2 * numSamples - 1];
+
+    return i;
+}
diff --git a/libs/soundtouch/RateTransposer.h b/libs/soundtouch/RateTransposer.h
new file mode 100644
index 0000000000..f7c03f759e
--- /dev/null
+++ b/libs/soundtouch/RateTransposer.h
@@ -0,0 +1,158 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Sample rate transposer. Changes sample rate by using linear interpolation
+/// together with anti-alias filtering (first order interpolation with anti-
+/// alias filtering should be quite adequate for this application).
+///
+/// Use either of the derived classes of 'RateTransposerInteger' or
+/// 'RateTransposerFloat' for corresponding integer/floating point transposing
+/// algorithm implementation.
+/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai @ iki.fi +/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date$ +// File revision : $Revision$ +// +// $Id$ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef RateTransposer_H +#define RateTransposer_H + +#include "AAFilter.h" +#include "FIFOSamplePipe.h" +#include "FIFOSampleBuffer.h" + +#include "STTypes.h" + +namespace soundtouch +{ + +/// A common linear samplerate transposer class. +/// +/// Note: Use function "RateTransposer::newInstance()" to create a new class +/// instance instead of the "new" operator; that function automatically +/// chooses a correct implementation depending on if integer or floating +/// arithmetics are to be used. 
+class RateTransposer : public FIFOProcessor
+{
+protected:
+    /// Anti-alias filter object
+    AAFilter *pAAFilter;
+
+    float fRate;
+
+    uint uChannels;
+
+    /// Buffer for collecting samples to feed the anti-alias filter between
+    /// two batches
+    FIFOSampleBuffer storeBuffer;
+
+    /// Buffer for keeping samples between transposing & anti-alias filter
+    FIFOSampleBuffer tempBuffer;
+
+    /// Output sample buffer
+    FIFOSampleBuffer outputBuffer;
+
+    BOOL bUseAAFilter;
+
+    void init();
+
+    virtual void resetRegisters() = 0;
+
+    virtual uint transposeStereo(SAMPLETYPE *dest,
+                         const SAMPLETYPE *src,
+                         uint numSamples) = 0;
+    virtual uint transposeMono(SAMPLETYPE *dest,
+                       const SAMPLETYPE *src,
+                       uint numSamples) = 0;
+    uint transpose(SAMPLETYPE *dest,
+                   const SAMPLETYPE *src,
+                   uint numSamples);
+
+    void flushStoreBuffer();
+
+    void downsample(const SAMPLETYPE *src,
+                    uint numSamples);
+    void upsample(const SAMPLETYPE *src,
+                 uint numSamples);
+
+    /// Transposes sample rate by applying anti-alias filter to prevent folding.
+    /// Takes 'numSamples' samples from 'src'; declared 'void', so nothing is
+    /// returned to the caller. NOTE(review): earlier wording mentioned a "dest"
+    /// buffer and 'set_returnBuffer_size'; this declaration has neither - confirm.
+    void processSamples(const SAMPLETYPE *src,
+                        uint numSamples);
+
+    RateTransposer();
+
+public:
+    virtual ~RateTransposer();
+
+    /// Use this function instead of "new" operator to create a new instance of this class.
+    /// This function automatically chooses a correct implementation, depending on if
+    /// integer or floating point arithmetics are to be used.
+    static RateTransposer *newInstance();
+
+    /// Returns the output buffer object
+    FIFOSamplePipe *getOutput() { return &outputBuffer; };
+
+    /// Returns the store buffer object
+    FIFOSamplePipe *getStore() { return &storeBuffer; };
+
+    /// Return anti-alias filter object
+    AAFilter *getAAFilter() const;
+
+    /// Enables/disables the anti-alias filter. Zero to disable, nonzero to enable
+    void enableAAFilter(BOOL newMode);
+
+    /// Returns nonzero if anti-alias filter is enabled.
+    BOOL isAAFilterEnabled() const;
+
+    /// Sets new target rate. Normal rate = 1.0, smaller values represent slower
+    /// rate, larger faster rates.
+    virtual void setRate(float newRate);
+
+    /// Sets the number of channels, 1 = mono, 2 = stereo
+    void setChannels(uint channels);
+
+    /// Adds 'numSamples' pcs of samples from the 'samples' memory position into
+    /// the input of the object.
+    void putSamples(const SAMPLETYPE *samples, uint numSamples);
+
+    /// Clears all the samples in the object
+    void clear();
+
+    /// Returns nonzero if there aren't any samples available for outputting.
+    uint isEmpty();
+};
+
+}
+
+#endif
diff --git a/libs/soundtouch/SConscript b/libs/soundtouch/SConscript
new file mode 100644
index 0000000000..12ce0b385c
--- /dev/null
+++ b/libs/soundtouch/SConscript
@@ -0,0 +1,23 @@
+# -*- python -*-
+
+import glob
+
+soundtouch_files = Split("""
+AAFilter.cpp
+FIFOSampleBuffer.cpp
+FIRFilter.cpp
+RateTransposer.cpp
+SoundTouch.cpp
+TDStretch.cpp
+mmx_gcc.cpp
+cpu_detect_x86_gcc.cpp
+""")
+
+Import('env')
+st = env.Copy()
+st.Append(CCFLAGS="-DHAVE_CONFIG_H -D_REENTRANT -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE")
+libst = st.StaticLibrary('soundtouch', soundtouch_files)
+Default(libst)
+
+env.Alias('tarball', env.Distribute (env['DISTTREE'],
+    [ 'SConscript'] + soundtouch_files + glob.glob('*.h')))
diff --git a/libs/soundtouch/STTypes.h b/libs/soundtouch/STTypes.h
new file mode 100644
index 0000000000..dc6a97001a
--- /dev/null
+++ b/libs/soundtouch/STTypes.h
@@ -0,0 +1,110 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Common type definitions for SoundTouch audio processing library.
+///
+/// Author : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai @ iki.fi
+/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed : $Date$
+// File revision : $Revision$
+//
+// $Id$
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+// SoundTouch audio processing library
+// Copyright (c) Olli Parviainen
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef STTypes_H
+#define STTypes_H
+
+typedef unsigned int    uint;
+typedef unsigned long   ulong;
+
+#ifndef _WINDEF_
+    // if these aren't defined already by Windows headers, define now
+
+    typedef unsigned int BOOL;
+
+    #define FALSE   0
+    #define TRUE    1
+
+#endif  // _WINDEF_
+
+
+namespace soundtouch
+{
+    /// Enable one of the following defines to choose either 16bit integer or
+    /// 32bit float sample type. If you don't have an opinion, using integer samples
+    /// is generally faster. (As committed here, FLOAT_SAMPLES is the active choice.)
+    /// #define INTEGER_SAMPLES       //< 16bit integer samples
+    #define FLOAT_SAMPLES       //< 32bit float samples
+
+
+    /// Define this to allow CPU-specific assembler optimizations. Notice that
+    /// having this enabled on non-x86 platforms doesn't matter; the compiler can
+    /// drop unsupported extensions on different platforms automatically.
+    /// However, if you're having difficulties getting the optimized routines
+    /// compiled with your compiler (e.g. some gcc compiler versions may be picky),
+    /// you may wish to disable the optimizations to make the library compile.
+    #define ALLOW_OPTIMIZATIONS     1
+
+
+    #ifdef INTEGER_SAMPLES
+        // 16bit integer sample type
+        typedef short SAMPLETYPE;
+        // data type for sample accumulation: Use 32bit integer to prevent overflows (NOTE(review): 'long' is wider on LP64 targets)
+        typedef long LONG_SAMPLETYPE;
+
+        #ifdef FLOAT_SAMPLES
+            // check that only one sample type is defined
+            #error "conflicting sample types defined"
+        #endif // FLOAT_SAMPLES
+
+        #ifdef ALLOW_OPTIMIZATIONS
+            #if WIN32 || __i386__
+                // Allow MMX optimizations
+                #define ALLOW_MMX   1
+            #endif
+        #endif
+
+    #else
+
+        // floating point samples
+        typedef float SAMPLETYPE;
+        // data type for sample accumulation: Use double to utilize full precision.
+        typedef double LONG_SAMPLETYPE;
+
+        #ifdef ALLOW_OPTIMIZATIONS
+            #ifdef WIN32
+                // Allow 3DNow! and SSE optimizations
+                #define ALLOW_3DNOW     1
+                #define ALLOW_SSE       1
+            #endif // WIN32
+        #endif
+
+    #endif  // INTEGER_SAMPLES
+};
+
+#endif
diff --git a/libs/soundtouch/SoundTouch.cpp b/libs/soundtouch/SoundTouch.cpp
new file mode 100644
index 0000000000..bf8240d111
--- /dev/null
+++ b/libs/soundtouch/SoundTouch.cpp
@@ -0,0 +1,472 @@
+//////////////////////////////////////////////////////////////////////////////
+///
+/// SoundTouch - main class for tempo/pitch/rate adjusting routines.
+///
+/// Notes:
+/// - Initialize the SoundTouch object instance by setting up the sound stream
+///   parameters with functions 'setSampleRate' and 'setChannels', then set
+///   desired tempo/pitch/rate settings with the corresponding functions.
+///
+/// - The SoundTouch class behaves like a first-in-first-out pipeline: The
+///   samples that are to be processed are fed into one end of the pipe by calling
+///   function 'putSamples', while the ready processed samples can be read
+///   from the other end of the pipeline with function 'receiveSamples'.
+///
+/// - The SoundTouch processing classes require certain sized 'batches' of
+///   samples in order to process the sound. For this reason the classes buffer
+///   incoming samples until there are enough samples available for
+///   processing, then they carry out the processing step and consequently
+///   make the processed samples available for outputting.
+///
+/// - For the above reason, the processing routines introduce a certain
+///   'latency' between the input and output, so that the samples input to
+///   SoundTouch may not be immediately available in the output, nor is
+///   the amount of outputtable samples necessarily in direct
+///   relationship with the amount of previously input samples.
+///
+/// - The tempo/pitch/rate control parameters can be altered during processing.
+///   Please notice though that they aren't currently protected by semaphores,
+///   so in multi-thread application external semaphore protection may be
+///   required.
+///
+/// - This class utilizes classes 'TDStretch' for tempo change (without modifying
+///   pitch) and 'RateTransposer' for changing the playback rate (that is, both
+///   tempo and pitch in the same ratio) of the sound. The third available control
+///   'pitch' (change pitch but maintain tempo) is produced by
+///   combining the two other controls.
+/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai @ iki.fi +/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date$ +// File revision : $Revision$ +// +// $Id$ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include <assert.h>
+#include <stdlib.h>
+#include <memory.h>
+#include <math.h>
+#include <stdexcept>
+#include <stdio.h>
+
+#include "SoundTouch.h"
+#include "TDStretch.h"
+#include "RateTransposer.h"
+#include "cpu_detect.h"
+
+using namespace soundtouch;
+
+/// Print library version string
+extern "C" void soundtouch_ac_test()
+{
+    printf("SoundTouch Version: %s\n",SOUNDTOUCH_VERSION);
+}
+
+
+SoundTouch::SoundTouch()
+{
+    // Initialize rate transposer and tempo changer instances
+
+    pRateTransposer = RateTransposer::newInstance();
+    pTDStretch = TDStretch::newInstance();
+
+    setOutPipe(pTDStretch);
+
+    rate = tempo = 0;   // differs from the 1.0 computed below, so both setters get called once
+
+    virtualPitch =
+    virtualRate =
+    virtualTempo = 1.0;
+
+    calcEffectiveRateAndTempo();
+
+    channels = 0;       // 0 = "not set yet"; putSamples() throws until setChannels() is called
+    bSrateSet = FALSE;
+}
+
+
+
+SoundTouch::~SoundTouch()
+{
+    delete pRateTransposer;
+    delete pTDStretch;
+}
+
+
+
+/// Get SoundTouch library version string
+const char *SoundTouch::getVersionString()
+{
+    static const char *_version = SOUNDTOUCH_VERSION;
+
+    return _version;
+}
+
+
+/// Get SoundTouch library version Id
+uint SoundTouch::getVersionId()
+{
+    return SOUNDTOUCH_VERSION_ID;
+}
+
+
+// Sets the number of channels, 1 = mono, 2 = stereo.
+// Throws std::runtime_error for any other value.
+void SoundTouch::setChannels(uint numChannels)
+{
+    if (numChannels != 1 && numChannels != 2)
+    {
+        throw std::runtime_error("Illegal number of channels");
+    }
+    channels = numChannels;
+    pRateTransposer->setChannels(numChannels);
+    pTDStretch->setChannels(numChannels);
+}
+
+
+// Sets new rate control value. Normal rate = 1.0, smaller values
+// represent slower rate, larger faster rates.
+void SoundTouch::setRate(float newRate)
+{
+    virtualRate = newRate;
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets new rate control value as a difference in percents compared
+// to the original rate (-50 .. +100 %)
+void SoundTouch::setRateChange(float newRate)
+{
+    virtualRate = 1.0f + 0.01f * newRate;   // percent difference -> ratio
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets new tempo control value. Normal tempo = 1.0, smaller values
+// represent slower tempo, larger faster tempo.
+void SoundTouch::setTempo(float newTempo)
+{
+    virtualTempo = newTempo;
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets new tempo control value as a difference in percents compared
+// to the original tempo (-50 .. +100 %)
+void SoundTouch::setTempoChange(float newTempo)
+{
+    virtualTempo = 1.0f + 0.01f * newTempo;   // percent difference -> ratio
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets new pitch control value. Original pitch = 1.0, smaller values
+// represent lower pitches, larger values higher pitch.
+void SoundTouch::setPitch(float newPitch)
+{
+    virtualPitch = newPitch;
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets pitch change in octaves compared to the original pitch
+// (-1.00 .. +1.00)
+void SoundTouch::setPitchOctaves(float newPitch)
+{
+    virtualPitch = (float)exp(0.69314718056f * newPitch);   // 0.693... = ln(2), i.e. 2^newPitch
+    calcEffectiveRateAndTempo();
+}
+
+
+
+// Sets pitch change in semi-tones compared to the original pitch
+// (-12 .. +12)
+void SoundTouch::setPitchSemiTones(int newPitch)
+{
+    setPitchOctaves((float)newPitch / 12.0f);
+}
+
+
+// Same as above, but takes a fractional number of semi-tones.
+void SoundTouch::setPitchSemiTones(float newPitch)
+{
+    setPitchOctaves(newPitch / 12.0f);
+}
+
+
+// Calculates 'effective' rate and tempo values from the
+// nominal control values.
+void SoundTouch::calcEffectiveRateAndTempo()
+{
+    float oldTempo = tempo;
+    float oldRate = rate;
+
+    tempo = virtualTempo / virtualPitch;
+    rate = virtualPitch * virtualRate;
+
+    if (rate != oldRate) pRateTransposer->setRate(rate);
+    if (tempo != oldTempo) pTDStretch->setTempo(tempo);
+
+    if (rate > 1.0f)   // rate > 1: the rate transposer becomes the output stage
+    {
+        if (output != pRateTransposer)
+        {
+            FIFOSamplePipe *transOut;
+
+            assert(output == pTDStretch);
+            // move samples in the current output buffer to the output of pRateTransposer
+            transOut = pRateTransposer->getOutput();
+            transOut->moveSamples(*output);
+            // move samples in tempo changer's input to pitch transposer's input
+            pRateTransposer->moveSamples(*pTDStretch->getInput());
+
+            output = pRateTransposer;
+        }
+    }
+    else   // rate <= 1: the tempo changer is the output stage
+    {
+        if (output != pTDStretch)
+        {
+            FIFOSamplePipe *tempoOut;
+
+            assert(output == pRateTransposer);
+            // move samples in the current output buffer to the output of pTDStretch
+            tempoOut = pTDStretch->getOutput();
+            tempoOut->moveSamples(*output);
+            // move samples in pitch transposer's store buffer to tempo changer's input
+            pTDStretch->moveSamples(*pRateTransposer->getStore());
+
+            output = pTDStretch;
+
+        }
+    }
+}
+
+
+// Sets sample rate.
+void SoundTouch::setSampleRate(uint srate)
+{
+    bSrateSet = TRUE;
+    // set sample rate, leave other tempo changer parameters as they are.
+    pTDStretch->setParameters(srate);
+}
+
+
+// Adds 'numSamples' pcs of samples from the 'samples' memory position into
+// the input of the object. Throws std::runtime_error if sample rate or channels are unset.
+void SoundTouch::putSamples(const SAMPLETYPE *samples, uint numSamples)
+{
+    if (bSrateSet == FALSE)
+    {
+        throw std::runtime_error("SoundTouch : Sample rate not defined");
+    }
+    else if (channels == 0)
+    {
+        throw std::runtime_error("SoundTouch : Number of channels not defined");
+    }
+
+    // Transpose the rate of the new samples if necessary
+    if (rate == 1.0f)
+    {
+        // The rate value is same as the original, simply evaluate the tempo changer.
+        assert(output == pTDStretch);
+        if (pRateTransposer->isEmpty() == 0)
+        {
+            // yet flush the last samples in the pitch transposer buffer
+            // (may happen if 'rate' changes from a non-unity value to 1.0)
+            pTDStretch->moveSamples(*pRateTransposer);
+        }
+        pTDStretch->putSamples(samples, numSamples);
+    }
+    else if (rate < 1.0f)
+    {
+        // transpose the rate down, output the transposed sound to tempo changer buffer
+        assert(output == pTDStretch);
+        pRateTransposer->putSamples(samples, numSamples);
+        pTDStretch->moveSamples(*pRateTransposer);
+    }
+    else
+    {
+        assert(rate > 1.0f);
+        // evaluate the tempo changer, then transpose the rate up,
+        assert(output == pRateTransposer);
+        pTDStretch->putSamples(samples, numSamples);
+        pRateTransposer->moveSamples(*pTDStretch);
+    }
+}
+
+
+// Flushes the last samples from the processing pipeline to the output.
+// Clears also the internal processing buffers.
+//
+// Note: This function is meant for extracting the last samples of a sound
+// stream. This function may introduce additional blank samples in the end
+// of the sound stream, and thus it's not recommended to call this function
+// in the middle of a sound stream.
+void SoundTouch::flush()
+{
+    int i;
+    uint nOut;
+    SAMPLETYPE buff[128];   // room for 64 samples of up to 2 channels
+
+    nOut = numSamples();
+
+    memset(buff, 0, 128 * sizeof(SAMPLETYPE));
+    // "Push" the last active samples out from the processing pipeline by
+    // feeding blank samples into the processing pipeline until new,
+    // processed samples appear in the output (not however, more than
+    // 8ksamples in any case: 128 rounds x 64 samples = 8192)
+    for (i = 0; i < 128; i ++)
+    {
+        putSamples(buff, 64);
+        if (numSamples() != nOut) break;  // new samples have appeared in the output!
+    }
+
+    // Clear working buffers
+    pRateTransposer->clear();
+    pTDStretch->clearInput();
+    // yet leave the 'tempoChanger' output untouched as that's where the
+    // flushed samples are!
+}
+
+
+// Changes a setting controlling the processing system behaviour. See the
+// 'SETTING_...'
defines for available setting ID's.
+BOOL SoundTouch::setSetting(uint settingId, uint value)
+{
+    uint sampleRate, sequenceMs, seekWindowMs, overlapMs;
+
+    // read current tdstretch routine parameters, so that only one of them is replaced below
+    pTDStretch->getParameters(&sampleRate, &sequenceMs, &seekWindowMs, &overlapMs);
+
+    switch (settingId)
+    {
+        case SETTING_USE_AA_FILTER :
+            // enables / disables anti-alias filter
+            pRateTransposer->enableAAFilter((value != 0) ? TRUE : FALSE);
+            return TRUE;
+
+        case SETTING_AA_FILTER_LENGTH :
+            // sets anti-alias filter length
+            pRateTransposer->getAAFilter()->setLength(value);
+            return TRUE;
+
+        case SETTING_USE_QUICKSEEK :
+            // enables / disables tempo routine quick seeking algorithm
+            pTDStretch->enableQuickSeek((value != 0) ? TRUE : FALSE);
+            return TRUE;
+
+        case SETTING_SEQUENCE_MS:
+            // change time-stretch sequence duration parameter
+            pTDStretch->setParameters(sampleRate, value, seekWindowMs, overlapMs);
+            return TRUE;
+
+        case SETTING_SEEKWINDOW_MS:
+            // change time-stretch seek window length parameter
+            pTDStretch->setParameters(sampleRate, sequenceMs, value, overlapMs);
+            return TRUE;
+
+        case SETTING_OVERLAP_MS:
+            // change time-stretch overlap length parameter
+            pTDStretch->setParameters(sampleRate, sequenceMs, seekWindowMs, value);
+            return TRUE;
+
+        default :
+            return FALSE;   // unrecognized setting ID: nothing changed
+    }
+}
+
+
+// Reads a setting controlling the processing system behaviour. See the
+// 'SETTING_...' defines for available setting ID's.
+//
+// Returns the setting value.
+uint SoundTouch::getSetting(uint settingId) const
+{
+    uint temp;
+
+    switch (settingId)
+    {
+        case SETTING_USE_AA_FILTER :
+            return pRateTransposer->isAAFilterEnabled();
+
+        case SETTING_AA_FILTER_LENGTH :
+            return pRateTransposer->getAAFilter()->getLength();
+
+        case SETTING_USE_QUICKSEEK :
+            return pTDStretch->isQuickSeekEnabled();
+
+        case SETTING_SEQUENCE_MS:
+            pTDStretch->getParameters(NULL, &temp, NULL, NULL);
+            return temp;
+
+        case SETTING_SEEKWINDOW_MS:
+            pTDStretch->getParameters(NULL, NULL, &temp, NULL);
+            return temp;
+
+        case SETTING_OVERLAP_MS:
+            pTDStretch->getParameters(NULL, NULL, NULL, &temp);
+            return temp;
+
+        default :
+            return 0;   // unrecognized setting ID
+    }
+}
+
+
+// Clears all the samples in the object's output and internal processing
+// buffers.
+void SoundTouch::clear()
+{
+    pRateTransposer->clear();
+    pTDStretch->clear();
+}
+
+
+
+/// Returns number of samples currently unprocessed (those in the tempo changer's input pipe).
+uint SoundTouch::numUnprocessedSamples() const
+{
+    FIFOSamplePipe * psp;
+    if (pTDStretch)
+    {
+        psp = pTDStretch->getInput();
+        if (psp)
+        {
+            return psp->numSamples();
+        }
+    }
+    return 0;
+}
diff --git a/libs/soundtouch/SoundTouch.h b/libs/soundtouch/SoundTouch.h
new file mode 100644
index 0000000000..3fe2441792
--- /dev/null
+++ b/libs/soundtouch/SoundTouch.h
@@ -0,0 +1,252 @@
+//////////////////////////////////////////////////////////////////////////////
+///
+/// SoundTouch - main class for tempo/pitch/rate adjusting routines.
+///
+/// Notes:
+/// - Initialize the SoundTouch object instance by setting up the sound stream
+///   parameters with functions 'setSampleRate' and 'setChannels', then set
+///   desired tempo/pitch/rate settings with the corresponding functions.
+///
+/// - The SoundTouch class behaves like a first-in-first-out pipeline: The
+///   samples that are to be processed are fed into one end of the pipe by calling
+///   function 'putSamples', while the ready processed samples can be read
+///   from the other end of the pipeline with function 'receiveSamples'.
+///
+/// - The SoundTouch processing classes require certain sized 'batches' of
+///   samples in order to process the sound. For this reason the classes buffer
+///   incoming samples until there are enough samples available for
+///   processing, then they carry out the processing step and consequently
+///   make the processed samples available for outputting.
+///
+/// - For the above reason, the processing routines introduce a certain
+///   'latency' between the input and output, so that the samples input to
+///   SoundTouch may not be immediately available in the output, nor is
+///   the amount of outputtable samples necessarily in direct
+///   relationship with the amount of previously input samples.
+///
+/// - The tempo/pitch/rate control parameters can be altered during processing.
+///   Please notice though that they aren't currently protected by semaphores,
+///   so in multi-thread application external semaphore protection may be
+///   required.
+///
+/// - This class utilizes classes 'TDStretch' for tempo change (without modifying
+///   pitch) and 'RateTransposer' for changing the playback rate (that is, both
+///   tempo and pitch in the same ratio) of the sound. The third available control
+///   'pitch' (change pitch but maintain tempo) is produced by
+///   combining the two other controls.
+///
+/// Author : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai @ iki.fi
+/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed : $Date$
+// File revision : $Revision$
+//
+// $Id$
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+// SoundTouch audio processing library
+// Copyright (c) Olli Parviainen
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef SoundTouch_H
+#define SoundTouch_H
+
+#include "FIFOSamplePipe.h"
+#include "STTypes.h"
+
+namespace soundtouch
+{
+
+/// Soundtouch library version string
+#define SOUNDTOUCH_VERSION          "1.3.0"
+
+/// SoundTouch library version id (decimal; a leading zero would make the literal octal, i.e. 4288)
+#define SOUNDTOUCH_VERSION_ID       10300
+
+//
+// Available setting IDs for the 'setSetting' & 'getSetting' functions:
+
+/// Enable/disable anti-alias filter in pitch transposer (0 = disable)
+#define SETTING_USE_AA_FILTER       0
+
+/// Pitch transposer anti-alias filter length (8 ..
128 taps, default = 32) +#define SETTING_AA_FILTER_LENGTH 1 + +/// Enable/disable quick seeking algorithm in tempo changer routine +/// (enabling quick seeking lowers CPU utilization but causes a minor sound +/// quality compromising) +#define SETTING_USE_QUICKSEEK 2 + +/// Time-stretch algorithm single processing sequence length in milliseconds. This determines +/// to how long sequences the original sound is chopped in the time-stretch algorithm. +/// See "STTypes.h" or README for more information. +#define SETTING_SEQUENCE_MS 3 + +/// Time-stretch algorithm seeking window length in milliseconds for algorithm that finds the +/// best possible overlapping location. This determines from how wide window the algorithm +/// may look for an optimal joining location when mixing the sound sequences back together. +/// See "STTypes.h" or README for more information. +#define SETTING_SEEKWINDOW_MS 4 + +/// Time-stretch algorithm overlap length in milliseconds. When the chopped sound sequences +/// are mixed back together, to form a continuous sound stream, this parameter defines over +/// how long period the two consecutive sequences are let to overlap each other. +/// See "STTypes.h" or README for more information. +#define SETTING_OVERLAP_MS 5 + + +class SoundTouch : public FIFOProcessor +{ +private: + /// Rate transposer class instance + class RateTransposer *pRateTransposer; + + /// Time-stretch class instance + class TDStretch *pTDStretch; + + /// Virtual pitch parameter. Effective rate & tempo are calculated from these parameters. + float virtualRate; + + /// Virtual pitch parameter. Effective rate & tempo are calculated from these parameters. + float virtualTempo; + + /// Virtual pitch parameter. Effective rate & tempo are calculated from these parameters. + float virtualPitch; + + /// Flag: Has sample rate been set? + BOOL bSrateSet; + + /// Calculates effective rate & tempo valuescfrom 'virtualRate', 'virtualTempo' and + /// 'virtualPitch' parameters. 
+ void calcEffectiveRateAndTempo(); + +protected : + /// Number of channels + uint channels; + + /// Effective 'rate' value calculated from 'virtualRate', 'virtualTempo' and 'virtualPitch' + float rate; + + /// Effective 'tempo' value calculated from 'virtualRate', 'virtualTempo' and 'virtualPitch' + float tempo; + +public: + SoundTouch(); + virtual ~SoundTouch(); + + /// Get SoundTouch library version string + static const char *getVersionString(); + + /// Get SoundTouch library version Id + static uint SoundTouch::getVersionId(); + + /// Sets new rate control value. Normal rate = 1.0, smaller values + /// represent slower rate, larger faster rates. + void setRate(float newRate); + + /// Sets new tempo control value. Normal tempo = 1.0, smaller values + /// represent slower tempo, larger faster tempo. + void setTempo(float newTempo); + + /// Sets new rate control value as a difference in percents compared + /// to the original rate (-50 .. +100 %) + void setRateChange(float newRate); + + /// Sets new tempo control value as a difference in percents compared + /// to the original tempo (-50 .. +100 %) + void setTempoChange(float newTempo); + + /// Sets new pitch control value. Original pitch = 1.0, smaller values + /// represent lower pitches, larger values higher pitch. + void setPitch(float newPitch); + + /// Sets pitch change in octaves compared to the original pitch + /// (-1.00 .. +1.00) + void setPitchOctaves(float newPitch); + + /// Sets pitch change in semi-tones compared to the original pitch + /// (-12 .. +12) + void setPitchSemiTones(int newPitch); + void setPitchSemiTones(float newPitch); + + /// Sets the number of channels, 1 = mono, 2 = stereo + void setChannels(uint numChannels); + + /// Sets sample rate. + void setSampleRate(uint srate); + + /// Flushes the last samples from the processing pipeline to the output. + /// Clears also the internal processing buffers. 
+ /// + /// Note: This function is meant for extracting the last samples of a sound + /// stream. This function may introduce additional blank samples at the end + /// of the sound stream, and thus it's not recommended to call this function + /// in the middle of a sound stream. + void flush(); + + /// Adds 'numSamples' pcs of samples from the 'samples' memory position into + /// the input of the object. Notice that sample rate _has_to_ be set before + /// calling this function, otherwise throws a runtime_error exception. + virtual void putSamples( + const SAMPLETYPE *samples, ///< Pointer to sample buffer. + uint numSamples ///< Number of samples in buffer. Notice + ///< that in case of stereo-sound a single sample + ///< contains data for both channels. + ); + + /// Clears all the samples in the object's output and internal processing + /// buffers. + virtual void clear(); + + /// Changes a setting controlling the processing system behaviour. See the + /// 'SETTING_...' defines for available setting ID's. + /// + /// \return 'TRUE' if the setting was successfully changed + BOOL setSetting(uint settingId, ///< Setting ID number. see SETTING_... defines. + uint value ///< New setting value. + ); + + /// Reads a setting controlling the processing system behaviour. See the + /// 'SETTING_...' defines for available setting ID's. + /// + /// \return the setting value. + uint getSetting(uint settingId ///< Setting ID number, see SETTING_... defines. + ) const; + + /// Returns number of samples currently unprocessed. + virtual uint numUnprocessedSamples() const; + + + /// Other handy functions that are implemented in the ancestor classes (see + /// classes 'FIFOProcessor' and 'FIFOSamplePipe') + /// + /// - receiveSamples() : Use this function to receive 'ready' processed samples from SoundTouch. 
+ /// - numSamples() : Get number of 'ready' samples that can be received with + /// function 'receiveSamples()' + /// - isEmpty() : Returns nonzero if there aren't any 'ready' samples. + /// - clear() : Clears all samples from ready/processing buffers. +}; + +} +#endif diff --git a/libs/soundtouch/TDStretch.cpp b/libs/soundtouch/TDStretch.cpp new file mode 100644 index 0000000000..f1b85b5f17 --- /dev/null +++ b/libs/soundtouch/TDStretch.cpp @@ -0,0 +1,923 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo +/// while maintaining the original pitch by using a time domain WSOLA-like +/// method with several performance-increasing tweaks. +/// +/// Note : MMX optimized functions reside in a separate, platform-specific +/// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp' +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai @ iki.fi +/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date$ +// File revision : $Revision$ +// +// $Id$ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include +#include +#include +#include + +#include "STTypes.h" +#include "cpu_detect.h" +#include "TDStretch.h" + +using namespace soundtouch; + +#ifndef min +#define min(a,b) ((a > b) ? b : a) +#define max(a,b) ((a < b) ? b : a) +#endif + + + +/***************************************************************************** + * + * Constant definitions + * + *****************************************************************************/ + + +#define MAX_SCAN_DELTA 124 + +// Table for the hierarchical mixing position seeking algorithm +int scanOffsets[4][24]={ + { 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806, + 868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0}, + {-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { -20, -15, -10, -5, 5, 10, 15, 20, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + { -4, -3, -2, -1, 1, 2, 3, 4, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; + +/***************************************************************************** + * + * Implementation of the class 'TDStretch' + * + *****************************************************************************/ + + +TDStretch::TDStretch() : FIFOProcessor(&outputBuffer) +{ + bQuickseek = FALSE; + channels = 2; + bMidBufferDirty = FALSE; + + pMidBuffer = NULL; + pRefMidBufferUnaligned = NULL; + overlapLength = 0; + + setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS); + + setTempo(1.0f); +} + + + + +TDStretch::~TDStretch() +{ + delete[] pMidBuffer; + delete[] pRefMidBufferUnaligned; +} + + + +// Calculates the x having the closest 2^x value for the given value 
+static int _getClosest2Power(double value) +{ + return (int)(log(value) / log(2.0) + 0.5); +} + + + +// Sets routine control parameters. These control are certain time constants +// defining how the sound is stretched to the desired duration. +// +// 'sampleRate' = sample rate of the sound +// 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms) +// 'seekwindowMS' = seeking window length for scanning the best overlapping +// position (default = 28 ms) +// 'overlapMS' = overlapping length (default = 12 ms) + +void TDStretch::setParameters(uint aSampleRate, uint aSequenceMS, + uint aSeekWindowMS, uint aOverlapMS) +{ + this->sampleRate = aSampleRate; + this->sequenceMs = aSequenceMS; + this->seekWindowMs = aSeekWindowMS; + this->overlapMs = aOverlapMS; + + seekLength = (sampleRate * seekWindowMs) / 1000; + seekWindowLength = (sampleRate * sequenceMs) / 1000; + + maxOffset = seekLength; + + calculateOverlapLength(overlapMs); + + // set tempo to recalculate 'sampleReq' + setTempo(tempo); + +} + + + +/// Get routine control parameters, see setParameters() function. +/// Any of the parameters to this function can be NULL, in such case corresponding parameter +/// value isn't returned. 
+void TDStretch::getParameters(uint *pSampleRate, uint *pSequenceMs, uint *pSeekWindowMs, uint *pOverlapMs) +{ + if (pSampleRate) + { + *pSampleRate = sampleRate; + } + + if (pSequenceMs) + { + *pSequenceMs = sequenceMs; + } + + if (pSeekWindowMs) + { + *pSeekWindowMs = seekWindowMs; + } + + if (pOverlapMs) + { + *pOverlapMs = overlapMs; + } +} + + +// Overlaps samples in 'midBuffer' with the samples in 'input' +void TDStretch::overlapMono(SAMPLETYPE *output, const SAMPLETYPE *input) const +{ + int i, itemp; + + for (i = 0; i < (int)overlapLength ; i ++) + { + itemp = overlapLength - i; + output[i] = (input[i] * i + pMidBuffer[i] * itemp ) / overlapLength; // >> overlapDividerBits; + } +} + + + +void TDStretch::clearMidBuffer() +{ + if (bMidBufferDirty) + { + memset(pMidBuffer, 0, 2 * sizeof(SAMPLETYPE) * overlapLength); + bMidBufferDirty = FALSE; + } +} + + +void TDStretch::clearInput() +{ + inputBuffer.clear(); + clearMidBuffer(); +} + + +// Clears the sample buffers +void TDStretch::clear() +{ + outputBuffer.clear(); + inputBuffer.clear(); + clearMidBuffer(); +} + + + +// Enables/disables the quick position seeking algorithm. Zero to disable, nonzero +// to enable +void TDStretch::enableQuickSeek(BOOL enable) +{ + bQuickseek = enable; +} + + +// Returns nonzero if the quick seeking algorithm is enabled. +BOOL TDStretch::isQuickSeekEnabled() const +{ + return bQuickseek; +} + + +// Seeks for the optimal overlap-mixing position. +uint TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos) +{ + if (channels == 2) + { + // stereo sound + if (bQuickseek) + { + return seekBestOverlapPositionStereoQuick(refPos); + } + else + { + return seekBestOverlapPositionStereo(refPos); + } + } + else + { + // mono sound + if (bQuickseek) + { + return seekBestOverlapPositionMonoQuick(refPos); + } + else + { + return seekBestOverlapPositionMono(refPos); + } + } +} + + + + +// Overlaps samples in 'midBuffer' with the samples in 'inputBuffer' at position +// of 'ovlPos'. 
+inline void TDStretch::overlap(SAMPLETYPE *output, const SAMPLETYPE *input, uint ovlPos) const +{ + if (channels == 2) + { + // stereo sound + overlapStereo(output, input + 2 * ovlPos); + } else { + // mono sound. + overlapMono(output, input + ovlPos); + } +} + + + + +// Seeks for the optimal overlap-mixing position. The 'stereo' version of the +// routine +// +// The best position is determined as the position where the two overlapped +// sample sequences are 'most alike', in terms of the highest cross-correlation +// value over the overlapping period +uint TDStretch::seekBestOverlapPositionStereo(const SAMPLETYPE *refPos) +{ + uint bestOffs; + LONG_SAMPLETYPE bestCorr, corr; + uint i; + + // Slopes the amplitudes of the 'midBuffer' samples + precalcCorrReferenceStereo(); + + bestCorr = INT_MIN; + bestOffs = 0; + + // Scans for the best correlation value by testing each possible position + // over the permitted range. + for (i = 0; i < seekLength; i ++) + { + // Calculates correlation value for the mixing position corresponding + // to 'i' + corr = calcCrossCorrStereo(refPos + 2 * i, pRefMidBuffer); + + // Checks for the highest correlation value + if (corr > bestCorr) + { + bestCorr = corr; + bestOffs = i; + } + } + // clear cross correlation routine state if necessary (is so e.g. in MMX routines). + clearCrossCorrState(); + + return bestOffs; +} + + +// Seeks for the optimal overlap-mixing position. 
The 'stereo' version of the +// routine +// +// The best position is determined as the position where the two overlapped +// sample sequences are 'most alike', in terms of the highest cross-correlation +// value over the overlapping period +uint TDStretch::seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos) +{ + uint j; + uint bestOffs; + LONG_SAMPLETYPE bestCorr, corr; + uint scanCount, corrOffset, tempOffset; + + // Slopes the amplitude of the 'midBuffer' samples + precalcCorrReferenceStereo(); + + bestCorr = INT_MIN; + bestOffs = 0; + corrOffset = 0; + tempOffset = 0; + + // Scans for the best correlation value using four-pass hierarchical search. + // + // The look-up table 'scans' has hierarchical position adjusting steps. + // In first pass the routine searhes for the highest correlation with + // relatively coarse steps, then rescans the neighbourhood of the highest + // correlation with better resolution and so on. + for (scanCount = 0;scanCount < 4; scanCount ++) + { + j = 0; + while (scanOffsets[scanCount][j]) + { + tempOffset = corrOffset + scanOffsets[scanCount][j]; + if (tempOffset >= seekLength) break; + + // Calculates correlation value for the mixing position corresponding + // to 'tempOffset' + corr = calcCrossCorrStereo(refPos + 2 * tempOffset, pRefMidBuffer); + + // Checks for the highest correlation value + if (corr > bestCorr) + { + bestCorr = corr; + bestOffs = tempOffset; + } + j ++; + } + corrOffset = bestOffs; + } + // clear cross correlation routine state if necessary (is so e.g. in MMX routines). + clearCrossCorrState(); + + return bestOffs; +} + + + +// Seeks for the optimal overlap-mixing position. 
The 'mono' version of the +// routine +// +// The best position is determined as the position where the two overlapped +// sample sequences are 'most alike', in terms of the highest cross-correlation +// value over the overlapping period +uint TDStretch::seekBestOverlapPositionMono(const SAMPLETYPE *refPos) +{ + uint bestOffs; + LONG_SAMPLETYPE bestCorr, corr; + uint tempOffset; + const SAMPLETYPE *compare; + + // Slopes the amplitude of the 'midBuffer' samples + precalcCorrReferenceMono(); + + bestCorr = INT_MIN; + bestOffs = 0; + + // Scans for the best correlation value by testing each possible position + // over the permitted range. + for (tempOffset = 0; tempOffset < seekLength; tempOffset ++) + { + compare = refPos + tempOffset; + + // Calculates correlation value for the mixing position corresponding + // to 'tempOffset' + corr = calcCrossCorrMono(pRefMidBuffer, compare); + + // Checks for the highest correlation value + if (corr > bestCorr) + { + bestCorr = corr; + bestOffs = tempOffset; + } + } + // clear cross correlation routine state if necessary (is so e.g. in MMX routines). + clearCrossCorrState(); + + return bestOffs; +} + + +// Seeks for the optimal overlap-mixing position. The 'mono' version of the +// routine +// +// The best position is determined as the position where the two overlapped +// sample sequences are 'most alike', in terms of the highest cross-correlation +// value over the overlapping period +uint TDStretch::seekBestOverlapPositionMonoQuick(const SAMPLETYPE *refPos) +{ + uint j; + uint bestOffs; + LONG_SAMPLETYPE bestCorr, corr; + uint scanCount, corrOffset, tempOffset; + + // Slopes the amplitude of the 'midBuffer' samples + precalcCorrReferenceMono(); + + bestCorr = INT_MIN; + bestOffs = 0; + corrOffset = 0; + tempOffset = 0; + + // Scans for the best correlation value using four-pass hierarchical search. + // + // The look-up table 'scans' has hierarchical position adjusting steps. 
+ // In first pass the routine searhes for the highest correlation with + // relatively coarse steps, then rescans the neighbourhood of the highest + // correlation with better resolution and so on. + for (scanCount = 0;scanCount < 4; scanCount ++) + { + j = 0; + while (scanOffsets[scanCount][j]) + { + tempOffset = corrOffset + scanOffsets[scanCount][j]; + if (tempOffset >= seekLength) break; + + // Calculates correlation value for the mixing position corresponding + // to 'tempOffset' + corr = calcCrossCorrMono(refPos + tempOffset, pRefMidBuffer); + + // Checks for the highest correlation value + if (corr > bestCorr) + { + bestCorr = corr; + bestOffs = tempOffset; + } + j ++; + } + corrOffset = bestOffs; + } + // clear cross correlation routine state if necessary (is so e.g. in MMX routines). + clearCrossCorrState(); + + return bestOffs; +} + + +/// clear cross correlation routine state if necessary +void TDStretch::clearCrossCorrState() +{ + // default implementation is empty. +} + + +// Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower +// tempo, larger faster tempo. 
+void TDStretch::setTempo(float newTempo) +{ + uint intskip; + + tempo = newTempo; + + // Calculate ideal skip length (according to tempo value) + nominalSkip = tempo * (seekWindowLength - overlapLength); + skipFract = 0; + intskip = (int)(nominalSkip + 0.5f); + + // Calculate how many samples are needed in the 'inputBuffer' to + // process another batch of samples + sampleReq = max(intskip + overlapLength, seekWindowLength) + maxOffset; +} + + + +// Sets the number of channels, 1 = mono, 2 = stereo +void TDStretch::setChannels(uint numChannels) +{ + if (channels == numChannels) return; + assert(numChannels == 1 || numChannels == 2); + + channels = numChannels; + inputBuffer.setChannels(channels); + outputBuffer.setChannels(channels); +} + + +// nominal tempo, no need for processing, just pass the samples through +// to outputBuffer +void TDStretch::processNominalTempo() +{ + assert(tempo == 1.0f); + + if (bMidBufferDirty) + { + // If there are samples in pMidBuffer waiting for overlapping, + // do a single sliding overlapping with them in order to prevent a + // clicking distortion in the output sound + if (inputBuffer.numSamples() < overlapLength) + { + // wait until we've got overlapLength input samples + return; + } + // Mix the samples in the beginning of 'inputBuffer' with the + // samples in 'midBuffer' using sliding overlapping + overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0); + outputBuffer.putSamples(overlapLength); + inputBuffer.receiveSamples(overlapLength); + clearMidBuffer(); + // now we've caught the nominal sample flow and may switch to + // bypass mode + } + + // Simply bypass samples from input to output + outputBuffer.moveSamples(inputBuffer); +} + + +// Processes as many processing frames of the samples 'inputBuffer', store +// the result into 'outputBuffer' +void TDStretch::processSamples() +{ + uint ovlSkip, offset; + int temp; + + if (tempo == 1.0f) + { + // tempo not changed from the original, so bypass the processing 
+ processNominalTempo(); + return; + } + + if (bMidBufferDirty == FALSE) + { + // if midBuffer is empty, move the first samples of the input stream + // into it + if (inputBuffer.numSamples() < overlapLength) + { + // wait until we've got overlapLength samples + return; + } + memcpy(pMidBuffer, inputBuffer.ptrBegin(), channels * overlapLength * sizeof(SAMPLETYPE)); + inputBuffer.receiveSamples(overlapLength); + bMidBufferDirty = TRUE; + } + + // Process samples as long as there are enough samples in 'inputBuffer' + // to form a processing frame. + while (inputBuffer.numSamples() >= sampleReq) + { + // If tempo differs from the normal ('SCALE'), scan for the best overlapping + // position + offset = seekBestOverlapPosition(inputBuffer.ptrBegin()); + + // Mix the samples in the 'inputBuffer' at position of 'offset' with the + // samples in 'midBuffer' using sliding overlapping + // ... first partially overlap with the end of the previous sequence + // (that's in 'midBuffer') + overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), offset); + outputBuffer.putSamples(overlapLength); + + // ... then copy sequence samples from 'inputBuffer' to output + temp = (seekWindowLength - 2 * overlapLength);// & 0xfffffffe; + if (temp > 0) + { + outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), temp); + } + + // Copies the end of the current sequence from 'inputBuffer' to + // 'midBuffer' for being mixed with the beginning of the next + // processing sequence and so on + assert(offset + seekWindowLength <= inputBuffer.numSamples()); + memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + seekWindowLength - overlapLength), + channels * sizeof(SAMPLETYPE) * overlapLength); + bMidBufferDirty = TRUE; + + // Remove the processed samples from the input buffer. Update + // the difference between integer & nominal skip step to 'skipFract' + // in order to prevent the error from accumulating over time. 
+ skipFract += nominalSkip; // real skip size + ovlSkip = (int)skipFract; // rounded to integer skip + skipFract -= ovlSkip; // maintain the fraction part, i.e. real vs. integer skip + inputBuffer.receiveSamples(ovlSkip); + } +} + + +// Adds 'numsamples' pcs of samples from the 'samples' memory position into +// the input of the object. +void TDStretch::putSamples(const SAMPLETYPE *samples, uint numSamples) +{ + // Add the samples into the input buffer + inputBuffer.putSamples(samples, numSamples); + // Process the samples in input buffer + processSamples(); +} + + + +/// Set new overlap length parameter & reallocate RefMidBuffer if necessary. +void TDStretch::acceptNewOverlapLength(uint newOverlapLength) +{ + uint prevOvl; + + prevOvl = overlapLength; + overlapLength = newOverlapLength; + + if (overlapLength > prevOvl) + { + delete[] pMidBuffer; + delete[] pRefMidBufferUnaligned; + + pMidBuffer = new SAMPLETYPE[overlapLength * 2]; + bMidBufferDirty = TRUE; + clearMidBuffer(); + + pRefMidBufferUnaligned = new SAMPLETYPE[2 * overlapLength + 16 / sizeof(SAMPLETYPE)]; + // ensure that 'pRefMidBuffer' is aligned to 16 byte boundary for efficiency + pRefMidBuffer = (SAMPLETYPE *)((((ulong)pRefMidBufferUnaligned) + 15) & -16); + } +} + +TDStretch * TDStretch::newInstance() +{ + uint uExtensions; + + uExtensions = detectCPUextensions(); + + // Check if MMX/SSE/3DNow! instruction set extensions supported by CPU + +#ifdef ALLOW_MMX + // MMX routines available only with integer sample types + if (uExtensions & SUPPORT_MMX) + { + return ::new TDStretchMMX; + } + else +#endif // ALLOW_MMX + + +#ifdef ALLOW_SSE + if (uExtensions & SUPPORT_SSE) + { + // SSE support + return ::new TDStretchSSE; + } + else +#endif // ALLOW_SSE + + +#ifdef ALLOW_3DNOW + if (uExtensions & SUPPORT_3DNOW) + { + // 3DNow! 
support + return ::new TDStretch3DNow; + } + else +#endif // ALLOW_3DNOW + + { + // ISA optimizations not supported, use plain C version + return ::new TDStretch; + } +} + + +////////////////////////////////////////////////////////////////////////////// +// +// Integer arithmetics specific algorithm implementations. +// +////////////////////////////////////////////////////////////////////////////// + +#ifdef INTEGER_SAMPLES + +// Slopes the amplitude of the 'midBuffer' samples so that cross correlation +// is faster to calculate +void TDStretch::precalcCorrReferenceStereo() +{ + int i, cnt2; + int temp, temp2; + + for (i=0 ; i < (int)overlapLength ;i ++) + { + temp = i * (overlapLength - i); + cnt2 = i * 2; + + temp2 = (pMidBuffer[cnt2] * temp) / slopingDivider; + pRefMidBuffer[cnt2] = (short)(temp2); + temp2 = (pMidBuffer[cnt2 + 1] * temp) / slopingDivider; + pRefMidBuffer[cnt2 + 1] = (short)(temp2); + } +} + + +// Slopes the amplitude of the 'midBuffer' samples so that cross correlation +// is faster to calculate +void TDStretch::precalcCorrReferenceMono() +{ + int i; + long temp; + long temp2; + + for (i=0 ; i < (int)overlapLength ;i ++) + { + temp = i * (overlapLength - i); + temp2 = (pMidBuffer[i] * temp) / slopingDivider; + pRefMidBuffer[i] = (short)temp2; + } +} + + +// Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo' +// version of the routine. +void TDStretch::overlapStereo(short *output, const short *input) const +{ + int i; + short temp; + uint cnt2; + + for (i = 0; i < (int)overlapLength ; i ++) + { + temp = (short)(overlapLength - i); + cnt2 = 2 * i; + output[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp ) / overlapLength; + output[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength; + } +} + + +/// Calculates overlap period length in samples. +/// Integer version rounds overlap length to closest power of 2 +/// for a divide scaling operation. 
+void TDStretch::calculateOverlapLength(uint overlapMs) +{ + uint newOvl; + + overlapDividerBits = _getClosest2Power((sampleRate * overlapMs) / 1000.0); + if (overlapDividerBits > 9) overlapDividerBits = 9; + if (overlapDividerBits < 4) overlapDividerBits = 4; + newOvl = (uint)pow(2, overlapDividerBits); + + acceptNewOverlapLength(newOvl); + + // calculate sloping divider so that crosscorrelation operation won't + // overflow 32-bit register. Max. sum of the crosscorrelation sum without + // divider would be 2^30*(N^3-N)/3, where N = overlap length + slopingDivider = (newOvl * newOvl - 1) / 3; +} + + +long TDStretch::calcCrossCorrMono(const short *mixingPos, const short *compare) const +{ + long corr; + uint i; + + corr = 0; + for (i = 1; i < overlapLength; i ++) + { + corr += (mixingPos[i] * compare[i]) >> overlapDividerBits; + } + + return corr; +} + + +long TDStretch::calcCrossCorrStereo(const short *mixingPos, const short *compare) const +{ + long corr; + uint i; + + corr = 0; + for (i = 2; i < 2 * overlapLength; i += 2) + { + corr += (mixingPos[i] * compare[i] + + mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits; + } + + return corr; +} + +#endif // INTEGER_SAMPLES + +////////////////////////////////////////////////////////////////////////////// +// +// Floating point arithmetics specific algorithm implementations. 
+// + +#ifdef FLOAT_SAMPLES + + +// Slopes the amplitude of the 'midBuffer' samples so that cross correlation +// is faster to calculate +void TDStretch::precalcCorrReferenceStereo() +{ + int i, cnt2; + float temp; + + for (i=0 ; i < (int)overlapLength ;i ++) + { + temp = (float)i * (float)(overlapLength - i); + cnt2 = i * 2; + pRefMidBuffer[cnt2] = (float)(pMidBuffer[cnt2] * temp); + pRefMidBuffer[cnt2 + 1] = (float)(pMidBuffer[cnt2 + 1] * temp); + } +} + + +// Slopes the amplitude of the 'midBuffer' samples so that cross correlation +// is faster to calculate +void TDStretch::precalcCorrReferenceMono() +{ + int i; + float temp; + + for (i=0 ; i < (int)overlapLength ;i ++) + { + temp = (float)i * (float)(overlapLength - i); + pRefMidBuffer[i] = (float)(pMidBuffer[i] * temp); + } +} + + +// SSE-optimized version of the function overlapStereo +void TDStretch::overlapStereo(float *output, const float *input) const +{ + int i; + uint cnt2; + float fTemp; + float fScale; + float fi; + + fScale = 1.0f / (float)overlapLength; + + for (i = 0; i < (int)overlapLength ; i ++) + { + fTemp = (float)(overlapLength - i) * fScale; + fi = (float)i * fScale; + cnt2 = 2 * i; + output[cnt2 + 0] = input[cnt2 + 0] * fi + pMidBuffer[cnt2 + 0] * fTemp; + output[cnt2 + 1] = input[cnt2 + 1] * fi + pMidBuffer[cnt2 + 1] * fTemp; + } +} + + +/// Calculates overlap period length in samples. 
+void TDStretch::calculateOverlapLength(uint overlapMs) +{ + uint newOvl; + + newOvl = (sampleRate * overlapMs) / 1000; + if (newOvl < 16) newOvl = 16; + + acceptNewOverlapLength(newOvl); +} + + + +double TDStretch::calcCrossCorrMono(const float *mixingPos, const float *compare) const +{ + double corr; + uint i; + + corr = 0; + for (i = 1; i < overlapLength; i ++) + { + corr += mixingPos[i] * compare[i]; + } + + return corr; +} + + +double TDStretch::calcCrossCorrStereo(const float *mixingPos, const float *compare) const +{ + double corr; + uint i; + + corr = 0; + for (i = 2; i < 2 * overlapLength; i += 2) + { + corr += mixingPos[i] * compare[i] + + mixingPos[i + 1] * compare[i + 1]; + } + + return corr; +} + +#endif // FLOAT_SAMPLES diff --git a/libs/soundtouch/TDStretch.h b/libs/soundtouch/TDStretch.h new file mode 100644 index 0000000000..ac27711e39 --- /dev/null +++ b/libs/soundtouch/TDStretch.h @@ -0,0 +1,253 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo +/// while maintaining the original pitch by using a time domain WSOLA-like method +/// with several performance-increasing tweaks. +/// +/// Note : MMX optimized functions reside in a separate, platform-specific file, +/// e.g. 
'mmx_win.cpp' or 'mmx_gcc.cpp' +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai @ iki.fi +/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date$ +// File revision : $Revision$ +// +// $Id$ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#ifndef TDStretch_H +#define TDStretch_H + +#include "STTypes.h" +#include "RateTransposer.h" +#include "FIFOSamplePipe.h" + +namespace soundtouch +{ + +// Default values for sound processing parameters: + +/// Default length of a single processing sequence, in milliseconds. This determines to how +/// long sequences the original sound is chopped in the time-stretch algorithm. +/// +/// The larger this value is, the lesser sequences are used in processing. In principle +/// a bigger value sounds better when slowing down tempo, but worse when increasing tempo +/// and vice versa. 
+/// +/// Increasing this value reduces computational burden & vice versa. +#define DEFAULT_SEQUENCE_MS 82 + +/// Seeking window default length in milliseconds for algorithm that finds the best possible +/// overlapping location. This determines from how wide window the algorithm may look for an +/// optimal joining location when mixing the sound sequences back together. +/// +/// The bigger this window setting is, the higher the possibility to find a better mixing +/// position will become, but at the same time large values may cause a "drifting" artifact +/// because consequent sequences will be taken at more uneven intervals. +/// +/// If there's a disturbing artifact that sounds as if a constant frequency was drifting +/// around, try reducing this setting. +/// +/// Increasing this value increases computational burden & vice versa. +#define DEFAULT_SEEKWINDOW_MS 14 + +/// Overlap length in milliseconds. When the chopped sound sequences are mixed back together, +/// to form a continuous sound stream, this parameter defines over how long period the two +/// consecutive sequences are let to overlap each other. +/// +/// This shouldn't be that critical parameter. If you reduce the DEFAULT_SEQUENCE_MS setting +/// by a large amount, you might wish to try a smaller value on this. +/// +/// Increasing this value increases computational burden & vice versa. +#define DEFAULT_OVERLAP_MS 12 + + +/// Class that does the time-stretch (tempo change) effect for the processed +/// sound. 
+class TDStretch : public FIFOProcessor +{ +protected: + uint channels; + uint sampleReq; + float tempo; + + SAMPLETYPE *pMidBuffer; + SAMPLETYPE *pRefMidBuffer; + SAMPLETYPE *pRefMidBufferUnaligned; + uint overlapLength; + uint overlapDividerBits; + uint slopingDivider; + uint seekLength; + uint seekWindowLength; + uint maxOffset; + float nominalSkip; + float skipFract; + FIFOSampleBuffer outputBuffer; + FIFOSampleBuffer inputBuffer; + BOOL bQuickseek; + BOOL bMidBufferDirty; + + uint sampleRate; + uint sequenceMs; + uint seekWindowMs; + uint overlapMs; + + void acceptNewOverlapLength(uint newOverlapLength); + + virtual void clearCrossCorrState(); + void calculateOverlapLength(uint overlapMs); + + virtual LONG_SAMPLETYPE calcCrossCorrStereo(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const; + virtual LONG_SAMPLETYPE calcCrossCorrMono(const SAMPLETYPE *mixingPos, const SAMPLETYPE *compare) const; + + virtual uint seekBestOverlapPositionStereo(const SAMPLETYPE *refPos); + virtual uint seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos); + virtual uint seekBestOverlapPositionMono(const SAMPLETYPE *refPos); + virtual uint seekBestOverlapPositionMonoQuick(const SAMPLETYPE *refPos); + uint seekBestOverlapPosition(const SAMPLETYPE *refPos); + + virtual void overlapStereo(SAMPLETYPE *output, const SAMPLETYPE *input) const; + virtual void overlapMono(SAMPLETYPE *output, const SAMPLETYPE *input) const; + + void clearMidBuffer(); + void overlap(SAMPLETYPE *output, const SAMPLETYPE *input, uint ovlPos) const; + + void precalcCorrReferenceMono(); + void precalcCorrReferenceStereo(); + + void processNominalTempo(); + + /// Changes the tempo of the buffered input samples. + /// Processes as many processing frames from 'inputBuffer' as are + /// available and stores the result into 'outputBuffer'. This function + /// does not return a value. 
+ void processSamples(); + + TDStretch(); + +public: + virtual ~TDStretch(); + + /// Use this function instead of "new" operator to create a new instance of this class. + /// This function automatically chooses a correct feature set depending on if the CPU + /// supports MMX/SSE/etc extensions. + static TDStretch *newInstance(); + + /// Returns the output buffer object + FIFOSamplePipe *getOutput() { return &outputBuffer; }; + + /// Returns the input buffer object + FIFOSamplePipe *getInput() { return &inputBuffer; }; + + /// Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower + /// tempo, larger faster tempo. + void setTempo(float newTempo); + + /// Returns nonzero if there aren't any samples available for outputting. + virtual void clear(); + + /// Clears the input buffer + void clearInput(); + + /// Sets the number of channels, 1 = mono, 2 = stereo + void setChannels(uint numChannels); + + /// Enables/disables the quick position seeking algorithm. Zero to disable, + /// nonzero to enable + void enableQuickSeek(BOOL enable); + + /// Returns nonzero if the quick seeking algorithm is enabled. + BOOL isQuickSeekEnabled() const; + + /// Sets routine control parameters. These control are certain time constants + /// defining how the sound is stretched to the desired duration. + // + /// 'sampleRate' = sample rate of the sound + /// 'sequenceMS' = one processing sequence length in milliseconds + /// 'seekwindowMS' = seeking window length for scanning the best overlapping + /// position + /// 'overlapMS' = overlapping length + void setParameters(uint sampleRate, ///< Samplerate of sound being processed (Hz) + uint sequenceMS = DEFAULT_SEQUENCE_MS, ///< Single processing sequence length (ms) + uint seekwindowMS = DEFAULT_SEEKWINDOW_MS, ///< Offset seeking window length (ms) + uint overlapMS = DEFAULT_OVERLAP_MS ///< Sequence overlapping length (ms) + ); + + /// Get routine control parameters, see setParameters() function. 
+ /// Any of the parameters to this function can be NULL, in such case corresponding parameter + /// value isn't returned. + void getParameters(uint *pSampleRate, uint *pSequenceMs, uint *pSeekWindowMs, uint *pOverlapMs); + + /// Adds 'numsamples' pcs of samples from the 'samples' memory position into + /// the input of the object. + virtual void putSamples( + const SAMPLETYPE *samples, ///< Input sample data + uint numSamples ///< Number of samples in 'samples' so that one sample + ///< contains both channels if stereo + ); +}; + + + +// Implementation-specific class declarations: + +#ifdef ALLOW_MMX + /// Class that implements MMX optimized routines for 16bit integer samples type. + class TDStretchMMX : public TDStretch + { + protected: + long calcCrossCorrStereo(const short *mixingPos, const short *compare) const; + virtual void overlapStereo(short *output, const short *input) const; + virtual void clearCrossCorrState(); + }; +#endif /// ALLOW_MMX + + +#ifdef ALLOW_3DNOW + /// Class that implements 3DNow! optimized routines for floating point samples type. + class TDStretch3DNow : public TDStretch + { + protected: + double calcCrossCorrStereo(const float *mixingPos, const float *compare) const; + }; +#endif /// ALLOW_3DNOW + + +#ifdef ALLOW_SSE + /// Class that implements SSE optimized routines for floating point samples type. + class TDStretchSSE : public TDStretch + { + protected: + double calcCrossCorrStereo(const float *mixingPos, const float *compare) const; + }; + +#endif /// ALLOW_SSE + +} +#endif /// TDStretch_H diff --git a/libs/soundtouch/cpu_detect.h b/libs/soundtouch/cpu_detect.h new file mode 100644 index 0000000000..ac011ebca8 --- /dev/null +++ b/libs/soundtouch/cpu_detect.h @@ -0,0 +1,62 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// A header file for detecting the Intel MMX instructions set extension. 
+///
+/// Please see 'cpu_detect_x86_win.cpp' and 'cpu_detect_x86_gcc.cpp' for the
+/// routine implementations for x86 Windows and for the GNU compiler (x86 and
+/// non-x86 platforms), respectively.
+///
+/// Author : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai @ iki.fi
+/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed : $Date$
+// File revision : $Revision$
+//
+// $Id$
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+// SoundTouch audio processing library
+// Copyright (c) Olli Parviainen
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#ifndef _CPU_DETECT_H_
+#define _CPU_DETECT_H_
+
+#include "STTypes.h"
+
+#define SUPPORT_MMX         0x0001
+#define SUPPORT_3DNOW       0x0002
+#define SUPPORT_ALTIVEC     0x0004
+#define SUPPORT_SSE         0x0008
+#define SUPPORT_SSE2        0x0010
+
+/// Checks which instruction set extensions are supported by the CPU.
+///
+/// \return A bitmask of supported extensions, see SUPPORT_... defines.
+uint detectCPUextensions(void);
+
+/// Disables given set of instruction extensions. See SUPPORT_... defines.
+void disableExtensions(uint wDisableMask);
+
+#endif // _CPU_DETECT_H_
diff --git a/libs/soundtouch/cpu_detect_x86_gcc.cpp b/libs/soundtouch/cpu_detect_x86_gcc.cpp
new file mode 100644
index 0000000000..b4ccdc2834
--- /dev/null
+++ b/libs/soundtouch/cpu_detect_x86_gcc.cpp
@@ -0,0 +1,138 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// gcc version of the x86 CPU detect routine.
+///
+/// This file is to be compiled on any platform with the GNU C compiler.
+/// Please see 'cpu_detect_x86_win.cpp' for the x86 Windows version
+/// of this file.
+///
+/// Author : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai @ iki.fi
+/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed : $Date$
+// File revision : $Revision$
+//
+// $Id$
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+// SoundTouch audio processing library
+// Copyright (c) Olli Parviainen
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include
+#include
+#include "cpu_detect.h"
+
+#ifndef __GNUC__
+#error wrong platform - this source code file is for the GNU C compiler.
+#endif
+
+using namespace std;
+
+#include
+//////////////////////////////////////////////////////////////////////////////
+//
+// processor instructions extension detection routines
+//
+//////////////////////////////////////////////////////////////////////////////
+
+
+// Flag variable indicating which ISA extensions are disabled (for debugging)
+static uint _dwDisabledISA = 0x00; // 0xffffffff; //<- use this to disable all extensions
+
+// Disables given set of instruction extensions. See SUPPORT_... defines.
+void disableExtensions(uint dwDisableMask)
+{
+    _dwDisabledISA = dwDisableMask;
+}
+
+/// Checks which instruction set extensions are supported by the CPU.
+/// \return Bitmask of SUPPORT_MMX / SUPPORT_SSE / SUPPORT_3DNOW bits with the bits given
+///         to disableExtensions() masked off; always 0 on non-x86 builds.
+uint detectCPUextensions(void)
+{
+#ifndef __i386__
+    return 0; // always disable extensions on non-x86 platforms.
+#else
+    uint res = 0;
+
+    if (_dwDisabledISA == 0xffffffff) return 0;
+
+    // 'cpuid' overwrites ebx, which is not in the clobber list (gcc reserves it for the
+    // GOT pointer in PIC builds), so it is saved and restored by hand. Numeric local
+    // labels keep the asm valid if the compiler ever emits this block more than once.
+    asm volatile(
+        "\n\tpush %%ebx"                // preserve ebx across the cpuid calls
+        "\n\txor %%esi, %%esi"          // clear %%esi = result register
+        // check if 'cpuid' instructions is available by toggling eflags bit 21
+        "\n\tpushf"                     // save eflags to stack
+        "\n\tpop %%eax"                 // load eax from stack (with eflags)
+        "\n\tmovl %%eax, %%ecx"         // save the original eflags values to ecx
+        "\n\txor $0x00200000, %%eax"    // toggle bit 21
+        "\n\tpush %%eax"                // store toggled eflags to stack
+        "\n\tpopf"                      // load eflags from stack
+        "\n\tpushf"                     // save updated eflags to stack
+        "\n\tpop %%eax"                 // load from stack
+        "\n\txor %%edx, %%edx"          // clear edx for defaulting no mmx
+        "\n\tcmp %%ecx, %%eax"          // compare to original eflags values
+        "\n\tjz 2f"                     // jumps to the end (label 2) if cpuid not present
+
+        // cpuid instruction available, test for presence of mmx instructions
+        "\n\tmovl $1, %%eax"
+        "\n\tcpuid"
+        "\n\ttest $0x00800000, %%edx"
+        "\n\tjz 2f"                     // branch if MMX not available
+
+        "\n\tor $0x01, %%esi"           // otherwise add MMX support bit
+
+        "\n\ttest $0x02000000, %%edx"
+        "\n\tjz 1f"                     // branch if SSE not available
+
+        "\n\tor $0x08, %%esi"           // otherwise add SSE support bit
+
+        "\n1:"
+        // test for presence of AMD extensions
+        "\n\tmov $0x80000000, %%eax"
+        "\n\tcpuid"
+        "\n\tcmp $0x80000000, %%eax"
+        "\n\tjbe 2f"                    // branch if no AMD extensions detected
+
+        // test for presence of 3DNow! extension
+        "\n\tmov $0x80000001, %%eax"
+        "\n\tcpuid"
+        "\n\ttest $0x80000000, %%edx"
+        "\n\tjz 2f"                     // branch if 3DNow! not detected
+
+        "\n\tor $0x02, %%esi"           // otherwise add 3DNow support bit
+
+        "\n2:"
+        "\n\tpop %%ebx"                 // restore ebx before %0 is written, in case %0 is ebx
+        "\n\tmov %%esi, %0"
+        : "=r" (res)
+        : /* no inputs */
+        : "%edx", "%eax", "%ecx", "%esi", "cc" );
+
+    return res & ~_dwDisabledISA;
+#endif
+}
diff --git a/libs/soundtouch/cpu_detect_x86_win.cpp b/libs/soundtouch/cpu_detect_x86_win.cpp
new file mode 100644
index 0000000000..fd04955d80
--- /dev/null
+++ b/libs/soundtouch/cpu_detect_x86_win.cpp
@@ -0,0 +1,126 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// Win32 version of the x86 CPU detect routine.
+///
+/// This file is to be compiled in Windows platform with Microsoft Visual C++
+/// Compiler. Please see 'cpu_detect_x86_gcc.cpp' for the gcc compiler version
+/// for all GNU platforms.
+///
+/// Author : Copyright (c) Olli Parviainen
+/// Author e-mail : oparviai @ iki.fi
+/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch
+///
+////////////////////////////////////////////////////////////////////////////////
+//
+// Last changed : $Date$
+// File revision : $Revision$
+//
+// $Id$
+//
+////////////////////////////////////////////////////////////////////////////////
+//
+// License :
+//
+// SoundTouch audio processing library
+// Copyright (c) Olli Parviainen
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2.1 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+//
+////////////////////////////////////////////////////////////////////////////////
+
+#include "cpu_detect.h"
+
+#ifndef WIN32
+#error wrong platform - this source code file is exclusively for Win32 platform
+#endif
+
+//////////////////////////////////////////////////////////////////////////////
+//
+// processor instructions extension detection routines
+//
+//////////////////////////////////////////////////////////////////////////////
+
+// Flag variable indicating which ISA extensions are disabled (for debugging)
+static uint _dwDisabledISA = 0x00; // 0xffffffff; //<- use this to disable all extensions
+
+
+// Disables given set of instruction extensions. See SUPPORT_... defines.
+void disableExtensions(uint dwDisableMask)
+{
+    _dwDisabledISA = dwDisableMask;
+}
+
+/// Checks which instruction set extensions are supported by the CPU.
+/// \return Bitmask of SUPPORT_MMX / SUPPORT_SSE / SUPPORT_3DNOW bits, minus the bits
+///         passed to disableExtensions(); 0 when every extension has been disabled.
+uint detectCPUextensions(void)
+{
+    uint res = 0;
+
+    if (_dwDisabledISA == 0xffffffff) return 0;
+
+    _asm
+    {
+        ; check if 'cpuid' instructions is available by toggling eflags bit 21
+        ;
+        xor     esi, esi            ; clear esi = result register
+
+        pushfd                      ; save eflags to stack
+        pop     eax                 ; load eax from stack (with eflags)
+        mov     ecx, eax            ; save the original eflags values to ecx
+        xor     eax, 0x00200000     ; toggle bit 21
+        push    eax                 ; store toggled eflags to stack
+        popfd                       ; load eflags from stack
+        pushfd                      ; save updated eflags to stack
+        pop     eax                 ; load from stack
+        xor     edx, edx            ; clear edx for defaulting no mmx
+        cmp     eax, ecx            ; compare to original eflags values
+        jz      end                 ; jumps to 'end' if cpuid not present
+
+        ; cpuid instruction available, test for presence of mmx instructions
+        mov     eax, 1
+        cpuid
+        test    edx, 0x00800000
+        jz      end                 ; branch if MMX not available
+
+        or      esi, SUPPORT_MMX    ; otherwise add MMX support bit
+
+        test    edx, 0x02000000
+        jz      test3DNow           ; branch if SSE not available
+
+        or      esi, SUPPORT_SSE    ; otherwise add SSE support bit
+
+    test3DNow:
+        ; test for precense of AMD extensions
+        mov     eax, 0x80000000
+        cpuid
+        cmp     eax, 0x80000000
+        jbe     end                 ; branch if no AMD extensions detected
+
+        ; test for precense of 3DNow! extension
+        mov     eax, 0x80000001
+        cpuid
+        test    edx, 0x80000000
+        jz      end                 ; branch if 3DNow! not detected
+
+        or      esi, SUPPORT_3DNOW  ; otherwise add 3DNow support bit
+
+    end:
+
+        mov     res, esi
+    }
+
+    return res & ~_dwDisabledISA;
+}
diff --git a/libs/soundtouch/mmx_gcc.cpp b/libs/soundtouch/mmx_gcc.cpp
new file mode 100644
index 0000000000..9e92765e4e
--- /dev/null
+++ b/libs/soundtouch/mmx_gcc.cpp
@@ -0,0 +1,534 @@
+////////////////////////////////////////////////////////////////////////////////
+///
+/// gcc version of the MMX optimized routines.
All MMX optimized functions +/// have been gathered into this single source code file, regardless to their +/// class or original source code file, in order to ease porting the library +/// to other compiler and processor platforms. +/// +/// This file is to be compiled on any platform with the GNU C compiler. +/// Compiler. Please see 'mmx_win.cpp' for the x86 Windows version of this +/// file. +/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai @ iki.fi +/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date$ +// File revision : $Revision$ +// +// $Id$ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include "cpu_detect.h" + +#ifndef __GNUC__ +#error "wrong platform - this source code file is for the GNU C compiler." 
+#endif + +using namespace std; +using namespace soundtouch; + + +#ifdef ALLOW_MMX +// MMX routines available only with integer sample type + +////////////////////////////////////////////////////////////////////////////// +// +// implementation of MMX optimized functions of class 'TDStretch' +// +// NOTE: ebx in gcc 3.x is not preserved if -fPIC and -DPIC +// gcc-3.4 correctly flags this error and wont let you continue. +// gcc-2.95 preserves esi correctly +// +////////////////////////////////////////////////////////////////////////////// + +#include "TDStretch.h" +#include + +// these are declared in 'TDStretch.cpp' +extern int scanOffsets[4][24]; + +// Calculates cross correlation of two buffers +long TDStretchMMX::calcCrossCorrStereo(const short *pV1, const short *pV2) const +{ +#ifdef __i386__ + int corr; + uint local_overlapLength = overlapLength; + uint local_overlapDividerBits = overlapDividerBits; + + asm volatile( + // Calculate cross-correlation between the tempOffset and tmpbid_buffer. + + // Process 4 parallel batches of 2 * stereo samples each during one + // round to improve CPU-level parallellization. 
+ + // load address of sloped pV2 buffer to eax + // load address of mixing point of the sample data buffer to edi + // load counter to ecx = overlapLength / 8 - 1 + // empty the mm0 + + // prepare to the first round by loading + // load mm1 = eax[0] + // load mm2 = eax[1]; + + "\n\tmovl %1, %%eax" + "\n\tmovl %2, %%edi" + + "\n\tmovq (%%eax), %%mm1" + "\n\tmovl %3, %%ecx" + + "\n\tmovq 8(%%eax), %%mm2" + "\n\tshr $3, %%ecx" + + "\n\tpxor %%mm0, %%mm0" + "\n\tsub $1, %%ecx" + + "\n\tmovd %4, %%mm5" + + "\n1:" + // multiply-add mm1 = mm1 * edi[0] + // multiply-add mm2 = mm2 * edi[1] + // + // add mm2 += mm1 + // mm2 >>= mm5 (=overlapDividerBits) + // add mm0 += mm2 + // + // load mm3 = eax[2] + // multiply-add mm3 = mm3 * edi[2] + // + // load mm4 = eax[3] + // multiply-add mm4 = mm4 * edi[3] + // + // add mm3 += mm4 + // mm3 >>= mm5 (=overlapDividerBits) + // add mm0 += mm3 + // + // add eax += 4 + // add edi += 4 + // load mm1 = eax[0] (~eax[4]) + // load mm2 = eax[1] (~eax[5]) + // + // loop + + "\n\tpmaddwd (%%edi), %%mm1" // qword ptr [edi] + "\n\tmovq 16(%%eax), %%mm3" // qword ptr [eax+16] + + "\n\tpmaddwd 8(%%edi), %%mm2" // qword ptr [edi+8] + "\n\tmovq 24(%%eax), %%mm4" // qword ptr [eax+24] + + "\n\tpmaddwd 16(%%edi), %%mm3" // qword ptr [edi+16] + "\n\tpaddd %%mm1, %%mm2" + + "\n\tpmaddwd 24(%%edi), %%mm4" // qword ptr [edi+24] + "\n\tmovq 32(%%eax), %%mm1" // qword ptr [eax+32] + + "\n\tpsrad %%mm5, %%mm2" + "\n\tadd $32, %%eax" + + "\n\tpaddd %%mm4, %%mm3" + "\n\tpaddd %%mm2, %%mm0" + + "\n\tmovq 8(%%eax), %%mm2" // qword ptr [eax+8] + "\n\tpsrad %%mm5, %%mm3" + + "\n\tadd $32, %%edi" + "\n\tpaddd %%mm3, %%mm0" + + "\n\tdec %%ecx" + "\n\tjnz 1b" + + // Finalize the last partial loop: + + "\n\tmovq 16(%%eax), %%mm3" // qword ptr [eax+16] + "\n\tpmaddwd (%%edi), %%mm1" // qword ptr [edi] + + "\n\tmovq 24(%%eax), %%mm4" // qword ptr [eax+24] + "\n\tpmaddwd 8(%%edi), %%mm2" // qword ptr [edi+8] + + "\n\tpmaddwd 16(%%edi), %%mm3" // qword ptr [edi+16] + 
"\n\tpaddd %%mm1, %%mm2" + + "\n\tpmaddwd 24(%%edi), %%mm4" // qword ptr [edi+24] + "\n\tpsrad %%mm5, %%mm2" + + "\n\tpaddd %%mm4, %%mm3" + "\n\tpaddd %%mm2, %%mm0" + + "\n\tpsrad %%mm5, %%mm3" + "\n\tpaddd %%mm3, %%mm0" + + // copy hi-dword of mm0 to lo-dword of mm1, then sum mmo+mm1 + // and finally store the result into the variable "corr" + + "\n\tmovq %%mm0, %%mm1" + "\n\tpsrlq $32, %%mm1" + "\n\tpaddd %%mm1, %%mm0" + "\n\tmovd %%mm0, %0" + : "=rm" (corr) + : "rim" (pV1), "rim" (pV2), "rim" (local_overlapLength), + "rim" (local_overlapDividerBits) + : "%ecx", "%eax", "%edi" + ); + return corr; + + // Note: Warning about the missing EMMS instruction is harmless + // as it'll be called elsewhere. +#else + throw runtime_error("MMX not supported"); +#endif +} + +void TDStretchMMX::clearCrossCorrState() +{ +#ifdef __i386__ + asm volatile("EMMS"); +#endif +} + +// MMX-optimized version of the function overlapStereo +void TDStretchMMX::overlapStereo(short *output, const short *input) const +{ +#ifdef __i386__ + short *local_midBuffer = pMidBuffer; + uint local_overlapLength = overlapLength; + uint local_overlapDividerBits = overlapDividerBits; + + asm volatile( + "\n\t" + // load sliding mixing value counter to mm6 and mm7 + // load counter value to ecx = overlapLength / 4 + // load divider-shifter value to esi + // load mixing value adder to mm5 + // load address of midBuffer to eax + // load address of inputBuffer added with ovlOffset to edi + // load address of end of the outputBuffer to edx + // + // We need to preserve esi, since gcc uses it for the + // stack frame. 
+ + "movl %0, %%eax\n\t" // ecx = 0x0000 OVL_ + "movl $0x0002fffe, %%edi\n\t" // ecx = 0x0002 fffe + + "movl %1, %%esi\n\t" + "movd %%eax, %%mm6\n\t" // mm6 = 0x0000 0000 0000 OVL_ + + "movl %%eax, %%ecx\n\t" + "sub $1, %%eax\n\t" + + "punpckldq %%mm6, %%mm6\n\t" // mm6 = 0x0000 OVL_ 0000 OVL_ + + "or $0x00010000, %%eax\n\t" // eax = 0x0001 overlapLength-1 + + "movd %%edi, %%mm5\n\t" // mm5 = 0x0000 0000 0002 fffe + "movd %%eax, %%mm7\n\t" // mm7 = 0x0000 0000 0001 01ff + + "movl %3, %%edi\n\t" + + "movl %4, %%eax\n\t" // dword ptr local_midBuffer + "punpckldq %%mm5, %%mm5\n\t" // mm5 = 0x0002 fffe 0002 fffe + + "shr $2, %%ecx\n\t" // ecx = overlapLength / 2 + "punpckldq %%mm7, %%mm7\n\t" // mm7 = 0x0001 01ff 0001 01ff + + "movl %2, %%edx\n" + + "2:\n\t" + // Process two parallel batches of 2+2 stereo samples during each round + // to improve CPU-level parallellization. + // + // Load [eax] into mm0 and mm1 + // Load [edi] into mm3 + // unpack words of mm0, mm1 and mm3 into mm0 and mm1 + // multiply-add mm0*mm6 and mm1*mm7, store results into mm0 and mm1 + // divide mm0 and mm1 by 512 (=right-shift by overlapDividerBits) + // pack the result into mm0 and store into [edx] + // + // Load [eax+8] into mm2 and mm3 + // Load [edi+8] into mm4 + // unpack words of mm2, mm3 and mm4 into mm2 and mm3 + // multiply-add mm2*mm6 and mm3*mm7, store results into mm2 and mm3 + // divide mm2 and mm3 by 512 (=right-shift by overlapDividerBits) + // pack the result into mm2 and store into [edx+8] + + + "movq (%%eax), %%mm0\n\t" // mm0 = m1l m1r m0l m0r + "add $16, %%edx\n\t" + + "movq (%%edi), %%mm3\n\t" // mm3 = i1l i1r i0l i0r + "movq %%mm0, %%mm1\n\t" // mm1 = m1l m1r m0l m0r + + "movq 8(%%eax), %%mm2\n\t" // mm2 = m3l m3r m2l m2r + "punpcklwd %%mm3, %%mm0\n\t" // mm0 = i0l m0l i0r m0r + + "movq 8(%%edi), %%mm4\n\t" // mm4 = i3l i3r i2l i2r + "punpckhwd %%mm3, %%mm1\n\t" // mm1 = i1l m1l i1r m1r + + "movq %%mm2, %%mm3\n\t" // mm3 = m3l m3r m2l m2r + "punpcklwd %%mm4, %%mm2\n\t" // 
mm2 = i2l m2l i2r m2r + + "pmaddwd %%mm6, %%mm0\n\t" // mm0 = i0l*m63+m0l*m62 i0r*m61+m0r*m60 + "punpckhwd %%mm4, %%mm3\n\t" // mm3 = i3l m3l i3r m3r + + "movd %%esi, %%mm4\n\t" // mm4 = overlapDividerBits + + "pmaddwd %%mm7, %%mm1\n\t" // mm1 = i1l*m73+m1l*m72 i1r*m71+m1r*m70 + "paddw %%mm5, %%mm6\n\t" + + "paddw %%mm5, %%mm7\n\t" + "psrad %%mm4, %%mm0\n\t" // mmo >>= overlapDividerBits + + "pmaddwd %%mm6, %%mm2\n\t" // mm2 = i2l*m63+m2l*m62 i2r*m61+m2r*m60 + "psrad %%mm4, %%mm1\n\t" // mm1 >>= overlapDividerBits + + "pmaddwd %%mm7, %%mm3\n\t" // mm3 = i3l*m73+m3l*m72 i3r*m71+m3r*m70 + "psrad %%mm4, %%mm2\n\t" // mm2 >>= overlapDividerBits + + "packssdw %%mm1, %%mm0\n\t" // mm0 = mm1h mm1l mm0h mm0l + "psrad %%mm4, %%mm3\n\t" // mm3 >>= overlapDividerBits + + "add $16, %%eax\n\t" + "paddw %%mm5, %%mm6\n\t" + + "packssdw %%mm3, %%mm2\n\t" // mm2 = mm2h mm2l mm3h mm3l + "paddw %%mm5, %%mm7\n\t" + + "movq %%mm0, -16(%%edx)\n\t" + "add $16, %%edi\n\t" + + "movq %%mm2, -8(%%edx)\n\t" + "dec %%ecx\n\t" + + "jnz 2b\n\t" + + "emms\n\t" + + : + : "rim" (local_overlapLength), + "rim" (local_overlapDividerBits), + "rim" (output), + "rim" (input), + "rim" (local_midBuffer) + /* input */ + : "%edi", "%ecx", "%edx", "%eax", "%esi" /* regs */ + ); +#else + throw runtime_error("MMX not supported"); +#endif +} + + +////////////////////////////////////////////////////////////////////////////// +// +// implementation of MMX optimized functions of class 'FIRFilter' +// +////////////////////////////////////////////////////////////////////////////// + +#include "FIRFilter.h" + +FIRFilterMMX::FIRFilterMMX() : FIRFilter() +{ + filterCoeffsUnalign = NULL; +} + + +FIRFilterMMX::~FIRFilterMMX() +{ + delete[] filterCoeffsUnalign; +} + + +#if 1 +// (overloaded) Calculates filter coefficients for MMX routine +void FIRFilterMMX::setCoefficients(const short *coeffs, uint newLength, uint uResultDivFactor) +{ +#ifdef __i386__ + uint i; + FIRFilter::setCoefficients(coeffs, newLength, 
uResultDivFactor); + + // Ensure that filter coeffs array is aligned to 16-byte boundary + delete[] filterCoeffsUnalign; + filterCoeffsUnalign = new short[2 * newLength + 8]; + filterCoeffsAlign = (short *)(((uint)filterCoeffsUnalign + 15) & -16); + + // rearrange the filter coefficients for mmx routines + for (i = 0;i < length; i += 4) + { + filterCoeffsAlign[2 * i + 0] = coeffs[i + 0]; + filterCoeffsAlign[2 * i + 1] = coeffs[i + 2]; + filterCoeffsAlign[2 * i + 2] = coeffs[i + 0]; + filterCoeffsAlign[2 * i + 3] = coeffs[i + 2]; + + filterCoeffsAlign[2 * i + 4] = coeffs[i + 1]; + filterCoeffsAlign[2 * i + 5] = coeffs[i + 3]; + filterCoeffsAlign[2 * i + 6] = coeffs[i + 1]; + filterCoeffsAlign[2 * i + 7] = coeffs[i + 3]; + } +#else + throw runtime_error("MMX not supported"); +#endif +} + + + +// mmx-optimized version of the filter routine for stereo sound +uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, const uint numSamples) const +{ +#ifdef __i386__ + // Create stack copies of the needed member variables for asm routines : + uint local_length = length; + uint local_lengthDiv8 = lengthDiv8; + uint local_resultDivider = resultDivFactor; + short *local_filterCoeffs = (short*)filterCoeffsAlign; + short *local_src = (short *)src; + + asm volatile( + "\n\t" + // Load (num_samples-aa_filter_length)/2 to edi as a i + // Load a pointer to samples to esi + // Load a pointer to destination to edx + + "movl %0, %%edi\n\t" + "subl %2, %%edi\n\t" + "movl %3, %%edx\n\t" + "sar $1, %%edi\n" + + // Load filter length/8 to ecx + // Load pointer to samples from esi to ebx + // Load counter from edi to ecx + // Load [ebx] to mm3 + // Load pointer to filter coefficients to eax + "3:\n\t" + "movl %1, %%esi\n\t" + "pxor %%mm0, %%mm0\n\t" + + "movl %4, %%ecx\n\t" + "pxor %%mm7, %%mm7\n\t" + + "movq (%%esi), %%mm1\n\t" // mm1 = l1 r1 l0 r0 + "movl %5, %%eax\n" + "4:\n\t" + + "movq 8(%%esi), %%mm2\n\t" // mm2 = l3 r3 l2 r2 + "movq %%mm1, %%mm4\n\t" // mm4 = l1 r1 l0 
r0 + + "movq 16(%%esi), %%mm3\n\t" // mm3 = l5 r5 l4 r4 + "punpckhwd %%mm2, %%mm1\n\t" // mm1 = l3 l1 r3 r1 + + "movq %%mm2, %%mm6\n\t" // mm6 = l3 r3 l2 r2 + "punpcklwd %%mm2, %%mm4\n\t" // mm4 = l2 l0 r2 r0 + + "movq (%%eax), %%mm2\n\t" // mm2 = f2 f0 f2 f0 + "movq %%mm1, %%mm5\n\t" // mm5 = l3 l1 r3 r1 + + "punpcklwd %%mm3, %%mm6\n\t" // mm6 = l4 l2 r4 r2 + "pmaddwd %%mm2, %%mm4\n\t" // mm4 = l2*f2+l0*f0 r2*f2+r0*f0 + + "pmaddwd %%mm2, %%mm5\n\t" // mm5 = l3*f2+l1*f0 r3*f2+l1*f0 + "movq 8(%%eax), %%mm2\n\t" // mm2 = f3 f1 f3 f1 + + "paddd %%mm4, %%mm0\n\t" // mm0 += s02*f02 + "movq %%mm3, %%mm4\n\t" // mm4 = l1 r1 l0 r0 + + "pmaddwd %%mm2, %%mm1\n\t" // mm1 = l3*f3+l1*f1 r3*f3+l1*f1 + "paddd %%mm5, %%mm7\n\t" // mm7 += s13*f02 + + "pmaddwd %%mm2, %%mm6\n\t" // mm6 = l4*f3+l2*f1 r4*f3+f4*f1 + "movq 24(%%esi), %%mm2\n\t" // mm2 = l3 r3 l2 r2 + + "paddd %%mm1, %%mm0\n\t" // mm0 += s31*f31 + "movq 32(%%esi), %%mm1\n\t" // mm1 = l5 r5 l4 r4 + + "paddd %%mm6, %%mm7\n\t" // mm7 += s42*f31 + "punpckhwd %%mm2, %%mm3\n\t" // mm3 = l3 l1 r3 r1 + + "movq %%mm2, %%mm6\n\t" // mm6 = l3 r3 l2 r2 + "punpcklwd %%mm2, %%mm4\n\t" // mm4 = l2 l0 r2 r0 + + "movq 16(%%eax), %%mm2\n\t" // mm2 = f2 f0 f2 f0 + "movq %%mm3, %%mm5\n\t" // mm5 = l3 l1 r3 r1 + + "punpcklwd %%mm1, %%mm6\n\t" // mm6 = l4 l2 r4 r2 + "add $32, %%eax\n\t" + + "pmaddwd %%mm2, %%mm4\n\t" // mm4 = l2*f2+l0*f0 r2*f2+r0*f0 + "add $32, %%esi\n\t" + + "pmaddwd %%mm2, %%mm5\n\t" // mm5 = l3*f2+l1*f0 r3*f2+l1*f0 + "movq -8(%%eax), %%mm2\n\t" // mm2 = f3 f1 f3 f1 + + "paddd %%mm4, %%mm0\n\t" // mm0 += s02*f02 + "pmaddwd %%mm2, %%mm3\n\t" // mm3 = l3*f3+l1*f1 r3*f3+l1*f1 + + "paddd %%mm5, %%mm7\n\t" // mm7 += s13*f02 + "pmaddwd %%mm2, %%mm6\n\t" // mm6 = l4*f3+l2*f1 r4*f3+f4*f1 + + "paddd %%mm3, %%mm0\n\t" // mm0 += s31*f31 + "paddd %%mm6, %%mm7\n\t" // mm7 += s42*f31 + + "dec %%ecx\n\t" + "jnz 4b\n\t" + + // Divide mm0 and mm7 by 8192 (= right-shift by 13), + // pack and store to [edx] + "movd %6, %%mm4\n\t" + + "psrad 
%%mm4, %%mm0\n\t" // divide the result + + "add $8, %%edx\n\t" + "psrad %%mm4, %%mm7\n\t" // divide the result + + "add $8, %1\n\t" + "packssdw %%mm7, %%mm0\n\t" + + "movq %%mm0, -8(%%edx)\n\t" + "dec %%edi\n\t" + + "jnz 3b\n\t" + + "emms\n\t" + + : + : "rim" (numSamples), + "rim" (local_src), + "rim" (local_length), + "rim" (dest), + "rim" (local_lengthDiv8), + "rim" (local_filterCoeffs), + "rim" (local_resultDivider) /* input */ + : "%eax", "%ecx", "%edx", "%edi", "%esi" /* regs */ + ); + return (numSamples & 0xfffffffe) - local_length; +#else + throw runtime_error("MMX not supported"); + return 0; +#endif +} +#endif + +#endif // ALLOW_MMX diff --git a/libs/soundtouch/mmx_win.cpp b/libs/soundtouch/mmx_win.cpp new file mode 100644 index 0000000000..ec4ac9d88b --- /dev/null +++ b/libs/soundtouch/mmx_win.cpp @@ -0,0 +1,487 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// Win32 version of the MMX optimized routines. All MMX optimized functions +/// have been gathered into this single source code file, regardless to their +/// class or original source code file, in order to ease porting the library +/// to other compiler and processor platforms. +/// +/// This file is to be compiled in Windows platform with Microsoft Visual C++ +/// Compiler. Please see 'mmx_gcc.cpp' for the gcc compiler version for all +/// GNU platforms. 
+/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai @ iki.fi +/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date$ +// File revision : $Revision$ +// +// $Id$ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#include "STTypes.h" + +#ifndef WIN32 +#error "wrong platform - this source code file is exclusively for Win32 platform" +#endif + +using namespace soundtouch; + +#ifdef ALLOW_MMX +// MMX routines available only with integer sample type + +////////////////////////////////////////////////////////////////////////////// +// +// implementation of MMX optimized functions of class 'TDStretchMMX' +// +////////////////////////////////////////////////////////////////////////////// + +#include "TDStretch.h" +#include + +// these are declared in 'TDStretch.cpp' +extern int scanOffsets[4][24]; + +// Calculates cross correlation of two buffers +long TDStretchMMX::calcCrossCorrStereo(const short *pV1, const short *pV2) const +{ + long corr; + uint local_overlapLength = overlapLength; + uint local_overlapDividerBits = overlapDividerBits; + + _asm + { + ; Calculate cross-correlation between the tempOffset and tmpbid_buffer. + ; + ; Process 4 parallel batches of 2 * stereo samples each during one + ; round to improve CPU-level parallellization. 
+ ; + ; load address of sloped pV2 buffer to eax + ; load address of mixing point of the sample data buffer to ebx + ; load counter to ecx = overlapLength / 8 - 1 + ; empty the mm0 + ; + ; prepare to the first round by loading + ; load mm1 = eax[0] + ; load mm2 = eax[1]; + + mov eax, dword ptr pV1 + mov ebx, dword ptr pV2 + + movq mm1, qword ptr [eax] + mov ecx, local_overlapLength + + movq mm2, qword ptr [eax+8] + shr ecx, 3 + + pxor mm0, mm0 + sub ecx, 1 + + movd mm5, local_overlapDividerBits + + loop1: + ; multiply-add mm1 = mm1 * ebx[0] + ; multiply-add mm2 = mm2 * ebx[1] + ; + ; add mm2 += mm1 + ; mm2 >>= mm5 (=overlapDividerBits) + ; add mm0 += mm2 + ; + ; load mm3 = eax[2] + ; multiply-add mm3 = mm3 * ebx[2] + ; + ; load mm4 = eax[3] + ; multiply-add mm4 = mm4 * ebx[3] + ; + ; add mm3 += mm4 + ; mm3 >>= mm5 (=overlapDividerBits) + ; add mm0 += mm3 + ; + ; add eax += 4; + ; add ebx += 4 + ; load mm1 = eax[0] (~eax[4]) + ; load mm2 = eax[1] (~eax[5]) + ; + ; loop + + pmaddwd mm1, qword ptr [ebx] + movq mm3, qword ptr [eax+16] + + pmaddwd mm2, qword ptr [ebx+8] + movq mm4, qword ptr [eax+24] + + pmaddwd mm3, qword ptr [ebx+16] + paddd mm2, mm1 + + pmaddwd mm4, qword ptr [ebx+24] + movq mm1, qword ptr [eax+32] + + psrad mm2, mm5 + add eax, 32 + + paddd mm3, mm4 + paddd mm0, mm2 + + movq mm2, qword ptr [eax+8] + psrad mm3, mm5 + + add ebx, 32 + paddd mm0, mm3 + + dec ecx + jnz loop1 + + ; Finalize the last partial loop: + + movq mm3, qword ptr [eax+16] + pmaddwd mm1, qword ptr [ebx] + + movq mm4, qword ptr [eax+24] + pmaddwd mm2, qword ptr [ebx+8] + + pmaddwd mm3, qword ptr [ebx+16] + paddd mm2, mm1 + + pmaddwd mm4, qword ptr [ebx+24] + psrad mm2, mm5 + + paddd mm3, mm4 + paddd mm0, mm2 + + psrad mm3, mm5 + paddd mm0, mm3 + + ; copy hi-dword of mm0 to lo-dword of mm1, then sum mmo+mm1 + ; and finally store the result into the variable "corr" + + movq mm1, mm0 + psrlq mm1, 32 + paddd mm0, mm1 + movd corr, mm0 + } + return corr; + + // Note: Warning about the 
missing EMMS instruction is harmless + // as it'll be called elsewhere. +} + + + +void TDStretchMMX::clearCrossCorrState() +{ + _asm EMMS; +} + + + + + +// MMX-optimized version of the function overlapStereo +void TDStretchMMX::overlapStereo(short *output, const short *input) const +{ + short *local_midBuffer = pMidBuffer; + uint local_overlapLength = overlapLength; + uint local_overlapDividerBits = overlapDividerBits; + + _asm + { + ; load sliding mixing value counter to mm6 and mm7 + ; load counter value to ecx = overlapLength / 4 + ; load divider-shifter value to esi + ; load mixing value adder to mm5 + ; load address of midBuffer to eax + ; load address of inputBuffer added with ovlOffset to ebx + ; load address of end of the outputBuffer to edx + + mov eax, local_overlapLength ; ecx = 0x0000 OVL_ + mov edi, 0x0002fffe ; ecx = 0x0002 fffe + + mov esi, local_overlapDividerBits + movd mm6, eax ; mm6 = 0x0000 0000 0000 OVL_ + + mov ecx, eax; + sub eax, 1 + + punpckldq mm6, mm6 ; mm6 = 0x0000 OVL_ 0000 OVL_ + mov edx, output + + or eax, 0x00010000 ; eax = 0x0001 overlapLength-1 + mov ebx, dword ptr input + + movd mm5, edi ; mm5 = 0x0000 0000 0002 fffe + movd mm7, eax ; mm7 = 0x0000 0000 0001 01ff + + mov eax, dword ptr local_midBuffer + punpckldq mm5, mm5 ; mm5 = 0x0002 fffe 0002 fffe + + shr ecx, 2 ; ecx = overlapLength / 2 + punpckldq mm7, mm7 ; mm7 = 0x0001 01ff 0001 01ff + + loop1: + ; Process two parallel batches of 2+2 stereo samples during each round + ; to improve CPU-level parallellization. 
+ ; + ; Load [eax] into mm0 and mm1 + ; Load [ebx] into mm3 + ; unpack words of mm0, mm1 and mm3 into mm0 and mm1 + ; multiply-add mm0*mm6 and mm1*mm7, store results into mm0 and mm1 + ; divide mm0 and mm1 by 512 (=right-shift by overlapDividerBits) + ; pack the result into mm0 and store into [edx] + ; + ; Load [eax+8] into mm2 and mm3 + ; Load [ebx+8] into mm4 + ; unpack words of mm2, mm3 and mm4 into mm2 and mm3 + ; multiply-add mm2*mm6 and mm3*mm7, store results into mm2 and mm3 + ; divide mm2 and mm3 by 512 (=right-shift by overlapDividerBits) + ; pack the result into mm2 and store into [edx+8] + + + movq mm0, qword ptr [eax] ; mm0 = m1l m1r m0l m0r + add edx, 16 + + movq mm3, qword ptr [ebx] ; mm3 = i1l i1r i0l i0r + movq mm1, mm0 ; mm1 = m1l m1r m0l m0r + + movq mm2, qword ptr [eax+8] ; mm2 = m3l m3r m2l m2r + punpcklwd mm0, mm3 ; mm0 = i0l m0l i0r m0r + + movq mm4, qword ptr [ebx+8] ; mm4 = i3l i3r i2l i2r + punpckhwd mm1, mm3 ; mm1 = i1l m1l i1r m1r + + movq mm3, mm2 ; mm3 = m3l m3r m2l m2r + punpcklwd mm2, mm4 ; mm2 = i2l m2l i2r m2r + + pmaddwd mm0, mm6 ; mm0 = i0l*m63+m0l*m62 i0r*m61+m0r*m60 + punpckhwd mm3, mm4 ; mm3 = i3l m3l i3r m3r + + movd mm4, esi ; mm4 = overlapDividerBits + + pmaddwd mm1, mm7 ; mm1 = i1l*m73+m1l*m72 i1r*m71+m1r*m70 + paddw mm6, mm5 + + paddw mm7, mm5 + psrad mm0, mm4 ; mmo >>= overlapDividerBits + + pmaddwd mm2, mm6 ; mm2 = i2l*m63+m2l*m62 i2r*m61+m2r*m60 + psrad mm1, mm4 ; mm1 >>= overlapDividerBits + + pmaddwd mm3, mm7 ; mm3 = i3l*m73+m3l*m72 i3r*m71+m3r*m70 + psrad mm2, mm4 ; mm2 >>= overlapDividerBits + + packssdw mm0, mm1 ; mm0 = mm1h mm1l mm0h mm0l + psrad mm3, mm4 ; mm3 >>= overlapDividerBits + + add eax, 16 + paddw mm6, mm5 + + packssdw mm2, mm3 ; mm2 = mm2h mm2l mm3h mm3l + paddw mm7, mm5 + + movq qword ptr [edx-16], mm0 + add ebx, 16 + + movq qword ptr [edx-8], mm2 + dec ecx + + jnz loop1 + + emms + } +} + + +////////////////////////////////////////////////////////////////////////////// +// +// implementation of MMX 
optimized functions of class 'FIRFilter' +// +////////////////////////////////////////////////////////////////////////////// + +#include "FIRFilter.h" + + +FIRFilterMMX::FIRFilterMMX() : FIRFilter() +{ + filterCoeffsUnalign = NULL; +} + + +FIRFilterMMX::~FIRFilterMMX() +{ + delete[] filterCoeffsUnalign; +} + + +// (overloaded) Calculates filter coefficients for MMX routine +void FIRFilterMMX::setCoefficients(const short *coeffs, uint newLength, uint uResultDivFactor) +{ + uint i; + FIRFilter::setCoefficients(coeffs, newLength, uResultDivFactor); + + // Ensure that filter coeffs array is aligned to 16-byte boundary + delete[] filterCoeffsUnalign; + filterCoeffsUnalign = new short[2 * newLength + 8]; + filterCoeffsAlign = (short *)(((uint)filterCoeffsUnalign + 15) & -16); + + // rearrange the filter coefficients for mmx routines + for (i = 0;i < length; i += 4) + { + filterCoeffsAlign[2 * i + 0] = coeffs[i + 0]; + filterCoeffsAlign[2 * i + 1] = coeffs[i + 2]; + filterCoeffsAlign[2 * i + 2] = coeffs[i + 0]; + filterCoeffsAlign[2 * i + 3] = coeffs[i + 2]; + + filterCoeffsAlign[2 * i + 4] = coeffs[i + 1]; + filterCoeffsAlign[2 * i + 5] = coeffs[i + 3]; + filterCoeffsAlign[2 * i + 6] = coeffs[i + 1]; + filterCoeffsAlign[2 * i + 7] = coeffs[i + 3]; + } +} + + + +// mmx-optimized version of the filter routine for stereo sound +uint FIRFilterMMX::evaluateFilterStereo(short *dest, const short *src, const uint numSamples) const +{ + // Create stack copies of the needed member variables for asm routines : + uint local_length = length; + uint local_lengthDiv8 = lengthDiv8; + uint local_resultDivider = resultDivFactor; + short *local_filterCoeffs = (short*)filterCoeffsAlign; + + if (local_length < 2) return 0; + + _asm + { + ; Load (num_samples-aa_filter_length)/2 to edi as a i + ; Load a pointer to samples to esi + ; Load a pointer to destination to edx + + mov edi, numSamples + mov esi, dword ptr src + sub edi, local_length + mov edx, dword ptr dest + sar edi, 1 + + ; Load 
filter length/8 to ecx + ; Load pointer to samples from esi to ebx + ; Load counter from edi to ecx + ; Load [ebx] to mm3 + ; Load pointer to filter coefficients to eax +loop1: + mov ebx, esi + pxor mm0, mm0 + + mov ecx, local_lengthDiv8 + pxor mm7, mm7 + + movq mm1, [ebx] ; mm1 = l1 r1 l0 r0 + mov eax, local_filterCoeffs +loop2: + + movq mm2, [ebx+8] ; mm2 = l3 r3 l2 r2 + movq mm4, mm1 ; mm4 = l1 r1 l0 r0 + + movq mm3, [ebx+16] ; mm3 = l5 r5 l4 r4 + punpckhwd mm1, mm2 ; mm1 = l3 l1 r3 r1 + + movq mm6, mm2 ; mm6 = l3 r3 l2 r2 + punpcklwd mm4, mm2 ; mm4 = l2 l0 r2 r0 + + movq mm2, qword ptr [eax] ; mm2 = f2 f0 f2 f0 + movq mm5, mm1 ; mm5 = l3 l1 r3 r1 + + punpcklwd mm6, mm3 ; mm6 = l4 l2 r4 r2 + pmaddwd mm4, mm2 ; mm4 = l2*f2+l0*f0 r2*f2+r0*f0 + + pmaddwd mm5, mm2 ; mm5 = l3*f2+l1*f0 r3*f2+l1*f0 + movq mm2, qword ptr [eax+8] ; mm2 = f3 f1 f3 f1 + + paddd mm0, mm4 ; mm0 += s02*f02 + movq mm4, mm3 ; mm4 = l1 r1 l0 r0 + + pmaddwd mm1, mm2 ; mm1 = l3*f3+l1*f1 r3*f3+l1*f1 + paddd mm7, mm5 ; mm7 += s13*f02 + + pmaddwd mm6, mm2 ; mm6 = l4*f3+l2*f1 r4*f3+f4*f1 + movq mm2, [ebx+24] ; mm2 = l3 r3 l2 r2 + + paddd mm0, mm1 ; mm0 += s31*f31 + movq mm1, [ebx+32] ; mm1 = l5 r5 l4 r4 + + paddd mm7, mm6 ; mm7 += s42*f31 + punpckhwd mm3, mm2 ; mm3 = l3 l1 r3 r1 + + movq mm6, mm2 ; mm6 = l3 r3 l2 r2 + punpcklwd mm4, mm2 ; mm4 = l2 l0 r2 r0 + + movq mm2, qword ptr [eax+16] ; mm2 = f2 f0 f2 f0 + movq mm5, mm3 ; mm5 = l3 l1 r3 r1 + + punpcklwd mm6, mm1 ; mm6 = l4 l2 r4 r2 + add eax, 32 + + pmaddwd mm4, mm2 ; mm4 = l2*f2+l0*f0 r2*f2+r0*f0 + add ebx, 32 + + pmaddwd mm5, mm2 ; mm5 = l3*f2+l1*f0 r3*f2+l1*f0 + movq mm2, qword ptr [eax-8] ; mm2 = f3 f1 f3 f1 + + paddd mm0, mm4 ; mm0 += s02*f02 + pmaddwd mm3, mm2 ; mm3 = l3*f3+l1*f1 r3*f3+l1*f1 + + paddd mm7, mm5 ; mm7 += s13*f02 + pmaddwd mm6, mm2 ; mm6 = l4*f3+l2*f1 r4*f3+f4*f1 + + paddd mm0, mm3 ; mm0 += s31*f31 + paddd mm7, mm6 ; mm7 += s42*f31 + + dec ecx + jnz loop2 + + ; Divide mm0 and mm7 by 8192 (= right-shift by 13), + ; pack and 
store to [edx] + movd mm4, local_resultDivider; + + psrad mm0, mm4 ; divider the result + + add edx, 8 + psrad mm7, mm4 ; divider the result + + add esi, 8 + packssdw mm0, mm7 + + movq qword ptr [edx-8], mm0 + dec edi + + jnz loop1 + + emms + } + return (numSamples & 0xfffffffe) - local_length; +} + +#endif // ALLOW_MMX diff --git a/libs/soundtouch/sse_win.cpp b/libs/soundtouch/sse_win.cpp new file mode 100644 index 0000000000..ff3bef128c --- /dev/null +++ b/libs/soundtouch/sse_win.cpp @@ -0,0 +1,367 @@ +//////////////////////////////////////////////////////////////////////////////// +/// +/// Win32 version of the SSE optimized routines for Pentium-III, Athlon-XP and +/// later. All SSE optimized functions have been gathered into this single source +/// code file, regardless to their class or original source code file, in order +/// to ease porting the library to other compiler and processor platforms. +/// +/// NOTICE: If using Visual Studio 6.0, you'll need to install the "Visual C++ +/// 6.0 processor pack" update to support SSE instruction set. The update is +/// available for download at Microsoft Developers Network, see here: +/// http://msdn.microsoft.com/vstudio/downloads/tools/ppack/default.aspx +/// +/// If the above URL is expired or removed, go to "http://msdn.microsoft.com" and +/// perform a search with keywords "processor pack". +/// +/// This file is to be compiled in Windows platform with Microsoft Visual C++ +/// Compiler. Please see 'sse_gcc.cpp' for the gcc compiler version for all +/// GNU platforms (if file supplied). 
+/// +/// Author : Copyright (c) Olli Parviainen +/// Author e-mail : oparviai @ iki.fi +/// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch +/// +//////////////////////////////////////////////////////////////////////////////// +// +// Last changed : $Date$ +// File revision : $Revision$ +// +// $Id$ +// +//////////////////////////////////////////////////////////////////////////////// +// +// License : +// +// SoundTouch audio processing library +// Copyright (c) Olli Parviainen +// +// This library is free software; you can redistribute it and/or +// modify it under the terms of the GNU Lesser General Public +// License as published by the Free Software Foundation; either +// version 2.1 of the License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. 
+// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, write to the Free Software +// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// +//////////////////////////////////////////////////////////////////////////////// + +#include "cpu_detect.h" +#include "STTypes.h" + +#ifndef WIN32 +#error "wrong platform - this source code file is exclusively for Win32 platform" +#endif + +using namespace soundtouch; + +#ifdef ALLOW_SSE +// SSE routines available only with float sample type + +////////////////////////////////////////////////////////////////////////////// +// +// implementation of SSE optimized functions of class 'TDStretchSSE' +// +////////////////////////////////////////////////////////////////////////////// + +#include "TDStretch.h" +#include + +// these are declared in 'TDStretch.cpp' +extern int scanOffsets[4][24]; + +// Calculates cross correlation of two buffers +double TDStretchSSE::calcCrossCorrStereo(const float *pV1, const float *pV2) const +{ + uint overlapLengthLocal = overlapLength; + float corr; + + /* + double corr; + uint i; + + // Calculates the cross-correlation value between 'pV1' and 'pV2' vectors + corr = 0.0; + for (i = 0; i < overlapLength / 8; i ++) + { + corr += pV1[0] * pV2[0] + + pV1[1] * pV2[1] + + pV1[2] * pV2[2] + + pV1[3] * pV2[3] + + pV1[4] * pV2[4] + + pV1[5] * pV2[5] + + pV1[6] * pV2[6] + + pV1[7] * pV2[7] + + pV1[8] * pV2[8] + + pV1[9] * pV2[9] + + pV1[10] * pV2[10] + + pV1[11] * pV2[11] + + pV1[12] * pV2[12] + + pV1[13] * pV2[13] + + pV1[14] * pV2[14] + + pV1[15] * pV2[15]; + + pV1 += 16; + pV2 += 16; + } + */ + + _asm + { + // Very important note: data in 'pV2' _must_ be aligned to + // 16-byte boundary! 
+ + // give prefetch hints to CPU of what data are to be needed soonish + // give more aggressive hints on pV1 as that changes while pV2 stays + // same between runs + prefetcht0 [pV1] + prefetcht0 [pV2] + prefetcht0 [pV1 + 32] + + mov eax, dword ptr pV1 + mov ebx, dword ptr pV2 + + xorps xmm0, xmm0 + + mov ecx, overlapLengthLocal + shr ecx, 3 // div by eight + + loop1: + prefetcht0 [eax + 64] // give a prefetch hint to CPU what data are to be needed soonish + prefetcht0 [ebx + 32] // give a prefetch hint to CPU what data are to be needed soonish + movups xmm1, [eax] + mulps xmm1, [ebx] + addps xmm0, xmm1 + + movups xmm2, [eax + 16] + mulps xmm2, [ebx + 16] + addps xmm0, xmm2 + + prefetcht0 [eax + 96] // give a prefetch hint to CPU what data are to be needed soonish + prefetcht0 [ebx + 64] // give a prefetch hint to CPU what data are to be needed soonish + + movups xmm3, [eax + 32] + mulps xmm3, [ebx + 32] + addps xmm0, xmm3 + + movups xmm4, [eax + 48] + mulps xmm4, [ebx + 48] + addps xmm0, xmm4 + + add eax, 64 + add ebx, 64 + + dec ecx + jnz loop1 + + // add the four floats of xmm0 together and return the result. 
+ + movhlps xmm1, xmm0 // move 3 & 4 of xmm0 to 1 & 2 of xmm1 + addps xmm1, xmm0 + movaps xmm2, xmm1 + shufps xmm2, xmm2, 0x01 // move 2 of xmm2 as 1 of xmm2 + addss xmm2, xmm1 + movss corr, xmm2 + } + + return (double)corr; +} + + +////////////////////////////////////////////////////////////////////////////// +// +// implementation of SSE optimized functions of class 'FIRFilter' +// +////////////////////////////////////////////////////////////////////////////// + +#include "FIRFilter.h" + +FIRFilterSSE::FIRFilterSSE() : FIRFilter() +{ + filterCoeffsUnalign = NULL; +} + + +FIRFilterSSE::~FIRFilterSSE() +{ + delete[] filterCoeffsUnalign; +} + + +// (overloaded) Calculates filter coefficients for SSE routine +void FIRFilterSSE::setCoefficients(const float *coeffs, uint newLength, uint uResultDivFactor) +{ + uint i; + float fDivider; + + FIRFilter::setCoefficients(coeffs, newLength, uResultDivFactor); + + // Scale the filter coefficients so that it won't be necessary to scale the filtering result + // also rearrange coefficients suitably for 3DNow! 
+ // Ensure that filter coeffs array is aligned to 16-byte boundary + delete[] filterCoeffsUnalign; + filterCoeffsUnalign = new float[2 * newLength + 4]; + filterCoeffsAlign = (float *)(((uint)filterCoeffsUnalign + 15) & -16); + + fDivider = (float)resultDivider; + + // rearrange the filter coefficients for mmx routines + for (i = 0; i < newLength; i ++) + { + filterCoeffsAlign[2 * i + 0] = + filterCoeffsAlign[2 * i + 1] = coeffs[i + 0] / fDivider; + } +} + + + +// SSE-optimized version of the filter routine for stereo sound +uint FIRFilterSSE::evaluateFilterStereo(float *dest, const float *src, const uint numSamples) const +{ + int count = (numSamples - length) & -2; + uint lengthLocal = length / 8; + float *filterCoeffsLocal = filterCoeffsAlign; + + assert(count % 2 == 0); + + if (count < 2) return 0; + + /* + double suml1, suml2; + double sumr1, sumr2; + uint i, j; + + for (j = 0; j < count; j += 2) + { + const float *ptr; + const float *pFil; + + suml1 = sumr1 = 0.0; + suml2 = sumr2 = 0.0; + ptr = src; + pFil = filterCoeffs; + for (i = 0; i < lengthLocal; i ++) + { + // unroll loop for efficiency. + + suml1 += ptr[0] * pFil[0] + + ptr[2] * pFil[2] + + ptr[4] * pFil[4] + + ptr[6] * pFil[6]; + + sumr1 += ptr[1] * pFil[1] + + ptr[3] * pFil[3] + + ptr[5] * pFil[5] + + ptr[7] * pFil[7]; + + suml2 += ptr[8] * pFil[0] + + ptr[10] * pFil[2] + + ptr[12] * pFil[4] + + ptr[14] * pFil[6]; + + sumr2 += ptr[9] * pFil[1] + + ptr[11] * pFil[3] + + ptr[13] * pFil[5] + + ptr[15] * pFil[7]; + + ptr += 16; + pFil += 8; + } + dest[0] = (float)suml1; + dest[1] = (float)sumr1; + dest[2] = (float)suml2; + dest[3] = (float)sumr2; + + src += 4; + dest += 4; + } + */ + + _asm + { + // Very important note: data in 'src' _must_ be aligned to + // 16-byte boundary! 
+ mov edx, count + mov ebx, dword ptr src + mov eax, dword ptr dest + shr edx, 1 + + loop1: + // "outer loop" : during each round 2*2 output samples are calculated + + // give prefetch hints to CPU of what data are to be needed soonish + prefetcht0 [ebx] + prefetcht0 [filterCoeffsLocal] + + mov esi, ebx + mov edi, filterCoeffsLocal + xorps xmm0, xmm0 + xorps xmm1, xmm1 + mov ecx, lengthLocal + + loop2: + // "inner loop" : during each round eight FIR filter taps are evaluated for 2*2 samples + prefetcht0 [esi + 32] // give a prefetch hint to CPU what data are to be needed soonish + prefetcht0 [edi + 32] // give a prefetch hint to CPU what data are to be needed soonish + + movups xmm2, [esi] // possibly unaligned load + movups xmm3, [esi + 8] // possibly unaligned load + mulps xmm2, [edi] + mulps xmm3, [edi] + addps xmm0, xmm2 + addps xmm1, xmm3 + + movups xmm4, [esi + 16] // possibly unaligned load + movups xmm5, [esi + 24] // possibly unaligned load + mulps xmm4, [edi + 16] + mulps xmm5, [edi + 16] + addps xmm0, xmm4 + addps xmm1, xmm5 + + prefetcht0 [esi + 64] // give a prefetch hint to CPU what data are to be needed soonish + prefetcht0 [edi + 64] // give a prefetch hint to CPU what data are to be needed soonish + + movups xmm6, [esi + 32] // possibly unaligned load + movups xmm7, [esi + 40] // possibly unaligned load + mulps xmm6, [edi + 32] + mulps xmm7, [edi + 32] + addps xmm0, xmm6 + addps xmm1, xmm7 + + movups xmm4, [esi + 48] // possibly unaligned load + movups xmm5, [esi + 56] // possibly unaligned load + mulps xmm4, [edi + 48] + mulps xmm5, [edi + 48] + addps xmm0, xmm4 + addps xmm1, xmm5 + + add esi, 64 + add edi, 64 + dec ecx + jnz loop2 + + // Now xmm0 and xmm1 both have a filtered 2-channel sample each, but we still need + // to sum the two hi- and lo-floats of these registers together. 
+ + movhlps xmm2, xmm0 // xmm2 = xmm2_3 xmm2_2 xmm0_3 xmm0_2 + movlhps xmm2, xmm1 // xmm2 = xmm1_1 xmm1_0 xmm0_3 xmm0_2 + shufps xmm0, xmm1, 0xe4 // xmm0 = xmm1_3 xmm1_2 xmm0_1 xmm0_0 + addps xmm0, xmm2 + + movaps [eax], xmm0 + add ebx, 16 + add eax, 16 + + dec edx + jnz loop1 + } + + return (uint)count; +} + +#endif // ALLOW_SSE -- 2.30.2