1 ////////////////////////////////////////////////////////////////////////////////
3 /// Sampled sound tempo changer/time stretch algorithm. Changes the sound tempo
4 /// while maintaining the original pitch by using a time domain WSOLA-like
5 /// method with several performance-increasing tweaks.
7 /// Note : MMX optimized functions reside in a separate, platform-specific
8 /// file, e.g. 'mmx_win.cpp' or 'mmx_gcc.cpp'
10 /// Author : Copyright (c) Olli Parviainen
11 /// Author e-mail : oparviai @ iki.fi
12 /// SoundTouch WWW: http://www.iki.fi/oparviai/soundtouch
14 ////////////////////////////////////////////////////////////////////////////////
16 // Last changed : $Date$
17 // File revision : $Revision$
21 ////////////////////////////////////////////////////////////////////////////////
25 // SoundTouch audio processing library
26 // Copyright (c) Olli Parviainen
28 // This library is free software; you can redistribute it and/or
29 // modify it under the terms of the GNU Lesser General Public
30 // License as published by the Free Software Foundation; either
31 // version 2.1 of the License, or (at your option) any later version.
33 // This library is distributed in the hope that it will be useful,
34 // but WITHOUT ANY WARRANTY; without even the implied warranty of
35 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
36 // Lesser General Public License for more details.
38 // You should have received a copy of the GNU Lesser General Public
39 // License along with this library; if not, write to the Free Software
40 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
42 ////////////////////////////////////////////////////////////////////////////////
52 #include "cpu_detect.h"
53 #include "TDStretch.h"
55 using namespace soundtouch;
58 #define min(a,b) ((a > b) ? b : a)
59 #define max(a,b) ((a < b) ? b : a)
64 /*****************************************************************************
66 * Constant definitions
68 *****************************************************************************/
71 #define MAX_SCAN_DELTA 124
73 // Table for the hierarchical mixing position seeking algorithm
74 int scanOffsets[4][24]={
75 { 124, 186, 248, 310, 372, 434, 496, 558, 620, 682, 744, 806,
76 868, 930, 992, 1054, 1116, 1178, 1240, 1302, 1364, 1426, 1488, 0},
77 {-100, -75, -50, -25, 25, 50, 75, 100, 0, 0, 0, 0,
78 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
79 { -20, -15, -10, -5, 5, 10, 15, 20, 0, 0, 0, 0,
80 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
81 { -4, -3, -2, -1, 1, 2, 3, 4, 0, 0, 0, 0,
82 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}};
84 /*****************************************************************************
86 * Implementation of the class 'TDStretch'
88 *****************************************************************************/
91 TDStretch::TDStretch() : FIFOProcessor(&outputBuffer)
95 bMidBufferDirty = FALSE;
98 pRefMidBufferUnaligned = NULL;
101 setParameters(44100, DEFAULT_SEQUENCE_MS, DEFAULT_SEEKWINDOW_MS, DEFAULT_OVERLAP_MS);
109 TDStretch::~TDStretch()
112 delete[] pRefMidBufferUnaligned;
117 // Calculates the x having the closest 2^x value for the given value
118 static int _getClosest2Power(double value)
120 return (int)(log(value) / log(2.0) + 0.5);
125 // Sets routine control parameters. These control are certain time constants
126 // defining how the sound is stretched to the desired duration.
128 // 'sampleRate' = sample rate of the sound
129 // 'sequenceMS' = one processing sequence length in milliseconds (default = 82 ms)
130 // 'seekwindowMS' = seeking window length for scanning the best overlapping
131 // position (default = 28 ms)
132 // 'overlapMS' = overlapping length (default = 12 ms)
134 void TDStretch::setParameters(uint aSampleRate, uint aSequenceMS,
135 uint aSeekWindowMS, uint aOverlapMS)
137 this->sampleRate = aSampleRate;
138 this->sequenceMs = aSequenceMS;
139 this->seekWindowMs = aSeekWindowMS;
140 this->overlapMs = aOverlapMS;
142 seekLength = (sampleRate * seekWindowMs) / 1000;
143 seekWindowLength = (sampleRate * sequenceMs) / 1000;
145 maxOffset = seekLength;
147 calculateOverlapLength(overlapMs);
149 // set tempo to recalculate 'sampleReq'
156 /// Get routine control parameters, see setParameters() function.
157 /// Any of the parameters to this function can be NULL, in such case corresponding parameter
158 /// value isn't returned.
159 void TDStretch::getParameters(uint *pSampleRate, uint *pSequenceMs, uint *pSeekWindowMs, uint *pOverlapMs)
163 *pSampleRate = sampleRate;
168 *pSequenceMs = sequenceMs;
173 *pSeekWindowMs = seekWindowMs;
178 *pOverlapMs = overlapMs;
183 // Overlaps samples in 'midBuffer' with the samples in 'input'
184 void TDStretch::overlapMono(SAMPLETYPE *output, const SAMPLETYPE *input) const
188 for (i = 0; i < (int)overlapLength ; i ++)
190 itemp = overlapLength - i;
191 output[i] = (input[i] * i + pMidBuffer[i] * itemp ) / overlapLength; // >> overlapDividerBits;
197 void TDStretch::clearMidBuffer()
201 memset(pMidBuffer, 0, 2 * sizeof(SAMPLETYPE) * overlapLength);
202 bMidBufferDirty = FALSE;
207 void TDStretch::clearInput()
214 // Clears the sample buffers
215 void TDStretch::clear()
217 outputBuffer.clear();
224 // Enables/disables the quick position seeking algorithm. Zero to disable, nonzero
226 void TDStretch::enableQuickSeek(BOOL enable)
232 // Returns nonzero if the quick seeking algorithm is enabled.
233 BOOL TDStretch::isQuickSeekEnabled() const
239 // Seeks for the optimal overlap-mixing position.
240 uint TDStretch::seekBestOverlapPosition(const SAMPLETYPE *refPos)
247 return seekBestOverlapPositionStereoQuick(refPos);
251 return seekBestOverlapPositionStereo(refPos);
259 return seekBestOverlapPositionMonoQuick(refPos);
263 return seekBestOverlapPositionMono(refPos);
271 // Overlaps samples in 'midBuffer' with the samples in 'inputBuffer' at position
273 inline void TDStretch::overlap(SAMPLETYPE *output, const SAMPLETYPE *input, uint ovlPos) const
278 overlapStereo(output, input + 2 * ovlPos);
281 overlapMono(output, input + ovlPos);
288 // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
291 // The best position is determined as the position where the two overlapped
292 // sample sequences are 'most alike', in terms of the highest cross-correlation
293 // value over the overlapping period
294 uint TDStretch::seekBestOverlapPositionStereo(const SAMPLETYPE *refPos)
297 LONG_SAMPLETYPE bestCorr, corr;
300 // Slopes the amplitudes of the 'midBuffer' samples
301 precalcCorrReferenceStereo();
306 // Scans for the best correlation value by testing each possible position
307 // over the permitted range.
308 for (i = 0; i < seekLength; i ++)
310 // Calculates correlation value for the mixing position corresponding
312 corr = calcCrossCorrStereo(refPos + 2 * i, pRefMidBuffer);
314 // Checks for the highest correlation value
321 // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
322 clearCrossCorrState();
328 // Seeks for the optimal overlap-mixing position. The 'stereo' version of the
331 // The best position is determined as the position where the two overlapped
332 // sample sequences are 'most alike', in terms of the highest cross-correlation
333 // value over the overlapping period
334 uint TDStretch::seekBestOverlapPositionStereoQuick(const SAMPLETYPE *refPos)
338 LONG_SAMPLETYPE bestCorr, corr;
339 uint scanCount, corrOffset, tempOffset;
341 // Slopes the amplitude of the 'midBuffer' samples
342 precalcCorrReferenceStereo();
349 // Scans for the best correlation value using four-pass hierarchical search.
351 // The look-up table 'scans' has hierarchical position adjusting steps.
352 // In first pass the routine searhes for the highest correlation with
353 // relatively coarse steps, then rescans the neighbourhood of the highest
354 // correlation with better resolution and so on.
355 for (scanCount = 0;scanCount < 4; scanCount ++)
358 while (scanOffsets[scanCount][j])
360 tempOffset = corrOffset + scanOffsets[scanCount][j];
361 if (tempOffset >= seekLength) break;
363 // Calculates correlation value for the mixing position corresponding
365 corr = calcCrossCorrStereo(refPos + 2 * tempOffset, pRefMidBuffer);
367 // Checks for the highest correlation value
371 bestOffs = tempOffset;
375 corrOffset = bestOffs;
377 // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
378 clearCrossCorrState();
385 // Seeks for the optimal overlap-mixing position. The 'mono' version of the
388 // The best position is determined as the position where the two overlapped
389 // sample sequences are 'most alike', in terms of the highest cross-correlation
390 // value over the overlapping period
391 uint TDStretch::seekBestOverlapPositionMono(const SAMPLETYPE *refPos)
394 LONG_SAMPLETYPE bestCorr, corr;
396 const SAMPLETYPE *compare;
398 // Slopes the amplitude of the 'midBuffer' samples
399 precalcCorrReferenceMono();
404 // Scans for the best correlation value by testing each possible position
405 // over the permitted range.
406 for (tempOffset = 0; tempOffset < seekLength; tempOffset ++)
408 compare = refPos + tempOffset;
410 // Calculates correlation value for the mixing position corresponding
412 corr = calcCrossCorrMono(pRefMidBuffer, compare);
414 // Checks for the highest correlation value
418 bestOffs = tempOffset;
421 // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
422 clearCrossCorrState();
428 // Seeks for the optimal overlap-mixing position. The 'mono' version of the
431 // The best position is determined as the position where the two overlapped
432 // sample sequences are 'most alike', in terms of the highest cross-correlation
433 // value over the overlapping period
434 uint TDStretch::seekBestOverlapPositionMonoQuick(const SAMPLETYPE *refPos)
438 LONG_SAMPLETYPE bestCorr, corr;
439 uint scanCount, corrOffset, tempOffset;
441 // Slopes the amplitude of the 'midBuffer' samples
442 precalcCorrReferenceMono();
449 // Scans for the best correlation value using four-pass hierarchical search.
451 // The look-up table 'scans' has hierarchical position adjusting steps.
452 // In first pass the routine searhes for the highest correlation with
453 // relatively coarse steps, then rescans the neighbourhood of the highest
454 // correlation with better resolution and so on.
455 for (scanCount = 0;scanCount < 4; scanCount ++)
458 while (scanOffsets[scanCount][j])
460 tempOffset = corrOffset + scanOffsets[scanCount][j];
461 if (tempOffset >= seekLength) break;
463 // Calculates correlation value for the mixing position corresponding
465 corr = calcCrossCorrMono(refPos + tempOffset, pRefMidBuffer);
467 // Checks for the highest correlation value
471 bestOffs = tempOffset;
475 corrOffset = bestOffs;
477 // clear cross correlation routine state if necessary (is so e.g. in MMX routines).
478 clearCrossCorrState();
484 /// clear cross correlation routine state if necessary
485 void TDStretch::clearCrossCorrState()
487 // default implementation is empty.
491 // Sets new target tempo. Normal tempo = 'SCALE', smaller values represent slower
492 // tempo, larger faster tempo.
493 void TDStretch::setTempo(float newTempo)
499 // Calculate ideal skip length (according to tempo value)
500 nominalSkip = tempo * (seekWindowLength - overlapLength);
502 intskip = (int)(nominalSkip + 0.5f);
504 // Calculate how many samples are needed in the 'inputBuffer' to
505 // process another batch of samples
506 sampleReq = max(intskip + overlapLength, seekWindowLength) + maxOffset;
511 // Sets the number of channels, 1 = mono, 2 = stereo
512 void TDStretch::setChannels(uint numChannels)
514 if (channels == numChannels) return;
515 assert(numChannels == 1 || numChannels == 2);
517 channels = numChannels;
518 inputBuffer.setChannels(channels);
519 outputBuffer.setChannels(channels);
523 // nominal tempo, no need for processing, just pass the samples through
525 void TDStretch::processNominalTempo()
527 assert(tempo == 1.0f);
531 // If there are samples in pMidBuffer waiting for overlapping,
532 // do a single sliding overlapping with them in order to prevent a
533 // clicking distortion in the output sound
534 if (inputBuffer.numSamples() < overlapLength)
536 // wait until we've got overlapLength input samples
539 // Mix the samples in the beginning of 'inputBuffer' with the
540 // samples in 'midBuffer' using sliding overlapping
541 overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), 0);
542 outputBuffer.putSamples(overlapLength);
543 inputBuffer.receiveSamples(overlapLength);
545 // now we've caught the nominal sample flow and may switch to
549 // Simply bypass samples from input to output
550 outputBuffer.moveSamples(inputBuffer);
554 // Processes as many processing frames of the samples 'inputBuffer', store
555 // the result into 'outputBuffer'
556 void TDStretch::processSamples()
558 uint ovlSkip, offset;
563 // tempo not changed from the original, so bypass the processing
564 processNominalTempo();
568 if (bMidBufferDirty == FALSE)
570 // if midBuffer is empty, move the first samples of the input stream
572 if (inputBuffer.numSamples() < overlapLength)
574 // wait until we've got overlapLength samples
577 memcpy(pMidBuffer, inputBuffer.ptrBegin(), channels * overlapLength * sizeof(SAMPLETYPE));
578 inputBuffer.receiveSamples(overlapLength);
579 bMidBufferDirty = TRUE;
582 // Process samples as long as there are enough samples in 'inputBuffer'
583 // to form a processing frame.
584 while (inputBuffer.numSamples() >= sampleReq)
586 // If tempo differs from the normal ('SCALE'), scan for the best overlapping
588 offset = seekBestOverlapPosition(inputBuffer.ptrBegin());
590 // Mix the samples in the 'inputBuffer' at position of 'offset' with the
591 // samples in 'midBuffer' using sliding overlapping
592 // ... first partially overlap with the end of the previous sequence
593 // (that's in 'midBuffer')
594 overlap(outputBuffer.ptrEnd(overlapLength), inputBuffer.ptrBegin(), offset);
595 outputBuffer.putSamples(overlapLength);
597 // ... then copy sequence samples from 'inputBuffer' to output
598 temp = (seekWindowLength - 2 * overlapLength);// & 0xfffffffe;
601 outputBuffer.putSamples(inputBuffer.ptrBegin() + channels * (offset + overlapLength), temp);
604 // Copies the end of the current sequence from 'inputBuffer' to
605 // 'midBuffer' for being mixed with the beginning of the next
606 // processing sequence and so on
607 assert(offset + seekWindowLength <= inputBuffer.numSamples());
608 memcpy(pMidBuffer, inputBuffer.ptrBegin() + channels * (offset + seekWindowLength - overlapLength),
609 channels * sizeof(SAMPLETYPE) * overlapLength);
610 bMidBufferDirty = TRUE;
612 // Remove the processed samples from the input buffer. Update
613 // the difference between integer & nominal skip step to 'skipFract'
614 // in order to prevent the error from accumulating over time.
615 skipFract += nominalSkip; // real skip size
616 ovlSkip = (int)skipFract; // rounded to integer skip
617 skipFract -= ovlSkip; // maintain the fraction part, i.e. real vs. integer skip
618 inputBuffer.receiveSamples(ovlSkip);
623 // Adds 'numsamples' pcs of samples from the 'samples' memory position into
624 // the input of the object.
625 void TDStretch::putSamples(const SAMPLETYPE *samples, uint numSamples)
627 // Add the samples into the input buffer
628 inputBuffer.putSamples(samples, numSamples);
629 // Process the samples in input buffer
635 /// Set new overlap length parameter & reallocate RefMidBuffer if necessary.
636 void TDStretch::acceptNewOverlapLength(uint newOverlapLength)
640 prevOvl = overlapLength;
641 overlapLength = newOverlapLength;
643 if (overlapLength > prevOvl)
646 delete[] pRefMidBufferUnaligned;
648 pMidBuffer = new SAMPLETYPE[overlapLength * 2];
649 bMidBufferDirty = TRUE;
652 pRefMidBufferUnaligned = new SAMPLETYPE[2 * overlapLength + 16 / sizeof(SAMPLETYPE)];
653 // ensure that 'pRefMidBuffer' is aligned to 16 byte boundary for efficiency
654 pRefMidBuffer = (SAMPLETYPE *)((((ulong)pRefMidBufferUnaligned) + 15) & -16);
658 TDStretch * TDStretch::newInstance()
662 uExtensions = detectCPUextensions();
664 // Check if MMX/SSE/3DNow! instruction set extensions supported by CPU
667 // MMX routines available only with integer sample types
668 if (uExtensions & SUPPORT_MMX)
670 return ::new TDStretchMMX;
677 if (uExtensions & SUPPORT_SSE)
680 return ::new TDStretchSSE;
687 if (uExtensions & SUPPORT_3DNOW)
690 return ::new TDStretch3DNow;
693 #endif // ALLOW_3DNOW
696 // ISA optimizations not supported, use plain C version
697 return ::new TDStretch;
702 //////////////////////////////////////////////////////////////////////////////
704 // Integer arithmetics specific algorithm implementations.
706 //////////////////////////////////////////////////////////////////////////////
708 #ifdef INTEGER_SAMPLES
710 // Slopes the amplitude of the 'midBuffer' samples so that cross correlation
711 // is faster to calculate
712 void TDStretch::precalcCorrReferenceStereo()
717 for (i=0 ; i < (int)overlapLength ;i ++)
719 temp = i * (overlapLength - i);
722 temp2 = (pMidBuffer[cnt2] * temp) / slopingDivider;
723 pRefMidBuffer[cnt2] = (short)(temp2);
724 temp2 = (pMidBuffer[cnt2 + 1] * temp) / slopingDivider;
725 pRefMidBuffer[cnt2 + 1] = (short)(temp2);
730 // Slopes the amplitude of the 'midBuffer' samples so that cross correlation
731 // is faster to calculate
732 void TDStretch::precalcCorrReferenceMono()
738 for (i=0 ; i < (int)overlapLength ;i ++)
740 temp = i * (overlapLength - i);
741 temp2 = (pMidBuffer[i] * temp) / slopingDivider;
742 pRefMidBuffer[i] = (short)temp2;
747 // Overlaps samples in 'midBuffer' with the samples in 'input'. The 'Stereo'
748 // version of the routine.
749 void TDStretch::overlapStereo(short *output, const short *input) const
755 for (i = 0; i < (int)overlapLength ; i ++)
757 temp = (short)(overlapLength - i);
759 output[cnt2] = (input[cnt2] * i + pMidBuffer[cnt2] * temp ) / overlapLength;
760 output[cnt2 + 1] = (input[cnt2 + 1] * i + pMidBuffer[cnt2 + 1] * temp ) / overlapLength;
765 /// Calculates overlap period length in samples.
766 /// Integer version rounds overlap length to closest power of 2
767 /// for a divide scaling operation.
768 void TDStretch::calculateOverlapLength(uint overlapMs)
772 overlapDividerBits = _getClosest2Power((sampleRate * overlapMs) / 1000.0);
773 if (overlapDividerBits > 9) overlapDividerBits = 9;
774 if (overlapDividerBits < 4) overlapDividerBits = 4;
775 newOvl = (uint)pow(2, overlapDividerBits);
777 acceptNewOverlapLength(newOvl);
779 // calculate sloping divider so that crosscorrelation operation won't
780 // overflow 32-bit register. Max. sum of the crosscorrelation sum without
781 // divider would be 2^30*(N^3-N)/3, where N = overlap length
782 slopingDivider = (newOvl * newOvl - 1) / 3;
786 long TDStretch::calcCrossCorrMono(const short *mixingPos, const short *compare) const
792 for (i = 1; i < overlapLength; i ++)
794 corr += (mixingPos[i] * compare[i]) >> overlapDividerBits;
801 long TDStretch::calcCrossCorrStereo(const short *mixingPos, const short *compare) const
807 for (i = 2; i < 2 * overlapLength; i += 2)
809 corr += (mixingPos[i] * compare[i] +
810 mixingPos[i + 1] * compare[i + 1]) >> overlapDividerBits;
816 #endif // INTEGER_SAMPLES
818 //////////////////////////////////////////////////////////////////////////////
820 // Floating point arithmetics specific algorithm implementations.
826 // Slopes the amplitude of the 'midBuffer' samples so that cross correlation
827 // is faster to calculate
828 void TDStretch::precalcCorrReferenceStereo()
833 for (i=0 ; i < (int)overlapLength ;i ++)
835 temp = (float)i * (float)(overlapLength - i);
837 pRefMidBuffer[cnt2] = (float)(pMidBuffer[cnt2] * temp);
838 pRefMidBuffer[cnt2 + 1] = (float)(pMidBuffer[cnt2 + 1] * temp);
843 // Slopes the amplitude of the 'midBuffer' samples so that cross correlation
844 // is faster to calculate
845 void TDStretch::precalcCorrReferenceMono()
850 for (i=0 ; i < (int)overlapLength ;i ++)
852 temp = (float)i * (float)(overlapLength - i);
853 pRefMidBuffer[i] = (float)(pMidBuffer[i] * temp);
858 // SSE-optimized version of the function overlapStereo
859 void TDStretch::overlapStereo(float *output, const float *input) const
867 fScale = 1.0f / (float)overlapLength;
869 for (i = 0; i < (int)overlapLength ; i ++)
871 fTemp = (float)(overlapLength - i) * fScale;
872 fi = (float)i * fScale;
874 output[cnt2 + 0] = input[cnt2 + 0] * fi + pMidBuffer[cnt2 + 0] * fTemp;
875 output[cnt2 + 1] = input[cnt2 + 1] * fi + pMidBuffer[cnt2 + 1] * fTemp;
880 /// Calculates overlap period length in samples.
881 void TDStretch::calculateOverlapLength(uint overlapMs)
885 newOvl = (sampleRate * overlapMs) / 1000;
886 if (newOvl < 16) newOvl = 16;
888 acceptNewOverlapLength(newOvl);
893 double TDStretch::calcCrossCorrMono(const float *mixingPos, const float *compare) const
899 for (i = 1; i < overlapLength; i ++)
901 corr += mixingPos[i] * compare[i];
908 double TDStretch::calcCrossCorrStereo(const float *mixingPos, const float *compare) const
914 for (i = 2; i < 2 * overlapLength; i += 2)
916 corr += mixingPos[i] * compare[i] +
917 mixingPos[i + 1] * compare[i + 1];
923 #endif // FLOAT_SAMPLES