libs/vamp-pyin/YinUtil.cpp

   1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
   2
   3 /*
   4     pYIN - A fundamental frequency estimator for monophonic audio
   5     Centre for Digital Music, Queen Mary, University of London.
   6
   7     This program is free software; you can redistribute it and/or
   8     modify it under the terms of the GNU General Public License as
   9     published by the Free Software Foundation; either version 2 of the
  10     License, or (at your option) any later version.  See the file
  11     COPYING included with this distribution for more information.
  12 */
  13
  14 #include "YinUtil.h"
  15
  16 #include <vector>
  17
  18 #include <cstdio>
  19 #include <cmath>
  20 #include <algorithm>
  21
  22 #include <boost/math/distributions.hpp>
  23
  24 void
  25 YinUtil::slowDifference(const double *in, double *yinBuffer, const size_t yinBufferSize)
  26 {
  27     yinBuffer[0] = 0;
  28     double delta ;
  29     int startPoint = 0;
  30     int endPoint = 0;
  31     for (size_t i = 1; i < yinBufferSize; ++i) {
  32         yinBuffer[i] = 0;
  33         startPoint = yinBufferSize/2 - i/2;
  34         endPoint = startPoint + yinBufferSize;
  35         for (int j = startPoint; j < endPoint; ++j) {
  36             delta = in[i+j] - in[j];
  37             yinBuffer[i] += delta * delta;
  38         }
  39     }
  40 }
  41
  42 void
  43 YinUtil::fastDifference(const double *in, double *yinBuffer, const size_t yinBufferSize)
  44 {
  45
  46     // DECLARE AND INITIALISE
  47     // initialisation of most of the arrays here was done in a separate function,
  48     // with all the arrays as members of the class... moved them back here.
  49
  50     size_t frameSize = 2 * yinBufferSize;
  51
  52     double *audioTransformedReal = new double[frameSize];
  53     double *audioTransformedImag = new double[frameSize];
  54     double *nullImag = new double[frameSize];
  55     double *kernel = new double[frameSize];
  56     double *kernelTransformedReal = new double[frameSize];
  57     double *kernelTransformedImag = new double[frameSize];
  58     double *yinStyleACFReal = new double[frameSize];
  59     double *yinStyleACFImag = new double[frameSize];
  60     double *powerTerms = new double[yinBufferSize];
  61
  62     for (size_t j = 0; j < yinBufferSize; ++j)
  63     {
  64         yinBuffer[j] = 0.; // set to zero
  65         powerTerms[j] = 0.; // set to zero
  66     }
  67
  68     for (size_t j = 0; j < frameSize; ++j)
  69     {
  70         nullImag[j] = 0.;
  71         audioTransformedReal[j] = 0.;
  72         audioTransformedImag[j] = 0.;
  73         kernel[j] = 0.;
  74         kernelTransformedReal[j] = 0.;
  75         kernelTransformedImag[j] = 0.;
  76         yinStyleACFReal[j] = 0.;
  77         yinStyleACFImag[j] = 0.;
  78     }
  79
  80     // POWER TERM CALCULATION
  81     // ... for the power terms in equation (7) in the Yin paper
  82     powerTerms[0] = 0.0;
  83     for (size_t j = 0; j < yinBufferSize; ++j) {
  84         powerTerms[0] += in[j] * in[j];
  85     }
  86
  87     // now iteratively calculate all others (saves a few multiplications)
  88     for (size_t tau = 1; tau < yinBufferSize; ++tau) {
  89         powerTerms[tau] = powerTerms[tau-1] - in[tau-1] * in[tau-1] + in[tau+yinBufferSize] * in[tau+yinBufferSize];
  90     }
  91
  92     // YIN-STYLE AUTOCORRELATION via FFT
  93     // 1. data
  94     Vamp::FFT::forward(frameSize, in, nullImag, audioTransformedReal, audioTransformedImag);
  95
  96     // 2. half of the data, disguised as a convolution kernel
  97     for (size_t j = 0; j < yinBufferSize; ++j) {
  98         kernel[j] = in[yinBufferSize-1-j];
  99     }
 100     Vamp::FFT::forward(frameSize, kernel, nullImag, kernelTransformedReal, kernelTransformedImag);
 101
 102     // 3. convolution via complex multiplication -- written into
 103     for (size_t j = 0; j < frameSize; ++j) {
 104         yinStyleACFReal[j] = audioTransformedReal[j]*kernelTransformedReal[j] - audioTransformedImag[j]*kernelTransformedImag[j]; // real
 105         yinStyleACFImag[j] = audioTransformedReal[j]*kernelTransformedImag[j] + audioTransformedImag[j]*kernelTransformedReal[j]; // imaginary
 106     }
 107     Vamp::FFT::inverse(frameSize, yinStyleACFReal, yinStyleACFImag, audioTransformedReal, audioTransformedImag);
 108
 109     // CALCULATION OF difference function
 110     // ... according to (7) in the Yin paper.
 111     for (size_t j = 0; j < yinBufferSize; ++j) {
 112         // taking only the real part
 113         yinBuffer[j] = powerTerms[0] + powerTerms[j] - 2 * audioTransformedReal[j+yinBufferSize-1];
 114     }
 115     delete [] audioTransformedReal;
 116     delete [] audioTransformedImag;
 117     delete [] nullImag;
 118     delete [] kernel;
 119     delete [] kernelTransformedReal;
 120     delete [] kernelTransformedImag;
 121     delete [] yinStyleACFReal;
 122     delete [] yinStyleACFImag;
 123     delete [] powerTerms;
 124 }
 125
 126 void
 127 YinUtil::cumulativeDifference(double *yinBuffer, const size_t yinBufferSize)
 128 {
 129     size_t tau;
 130
 131     yinBuffer[0] = 1;
 132
 133     double runningSum = 0;
 134
 135     for (tau = 1; tau < yinBufferSize; ++tau) {
 136         runningSum += yinBuffer[tau];
 137         if (runningSum == 0)
 138         {
 139             yinBuffer[tau] = 1;
 140         } else {
 141             yinBuffer[tau] *= tau / runningSum;
 142         }
 143     }
 144 }
 145
 146 int
 147 YinUtil::absoluteThreshold(const double *yinBuffer, const size_t yinBufferSize, const double thresh)
 148 {
 149     size_t tau;
 150     size_t minTau = 0;
 151     double minVal = 1000.;
 152
 153     // using Joren Six's "loop construct" from TarsosDSP
 154     tau = 2;
 155     while (tau < yinBufferSize)
 156     {
 157         if (yinBuffer[tau] < thresh)
 158         {
 159             while (tau+1 < yinBufferSize && yinBuffer[tau+1] < yinBuffer[tau])
 160             {
 161                 ++tau;
 162             }
 163             return tau;
 164         } else {
 165             if (yinBuffer[tau] < minVal)
 166             {
 167                 minVal = yinBuffer[tau];
 168                 minTau = tau;
 169             }
 170         }
 171         ++tau;
 172     }
 173     if (minTau > 0)
 174     {
 175         return -minTau;
 176     }
 177     return 0;
 178 }
 179
 180 static float uniformDist[100] = {0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000,0.0100000};
 181 static float betaDist1[100] = {0.028911,0.048656,0.061306,0.068539,0.071703,0.071877,0.069915,0.066489,0.062117,0.057199,0.052034,0.046844,0.041786,0.036971,0.032470,0.028323,0.024549,0.021153,0.018124,0.015446,0.013096,0.011048,0.009275,0.007750,0.006445,0.005336,0.004397,0.003606,0.002945,0.002394,0.001937,0.001560,0.001250,0.000998,0.000792,0.000626,0.000492,0.000385,0.000300,0.000232,0.000179,0.000137,0.000104,0.000079,0.000060,0.000045,0.000033,0.000024,0.000018,0.000013,0.000009,0.000007,0.000005,0.000003,0.000002,0.000002,0.000001,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000};
 182 static float betaDist2[100] = {0.012614,0.022715,0.030646,0.036712,0.041184,0.044301,0.046277,0.047298,0.047528,0.047110,0.046171,0.044817,0.043144,0.041231,0.039147,0.036950,0.034690,0.032406,0.030133,0.027898,0.025722,0.023624,0.021614,0.019704,0.017900,0.016205,0.014621,0.013148,0.011785,0.010530,0.009377,0.008324,0.007366,0.006497,0.005712,0.005005,0.004372,0.003806,0.003302,0.002855,0.002460,0.002112,0.001806,0.001539,0.001307,0.001105,0.000931,0.000781,0.000652,0.000542,0.000449,0.000370,0.000303,0.000247,0.000201,0.000162,0.000130,0.000104,0.000082,0.000065,0.000051,0.000039,0.000030,0.000023,0.000018,0.000013,0.000010,0.000007,0.000005,0.000004,0.000003,0.000002,0.000001,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000};
 183 static float betaDist3[100] = {0.006715,0.012509,0.017463,0.021655,0.025155,0.028031,0.030344,0.032151,0.033506,0.034458,0.035052,0.035331,0.035332,0.035092,0.034643,0.034015,0.033234,0.032327,0.031314,0.030217,0.029054,0.027841,0.026592,0.025322,0.024042,0.022761,0.021489,0.020234,0.019002,0.017799,0.016630,0.015499,0.014409,0.013362,0.012361,0.011407,0.010500,0.009641,0.008830,0.008067,0.007351,0.006681,0.006056,0.005475,0.004936,0.004437,0.003978,0.003555,0.003168,0.002814,0.002492,0.002199,0.001934,0.001695,0.001481,0.001288,0.001116,0.000963,0.000828,0.000708,0.000603,0.000511,0.000431,0.000361,0.000301,0.000250,0.000206,0.000168,0.000137,0.000110,0.000088,0.000070,0.000055,0.000043,0.000033,0.000025,0.000019,0.000014,0.000010,0.000007,0.000005,0.000004,0.000002,0.000002,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000};
 184 static float betaDist4[100] = {0.003996,0.007596,0.010824,0.013703,0.016255,0.018501,0.020460,0.022153,0.023597,0.024809,0.025807,0.026607,0.027223,0.027671,0.027963,0.028114,0.028135,0.028038,0.027834,0.027535,0.027149,0.026687,0.026157,0.025567,0.024926,0.024240,0.023517,0.022763,0.021983,0.021184,0.020371,0.019548,0.018719,0.017890,0.017062,0.016241,0.015428,0.014627,0.013839,0.013068,0.012315,0.011582,0.010870,0.010181,0.009515,0.008874,0.008258,0.007668,0.007103,0.006565,0.006053,0.005567,0.005107,0.004673,0.004264,0.003880,0.003521,0.003185,0.002872,0.002581,0.002312,0.002064,0.001835,0.001626,0.001434,0.001260,0.001102,0.000959,0.000830,0.000715,0.000612,0.000521,0.000440,0.000369,0.000308,0.000254,0.000208,0.000169,0.000136,0.000108,0.000084,0.000065,0.000050,0.000037,0.000027,0.000019,0.000014,0.000009,0.000006,0.000004,0.000002,0.000001,0.000001,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000};
 185 static float single10[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000};
 186 static float single15[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000};
 187 static float single20[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000};
 188
 189 std::vector<double>
 190 YinUtil::yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize, const size_t minTau0, const size_t maxTau0)
 191 {
 192     size_t minTau = 2;
 193     size_t maxTau = yinBufferSize;
 194
 195     // adapt period range, if necessary
 196     if (minTau0 > 0 && minTau0 < maxTau0) minTau = minTau0;
 197     if (maxTau0 > 0 && maxTau0 < yinBufferSize && maxTau0 > minTau) maxTau = maxTau0;
 198
 199     double minWeight = 0.01;
 200     size_t tau;
 201     std::vector<float> thresholds;
 202     std::vector<float> distribution;
 203     std::vector<double> peakProb = std::vector<double>(yinBufferSize);
 204
 205     size_t nThreshold = 100;
 206     int nThresholdInt = nThreshold;
 207
 208     for (int i = 0; i < nThresholdInt; ++i)
 209     {
 210         switch (prior) {
 211             case 0:
 212                 distribution.push_back(uniformDist[i]);
 213                 break;
 214             case 1:
 215                 distribution.push_back(betaDist1[i]);
 216                 break;
 217             case 2:
 218                 distribution.push_back(betaDist2[i]);
 219                 break;
 220             case 3:
 221                 distribution.push_back(betaDist3[i]);
 222                 break;
 223             case 4:
 224                 distribution.push_back(betaDist4[i]);
 225                 break;
 226             case 5:
 227                 distribution.push_back(single10[i]);
 228                 break;
 229             case 6:
 230                 distribution.push_back(single15[i]);
 231                 break;
 232             case 7:
 233                 distribution.push_back(single20[i]);
 234                 break;
 235             default:
 236                 distribution.push_back(uniformDist[i]);
 237         }
 238         thresholds.push_back(0.01 + i*0.01);
 239     }
 240
 241     int currThreshInd = nThreshold-1;
 242     tau = minTau;
 243
 244     // double factor = 1.0 / (0.25 * (nThresholdInt+1) * (nThresholdInt + 1)); // factor to scale down triangular weight
 245     size_t minInd = 0;
 246     float minVal = 42.f;
 247     // while (currThreshInd != -1 && tau < maxTau)
 248     // {
 249     //     if (yinBuffer[tau] < thresholds[currThreshInd])
 250     //     {
 251     //         while (tau + 1 < maxTau && yinBuffer[tau+1] < yinBuffer[tau])
 252     //         {
 253     //             tau++;
 254     //         }
 255     //         // tau is now local minimum
 256     //         // std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl;
 257     //         if (yinBuffer[tau] < minVal && tau > 2){
 258     //             minVal = yinBuffer[tau];
 259     //             minInd = tau;
 260     //         }
 261     //         peakProb[tau] += distribution[currThreshInd];
 262     //         currThreshInd--;
 263     //     } else {
 264     //         tau++;
 265     //     }
 266     // }
 267     // double nonPeakProb = 1;
 268     // for (size_t i = minTau; i < maxTau; ++i)
 269     // {
 270     //     nonPeakProb -= peakProb[i];
 271     // }
 272     //
 273     // std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl;
 274     float sumProb = 0;
 275     while (tau+1 < maxTau)
 276     {
 277         if (yinBuffer[tau] < thresholds[thresholds.size()-1] && yinBuffer[tau+1] < yinBuffer[tau])
 278         {
 279             while (tau + 1 < maxTau && yinBuffer[tau+1] < yinBuffer[tau])
 280             {
 281                 tau++;
 282             }
 283             // tau is now local minimum
 284             // std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl;
 285             if (yinBuffer[tau] < minVal && tau > 2){
 286                 minVal = yinBuffer[tau];
 287                 minInd = tau;
 288             }
 289             currThreshInd = nThresholdInt-1;
 290             while (thresholds[currThreshInd] > yinBuffer[tau] && currThreshInd > -1) {
 291                 // std::cerr << distribution[currThreshInd] << std::endl;
 292                 peakProb[tau] += distribution[currThreshInd];
 293                 currThreshInd--;
 294             }
 295             // peakProb[tau] = 1 - yinBuffer[tau];
 296             sumProb += peakProb[tau];
 297             tau++;
 298         } else {
 299             tau++;
 300         }
 301     }
 302
 303     if (peakProb[minInd] > 1) {
 304         std::cerr << "WARNING: yin has prob > 1 ??? I'm returning all zeros instead." << std::endl;
 305         return(std::vector<double>(yinBufferSize));
 306     }
 307
 308     double nonPeakProb = 1;
 309     if (sumProb > 0) {
 310         for (size_t i = minTau; i < maxTau; ++i)
 311         {
 312             peakProb[i] = peakProb[i] / sumProb * peakProb[minInd];
 313             nonPeakProb -= peakProb[i];
 314         }
 315     }
 316     if (minInd > 0)
 317     {
 318         // std::cerr << "min set " << minVal << " " << minInd << " " << nonPeakProb << std::endl;
 319         peakProb[minInd] += nonPeakProb * minWeight;
 320     }
 321
 322     return peakProb;
 323 }
 324
 325 double
 326 YinUtil::parabolicInterpolation(const double *yinBuffer, const size_t tau, const size_t yinBufferSize)
 327 {
 328     // this is taken almost literally from Joren Six's Java implementation
 329     if (tau == yinBufferSize) // not valid anyway.
 330     {
 331         return static_cast<double>(tau);
 332     }
 333
 334     double betterTau = 0.0;
 335     if (tau > 0 && tau < yinBufferSize-1) {
 336         float s0, s1, s2;
 337         s0 = yinBuffer[tau-1];
 338         s1 = yinBuffer[tau];
 339         s2 = yinBuffer[tau+1];
 340
 341         double adjustment = (s2 - s0) / (2 * (2 * s1 - s2 - s0));
 342
 343         if (abs(adjustment)>1) adjustment = 0;
 344
 345         betterTau = tau + adjustment;
 346     } else {
 347         // std::cerr << "WARNING: can't do interpolation at the edge (tau = " << tau << "), will return un-interpolated value.\n";
 348         betterTau = tau;
 349     }
 350     return betterTau;
 351 }
 352
 353 double
 354 YinUtil::sumSquare(const double *in, const size_t start, const size_t end)
 355 {
 356     double out = 0;
 357     for (size_t i = start; i < end; ++i)
 358     {
 359         out += in[i] * in[i];
 360     }
 361     return out;
 362 }