libs/qm-dsp/dsp/tempotracking/TempoTrackV2.cpp

   1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
   2
   3 /*
   4     QM DSP Library
   5
   6     Centre for Digital Music, Queen Mary, University of London.
   7     This file copyright 2008-2009 Matthew Davies and QMUL.
   8
   9     This program is free software; you can redistribute it and/or
  10     modify it under the terms of the GNU General Public License as
  11     published by the Free Software Foundation; either version 2 of the
  12     License, or (at your option) any later version.  See the file
  13     COPYING included with this distribution for more information.
  14 */
  15
  16 #include "TempoTrackV2.h"
  17
  18 #include <cmath>
  19 #include <cstdlib>
  20 #include <iostream>
  21
  22 #include "maths/MathUtilities.h"
  23
   24 #define   EPS 0.0000008 // small positive constant added to rcf values and to normalisation denominators so the divisions below never divide by zero
  25
  26 TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
  27     m_rate(rate), m_increment(increment) { }
  28 TempoTrackV2::~TempoTrackV2() { }
  29
  30 void
  31 TempoTrackV2::filter_df(d_vec_t &df)
  32 {
  33     d_vec_t a(3);
  34     d_vec_t b(3);
  35     d_vec_t     lp_df(df.size());
  36
  37     //equivalent in matlab to [b,a] = butter(2,0.4);
  38     a[0] = 1.0000;
  39     a[1] = -0.3695;
  40     a[2] = 0.1958;
  41     b[0] = 0.2066;
  42     b[1] = 0.4131;
  43     b[2] = 0.2066;
  44
  45     double inp1 = 0.;
  46     double inp2 = 0.;
  47     double out1 = 0.;
  48     double out2 = 0.;
  49
  50
  51     // forwards filtering
  52     for (unsigned int i = 0;i < df.size();i++)
  53     {
  54         lp_df[i] =  b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
  55         inp2 = inp1;
  56         inp1 = df[i];
  57         out2 = out1;
  58         out1 = lp_df[i];
  59     }
  60
  61     // copy forwards filtering to df...
  62     // but, time-reversed, ready for backwards filtering
  63     for (unsigned int i = 0;i < df.size();i++)
  64     {
  65         df[i] = lp_df[df.size()-i-1];
  66     }
  67
  68     for (unsigned int i = 0;i < df.size();i++)
  69     {
  70         lp_df[i] = 0.;
  71     }
  72
  73     inp1 = 0.; inp2 = 0.;
  74     out1 = 0.; out2 = 0.;
  75
  76   // backwards filetering on time-reversed df
  77     for (unsigned int i = 0;i < df.size();i++)
  78     {
  79         lp_df[i] =  b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
  80         inp2 = inp1;
  81         inp1 = df[i];
  82         out2 = out1;
  83         out1 = lp_df[i];
  84     }
  85
  86   // write the re-reversed (i.e. forward) version back to df
  87     for (unsigned int i = 0;i < df.size();i++)
  88     {
  89         df[i] = lp_df[df.size()-i-1];
  90     }
  91 }
  92
  93
  94 // MEPD 28/11/12
  95 // This function now allows for a user to specify an inputtempo (in BPM)
  96 // and a flag "constraintempo" which replaces the general rayleigh weighting for periodicities
  97 // with a gaussian which is centered around the input tempo
  98 // Note, if inputtempo = 120 and constraintempo = false, then functionality is
  99 // as it was before
 100 void
 101 TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
 102                                   vector<double> &beat_period,
 103                                   vector<double> &tempi,
 104                                   double inputtempo, bool constraintempo)
 105 {
 106     // to follow matlab.. split into 512 sample frames with a 128 hop size
 107     // calculate the acf,
 108     // then the rcf.. and then stick the rcfs as columns of a matrix
 109     // then call viterbi decoding with weight vector and transition matrix
 110     // and get best path
 111
 112     unsigned int wv_len = 128;
 113
 114     // MEPD 28/11/12
 115     // the default value of inputtempo in the beat tracking plugin is 120
 116     // so if the user specifies a different inputtempo, the rayparam will be updated
 117     // accordingly.
 118     // note: 60*44100/512 is a magic number
 119     // this might (will?) break if a user specifies a different frame rate for the onset detection function
 120     double rayparam = (60*44100/512)/inputtempo;
 121
 122     // these debug statements can be removed.
 123 //    std::cerr << "inputtempo" << inputtempo << std::endl;
 124 //    std::cerr << "rayparam" << rayparam << std::endl;
 125 //    std::cerr << "constraintempo" << constraintempo << std::endl;
 126
 127     // make rayleigh weighting curve
 128     d_vec_t wv(wv_len);
 129
 130     // check whether or not to use rayleigh weighting (if constraintempo is false)
 131     // or use gaussian weighting it (constraintempo is true)
 132     if (constraintempo)
 133     {
 134         for (unsigned int i=0; i<wv.size(); i++)
 135         {
 136             // MEPD 28/11/12
 137             // do a gaussian weighting instead of rayleigh
 138             wv[i] = exp( (-1.*pow((static_cast<double> (i)-rayparam),2.)) / (2.*pow(rayparam/4.,2.)) );
 139         }
 140     }
 141     else
 142     {
 143         for (unsigned int i=0; i<wv.size(); i++)
 144         {
 145             // MEPD 28/11/12
 146             // standard rayleigh weighting over periodicities
 147             wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
 148         }
 149     }
 150
 151     // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
 152     unsigned int winlen = 512;
 153     unsigned int step = 128;
 154
 155     // matrix to store output of comb filter bank, increment column of matrix at each frame
 156     d_mat_t rcfmat;
 157     int col_counter = -1;
 158
 159     // main loop for beat period calculation
 160     for (unsigned int i=0; i+winlen<df.size(); i+=step)
 161     {
 162         // get dfframe
 163         d_vec_t dfframe(winlen);
 164         for (unsigned int k=0; k<winlen; k++)
 165         {
 166             dfframe[k] = df[i+k];
 167         }
 168         // get rcf vector for current frame
 169         d_vec_t rcf(wv_len);
 170         get_rcf(dfframe,wv,rcf);
 171
 172         rcfmat.push_back( d_vec_t() ); // adds a new column
 173         col_counter++;
 174         for (unsigned int j=0; j<rcf.size(); j++)
 175         {
 176             rcfmat[col_counter].push_back( rcf[j] );
 177         }
 178     }
 179
 180     // now call viterbi decoding function
 181     viterbi_decode(rcfmat,wv,beat_period,tempi);
 182 }
 183
 184
 185 void
 186 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
 187 {
 188     // calculate autocorrelation function
 189     // then rcf
 190     // just hard code for now... don't really need separate functions to do this
 191
 192     // make acf
 193
 194     d_vec_t dfframe(dfframe_in);
 195
 196     MathUtilities::adaptiveThreshold(dfframe);
 197
 198     d_vec_t acf(dfframe.size());
 199
 200
 201     for (unsigned int lag=0; lag<dfframe.size(); lag++)
 202     {
 203         double sum = 0.;
 204         double tmp = 0.;
 205
 206         for (unsigned int n=0; n<(dfframe.size()-lag); n++)
 207         {
 208             tmp = dfframe[n] * dfframe[n+lag];
 209             sum += tmp;
 210         }
 211         acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
 212     }
 213
 214     // now apply comb filtering
 215     int numelem = 4;
 216
 217     for (unsigned int i = 2;i < rcf.size();i++) // max beat period
 218     {
 219         for (int a = 1;a <= numelem;a++) // number of comb elements
 220         {
 221             for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
 222             {
 223                 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.);     // calculate value for comb filter row
 224             }
 225         }
 226     }
 227
 228     // apply adaptive threshold to rcf
 229     MathUtilities::adaptiveThreshold(rcf);
 230
 231     double rcfsum =0.;
 232     for (unsigned int i=0; i<rcf.size(); i++)
 233     {
 234         rcf[i] += EPS ;
 235         rcfsum += rcf[i];
 236     }
 237
 238     // normalise rcf to sum to unity
 239     for (unsigned int i=0; i<rcf.size(); i++)
 240     {
 241         rcf[i] /= (rcfsum + EPS);
 242     }
 243 }
 244
 245 void
 246 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
 247 {
 248     // following Kevin Murphy's Viterbi decoding to get best path of
 249     // beat periods through rfcmat
 250
 251     // make transition matrix
 252     d_mat_t tmat;
 253     for (unsigned int i=0;i<wv.size();i++)
 254     {
 255         tmat.push_back ( d_vec_t() ); // adds a new column
 256         for (unsigned int j=0; j<wv.size(); j++)
 257         {
 258             tmat[i].push_back(0.); // fill with zeros initially
 259         }
 260     }
 261
 262     // variance of Gaussians in transition matrix
 263     // formed of Gaussians on diagonal - implies slow tempo change
 264     double sigma = 8.;
 265     // don't want really short beat periods, or really long ones
 266     for (unsigned int i=20;i <wv.size()-20; i++)
 267     {
 268         for (unsigned int j=20; j<wv.size()-20; j++)
 269         {
 270             double mu = static_cast<double>(i);
 271             tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
 272         }
 273     }
 274
 275     // parameters for Viterbi decoding... this part is taken from
 276     // Murphy's matlab
 277
 278     d_mat_t delta;
 279     i_mat_t psi;
 280     for (unsigned int i=0;i <rcfmat.size(); i++)
 281     {
 282         delta.push_back( d_vec_t());
 283         psi.push_back( i_vec_t());
 284         for (unsigned int j=0; j<rcfmat[i].size(); j++)
 285         {
 286             delta[i].push_back(0.); // fill with zeros initially
 287             psi[i].push_back(0); // fill with zeros initially
 288         }
 289     }
 290
 291
 292     unsigned int T = delta.size();
 293
 294     if (T < 2) return; // can't do anything at all meaningful
 295
 296     unsigned int Q = delta[0].size();
 297
 298     // initialize first column of delta
 299     for (unsigned int j=0; j<Q; j++)
 300     {
 301         delta[0][j] = wv[j] * rcfmat[0][j];
 302         psi[0][j] = 0;
 303     }
 304
 305     double deltasum = 0.;
 306     for (unsigned int i=0; i<Q; i++)
 307     {
 308         deltasum += delta[0][i];
 309     }
 310     for (unsigned int i=0; i<Q; i++)
 311     {
 312         delta[0][i] /= (deltasum + EPS);
 313     }
 314
 315
 316     for (unsigned int t=1; t<T; t++)
 317     {
 318         d_vec_t tmp_vec(Q);
 319
 320         for (unsigned int j=0; j<Q; j++)
 321         {
 322             for (unsigned int i=0; i<Q; i++)
 323             {
 324                 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
 325             }
 326
 327             delta[t][j] = get_max_val(tmp_vec);
 328
 329             psi[t][j] = get_max_ind(tmp_vec);
 330
 331             delta[t][j] *= rcfmat[t][j];
 332         }
 333
 334         // normalise current delta column
 335         double deltasum = 0.;
 336         for (unsigned int i=0; i<Q; i++)
 337         {
 338             deltasum += delta[t][i];
 339         }
 340         for (unsigned int i=0; i<Q; i++)
 341         {
 342             delta[t][i] /= (deltasum + EPS);
 343         }
 344     }
 345
 346     i_vec_t bestpath(T);
 347     d_vec_t tmp_vec(Q);
 348     for (unsigned int i=0; i<Q; i++)
 349     {
 350         tmp_vec[i] = delta[T-1][i];
 351     }
 352
 353     // find starting point - best beat period for "last" frame
 354     bestpath[T-1] = get_max_ind(tmp_vec);
 355
 356     // backtrace through index of maximum values in psi
 357     for (unsigned int t=T-2; t>0 ;t--)
 358     {
 359         bestpath[t] = psi[t+1][bestpath[t+1]];
 360     }
 361
 362     // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
 363     bestpath[0] = psi[1][bestpath[1]];
 364
 365     unsigned int lastind = 0;
 366     for (unsigned int i=0; i<T; i++)
 367     {
 368         unsigned int step = 128;
 369         for (unsigned int j=0; j<step; j++)
 370         {
 371             lastind = i*step+j;
 372             beat_period[lastind] = bestpath[i];
 373         }
 374 //        std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl;
 375     }
 376
 377     //fill in the last values...
 378     for (unsigned int i=lastind; i<beat_period.size(); i++)
 379     {
 380         beat_period[i] = beat_period[lastind];
 381     }
 382
 383     for (unsigned int i = 0; i < beat_period.size(); i++)
 384     {
 385         tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
 386     }
 387 }
 388
 389 double
 390 TempoTrackV2::get_max_val(const d_vec_t &df)
 391 {
 392     double maxval = 0.;
 393     for (unsigned int i=0; i<df.size(); i++)
 394     {
 395         if (maxval < df[i])
 396         {
 397             maxval = df[i];
 398         }
 399     }
 400
 401     return maxval;
 402 }
 403
 404 int
 405 TempoTrackV2::get_max_ind(const d_vec_t &df)
 406 {
 407     double maxval = 0.;
 408     int ind = 0;
 409     for (unsigned int i=0; i<df.size(); i++)
 410     {
 411         if (maxval < df[i])
 412         {
 413             maxval = df[i];
 414             ind = i;
 415         }
 416     }
 417
 418     return ind;
 419 }
 420
 421 void
 422 TempoTrackV2::normalise_vec(d_vec_t &df)
 423 {
 424     double sum = 0.;
 425     for (unsigned int i=0; i<df.size(); i++)
 426     {
 427         sum += df[i];
 428     }
 429
 430     for (unsigned int i=0; i<df.size(); i++)
 431     {
 432         df[i]/= (sum + EPS);
 433     }
 434 }
 435
 436 // MEPD 28/11/12
 437 // this function has been updated to allow the "alpha" and "tightness" parameters
 438 // of the dynamic program to be set by the user
 439 // the default value of alpha = 0.9 and tightness = 4
 440 void
 441 TempoTrackV2::calculateBeats(const vector<double> &df,
 442                              const vector<double> &beat_period,
 443                              vector<double> &beats, double alpha, double tightness)
 444 {
 445     if (df.empty() || beat_period.empty()) return;
 446
 447     d_vec_t cumscore(df.size()); // store cumulative score
 448     i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
 449     d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
 450
 451     for (unsigned int i=0; i<df.size(); i++)
 452     {
 453         localscore[i] = df[i];
 454         backlink[i] = -1;
 455     }
 456
 457     //double tightness = 4.;
 458     //double alpha = 0.9;
 459     // MEPD 28/11/12
 460     // debug statements that can be removed.
 461 //    std::cerr << "alpha" << alpha << std::endl;
 462 //    std::cerr << "tightness" << tightness << std::endl;
 463
 464     // main loop
 465     for (unsigned int i=0; i<localscore.size(); i++)
 466     {
 467         int prange_min = -2*beat_period[i];
 468         int prange_max = round(-0.5*beat_period[i]);
 469
 470         // transition range
 471         d_vec_t txwt (prange_max - prange_min + 1);
 472         d_vec_t scorecands (txwt.size());
 473
 474         for (unsigned int j=0;j<txwt.size();j++)
 475         {
 476             double mu = static_cast<double> (beat_period[i]);
 477             txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
 478
 479             // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J
 480             // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION:  D_VEC_T SCORECANDS (TXWT.SIZE());
 481
 482             int cscore_ind = i+prange_min+j;
 483             if (cscore_ind >= 0)
 484             {
 485                 scorecands[j] = txwt[j] * cumscore[cscore_ind];
 486             }
 487         }
 488
 489         // find max value and index of maximum value
 490         double vv = get_max_val(scorecands);
 491         int xx = get_max_ind(scorecands);
 492
 493         cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
 494         backlink[i] = i+prange_min+xx;
 495
 496 //        std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
 497     }
 498
 499     // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
 500     d_vec_t tmp_vec;
 501     for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
 502     {
 503         tmp_vec.push_back(cumscore[i]);
 504     }
 505
 506     int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
 507
 508     // can happen if no results obtained earlier (e.g. input too short)
 509     if (startpoint >= (int)backlink.size()) startpoint = backlink.size()-1;
 510
 511     // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
 512     //  BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
 513     i_vec_t ibeats;
 514     ibeats.push_back(startpoint);
 515 //    std::cerr << "startpoint = " << startpoint << std::endl;
 516     while (backlink[ibeats.back()] > 0)
 517     {
 518 //        std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
 519         int b = ibeats.back();
 520         if (backlink[b] == b) break; // shouldn't happen... haha
 521         ibeats.push_back(backlink[b]);
 522     }
 523
 524     // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
 525     for (unsigned int i=0; i<ibeats.size(); i++)
 526     {
 527         beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );
 528     }
 529 }
 530
 531