1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
6 Centre for Digital Music, Queen Mary, University of London.
7 This file copyright 2008-2009 Matthew Davies and QMUL.
9 This program is free software; you can redistribute it and/or
10 modify it under the terms of the GNU General Public License as
11 published by the Free Software Foundation; either version 2 of the
12 License, or (at your option) any later version. See the file
13 COPYING included with this distribution for more information.
16 #include "TempoTrackV2.h"
22 #include "maths/MathUtilities.h"
// Small positive constant added to the denominator wherever a vector is
// normalised by its sum (rcf in get_rcf, the delta columns in viterbi_decode),
// so that an all-zero vector does not cause a division by zero.
24 #define EPS 0.0000008 // just some arbitrary small number
// Constructs a tempo tracker. The two arguments are only stored; they are used
// later to convert beat periods into tempi via 60 * m_rate / m_increment
// (see viterbi_decode).
// rate:      presumably the audio sample rate in Hz -- TODO confirm in header.
// increment: presumably the detection-function hop size in audio samples --
//            TODO confirm in header.
26 TempoTrackV2::TempoTrackV2(float rate, size_t increment) :
27 m_rate(rate), m_increment(increment) { }
// Destructor: empty body, nothing is released explicitly here.
28 TempoTrackV2::~TempoTrackV2() { }
// Smooths the detection function in place with forward-backward filtering:
// the same second-order IIR section is run over the data, the result is
// time-reversed, filtered again, and reversed back.
// df: detection function; overwritten with the filtered signal (same length).
// NOTE(review): the declarations of a[], b[] and of the filter state
// (inp1, inp2, out1, out2), and the per-sample state updates, fall on lines
// not visible in this excerpt -- confirm that the coefficients match
// butter(2,0.4) and that the state is reset between the two passes.
31 TempoTrackV2::filter_df(d_vec_t &df)
// scratch buffer receiving the output of each filtering pass
35 d_vec_t lp_df(df.size());
37 //equivalent in matlab to [b,a] = butter(2,0.4);
// forward pass over df
52 for (unsigned int i = 0;i < df.size();i++)
// difference equation; inp1/inp2 and out1/out2 presumably hold the previous
// two inputs and outputs (their updates are not visible here)
54 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
61 // copy forwards filtering to df...
62 // but, time-reversed, ready for backwards filtering
63 for (unsigned int i = 0;i < df.size();i++)
65 df[i] = lp_df[df.size()-i-1];
// NOTE(review): the body of this loop is not visible in this excerpt;
// presumably it clears lp_df before the second pass -- confirm.
68 for (unsigned int i = 0;i < df.size();i++)
76 // backwards filtering on time-reversed df
77 for (unsigned int i = 0;i < df.size();i++)
79 lp_df[i] = b[0]*df[i] + b[1]*inp1 + b[2]*inp2 - a[1]*out1 - a[2]*out2;
86 // write the re-reversed (i.e. forward) version back to df
87 for (unsigned int i = 0;i < df.size();i++)
89 df[i] = lp_df[df.size()-i-1];
// Estimates the beat period over time from a detection function.
// The detection function is cut into overlapping frames, each frame is turned
// into a comb-filter response (get_rcf), the responses are collected as the
// columns of rcfmat, and viterbi_decode picks the best path through them.
// df:          detection function (read only).
// beat_period: output, filled in by viterbi_decode. It is written by index
//              there, so it must already be sized by the caller.
// tempi:       output, filled in by viterbi_decode (values are appended).
95 TempoTrackV2::calculateBeatPeriod(const vector<double> &df,
96 vector<double> &beat_period,
97 vector<double> &tempi)
99 // to follow matlab.. split into 512 sample frames with a 128 hop size
100 // calculate the acf,
101 // then the rcf.. and then stick the rcfs as columns of a matrix
102 // then call viterbi decoding with weight vector and transition matrix
// length of the weighting curve, i.e. the number of candidate beat periods
// (presumably used to size wv; its declaration is not visible here)
105 unsigned int wv_len = 128;
// Rayleigh parameter, in detection-function frames
106 double rayparam = 43.;
108 // make rayleigh weighting curve
110 for (unsigned int i=0; i<wv.size(); i++)
// wv[i] = (i / rayparam^2) * exp(-i^2 / (2 * rayparam^2));
// the negation inside pow() has no effect because the value is squared
112 wv[i] = (static_cast<double> (i) / pow(rayparam,2.)) * exp((-1.*pow(-static_cast<double> (i),2.)) / (2.*pow(rayparam,2.)));
115 // beat tracking frame size (roughly 6 seconds) and hop (1.5 seconds)
116 unsigned int winlen = 512;
117 unsigned int step = 128;
119 // matrix to store output of comb filter bank, increment column of matrix at each frame
// starts at -1; presumably incremented once per frame before it is used as
// the column index below (the increment is not visible here)
121 int col_counter = -1;
123 // main loop for beat period calculation
// a frame is only processed when a full window fits strictly inside df, so an
// input of winlen samples or fewer produces no frames at all (viterbi_decode
// then returns without writing any output)
124 for (unsigned int i=0; i+winlen<df.size(); i+=step)
// copy the current window out of df
127 d_vec_t dfframe(winlen);
128 for (unsigned int k=0; k<winlen; k++)
130 dfframe[k] = df[i+k];
132 // get rcf vector for current frame
134 get_rcf(dfframe,wv,rcf);
136 rcfmat.push_back( d_vec_t() ); // adds a new column
// copy this frame's comb-filter response into the new column
138 for (unsigned int j=0; j<rcf.size(); j++)
140 rcfmat[col_counter].push_back( rcf[j] );
144 // now call viterbi decoding function
145 viterbi_decode(rcfmat,wv,beat_period,tempi);
// Computes the comb-filter response (rcf) of one detection-function frame:
// adaptive threshold -> autocorrelation -> weighted comb filtering ->
// adaptive threshold -> normalisation to (approximately) unit sum.
// dfframe_in: one frame of the detection function (not modified; a copy is used).
// wv:         weighting curve, indexed by beat period.
// rcf:        output. It is accumulated into with += and indexed up to
//             rcf.size(), so it must already be sized (and presumably
//             zeroed) before the comb-filter loop -- its set-up is not
//             visible in this excerpt.
150 TempoTrackV2::get_rcf(const d_vec_t &dfframe_in, const d_vec_t &wv, d_vec_t &rcf)
152 // calculate autocorrelation function
154 // just hard code for now... don't really need separate functions to do this
// work on a private copy so that the caller's frame is left untouched
158 d_vec_t dfframe(dfframe_in);
160 MathUtilities::adaptiveThreshold(dfframe);
162 d_vec_t acf(dfframe.size());
// one autocorrelation value per lag
165 for (unsigned int lag=0; lag<dfframe.size(); lag++)
// only the (size - lag) overlapping sample pairs contribute
170 for (unsigned int n=0; n<(dfframe.size()-lag); n++)
// product for this sample pair; presumably added to sum on the following
// line (not visible here)
172 tmp = dfframe[n] * dfframe[n+lag];
// divide by the number of overlapping samples at this lag
175 acf[lag] = static_cast<double> (sum/ (dfframe.size()-lag));
178 // now apply comb filtering
// i is the candidate beat period (1-based, stored at rcf[i-1])
181 for (unsigned int i = 2;i < rcf.size();i++) // max beat period
// a indexes the a-th multiple of the period; numelem is declared on a line
// not visible here
183 for (int a = 1;a <= numelem;a++) // number of comb elements
// each comb tooth spans 2a-1 lags centred on a*i, hence the 1/(2a-1) scaling
185 for (int b = 1-a;b <= a-1;b++) // general state using normalisation of comb elements
// NOTE(review): the largest acf index read is numelem*rcf.size() - 2 or so;
// confirm that acf (frame length) is always long enough for rcf.size()*numelem.
187 rcf[i-1] += ( acf[(a*i+b)-1]*wv[i-1] ) / (2.*a-1.); // calculate value for comb filter row
192 // apply adaptive threshold to rcf
193 MathUtilities::adaptiveThreshold(rcf);
// presumably accumulates rcfsum, the sum of all rcf values (body not visible)
196 for (unsigned int i=0; i<rcf.size(); i++)
202 // normalise rcf to sum to unity
203 for (unsigned int i=0; i<rcf.size(); i++)
// EPS keeps the division finite when rcf is all zeros
205 rcf[i] /= (rcfsum + EPS);
// Finds the most likely sequence of beat periods through the comb-filter
// responses using Viterbi decoding, then expands it to one beat period (and
// one tempo) per detection-function frame.
// rcfmat:      one comb-filter response per analysis frame (rcfmat[t][period]).
// wv:          weighting curve; used as the prior for the first frame and to
//              size the transition matrix.
// beat_period: output, written BY INDEX -- it must already be sized by the
//              caller to at least rcfmat.size()*128 elements; remaining
//              elements are filled with the last decoded value.
// tempi:       output; one value is APPENDED per beat_period element, so it
//              is presumably expected to be empty on entry -- TODO confirm.
// Returns without touching the outputs when fewer than two frames are given.
210 TempoTrackV2::viterbi_decode(const d_mat_t &rcfmat, const d_vec_t &wv, d_vec_t &beat_period, d_vec_t &tempi)
212 // following Kevin Murphy's Viterbi decoding to get best path of
213 // beat periods through rcfmat
215 // make transition matrix
// build a wv.size() x wv.size() matrix of zeros
217 for (unsigned int i=0;i<wv.size();i++)
219 tmat.push_back ( d_vec_t() ); // adds a new column
220 for (unsigned int j=0; j<wv.size(); j++)
222 tmat[i].push_back(0.); // fill with zeros initially
226 // variance of Gaussians in transition matrix
227 // formed of Gaussians on diagonal - implies slow tempo change
229 // don't want really short beat periods, or really long ones
// entries for periods below 20 or within 20 of the end stay zero, so those
// periods can never be reached.
// NOTE(review): wv.size()-20 is unsigned and wraps if wv.size() < 20.
230 for (unsigned int i=20;i <wv.size()-20; i++)
232 for (unsigned int j=20; j<wv.size()-20; j++)
// Gaussian centred on the diagonal (j == i); sigma is declared on a line not
// visible here
234 double mu = static_cast<double>(i);
235 tmat[i][j] = exp( (-1.*pow((j-mu),2.)) / (2.*pow(sigma,2.)) );
239 // parameters for Viterbi decoding... this part is taken from
// delta[t][j]: best (normalised) path score ending in period j at frame t;
// psi[t][j]:   the predecessor period that achieved it.
// Both are given the same shape as rcfmat and zero-filled.
244 for (unsigned int i=0;i <rcfmat.size(); i++)
246 delta.push_back( d_vec_t());
247 psi.push_back( i_vec_t());
248 for (unsigned int j=0; j<rcfmat[i].size(); j++)
250 delta[i].push_back(0.); // fill with zeros initially
251 psi[i].push_back(0); // fill with zeros initially
// T = number of frames
256 unsigned int T = delta.size();
258 if (T < 2) return; // can't do anything at all meaningful
// Q = number of candidate beat periods (states).
// NOTE(review): wv and tmat are indexed up to Q below, so wv.size() >= Q is
// required -- confirm against the caller.
260 unsigned int Q = delta[0].size();
262 // initialize first column of delta
// prior (wv) times the first frame's observation
263 for (unsigned int j=0; j<Q; j++)
265 delta[0][j] = wv[j] * rcfmat[0][j];
// normalise the first column so that it sums to (approximately) one
269 double deltasum = 0.;
270 for (unsigned int i=0; i<Q; i++)
272 deltasum += delta[0][i];
274 for (unsigned int i=0; i<Q; i++)
276 delta[0][i] /= (deltasum + EPS);
// forward pass over the remaining frames
280 for (unsigned int t=1; t<T; t++)
// j: destination period at frame t
284 for (unsigned int j=0; j<Q; j++)
// i: source period at frame t-1
286 for (unsigned int i=0; i<Q; i++)
288 tmp_vec[i] = delta[t-1][i] * tmat[j][i];
// keep the best incoming score ...
291 delta[t][j] = get_max_val(tmp_vec);
// ... and remember which source period produced it
293 psi[t][j] = get_max_ind(tmp_vec);
// weight by the observation for period j at frame t
295 delta[t][j] *= rcfmat[t][j];
298 // normalise current delta column
299 double deltasum = 0.;
300 for (unsigned int i=0; i<Q; i++)
302 deltasum += delta[t][i];
304 for (unsigned int i=0; i<Q; i++)
306 delta[t][i] /= (deltasum + EPS);
// copy the final column so that its maximum can be located
312 for (unsigned int i=0; i<Q; i++)
314 tmp_vec[i] = delta[T-1][i];
317 // find starting point - best beat period for "last" frame
318 bestpath[T-1] = get_max_ind(tmp_vec);
320 // backtrace through index of maximum values in psi
// t is unsigned, so a condition of t >= 0 would always be true and t-- would
// wrap around at zero; the loop therefore stops at t == 1 and index 0 is
// handled separately below. T >= 2 is guaranteed by the early return above.
321 for (unsigned int t=T-2; t>0 ;t--)
323 bestpath[t] = psi[t+1][bestpath[t+1]];
326 // weird but necessary hack -- couldn't get above loop to terminate at t >= 0
327 bestpath[0] = psi[1][bestpath[1]];
// expand the per-frame path to one value per detection-function sample:
// each decoded frame covers `step` consecutive beat_period entries
329 unsigned int lastind = 0;
330 for (unsigned int i=0; i<T; i++)
// must match the hop size used when the frames were cut
332 unsigned int step = 128;
333 for (unsigned int j=0; j<step; j++)
// lastind is presumably set to i*step+j on the preceding line (not visible
// here; compare the commented-out trace below)
336 beat_period[lastind] = bestpath[i];
338 // std::cerr << "bestpath[" << i << "] = " << bestpath[i] << " (used for beat_periods " << i*step << " to " << i*step+step-1 << ")" << std::endl;
341 //fill in the last values...
// everything after the last decoded frame repeats the final beat period
342 for (unsigned int i=lastind; i<beat_period.size(); i++)
344 beat_period[i] = beat_period[lastind];
// convert each beat period (in detection-function frames) to a tempo:
// (60 * m_rate / m_increment) / period.
// NOTE(review): a decoded period of 0 would divide by zero here -- confirm
// that get_max_ind can never yield 0 for an all-zero column.
347 for (unsigned int i = 0; i < beat_period.size(); i++)
349 tempi.push_back((60. * m_rate / m_increment)/beat_period[i]);
// Scans df once; callers (viterbi_decode, calculateBeats) use the result as
// the maximum value of the vector.
// NOTE(review): the loop body and return statement are not visible in this
// excerpt -- behaviour for an empty vector is unconfirmed.
354 TempoTrackV2::get_max_val(const d_vec_t &df)
357 for (unsigned int i=0; i<df.size(); i++)
// Scans df once; callers (viterbi_decode, calculateBeats) use the result as
// the index of the maximum value of the vector.
// NOTE(review): the loop body and return statement are not visible in this
// excerpt -- the index returned for an empty or all-equal vector is
// unconfirmed.
369 TempoTrackV2::get_max_ind(const d_vec_t &df)
373 for (unsigned int i=0; i<df.size(); i++)
// Modifies df in place using two passes over it.
// NOTE(review): both loop bodies are not visible in this excerpt; presumably
// the first pass sums the elements and the second divides each element by
// that sum -- confirm.
386 TempoTrackV2::normalise_vec(d_vec_t &df)
389 for (unsigned int i=0; i<df.size(); i++)
394 for (unsigned int i=0; i<df.size(); i++)
// Locates beats by dynamic programming over the detection function: a
// cumulative score is built up frame by frame, each frame links back to its
// best predecessor roughly one beat period earlier, and the beat sequence is
// recovered by following those links from a strong point near the end.
// df:          detection function.
// beat_period: beat period (in detection-function frames) for each frame of
//              df. It is indexed for every i < df.size(), so it must be at
//              least as long as df; only emptiness is checked here.
// beats:       output; beat positions as detection-function frame indices,
//              appended earliest first.
401 TempoTrackV2::calculateBeats(const vector<double> &df,
402 const vector<double> &beat_period,
403 vector<double> &beats)
// nothing to do without input; beats is left untouched
405 if (df.empty() || beat_period.empty()) return;
407 d_vec_t cumscore(df.size()); // store cumulative score
408 i_vec_t backlink(df.size()); // backlink (stores best beat locations at each time instant)
409 d_vec_t localscore(df.size()); // localscore, for now this is the same as the detection function
411 for (unsigned int i=0; i<df.size(); i++)
413 localscore[i] = df[i];
// controls how sharply the transition weighting falls off away from one
// beat period
417 double tightness = 4.;
// forward pass: one cumulative score and one backlink per frame
421 for (unsigned int i=0; i<localscore.size(); i++)
// search window for the previous beat, as offsets relative to i: from two
// beat periods back to half a beat period back. Note the lower bound is
// truncated towards zero while the upper bound is rounded.
423 int prange_min = -2*beat_period[i];
424 int prange_max = round(-0.5*beat_period[i]);
// one transition weight and one candidate score per offset in the window
427 d_vec_t txwt (prange_max - prange_min + 1);
428 d_vec_t scorecands (txwt.size());
430 for (unsigned int j=0;j<txwt.size();j++)
432 double mu = static_cast<double> (beat_period[i]);
// log-Gaussian weighting: equals 1 when the candidate lies exactly one beat
// period back ((round(2*mu)-j) == mu) and decays on either side.
// NOTE(review): mu == 0 would divide by zero here -- confirm that
// beat_period never contains 0.
433 txwt[j] = exp( -0.5*pow(tightness * log((round(2*mu)-j)/mu),2));
435 // IF IN THE ALLOWED RANGE, THEN LOOK AT CUMSCORE[I+PRANGE_MIN+J]
436 // ELSE LEAVE AT DEFAULT VALUE FROM INITIALISATION: D_VEC_T SCORECANDS (TXWT.SIZE());
// absolute frame index of this candidate; negative for early frames, which
// is presumably what the range check on the following line (not visible
// here) guards against
438 int cscore_ind = i+prange_min+j;
441 scorecands[j] = txwt[j] * cumscore[cscore_ind];
445 // find max value and index of maximum value
446 double vv = get_max_val(scorecands);
447 int xx = get_max_ind(scorecands);
// blend the best predecessor score with this frame's own score; alpha is
// declared on a line not visible here
449 cumscore[i] = alpha*vv + (1.-alpha)*localscore[i];
// absolute frame index of the best predecessor (can be negative near the
// start of the signal)
450 backlink[i] = i+prange_min+xx;
452 // std::cerr << "backlink[" << i << "] <= " << backlink[i] << std::endl;
455 // STARTING POINT, I.E. LAST BEAT.. PICK A STRONG POINT IN cumscore VECTOR
// collect the cumulative scores of the final beat period's worth of frames.
// NOTE(review): the start index is computed in double and converted to
// unsigned int; if the last beat period exceeds cumscore.size() the value is
// negative and that conversion is undefined -- confirm callers prevent this.
457 for (unsigned int i=cumscore.size() - beat_period[beat_period.size()-1] ; i<cumscore.size(); i++)
459 tmp_vec.push_back(cumscore[i]);
// strongest frame within that final window, converted back to an absolute
// index
462 int startpoint = get_max_ind(tmp_vec) + cumscore.size() - beat_period[beat_period.size()-1] ;
464 // can happen if no results obtained earlier (e.g. input too short)
// NOTE(review): signed/unsigned comparison -- a negative startpoint converts
// to a large unsigned value and is therefore also clamped to the last index.
465 if (startpoint >= backlink.size()) startpoint = backlink.size()-1;
467 // USE BACKLINK TO GET EACH NEW BEAT (TOWARDS THE BEGINNING OF THE FILE)
468 // BACKTRACKING FROM THE END TO THE BEGINNING.. MAKING SURE NOT TO GO BEFORE SAMPLE 0
470 ibeats.push_back(startpoint);
471 // std::cerr << "startpoint = " << startpoint << std::endl;
// follow the links until one points at or before frame 0
472 while (backlink[ibeats.back()] > 0)
474 // std::cerr << "backlink[" << ibeats.back() << "] = " << backlink[ibeats.back()] << std::endl;
475 int b = ibeats.back();
// a frame that links to itself would otherwise loop forever
476 if (backlink[b] == b) break; // shouldn't happen... haha
477 ibeats.push_back(backlink[b]);
480 // REVERSE SEQUENCE OF IBEATS AND STORE AS BEATS
// ibeats was collected from the end backwards, so emit it in reverse order
481 for (unsigned int i=0; i<ibeats.size(); i++)
483 beats.push_back( static_cast<double>(ibeats[ibeats.size()-i-1]) );