libs/vamp-plugins/BeatTrack.cpp

   1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
   2
   3 /*
   4     QM Vamp Plugin Set
   5
   6     Centre for Digital Music, Queen Mary, University of London.
   7
   8     This program is free software; you can redistribute it and/or
   9     modify it under the terms of the GNU General Public License as
  10     published by the Free Software Foundation; either version 2 of the
  11     License, or (at your option) any later version.  See the file
  12     COPYING included with this distribution for more information.
  13 */
  14
#include "BeatTrack.h"

#include <cstdio>

#include <dsp/onsets/DetectionFunction.h>
#include <dsp/onsets/PeakPicking.h>
#include <dsp/tempotracking/TempoTrack.h>
#include <dsp/tempotracking/TempoTrackV2.h>
  21
  22 using std::string;
  23 using std::vector;
  24 using std::cerr;
  25 using std::endl;
  26
// Analysis hop expressed in seconds. getPreferredStepSize() multiplies it by
// the input sample rate, and getOutputDescriptors() uses its reciprocal as the
// sample rate of the beat and tempo outputs.
float BeatTracker::m_stepSecs = 0.01161; // 512 samples at 44100

// Values of the "method" parameter: getRemainingFeatures() runs
// beatTrackOld() for METHOD_OLD and beatTrackNew() otherwise.
#define METHOD_OLD 0
#define METHOD_NEW 1
  31
  32 class BeatTrackerData
  33 {
  34 public:
  35     BeatTrackerData(const DFConfig &config) : dfConfig(config) {
  36     df = new DetectionFunction(config);
  37     }
  38     ~BeatTrackerData() {
  39     delete df;
  40     }
  41     void reset() {
  42     delete df;
  43     df = new DetectionFunction(dfConfig);
  44     dfOutput.clear();
  45         origin = Vamp::RealTime::zeroTime;
  46     }
  47
  48     DFConfig dfConfig;
  49     DetectionFunction *df;
  50     vector<double> dfOutput;
  51     Vamp::RealTime origin;
  52 };
  53
  54
  55 BeatTracker::BeatTracker(float inputSampleRate) :
  56     Vamp::Plugin(inputSampleRate),
  57     m_d(0),
  58     m_method(METHOD_NEW),
  59     m_dfType(DF_COMPLEXSD),
  60     m_alpha(0.9),                       // MEPD new exposed parameter for beat tracker, default value = 0.9 (as old version)
  61     m_tightness(4.),
  62     m_inputtempo(120.),         // MEPD new exposed parameter for beat tracker, default value = 120. (as old version)
  63     m_constraintempo(false), // MEPD new exposed parameter for beat tracker, default value = false (as old version)
  64     // calling the beat tracker with these default parameters will give the same output as the previous existing version
  65     m_whiten(false)
  66
  67 {
  68 }
  69
  70 BeatTracker::~BeatTracker()
  71 {
  72     delete m_d;
  73 }
  74
  75 string
  76 BeatTracker::getIdentifier() const
  77 {
  78     return "qm-tempotracker";
  79 }
  80
  81 string
  82 BeatTracker::getName() const
  83 {
  84     return "Tempo and Beat Tracker";
  85 }
  86
  87 string
  88 BeatTracker::getDescription() const
  89 {
  90     return "Estimate beat locations and tempo";
  91 }
  92
  93 string
  94 BeatTracker::getMaker() const
  95 {
  96     return "Queen Mary, University of London";
  97 }
  98
  99 int
 100 BeatTracker::getPluginVersion() const
 101 {
 102     return 6;
 103 }
 104
 105 string
 106 BeatTracker::getCopyright() const
 107 {
 108     return "Plugin by Christian Landone and Matthew Davies.  Copyright (c) 2006-2013 QMUL - All Rights Reserved";
 109 }
 110
 111 BeatTracker::ParameterList
 112 BeatTracker::getParameterDescriptors() const
 113 {
 114     ParameterList list;
 115
 116     ParameterDescriptor desc;
 117
 118     desc.identifier = "method";
 119     desc.name = "Beat Tracking Method";
 120     desc.description = "Basic method to use ";
 121     desc.minValue = 0;
 122     desc.maxValue = 1;
 123     desc.defaultValue = METHOD_NEW;
 124     desc.isQuantized = true;
 125     desc.quantizeStep = 1;
 126     desc.valueNames.push_back("Old");
 127     desc.valueNames.push_back("New");
 128     list.push_back(desc);
 129
 130     desc.identifier = "dftype";
 131     desc.name = "Onset Detection Function Type";
 132     desc.description = "Method used to calculate the onset detection function";
 133     desc.minValue = 0;
 134     desc.maxValue = 4;
 135     desc.defaultValue = 3;
 136     desc.valueNames.clear();
 137     desc.valueNames.push_back("High-Frequency Content");
 138     desc.valueNames.push_back("Spectral Difference");
 139     desc.valueNames.push_back("Phase Deviation");
 140     desc.valueNames.push_back("Complex Domain");
 141     desc.valueNames.push_back("Broadband Energy Rise");
 142     list.push_back(desc);
 143
 144     desc.identifier = "whiten";
 145     desc.name = "Adaptive Whitening";
 146     desc.description = "Normalize frequency bin magnitudes relative to recent peak levels";
 147     desc.minValue = 0;
 148     desc.maxValue = 1;
 149     desc.defaultValue = 0;
 150     desc.isQuantized = true;
 151     desc.quantizeStep = 1;
 152     desc.unit = "";
 153     desc.valueNames.clear();
 154     list.push_back(desc);
 155
 156     // MEPD new exposed parameter - used in the dynamic programming part of the beat tracker
 157     //Alpha Parameter of Beat Tracker
 158     desc.identifier = "alpha";
 159     desc.name = "Alpha";
 160     desc.description = "Inertia - Flexibility Trade Off";
 161     desc.minValue =  0.1;
 162     desc.maxValue = 0.99;
 163     desc.defaultValue = 0.90;
 164     desc.unit = "";
 165     desc.isQuantized = false;
 166     list.push_back(desc);
 167
 168     // We aren't exposing tightness as a parameter, it's fixed at 4
 169
 170     // MEPD new exposed parameter - used in the periodicity estimation
 171     //User input tempo
 172     desc.identifier = "inputtempo";
 173     desc.name = "Tempo Hint";
 174     desc.description = "User-defined tempo on which to centre the tempo preference function";
 175     desc.minValue =  50;
 176     desc.maxValue = 250;
 177     desc.defaultValue = 120;
 178     desc.unit = "BPM";
 179     desc.isQuantized = true;
 180     list.push_back(desc);
 181
 182     // MEPD new exposed parameter - used in periodicity estimation
 183     desc.identifier = "constraintempo";
 184     desc.name = "Constrain Tempo";
 185     desc.description = "Constrain more tightly around the tempo hint, using a Gaussian weighting instead of Rayleigh";
 186     desc.minValue = 0;
 187     desc.maxValue = 1;
 188     desc.defaultValue = 0;
 189     desc.isQuantized = true;
 190     desc.quantizeStep = 1;
 191     desc.unit = "";
 192     desc.valueNames.clear();
 193     list.push_back(desc);
 194
 195
 196
 197     return list;
 198 }
 199
 200 float
 201 BeatTracker::getParameter(std::string name) const
 202 {
 203     if (name == "dftype") {
 204         switch (m_dfType) {
 205         case DF_HFC: return 0;
 206         case DF_SPECDIFF: return 1;
 207         case DF_PHASEDEV: return 2;
 208         default: case DF_COMPLEXSD: return 3;
 209         case DF_BROADBAND: return 4;
 210         }
 211     } else if (name == "method") {
 212         return m_method;
 213     } else if (name == "whiten") {
 214         return m_whiten ? 1.0 : 0.0;
 215     } else if (name == "alpha") {
 216         return m_alpha;
 217     }  else if (name == "inputtempo") {
 218         return m_inputtempo;
 219     }  else if (name == "constraintempo") {
 220         return m_constraintempo ? 1.0 : 0.0;
 221     }
 222     return 0.0;
 223 }
 224
 225 void
 226 BeatTracker::setParameter(std::string name, float value)
 227 {
 228     if (name == "dftype") {
 229         switch (lrintf(value)) {
 230         case 0: m_dfType = DF_HFC; break;
 231         case 1: m_dfType = DF_SPECDIFF; break;
 232         case 2: m_dfType = DF_PHASEDEV; break;
 233         default: case 3: m_dfType = DF_COMPLEXSD; break;
 234         case 4: m_dfType = DF_BROADBAND; break;
 235         }
 236     } else if (name == "method") {
 237         m_method = lrintf(value);
 238     } else if (name == "whiten") {
 239         m_whiten = (value > 0.5);
 240     } else if (name == "alpha") {
 241         m_alpha = value;
 242     } else if (name == "inputtempo") {
 243         m_inputtempo = value;
 244     } else if (name == "constraintempo") {
 245         m_constraintempo = (value > 0.5);
 246     }
 247 }
 248
 249 bool
 250 BeatTracker::initialise(size_t channels, size_t stepSize, size_t blockSize)
 251 {
 252     if (m_d) {
 253     delete m_d;
 254     m_d = 0;
 255     }
 256
 257     if (channels < getMinChannelCount() ||
 258     channels > getMaxChannelCount()) {
 259         std::cerr << "BeatTracker::initialise: Unsupported channel count: "
 260                   << channels << std::endl;
 261         return false;
 262     }
 263
 264     if (stepSize != getPreferredStepSize()) {
 265         std::cerr << "ERROR: BeatTracker::initialise: Unsupported step size for this sample rate: "
 266                   << stepSize << " (wanted " << (getPreferredStepSize()) << ")" << std::endl;
 267         return false;
 268     }
 269
 270     if (blockSize != getPreferredBlockSize()) {
 271         std::cerr << "WARNING: BeatTracker::initialise: Sub-optimal block size for this sample rate: "
 272                   << blockSize << " (wanted " << getPreferredBlockSize() << ")" << std::endl;
 273 //        return false;
 274     }
 275
 276     DFConfig dfConfig;
 277     dfConfig.DFType = m_dfType;
 278     dfConfig.stepSize = stepSize;
 279     dfConfig.frameLength = blockSize;
 280     dfConfig.dbRise = 3;
 281     dfConfig.adaptiveWhitening = m_whiten;
 282     dfConfig.whiteningRelaxCoeff = -1;
 283     dfConfig.whiteningFloor = -1;
 284
 285     m_d = new BeatTrackerData(dfConfig);
 286     return true;
 287 }
 288
 289 void
 290 BeatTracker::reset()
 291 {
 292     if (m_d) m_d->reset();
 293 }
 294
 295 size_t
 296 BeatTracker::getPreferredStepSize() const
 297 {
 298     size_t step = size_t(m_inputSampleRate * m_stepSecs + 0.0001);
 299 //    std::cerr << "BeatTracker::getPreferredStepSize: input sample rate is " << m_inputSampleRate << ", step size is " << step << std::endl;
 300     return step;
 301 }
 302
 303 size_t
 304 BeatTracker::getPreferredBlockSize() const
 305 {
 306     size_t theoretical = getPreferredStepSize() * 2;
 307
 308     // I think this is not necessarily going to be a power of two, and
 309     // the host might have a problem with that, but I'm not sure we
 310     // can do much about it here
 311     return theoretical;
 312 }
 313
 314 BeatTracker::OutputList
 315 BeatTracker::getOutputDescriptors() const
 316 {
 317     OutputList list;
 318
 319     OutputDescriptor beat;
 320     beat.identifier = "beats";
 321     beat.name = "Beats";
 322     beat.description = "Estimated metrical beat locations";
 323     beat.unit = "";
 324     beat.hasFixedBinCount = true;
 325     beat.binCount = 0;
 326     beat.sampleType = OutputDescriptor::VariableSampleRate;
 327     beat.sampleRate = 1.0 / m_stepSecs;
 328
 329     OutputDescriptor df;
 330     df.identifier = "detection_fn";
 331     df.name = "Onset Detection Function";
 332     df.description = "Probability function of note onset likelihood";
 333     df.unit = "";
 334     df.hasFixedBinCount = true;
 335     df.binCount = 1;
 336     df.hasKnownExtents = false;
 337     df.isQuantized = false;
 338     df.sampleType = OutputDescriptor::OneSamplePerStep;
 339
 340     OutputDescriptor tempo;
 341     tempo.identifier = "tempo";
 342     tempo.name = "Tempo";
 343     tempo.description = "Locked tempo estimates";
 344     tempo.unit = "bpm";
 345     tempo.hasFixedBinCount = true;
 346     tempo.binCount = 1;
 347     tempo.hasKnownExtents = false;
 348     tempo.isQuantized = false;
 349     tempo.sampleType = OutputDescriptor::VariableSampleRate;
 350     tempo.sampleRate = 1.0 / m_stepSecs;
 351
 352     list.push_back(beat);
 353     list.push_back(df);
 354     list.push_back(tempo);
 355
 356     return list;
 357 }
 358
 359 BeatTracker::FeatureSet
 360 BeatTracker::process(const float *const *inputBuffers,
 361                      Vamp::RealTime timestamp)
 362 {
 363     if (!m_d) {
 364     cerr << "ERROR: BeatTracker::process: "
 365          << "BeatTracker has not been initialised"
 366          << endl;
 367     return FeatureSet();
 368     }
 369
 370     size_t len = m_d->dfConfig.frameLength / 2 + 1;
 371
 372     double *reals = new double[len];
 373     double *imags = new double[len];
 374
 375     // We only support a single input channel
 376
 377     for (size_t i = 0; i < len; ++i) {
 378         reals[i] = inputBuffers[0][i*2];
 379         imags[i] = inputBuffers[0][i*2+1];
 380     }
 381
 382     double output = m_d->df->processFrequencyDomain(reals, imags);
 383
 384     delete[] reals;
 385     delete[] imags;
 386
 387     if (m_d->dfOutput.empty()) m_d->origin = timestamp;
 388
 389     m_d->dfOutput.push_back(output);
 390
 391     FeatureSet returnFeatures;
 392
 393     Feature feature;
 394     feature.hasTimestamp = false;
 395     feature.values.push_back(output);
 396
 397     returnFeatures[1].push_back(feature); // detection function is output 1
 398     return returnFeatures;
 399 }
 400
 401 BeatTracker::FeatureSet
 402 BeatTracker::getRemainingFeatures()
 403 {
 404     if (!m_d) {
 405     cerr << "ERROR: BeatTracker::getRemainingFeatures: "
 406          << "BeatTracker has not been initialised"
 407          << endl;
 408     return FeatureSet();
 409     }
 410
 411     if (m_method == METHOD_OLD) return beatTrackOld();
 412     else return beatTrackNew();
 413 }
 414
 415 BeatTracker::FeatureSet
 416 BeatTracker::beatTrackOld()
 417 {
 418     double aCoeffs[] = { 1.0000, -0.5949, 0.2348 };
 419     double bCoeffs[] = { 0.1600,  0.3200, 0.1600 };
 420
 421     TTParams ttParams;
 422     ttParams.winLength = 512;
 423     ttParams.lagLength = 128;
 424     ttParams.LPOrd = 2;
 425     ttParams.LPACoeffs = aCoeffs;
 426     ttParams.LPBCoeffs = bCoeffs;
 427     ttParams.alpha = 9;
 428     ttParams.WinT.post = 8;
 429     ttParams.WinT.pre = 7;
 430
 431     TempoTrack tempoTracker(ttParams);
 432
 433     vector<double> tempi;
 434     vector<int> beats = tempoTracker.process(m_d->dfOutput, &tempi);
 435
 436     FeatureSet returnFeatures;
 437
 438     char label[100];
 439
 440     for (size_t i = 0; i < beats.size(); ++i) {
 441
 442     size_t frame = beats[i] * m_d->dfConfig.stepSize;
 443
 444     Feature feature;
 445     feature.hasTimestamp = true;
 446     feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime
 447         (frame, lrintf(m_inputSampleRate));
 448
 449     float bpm = 0.0;
 450     int frameIncrement = 0;
 451
 452     if (i < beats.size() - 1) {
 453
 454         frameIncrement = (beats[i+1] - beats[i]) * m_d->dfConfig.stepSize;
 455
 456         // one beat is frameIncrement frames, so there are
 457         // samplerate/frameIncrement bps, so
 458         // 60*samplerate/frameIncrement bpm
 459
 460         if (frameIncrement > 0) {
 461         bpm = (60.0 * m_inputSampleRate) / frameIncrement;
 462         bpm = int(bpm * 100.0 + 0.5) / 100.0;
 463                 sprintf(label, "%.2f bpm", bpm);
 464                 feature.label = label;
 465         }
 466     }
 467
 468     returnFeatures[0].push_back(feature); // beats are output 0
 469     }
 470
 471     double prevTempo = 0.0;
 472
 473     for (size_t i = 0; i < tempi.size(); ++i) {
 474
 475         size_t frame = i * m_d->dfConfig.stepSize * ttParams.lagLength;
 476
 477 //        std::cerr << "unit " << i << ", step size " << m_d->dfConfig.stepSize << ", hop " << ttParams.lagLength << ", frame = " << frame << std::endl;
 478
 479         if (tempi[i] > 1 && int(tempi[i] * 100) != int(prevTempo * 100)) {
 480             Feature feature;
 481             feature.hasTimestamp = true;
 482             feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime
 483                 (frame, lrintf(m_inputSampleRate));
 484             feature.values.push_back(tempi[i]);
 485             sprintf(label, "%.2f bpm", tempi[i]);
 486             feature.label = label;
 487             returnFeatures[2].push_back(feature); // tempo is output 2
 488             prevTempo = tempi[i];
 489         }
 490     }
 491
 492     return returnFeatures;
 493 }
 494
 495 BeatTracker::FeatureSet
 496 BeatTracker::beatTrackNew()
 497 {
 498     vector<double> df;
 499     vector<double> beatPeriod;
 500     vector<double> tempi;
 501
 502     size_t nonZeroCount = m_d->dfOutput.size();
 503     while (nonZeroCount > 0) {
 504         if (m_d->dfOutput[nonZeroCount-1] > 0.0) {
 505             break;
 506         }
 507         --nonZeroCount;
 508     }
 509
 510 //    std::cerr << "Note: nonZeroCount was " << m_d->dfOutput.size() << ", is now " << nonZeroCount << std::endl;
 511
 512     for (size_t i = 2; i < nonZeroCount; ++i) { // discard first two elts
 513         df.push_back(m_d->dfOutput[i]);
 514         beatPeriod.push_back(0.0);
 515     }
 516     if (df.empty()) return FeatureSet();
 517
 518     TempoTrackV2 tt(m_inputSampleRate, m_d->dfConfig.stepSize);
 519
 520
 521     // MEPD - note this function is now passed 2 new parameters, m_inputtempo and m_constraintempo
 522     tt.calculateBeatPeriod(df, beatPeriod, tempi, m_inputtempo, m_constraintempo);
 523
 524     vector<double> beats;
 525
 526     // MEPD - note this function is now passed 2 new parameters, m_alpha and m_tightness
 527     tt.calculateBeats(df, beatPeriod, beats, m_alpha, m_tightness);
 528
 529     FeatureSet returnFeatures;
 530
 531     char label[100];
 532
 533     for (size_t i = 0; i < beats.size(); ++i) {
 534
 535     size_t frame = beats[i] * m_d->dfConfig.stepSize;
 536
 537     Feature feature;
 538     feature.hasTimestamp = true;
 539     feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime
 540         (frame, lrintf(m_inputSampleRate));
 541
 542     float bpm = 0.0;
 543     int frameIncrement = 0;
 544
 545     if (i+1 < beats.size()) {
 546
 547         frameIncrement = (beats[i+1] - beats[i]) * m_d->dfConfig.stepSize;
 548
 549         // one beat is frameIncrement frames, so there are
 550         // samplerate/frameIncrement bps, so
 551         // 60*samplerate/frameIncrement bpm
 552
 553         if (frameIncrement > 0) {
 554         bpm = (60.0 * m_inputSampleRate) / frameIncrement;
 555         bpm = int(bpm * 100.0 + 0.5) / 100.0;
 556                 sprintf(label, "%.2f bpm", bpm);
 557                 feature.label = label;
 558         }
 559     }
 560
 561     returnFeatures[0].push_back(feature); // beats are output 0
 562     }
 563
 564     double prevTempo = 0.0;
 565
 566     for (size_t i = 0; i < tempi.size(); ++i) {
 567
 568     size_t frame = i * m_d->dfConfig.stepSize;
 569
 570         if (tempi[i] > 1 && int(tempi[i] * 100) != int(prevTempo * 100)) {
 571             Feature feature;
 572             feature.hasTimestamp = true;
 573             feature.timestamp = m_d->origin + Vamp::RealTime::frame2RealTime
 574                 (frame, lrintf(m_inputSampleRate));
 575             feature.values.push_back(tempi[i]);
 576             sprintf(label, "%.2f bpm", tempi[i]);
 577             feature.label = label;
 578             returnFeatures[2].push_back(feature); // tempo is output 2
 579             prevTempo = tempi[i];
 580         }
 581     }
 582
 583     return returnFeatures;
 584 }