libs/vamp-plugins/TonalChangeDetect.cpp

   1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
   2
   3 /*
   4     QM Vamp Plugin Set
   5
   6     Centre for Digital Music, Queen Mary, University of London.
   7
   8     This program is free software; you can redistribute it and/or
   9     modify it under the terms of the GNU General Public License as
  10     published by the Free Software Foundation; either version 2 of the
  11     License, or (at your option) any later version.  See the file
  12     COPYING included with this distribution for more information.
  13 */
  14
  15 #include "TonalChangeDetect.h"
  16
  17 #include <base/Pitch.h>
  18 #include <dsp/chromagram/Chromagram.h>
  19 #include <dsp/tonal/ChangeDetectionFunction.h>
  20
  21 TonalChangeDetect::TonalChangeDetect(float fInputSampleRate)
  22     : Vamp::Plugin(fInputSampleRate),
  23       m_chromagram(0),
  24       m_step(0),
  25       m_block(0),
  26       m_stepDelay(0),
  27       m_origin(Vamp::RealTime::zeroTime),
  28       m_haveOrigin(false)
  29 {
  30     m_minMIDIPitch = 32;
  31     m_maxMIDIPitch = 108;
  32     m_tuningFrequency = 440;
  33     m_iSmoothingWidth = 5;
  34
  35     setupConfig();
  36 }
  37
  38 TonalChangeDetect::~TonalChangeDetect()
  39 {
  40 }
  41
  42 bool TonalChangeDetect::initialise(size_t channels, size_t stepSize, size_t blockSize)
  43 {
  44     if (m_chromagram) {
  45         delete m_chromagram;
  46         m_chromagram = 0;
  47     }
  48
  49     if (channels < getMinChannelCount() ||
  50         channels > getMaxChannelCount()) {
  51         std::cerr << "TonalChangeDetect::initialise: Given channel count " << channels << " outside acceptable range (" << getMinChannelCount() << " to " << getMaxChannelCount() << ")" << std::endl;
  52         return false;
  53     }
  54
  55     m_chromagram = new Chromagram(m_config);
  56     m_step = m_chromagram->getHopSize();
  57     m_block = m_chromagram->getFrameSize();
  58
  59     if (stepSize != m_step) {
  60         std::cerr << "TonalChangeDetect::initialise: Given step size " << stepSize << " differs from only acceptable value " << m_step << std::endl;
  61         delete m_chromagram;
  62         m_chromagram = 0;
  63         return false;
  64     }
  65     if (blockSize != m_block) {
  66         std::cerr << "TonalChangeDetect::initialise: Given step size " << stepSize << " differs from only acceptable value " << m_step << std::endl;
  67         delete m_chromagram;
  68         m_chromagram = 0;
  69         return false;
  70     }
  71
  72     //    m_stepDelay = (blockSize - stepSize) / 2;
  73     //    m_stepDelay = m_stepDelay / stepSize;
  74     m_stepDelay = (blockSize - stepSize) / stepSize; //!!! why? seems about right to look at, but...
  75
  76 //    std::cerr << "TonalChangeDetect::initialise: step " << stepSize << ", block "
  77 //              << blockSize << ", delay " << m_stepDelay << std::endl;
  78
  79     m_vaCurrentVector.resize(12, 0.0);
  80
  81     return true;
  82
  83 }
  84
  85 std::string TonalChangeDetect::getIdentifier() const
  86 {
  87     return "qm-tonalchange";
  88 }
  89
  90 std::string TonalChangeDetect::getName() const
  91 {
  92     return "Tonal Change";
  93 }
  94
  95 std::string TonalChangeDetect::getDescription() const
  96 {
  97     return "Detect and return the positions of harmonic changes such as chord boundaries";
  98 }
  99
 100 std::string TonalChangeDetect::getMaker() const
 101 {
 102     return "Queen Mary, University of London";
 103 }
 104
 105 int TonalChangeDetect::getPluginVersion() const
 106 {
 107     return 2;
 108 }
 109
 110 std::string TonalChangeDetect::getCopyright() const
 111 {
 112     return "Plugin by Martin Gasser and Christopher Harte.  Copyright (c) 2006-2009 QMUL - All Rights Reserved";
 113 }
 114
 115 TonalChangeDetect::ParameterList TonalChangeDetect::getParameterDescriptors() const
 116 {
 117     ParameterList list;
 118
 119     ParameterDescriptor desc;
 120     desc.identifier = "smoothingwidth";
 121     desc.name = "Gaussian smoothing";
 122     desc.description = "Window length for the internal smoothing operation, in chroma analysis frames";
 123     desc.unit = "frames";
 124     desc.minValue = 0;
 125     desc.maxValue = 20;
 126     desc.defaultValue = 5;
 127     desc.isQuantized = true;
 128     desc.quantizeStep = 1;
 129     list.push_back(desc);
 130
 131     desc.identifier = "minpitch";
 132     desc.name = "Chromagram minimum pitch";
 133     desc.unit = "MIDI units";
 134     desc.description = "Lowest pitch in MIDI units to be included in the chroma analysis";
 135     desc.minValue = 0;
 136     desc.maxValue = 127;
 137     desc.defaultValue = 32;
 138     desc.isQuantized = true;
 139     desc.quantizeStep = 1;
 140     list.push_back(desc);
 141
 142     desc.identifier = "maxpitch";
 143     desc.name = "Chromagram maximum pitch";
 144     desc.unit = "MIDI units";
 145     desc.description = "Highest pitch in MIDI units to be included in the chroma analysis";
 146     desc.minValue = 0;
 147     desc.maxValue = 127;
 148     desc.defaultValue = 108;
 149     desc.isQuantized = true;
 150     desc.quantizeStep = 1;
 151     list.push_back(desc);
 152
 153     desc.identifier = "tuning";
 154     desc.name = "Chromagram tuning frequency";
 155     desc.unit = "Hz";
 156     desc.description = "Frequency of concert A in the music under analysis";
 157     desc.minValue = 420;
 158     desc.maxValue = 460;
 159     desc.defaultValue = 440;
 160     desc.isQuantized = false;
 161     list.push_back(desc);
 162
 163     return list;
 164 }
 165
 166 float
 167 TonalChangeDetect::getParameter(std::string param) const
 168 {
 169     if (param == "smoothingwidth") {
 170         return m_iSmoothingWidth;
 171     }
 172     if (param == "minpitch") {
 173         return m_minMIDIPitch;
 174     }
 175     if (param == "maxpitch") {
 176         return m_maxMIDIPitch;
 177     }
 178     if (param == "tuning") {
 179         return m_tuningFrequency;
 180     }
 181
 182     std::cerr << "WARNING: ChromagramPlugin::getParameter: unknown parameter \""
 183               << param << "\"" << std::endl;
 184     return 0.0;
 185 }
 186
 187 void
 188 TonalChangeDetect::setParameter(std::string param, float value)
 189 {
 190     if (param == "minpitch") {
 191         m_minMIDIPitch = lrintf(value);
 192     } else if (param == "maxpitch") {
 193         m_maxMIDIPitch = lrintf(value);
 194     } else if (param == "tuning") {
 195         m_tuningFrequency = value;
 196     }
 197     else if (param == "smoothingwidth") {
 198         m_iSmoothingWidth = int(value);
 199     } else {
 200         std::cerr << "WARNING: ChromagramPlugin::setParameter: unknown parameter \""
 201                   << param << "\"" << std::endl;
 202     }
 203
 204     setupConfig();
 205 }
 206
 207
 208 void TonalChangeDetect::setupConfig()
 209 {
 210     m_config.FS = lrintf(m_inputSampleRate);
 211     m_config.min = Pitch::getFrequencyForPitch
 212         (m_minMIDIPitch, 0, m_tuningFrequency);
 213     m_config.max = Pitch::getFrequencyForPitch
 214         (m_maxMIDIPitch, 0, m_tuningFrequency);
 215     m_config.BPO = 12;
 216     m_config.CQThresh = 0.0054;
 217     m_config.normalise = MathUtilities::NormaliseNone;
 218
 219     m_step = 0;
 220     m_block = 0;
 221
 222
 223 }
 224
 225 void
 226 TonalChangeDetect::reset()
 227 {
 228     if (m_chromagram) {
 229         delete m_chromagram;
 230         m_chromagram = new Chromagram(m_config);
 231     }
 232     while (!m_pending.empty()) m_pending.pop();
 233     m_vaCurrentVector.clear();
 234     m_TCSGram.clear();
 235
 236     m_origin = Vamp::RealTime::zeroTime;
 237     m_haveOrigin = false;
 238 }
 239
 240 size_t
 241 TonalChangeDetect::getPreferredStepSize() const
 242 {
 243     if (!m_step) {
 244         Chromagram chroma(m_config);
 245         m_step = chroma.getHopSize();
 246         m_block = chroma.getFrameSize();
 247     }
 248
 249     return m_step;
 250 }
 251
 252 size_t
 253 TonalChangeDetect::getPreferredBlockSize() const
 254 {
 255     if (!m_step) {
 256         Chromagram chroma(m_config);
 257         m_step = chroma.getHopSize();
 258         m_block = chroma.getFrameSize();
 259     }
 260
 261     return m_block;
 262 }
 263
 264 TonalChangeDetect::OutputList TonalChangeDetect::getOutputDescriptors() const
 265 {
 266     OutputList list;
 267
 268     OutputDescriptor hc;
 269     hc.identifier = "tcstransform";
 270     hc.name = "Transform to 6D Tonal Content Space";
 271     hc.unit = "";
 272     hc.description = "Representation of content in a six-dimensional tonal space";
 273     hc.hasFixedBinCount = true;
 274     hc.binCount = 6;
 275     hc.hasKnownExtents = true;
 276     hc.minValue = -1.0;
 277     hc.maxValue = 1.0;
 278     hc.isQuantized = false;
 279     hc.sampleType = OutputDescriptor::OneSamplePerStep;
 280
 281     OutputDescriptor d;
 282     d.identifier = "tcfunction";
 283     d.name = "Tonal Change Detection Function";
 284     d.unit = "";
 285     d.description = "Estimate of the likelihood of a tonal change occurring within each spectral frame";
 286     d.minValue = 0;
 287     d.minValue = 2;
 288     d.hasFixedBinCount = true;
 289     d.binCount = 1;
 290     d.hasKnownExtents = false;
 291     d.isQuantized = false;
 292     d.sampleType = OutputDescriptor::VariableSampleRate;
 293     double dStepSecs = double(getPreferredStepSize()) / m_inputSampleRate;
 294     d.sampleRate = 1.0f / dStepSecs;
 295
 296     OutputDescriptor changes;
 297     changes.identifier = "changepositions";
 298     changes.name = "Tonal Change Positions";
 299     changes.unit = "";
 300     changes.description = "Estimated locations of tonal changes";
 301     changes.hasFixedBinCount = true;
 302     changes.binCount = 0;
 303     changes.hasKnownExtents = false;
 304     changes.isQuantized = false;
 305     changes.sampleType = OutputDescriptor::VariableSampleRate;
 306     changes.sampleRate = 1.0 / dStepSecs;
 307
 308     list.push_back(hc);
 309     list.push_back(d);
 310     list.push_back(changes);
 311
 312     return list;
 313 }
 314
 315 TonalChangeDetect::FeatureSet
 316 TonalChangeDetect::process(const float *const *inputBuffers,
 317                            Vamp::RealTime timestamp)
 318 {
 319     if (!m_chromagram) {
 320         cerr << "ERROR: TonalChangeDetect::process: "
 321              << "Chromagram has not been initialised"
 322              << endl;
 323         return FeatureSet();
 324     }
 325
 326     if (!m_haveOrigin) m_origin = timestamp;
 327
 328     // convert float* to double*
 329     double *tempBuffer = new double[m_block];
 330     for (size_t i = 0; i < m_block; ++i) {
 331         tempBuffer[i] = inputBuffers[0][i];
 332     }
 333
 334     double *output = m_chromagram->process(tempBuffer);
 335     delete[] tempBuffer;
 336
 337     for (size_t i = 0; i < 12; i++)
 338     {
 339         m_vaCurrentVector[i] = output[i];
 340     }
 341
 342
 343     FeatureSet returnFeatures;
 344
 345     if (m_stepDelay == 0) {
 346         m_vaCurrentVector.normalizeL1();
 347         TCSVector tcsVector = m_TonalEstimator.transform2TCS(m_vaCurrentVector);
 348         m_TCSGram.addTCSVector(tcsVector);
 349
 350         Feature feature;
 351         feature.hasTimestamp = false;
 352         for (int i = 0; i < 6; i++)
 353         { feature.values.push_back(static_cast<float>(tcsVector[i])); }
 354         feature.label = "";
 355         returnFeatures[0].push_back(feature);
 356
 357         return returnFeatures;
 358     }
 359
 360     if (m_pending.size() == m_stepDelay) {
 361
 362         ChromaVector v = m_pending.front();
 363         v.normalizeL1();
 364         TCSVector tcsVector = m_TonalEstimator.transform2TCS(v);
 365         m_TCSGram.addTCSVector(tcsVector);
 366
 367         Feature feature;
 368         feature.hasTimestamp = false;
 369         for (int i = 0; i < 6; i++)
 370         { feature.values.push_back(static_cast<float>(tcsVector[i])); }
 371         feature.label = "";
 372         returnFeatures[0].push_back(feature);
 373         m_pending.pop();
 374
 375     } else {
 376         returnFeatures[0].push_back(Feature());
 377         m_TCSGram.addTCSVector(TCSVector());
 378     }
 379
 380     m_pending.push(m_vaCurrentVector);
 381
 382
 383     return returnFeatures;
 384 }
 385
 386 TonalChangeDetect::FeatureSet TonalChangeDetect::getRemainingFeatures()
 387 {
 388     FeatureSet returnFeatures;
 389
 390     while (!m_pending.empty()) {
 391         ChromaVector v = m_pending.front();
 392         v.normalizeL1();
 393         TCSVector tcsVector = m_TonalEstimator.transform2TCS(v);
 394         m_TCSGram.addTCSVector(tcsVector);
 395
 396         Feature feature;
 397         feature.hasTimestamp = false;
 398         for (int i = 0; i < 6; i++)
 399         { feature.values.push_back(static_cast<float>(tcsVector[i])); }
 400         feature.label = "";
 401         returnFeatures[0].push_back(feature);
 402         m_pending.pop();
 403     }
 404
 405     ChangeDFConfig dfc;
 406     dfc.smoothingWidth = double(m_iSmoothingWidth);
 407     ChangeDetectionFunction df(dfc);
 408     ChangeDistance d = df.process(m_TCSGram);
 409
 410     for (int i = 0; i < int(d.size()); i++)
 411     {
 412         double dCurrent = d[i];
 413         double dPrevious = d[i > 0 ? i - 1 : i];
 414         double dNext = d[i < int(d.size())-1 ? i + 1 : i];
 415
 416         Feature feature;
 417         feature.label = "";
 418         feature.hasTimestamp = true;
 419         feature.timestamp = m_origin +
 420             Vamp::RealTime::frame2RealTime(i*m_step, m_inputSampleRate);
 421         feature.values.push_back(dCurrent);
 422         returnFeatures[1].push_back(feature);
 423
 424
 425         if (dCurrent > dPrevious && dCurrent > dNext)
 426         {
 427             Feature featurePeak;
 428             featurePeak.label = "";
 429             featurePeak.hasTimestamp = true;
 430             featurePeak.timestamp = m_origin +
 431                 Vamp::RealTime::frame2RealTime(i*m_step, m_inputSampleRate);
 432             returnFeatures[2].push_back(featurePeak);
 433         }
 434
 435     }
 436
 437
 438     return returnFeatures;
 439
 440 }
 441