libs/vamp-pyin/YinVamp.cpp

   1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
   2
   3 /*
   4     pYIN - A fundamental frequency estimator for monophonic audio
   5     Centre for Digital Music, Queen Mary, University of London.
   6
   7     This program is free software; you can redistribute it and/or
   8     modify it under the terms of the GNU General Public License as
   9     published by the Free Software Foundation; either version 2 of the
  10     License, or (at your option) any later version.  See the file
  11     COPYING included with this distribution for more information.
  12 */
  13
  14 #include "YinVamp.h"
  15 #include "MonoNote.h"
  16
  17 #include "vamp-sdk/FFT.h"
  18
  19 #include <vector>
  20 #include <algorithm>
  21
  22 #include <cstdio>
  23 #include <cmath>
  24 #include <complex>
  25
  26 using std::string;
  27 using std::vector;
  28 using Vamp::RealTime;
  29
  30
  31 YinVamp::YinVamp(float inputSampleRate) :
  32     Plugin(inputSampleRate),
  33     m_channels(0),
  34     m_stepSize(256),
  35     m_blockSize(2048),
  36     m_fmin(40),
  37     m_fmax(1600),
  38     m_yin(2048, inputSampleRate, 0.0),
  39     m_outNoF0(0),
  40     m_outNoPeriodicity(0),
  41     m_outNoRms(0),
  42     m_outNoSalience(0),
  43     m_yinParameter(0.15f),
  44     m_outputUnvoiced(2.0f)
  45 {
  46 }
  47
  48 YinVamp::~YinVamp()
  49 {
  50 }
  51
  52 string
  53 YinVamp::getIdentifier() const
  54 {
  55     return "yin";
  56 }
  57
  58 string
  59 YinVamp::getName() const
  60 {
  61     return "Yin";
  62 }
  63
  64 string
  65 YinVamp::getDescription() const
  66 {
  67     return "A vamp implementation of the Yin algorithm for monophonic frequency estimation.";
  68 }
  69
  70 string
  71 YinVamp::getMaker() const
  72 {
  73     return "Matthias Mauch";
  74 }
  75
  76 int
  77 YinVamp::getPluginVersion() const
  78 {
  79     // Increment this each time you release a version that behaves
  80     // differently from the previous one
  81     return 2;
  82 }
  83
  84 string
  85 YinVamp::getCopyright() const
  86 {
  87     return "GPL";
  88 }
  89
  90 YinVamp::InputDomain
  91 YinVamp::getInputDomain() const
  92 {
  93     return TimeDomain;
  94 }
  95
  96 size_t
  97 YinVamp::getPreferredBlockSize() const
  98 {
  99     return 2048;
 100 }
 101
 102 size_t
 103 YinVamp::getPreferredStepSize() const
 104 {
 105     return 256;
 106 }
 107
 108 size_t
 109 YinVamp::getMinChannelCount() const
 110 {
 111     return 1;
 112 }
 113
 114 size_t
 115 YinVamp::getMaxChannelCount() const
 116 {
 117     return 1;
 118 }
 119
 120 YinVamp::ParameterList
 121 YinVamp::getParameterDescriptors() const
 122 {
 123     ParameterList list;
 124
 125     ParameterDescriptor d;
 126     d.identifier = "yinThreshold";
 127     d.name = "Yin threshold";
 128     d.description = "The greedy Yin search for a low value difference function is done once a dip lower than this threshold is reached.";
 129     d.unit = "";
 130     d.minValue = 0.025f;
 131     d.maxValue = 1.0f;
 132     d.defaultValue = 0.15f;
 133     d.isQuantized = true;
 134     d.quantizeStep = 0.025f;
 135
 136     list.push_back(d);
 137
 138     d.identifier = "outputunvoiced";
 139     d.valueNames.clear();
 140     d.name = "Output estimates classified as unvoiced?";
 141     d.description = ".";
 142     d.unit = "";
 143     d.minValue = 0.0f;
 144     d.maxValue = 2.0f;
 145     d.defaultValue = 2.0f;
 146     d.isQuantized = true;
 147     d.quantizeStep = 1.0f;
 148     d.valueNames.push_back("No");
 149     d.valueNames.push_back("Yes");
 150     d.valueNames.push_back("Yes, as negative frequencies");
 151     list.push_back(d);
 152
 153     return list;
 154 }
 155
 156 float
 157 YinVamp::getParameter(string identifier) const
 158 {
 159     if (identifier == "yinThreshold") {
 160         return m_yinParameter;
 161     }
 162     if (identifier == "outputunvoiced") {
 163         return m_outputUnvoiced;
 164     }
 165     return 0.f;
 166 }
 167
 168 void
 169 YinVamp::setParameter(string identifier, float value)
 170 {
 171     if (identifier == "yinThreshold")
 172     {
 173         m_yinParameter = value;
 174     }
 175     if (identifier == "outputunvoiced")
 176     {
 177         m_outputUnvoiced = value;
 178     }
 179 }
 180
 181 YinVamp::ProgramList
 182 YinVamp::getPrograms() const
 183 {
 184     ProgramList list;
 185     return list;
 186 }
 187
 188 string
 189 YinVamp::getCurrentProgram() const
 190 {
 191     return ""; // no programs
 192 }
 193
 194 void
 195 YinVamp::selectProgram(string name)
 196 {
 197 }
 198
 199 YinVamp::OutputList
 200 YinVamp::getOutputDescriptors() const
 201 {
 202     OutputList outputs;
 203
 204     OutputDescriptor d;
 205
 206     int outputNumber = 0;
 207
 208     d.identifier = "f0";
 209     d.name = "Estimated f0";
 210     d.description = "Estimated fundamental frequency";
 211     d.unit = "Hz";
 212     d.hasFixedBinCount = true;
 213     d.binCount = 1;
 214     d.hasKnownExtents = true;
 215     d.minValue = m_fmin;
 216     d.maxValue = 500;
 217     d.isQuantized = false;
 218     d.sampleType = OutputDescriptor::FixedSampleRate;
 219     d.sampleRate = (m_inputSampleRate / m_stepSize);
 220     d.hasDuration = false;
 221     outputs.push_back(d);
 222     m_outNoF0 = outputNumber++;
 223
 224     d.identifier = "periodicity";
 225     d.name = "Periodicity";
 226     d.description = "by-product of Yin f0 estimation";
 227     d.unit = "";
 228     d.hasFixedBinCount = true;
 229     d.binCount = 1;
 230     d.hasKnownExtents = true;
 231     d.minValue = 0;
 232     d.maxValue = 1;
 233     d.isQuantized = false;
 234     d.sampleType = OutputDescriptor::FixedSampleRate;
 235     d.sampleRate = (m_inputSampleRate / m_stepSize);
 236     d.hasDuration = false;
 237     outputs.push_back(d);
 238     m_outNoPeriodicity = outputNumber++;
 239
 240     d.identifier = "rms";
 241     d.name = "Root mean square";
 242     d.description = "Root mean square of the waveform.";
 243     d.unit = "";
 244     d.hasFixedBinCount = true;
 245     d.binCount = 1;
 246     d.hasKnownExtents = true;
 247     d.minValue = 0;
 248     d.maxValue = 1;
 249     d.isQuantized = false;
 250     d.sampleType = OutputDescriptor::FixedSampleRate;
 251     d.sampleRate = (m_inputSampleRate / m_stepSize);
 252     d.hasDuration = false;
 253     outputs.push_back(d);
 254     m_outNoRms = outputNumber++;
 255
 256     d.identifier = "salience";
 257     d.name = "Salience";
 258     d.description = "Yin Salience";
 259     d.hasFixedBinCount = true;
 260     d.binCount = m_blockSize / 2;
 261     d.hasKnownExtents = true;
 262     d.minValue = 0;
 263     d.maxValue = 1;
 264     d.isQuantized = false;
 265     d.sampleType = OutputDescriptor::FixedSampleRate;
 266     d.sampleRate = (m_inputSampleRate / m_stepSize);
 267     d.hasDuration = false;
 268     outputs.push_back(d);
 269     m_outNoSalience = outputNumber++;
 270
 271     return outputs;
 272 }
 273
 274 bool
 275 YinVamp::initialise(size_t channels, size_t stepSize, size_t blockSize)
 276 {
 277     if (channels < getMinChannelCount() ||
 278         channels > getMaxChannelCount()) return false;
 279
 280 /*
 281     std::cerr << "YinVamp::initialise: channels = " << channels
 282           << ", stepSize = " << stepSize << ", blockSize = " << blockSize
 283           << std::endl;
 284 */
 285     m_channels = channels;
 286     m_stepSize = stepSize;
 287     m_blockSize = blockSize;
 288
 289     reset();
 290
 291     return true;
 292 }
 293
 294 void
 295 YinVamp::reset()
 296 {
 297     m_yin.setThreshold(m_yinParameter);
 298     m_yin.setFrameSize(m_blockSize);
 299 /*
 300     std::cerr << "YinVamp::reset: yin threshold set to " << (m_yinParameter)
 301           << ", blockSize = " << m_blockSize
 302           << std::endl;
 303 */
 304 }
 305
 306 YinVamp::FeatureSet
 307 YinVamp::process(const float *const *inputBuffers, RealTime timestamp)
 308 {
 309     timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/2, lrintf(m_inputSampleRate));
 310     FeatureSet fs;
 311
 312     double *dInputBuffers = new double[m_blockSize];
 313     for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
 314
 315     Yin::YinOutput yo = m_yin.process(dInputBuffers);
 316     // std::cerr << "f0 in YinVamp: " << yo.f0 << std::endl;
 317     Feature f;
 318     f.hasTimestamp = true;
 319     f.timestamp = timestamp;
 320     if (m_outputUnvoiced == 0.0f)
 321     {
 322         // std::cerr << "f0 in YinVamp: " << yo.f0 << std::endl;
 323         if (yo.f0 > 0 && yo.f0 < m_fmax && yo.f0 > m_fmin) {
 324             f.values.push_back(yo.f0);
 325             fs[m_outNoF0].push_back(f);
 326         }
 327     } else if (m_outputUnvoiced == 1.0f)
 328     {
 329         if (fabs(yo.f0) < m_fmax && fabs(yo.f0) > m_fmin) {
 330             f.values.push_back(fabs(yo.f0));
 331             fs[m_outNoF0].push_back(f);
 332         }
 333     } else
 334     {
 335         if (fabs(yo.f0) < m_fmax && fabs(yo.f0) > m_fmin) {
 336             f.values.push_back(yo.f0);
 337             fs[m_outNoF0].push_back(f);
 338         }
 339     }
 340
 341     f.values.clear();
 342     f.values.push_back(yo.rms);
 343     fs[m_outNoRms].push_back(f);
 344
 345     f.values.clear();
 346     for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin)
 347     {
 348         f.values.push_back(yo.salience[iBin]);
 349     }
 350     fs[m_outNoSalience].push_back(f);
 351
 352     f.values.clear();
 353     // f.values[0] = yo.periodicity;
 354     f.values.push_back(yo.periodicity);
 355     fs[m_outNoPeriodicity].push_back(f);
 356
 357     delete [] dInputBuffers;
 358
 359     return fs;
 360 }
 361
 362 YinVamp::FeatureSet
 363 YinVamp::getRemainingFeatures()
 364 {
 365     FeatureSet fs;
 366     return fs;
 367 }