libs/vamp-sdk/vamp-sdk/hostext/PluginInputDomainAdapter.cpp

   1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
   2
   3 /*
   4     Vamp
   5
   6     An API for audio analysis and feature extraction plugins.
   7
   8     Centre for Digital Music, Queen Mary, University of London.
   9     Copyright 2006-2007 Chris Cannam and QMUL.
  10
  11     Permission is hereby granted, free of charge, to any person
  12     obtaining a copy of this software and associated documentation
  13     files (the "Software"), to deal in the Software without
  14     restriction, including without limitation the rights to use, copy,
  15     modify, merge, publish, distribute, sublicense, and/or sell copies
  16     of the Software, and to permit persons to whom the Software is
  17     furnished to do so, subject to the following conditions:
  18
  19     The above copyright notice and this permission notice shall be
  20     included in all copies or substantial portions of the Software.
  21
  22     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  23     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  24     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  25     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  26     ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
  27     CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  28     WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  29
  30     Except as contained in this notice, the names of the Centre for
  31     Digital Music; Queen Mary, University of London; and Chris Cannam
  32     shall not be used in advertising or otherwise to promote the sale,
  33     use or other dealings in this Software without prior written
  34     authorization.
  35 */
  36
  37 #include "PluginInputDomainAdapter.h"
  38
  39 #include <cmath>
  40
  41
  42 /**
  43  * If you want to compile using FFTW instead of the built-in FFT
  44  * implementation for the PluginInputDomainAdapter, define HAVE_FFTW3
  45  * in the Makefile.
  46  *
  47  * Remember that FFTW is licensed under the GPL (unlike this SDK, which
  48  * is licensed liberally in order to permit closed-source usage), so
  49  * you should not define this symbol unless your code is also under the
  50  * GPL.  Also, parties redistributing this SDK for use in other
  51  * programs should be careful _not_ to define this symbol in order not
  52  * to affect the stated license of this SDK.
  53  *
  54  * Note: This code uses FFTW_MEASURE, and will perform badly on its
  55  * first invocation unless the host has saved and restored FFTW wisdom
  56  * (see the FFTW documentation).
  57  */
  58 #ifdef HAVE_FFTW3
  59 #include <fftw3.h>
  60 #endif
  61
  62
  63 namespace Vamp {
  64
  65 namespace HostExt {
  66
  67 class PluginInputDomainAdapter::Impl
  68 {
  69 public:
  70     Impl(Plugin *plugin, float inputSampleRate);
  71     ~Impl();
  72
  73     bool initialise(size_t channels, size_t stepSize, size_t blockSize);
  74
  75     size_t getPreferredStepSize() const;
  76     size_t getPreferredBlockSize() const;
  77
  78     FeatureSet process(const float *const *inputBuffers, RealTime timestamp);
  79
  80 protected:
  81     Plugin *m_plugin;
  82     float m_inputSampleRate;
  83     int m_channels;
  84     int m_blockSize;
  85     float **m_freqbuf;
  86
  87     double *m_ri;
  88     double *m_window;
  89
  90 #ifdef HAVE_FFTW3
  91     fftw_plan m_plan;
  92     fftw_complex *m_cbuf;
  93 #else
  94     double *m_ro;
  95     double *m_io;
  96     void fft(unsigned int n, bool inverse,
  97              double *ri, double *ii, double *ro, double *io);
  98 #endif
  99
 100     size_t makeBlockSizeAcceptable(size_t) const;
 101 };
 102
 103 PluginInputDomainAdapter::PluginInputDomainAdapter(Plugin *plugin) :
 104     PluginWrapper(plugin)
 105 {
 106     m_impl = new Impl(plugin, m_inputSampleRate);
 107 }
 108
 109 PluginInputDomainAdapter::~PluginInputDomainAdapter()
 110 {
 111     delete m_impl;
 112 }
 113
 114 bool
 115 PluginInputDomainAdapter::initialise(size_t channels, size_t stepSize, size_t blockSize)
 116 {
 117     return m_impl->initialise(channels, stepSize, blockSize);
 118 }
 119
 120 Plugin::InputDomain
 121 PluginInputDomainAdapter::getInputDomain() const
 122 {
 123     return TimeDomain;
 124 }
 125
 126 size_t
 127 PluginInputDomainAdapter::getPreferredStepSize() const
 128 {
 129     return m_impl->getPreferredStepSize();
 130 }
 131
 132 size_t
 133 PluginInputDomainAdapter::getPreferredBlockSize() const
 134 {
 135     return m_impl->getPreferredBlockSize();
 136 }
 137
 138 Plugin::FeatureSet
 139 PluginInputDomainAdapter::process(const float *const *inputBuffers, RealTime timestamp)
 140 {
 141     return m_impl->process(inputBuffers, timestamp);
 142 }
 143
 144 PluginInputDomainAdapter::Impl::Impl(Plugin *plugin, float inputSampleRate) :
 145     m_plugin(plugin),
 146     m_inputSampleRate(inputSampleRate),
 147     m_channels(0),
 148     m_blockSize(0),
 149     m_freqbuf(0),
 150     m_ri(0),
 151     m_window(0),
 152 #ifdef HAVE_FFTW3
 153     m_plan(0),
 154     m_cbuf(0)
 155 #else
 156     m_ro(0),
 157     m_io(0)
 158 #endif
 159 {
 160 }
 161
 162 PluginInputDomainAdapter::Impl::~Impl()
 163 {
 164     // the adapter will delete the plugin
 165
 166     if (m_channels > 0) {
 167         for (int c = 0; c < m_channels; ++c) {
 168             delete[] m_freqbuf[c];
 169         }
 170         delete[] m_freqbuf;
 171 #ifdef HAVE_FFTW3
 172         if (m_plan) {
 173             fftw_destroy_plan(m_plan);
 174             fftw_free(m_ri);
 175             fftw_free(m_cbuf);
 176             m_plan = 0;
 177         }
 178 #else
 179         delete[] m_ri;
 180         delete[] m_ro;
 181         delete[] m_io;
 182 #endif
 183         delete[] m_window;
 184     }
 185 }
 186
 187 // for some visual studii apparently
 188 #ifndef M_PI
 189 #define M_PI 3.14159265358979232846
 190 #endif
 191
 192 bool
 193 PluginInputDomainAdapter::Impl::initialise(size_t channels, size_t stepSize, size_t blockSize)
 194 {
 195     if (m_plugin->getInputDomain() == TimeDomain) {
 196
 197         m_blockSize = int(blockSize);
 198         m_channels = int(channels);
 199
 200         return m_plugin->initialise(channels, stepSize, blockSize);
 201     }
 202
 203     if (blockSize < 2) {
 204         std::cerr << "ERROR: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: blocksize < 2 not supported" << std::endl;
 205         return false;
 206     }
 207
 208     if (blockSize & (blockSize-1)) {
 209         std::cerr << "ERROR: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: non-power-of-two\nblocksize " << blockSize << " not supported" << std::endl;
 210         return false;
 211     }
 212
 213     if (m_channels > 0) {
 214         for (int c = 0; c < m_channels; ++c) {
 215             delete[] m_freqbuf[c];
 216         }
 217         delete[] m_freqbuf;
 218 #ifdef HAVE_FFTW3
 219         if (m_plan) {
 220             fftw_destroy_plan(m_plan);
 221             fftw_free(m_ri);
 222             fftw_free(m_cbuf);
 223             m_plan = 0;
 224         }
 225 #else
 226         delete[] m_ri;
 227         delete[] m_ro;
 228         delete[] m_io;
 229 #endif
 230         delete[] m_window;
 231     }
 232
 233     m_blockSize = int(blockSize);
 234     m_channels = int(channels);
 235
 236     m_freqbuf = new float *[m_channels];
 237     for (int c = 0; c < m_channels; ++c) {
 238         m_freqbuf[c] = new float[m_blockSize + 2];
 239     }
 240     m_window = new double[m_blockSize];
 241
 242     for (int i = 0; i < m_blockSize; ++i) {
 243         // Hanning window
 244         m_window[i] = (0.50 - 0.50 * cos((2.0 * M_PI * i) / m_blockSize));
 245     }
 246
 247 #ifdef HAVE_FFTW3
 248     m_ri = (double *)fftw_malloc(blockSize * sizeof(double));
 249     m_cbuf = (fftw_complex *)fftw_malloc((blockSize/2 + 1) * sizeof(fftw_complex));
 250     m_plan = fftw_plan_dft_r2c_1d(blockSize, m_ri, m_cbuf, FFTW_MEASURE);
 251 #else
 252     m_ri = new double[m_blockSize];
 253     m_ro = new double[m_blockSize];
 254     m_io = new double[m_blockSize];
 255 #endif
 256
 257     return m_plugin->initialise(channels, stepSize, blockSize);
 258 }
 259
 260 size_t
 261 PluginInputDomainAdapter::Impl::getPreferredStepSize() const
 262 {
 263     size_t step = m_plugin->getPreferredStepSize();
 264
 265     if (step == 0 && (m_plugin->getInputDomain() == FrequencyDomain)) {
 266         step = getPreferredBlockSize() / 2;
 267     }
 268
 269     return step;
 270 }
 271
 272 size_t
 273 PluginInputDomainAdapter::Impl::getPreferredBlockSize() const
 274 {
 275     size_t block = m_plugin->getPreferredBlockSize();
 276
 277     if (m_plugin->getInputDomain() == FrequencyDomain) {
 278         if (block == 0) {
 279             block = 1024;
 280         } else {
 281             block = makeBlockSizeAcceptable(block);
 282         }
 283     }
 284
 285     return block;
 286 }
 287
 288 size_t
 289 PluginInputDomainAdapter::Impl::makeBlockSizeAcceptable(size_t blockSize) const
 290 {
 291     if (blockSize < 2) {
 292
 293         std::cerr << "WARNING: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: blocksize < 2 not" << std::endl
 294                   << "supported, increasing from " << blockSize << " to 2" << std::endl;
 295         blockSize = 2;
 296
 297     } else if (blockSize & (blockSize-1)) {
 298
 299 #ifdef HAVE_FFTW3
 300         // not an issue with FFTW
 301 #else
 302
 303         // not a power of two, can't handle that with our built-in FFT
 304         // implementation
 305
 306         size_t nearest = blockSize;
 307         size_t power = 0;
 308         while (nearest > 1) {
 309             nearest >>= 1;
 310             ++power;
 311         }
 312         nearest = 1;
 313         while (power) {
 314             nearest <<= 1;
 315             --power;
 316         }
 317
 318         if (blockSize - nearest > (nearest*2) - blockSize) {
 319             nearest = nearest*2;
 320         }
 321
 322         std::cerr << "WARNING: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: non-power-of-two\nblocksize " << blockSize << " not supported, using blocksize " << nearest << " instead" << std::endl;
 323         blockSize = nearest;
 324
 325 #endif
 326     }
 327
 328     return blockSize;
 329 }
 330
 331 Plugin::FeatureSet
 332 PluginInputDomainAdapter::Impl::process(const float *const *inputBuffers,
 333                                         RealTime timestamp)
 334 {
 335     if (m_plugin->getInputDomain() == TimeDomain) {
 336         return m_plugin->process(inputBuffers, timestamp);
 337     }
 338
 339     // The timestamp supplied should be (according to the Vamp::Plugin
 340     // spec) the time of the start of the time-domain input block.
 341     // However, we want to pass to the plugin an FFT output calculated
 342     // from the block of samples _centred_ on that timestamp.
 343     //
 344     // We have two options:
 345     //
 346     // 1. Buffer the input, calculating the fft of the values at the
 347     // passed-in block minus blockSize/2 rather than starting at the
 348     // passed-in block.  So each time we call process on the plugin,
 349     // we are passing in the same timestamp as was passed to our own
 350     // process plugin, but not (the frequency domain representation
 351     // of) the same set of samples.  Advantages: avoids confusion in
 352     // the host by ensuring the returned values have timestamps
 353     // comparable with that passed in to this function (in fact this
 354     // is pretty much essential for one-value-per-block outputs);
 355     // consistent with hosts such as SV that deal with the
 356     // frequency-domain transform themselves.  Disadvantages: means
 357     // making the not necessarily correct assumption that the samples
 358     // preceding the first official block are all zero (or some other
 359     // known value).
 360     //
 361     // 2. Increase the passed-in timestamps by half the blocksize.  So
 362     // when we call process, we are passing in the frequency domain
 363     // representation of the same set of samples as passed to us, but
 364     // with a different timestamp.  Advantages: simplicity; avoids
 365     // iffy assumption mentioned above.  Disadvantages: inconsistency
 366     // with SV in cases where stepSize != blockSize/2; potential
 367     // confusion arising from returned timestamps being calculated
 368     // from the adjusted input timestamps rather than the original
 369     // ones (and inaccuracy where the returned timestamp is implied,
 370     // as in one-value-per-block).
 371     //
 372     // Neither way is ideal, but I don't think either is strictly
 373     // incorrect either.  I think this is just a case where the same
 374     // plugin can legitimately produce differing results from the same
 375     // input data, depending on how that data is packaged.
 376     //
 377     // We'll go for option 2, adjusting the timestamps.  Note in
 378     // particular that this means some results can differ from those
 379     // produced by SV.
 380
 381 //    std::cerr << "PluginInputDomainAdapter: sampleRate " << m_inputSampleRate << ", blocksize " << m_blockSize << ", adjusting time from " << timestamp;
 382
 383     timestamp = timestamp + RealTime::frame2RealTime
 384         (m_blockSize/2, int(m_inputSampleRate + 0.5));
 385
 386 //    std::cerr << " to " << timestamp << std::endl;
 387
 388     for (int c = 0; c < m_channels; ++c) {
 389
 390         for (int i = 0; i < m_blockSize; ++i) {
 391             m_ri[i] = double(inputBuffers[c][i]) * m_window[i];
 392         }
 393
 394         for (int i = 0; i < m_blockSize/2; ++i) {
 395             // FFT shift
 396             double value = m_ri[i];
 397             m_ri[i] = m_ri[i + m_blockSize/2];
 398             m_ri[i + m_blockSize/2] = value;
 399         }
 400
 401 #ifdef HAVE_FFTW3
 402
 403         fftw_execute(m_plan);
 404
 405         for (int i = 0; i <= m_blockSize/2; ++i) {
 406             m_freqbuf[c][i * 2] = float(m_cbuf[i][0]);
 407             m_freqbuf[c][i * 2 + 1] = float(m_cbuf[i][1]);
 408         }
 409
 410 #else
 411
 412         fft(m_blockSize, false, m_ri, 0, m_ro, m_io);
 413
 414         for (int i = 0; i <= m_blockSize/2; ++i) {
 415             m_freqbuf[c][i * 2] = float(m_ro[i]);
 416             m_freqbuf[c][i * 2 + 1] = float(m_io[i]);
 417         }
 418
 419 #endif
 420     }
 421
 422     return m_plugin->process(m_freqbuf, timestamp);
 423 }
 424
 425 #ifndef HAVE_FFTW3
 426
 427 void
 428 PluginInputDomainAdapter::Impl::fft(unsigned int n, bool inverse,
 429                                     double *ri, double *ii, double *ro, double *io)
 430 {
 431     if (!ri || !ro || !io) return;
 432
 433     unsigned int bits;
 434     unsigned int i, j, k, m;
 435     unsigned int blockSize, blockEnd;
 436
 437     double tr, ti;
 438
 439     if (n < 2) return;
 440     if (n & (n-1)) return;
 441
 442     double angle = 2.0 * M_PI;
 443     if (inverse) angle = -angle;
 444
 445     for (i = 0; ; ++i) {
 446         if (n & (1 << i)) {
 447             bits = i;
 448             break;
 449         }
 450     }
 451
 452     static unsigned int tableSize = 0;
 453     static int *table = 0;
 454
 455     if (tableSize != n) {
 456
 457         delete[] table;
 458
 459         table = new int[n];
 460
 461         for (i = 0; i < n; ++i) {
 462
 463             m = i;
 464
 465             for (j = k = 0; j < bits; ++j) {
 466                 k = (k << 1) | (m & 1);
 467                 m >>= 1;
 468             }
 469
 470             table[i] = k;
 471         }
 472
 473         tableSize = n;
 474     }
 475
 476     if (ii) {
 477         for (i = 0; i < n; ++i) {
 478             ro[table[i]] = ri[i];
 479             io[table[i]] = ii[i];
 480         }
 481     } else {
 482         for (i = 0; i < n; ++i) {
 483             ro[table[i]] = ri[i];
 484             io[table[i]] = 0.0;
 485         }
 486     }
 487
 488     blockEnd = 1;
 489
 490     for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
 491
 492         double delta = angle / (double)blockSize;
 493         double sm2 = -sin(-2 * delta);
 494         double sm1 = -sin(-delta);
 495         double cm2 = cos(-2 * delta);
 496         double cm1 = cos(-delta);
 497         double w = 2 * cm1;
 498         double ar[3], ai[3];
 499
 500         for (i = 0; i < n; i += blockSize) {
 501
 502             ar[2] = cm2;
 503             ar[1] = cm1;
 504
 505             ai[2] = sm2;
 506             ai[1] = sm1;
 507
 508             for (j = i, m = 0; m < blockEnd; j++, m++) {
 509
 510                 ar[0] = w * ar[1] - ar[2];
 511                 ar[2] = ar[1];
 512                 ar[1] = ar[0];
 513
 514                 ai[0] = w * ai[1] - ai[2];
 515                 ai[2] = ai[1];
 516                 ai[1] = ai[0];
 517
 518                 k = j + blockEnd;
 519                 tr = ar[0] * ro[k] - ai[0] * io[k];
 520                 ti = ar[0] * io[k] + ai[0] * ro[k];
 521
 522                 ro[k] = ro[j] - tr;
 523                 io[k] = io[j] - ti;
 524
 525                 ro[j] += tr;
 526                 io[j] += ti;
 527             }
 528         }
 529
 530         blockEnd = blockSize;
 531     }
 532
 533     if (inverse) {
 534
 535         double denom = (double)n;
 536
 537         for (i = 0; i < n; i++) {
 538             ro[i] /= denom;
 539             io[i] /= denom;
 540         }
 541     }
 542 }
 543
 544 #endif
 545
 546 }
 547
 548 }
 549