libs/vamp-sdk/vamp-sdk/hostext/PluginInputDomainAdapter.cpp

   1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
   2
   3 /*
   4     Vamp
   5
   6     An API for audio analysis and feature extraction plugins.
   7
   8     Centre for Digital Music, Queen Mary, University of London.
   9     Copyright 2006-2007 Chris Cannam and QMUL.
  10
  11     This file is based in part on Don Cross's public domain FFT
  12     implementation.
  13
  14     Permission is hereby granted, free of charge, to any person
  15     obtaining a copy of this software and associated documentation
  16     files (the "Software"), to deal in the Software without
  17     restriction, including without limitation the rights to use, copy,
  18     modify, merge, publish, distribute, sublicense, and/or sell copies
  19     of the Software, and to permit persons to whom the Software is
  20     furnished to do so, subject to the following conditions:
  21
  22     The above copyright notice and this permission notice shall be
  23     included in all copies or substantial portions of the Software.
  24
  25     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  26     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  27     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  28     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  29     ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
  30     CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  31     WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  32
  33     Except as contained in this notice, the names of the Centre for
  34     Digital Music; Queen Mary, University of London; and Chris Cannam
  35     shall not be used in advertising or otherwise to promote the sale,
  36     use or other dealings in this Software without prior written
  37     authorization.
  38 */
  39
  40 #include "PluginInputDomainAdapter.h"
  41
  42 #include <cmath>
  43
  44 /**
  45  * If you want to compile using FFTW instead of the built-in FFT
  46  * implementation for the PluginInputDomainAdapter, define HAVE_FFTW3
  47  * in the Makefile.
  48  *
  49  * Be aware that FFTW is licensed under the GPL -- unlike this SDK,
  50  * which is provided under a more liberal BSD license in order to
  51  * permit use in closed source applications.  The use of FFTW would
  52  * mean that your code would need to be licensed under the GPL as
  53  * well.  Do not define this symbol unless you understand and accept
  54  * the implications of this.
  55  *
  56  * Parties such as Linux distribution packagers who redistribute this
  57  * SDK for use in other programs should _not_ define this symbol, as
  58  * it would change the effective licensing terms under which the SDK
  59  * was available to third party developers.
  60  *
  61  * The default is not to use FFTW, and to use the built-in FFT instead.
  62  *
  63  * Note: The FFTW code uses FFTW_MEASURE, and so will perform badly on
  64  * its first invocation unless the host has saved and restored FFTW
  65  * wisdom (see the FFTW documentation).
  66  */
  67 #ifdef HAVE_FFTW3
  68 #include <fftw3.h>
  69 #endif
  70
  71 namespace Vamp {
  72
  73 namespace HostExt {
  74
  75 class PluginInputDomainAdapter::Impl
  76 {
  77 public:
  78     Impl(Plugin *plugin, float inputSampleRate);
  79     ~Impl();
  80
  81     bool initialise(size_t channels, size_t stepSize, size_t blockSize);
  82
  83     size_t getPreferredStepSize() const;
  84     size_t getPreferredBlockSize() const;
  85
  86     FeatureSet process(const float *const *inputBuffers, RealTime timestamp);
  87
  88 protected:
  89     Plugin *m_plugin;
  90     float m_inputSampleRate;
  91     int m_channels;
  92     int m_blockSize;
  93     float **m_freqbuf;
  94
  95     double *m_ri;
  96     double *m_window;
  97
  98 #ifdef HAVE_FFTW3
  99     fftw_plan m_plan;
 100     fftw_complex *m_cbuf;
 101 #else
 102     double *m_ro;
 103     double *m_io;
 104     void fft(unsigned int n, bool inverse,
 105              double *ri, double *ii, double *ro, double *io);
 106 #endif
 107
 108     size_t makeBlockSizeAcceptable(size_t) const;
 109 };
 110
 111 PluginInputDomainAdapter::PluginInputDomainAdapter(Plugin *plugin) :
 112     PluginWrapper(plugin)
 113 {
 114     m_impl = new Impl(plugin, m_inputSampleRate);
 115 }
 116
 117 PluginInputDomainAdapter::~PluginInputDomainAdapter()
 118 {
 119     delete m_impl;
 120 }
 121
 122 bool
 123 PluginInputDomainAdapter::initialise(size_t channels, size_t stepSize, size_t blockSize)
 124 {
 125     return m_impl->initialise(channels, stepSize, blockSize);
 126 }
 127
 128 Plugin::InputDomain
 129 PluginInputDomainAdapter::getInputDomain() const
 130 {
 131     return TimeDomain;
 132 }
 133
 134 size_t
 135 PluginInputDomainAdapter::getPreferredStepSize() const
 136 {
 137     return m_impl->getPreferredStepSize();
 138 }
 139
 140 size_t
 141 PluginInputDomainAdapter::getPreferredBlockSize() const
 142 {
 143     return m_impl->getPreferredBlockSize();
 144 }
 145
 146 Plugin::FeatureSet
 147 PluginInputDomainAdapter::process(const float *const *inputBuffers, RealTime timestamp)
 148 {
 149     return m_impl->process(inputBuffers, timestamp);
 150 }
 151
 152 PluginInputDomainAdapter::Impl::Impl(Plugin *plugin, float inputSampleRate) :
 153     m_plugin(plugin),
 154     m_inputSampleRate(inputSampleRate),
 155     m_channels(0),
 156     m_blockSize(0),
 157     m_freqbuf(0),
 158     m_ri(0),
 159     m_window(0),
 160 #ifdef HAVE_FFTW3
 161     m_plan(0),
 162     m_cbuf(0)
 163 #else
 164     m_ro(0),
 165     m_io(0)
 166 #endif
 167 {
 168 }
 169
 170 PluginInputDomainAdapter::Impl::~Impl()
 171 {
 172     // the adapter will delete the plugin
 173
 174     if (m_channels > 0) {
 175         for (int c = 0; c < m_channels; ++c) {
 176             delete[] m_freqbuf[c];
 177         }
 178         delete[] m_freqbuf;
 179 #ifdef HAVE_FFTW3
 180         if (m_plan) {
 181             fftw_destroy_plan(m_plan);
 182             fftw_free(m_ri);
 183             fftw_free(m_cbuf);
 184             m_plan = 0;
 185         }
 186 #else
 187         delete[] m_ri;
 188         delete[] m_ro;
 189         delete[] m_io;
 190 #endif
 191         delete[] m_window;
 192     }
 193 }
 194
 195 // for some visual studii apparently
 196 #ifndef M_PI
 197 #define M_PI 3.14159265358979232846
 198 #endif
 199
 200 bool
 201 PluginInputDomainAdapter::Impl::initialise(size_t channels, size_t stepSize, size_t blockSize)
 202 {
 203     if (m_plugin->getInputDomain() == TimeDomain) {
 204
 205         m_blockSize = int(blockSize);
 206         m_channels = int(channels);
 207
 208         return m_plugin->initialise(channels, stepSize, blockSize);
 209     }
 210
 211     if (blockSize < 2) {
 212         std::cerr << "ERROR: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: blocksize < 2 not supported" << std::endl;
 213         return false;
 214     }
 215
 216     if (blockSize & (blockSize-1)) {
 217         std::cerr << "ERROR: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: non-power-of-two\nblocksize " << blockSize << " not supported" << std::endl;
 218         return false;
 219     }
 220
 221     if (m_channels > 0) {
 222         for (int c = 0; c < m_channels; ++c) {
 223             delete[] m_freqbuf[c];
 224         }
 225         delete[] m_freqbuf;
 226 #ifdef HAVE_FFTW3
 227         if (m_plan) {
 228             fftw_destroy_plan(m_plan);
 229             fftw_free(m_ri);
 230             fftw_free(m_cbuf);
 231             m_plan = 0;
 232         }
 233 #else
 234         delete[] m_ri;
 235         delete[] m_ro;
 236         delete[] m_io;
 237 #endif
 238         delete[] m_window;
 239     }
 240
 241     m_blockSize = int(blockSize);
 242     m_channels = int(channels);
 243
 244     m_freqbuf = new float *[m_channels];
 245     for (int c = 0; c < m_channels; ++c) {
 246         m_freqbuf[c] = new float[m_blockSize + 2];
 247     }
 248     m_window = new double[m_blockSize];
 249
 250     for (int i = 0; i < m_blockSize; ++i) {
 251         // Hanning window
 252         m_window[i] = (0.50 - 0.50 * cos((2.0 * M_PI * i) / m_blockSize));
 253     }
 254
 255 #ifdef HAVE_FFTW3
 256     m_ri = (double *)fftw_malloc(blockSize * sizeof(double));
 257     m_cbuf = (fftw_complex *)fftw_malloc((blockSize/2 + 1) * sizeof(fftw_complex));
 258     m_plan = fftw_plan_dft_r2c_1d(blockSize, m_ri, m_cbuf, FFTW_MEASURE);
 259 #else
 260     m_ri = new double[m_blockSize];
 261     m_ro = new double[m_blockSize];
 262     m_io = new double[m_blockSize];
 263 #endif
 264
 265     return m_plugin->initialise(channels, stepSize, blockSize);
 266 }
 267
 268 size_t
 269 PluginInputDomainAdapter::Impl::getPreferredStepSize() const
 270 {
 271     size_t step = m_plugin->getPreferredStepSize();
 272
 273     if (step == 0 && (m_plugin->getInputDomain() == FrequencyDomain)) {
 274         step = getPreferredBlockSize() / 2;
 275     }
 276
 277     return step;
 278 }
 279
 280 size_t
 281 PluginInputDomainAdapter::Impl::getPreferredBlockSize() const
 282 {
 283     size_t block = m_plugin->getPreferredBlockSize();
 284
 285     if (m_plugin->getInputDomain() == FrequencyDomain) {
 286         if (block == 0) {
 287             block = 1024;
 288         } else {
 289             block = makeBlockSizeAcceptable(block);
 290         }
 291     }
 292
 293     return block;
 294 }
 295
 296 size_t
 297 PluginInputDomainAdapter::Impl::makeBlockSizeAcceptable(size_t blockSize) const
 298 {
 299     if (blockSize < 2) {
 300
 301         std::cerr << "WARNING: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: blocksize < 2 not" << std::endl
 302                   << "supported, increasing from " << blockSize << " to 2" << std::endl;
 303         blockSize = 2;
 304
 305     } else if (blockSize & (blockSize-1)) {
 306
 307 #ifdef HAVE_FFTW3
 308         // not an issue with FFTW
 309 #else
 310
 311         // not a power of two, can't handle that with our built-in FFT
 312         // implementation
 313
 314         size_t nearest = blockSize;
 315         size_t power = 0;
 316         while (nearest > 1) {
 317             nearest >>= 1;
 318             ++power;
 319         }
 320         nearest = 1;
 321         while (power) {
 322             nearest <<= 1;
 323             --power;
 324         }
 325
 326         if (blockSize - nearest > (nearest*2) - blockSize) {
 327             nearest = nearest*2;
 328         }
 329
 330         std::cerr << "WARNING: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: non-power-of-two\nblocksize " << blockSize << " not supported, using blocksize " << nearest << " instead" << std::endl;
 331         blockSize = nearest;
 332
 333 #endif
 334     }
 335
 336     return blockSize;
 337 }
 338
 339 Plugin::FeatureSet
 340 PluginInputDomainAdapter::Impl::process(const float *const *inputBuffers,
 341                                         RealTime timestamp)
 342 {
 343     if (m_plugin->getInputDomain() == TimeDomain) {
 344         return m_plugin->process(inputBuffers, timestamp);
 345     }
 346
 347     // The timestamp supplied should be (according to the Vamp::Plugin
 348     // spec) the time of the start of the time-domain input block.
 349     // However, we want to pass to the plugin an FFT output calculated
 350     // from the block of samples _centred_ on that timestamp.
 351     //
 352     // We have two options:
 353     //
 354     // 1. Buffer the input, calculating the fft of the values at the
 355     // passed-in block minus blockSize/2 rather than starting at the
 356     // passed-in block.  So each time we call process on the plugin,
 357     // we are passing in the same timestamp as was passed to our own
 358     // process plugin, but not (the frequency domain representation
 359     // of) the same set of samples.  Advantages: avoids confusion in
 360     // the host by ensuring the returned values have timestamps
 361     // comparable with that passed in to this function (in fact this
 362     // is pretty much essential for one-value-per-block outputs);
 363     // consistent with hosts such as SV that deal with the
 364     // frequency-domain transform themselves.  Disadvantages: means
 365     // making the not necessarily correct assumption that the samples
 366     // preceding the first official block are all zero (or some other
 367     // known value).
 368     //
 369     // 2. Increase the passed-in timestamps by half the blocksize.  So
 370     // when we call process, we are passing in the frequency domain
 371     // representation of the same set of samples as passed to us, but
 372     // with a different timestamp.  Advantages: simplicity; avoids
 373     // iffy assumption mentioned above.  Disadvantages: inconsistency
 374     // with SV in cases where stepSize != blockSize/2; potential
 375     // confusion arising from returned timestamps being calculated
 376     // from the adjusted input timestamps rather than the original
 377     // ones (and inaccuracy where the returned timestamp is implied,
 378     // as in one-value-per-block).
 379     //
 380     // Neither way is ideal, but I don't think either is strictly
 381     // incorrect either.  I think this is just a case where the same
 382     // plugin can legitimately produce differing results from the same
 383     // input data, depending on how that data is packaged.
 384     //
 385     // We'll go for option 2, adjusting the timestamps.  Note in
 386     // particular that this means some results can differ from those
 387     // produced by SV.
 388
 389 //    std::cerr << "PluginInputDomainAdapter: sampleRate " << m_inputSampleRate << ", blocksize " << m_blockSize << ", adjusting time from " << timestamp;
 390
 391     timestamp = timestamp + RealTime::frame2RealTime
 392         (m_blockSize/2, int(m_inputSampleRate + 0.5));
 393
 394 //    std::cerr << " to " << timestamp << std::endl;
 395
 396     for (int c = 0; c < m_channels; ++c) {
 397
 398         for (int i = 0; i < m_blockSize; ++i) {
 399             m_ri[i] = double(inputBuffers[c][i]) * m_window[i];
 400         }
 401
 402         for (int i = 0; i < m_blockSize/2; ++i) {
 403             // FFT shift
 404             double value = m_ri[i];
 405             m_ri[i] = m_ri[i + m_blockSize/2];
 406             m_ri[i + m_blockSize/2] = value;
 407         }
 408
 409 #ifdef HAVE_FFTW3
 410
 411         fftw_execute(m_plan);
 412
 413         for (int i = 0; i <= m_blockSize/2; ++i) {
 414             m_freqbuf[c][i * 2] = float(m_cbuf[i][0]);
 415             m_freqbuf[c][i * 2 + 1] = float(m_cbuf[i][1]);
 416         }
 417
 418 #else
 419
 420         fft(m_blockSize, false, m_ri, 0, m_ro, m_io);
 421
 422         for (int i = 0; i <= m_blockSize/2; ++i) {
 423             m_freqbuf[c][i * 2] = float(m_ro[i]);
 424             m_freqbuf[c][i * 2 + 1] = float(m_io[i]);
 425         }
 426
 427 #endif
 428     }
 429
 430     return m_plugin->process(m_freqbuf, timestamp);
 431 }
 432
 433 #ifndef HAVE_FFTW3
 434
 435 void
 436 PluginInputDomainAdapter::Impl::fft(unsigned int n, bool inverse,
 437                                     double *ri, double *ii, double *ro, double *io)
 438 {
 439     if (!ri || !ro || !io) return;
 440
 441     unsigned int bits;
 442     unsigned int i, j, k, m;
 443     unsigned int blockSize, blockEnd;
 444
 445     double tr, ti;
 446
 447     if (n < 2) return;
 448     if (n & (n-1)) return;
 449
 450     double angle = 2.0 * M_PI;
 451     if (inverse) angle = -angle;
 452
 453     for (i = 0; ; ++i) {
 454         if (n & (1 << i)) {
 455             bits = i;
 456             break;
 457         }
 458     }
 459
 460     static unsigned int tableSize = 0;
 461     static int *table = 0;
 462
 463     if (tableSize != n) {
 464
 465         delete[] table;
 466
 467         table = new int[n];
 468
 469         for (i = 0; i < n; ++i) {
 470
 471             m = i;
 472
 473             for (j = k = 0; j < bits; ++j) {
 474                 k = (k << 1) | (m & 1);
 475                 m >>= 1;
 476             }
 477
 478             table[i] = k;
 479         }
 480
 481         tableSize = n;
 482     }
 483
 484     if (ii) {
 485         for (i = 0; i < n; ++i) {
 486             ro[table[i]] = ri[i];
 487             io[table[i]] = ii[i];
 488         }
 489     } else {
 490         for (i = 0; i < n; ++i) {
 491             ro[table[i]] = ri[i];
 492             io[table[i]] = 0.0;
 493         }
 494     }
 495
 496     blockEnd = 1;
 497
 498     for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
 499
 500         double delta = angle / (double)blockSize;
 501         double sm2 = -sin(-2 * delta);
 502         double sm1 = -sin(-delta);
 503         double cm2 = cos(-2 * delta);
 504         double cm1 = cos(-delta);
 505         double w = 2 * cm1;
 506         double ar[3], ai[3];
 507
 508         for (i = 0; i < n; i += blockSize) {
 509
 510             ar[2] = cm2;
 511             ar[1] = cm1;
 512
 513             ai[2] = sm2;
 514             ai[1] = sm1;
 515
 516             for (j = i, m = 0; m < blockEnd; j++, m++) {
 517
 518                 ar[0] = w * ar[1] - ar[2];
 519                 ar[2] = ar[1];
 520                 ar[1] = ar[0];
 521
 522                 ai[0] = w * ai[1] - ai[2];
 523                 ai[2] = ai[1];
 524                 ai[1] = ai[0];
 525
 526                 k = j + blockEnd;
 527                 tr = ar[0] * ro[k] - ai[0] * io[k];
 528                 ti = ar[0] * io[k] + ai[0] * ro[k];
 529
 530                 ro[k] = ro[j] - tr;
 531                 io[k] = io[j] - ti;
 532
 533                 ro[j] += tr;
 534                 io[j] += ti;
 535             }
 536         }
 537
 538         blockEnd = blockSize;
 539     }
 540
 541     if (inverse) {
 542
 543         double denom = (double)n;
 544
 545         for (i = 0; i < n; i++) {
 546             ro[i] /= denom;
 547             io[i] /= denom;
 548         }
 549     }
 550 }
 551
 552 #endif
 553
 554 }
 555
 556 }
 557