1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
6 An API for audio analysis and feature extraction plugins.
8 Centre for Digital Music, Queen Mary, University of London.
9 Copyright 2006-2007 Chris Cannam and QMUL.
11 Permission is hereby granted, free of charge, to any person
12 obtaining a copy of this software and associated documentation
13 files (the "Software"), to deal in the Software without
14 restriction, including without limitation the rights to use, copy,
15 modify, merge, publish, distribute, sublicense, and/or sell copies
16 of the Software, and to permit persons to whom the Software is
17 furnished to do so, subject to the following conditions:
19 The above copyright notice and this permission notice shall be
20 included in all copies or substantial portions of the Software.
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
23 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
25 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
26 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
27 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
28 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
30 Except as contained in this notice, the names of the Centre for
31 Digital Music; Queen Mary, University of London; and Chris Cannam
32 shall not be used in advertising or otherwise to promote the sale,
33 use or other dealings in this Software without prior written
37 #include "PluginInputDomainAdapter.h"
43 * If you want to compile using FFTW instead of the built-in FFT
44 * implementation for the PluginInputDomainAdapter, define HAVE_FFTW3
47 * Remember that FFTW is licensed under the GPL (unlike this SDK, which
48 * is licensed liberally in order to permit closed-source usage), so
49 * you should not define this symbol unless your code is also under the
50 * GPL. Also, parties redistributing this SDK for use in other
51 * programs should be careful _not_ to define this symbol in order not
52 * to affect the stated license of this SDK.
54 * Note: This code uses FFTW_MEASURE, and will perform badly on its
55 * first invocation unless the host has saved and restored FFTW wisdom
56 * (see the FFTW documentation).
67 class PluginInputDomainAdapter::Impl
70 Impl(Plugin *plugin, float inputSampleRate);
73 bool initialise(size_t channels, size_t stepSize, size_t blockSize);
75 size_t getPreferredStepSize() const;
76 size_t getPreferredBlockSize() const;
78 FeatureSet process(const float *const *inputBuffers, RealTime timestamp);
82 float m_inputSampleRate;
96 void fft(unsigned int n, bool inverse,
97 double *ri, double *ii, double *ro, double *io);
100 size_t makeBlockSizeAcceptable(size_t) const;
103 PluginInputDomainAdapter::PluginInputDomainAdapter(Plugin *plugin) :
104 PluginWrapper(plugin)
106 m_impl = new Impl(plugin, m_inputSampleRate);
109 PluginInputDomainAdapter::~PluginInputDomainAdapter()
115 PluginInputDomainAdapter::initialise(size_t channels, size_t stepSize, size_t blockSize)
117 return m_impl->initialise(channels, stepSize, blockSize);
121 PluginInputDomainAdapter::getInputDomain() const
127 PluginInputDomainAdapter::getPreferredStepSize() const
129 return m_impl->getPreferredStepSize();
133 PluginInputDomainAdapter::getPreferredBlockSize() const
135 return m_impl->getPreferredBlockSize();
139 PluginInputDomainAdapter::process(const float *const *inputBuffers, RealTime timestamp)
141 return m_impl->process(inputBuffers, timestamp);
144 PluginInputDomainAdapter::Impl::Impl(Plugin *plugin, float inputSampleRate) :
146 m_inputSampleRate(inputSampleRate),
162 PluginInputDomainAdapter::Impl::~Impl()
164 // the adapter will delete the plugin
166 if (m_channels > 0) {
167 for (int c = 0; c < m_channels; ++c) {
168 delete[] m_freqbuf[c];
173 fftw_destroy_plan(m_plan);
187 // for some visual studii apparently
189 #define M_PI 3.14159265358979232846
193 PluginInputDomainAdapter::Impl::initialise(size_t channels, size_t stepSize, size_t blockSize)
195 if (m_plugin->getInputDomain() == TimeDomain) {
197 m_blockSize = int(blockSize);
198 m_channels = int(channels);
200 return m_plugin->initialise(channels, stepSize, blockSize);
204 std::cerr << "ERROR: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: blocksize < 2 not supported" << std::endl;
208 if (blockSize & (blockSize-1)) {
209 std::cerr << "ERROR: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: non-power-of-two\nblocksize " << blockSize << " not supported" << std::endl;
213 if (m_channels > 0) {
214 for (int c = 0; c < m_channels; ++c) {
215 delete[] m_freqbuf[c];
220 fftw_destroy_plan(m_plan);
233 m_blockSize = int(blockSize);
234 m_channels = int(channels);
236 m_freqbuf = new float *[m_channels];
237 for (int c = 0; c < m_channels; ++c) {
238 m_freqbuf[c] = new float[m_blockSize + 2];
240 m_window = new double[m_blockSize];
242 for (int i = 0; i < m_blockSize; ++i) {
244 m_window[i] = (0.50 - 0.50 * cos((2.0 * M_PI * i) / m_blockSize));
248 m_ri = (double *)fftw_malloc(blockSize * sizeof(double));
249 m_cbuf = (fftw_complex *)fftw_malloc((blockSize/2 + 1) * sizeof(fftw_complex));
250 m_plan = fftw_plan_dft_r2c_1d(blockSize, m_ri, m_cbuf, FFTW_MEASURE);
252 m_ri = new double[m_blockSize];
253 m_ro = new double[m_blockSize];
254 m_io = new double[m_blockSize];
257 return m_plugin->initialise(channels, stepSize, blockSize);
261 PluginInputDomainAdapter::Impl::getPreferredStepSize() const
263 size_t step = m_plugin->getPreferredStepSize();
265 if (step == 0 && (m_plugin->getInputDomain() == FrequencyDomain)) {
266 step = getPreferredBlockSize() / 2;
273 PluginInputDomainAdapter::Impl::getPreferredBlockSize() const
275 size_t block = m_plugin->getPreferredBlockSize();
277 if (m_plugin->getInputDomain() == FrequencyDomain) {
281 block = makeBlockSizeAcceptable(block);
289 PluginInputDomainAdapter::Impl::makeBlockSizeAcceptable(size_t blockSize) const
293 std::cerr << "WARNING: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: blocksize < 2 not" << std::endl
294 << "supported, increasing from " << blockSize << " to 2" << std::endl;
297 } else if (blockSize & (blockSize-1)) {
300 // not an issue with FFTW
303 // not a power of two, can't handle that with our built-in FFT
306 size_t nearest = blockSize;
308 while (nearest > 1) {
318 if (blockSize - nearest > (nearest*2) - blockSize) {
322 std::cerr << "WARNING: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: non-power-of-two\nblocksize " << blockSize << " not supported, using blocksize " << nearest << " instead" << std::endl;
332 PluginInputDomainAdapter::Impl::process(const float *const *inputBuffers,
335 if (m_plugin->getInputDomain() == TimeDomain) {
336 return m_plugin->process(inputBuffers, timestamp);
339 // The timestamp supplied should be (according to the Vamp::Plugin
340 // spec) the time of the start of the time-domain input block.
341 // However, we want to pass to the plugin an FFT output calculated
342 // from the block of samples _centred_ on that timestamp.
344 // We have two options:
346 // 1. Buffer the input, calculating the fft of the values at the
347 // passed-in block minus blockSize/2 rather than starting at the
348 // passed-in block. So each time we call process on the plugin,
349 // we are passing in the same timestamp as was passed to our own
350 // process plugin, but not (the frequency domain representation
351 // of) the same set of samples. Advantages: avoids confusion in
352 // the host by ensuring the returned values have timestamps
353 // comparable with that passed in to this function (in fact this
354 // is pretty much essential for one-value-per-block outputs);
355 // consistent with hosts such as SV that deal with the
356 // frequency-domain transform themselves. Disadvantages: means
357 // making the not necessarily correct assumption that the samples
358 // preceding the first official block are all zero (or some other
361 // 2. Increase the passed-in timestamps by half the blocksize. So
362 // when we call process, we are passing in the frequency domain
363 // representation of the same set of samples as passed to us, but
364 // with a different timestamp. Advantages: simplicity; avoids
365 // iffy assumption mentioned above. Disadvantages: inconsistency
366 // with SV in cases where stepSize != blockSize/2; potential
367 // confusion arising from returned timestamps being calculated
368 // from the adjusted input timestamps rather than the original
369 // ones (and inaccuracy where the returned timestamp is implied,
370 // as in one-value-per-block).
372 // Neither way is ideal, but I don't think either is strictly
373 // incorrect either. I think this is just a case where the same
374 // plugin can legitimately produce differing results from the same
375 // input data, depending on how that data is packaged.
377 // We'll go for option 2, adjusting the timestamps. Note in
378 // particular that this means some results can differ from those
381 // std::cerr << "PluginInputDomainAdapter: sampleRate " << m_inputSampleRate << ", blocksize " << m_blockSize << ", adjusting time from " << timestamp;
383 timestamp = timestamp + RealTime::frame2RealTime
384 (m_blockSize/2, int(m_inputSampleRate + 0.5));
386 // std::cerr << " to " << timestamp << std::endl;
388 for (int c = 0; c < m_channels; ++c) {
390 for (int i = 0; i < m_blockSize; ++i) {
391 m_ri[i] = double(inputBuffers[c][i]) * m_window[i];
394 for (int i = 0; i < m_blockSize/2; ++i) {
396 double value = m_ri[i];
397 m_ri[i] = m_ri[i + m_blockSize/2];
398 m_ri[i + m_blockSize/2] = value;
403 fftw_execute(m_plan);
405 for (int i = 0; i <= m_blockSize/2; ++i) {
406 m_freqbuf[c][i * 2] = float(m_cbuf[i][0]);
407 m_freqbuf[c][i * 2 + 1] = float(m_cbuf[i][1]);
412 fft(m_blockSize, false, m_ri, 0, m_ro, m_io);
414 for (int i = 0; i <= m_blockSize/2; ++i) {
415 m_freqbuf[c][i * 2] = float(m_ro[i]);
416 m_freqbuf[c][i * 2 + 1] = float(m_io[i]);
422 return m_plugin->process(m_freqbuf, timestamp);
428 PluginInputDomainAdapter::Impl::fft(unsigned int n, bool inverse,
429 double *ri, double *ii, double *ro, double *io)
431 if (!ri || !ro || !io) return;
434 unsigned int i, j, k, m;
435 unsigned int blockSize, blockEnd;
440 if (n & (n-1)) return;
442 double angle = 2.0 * M_PI;
443 if (inverse) angle = -angle;
452 static unsigned int tableSize = 0;
453 static int *table = 0;
455 if (tableSize != n) {
461 for (i = 0; i < n; ++i) {
465 for (j = k = 0; j < bits; ++j) {
466 k = (k << 1) | (m & 1);
477 for (i = 0; i < n; ++i) {
478 ro[table[i]] = ri[i];
479 io[table[i]] = ii[i];
482 for (i = 0; i < n; ++i) {
483 ro[table[i]] = ri[i];
490 for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
492 double delta = angle / (double)blockSize;
493 double sm2 = -sin(-2 * delta);
494 double sm1 = -sin(-delta);
495 double cm2 = cos(-2 * delta);
496 double cm1 = cos(-delta);
500 for (i = 0; i < n; i += blockSize) {
508 for (j = i, m = 0; m < blockEnd; j++, m++) {
510 ar[0] = w * ar[1] - ar[2];
514 ai[0] = w * ai[1] - ai[2];
519 tr = ar[0] * ro[k] - ai[0] * io[k];
520 ti = ar[0] * io[k] + ai[0] * ro[k];
530 blockEnd = blockSize;
535 double denom = (double)n;
537 for (i = 0; i < n; i++) {