1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
6 An API for audio analysis and feature extraction plugins.
8 Centre for Digital Music, Queen Mary, University of London.
9 Copyright 2006-2007 Chris Cannam and QMUL.
11 This file is based in part on Don Cross's public domain FFT
14 Permission is hereby granted, free of charge, to any person
15 obtaining a copy of this software and associated documentation
16 files (the "Software"), to deal in the Software without
17 restriction, including without limitation the rights to use, copy,
18 modify, merge, publish, distribute, sublicense, and/or sell copies
19 of the Software, and to permit persons to whom the Software is
20 furnished to do so, subject to the following conditions:
22 The above copyright notice and this permission notice shall be
23 included in all copies or substantial portions of the Software.
25 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
29 ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
30 CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
33 Except as contained in this notice, the names of the Centre for
34 Digital Music; Queen Mary, University of London; and Chris Cannam
35 shall not be used in advertising or otherwise to promote the sale,
36 use or other dealings in this Software without prior written
40 #include "PluginInputDomainAdapter.h"
45 * If you want to compile using FFTW instead of the built-in FFT
46 * implementation for the PluginInputDomainAdapter, define HAVE_FFTW3
49 * Be aware that FFTW is licensed under the GPL -- unlike this SDK,
50 * which is provided under a more liberal BSD license in order to
51 * permit use in closed source applications. The use of FFTW would
52 * mean that your code would need to be licensed under the GPL as
53 * well. Do not define this symbol unless you understand and accept
54 * the implications of this.
56 * Parties such as Linux distribution packagers who redistribute this
57 * SDK for use in other programs should _not_ define this symbol, as
58 * it would change the effective licensing terms under which the SDK
59 * was available to third party developers.
61 * The default is not to use FFTW, and to use the built-in FFT instead.
63 * Note: The FFTW code uses FFTW_MEASURE, and so will perform badly on
64 * its first invocation unless the host has saved and restored FFTW
65 * wisdom (see the FFTW documentation).
75 class PluginInputDomainAdapter::Impl
78 Impl(Plugin *plugin, float inputSampleRate);
81 bool initialise(size_t channels, size_t stepSize, size_t blockSize);
83 size_t getPreferredStepSize() const;
84 size_t getPreferredBlockSize() const;
86 FeatureSet process(const float *const *inputBuffers, RealTime timestamp);
90 float m_inputSampleRate;
100 fftw_complex *m_cbuf;
104 void fft(unsigned int n, bool inverse,
105 double *ri, double *ii, double *ro, double *io);
108 size_t makeBlockSizeAcceptable(size_t) const;
111 PluginInputDomainAdapter::PluginInputDomainAdapter(Plugin *plugin) :
112 PluginWrapper(plugin)
114 m_impl = new Impl(plugin, m_inputSampleRate);
117 PluginInputDomainAdapter::~PluginInputDomainAdapter()
123 PluginInputDomainAdapter::initialise(size_t channels, size_t stepSize, size_t blockSize)
125 return m_impl->initialise(channels, stepSize, blockSize);
129 PluginInputDomainAdapter::getInputDomain() const
135 PluginInputDomainAdapter::getPreferredStepSize() const
137 return m_impl->getPreferredStepSize();
141 PluginInputDomainAdapter::getPreferredBlockSize() const
143 return m_impl->getPreferredBlockSize();
147 PluginInputDomainAdapter::process(const float *const *inputBuffers, RealTime timestamp)
149 return m_impl->process(inputBuffers, timestamp);
152 PluginInputDomainAdapter::Impl::Impl(Plugin *plugin, float inputSampleRate) :
154 m_inputSampleRate(inputSampleRate),
170 PluginInputDomainAdapter::Impl::~Impl()
172 // the adapter will delete the plugin
174 if (m_channels > 0) {
175 for (int c = 0; c < m_channels; ++c) {
176 delete[] m_freqbuf[c];
181 fftw_destroy_plan(m_plan);
195 // for some visual studii apparently
197 #define M_PI 3.14159265358979232846
201 PluginInputDomainAdapter::Impl::initialise(size_t channels, size_t stepSize, size_t blockSize)
203 if (m_plugin->getInputDomain() == TimeDomain) {
205 m_blockSize = int(blockSize);
206 m_channels = int(channels);
208 return m_plugin->initialise(channels, stepSize, blockSize);
212 std::cerr << "ERROR: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: blocksize < 2 not supported" << std::endl;
216 if (blockSize & (blockSize-1)) {
217 std::cerr << "ERROR: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: non-power-of-two\nblocksize " << blockSize << " not supported" << std::endl;
221 if (m_channels > 0) {
222 for (int c = 0; c < m_channels; ++c) {
223 delete[] m_freqbuf[c];
228 fftw_destroy_plan(m_plan);
241 m_blockSize = int(blockSize);
242 m_channels = int(channels);
244 m_freqbuf = new float *[m_channels];
245 for (int c = 0; c < m_channels; ++c) {
246 m_freqbuf[c] = new float[m_blockSize + 2];
248 m_window = new double[m_blockSize];
250 for (int i = 0; i < m_blockSize; ++i) {
252 m_window[i] = (0.50 - 0.50 * cos((2.0 * M_PI * i) / m_blockSize));
256 m_ri = (double *)fftw_malloc(blockSize * sizeof(double));
257 m_cbuf = (fftw_complex *)fftw_malloc((blockSize/2 + 1) * sizeof(fftw_complex));
258 m_plan = fftw_plan_dft_r2c_1d(blockSize, m_ri, m_cbuf, FFTW_MEASURE);
260 m_ri = new double[m_blockSize];
261 m_ro = new double[m_blockSize];
262 m_io = new double[m_blockSize];
265 return m_plugin->initialise(channels, stepSize, blockSize);
269 PluginInputDomainAdapter::Impl::getPreferredStepSize() const
271 size_t step = m_plugin->getPreferredStepSize();
273 if (step == 0 && (m_plugin->getInputDomain() == FrequencyDomain)) {
274 step = getPreferredBlockSize() / 2;
281 PluginInputDomainAdapter::Impl::getPreferredBlockSize() const
283 size_t block = m_plugin->getPreferredBlockSize();
285 if (m_plugin->getInputDomain() == FrequencyDomain) {
289 block = makeBlockSizeAcceptable(block);
297 PluginInputDomainAdapter::Impl::makeBlockSizeAcceptable(size_t blockSize) const
301 std::cerr << "WARNING: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: blocksize < 2 not" << std::endl
302 << "supported, increasing from " << blockSize << " to 2" << std::endl;
305 } else if (blockSize & (blockSize-1)) {
308 // not an issue with FFTW
311 // not a power of two, can't handle that with our built-in FFT
314 size_t nearest = blockSize;
316 while (nearest > 1) {
326 if (blockSize - nearest > (nearest*2) - blockSize) {
330 std::cerr << "WARNING: Vamp::HostExt::PluginInputDomainAdapter::Impl::initialise: non-power-of-two\nblocksize " << blockSize << " not supported, using blocksize " << nearest << " instead" << std::endl;
340 PluginInputDomainAdapter::Impl::process(const float *const *inputBuffers,
343 if (m_plugin->getInputDomain() == TimeDomain) {
344 return m_plugin->process(inputBuffers, timestamp);
347 // The timestamp supplied should be (according to the Vamp::Plugin
348 // spec) the time of the start of the time-domain input block.
349 // However, we want to pass to the plugin an FFT output calculated
350 // from the block of samples _centred_ on that timestamp.
352 // We have two options:
354 // 1. Buffer the input, calculating the fft of the values at the
355 // passed-in block minus blockSize/2 rather than starting at the
356 // passed-in block. So each time we call process on the plugin,
357 // we are passing in the same timestamp as was passed to our own
358 // process plugin, but not (the frequency domain representation
359 // of) the same set of samples. Advantages: avoids confusion in
360 // the host by ensuring the returned values have timestamps
361 // comparable with that passed in to this function (in fact this
362 // is pretty much essential for one-value-per-block outputs);
363 // consistent with hosts such as SV that deal with the
364 // frequency-domain transform themselves. Disadvantages: means
365 // making the not necessarily correct assumption that the samples
366 // preceding the first official block are all zero (or some other
369 // 2. Increase the passed-in timestamps by half the blocksize. So
370 // when we call process, we are passing in the frequency domain
371 // representation of the same set of samples as passed to us, but
372 // with a different timestamp. Advantages: simplicity; avoids
373 // iffy assumption mentioned above. Disadvantages: inconsistency
374 // with SV in cases where stepSize != blockSize/2; potential
375 // confusion arising from returned timestamps being calculated
376 // from the adjusted input timestamps rather than the original
377 // ones (and inaccuracy where the returned timestamp is implied,
378 // as in one-value-per-block).
380 // Neither way is ideal, but I don't think either is strictly
381 // incorrect either. I think this is just a case where the same
382 // plugin can legitimately produce differing results from the same
383 // input data, depending on how that data is packaged.
385 // We'll go for option 2, adjusting the timestamps. Note in
386 // particular that this means some results can differ from those
389 // std::cerr << "PluginInputDomainAdapter: sampleRate " << m_inputSampleRate << ", blocksize " << m_blockSize << ", adjusting time from " << timestamp;
391 timestamp = timestamp + RealTime::frame2RealTime
392 (m_blockSize/2, int(m_inputSampleRate + 0.5));
394 // std::cerr << " to " << timestamp << std::endl;
396 for (int c = 0; c < m_channels; ++c) {
398 for (int i = 0; i < m_blockSize; ++i) {
399 m_ri[i] = double(inputBuffers[c][i]) * m_window[i];
402 for (int i = 0; i < m_blockSize/2; ++i) {
404 double value = m_ri[i];
405 m_ri[i] = m_ri[i + m_blockSize/2];
406 m_ri[i + m_blockSize/2] = value;
411 fftw_execute(m_plan);
413 for (int i = 0; i <= m_blockSize/2; ++i) {
414 m_freqbuf[c][i * 2] = float(m_cbuf[i][0]);
415 m_freqbuf[c][i * 2 + 1] = float(m_cbuf[i][1]);
420 fft(m_blockSize, false, m_ri, 0, m_ro, m_io);
422 for (int i = 0; i <= m_blockSize/2; ++i) {
423 m_freqbuf[c][i * 2] = float(m_ro[i]);
424 m_freqbuf[c][i * 2 + 1] = float(m_io[i]);
430 return m_plugin->process(m_freqbuf, timestamp);
436 PluginInputDomainAdapter::Impl::fft(unsigned int n, bool inverse,
437 double *ri, double *ii, double *ro, double *io)
439 if (!ri || !ro || !io) return;
442 unsigned int i, j, k, m;
443 unsigned int blockSize, blockEnd;
448 if (n & (n-1)) return;
450 double angle = 2.0 * M_PI;
451 if (inverse) angle = -angle;
460 static unsigned int tableSize = 0;
461 static int *table = 0;
463 if (tableSize != n) {
469 for (i = 0; i < n; ++i) {
473 for (j = k = 0; j < bits; ++j) {
474 k = (k << 1) | (m & 1);
485 for (i = 0; i < n; ++i) {
486 ro[table[i]] = ri[i];
487 io[table[i]] = ii[i];
490 for (i = 0; i < n; ++i) {
491 ro[table[i]] = ri[i];
498 for (blockSize = 2; blockSize <= n; blockSize <<= 1) {
500 double delta = angle / (double)blockSize;
501 double sm2 = -sin(-2 * delta);
502 double sm1 = -sin(-delta);
503 double cm2 = cos(-2 * delta);
504 double cm1 = cos(-delta);
508 for (i = 0; i < n; i += blockSize) {
516 for (j = i, m = 0; m < blockEnd; j++, m++) {
518 ar[0] = w * ar[1] - ar[2];
522 ai[0] = w * ai[1] - ai[2];
527 tr = ar[0] * ro[k] - ai[0] * io[k];
528 ti = ar[0] * io[k] + ai[0] * ro[k];
538 blockEnd = blockSize;
543 double denom = (double)n;
545 for (i = 0; i < n; i++) {