libs/vamp-sdk/vamp-sdk/Plugin.h

   1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
   2
   3 /*
   4     Vamp
   5
   6     An API for audio analysis and feature extraction plugins.
   7
   8     Centre for Digital Music, Queen Mary, University of London.
   9     Copyright 2006 Chris Cannam.
  10
  11     Permission is hereby granted, free of charge, to any person
  12     obtaining a copy of this software and associated documentation
  13     files (the "Software"), to deal in the Software without
  14     restriction, including without limitation the rights to use, copy,
  15     modify, merge, publish, distribute, sublicense, and/or sell copies
  16     of the Software, and to permit persons to whom the Software is
  17     furnished to do so, subject to the following conditions:
  18
  19     The above copyright notice and this permission notice shall be
  20     included in all copies or substantial portions of the Software.
  21
  22     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  23     EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  24     MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  25     NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
  26     ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
  27     CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  28     WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  29
  30     Except as contained in this notice, the names of the Centre for
  31     Digital Music; Queen Mary, University of London; and Chris Cannam
  32     shall not be used in advertising or otherwise to promote the sale,
  33     use or other dealings in this Software without prior written
  34     authorization.
  35 */
  36
  37 #ifndef _VAMP_SDK_PLUGIN_H_
  38 #define _VAMP_SDK_PLUGIN_H_
  39
  40 #include <string>
  41 #include <vector>
  42 #include <map>
  43
  44 #include "PluginBase.h"
  45 #include "RealTime.h"
  46
  47 #include "plugguard.h"
  48 _VAMP_SDK_PLUGSPACE_BEGIN(Plugin.h)
  49
  50 namespace Vamp {
  51
  52 /**
  53  * \class Plugin Plugin.h <vamp-sdk/Plugin.h>
  54  *
  55  * Vamp::Plugin is a base class for plugin instance classes
  56  * that provide feature extraction from audio or related data.
  57  *
  58  * In most cases, the input will be audio and the output will be a
  59  * stream of derived data at a lower sampling resolution than the
  60  * input.
  61  *
  62  * Note that this class inherits several abstract methods from
  63  * PluginBase.  These must be implemented by the subclass.
  64  *
  65  *
  66  * PLUGIN LIFECYCLE
  67  *
  68  * Feature extraction plugins are managed differently from real-time
  69  * plugins (such as VST effects).  The main difference is that the
  70  * parameters for a feature extraction plugin are configured before
  71  * the plugin is used, and do not change during use.
  72  *
  73  * 1. Host constructs the plugin, passing it the input sample rate.
  74  * The plugin may do basic initialisation, but should not do anything
  75  * computationally expensive at this point.  You must make sure your
  76  * plugin is cheap to construct, otherwise you'll seriously affect the
  77  * startup performance of almost all hosts.  If you have serious
  78  * initialisation to do, the proper place is in initialise() (step 5).
  79  *
  80  * 2. Host may query the plugin's available outputs.
  81  *
  82  * 3. Host queries programs and parameter descriptors, and may set
  83  * some or all of them.  Parameters that are not explicitly set should
  84  * take their default values as specified in the parameter descriptor.
  85  * When a program is set, the parameter values may change and the host
  86  * will re-query them to check.
  87  *
  88  * 4. Host queries the preferred step size, block size and number of
  89  * channels.  These may all vary depending on the parameter values.
  90  * (Note however that you cannot make the number of distinct outputs
  91  * dependent on parameter values.)
  92  *
  93  * 5. Plugin is properly initialised with a call to initialise.  This
  94  * fixes the step size, block size, and number of channels, as well as
  95  * all of the parameter and program settings.  If the values passed in
  96  * to initialise do not match the plugin's advertised preferred values
  97  * from step 4, the plugin may refuse to initialise and return false
  98  * (although if possible it should accept the new values).  Any
  99  * computationally expensive setup code should take place here.
 100  *
 101  * 6. Host finally checks the number of values, resolution, extents
 102  * etc per output (which may vary depending on the number of channels,
 103  * step size and block size as well as the parameter values).
 104  *
 105  * 7. Host will repeatedly call the process method to pass in blocks
 106  * of input data.  This method may return features extracted from that
 107  * data (if the plugin is causal).
 108  *
 109  * 8. Host will call getRemainingFeatures exactly once, after all the
 110  * input data has been processed.  This may return any non-causal or
 111  * leftover features.
 112  *
 113  * 9. At any point after initialise was called, the host may
 114  * optionally call the reset method and restart processing.  (This
 115  * does not mean it can change the parameters, which are fixed from
 116  * initialise until destruction.)
 117  *
 118  * A plugin does not need to handle the case where setParameter or
 119  * selectProgram is called after initialise has been called.  It's the
 120  * host's responsibility not to do that.  Similarly, the plugin may
 121  * safely assume that initialise is called no more than once.
 122  */
 123
 124 class Plugin : public PluginBase
 125 {
 126 public:
 127     virtual ~Plugin() { }
 128
 129     /**
 130      * Initialise a plugin to prepare it for use with the given number
 131      * of input channels, step size (window increment, in sample
 132      * frames) and block size (window size, in sample frames).
 133      *
 134      * The input sample rate should have been already specified at
 135      * construction time.
 136      *
 137      * Return true for successful initialisation, false if the number
 138      * of input channels, step size and/or block size cannot be
 139      * supported.
 140      */
 141     virtual bool initialise(size_t inputChannels,
 142                             size_t stepSize,
 143                             size_t blockSize) = 0;
 144
 145     /**
 146      * Reset the plugin after use, to prepare it for another clean
 147      * run.  Not called for the first initialisation (i.e. initialise
 148      * must also do a reset).
 149      */
 150     virtual void reset() = 0;
 151
 152     enum InputDomain { TimeDomain, FrequencyDomain };
 153
 154     /**
 155      * Get the plugin's required input domain.
 156      *
 157      * If this is TimeDomain, the samples provided to the process()
 158      * function (below) will be in the time domain, as for a
 159      * traditional audio processing plugin.
 160      *
 161      * If this is FrequencyDomain, the host will carry out a windowed
 162      * FFT of size equal to the negotiated block size on the data
 163      * before passing the frequency bin data in to process().  The
 164      * input data for the FFT will be rotated so as to place the
 165      * origin in the centre of the block.
 166      * The plugin does not get to choose the window type -- the host
 167      * will either let the user do so, or will use a Hanning window.
 168      */
 169     virtual InputDomain getInputDomain() const = 0;
 170
 171     /**
 172      * Get the preferred block size (window size -- the number of
 173      * sample frames passed in each block to the process() function).
 174      * This should be called before initialise().
 175      *
 176      * A plugin that can handle any block size may return 0.  The
 177      * final block size will be set in the initialise() call.
 178      */
 179     virtual size_t getPreferredBlockSize() const { return 0; }
 180
 181     /**
 182      * Get the preferred step size (window increment -- the distance
 183      * in sample frames between the start frames of consecutive blocks
 184      * passed to the process() function) for the plugin.  This should
 185      * be called before initialise().
 186      *
 187      * A plugin may return 0 if it has no particular interest in the
 188      * step size.  In this case, the host should make the step size
 189      * equal to the block size if the plugin is accepting input in the
 190      * time domain.  If the plugin is accepting input in the frequency
 191      * domain, the host may use any step size.  The final step size
 192      * will be set in the initialise() call.
 193      */
 194     virtual size_t getPreferredStepSize() const { return 0; }
 195
 196     /**
 197      * Get the minimum supported number of input channels.
 198      */
 199     virtual size_t getMinChannelCount() const { return 1; }
 200
 201     /**
 202      * Get the maximum supported number of input channels.
 203      */
 204     virtual size_t getMaxChannelCount() const { return 1; }
 205
 206     struct OutputDescriptor
 207     {
 208         /**
 209          * The name of the output, in computer-usable form.  Should be
 210          * reasonably short and without whitespace or punctuation, using
 211          * the characters [a-zA-Z0-9_-] only.
 212          * Example: "zero_crossing_count"
 213          */
 214         std::string identifier;
 215
 216         /**
 217          * The human-readable name of the output.
 218          * Example: "Zero Crossing Counts"
 219          */
 220         std::string name;
 221
 222         /**
 223          * A human-readable short text describing the output.  May be
 224          * empty if the name has said it all already.
 225          * Example: "The number of zero crossing points per processing block"
 226          */
 227         std::string description;
 228
 229         /**
 230          * The unit of the output, in human-readable form.
 231          */
 232         std::string unit;
 233
 234         /**
 235          * True if the output has the same number of values per sample
 236          * for every output sample.  Outputs for which this is false
 237          * are unlikely to be very useful in a general-purpose host.
 238          */
 239         bool hasFixedBinCount;
 240
 241         /**
 242          * The number of values per result of the output.  Undefined
 243          * if hasFixedBinCount is false.  If this is zero, the output
 244          * is point data (i.e. only the time of each output is of
 245          * interest, the value list will be empty).
 246          */
 247         size_t binCount;
 248
 249         /**
 250          * The (human-readable) names of each of the bins, if
 251          * appropriate.  This is always optional.
 252          */
 253         std::vector<std::string> binNames;
 254
 255         /**
 256          * True if the results in each output bin fall within a fixed
 257          * numeric range (minimum and maximum values).  Undefined if
 258          * binCount is zero.
 259          */
 260         bool hasKnownExtents;
 261
 262         /**
 263          * Minimum value of the results in the output.  Undefined if
 264          * hasKnownExtents is false or binCount is zero.
 265          */
 266         float minValue;
 267
 268         /**
 269          * Maximum value of the results in the output.  Undefined if
 270          * hasKnownExtents is false or binCount is zero.
 271          */
 272         float maxValue;
 273
 274         /**
 275          * True if the output values are quantized to a particular
 276          * resolution.  Undefined if binCount is zero.
 277          */
 278         bool isQuantized;
 279
 280         /**
 281          * Quantization resolution of the output values (e.g. 1.0 if
 282          * they are all integers).  Undefined if isQuantized is false
 283          * or binCount is zero.
 284          */
 285         float quantizeStep;
 286
 287         enum SampleType {
 288
 289             /// Results from each process() align with that call's block start
 290             OneSamplePerStep,
 291
 292             /// Results are evenly spaced in time (sampleRate specified below)
 293             FixedSampleRate,
 294
 295             /// Results are unevenly spaced and have individual timestamps
 296             VariableSampleRate
 297         };
 298
 299         /**
 300          * Positioning in time of the output results.
 301          */
 302         SampleType sampleType;
 303
 304         /**
 305          * Sample rate of the output results, as samples per second.
 306          * Undefined if sampleType is OneSamplePerStep.
 307          *
 308          * If sampleType is VariableSampleRate and this value is
 309          * non-zero, then it may be used to calculate a resolution for
 310          * the output (i.e. the "duration" of each sample, in time,
 311          * will be 1/sampleRate seconds).  It's recommended to set
 312          * this to zero if that behaviour is not desired.
 313          */
 314         float sampleRate;
 315
 316         /**
 317          * True if the returned results for this output are known to
 318          * have a duration field.
 319          */
 320         bool hasDuration;
 321
 322         OutputDescriptor() : // defaults for mandatory non-class-type members
 323             hasFixedBinCount(false), hasKnownExtents(false), isQuantized(false),
 324             sampleType(OneSamplePerStep), hasDuration(false) { }
 325     };
 326
 327     typedef std::vector<OutputDescriptor> OutputList;
 328
 329     /**
 330      * Get the outputs of this plugin.  An output's index in this list
 331      * is used as its numeric index when looking it up in the
 332      * FeatureSet returned from the process() call.
 333      */
 334     virtual OutputList getOutputDescriptors() const = 0;
 335
 336     struct Feature
 337     {
 338         /**
 339          * True if an output feature has its own timestamp.  This is
 340          * mandatory if the output has VariableSampleRate, optional if
 341          * the output has FixedSampleRate, and unused if the output
 342          * has OneSamplePerStep.
 343          */
 344         bool hasTimestamp;
 345
 346         /**
 347          * Timestamp of the output feature.  This is mandatory if the
 348          * output has VariableSampleRate or if the output has
 349          * FixedSampleRate and hasTimestamp is true, and unused
 350          * otherwise.
 351          */
 352         RealTime timestamp;
 353
 354         /**
 355          * True if an output feature has a specified duration.  This
 356          * is optional if the output has VariableSampleRate or
 357          * FixedSampleRate, and and unused if the output has
 358          * OneSamplePerStep.
 359          */
 360         bool hasDuration;
 361
 362         /**
 363          * Duration of the output feature.  This is mandatory if the
 364          * output has VariableSampleRate or FixedSampleRate and
 365          * hasDuration is true, and unused otherwise.
 366          */
 367         RealTime duration;
 368
 369         /**
 370          * Results for a single sample of this feature.  If the output
 371          * hasFixedBinCount, there must be the same number of values
 372          * as the output's binCount count.
 373          */
 374         std::vector<float> values;
 375
 376         /**
 377          * Label for the sample of this feature.
 378          */
 379         std::string label;
 380
 381         Feature() : // defaults for mandatory non-class-type members
 382             hasTimestamp(false), hasDuration(false) { }
 383     };
 384
 385     typedef std::vector<Feature> FeatureList;
 386
 387     typedef std::map<int, FeatureList> FeatureSet; // key is output no
 388
 389     /**
 390      * Process a single block of input data.
 391      *
 392      * If the plugin's inputDomain is TimeDomain, inputBuffers will
 393      * point to one array of floats per input channel, and each of
 394      * these arrays will contain blockSize consecutive audio samples
 395      * (the host will zero-pad as necessary).  The timestamp in this
 396      * case will be the real time in seconds of the start of the
 397      * supplied block of samples.
 398      *
 399      * If the plugin's inputDomain is FrequencyDomain, inputBuffers
 400      * will point to one array of floats per input channel, and each
 401      * of these arrays will contain blockSize/2+1 consecutive pairs of
 402      * real and imaginary component floats corresponding to bins
 403      * 0..(blockSize/2) of the FFT output.  That is, bin 0 (the first
 404      * pair of floats) contains the DC output, up to bin blockSize/2
 405      * which contains the Nyquist-frequency output.  There will
 406      * therefore be blockSize+2 floats per channel in total.  The
 407      * timestamp will be the real time in seconds of the centre of the
 408      * FFT input window (i.e. the very first block passed to process
 409      * might contain the FFT of half a block of zero samples and the
 410      * first half-block of the actual data, with a timestamp of zero).
 411      *
 412      * Return any features that have become available after this
 413      * process call.  (These do not necessarily have to fall within
 414      * the process block, except for OneSamplePerStep outputs.)
 415      */
 416     virtual FeatureSet process(const float *const *inputBuffers,
 417                                RealTime timestamp) = 0;
 418
 419     /**
 420      * After all blocks have been processed, calculate and return any
 421      * remaining features derived from the complete input.
 422      */
 423     virtual FeatureSet getRemainingFeatures() = 0;
 424
 425     /**
 426      * Used to distinguish between Vamp::Plugin and other potential
 427      * sibling subclasses of PluginBase.  Do not reimplement this
 428      * function in your subclass.
 429      */
 430     virtual std::string getType() const { return "Feature Extraction Plugin"; }
 431
 432 protected:
 433     Plugin(float inputSampleRate) :
 434         m_inputSampleRate(inputSampleRate) { }
 435
 436     float m_inputSampleRate;
 437 };
 438
 439 }
 440
 441 _VAMP_SDK_PLUGSPACE_END(Plugin.h)
 442
 443 #endif
 444
 445
 446