/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
return 2048;
}
-size_t
+size_t
LocalCandidatePYIN::getPreferredStepSize() const
{
return 256;
LocalCandidatePYIN::getParameterDescriptors() const
{
ParameterList list;
-
+
ParameterDescriptor d;
d.identifier = "threshdistr";
}
void
-LocalCandidatePYIN::setParameter(string identifier, float value)
+LocalCandidatePYIN::setParameter(string identifier, float value)
{
if (identifier == "threshdistr")
{
m_channels = channels;
m_stepSize = stepSize;
m_blockSize = blockSize;
-
+
reset();
return true;
void
LocalCandidatePYIN::reset()
-{
+{
m_pitchProb.clear();
m_timestamp.clear();
-/*
+/*
std::cerr << "LocalCandidatePYIN::reset"
<< ", blockSize = " << m_blockSize
<< std::endl;
{
int offset = m_preciseTime == 1.0 ? m_blockSize/2 : m_blockSize/4;
timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate));
-
+
double *dInputBuffers = new double[m_blockSize];
for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
-
+
size_t yinBufferSize = m_blockSize/2;
double* yinBuffer = new double[yinBufferSize];
if (!m_preciseTime) YinUtil::fastDifference(dInputBuffers, yinBuffer, yinBufferSize);
- else YinUtil::slowDifference(dInputBuffers, yinBuffer, yinBufferSize);
-
+ else YinUtil::slowDifference(dInputBuffers, yinBuffer, yinBufferSize);
+
delete [] dInputBuffers;
YinUtil::cumulativeDifference(yinBuffer, yinBufferSize);
-
+
float minFrequency = 60;
float maxFrequency = 900;
- vector<double> peakProbability = YinUtil::yinProb(yinBuffer,
- m_threshDistr,
- yinBufferSize,
- m_inputSampleRate/maxFrequency,
+ vector<double> peakProbability = YinUtil::yinProb(yinBuffer,
+ m_threshDistr,
+ yinBufferSize,
+ m_inputSampleRate/maxFrequency,
m_inputSampleRate/minFrequency);
vector<pair<double, double> > tempPitchProb;
{
if (peakProbability[iBuf] > 0)
{
- double currentF0 =
+ double currentF0 =
m_inputSampleRate * (1.0 /
YinUtil::parabolicInterpolation(yinBuffer, iBuf, yinBufferSize));
double tempPitch = 12 * std::log(currentF0/440)/std::log(2.) + 69;
vector<float> freqSum = vector<float>(m_nCandidate);
vector<float> freqNumber = vector<float>(m_nCandidate);
vector<float> freqMean = vector<float>(m_nCandidate);
-
+
boost::math::normal normalDist(0, 8); // semitones sd
float maxNormalDist = boost::math::pdf(normalDist, 0);
-
+
// Viterbi-decode multiple times with different frequencies emphasised
for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate)
{
float prob = 0;
for (size_t iProb = 0; iProb < m_pitchProb[iFrame].size(); ++iProb)
{
- pitch = m_pitchProb[iFrame][iProb].first;
- prob = m_pitchProb[iFrame][iProb].second *
+ pitch = m_pitchProb[iFrame][iProb].first;
+ prob = m_pitchProb[iFrame][iProb].second *
boost::math::pdf(normalDist, pitch-centrePitch) /
maxNormalDist * 2;
sumProb += prob;
for (size_t iCandidate = 0; iCandidate < m_nCandidate; ++iCandidate) {
for (size_t jCandidate = iCandidate+1; jCandidate < m_nCandidate; ++jCandidate) {
size_t countEqual = 0;
- for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
+ for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
{
if ((pitchTracks[jCandidate][iFrame] == 0 && pitchTracks[iCandidate][iFrame] == 0) ||
fabs(pitchTracks[iCandidate][iFrame]/pitchTracks[jCandidate][iFrame]-1)<0.01)
countEqual++;
}
- // std::cerr << "proportion equal: " << (countEqual * 1.0 / nFrame) << std::endl;
+ // std::cerr << "proportion equal: " << (countEqual * 1.0 / nFrame) << std::endl;
if (countEqual * 1.0 / nFrame > 0.8) {
if (freqNumber[iCandidate] > freqNumber[jCandidate]) {
duplicates.push_back(jCandidate);
{
bool isDuplicate = false;
for (size_t i = 0; i < duplicates.size(); ++i) {
-
+
if (duplicates[i] == iCandidate) {
isDuplicate = true;
break;
candidateLabels[iCandidate] = convert.str();
candidateActuals[iCandidate] = actualCandidateNumber;
// std::cerr << iCandidate << " " << actualCandidateNumber << " " << freqNumber[iCandidate] << " " << freqMean[iCandidate] << std::endl;
- for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
+ for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
{
if (pitchTracks[iCandidate][iFrame] > 0)
{
- // featureValues[m_timestamp[iFrame]][iCandidate] =
+ // featureValues[m_timestamp[iFrame]][iCandidate] =
// pitchTracks[iCandidate][iFrame];
outputFrequencies[iFrame].push_back(pitchTracks[iCandidate][iFrame]);
} else {
f.values = outputFrequencies[iFrame];
fs[0].push_back(f);
}
-
+
// I stopped using Chris's map stuff below because I couldn't get my head around it
//
// for (map<RealTime, map<int, float> >::const_iterator i =
// f.hasTimestamp = true;
// f.timestamp = i->first;
// int nextCandidate = candidateActuals.begin()->second;
- // for (map<int, float>::const_iterator j =
+ // for (map<int, float>::const_iterator j =
// i->second.begin(); j != i->second.end(); ++j) {
// while (candidateActuals[j->first] > nextCandidate) {
// f.values.push_back(0);
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
size_t m_blockSize;
float m_fmin;
float m_fmax;
-
+
mutable int m_oPitchTrackCandidates;
-
+
float m_threshDistr;
float m_outputUnvoiced;
float m_preciseTime;
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
{
obsProb.push_back(hmm.calculateObsProb(pitchProb[iFrame]));
}
-
+
vector<double> *scale = new vector<double>(pitchProb.size());
-
- vector<MonoNote::FrameOutput> out;
-
+
+ vector<MonoNote::FrameOutput> out;
+
vector<int> path = hmm.decodeViterbi(obsProb, scale);
-
+
for (size_t iFrame = 0; iFrame < path.size(); ++iFrame)
{
double currPitch = -1;
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
public:
MonoNote();
virtual ~MonoNote();
-
+
struct FrameOutput {
size_t frameNumber;
double pitch;
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
MonoNoteHMM::calculateObsProb(const vector<pair<double, double> > pitchProb)
{
// pitchProb is a list of pairs (pitches and their probabilities)
-
+
size_t nCandidate = pitchProb.size();
-
+
// what is the probability of pitched
double pIsPitched = 0;
for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate)
minDistCandidate = iCandidate;
}
}
- tempProb = std::pow(minDistProb, par.yinTrust) *
- boost::math::pdf(pitchDistr[i],
+ tempProb = std::pow(minDistProb, par.yinTrust) *
+ boost::math::pdf(pitchDistr[i],
pitchProb[minDistCandidate].first);
} else {
tempProb = 1;
out[i] = tempProb;
}
}
-
+
for (size_t i = 0; i < par.n; ++i)
{
if (i % par.nSPP != 2)
{
- if (tempProbSum > 0)
+ if (tempProbSum > 0)
{
out[i] = out[i] / tempProbSum * pIsPitched;
}
// 3-5. second-lowest pitch
// 3. attack state
// ...
-
+
// observation distributions
for (size_t iState = 0; iState < par.n; ++iState)
{
// silent state starts tracking
init.push_back(1.0/(par.nS * par.nPPS));
} else {
- init.push_back(0.0);
+ init.push_back(0.0);
}
}
pitchDistr[index+1] = boost::math::normal(mu, par.sigmaYinPitchStable);
pitchDistr[index+2] = boost::math::normal(mu, 1.0); // dummy
}
-
+
boost::math::normal noteDistanceDistr(0, par.sigma2Note);
for (size_t iPitch = 0; iPitch < (par.nS * par.nPPS); ++iPitch)
from.push_back(index+1);
to.push_back(index+1); // to itself
transProb.push_back(par.pStableSelftrans);
-
+
from.push_back(index+1);
to.push_back(index+2); // to silent
transProb.push_back(par.pStable2Silent);
from.push_back(index+2);
to.push_back(index+2);
transProb.push_back(par.pSilentSelftrans);
-
-
+
// the more complicated transitions from the silent
double probSumSilent = 0;
{
int fromPitch = iPitch;
int toPitch = jPitch;
- double semitoneDistance =
+ double semitoneDistance =
std::abs(fromPitch - toPitch) * 1.0 / par.nPPS;
-
+
// if (std::fmod(semitoneDistance, 1) == 0 && semitoneDistance > par.minSemitoneDistance)
- if (semitoneDistance == 0 ||
- (semitoneDistance > par.minSemitoneDistance
+ if (semitoneDistance == 0 ||
+ (semitoneDistance > par.minSemitoneDistance
&& semitoneDistance < par.maxJump))
{
size_t toIndex = jPitch * par.nSPP; // note attack index
- double tempWeightSilent = boost::math::pdf(noteDistanceDistr,
+ double tempWeightSilent = boost::math::pdf(noteDistanceDistr,
semitoneDistance);
probSumSilent += tempWeightSilent;
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
#include "MonoNoteParameters.h"
MonoNoteParameters::MonoNoteParameters() :
- minPitch(35),
- nPPS(3),
- nS(69),
+ minPitch(35),
+ nPPS(3),
+ nS(69),
nSPP(3), // states per pitch
n(0),
- initPi(0),
+ initPi(0),
pAttackSelftrans(0.9),
pStableSelftrans(0.99),
pStable2Silent(0.01),
- pSilentSelftrans(0.9999),
+ pSilentSelftrans(0.9999),
sigma2Note(0.7),
maxJump(13),
pInterSelftrans(0.0),
priorPitchedProb(.7),
priorWeight(0.5),
minSemitoneDistance(.5),
- sigmaYinPitchAttack(5),
+ sigmaYinPitchAttack(5),
sigmaYinPitchStable(0.8),
sigmaYinPitchInter(.1),
yinTrust(0.1)
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
public:
MonoNoteParameters();
virtual ~MonoNoteParameters();
-
+
// model architecture parameters
size_t minPitch; // lowest pitch in MIDI notes
size_t nPPS; // number of pitches per semitone
size_t nS; // number of semitones
size_t nSPP; // number of states per pitch
size_t n; // number of states (will be calcualted from other parameters)
-
+
// initial state probabilities
- vector<double> initPi;
-
+ vector<double> initPi;
+
// transition parameters
double pAttackSelftrans;
double pStableSelftrans;
double sigma2Note; // standard deviation of next note Gaussian distribution
double maxJump;
double pInterSelftrans;
-
+
double priorPitchedProb;
double priorWeight;
double minSemitoneDistance; // minimum distance for a transition
-
+
double sigmaYinPitchAttack;
double sigmaYinPitchStable;
double sigmaYinPitchInter;
-
+
double yinTrust;
-
};
#endif
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
{
obsProb.push_back(hmm.calculateObsProb(pitchProb[iFrame]));
}
-
+
vector<double> *scale = new vector<double>(0);
-
- vector<float> out;
-
+
+ vector<float> out;
+
// std::cerr << "before Viterbi decoding" << obsProb.size() << "ng" << obsProb[1].size() << std::endl;
vector<int> path = hmm.decodeViterbi(obsProb, scale);
// std::cerr << "after Viterbi decoding" << std::endl;
-
+
for (size_t iFrame = 0; iFrame < path.size(); ++iFrame)
{
// std::cerr << path[iFrame] << " " << hmm.m_freqs[path[iFrame]] << std::endl;
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
public:
MonoPitch();
virtual ~MonoPitch();
-
+
// pitchProb is a frame-wise vector carrying a vector of pitch-probability pairs
const vector<float> process(const vector<vector<pair<double, double> > > pitchProb);
private:
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
oldd = d;
}
}
-
+
double probReallyPitched = m_yinTrust * probYinPitched;
// std::cerr << probReallyPitched << " " << probYinPitched << std::endl;
// damn, I forget what this is all about...
{
// INITIAL VECTOR
init = vector<double>(2*m_nPitch, 1.0 / 2*m_nPitch);
-
+
// TRANSITIONS
for (size_t iPitch = 0; iPitch < m_nPitch; ++iPitch)
{
int theoreticalMinNextPitch = static_cast<int>(iPitch)-static_cast<int>(m_transitionWidth/2);
int minNextPitch = iPitch>m_transitionWidth/2 ? iPitch-m_transitionWidth/2 : 0;
int maxNextPitch = iPitch<m_nPitch-m_transitionWidth/2 ? iPitch+m_transitionWidth/2 : m_nPitch-1;
-
+
// WEIGHT VECTOR
double weightSum = 0;
vector<double> weights;
}
weightSum += weights[weights.size()-1];
}
-
+
// std::cerr << minNextPitch << " " << maxNextPitch << std::endl;
// TRANSITIONS TO CLOSE PITCH
for (size_t i = minNextPitch; i <= maxNextPitch; ++i)
to.push_back(i+m_nPitch);
transProb.push_back(weights[i-minNextPitch] / weightSum * m_selfTrans);
// transProb.push_back(weights[i-minNextPitch] / weightSum * 0.5);
-
+
from.push_back(iPitch+m_nPitch);
to.push_back(i);
transProb.push_back(weights[i-minNextPitch] / weightSum * (1-m_selfTrans));
// from.push_back(iPitch+m_nPitch);
// to.push_back(2*m_nPitch);
// transProb.push_back(1-m_selfTrans);
-
+
// TRANSITION FROM UNVOICED TO PITCH
// from.push_back(2*m_nPitch);
// to.push_back(iPitch+m_nPitch);
// from.push_back(2*m_nPitch);
// to.push_back(2*m_nPitch);
// transProb.push_back(m_selfTrans);
-
+
// for (size_t i = 0; i < from.size(); ++i) {
// std::cerr << "P(["<< from[i] << " --> " << to[i] << "]) = " << transProb[i] << std::endl;
// }
-
+
}
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
return 2048;
}
-size_t
+size_t
PYinVamp::getPreferredStepSize() const
{
return 256;
PYinVamp::getParameterDescriptors() const
{
ParameterList list;
-
+
ParameterDescriptor d;
d.identifier = "threshdistr";
}
void
-PYinVamp::setParameter(string identifier, float value)
+PYinVamp::setParameter(string identifier, float value)
{
if (identifier == "threshdistr")
{
OutputList outputs;
OutputDescriptor d;
-
+
int outputNumber = 0;
d.identifier = "f0candidates";
d.hasDuration = false;
outputs.push_back(d);
m_oF0Probs = outputNumber++;
-
+
d.identifier = "voicedprob";
d.name = "Voiced Probability";
d.description = "Probability that the signal is voiced according to Probabilistic Yin.";
d.hasDuration = false;
outputs.push_back(d);
m_oCandidateSalience = outputNumber++;
-
+
d.identifier = "smoothedpitchtrack";
d.name = "Smoothed Pitch Track";
d.description = ".";
m_channels = channels;
m_stepSize = stepSize;
m_blockSize = blockSize;
-
+
reset();
return true;
void
PYinVamp::reset()
-{
+{
m_yin.setThresholdDistr(m_threshDistr);
m_yin.setFrameSize(m_blockSize);
m_yin.setFast(!m_preciseTime);
-
+
m_pitchProb.clear();
m_timestamp.clear();
m_level.clear();
-/*
+/*
std::cerr << "PYinVamp::reset"
<< ", blockSize = " << m_blockSize
<< std::endl;
timestamp = timestamp + Vamp::RealTime::frame2RealTime(offset, lrintf(m_inputSampleRate));
FeatureSet fs;
-
+
float rms = 0;
-
+
double *dInputBuffers = new double[m_blockSize];
for (size_t i = 0; i < m_blockSize; ++i) {
dInputBuffers[i] = inputBuffers[0][i];
}
rms /= m_blockSize;
rms = sqrt(rms);
-
+
bool isLowAmplitude = (rms < m_lowAmp);
-
+
Yin::YinOutput yo = m_yin.processProbabilisticYin(dInputBuffers);
delete [] dInputBuffers;
m_level.push_back(yo.rms);
- // First, get the things out of the way that we don't want to output
+ // First, get the things out of the way that we don't want to output
// immediately, but instead save for later.
vector<pair<double, double> > tempPitchProb;
for (size_t iCandidate = 0; iCandidate < yo.freqProb.size(); ++iCandidate)
f.values.push_back(yo.freqProb[i].first);
}
fs[m_oF0Candidates].push_back(f);
-
+
// VOICEDPROB
f.values.clear();
float voicedProb = 0;
voicedProb += yo.freqProb[i].second;
}
fs[m_oF0Probs].push_back(f);
-
+
f.values.push_back(voicedProb);
fs[m_oVoicedProb].push_back(f);
Feature f;
f.hasTimestamp = true;
f.hasDuration = false;
-
+
if (m_pitchProb.empty()) {
return fs;
}
} else {
f.values.push_back(mpOut[iFrame]);
}
-
+
fs[m_oSmoothedPitchTrack].push_back(f);
}
-
+
// MONO-NOTE STUFF
// std::cerr << "Mono Note Stuff" << std::endl;
MonoNote mn;
}
// vector<MonoNote::FrameOutput> mnOut = mn.process(m_pitchProb);
vector<MonoNote::FrameOutput> mnOut = mn.process(smoothedPitch);
-
+
// turning feature into a note feature
f.hasTimestamp = true;
f.hasDuration = true;
f.values.clear();
-
+
int onsetFrame = 0;
bool isVoiced = 0;
bool oldIsVoiced = 0;
size_t nFrame = m_pitchProb.size();
float minNoteFrames = (m_inputSampleRate*m_pruneThresh) / m_stepSize;
-
+
std::vector<float> notePitchTrack; // collects pitches for one note at a time
for (size_t iFrame = 0; iFrame < nFrame; ++iFrame)
{
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
float m_fmin;
float m_fmax;
Yin m_yin;
-
+
mutable int m_oF0Candidates;
mutable int m_oF0Probs;
mutable int m_oVoicedProb;
--- /dev/null
+https://code.soundsoftware.ac.uk/projects/pyin
+https://code.soundsoftware.ac.uk/attachments/download/1458/pyin-v1.1.tar.gz
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
return(vector<double>());
}
-const std::vector<int>
+const std::vector<int>
SparseHMM::decodeViterbi(std::vector<vector<double> > obsProb,
- vector<double> *scale)
+ vector<double> *scale)
{
if (obsProb.size() < 1) {
return vector<int>();
size_t nState = init.size();
size_t nFrame = obsProb.size();
-
- // check for consistency
+
+ // check for consistency
size_t nTrans = transProb.size();
-
+
// declaring variables
std::vector<double> delta = std::vector<double>(nState);
std::vector<double> oldDelta = std::vector<double>(nState);
size_t toState;
double currentTransProb;
double currentValue;
-
+
// this is the "sparse" loop
for (size_t iTrans = 0; iTrans < nTrans; ++iTrans)
{
fromState = from[iTrans];
toState = to[iTrans];
currentTransProb = transProb[iTrans];
-
+
currentValue = oldDelta[fromState] * currentTransProb;
if (currentValue > delta[toState])
{
delta[toState] = currentValue; // will be multiplied by the right obs later!
psi[iFrame][toState] = fromState;
- }
+ }
}
-
+
for (size_t jState = 0; jState < nState; ++jState)
{
delta[jState] *= obsProb[iFrame][jState];
double currentValue = oldDelta[iState];
if (currentValue > bestValue)
{
- bestValue = currentValue;
+ bestValue = currentValue;
path[nFrame-1] = iState;
}
}
{
path[iFrame] = psi[iFrame+1][path[iFrame+1]];
}
-
+
// for (size_t iState = 0; iState < nState; ++iState)
// {
// // std::cerr << psi[2][iState] << std::endl;
// }
-
+
return path;
}
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
{
public:
virtual const std::vector<double> calculateObsProb(const vector<pair<double, double> >);
- const std::vector<int> decodeViterbi(std::vector<vector<double> > obs,
+ const std::vector<int> decodeViterbi(std::vector<vector<double> > obs,
vector<double> *scale);
vector<double> init;
vector<size_t> from;
/*\r
pYIN - A fundamental frequency estimator for monophonic audio\r
Centre for Digital Music, Queen Mary, University of London.\r
- \r
+\r
This program is free software; you can redistribute it and/or\r
modify it under the terms of the GNU General Public License as\r
published by the Free Software Foundation; either version 2 of the\r
\r
using std::vector;\r
\r
-Yin::Yin(size_t frameSize, size_t inputSampleRate, double thresh, bool fast) : \r
+Yin::Yin(size_t frameSize, size_t inputSampleRate, double thresh, bool fast) :\r
m_frameSize(frameSize),\r
m_inputSampleRate(inputSampleRate),\r
m_thresh(thresh),\r
}\r
}\r
\r
-Yin::~Yin() \r
+Yin::~Yin()\r
{\r
}\r
\r
Yin::YinOutput\r
Yin::process(const double *in) const {\r
- \r
+\r
double* yinBuffer = new double[m_yinBufferSize];\r
\r
// calculate aperiodicity function for all periods\r
\r
int tau = 0;\r
tau = YinUtil::absoluteThreshold(yinBuffer, m_yinBufferSize, m_thresh);\r
- \r
+\r
double interpolatedTau;\r
double aperiodicity;\r
double f0;\r
- \r
+\r
if (tau!=0)\r
{\r
interpolatedTau = YinUtil::parabolicInterpolation(yinBuffer, abs(tau), m_yinBufferSize);\r
{\r
yo.salience.push_back(yinBuffer[iBuf] < 1 ? 1-yinBuffer[iBuf] : 0); // why are the values sometimes < 0 if I don't check?\r
}\r
- \r
+\r
delete [] yinBuffer;\r
return yo;\r
}\r
\r
Yin::YinOutput\r
Yin::processProbabilisticYin(const double *in) const {\r
- \r
+\r
double* yinBuffer = new double[m_yinBufferSize];\r
\r
// calculate aperiodicity function for all periods\r
YinUtil::cumulativeDifference(yinBuffer, m_yinBufferSize);\r
\r
vector<double> peakProbability = YinUtil::yinProb(yinBuffer, m_threshDistr, m_yinBufferSize);\r
- \r
+\r
// calculate overall "probability" from peak probability\r
double probSum = 0;\r
for (size_t iBin = 0; iBin < m_yinBufferSize; ++iBin)\r
yo.salience.push_back(peakProbability[iBuf]);\r
if (peakProbability[iBuf] > 0)\r
{\r
- double currentF0 = \r
+ double currentF0 =\r
m_inputSampleRate * (1.0 /\r
YinUtil::parabolicInterpolation(yinBuffer, iBuf, m_yinBufferSize));\r
yo.freqProb.push_back(pair<double, double>(currentF0, peakProbability[iBuf]));\r
}\r
}\r
- \r
+\r
// std::cerr << yo.freqProb.size() << std::endl;\r
- \r
+\r
delete [] yinBuffer;\r
return yo;\r
}\r
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
using std::pair;
-
class Yin
{
public:
double rms;
vector<double> salience;
vector<pair<double, double> > freqProb;
- YinOutput() : f0(0), periodicity(0), rms(0),
+ YinOutput() : f0(0), periodicity(0), rms(0),
salience(vector<double>(0)), freqProb(vector<pair<double, double> >(0)) { }
YinOutput(double _f, double _p, double _r) :
- f0(_f), periodicity(_p), rms(_r),
+ f0(_f), periodicity(_p), rms(_r),
salience(vector<double>(0)), freqProb(vector<pair<double, double> >(0)) { }
YinOutput(double _f, double _p, double _r, vector<double> _salience) :
- f0(_f), periodicity(_p), rms(_r), salience(_salience),
+ f0(_f), periodicity(_p), rms(_r), salience(_salience),
freqProb(vector<pair<double, double> >(0)) { }
};
-
+
int setThreshold(double parameter);
int setThresholdDistr(float parameter);
int setFrameSize(size_t frameSize);
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
#include <boost/math/distributions.hpp>
-void
-YinUtil::slowDifference(const double *in, double *yinBuffer, const size_t yinBufferSize)
+void
+YinUtil::slowDifference(const double *in, double *yinBuffer, const size_t yinBufferSize)
{
yinBuffer[0] = 0;
double delta ;
delta = in[i+j] - in[j];
yinBuffer[i] += delta * delta;
}
- }
+ }
}
-void
-YinUtil::fastDifference(const double *in, double *yinBuffer, const size_t yinBufferSize)
+void
+YinUtil::fastDifference(const double *in, double *yinBuffer, const size_t yinBufferSize)
{
-
+
// DECLARE AND INITIALISE
// initialisation of most of the arrays here was done in a separate function,
// with all the arrays as members of the class... moved them back here.
-
+
size_t frameSize = 2 * yinBufferSize;
-
+
double *audioTransformedReal = new double[frameSize];
double *audioTransformedImag = new double[frameSize];
double *nullImag = new double[frameSize];
double *yinStyleACFReal = new double[frameSize];
double *yinStyleACFImag = new double[frameSize];
double *powerTerms = new double[yinBufferSize];
-
+
for (size_t j = 0; j < yinBufferSize; ++j)
{
yinBuffer[j] = 0.; // set to zero
powerTerms[j] = 0.; // set to zero
}
-
+
for (size_t j = 0; j < frameSize; ++j)
{
nullImag[j] = 0.;
yinStyleACFReal[j] = 0.;
yinStyleACFImag[j] = 0.;
}
-
+
// POWER TERM CALCULATION
// ... for the power terms in equation (7) in the Yin paper
powerTerms[0] = 0.0;
// now iteratively calculate all others (saves a few multiplications)
for (size_t tau = 1; tau < yinBufferSize; ++tau) {
- powerTerms[tau] = powerTerms[tau-1] - in[tau-1] * in[tau-1] + in[tau+yinBufferSize] * in[tau+yinBufferSize];
+ powerTerms[tau] = powerTerms[tau-1] - in[tau-1] * in[tau-1] + in[tau+yinBufferSize] * in[tau+yinBufferSize];
}
// YIN-STYLE AUTOCORRELATION via FFT
// 1. data
Vamp::FFT::forward(frameSize, in, nullImag, audioTransformedReal, audioTransformedImag);
-
+
// 2. half of the data, disguised as a convolution kernel
for (size_t j = 0; j < yinBufferSize; ++j) {
kernel[j] = in[yinBufferSize-1-j];
yinStyleACFImag[j] = audioTransformedReal[j]*kernelTransformedImag[j] + audioTransformedImag[j]*kernelTransformedReal[j]; // imaginary
}
Vamp::FFT::inverse(frameSize, yinStyleACFReal, yinStyleACFImag, audioTransformedReal, audioTransformedImag);
-
+
// CALCULATION OF difference function
// ... according to (7) in the Yin paper.
for (size_t j = 0; j < yinBufferSize; ++j) {
delete [] powerTerms;
}
-void
+void
YinUtil::cumulativeDifference(double *yinBuffer, const size_t yinBufferSize)
-{
+{
size_t tau;
-
+
yinBuffer[0] = 1;
-
+
double runningSum = 0;
-
+
for (tau = 1; tau < yinBufferSize; ++tau) {
runningSum += yinBuffer[tau];
if (runningSum == 0)
} else {
yinBuffer[tau] *= tau / runningSum;
}
- }
+ }
}
-int
+int
YinUtil::absoluteThreshold(const double *yinBuffer, const size_t yinBufferSize, const double thresh)
{
size_t tau;
size_t minTau = 0;
double minVal = 1000.;
-
+
// using Joren Six's "loop construct" from TarsosDSP
tau = 2;
while (tau < yinBufferSize)
static float single20[100] = {0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000};
std::vector<double>
-YinUtil::yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize, const size_t minTau0, const size_t maxTau0)
+YinUtil::yinProb(const double *yinBuffer, const size_t prior, const size_t yinBufferSize, const size_t minTau0, const size_t maxTau0)
{
size_t minTau = 2;
size_t maxTau = yinBufferSize;
std::vector<float> thresholds;
std::vector<float> distribution;
std::vector<double> peakProb = std::vector<double>(yinBufferSize);
-
+
size_t nThreshold = 100;
int nThresholdInt = nThreshold;
-
+
for (int i = 0; i < nThresholdInt; ++i)
{
switch (prior) {
}
thresholds.push_back(0.01 + i*0.01);
}
-
-
+
int currThreshInd = nThreshold-1;
tau = minTau;
-
+
// double factor = 1.0 / (0.25 * (nThresholdInt+1) * (nThresholdInt + 1)); // factor to scale down triangular weight
size_t minInd = 0;
float minVal = 42.f;
// {
// nonPeakProb -= peakProb[i];
// }
- //
+ //
// std::cerr << tau << " " << currThreshInd << " "<< thresholds[currThreshInd] << " " << distribution[currThreshInd] << std::endl;
float sumProb = 0;
while (tau+1 < maxTau)
tau++;
}
}
-
+
if (peakProb[minInd] > 1) {
std::cerr << "WARNING: yin has prob > 1 ??? I'm returning all zeros instead." << std::endl;
return(std::vector<double>(yinBufferSize));
}
-
+
double nonPeakProb = 1;
if (sumProb > 0) {
for (size_t i = minTau; i < maxTau; ++i)
}
if (minInd > 0)
{
- // std::cerr << "min set " << minVal << " " << minInd << " " << nonPeakProb << std::endl;
+ // std::cerr << "min set " << minVal << " " << minInd << " " << nonPeakProb << std::endl;
peakProb[minInd] += nonPeakProb * minWeight;
}
-
+
return peakProb;
}
double
-YinUtil::parabolicInterpolation(const double *yinBuffer, const size_t tau, const size_t yinBufferSize)
+YinUtil::parabolicInterpolation(const double *yinBuffer, const size_t tau, const size_t yinBufferSize)
{
// this is taken almost literally from Joren Six's Java implementation
if (tau == yinBufferSize) // not valid anyway.
{
return static_cast<double>(tau);
}
-
+
double betterTau = 0.0;
if (tau > 0 && tau < yinBufferSize-1) {
float s0, s1, s2;
s0 = yinBuffer[tau-1];
s1 = yinBuffer[tau];
s2 = yinBuffer[tau+1];
-
+
double adjustment = (s2 - s0) / (2 * (2 * s1 - s2 - s0));
-
+
if (abs(adjustment)>1) adjustment = 0;
-
+
betterTau = tau + adjustment;
} else {
// std::cerr << "WARNING: can't do interpolation at the edge (tau = " << tau << "), will return un-interpolated value.\n";
return betterTau;
}
-double
+double
YinUtil::sumSquare(const double *in, const size_t start, const size_t end)
{
double out = 0;
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
return 2048;
}
-size_t
+size_t
YinVamp::getPreferredStepSize() const
{
return 256;
YinVamp::getParameterDescriptors() const
{
ParameterList list;
-
+
ParameterDescriptor d;
d.identifier = "yinThreshold";
d.name = "Yin threshold";
d.defaultValue = 0.15f;
d.isQuantized = true;
d.quantizeStep = 0.025f;
-
+
list.push_back(d);
d.identifier = "outputunvoiced";
}
void
-YinVamp::setParameter(string identifier, float value)
+YinVamp::setParameter(string identifier, float value)
{
if (identifier == "yinThreshold")
{
OutputList outputs;
OutputDescriptor d;
-
+
int outputNumber = 0;
d.identifier = "f0";
m_channels = channels;
m_stepSize = stepSize;
m_blockSize = blockSize;
-
+
reset();
return true;
void
YinVamp::reset()
-{
+{
m_yin.setThreshold(m_yinParameter);
m_yin.setFrameSize(m_blockSize);
-/*
+/*
std::cerr << "YinVamp::reset: yin threshold set to " << (m_yinParameter)
<< ", blockSize = " << m_blockSize
<< std::endl;
{
timestamp = timestamp + Vamp::RealTime::frame2RealTime(m_blockSize/2, lrintf(m_inputSampleRate));
FeatureSet fs;
-
+
double *dInputBuffers = new double[m_blockSize];
for (size_t i = 0; i < m_blockSize; ++i) dInputBuffers[i] = inputBuffers[0][i];
-
+
Yin::YinOutput yo = m_yin.process(dInputBuffers);
// std::cerr << "f0 in YinVamp: " << yo.f0 << std::endl;
Feature f;
f.values.clear();
f.values.push_back(yo.rms);
fs[m_outNoRms].push_back(f);
-
+
f.values.clear();
for (size_t iBin = 0; iBin < yo.salience.size(); ++iBin)
{
f.values.push_back(yo.salience[iBin]);
}
fs[m_outNoSalience].push_back(f);
-
+
f.values.clear();
// f.values[0] = yo.periodicity;
f.values.push_back(yo.periodicity);
fs[m_outNoPeriodicity].push_back(f);
-
+
delete [] dInputBuffers;
return fs;
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
float m_fmin;
float m_fmax;
Yin m_yin;
-
+
mutable int m_outNoF0;
mutable int m_outNoPeriodicity;
mutable int m_outNoRms;
/*
pYIN - A fundamental frequency estimator for monophonic audio
Centre for Digital Music, Queen Mary, University of London.
-
+
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the