use a note tracker to resolve notes cut off during render by the end of the region
[ardour.git] / libs / vamp-pyin / MonoNoteHMM.cpp
1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*-  vi:set ts=8 sts=4 sw=4: */
2
3 /*
4     pYIN - A fundamental frequency estimator for monophonic audio
5     Centre for Digital Music, Queen Mary, University of London.
6
7     This program is free software; you can redistribute it and/or
8     modify it under the terms of the GNU General Public License as
9     published by the Free Software Foundation; either version 2 of the
10     License, or (at your option) any later version.  See the file
11     COPYING included with this distribution for more information.
12 */
13
14 #include "MonoNoteHMM.h"
15
16 #include <boost/math/distributions.hpp>
17
18 #include <cstdio>
19 #include <cmath>
20
21 using std::vector;
22 using std::pair;
23
24 MonoNoteHMM::MonoNoteHMM() :
25     par()
26 {
27     build();
28 }
29
30 const vector<double>
31 MonoNoteHMM::calculateObsProb(const vector<pair<double, double> > pitchProb)
32 {
33     // pitchProb is a list of pairs (pitches and their probabilities)
34
35     size_t nCandidate = pitchProb.size();
36
37     // what is the probability of pitched
38     double pIsPitched = 0;
39     for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate)
40     {
41         // pIsPitched = pitchProb[iCandidate].second > pIsPitched ? pitchProb[iCandidate].second : pIsPitched;
42         pIsPitched += pitchProb[iCandidate].second;
43     }
44
45     // pIsPitched = std::pow(pIsPitched, (1-par.priorWeight)) * std::pow(par.priorPitchedProb, par.priorWeight);
46     pIsPitched = pIsPitched * (1-par.priorWeight) + par.priorPitchedProb * par.priorWeight;
47
48     vector<double> out = vector<double>(par.n);
49     double tempProbSum = 0;
50     for (size_t i = 0; i < par.n; ++i)
51     {
52         if (i % par.nSPP != 2)
53         {
54             // std::cerr << getMidiPitch(i) << std::endl;
55             double tempProb = 0;
56             if (nCandidate > 0)
57             {
58                 double minDist = 10000.0;
59                 double minDistProb = 0;
60                 size_t minDistCandidate = 0;
61                 for (size_t iCandidate = 0; iCandidate < nCandidate; ++iCandidate)
62                 {
63                     double currDist = std::abs(getMidiPitch(i)-pitchProb[iCandidate].first);
64                     if (currDist < minDist)
65                     {
66                         minDist = currDist;
67                         minDistProb = pitchProb[iCandidate].second;
68                         minDistCandidate = iCandidate;
69                     }
70                 }
71                 tempProb = std::pow(minDistProb, par.yinTrust) *
72                            boost::math::pdf(pitchDistr[i],
73                                             pitchProb[minDistCandidate].first);
74             } else {
75                 tempProb = 1;
76             }
77             tempProbSum += tempProb;
78             out[i] = tempProb;
79         }
80     }
81
82     for (size_t i = 0; i < par.n; ++i)
83     {
84         if (i % par.nSPP != 2)
85         {
86             if (tempProbSum > 0)
87             {
88                 out[i] = out[i] / tempProbSum * pIsPitched;
89             }
90         } else {
91             out[i] = (1-pIsPitched) / (par.nPPS * par.nS);
92         }
93     }
94
95     return(out);
96 }
97
98 void
99 MonoNoteHMM::build()
100 {
101     // the states are organised as follows:
102     // 0-2. lowest pitch
103     //    0. attack state
104     //    1. stable state
105     //    2. silent state
106     // 3-5. second-lowest pitch
107     //    3. attack state
108     //    ...
109
110     // observation distributions
111     for (size_t iState = 0; iState < par.n; ++iState)
112     {
113         pitchDistr.push_back(boost::math::normal(0,1));
114         if (iState % par.nSPP == 2)
115         {
116             // silent state starts tracking
117             init.push_back(1.0/(par.nS * par.nPPS));
118         } else {
119             init.push_back(0.0);
120         }
121     }
122
123     for (size_t iPitch = 0; iPitch < (par.nS * par.nPPS); ++iPitch)
124     {
125         size_t index = iPitch * par.nSPP;
126         double mu = par.minPitch + iPitch * 1.0/par.nPPS;
127         pitchDistr[index] = boost::math::normal(mu, par.sigmaYinPitchAttack);
128         pitchDistr[index+1] = boost::math::normal(mu, par.sigmaYinPitchStable);
129         pitchDistr[index+2] = boost::math::normal(mu, 1.0); // dummy
130     }
131
132     boost::math::normal noteDistanceDistr(0, par.sigma2Note);
133
134     for (size_t iPitch = 0; iPitch < (par.nS * par.nPPS); ++iPitch)
135     {
136         // loop through all notes and set sparse transition probabilities
137         size_t index = iPitch * par.nSPP;
138
139         // transitions from attack state
140         from.push_back(index);
141         to.push_back(index);
142         transProb.push_back(par.pAttackSelftrans);
143
144         from.push_back(index);
145         to.push_back(index+1);
146         transProb.push_back(1-par.pAttackSelftrans);
147
148         // transitions from stable state
149         from.push_back(index+1);
150         to.push_back(index+1); // to itself
151         transProb.push_back(par.pStableSelftrans);
152
153         from.push_back(index+1);
154         to.push_back(index+2); // to silent
155         transProb.push_back(par.pStable2Silent);
156
157         // the "easy" transitions from silent state
158         from.push_back(index+2);
159         to.push_back(index+2);
160         transProb.push_back(par.pSilentSelftrans);
161
162         // the more complicated transitions from the silent
163         double probSumSilent = 0;
164
165         vector<double> tempTransProbSilent;
166         for (size_t jPitch = 0; jPitch < (par.nS * par.nPPS); ++jPitch)
167         {
168             int fromPitch = iPitch;
169             int toPitch = jPitch;
170             double semitoneDistance =
171                 std::abs(fromPitch - toPitch) * 1.0 / par.nPPS;
172
173             // if (std::fmod(semitoneDistance, 1) == 0 && semitoneDistance > par.minSemitoneDistance)
174             if (semitoneDistance == 0 ||
175                 (semitoneDistance > par.minSemitoneDistance
176                  && semitoneDistance < par.maxJump))
177             {
178                 size_t toIndex = jPitch * par.nSPP; // note attack index
179
180                 double tempWeightSilent = boost::math::pdf(noteDistanceDistr,
181                                                            semitoneDistance);
182                 probSumSilent += tempWeightSilent;
183
184                 tempTransProbSilent.push_back(tempWeightSilent);
185
186                 from.push_back(index+2);
187                 to.push_back(toIndex);
188             }
189         }
190         for (size_t i = 0; i < tempTransProbSilent.size(); ++i)
191         {
192             transProb.push_back((1-par.pSilentSelftrans) * tempTransProbSilent[i]/probSumSilent);
193         }
194     }
195 }
196
197 double
198 MonoNoteHMM::getMidiPitch(size_t index)
199 {
200     return pitchDistr[index].mean();
201 }
202
203 double
204 MonoNoteHMM::getFrequency(size_t index)
205 {
206     return 440 * pow(2, (pitchDistr[index].mean()-69)/12);
207 }