globally change all use of "frame" to refer to audio into "sample".
[ardour.git] / libs / audiographer / audiographer / general / silence_trimmer.h
1 #ifndef AUDIOGRAPHER_SILENCE_TRIMMER_H
2 #define AUDIOGRAPHER_SILENCE_TRIMMER_H
3
#include "audiographer/visibility.h"
#include "audiographer/debug_utils.h"
#include "audiographer/flag_debuggable.h"
#include "audiographer/sink.h"
#include "audiographer/exception.h"
#include "audiographer/utils/listed_source.h"

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
12
13 namespace AudioGrapher {
14
/** Decides whether a single sample counts as silence.
 *
 * Only the primary template is declared here: a specialisation needs to be
 * implemented for every datatype T.
 */
template<typename T> struct SilenceTester;

/** Silence test for float samples.
 *
 * Currently Ardour always uses Sample aka float.
 */
template <>
struct SilenceTester<float> {
	public:
	/** Converts the threshold from dB to a linear amplitude (10^(dB/20)).
	 *
	 * @param dB threshold in dB. Values at or below -318.8 (including
	 *           -INFINITY) give a linear threshold of 0, so only samples
	 *           whose magnitude is <= 0 are considered silent.
	 */
	SilenceTester (const float dB)
		: threshold (dB > -318.8f ? std::pow (10.0f, dB * 0.05f) : 0.0f)
	{
	}

	/** @return true if the magnitude of \a d does not exceed the threshold.
	 *
	 * Const so that a tester held by const reference can still be queried.
	 */
	bool is_silent (const float d) const {
		return std::fabs (d) <= threshold;
	}

	private:
	/// linear amplitude at or below which a sample is treated as silent
	float threshold;
};
31
32
33 /// Removes and adds silent samples to beginning and/or end of stream
34 template<typename T = DefaultSampleType>
35 class /*LIBAUDIOGRAPHER_API*/ SilenceTrimmer
36   : public ListedSource<T>
37   , public Sink<T>
38   , public FlagDebuggable<>
39   , public Throwing<>
40 {
41   public:
42
43         /// Constructor, \see reset() \n Not RT safe
44         SilenceTrimmer(samplecnt_t silence_buffer_size_ = 1024, float thresh_dB = -INFINITY)
45           : silence_buffer_size (0)
46           , silence_buffer (0)
47           , tester (thresh_dB)
48         {
49                 reset (silence_buffer_size_);
50                 add_supported_flag (ProcessContext<T>::EndOfInput);
51         }
52
53         ~SilenceTrimmer()
54         {
55                 delete [] silence_buffer;
56         }
57
58         /** Reset state \n Not RT safe
59           * Allocates a buffer the size of \a silence_buffer_size_
60           * This also defines the maximum length of output process context
61           * which can be output during long intermediate silence.
62           */
63         void reset (samplecnt_t silence_buffer_size_ = 1024)
64         {
65                 if (throw_level (ThrowObject) && silence_buffer_size_ == 0) {
66                         throw Exception (*this,
67                           "Silence trimmer constructor and reset() must be called with a non-zero parameter!");
68                 }
69
70                 if (silence_buffer_size != silence_buffer_size_) {
71                         silence_buffer_size = silence_buffer_size_;
72                         delete [] silence_buffer;
73                         silence_buffer = new T[silence_buffer_size];
74                         TypeUtils<T>::zero_fill (silence_buffer, silence_buffer_size);
75                 }
76
77                 processed_data = false;
78                 processing_finished = false;
79                 trim_beginning = false;
80                 trim_end = false;
81                 silence_samples = 0;
82                 max_output_frames = 0;
83                 add_to_beginning = 0;
84                 add_to_end = 0;
85         }
86
87         /** Tells that \a samples_per_channel samples of silence per channel should be added to beginning
88           * Needs to be called before starting processing.
89           * \n RT safe
90           */
91         void add_silence_to_beginning (samplecnt_t samples_per_channel)
92         {
93                 if (throw_level (ThrowObject) && processed_data) {
94                         throw Exception(*this, "Tried to add silence to beginning after processing started");
95                 }
96                 add_to_beginning = samples_per_channel;
97         }
98
99         /** Tells that \a samples_per_channel samples of silence per channel should be added to end
100           * Needs to be called before end is reached.
101           * \n RT safe
102           */
103         void add_silence_to_end (samplecnt_t samples_per_channel)
104         {
105                 if (throw_level (ThrowObject) && processed_data) {
106                         throw Exception(*this, "Tried to add silence to end after processing started");
107                 }
108                 add_to_end = samples_per_channel;
109         }
110
111         /** Tells whether ot nor silence should be trimmed from the beginning
112           * Has to be called before starting processing.
113           * \n RT safe
114           */
115         void set_trim_beginning (bool yn)
116         {
117                 if (throw_level (ThrowObject) && processed_data) {
118                         throw Exception(*this, "Tried to set beginning trim after processing started");
119                 }
120                 trim_beginning = yn;
121         }
122
123         /** Tells whether ot nor silence should be trimmed from the end
124           * Has to be called before the is reached.
125           * \n RT safe
126           */
127         void set_trim_end (bool yn)
128         {
129                 if (throw_level (ThrowObject) && processed_data) {
130                         throw Exception(*this, "Tried to set end trim after processing started");
131                 }
132                 trim_end = yn;
133         }
134
135         /** Process stream according to current settings.
136           * Note that some calls will not produce any output,
137           * while others may produce many. \see reset()
138           * \n RT safe
139           */
140         void process (ProcessContext<T> const & c)
141         {
142                 if (debug_level (DebugVerbose)) {
143                         debug_stream () << DebugUtils::demangled_name (*this) <<
144                                 "::process()" << std::endl;
145                 }
146
147                 check_flags (*this, c);
148
149                 if (throw_level (ThrowStrict) && processing_finished) {
150                         throw Exception(*this, "process() after reaching end of input");
151                 }
152
153                 // delay end of input propagation until output/processing is complete
154                 processing_finished = c.has_flag (ProcessContext<T>::EndOfInput);
155                 c.remove_flag (ProcessContext<T>::EndOfInput);
156
157                 /* TODO this needs a general overhaul.
158                  *
159                  * - decouple "required silence duration" from buffer-size.
160                  * - add hold-times for in/out
161                  * - optional high pass filter (for DC offset)
162                  * -> allocate a buffer "hold time" worth of samples.
163                  * check if all samples in buffer are above/below threshold,
164                  *
165                  * https://github.com/x42/silan/blob/master/src/main.c#L130
166                  * may lend itself for some inspiration.
167                  */
168
169                 samplecnt_t output_start_index = 0;
170                 samplecnt_t output_sample_count = c.samples();
171
172                 if (!processed_data) {
173                         if (trim_beginning) {
174                                 samplecnt_t first_non_silent_sample_index = 0;
175                                 if (find_first_non_silent_sample (c, first_non_silent_sample_index)) {
176                                         // output from start of non-silent data until end of buffer
177                                         // output_sample_count may also be altered in trim end
178                                         output_start_index = first_non_silent_sample_index;
179                                         output_sample_count = c.samples() - first_non_silent_sample_index;
180                                         processed_data = true;
181                                 } else {
182                                         // keep entering this block until non-silence is found to trim
183                                         processed_data = false;
184                                 }
185                         } else {
186                                 processed_data = true;
187                         }
188
189                         // This block won't be called again so add silence to beginning
190                         if (processed_data && add_to_beginning) {
191                                 add_to_beginning *= c.channels ();
192                                 output_silence_samples (c, add_to_beginning);
193                         }
194                 }
195
196                 if (processed_data) {
197                         if (trim_end) {
198                                 samplecnt_t first_non_silent_sample_index = 0;
199                                 if (find_first_non_silent_sample (c, first_non_silent_sample_index)) {
200                                         // context buffer contains non-silent data, flush any intermediate silence
201                                         output_silence_samples (c, silence_samples);
202
203                                         samplecnt_t silent_sample_index = 0;
204                                         find_last_silent_sample_reverse (c, silent_sample_index);
205
206                                         // Count of samples at end of block that are "silent", may be zero.
207                                         samplecnt_t silent_end_samples = c.samples () - silent_sample_index;
208                                         samplecnt_t samples_before_silence = c.samples() - silent_end_samples;
209
210                                         assert (samples_before_silence + silent_end_samples == c.samples ());
211
212                                         // output_start_index may be non-zero if start trim occurred above
213                                         output_sample_count = samples_before_silence - output_start_index;
214
215                                         // keep track of any silent samples not output
216                                         silence_samples = silent_end_samples;
217
218                                 } else {
219                                         // whole context buffer is silent output nothing
220                                         silence_samples += c.samples ();
221                                         output_sample_count = 0;
222                                 }
223                         }
224
225                         // now output data if any
226                         ConstProcessContext<T> c_out (c, &c.data()[output_start_index], output_sample_count);
227                         ListedSource<T>::output (c_out);
228                 }
229
230                 // Finally, if in last process call, add silence to end
231                 if (processing_finished && processed_data && add_to_end) {
232                         add_to_end *= c.channels();
233                         output_silence_samples (c, add_to_end);
234                 }
235
236                 if (processing_finished) {
237                         // reset flag removed previous to processing above
238                         c.set_flag (ProcessContext<T>::EndOfInput);
239
240                         // Finally mark write complete by writing nothing with EndOfInput set
241                         // whether or not any data has been written
242                         ConstProcessContext<T> c_out(c, silence_buffer, 0);
243                         c_out().set_flag (ProcessContext<T>::EndOfInput);
244                         ListedSource<T>::output (c_out);
245                 }
246
247         }
248
249         using Sink<T>::process;
250
251 private:
252
253         bool find_first_non_silent_sample (ProcessContext<T> const & c, samplecnt_t & result_sample)
254         {
255                 for (samplecnt_t i = 0; i < c.samples(); ++i) {
256                         if (!tester.is_silent (c.data()[i])) {
257                                 result_sample = i;
258                                 // Round down to nearest interleaved "frame" beginning
259                                 result_sample -= result_sample % c.channels();
260                                 return true;
261                         }
262                 }
263                 return false;
264         }
265
266         /**
267          * Reverse find the last silent sample index. If the last sample in the
268          * buffer is non-silent the index will be one past the end of the buffer and
269          * equal to c.samples(). e.g silent_end_samples = c.samples() - result_sample
270          *
271          * @return true if result_sample index is valid, false if there were only
272          * silent samples in the context buffer
273          */
274         bool find_last_silent_sample_reverse (ProcessContext<T> const & c, samplecnt_t & result_sample)
275         {
276                 samplecnt_t last_sample_index = c.samples() - 1;
277
278                 for (samplecnt_t i = last_sample_index; i >= 0; --i) {
279                         if (!tester.is_silent (c.data()[i])) {
280                                 result_sample = i;
281                                 // Round down to nearest interleaved "frame" beginning
282                                 result_sample -= result_sample % c.channels();
283                                 // Round up to return the "last" silent interleaved sample
284                                 result_sample += c.channels();
285                                 return true;
286                         }
287                 }
288                 return false;
289         }
290
291         void output_silence_samples (ProcessContext<T> const & c, samplecnt_t & total_samples)
292         {
293                 assert (!c.has_flag (ProcessContext<T>::EndOfInput));
294
295                 while (total_samples > 0) {
296                         samplecnt_t samples = std::min (silence_buffer_size, total_samples);
297                         if (max_output_frames) {
298                                 samples = std::min (samples, max_output_frames);
299                         }
300                         samples -= samples % c.channels();
301
302                         total_samples -= samples;
303                         ConstProcessContext<T> c_out (c, silence_buffer, samples);
304                         ListedSource<T>::output (c_out);
305                 }
306         }
307
308         bool       processed_data;
309         bool       processing_finished;
310
311         bool       trim_beginning;
312         bool       trim_end;
313
314         samplecnt_t silence_samples;
315         samplecnt_t max_output_frames;
316
317         samplecnt_t add_to_beginning;
318         samplecnt_t add_to_end;
319
320         samplecnt_t silence_buffer_size;
321         T *        silence_buffer;
322
323         SilenceTester<T> tester;
324 };
325
326 } // namespace
327
328 #endif // AUDIOGRAPHER_SILENCE_TRIMMER_H