1 /* -*- c-basic-offset: 4 indent-tabs-mode: nil -*- vi:set ts=8 sts=4 sw=4: */
5 An audio time-stretching and pitch-shifting library.
6 Copyright 2007-2008 Chris Cannam.
8 This program is free software; you can redistribute it and/or
9 modify it under the terms of the GNU General Public License as
10 published by the Free Software Foundation; either version 2 of the
11 License, or (at your option) any later version. See the file
12 COPYING included with this distribution for more information.
15 #include "RubberBandStretcher.h"
27 #include "bsd-3rdparty/getopt/getopt.h"
37 using namespace RubberBand;
40 using RubberBand::gettimeofday;
41 using RubberBand::usleep;
// tempo_convert: turn the argument of the --tempo option into a factor that
// the caller multiplies into the time ratio.
//
// Two results are visible in this listing:
//   - 1.0 / m, returned when m is non-zero;
//   - m / n, returned when both m and n are non-zero.
// The help text printed by main() describes the matching argument forms as
// "<X>" (same as --time 1/X) and "<X>:<Y>" (same as --time X/Y).
//
// NOTE(review): the listing numbers jump, so several lines of this function
// are not shown here: how m and n are parsed, what is returned when a value
// is zero, and whether the strdup'd buffers are released. Confirm all three
// against the complete source before changing anything.
44 double tempo_convert(const char *str)
// Look for the ':' that separates the two values of the "<X>:<Y>" form.
46 const char *d = strchr(str, ':');
// Reciprocal result; the zero check avoids dividing by zero.
// (Presumably the no-colon case -- the guarding condition is not visible.)
50 if (m != 0.0) return 1.0 / m;
// Work on private copies: one of the whole argument and one of the text that
// follows the ':'.
54 char *a = strdup(str);
55 char *b = strdup(d+1);
// Quotient result, only when neither value is zero.
61 if (n != 0.0 && m != 0.0) return m / n;
// main: command-line front end. From the visible lines it
//   1. parses options with getopt_long,
//   2. prints version/help text when asked for or when the arguments are unusable,
//   3. opens the input and output sound files with sf_open,
//   4. builds a RubberBandStretcher from the chosen options and ratios,
//   5. reads the input in a "study" pass and again in a "process" pass,
//      writing retrieved output with sf_writef_float,
//   6. drains remaining output, closes the output file and prints statistics.
//
// NOTE(review): the listing numbers jump, so many lines of this function
// (braces, several declarations such as ratio, crispness, threading and ibs,
// loop bookkeeping, buffer releases, return statements) are not shown here.
// Comments below describe only what the visible lines establish; anything
// else is marked as an assumption.
65 int main(int argc, char **argv)
// Values collected from the command line.
70 double duration = 0.0;
71 double pitchshift = 0.0;
72 double frequencyshift = 1.0;
74 bool realtime = false;
77 bool lamination = true;
79 bool shortwin = false;
// Set when one of the individual transients/lamination/window options is
// given, so a clash with an explicit crispness level can be reported.
82 bool crispchanged = false;
// Set by any of the time/pitch options; without it the help text is shown.
88 bool haveRatio = false;
// Transient handling mode (the other enumerators are on lines not shown);
// the default is Transients.
92 BandLimitedTransients,
94 } transients = Transients;
// Long option table. Fields per entry: name, whether the option takes an
// argument (1) or not (0), flag pointer (unused, 0), and the code returned by
// getopt_long. Options that have no short letter use the codes '0'..'8', '%'
// and '@', none of which appear in the short option string passed below.
99 static struct option longOpts[] = {
100 { "help", 0, 0, 'h' },
101 { "version", 0, 0, 'V' },
102 { "time", 1, 0, 't' },
103 { "tempo", 1, 0, 'T' },
104 { "duration", 1, 0, 'D' },
105 { "pitch", 1, 0, 'p' },
106 { "frequency", 1, 0, 'f' },
107 { "crisp", 1, 0, 'c' },
108 { "crispness", 1, 0, 'c' },
109 { "debug", 1, 0, 'd' },
110 { "realtime", 0, 0, 'R' },
111 { "precise", 0, 0, 'P' },
112 { "formant", 0, 0, 'F' },
113 { "no-threads", 0, 0, '0' },
114 { "no-transients", 0, 0, '1' },
115 { "no-lamination", 0, 0, '2' },
116 { "window-long", 0, 0, '3' },
117 { "window-short", 0, 0, '4' },
118 { "bl-transients", 0, 0, '8' },
119 { "pitch-hq", 0, 0, '%' },
120 { "threads", 0, 0, '@' },
121 { "quiet", 0, 0, 'q' },
// Fetch the next option; a ':' after a letter means it takes an argument.
125 c = getopt_long(argc, argv, "t:p:d:RPFc:f:T:D:qhV", longOpts, &optionIndex);
129 case 'h': help = true; break;
130 case 'V': version = true; break;
// --time and --tempo multiply into ratio, so repeated or combined uses
// accumulate rather than replace one another.
131 case 't': ratio *= atof(optarg); haveRatio = true; break;
132 case 'T': ratio *= tempo_convert(optarg); haveRatio = true; break;
// --duration is only recorded here; it is turned into a ratio further down,
// once the input file has been opened.
133 case 'D': duration = atof(optarg); haveRatio = true; break;
134 case 'p': pitchshift = atof(optarg); haveRatio = true; break;
135 case 'f': frequencyshift = atof(optarg); haveRatio = true; break;
136 case 'd': debug = atoi(optarg); break;
137 case 'R': realtime = true; break;
138 case 'P': precise = true; break;
139 case 'F': formant = true; break;
// Threading preference: 1 for --no-threads, 2 for --threads.
140 case '0': threading = 1; break;
141 case '@': threading = 2; break;
142 case '1': transients = NoTransients; crispchanged = true; break;
143 case '2': lamination = false; crispchanged = true; break;
144 case '3': longwin = true; crispchanged = true; break;
145 case '4': shortwin = true; crispchanged = true; break;
146 case '8': transients = BandLimitedTransients; crispchanged = true; break;
147 case '%': hqpitch = true; break;
148 case 'c': crispness = atoi(optarg); break;
149 case 'q': quiet = true; break;
// Anything unrecognised falls back to showing the help text.
150 default: help = true; break;
// Version output (presumably guarded by the version flag -- guard not visible).
155 cerr << RUBBERBAND_VERSION << endl;
// Show usage when help was requested, when no time/pitch option was given, or
// when there are not exactly two remaining arguments (input and output file).
159 if (help || !haveRatio || optind + 2 != argc) {
161 cerr << "Rubber Band" << endl;
162 cerr << "An audio time-stretching and pitch-shifting library and utility program." << endl;
163 cerr << "Copyright 2008 Chris Cannam. Distributed under the GNU General Public License." << endl;
165 cerr << " Usage: " << argv[0] << " [options] <infile.wav> <outfile.wav>" << endl;
167 cerr << "You must specify at least one of the following time and pitch ratio options." << endl;
169 cerr << " -t<X>, --time <X> Stretch to X times original duration, or" << endl;
170 cerr << " -T<X>, --tempo <X> Change tempo by multiple X (same as --time 1/X), or" << endl;
171 cerr << " -T<X>, --tempo <X>:<Y> Change tempo from X to Y (same as --time X/Y), or" << endl;
172 cerr << " -D<X>, --duration <X> Stretch or squash to make output file X seconds long" << endl;
174 cerr << " -p<X>, --pitch <X> Raise pitch by X semitones, or" << endl;
175 cerr << " -f<X>, --frequency <X> Change frequency by multiple X" << endl;
177 cerr << "The following options provide a simple way to adjust the sound. See below" << endl;
178 cerr << "for more details." << endl;
180 cerr << " -c<N>, --crisp <N> Crispness (N = 0,1,2,3,4,5); default 4 (see below)" << endl;
181 cerr << " -F, --formant Enable formant preservation when pitch shifting" << endl;
183 cerr << "The remaining options fine-tune the processing mode and stretch algorithm." << endl;
184 cerr << "These are mostly included for test purposes; the default settings and standard" << endl;
185 cerr << "crispness parameter are intended to provide the best sounding set of options" << endl;
186 cerr << "for most situations. The default is to use none of these options." << endl;
188 cerr << " -P, --precise Aim for minimal time distortion (implied by -R)" << endl;
189 cerr << " -R, --realtime Select realtime mode (implies -P --no-threads)" << endl;
190 cerr << " --no-threads No extra threads regardless of CPU and channel count" << endl;
191 cerr << " --threads Assume multi-CPU even if only one CPU is identified" << endl;
192 cerr << " --no-transients Disable phase resynchronisation at transients" << endl;
193 cerr << " --bl-transients Band-limit phase resync to extreme frequencies" << endl;
194 cerr << " --no-lamination Disable phase lamination" << endl;
195 cerr << " --window-long Use longer processing window (actual size may vary)" << endl;
196 cerr << " --window-short Use shorter processing window" << endl;
197 cerr << " --pitch-hq In RT mode, use a slower, higher quality pitch shift" << endl;
199 cerr << " -d<N>, --debug <N> Select debug level (N = 0,1,2,3); default 0, full 3" << endl;
200 cerr << " (N.B. debug level 3 includes audible ticks in output)" << endl;
201 cerr << " -q, --quiet Suppress progress output" << endl;
203 cerr << " -V, --version Show version number and exit" << endl;
204 cerr << " -h, --help Show this help" << endl;
206 cerr << "\"Crispness\" levels:" << endl;
207 cerr << " -c 0 equivalent to --no-transients --no-lamination --window-long" << endl;
208 cerr << " -c 1 equivalent to --no-transients --no-lamination" << endl;
209 cerr << " -c 2 equivalent to --no-transients" << endl;
210 cerr << " -c 3 equivalent to --bl-transients" << endl;
211 cerr << " -c 4 default processing options" << endl;
212 cerr << " -c 5 equivalent to --no-lamination --window-short (may be good for drums)" << endl;
// An explicit crispness level wins over the individual options; warn when the
// user supplied both.
217 if (crispness >= 0 && crispchanged) {
218 cerr << "WARNING: Both crispness option and transients, lamination or window options" << endl;
219 cerr << " provided -- crispness will override these other options" << endl;
// Expand the crispness level into the individual settings. -1 is mapped to
// level 4 without touching the settings (presumably -1 is the "not given"
// initial value -- the declaration is not visible).
223 case -1: crispness = 4; break;
224 case 0: transients = NoTransients; lamination = false; longwin = true; shortwin = false; break;
225 case 1: transients = NoTransients; lamination = false; longwin = false; shortwin = false; break;
226 case 2: transients = NoTransients; lamination = true; longwin = false; shortwin = false; break;
227 case 3: transients = BandLimitedTransients; lamination = true; longwin = false; shortwin = false; break;
228 case 4: transients = Transients; lamination = true; longwin = false; shortwin = false; break;
229 case 5: transients = Transients; lamination = false; longwin = false; shortwin = true; break;
// Report the level in use together with a short description of its character.
233 cerr << "Using crispness level: " << crispness << " (";
235 case 0: cerr << "Mushy"; break;
236 case 1: cerr << "Smooth"; break;
237 case 2: cerr << "Balanced multitimbral mixture"; break;
238 case 3: cerr << "Unpitched percussion with stable notes"; break;
239 case 4: cerr << "Crisp monophonic instrumental"; break;
240 case 5: cerr << "Unpitched solo percussion"; break;
// The two remaining arguments are the input and output file names.
245 char *fileName = strdup(argv[optind++]);
246 char *fileNameOut = strdup(argv[optind++]);
// Clear the info structure, then open the input for reading; sf_open fills
// sfinfo in.
252 memset(&sfinfo, 0, sizeof(SF_INFO));
254 sndfile = sf_open(fileName, SFM_READ, &sfinfo);
256 cerr << "ERROR: Failed to open input file \"" << fileName << "\": "
257 << sf_strerror(sndfile) << endl;
// --duration: derive the ratio from the input length. This needs both the
// frame count and the sample rate from the header. Note that the result is
// assigned to ratio, replacing whatever --time/--tempo accumulated.
261 if (duration != 0.0) {
262 if (sfinfo.frames == 0 || sfinfo.samplerate == 0) {
263 cerr << "ERROR: File lacks frame count or sample rate in header, cannot use --duration" << endl;
266 double induration = double(sfinfo.frames) / double(sfinfo.samplerate);
267 if (induration != 0.0) ratio = duration / induration;
// The output file copies the input's channel count, format and sample rate;
// its frame count is the input frame count scaled by the time ratio.
// NOTE(review): the scaled frame count is narrowed to int here -- check the
// behaviour for very long inputs.
270 sfinfoOut.channels = sfinfo.channels;
271 sfinfoOut.format = sfinfo.format;
272 sfinfoOut.frames = int(sfinfo.frames * ratio + 0.1);
273 sfinfoOut.samplerate = sfinfo.samplerate;
274 sfinfoOut.sections = sfinfo.sections;
275 sfinfoOut.seekable = sfinfo.seekable;
277 sndfileOut = sf_open(fileNameOut, SFM_WRITE, &sfinfoOut) ;
279 cerr << "ERROR: Failed to open output file \"" << fileNameOut << "\" for writing: "
280 << sf_strerror(sndfileOut) << endl;
285 size_t channels = sfinfo.channels;
// Translate the collected settings into the stretcher's option flags.
287 RubberBandStretcher::Options options = 0;
288 if (realtime) options |= RubberBandStretcher::OptionProcessRealTime;
289 if (precise) options |= RubberBandStretcher::OptionStretchPrecise;
290 if (!lamination) options |= RubberBandStretcher::OptionPhaseIndependent;
291 if (longwin) options |= RubberBandStretcher::OptionWindowLong;
292 if (shortwin) options |= RubberBandStretcher::OptionWindowShort;
293 if (formant) options |= RubberBandStretcher::OptionFormantPreserved;
294 if (hqpitch) options |= RubberBandStretcher::OptionPitchHighQuality;
// Threading flag: one of Auto / Never / Always (presumably selected by the
// value of threading -- the selecting conditions are not visible).
298 options |= RubberBandStretcher::OptionThreadingAuto;
301 options |= RubberBandStretcher::OptionThreadingNever;
304 options |= RubberBandStretcher::OptionThreadingAlways;
// Transients flag. BandLimitedTransients maps to OptionTransientsMixed; the
// case labels for the Smooth and Crisp flags are not visible (presumably
// NoTransients and Transients respectively -- TODO confirm).
308 switch (transients) {
310 options |= RubberBandStretcher::OptionTransientsSmooth;
312 case BandLimitedTransients:
313 options |= RubberBandStretcher::OptionTransientsMixed;
316 options |= RubberBandStretcher::OptionTransientsCrisp;
// Fold the semitone shift into the frequency ratio: 12 semitones double it.
320 if (pitchshift != 0.0) {
321 frequencyshift *= pow(2.0, pitchshift / 12);
324 cerr << "Using time ratio " << ratio;
325 cerr << " and frequency ratio " << frequencyshift << endl;
// Record the start time; it is subtracted from the end time for the final
// elapsed-time report.
331 (void)gettimeofday(&tv, 0);
333 RubberBandStretcher::setDefaultDebugLevel(debug);
335 RubberBandStretcher ts(sfinfo.samplerate, channels, options,
336 ratio, frequencyshift);
338 ts.setExpectedInputDuration(sfinfo.frames);
// Input buffers: fbuf holds one block of interleaved samples as read from the
// file (indexed [frame * channels + channel] below); ibuf holds the same
// block split into one array per channel.
340 float *fbuf = new float[channels * ibs];
341 float **ibuf = new float *[channels];
342 for (size_t i = 0; i < channels; ++i) ibuf[i] = new float[ibs];
347 sf_seek(sndfile, 0, SEEK_SET);
// Pass 1: feed the whole input to ts.study() block by block.
// (Presumably skipped in realtime mode -- the guard is not visible.)
352 cerr << "Pass 1: Studying..." << endl;
355 while (frame < sfinfo.frames) {
// Stop on error or when nothing more could be read.
359 if ((count = sf_readf_float(sndfile, fbuf, ibs)) <= 0) break;
// De-interleave the block just read.
361 for (size_t c = 0; c < channels; ++c) {
362 for (int i = 0; i < count; ++i) {
363 float value = fbuf[i * channels + c];
// Mark the block as final when it reaches the end of the input.
368 bool final = (frame + ibs >= sfinfo.frames);
370 ts.study(ibuf, count, final);
// Progress display, refreshed when the whole-number percentage increases.
372 int p = int((double(frame) * 100.0) / sfinfo.frames);
373 if (p > percent || frame == 0) {
376 cerr << "\r" << percent << "% ";
384 cerr << "\rCalculating profile..." << endl;
// Rewind the input for the processing pass.
387 sf_seek(sndfile, 0, SEEK_SET);
// Running totals of frames fed in and written out, used in the final report.
393 size_t countIn = 0, countOut = 0;
// Pass 2: feed the input to ts.process() and write out whatever becomes
// available after each block.
395 while (frame < sfinfo.frames) {
// NOTE(review): unlike the study loop, this breaks only on a negative count.
// Whether frame still advances after a zero-length read is not visible here;
// confirm a short read cannot stall the loop.
399 if ((count = sf_readf_float(sndfile, fbuf, ibs)) < 0) break;
403 for (size_t c = 0; c < channels; ++c) {
404 for (int i = 0; i < count; ++i) {
405 float value = fbuf[i * channels + c];
410 bool final = (frame + ibs >= sfinfo.frames);
412 ts.process(ibuf, count, final);
414 int avail = ts.available();
415 if (debug > 1) cerr << "available = " << avail << endl;
// Per-channel output buffers sized to the available frame count. They are
// allocated on every pass through the loop (the release code is only partly
// visible in this listing).
418 float **obf = new float *[channels];
419 for (size_t i = 0; i < channels; ++i) {
420 obf[i] = new float[avail];
422 ts.retrieve(obf, avail);
// Clamp each sample to [-1, 1] and interleave for writing.
424 float *fobf = new float[channels * avail];
425 for (size_t c = 0; c < channels; ++c) {
426 for (int i = 0; i < avail; ++i) {
427 float value = obf[c][i];
428 if (value > 1.f) value = 1.f;
429 if (value < -1.f) value = -1.f;
430 fobf[i * channels + c] = value;
433 // cout << "fobf mean: ";
435 // for (int i = 0; i < avail; ++i) {
439 // cout << d << endl;
440 sf_writef_float(sndfileOut, fobf, avail);
442 for (size_t i = 0; i < channels; ++i) {
448 if (frame == 0 && !realtime && !quiet) {
449 cerr << "Pass 2: Processing..." << endl;
452 int p = int((double(frame) * 100.0) / sfinfo.frames);
453 if (p > percent || frame == 0) {
456 cerr << "\r" << percent << "% ";
464 cerr << "\r " << endl;
// Drain: all input has been supplied, so keep collecting output until
// available() reports a negative value.
468 while ((avail = ts.available()) >= 0) {
471 cerr << "(completing) available = " << avail << endl;
475 float **obf = new float *[channels];
476 for (size_t i = 0; i < channels; ++i) {
477 obf[i] = new float[avail];
479 ts.retrieve(obf, avail);
// Same clamp-and-interleave step as in the processing loop.
481 float *fobf = new float[channels * avail];
482 for (size_t c = 0; c < channels; ++c) {
483 for (int i = 0; i < avail; ++i) {
484 float value = obf[c][i];
485 if (value > 1.f) value = 1.f;
486 if (value < -1.f) value = -1.f;
487 fobf[i * channels + c] = value;
491 sf_writef_float(sndfileOut, fobf, avail);
493 for (size_t i = 0; i < channels; ++i) {
// Short pause before polling again (presumably only when nothing was
// available -- the condition is not visible).
499 RubberBand::usleep(10000);
507 sf_close(sndfileOut);
// Summary: frames in and out, the achieved ratio, the ideal output length and
// the difference between the two.
// NOTE(review): the achieved ratio divides by countIn; check what is printed
// when no input frames were processed.
511 cerr << "in: " << countIn << ", out: " << countOut << ", ratio: " << float(countOut)/float(countIn) << ", ideal output: " << lrint(countIn * ratio) << ", error: " << abs(lrint(countIn * ratio) - int(countOut)) << endl;
// Elapsed time = end time minus the start time recorded earlier, computed
// separately for seconds and microseconds.
517 (void)gettimeofday(&etv, 0);
519 etv.tv_sec -= tv.tv_sec;
// Add a full second of microseconds when the end value is smaller, so the
// subtraction below cannot go negative (the matching adjustment of tv_sec is
// presumably on a line not shown).
520 if (etv.tv_usec < tv.tv_usec) {
521 etv.tv_usec += 1000000;
524 etv.tv_usec -= tv.tv_usec;
526 double sec = double(etv.tv_sec) + (double(etv.tv_usec) / 1000000.0);
527 cerr << "elapsed time: " << sec << " sec, in frames/sec: " << countIn/sec << ", out frames/sec: " << countOut/sec << endl;