update video import flags, relax GOP size.
[ardour.git] / gtk2_ardour / transcode_ffmpeg.cc
1 /*
2     Copyright (C) 2010-2013 Paul Davis
3     Author: Robin Gareus <robin@gareus.org>
4
5     This program is free software; you can redistribute it and/or modify
6     it under the terms of the GNU General Public License as published by
7     the Free Software Foundation; either version 2 of the License, or
8     (at your option) any later version.
9
10     This program is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13     GNU General Public License for more details.
14
15     You should have received a copy of the GNU General Public License
16     along with this program; if not, write to the Free Software
17     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18
19 */
20 #include <stdio.h>
21 #include <string.h>
22 #include <sstream>
23 #include <sys/types.h>
24
25 #include "pbd/error.h"
26 #include "pbd/convert.h"
27 #include "pbd/file_utils.h"
28 #include "gui_thread.h"
29
30 #include "transcode_ffmpeg.h"
31 #include "utils_videotl.h"
32 #include "video_tool_paths.h"
33
34 #include "i18n.h"
35
36 using namespace PBD;
37 using namespace VideoUtils;
38
39 TranscodeFfmpeg::TranscodeFfmpeg (std::string f)
40         : infile(f)
41 {
42         probeok = false;
43         ffexecok = false;
44         m_duration = 0;
45         m_avoffset = m_lead_in = m_lead_out = 0;
46         m_width = m_height = 0;
47         m_aspect = m_fps = 0;
48         m_sar = "";
49 #if 1 /* tentative debug mode */
50         debug_enable = false;
51 #endif
52
53         if (!ArdourVideoToolPaths::transcoder_exe(ffmpeg_exe, ffprobe_exe)) {
54                 warning << string_compose(
55                                 _(
56                                         "No ffprobe or ffmpeg executables could be found on this system.\n"
57                                         "Video import and export is not possible until you install those tools.\n"
58                                         "%1 requires ffmpeg and ffprobe from ffmpeg.org - version 1.1 or newer.\n"
59                                         "\n"
60                                         "The tools are included with the %1 releases from ardour.org "
61                                         "and also available with the video-server at http://x42.github.com/harvid/\n"
62                                         "\n"
63                                         "Important: the files need to be installed in $PATH and named ffmpeg_harvid and ffprobe_harvid.\n"
64                                         "If you already have a suitable ffmpeg installation on your system, we recommend creating "
65                                         "symbolic links from ffmpeg to ffmpeg_harvid and from ffprobe to ffprobe_harvid.\n"
66                                         "\n"
67                                         "see also http://manual.ardour.org/video-timeline/setup/"
68                                  ), PROGRAM_NAME) << endmsg;
69                 return;
70         }
71         ffexecok = true;
72
73         if (infile.empty() || !probe()) {
74                 return;
75         }
76         probeok = true;
77 }
78
79 TranscodeFfmpeg::~TranscodeFfmpeg ()
80 {
81   ;
82 }
83
84 bool
85 TranscodeFfmpeg::probe ()
86 {
87         ffoutput = "";
88         char **argp;
89         argp=(char**) calloc(7,sizeof(char*));
90         argp[0] = strdup(ffprobe_exe.c_str());
91         argp[1] = strdup("-print_format");
92         argp[2] = strdup("csv=nk=0");
93         argp[3] = strdup("-show_format");
94         argp[4] = strdup("-show_streams");
95         argp[5] = strdup(infile.c_str());
96         argp[6] = 0;
97         ffcmd = new ARDOUR::SystemExec(ffprobe_exe, argp);
98         ffcmd->ReadStdout.connect_same_thread (*this, boost::bind (&TranscodeFfmpeg::ffprobeparse, this, _1 ,_2));
99         ffcmd->Terminated.connect_same_thread (*this, boost::bind (&TranscodeFfmpeg::ffexit, this));
100         if (ffcmd->start(1)) {
101                 ffexit();
102                 return false;
103         }
104
105         /* wait for ffprobe process to exit */
106         ffcmd->wait();
107
108         /* wait for interposer thread to copy all data.
109          * SystemExec::Terminated is emitted and ffcmd set to NULL */
110         int timeout = 300; // 1.5 sec
111         while (ffcmd && --timeout > 0) {
112                 Glib::usleep(5000);
113         }
114         if (timeout == 0 || ffoutput.empty()) {
115                 return false;
116         }
117
118         /* parse */
119
120         std::vector<std::vector<std::string> > lines;
121         ParseCSV(ffoutput, lines);
122         double timebase = 0;
123         m_width = m_height = 0;
124         m_fps = m_aspect = 0;
125         m_duration = 0;
126         m_sar.clear();
127         m_codec.clear();
128         m_audio.clear();
129
130 #define PARSE_FRACTIONAL_FPS(VAR) \
131         { \
132                 std::string::size_type pos; \
133                 VAR = atof(value); \
134                 pos = value.find_first_of('/'); \
135                 if (pos != std::string::npos) { \
136                         VAR = atof(value.substr(0, pos)) / atof(value.substr(pos+1)); \
137                 } \
138         }
139
140         for (std::vector<std::vector<std::string> >::iterator i = lines.begin(); i != lines.end(); ++i) {
141                 if (i->at(0) == X_("format")) {
142                         /* format,filename,#streams,format-name,format-long-name,start-time,duration,size,bitrate */
143                 } else
144                 if (i->at(0) == X_("stream")) {
145                         if (i->at(5) == X_("codec_type=video") && m_width == 0) {
146
147                                 for (std::vector<std::string>::iterator kv = i->begin(); kv != i->end(); ++kv) {
148                                         const size_t kvsep = kv->find('=');
149                                         if(kvsep == std::string::npos) continue;
150                                         std::string key = kv->substr(0, kvsep);
151                                         std::string value = kv->substr(kvsep + 1);
152
153                                         if (key == X_("index")) {
154                                                 m_videoidx = atoi(value);
155                                         } else if (key == X_("width")) {
156                                                 m_width = atoi(value);
157                                         } else if (key == X_("height")) {
158                                                 m_height = atoi(value);
159                                         } else if (key == X_("codec_name")) {
160                                                 if (!m_codec.empty()) m_codec += " ";
161                                                 m_codec += value;
162                                         } else if (key == X_("codec_long_name")) {
163                                                 if (!m_codec.empty()) m_codec += " ";
164                                                 m_codec += "[" + value + "]";
165                                         } else if (key == X_("codec_tag_string")) {
166                                                 if (!m_codec.empty()) m_codec += " ";
167                                                 m_codec += "(" + value + ")";
168                                         } else if (key == X_("r_frame_rate")) {
169                                                 PARSE_FRACTIONAL_FPS(m_fps)
170                                         } else if (key == X_("avg_frame_rate") && m_fps == 0) {
171                                                 PARSE_FRACTIONAL_FPS(m_fps)
172                                         } else if (key == X_("time_base")) {
173                                                 PARSE_FRACTIONAL_FPS(timebase)
174                                         } else if (key == X_("timecode") && m_duration == 0) {
175                                                 int h,m,s; char f[7];
176                                                 if (sscanf(i->at(16).c_str(), "%d:%d:%d:%s",&h,&m,&s,f) == 4) {
177                                                         m_duration = (ARDOUR::framecnt_t) floor(m_fps * (
178                                                                         h * 3600.0
179                                                                 + m * 60.0
180                                                                 + s * 1.0
181                                                                 + atoi(f) / pow((double)10, (int)strlen(f))
182                                                         ));
183                                                 }
184                                         } else if (key == X_("duration_ts") && m_fps == 0 && timebase !=0 ) {
185                                                 m_duration = atof(value) * m_fps * timebase;
186                                         } else if (key == X_("duration") && m_fps != 0 && m_duration == 0) {
187                                                 m_duration = atof(value) * m_fps;
188                                         } else if (key == X_("sample_aspect_ratio")) {
189                                                 std::string::size_type pos;
190                                                 pos = value.find_first_of(':');
191                                                 if (pos != std::string::npos && atof(value.substr(pos+1)) != 0) {
192                                                         m_sar = value;
193                                                         m_sar.replace(pos, 1, "/");
194                                                 }
195                                         } else if (key == X_("display_aspect_ratio")) {
196                                                 std::string::size_type pos;
197                                                 pos = value.find_first_of(':');
198                                                 if (pos != std::string::npos && atof(value.substr(pos+1)) != 0) {
199                                                         m_aspect = atof(value.substr(0, pos)) / atof(value.substr(pos+1));
200                                                 }
201                                         }
202                                 }
203
204                                 if (m_aspect == 0) {
205                                         m_aspect = (double)m_width / (double)m_height;
206                                 }
207
208                         } else if (i->at(5) == X_("codec_type=audio")) { /* new ffprobe */
209                                 FFAudioStream as;
210                                 for (std::vector<std::string>::iterator kv = i->begin(); kv != i->end(); ++kv) {
211                                         const size_t kvsep = kv->find('=');
212                                         if(kvsep == std::string::npos) continue;
213                                         std::string key = kv->substr(0, kvsep);
214                                         std::string value = kv->substr(kvsep + 1);
215
216                                         if (key == X_("channels")) {
217                                                 as.channels   = atoi(value);
218                                         } else if (key == X_("index")) {
219                                                 as.stream_id  = value;
220                                         } else if (key == X_("codec_long_name")) {
221                                                 if (!as.name.empty()) as.name += " ";
222                                                 as.name += value;
223                                         } else if (key == X_("codec_name")) {
224                                                 if (!as.name.empty()) as.name += " ";
225                                                 as.name += value;
226                                         } else if (key == X_("sample_fmt")) {
227                                                 if (!as.name.empty()) as.name += " ";
228                                                 as.name += "FMT:" + value;
229                                         } else if (key == X_("sample_rate")) {
230                                                 if (!as.name.empty()) as.name += " ";
231                                                 as.name += "SR:" + value;
232                                         }
233
234                                 }
235                                 m_audio.push_back(as);
236                         }
237                 }
238         }
239         /* end parse */
240
241 #if 0 /* DEBUG */
242         printf("FPS: %f\n", m_fps);
243         printf("Duration: %lu frames\n",(unsigned long)m_duration);
244         printf("W/H: %ix%i\n",m_width, m_height);
245         printf("aspect: %f\n",m_aspect);
246         printf("codec: %s\n",m_codec.c_str());
247         if (m_audio.size() > 0) {
248                 for (AudioStreams::iterator it = m_audio.begin(); it < m_audio.end(); ++it) {
249                         printf("audio: %s - %i channels\n",(*it).stream_id.c_str(), (*it).channels);
250                 }
251         } else {
252           printf("audio: no audio streams in file.\n");
253         }
254 #endif
255
256         return true;
257 }
258
259 TranscodeFfmpeg::FFSettings
260 TranscodeFfmpeg::default_encoder_settings ()
261 {
262         TranscodeFfmpeg::FFSettings ffs;
263         ffs.clear();
264         ffs["-vcodec"] = "mpeg4";
265         ffs["-acodec"] = "ac3";
266         ffs["-b:v"] = "5000k";
267         ffs["-b:a"] = "160k";
268         return ffs;
269 }
270
271 TranscodeFfmpeg::FFSettings
272 TranscodeFfmpeg::default_meta_data ()
273 {
274         TranscodeFfmpeg::FFSettings ffm;
275         ffm.clear();
276         ffm["comment"] = "Created with " PROGRAM_NAME;
277         return ffm;
278 }
279
280 char *
281 TranscodeFfmpeg::format_metadata (std::string key, std::string value)
282 {
283         size_t start_pos = 0;
284         std::string v1 = value;
285         while((start_pos = v1.find_first_not_of(
286                         "abcdefghijklmnopqrstuvwxyz ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789(),.\"'",
287                         start_pos)) != std::string::npos)
288         {
289                 v1.replace(start_pos, 1, "_");
290                 start_pos += 1;
291         }
292
293         start_pos = 0;
294         while((start_pos = v1.find("\"", start_pos)) != std::string::npos) {
295                 v1.replace(start_pos, 1, "\\\"");
296                 start_pos += 2;
297         }
298
299         size_t len = key.length() + v1.length() + 4;
300         char *mds = (char*) calloc(len, sizeof(char));
301         snprintf(mds, len, "%s=\"%s\"", key.c_str(), v1.c_str());
302         return mds;
303 }
304
305 bool
306 TranscodeFfmpeg::encode (std::string outfile, std::string inf_a, std::string inf_v, TranscodeFfmpeg::FFSettings ffs, TranscodeFfmpeg::FFSettings meta, bool map)
307 {
308 #define MAX_FFMPEG_ENCODER_ARGS (100)
309         char **argp;
310         int a=0;
311
312         argp=(char**) calloc(MAX_FFMPEG_ENCODER_ARGS,sizeof(char*));
313         argp[a++] = strdup(ffmpeg_exe.c_str());
314         if (m_avoffset < 0 || m_avoffset > 0) {
315                 std::ostringstream osstream; osstream << m_avoffset;
316                 argp[a++] = strdup("-itsoffset");
317                 argp[a++] = strdup(osstream.str().c_str());
318         }
319         argp[a++] = strdup("-i");
320         argp[a++] = strdup(inf_v.c_str());
321
322         argp[a++] = strdup("-i");
323         argp[a++] = strdup(inf_a.c_str());
324
325         for(TranscodeFfmpeg::FFSettings::const_iterator it = ffs.begin(); it != ffs.end(); ++it) {
326                 argp[a++] = strdup(it->first.c_str());
327                 argp[a++] = strdup(it->second.c_str());
328         }
329         for(TranscodeFfmpeg::FFSettings::const_iterator it = meta.begin(); it != meta.end(); ++it) {
330                 argp[a++] = strdup("-metadata");
331                 argp[a++] = format_metadata(it->first.c_str(), it->second.c_str());
332         }
333
334         if (m_fps > 0) {
335                 m_lead_in  = rint (m_lead_in * m_fps) / m_fps;
336                 m_lead_out = rint (m_lead_out * m_fps) / m_fps;
337         }
338
339         if (m_lead_in != 0 && m_lead_out != 0) {
340                 std::ostringstream osstream;
341                 argp[a++] = strdup("-vf");
342                 osstream << X_("color=c=black:s=") << m_width << X_("x") << m_height << X_(":d=") << m_lead_in;
343                 if (!m_sar.empty()) osstream << X_(":sar=") << m_sar;
344                 osstream << X_(" [pre]; ");
345                 osstream << X_("color=c=black:s=") << m_width << X_("x") << m_height << X_(":d=") << m_lead_out;
346                 if (!m_sar.empty()) osstream << X_(":sar=") << m_sar;
347                 osstream << X_(" [post]; ");
348                 osstream << X_("[pre] [in] [post] concat=n=3");
349                 argp[a++] = strdup(osstream.str().c_str());
350         } else if (m_lead_in != 0) {
351                 std::ostringstream osstream;
352                 argp[a++] = strdup("-vf");
353                 osstream << X_("color=c=black:s=") << m_width << X_("x") << m_height << X_(":d=") << m_lead_in;
354                 if (!m_sar.empty()) osstream << X_(":sar=") << m_sar;
355                 osstream << X_(" [pre]; ");
356                 osstream << X_("[pre] [in] concat=n=2");
357                 argp[a++] = strdup(osstream.str().c_str());
358         } else if (m_lead_out != 0) {
359                 std::ostringstream osstream;
360                 argp[a++] = strdup("-vf");
361                 osstream << X_("color=c=black:s=") << m_width << X_("x") << m_height << X_(":d=") << m_lead_out;
362                 if (!m_sar.empty()) osstream << X_(":sar=") << m_sar;
363                 osstream << X_(" [post]; ");
364                 osstream << X_("[in] [post] concat=n=2");
365                 argp[a++] = strdup(osstream.str().c_str());
366         }
367
368         if (map) {
369                 std::ostringstream osstream;
370                 argp[a++] = strdup("-map");
371                 osstream << X_("0:") << m_videoidx;
372                 argp[a++] = strdup(osstream.str().c_str());
373                 argp[a++] = strdup("-map");
374                 argp[a++] = strdup("1:0");
375         }
376
377         argp[a++] = strdup("-y");
378         argp[a++] = strdup(outfile.c_str());
379         argp[a] = (char *)0;
380         assert(a<MAX_FFMPEG_ENCODER_ARGS);
381         /* Note: these are free()d in ~SystemExec */
382 #if 1 /* DEBUG */
383         if (debug_enable) { /* tentative debug mode */
384         printf("EXPORT ENCODE:\n");
385         for (int i=0; i< a; ++i) {
386           printf("%s ", argp[i]);
387         }
388         printf("\n");
389         }
390 #endif
391
392         ffcmd = new ARDOUR::SystemExec(ffmpeg_exe, argp);
393         ffcmd->ReadStdout.connect_same_thread (*this, boost::bind (&TranscodeFfmpeg::ffmpegparse_v, this, _1 ,_2));
394         ffcmd->Terminated.connect_same_thread (*this, boost::bind (&TranscodeFfmpeg::ffexit, this));
395         if (ffcmd->start(2)) {
396                 ffexit();
397                 return false;
398         }
399         return true;
400 }
401
402 bool
403 TranscodeFfmpeg::extract_audio (std::string outfile, ARDOUR::framecnt_t /*samplerate*/, unsigned int stream)
404 {
405         if (!probeok) return false;
406   if (stream >= m_audio.size()) return false;
407
408         char **argp;
409         int i = 0;
410
411         argp=(char**) calloc(15,sizeof(char*));
412         argp[i++] = strdup(ffmpeg_exe.c_str());
413         argp[i++] = strdup("-i");
414         argp[i++] = strdup(infile.c_str());
415 #if 0 /* ffmpeg write original samplerate, use a3/SRC to resample */
416         argp[i++] = strdup("-ar");
417         argp[i] = (char*) calloc(7,sizeof(char)); snprintf(argp[i++], 7, "%"PRId64, samplerate);
418 #endif
419         argp[i++] = strdup("-ac");
420         argp[i] = (char*) calloc(3,sizeof(char)); snprintf(argp[i++], 3, "%i", m_audio.at(stream).channels);
421         argp[i++] = strdup("-map");
422         argp[i] = (char*) calloc(8,sizeof(char)); snprintf(argp[i++], 8, "0:%s", m_audio.at(stream).stream_id.c_str());
423         argp[i++] = strdup("-vn");
424         argp[i++] = strdup("-acodec");
425         argp[i++] = strdup("pcm_f32le");
426         argp[i++] = strdup("-y");
427         argp[i++] = strdup(outfile.c_str());
428         argp[i++] = (char *)0;
429         /* Note: argp is free()d in ~SystemExec */
430 #if 1 /* DEBUG */
431         if (debug_enable) { /* tentative debug mode */
432         printf("EXTRACT AUDIO:\n");
433         for (int i=0; i< 14; ++i) {
434           printf("%s ", argp[i]);
435         }
436         printf("\n");
437         }
438 #endif
439
440         ffcmd = new ARDOUR::SystemExec(ffmpeg_exe, argp);
441         ffcmd->ReadStdout.connect_same_thread (*this, boost::bind (&TranscodeFfmpeg::ffmpegparse_a, this, _1 ,_2));
442         ffcmd->Terminated.connect_same_thread (*this, boost::bind (&TranscodeFfmpeg::ffexit, this));
443         if (ffcmd->start(2)) {
444                 ffexit();
445                 return false;
446         }
447         return true;
448 }
449
450
451 bool
452 TranscodeFfmpeg::transcode (std::string outfile, const int outw, const int outh, const int kbitps)
453 {
454         if (!probeok) return false;
455
456         char **argp;
457         int bitrate = kbitps;
458         int width = outw;
459         int height = outh;
460
461         if (width < 1 || width > m_width) { width = m_width; } /* don't allow upscaling */
462         if (height < 1 || height > m_height) { height = floor(width / m_aspect); }
463
464         if (bitrate == 0) {
465                 const double bitperpixel = .7; /* avg quality */
466                 bitrate = floor(m_fps * width * height * bitperpixel / 10000.0);
467         } else {
468                 bitrate = bitrate / 10;
469         }
470         if (bitrate < 10)  bitrate = 10;
471         if (bitrate > 1000) bitrate = 1000;
472
473         argp=(char**) calloc(15,sizeof(char*));
474         argp[0] = strdup(ffmpeg_exe.c_str());
475         argp[1] = strdup("-i");
476         argp[2] = strdup(infile.c_str());
477         argp[3] = strdup("-b:v");
478         argp[4] = (char*) calloc(7,sizeof(char)); snprintf(argp[4], 7, "%i0k", bitrate);
479         argp[5] = strdup("-s");
480         argp[6] = (char*) calloc(10,sizeof(char)); snprintf(argp[6], 10, "%ix%i", width, height);
481         argp[7] = strdup("-y");
482         argp[8] = strdup("-vcodec");
483         argp[9] = strdup("mpeg4");
484         argp[10] = strdup("-an");
485         argp[11] = strdup("-keyint_min");
486         argp[12] = strdup("10");
487         argp[13] = strdup(outfile.c_str());
488         argp[14] = (char *)0;
489         /* Note: these are free()d in ~SystemExec */
490 #if 1 /* DEBUG */
491         if (debug_enable) { /* tentative debug mode */
492         printf("TRANSCODE VIDEO:\n");
493         for (int i=0; i< 14; ++i) {
494           printf("%s ", argp[i]);
495         }
496         printf("\n");
497         }
498 #endif
499         ffcmd = new ARDOUR::SystemExec(ffmpeg_exe, argp);
500         ffcmd->ReadStdout.connect_same_thread (*this, boost::bind (&TranscodeFfmpeg::ffmpegparse_v, this, _1 ,_2));
501         ffcmd->Terminated.connect_same_thread (*this, boost::bind (&TranscodeFfmpeg::ffexit, this));
502         if (ffcmd->start(2)) {
503                 ffexit();
504                 return false;
505         }
506         return true;
507 }
508
509 void
510 TranscodeFfmpeg::cancel ()
511 {
512         if (!ffcmd || !ffcmd->is_running()) { return;}
513         ffcmd->write_to_stdin("q");
514 #ifdef PLATFORM_WINDOWS
515         Sleep(1000);
516 #else
517         sleep (1);
518 #endif
519         if (ffcmd) {
520           ffcmd->terminate();
521         }
522 }
523
524 void
525 TranscodeFfmpeg::ffexit ()
526 {
527         delete ffcmd;
528         ffcmd=0;
529         Finished(); /* EMIT SIGNAL */
530 }
531
532 void
533 TranscodeFfmpeg::ffprobeparse (std::string d, size_t /* s */)
534 {
535         ffoutput+=d;
536 }
537
538 void
539 TranscodeFfmpeg::ffmpegparse_a (std::string d, size_t /* s */)
540 {
541         const char *t;
542         int h,m,s; char f[7];
543         ARDOUR::framecnt_t p = -1;
544
545         if (!(t=strstr(d.c_str(), "time="))) { return; }
546
547         if (sscanf(t+5, "%d:%d:%d.%s",&h,&m,&s,f) == 4) {
548                 p = (ARDOUR::framecnt_t) floor( 100.0 * (
549                       h * 3600.0
550                     + m * 60.0
551                     + s * 1.0
552                     + atoi(f) / pow((double)10, (int)strlen(f))
553                 ));
554                 p = p * m_fps / 100.0;
555                 if (p > m_duration ) { p = m_duration; }
556                 Progress(p, m_duration); /* EMIT SIGNAL */
557         } else {
558                 Progress(0, 0); /* EMIT SIGNAL */
559         }
560 }
561
562 void
563 TranscodeFfmpeg::ffmpegparse_v (std::string d, size_t /* s */)
564 {
565         if (strstr(d.c_str(), "ERROR") || strstr(d.c_str(), "Error") || strstr(d.c_str(), "error")) {
566                 warning << "ffmpeg-error: " << d << endmsg;
567         }
568         if (strncmp(d.c_str(), "frame=",6)) {
569 #if 1 /* DEBUG */
570                 if (debug_enable) {
571                         d.erase(d.find_last_not_of(" \t\r\n") + 1);
572                   printf("ffmpeg: '%s'\n", d.c_str());
573                 }
574 #endif
575                 Progress(0, 0); /* EMIT SIGNAL */
576                 return;
577         }
578         ARDOUR::framecnt_t f = atol(d.substr(6));
579         if (f == 0) {
580                 Progress(0, 0); /* EMIT SIGNAL */
581         } else {
582                 Progress(f, m_duration); /* EMIT SIGNAL */
583         }
584 }