Collect subtitles at the same time before emitting them from
[dcpomatic.git] / src / lib / dcp_decoder.cc
/*
    Copyright (C) 2014-2018 Carl Hetherington <cth@carlh.net>

    This file is part of DCP-o-matic.

    DCP-o-matic is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    DCP-o-matic is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.

*/

#include "dcp_decoder.h"
#include "dcp_content.h"
#include "audio_content.h"
#include "video_decoder.h"
#include "audio_decoder.h"
#include "j2k_image_proxy.h"
#include "text_decoder.h"
#include "ffmpeg_image_proxy.h"
#include "image.h"
#include "config.h"
#include <dcp/dcp.h>
#include <dcp/cpl.h>
#include <dcp/reel.h>
#include <dcp/mono_picture_asset.h>
#include <dcp/mono_picture_asset_reader.h>
#include <dcp/stereo_picture_asset.h>
#include <dcp/stereo_picture_asset_reader.h>
#include <dcp/reel_picture_asset.h>
#include <dcp/reel_sound_asset.h>
#include <dcp/reel_subtitle_asset.h>
#include <dcp/reel_closed_caption_asset.h>
#include <dcp/mono_picture_frame.h>
#include <dcp/stereo_picture_frame.h>
#include <dcp/sound_frame.h>
#include <dcp/sound_asset_reader.h>
#include <dcp/subtitle_image.h>
#include <boost/foreach.hpp>
#include <iostream>

#include "i18n.h"

using std::list;
using std::cout;
using boost::shared_ptr;
using boost::dynamic_pointer_cast;
using boost::optional;

DCPDecoder::DCPDecoder (shared_ptr<const DCPContent> c, shared_ptr<Log> log, bool fast)
        : DCP (c)
        , _decode_referenced (false)
{
        if (c->video) {
                video.reset (new VideoDecoder (this, c, log));
        }
        if (c->audio) {
                audio.reset (new AudioDecoder (this, c->audio, log, fast));
        }
        BOOST_FOREACH (shared_ptr<TextContent> i, c->text) {
                /* XXX: this time here should be the time of the first subtitle, not 0 */
                text.push_back (shared_ptr<TextDecoder> (new TextDecoder (this, i, log, ContentTime())));
        }

        list<shared_ptr<dcp::CPL> > cpl_list = cpls ();

        if (cpl_list.empty()) {
                throw DCPError (_("No CPLs found in DCP."));
        }

        shared_ptr<dcp::CPL> cpl;
        BOOST_FOREACH (shared_ptr<dcp::CPL> i, cpl_list) {
                if (_dcp_content->cpl() && i->id() == _dcp_content->cpl().get()) {
                        cpl = i;
                }
        }

        if (!cpl) {
                /* No CPL found; probably an old file that doesn't specify it;
                   just use the first one.
                */
                cpl = cpls().front ();
        }

        set_decode_referenced (false);

        _reels = cpl->reels ();

        _reel = _reels.begin ();
        _offset = 0;
        get_readers ();
}


bool
DCPDecoder::pass ()
{
        if (_reel == _reels.end () || !_dcp_content->can_be_played ()) {
                return true;
        }

        double const vfr = _dcp_content->active_video_frame_rate ();

        /* Frame within the (played part of the) reel that is coming up next */
        int64_t const frame = _next.frames_round (vfr);

        shared_ptr<dcp::PictureAsset> picture_asset = (*_reel)->main_picture()->asset();
        DCPOMATIC_ASSERT (picture_asset);

        /* We must emit texts first as when we emit the video for this frame
           it will expect already to have the texts.
        */
        pass_texts (_next, picture_asset->size());

        if ((_mono_reader || _stereo_reader) && (_decode_referenced || !_dcp_content->reference_video())) {
                int64_t const entry_point = (*_reel)->main_picture()->entry_point ();
                if (_mono_reader) {
                        video->emit (
                                shared_ptr<ImageProxy> (
                                        new J2KImageProxy (
                                                _mono_reader->get_frame (entry_point + frame),
                                                picture_asset->size(),
                                                AV_PIX_FMT_XYZ12LE,
                                                _forced_reduction
                                                )
                                        ),
                                _offset + frame
                                );
                } else {
                        video->emit (
                                shared_ptr<ImageProxy> (
                                        new J2KImageProxy (
                                                _stereo_reader->get_frame (entry_point + frame),
                                                picture_asset->size(),
                                                dcp::EYE_LEFT,
                                                AV_PIX_FMT_XYZ12LE,
                                                _forced_reduction
                                                )
                                        ),
                                _offset + frame
                                );

                        video->emit (
                                shared_ptr<ImageProxy> (
                                        new J2KImageProxy (
                                                _stereo_reader->get_frame (entry_point + frame),
                                                picture_asset->size(),
                                                dcp::EYE_RIGHT,
                                                AV_PIX_FMT_XYZ12LE,
                                                _forced_reduction
                                                )
                                        ),
                                _offset + frame
                                );
                }
        }

        if (_sound_reader && (_decode_referenced || !_dcp_content->reference_audio())) {
                int64_t const entry_point = (*_reel)->main_sound()->entry_point ();
                shared_ptr<const dcp::SoundFrame> sf = _sound_reader->get_frame (entry_point + frame);
                uint8_t const * from = sf->data ();

                int const channels = _dcp_content->audio->stream()->channels ();
                int const frames = sf->size() / (3 * channels);
                shared_ptr<AudioBuffers> data (new AudioBuffers (channels, frames));
                float** data_data = data->data();
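                /* Each sample is 24-bit PCM (3 bytes per sample), interleaved by channel.
                   Build each sample into the top three bytes of a 32-bit int, so that the
                   sign bit lands in bit 31, then scale to a float in roughly [-1, 1].
                */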
                for (int i = 0; i < frames; ++i) {
                        for (int j = 0; j < channels; ++j) {
                                data_data[j][i] = static_cast<int> ((from[0] << 8) | (from[1] << 16) | (from[2] << 24)) / static_cast<float> (INT_MAX - 256);
                                from += 3;
                        }
                }

                audio->emit (_dcp_content->audio->stream(), data, ContentTime::from_frames (_offset, vfr) + _next);
        }

        _next += ContentTime::from_frames (1, vfr);

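        /* If that was the last frame of the current reel's picture, move on to the next reel */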
        if ((*_reel)->main_picture ()) {
                if (_next.frames_round (vfr) >= (*_reel)->main_picture()->duration()) {
                        next_reel ();
                        _next = ContentTime ();
                }
        }

        return false;
}

void
DCPDecoder::pass_texts (ContentTime next, dcp::Size size)
{
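        /* Step through our TextDecoders in the same order as the reel's text assets:
           the main subtitle (if any) first, then each closed caption.  The decoders
           were created from the content's text list, which is assumed to match this order.
        */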
        list<shared_ptr<TextDecoder> >::const_iterator decoder = text.begin ();
        if ((*_reel)->main_subtitle()) {
                DCPOMATIC_ASSERT (decoder != text.end ());
                pass_texts (
                        next, (*_reel)->main_subtitle()->asset(), _dcp_content->reference_text(TEXT_OPEN_SUBTITLE), (*_reel)->main_subtitle()->entry_point(), *decoder, size
                        );
                ++decoder;
        }
        BOOST_FOREACH (shared_ptr<dcp::ReelClosedCaptionAsset> i, (*_reel)->closed_captions()) {
                DCPOMATIC_ASSERT (decoder != text.end ());
                pass_texts (
                        next, i->asset(), _dcp_content->reference_text(TEXT_CLOSED_CAPTION), i->entry_point(), *decoder, size
                        );
                ++decoder;
        }
}

void
DCPDecoder::pass_texts (ContentTime next, shared_ptr<dcp::SubtitleAsset> asset, bool reference, int64_t entry_point, shared_ptr<TextDecoder> decoder, dcp::Size size)
{
        double const vfr = _dcp_content->active_video_frame_rate ();
        /* Frame within the (played part of the) reel that is coming up next */
        int64_t const frame = next.frames_round (vfr);

        if (_decode_referenced || !reference) {
                list<shared_ptr<dcp::Subtitle> > subs = asset->subtitles_during (
                        dcp::Time (entry_point + frame, vfr, vfr),
                        dcp::Time (entry_point + frame + 1, vfr, vfr),
                        true
                        );

                list<dcp::SubtitleString> strings;

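                /* Collect up SubtitleStrings which have the same timing and emit each such
                   group with a single emit_plain() call; image subtitles are emitted
                   individually as they are found.
                */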
                BOOST_FOREACH (shared_ptr<dcp::Subtitle> i, subs) {
                        shared_ptr<dcp::SubtitleString> is = dynamic_pointer_cast<dcp::SubtitleString> (i);
                        if (is) {
                                if (!strings.empty() && (strings.back().in() != is->in() || strings.back().out() != is->out())) {
                                        dcp::SubtitleString b = strings.back();
                                        decoder->emit_plain (
                                                ContentTimePeriod (
                                                        ContentTime::from_frames(_offset - entry_point, vfr) + ContentTime::from_seconds(b.in().as_seconds()),
                                                        ContentTime::from_frames(_offset - entry_point, vfr) + ContentTime::from_seconds(b.out().as_seconds())
                                                        ),
                                                strings
                                                );
                                        strings.clear ();
                                }

                                strings.push_back (*is);
                        }

                        shared_ptr<dcp::SubtitleImage> ii = dynamic_pointer_cast<dcp::SubtitleImage> (i);
                        if (ii) {
                                FFmpegImageProxy proxy (ii->png_image());
                                shared_ptr<Image> image = proxy.image().first;
                                /* set up rect with height and width */
                                dcpomatic::Rect<double> rect(0, 0, image->size().width / double(size.width), image->size().height / double(size.height));

                                /* add in position */

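                                /* h_position() and v_position() are treated as proportions of the
                                   screen, measured from the h_align()/v_align() reference, and folded
                                   into rect's 0..1 coordinates.
                                */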
                                switch (ii->h_align()) {
                                case dcp::HALIGN_LEFT:
                                        rect.x += ii->h_position();
                                        break;
                                case dcp::HALIGN_CENTER:
                                        rect.x += 0.5 + ii->h_position() - rect.width / 2;
                                        break;
                                case dcp::HALIGN_RIGHT:
                                        rect.x += 1 - ii->h_position() - rect.width;
                                        break;
                                }

                                switch (ii->v_align()) {
                                case dcp::VALIGN_TOP:
                                        rect.y += ii->v_position();
                                        break;
                                case dcp::VALIGN_CENTER:
                                        rect.y += 0.5 + ii->v_position() - rect.height / 2;
                                        break;
                                case dcp::VALIGN_BOTTOM:
                                        rect.y += 1 - ii->v_position() - rect.height;
                                        break;
                                }

                                decoder->emit_bitmap (
                                        ContentTimePeriod (
                                                ContentTime::from_frames (_offset - entry_point, vfr) + ContentTime::from_seconds (i->in().as_seconds ()),
                                                ContentTime::from_frames (_offset - entry_point, vfr) + ContentTime::from_seconds (i->out().as_seconds ())
                                                ),
                                        image, rect
                                        );
                        }
                }

                if (!strings.empty()) {
                        dcp::SubtitleString b = strings.back();
                        decoder->emit_plain (
                                ContentTimePeriod (
                                        ContentTime::from_frames(_offset - entry_point, vfr) + ContentTime::from_seconds(b.in().as_seconds()),
                                        ContentTime::from_frames(_offset - entry_point, vfr) + ContentTime::from_seconds(b.out().as_seconds())
                                        ),
                                strings
                                );
                        strings.clear ();
                }
        }
}

void
DCPDecoder::next_reel ()
{
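        /* _offset is the frame at which the current reel starts, relative to the start of
           the whole content; bump it past the reel we are leaving.
        */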
        _offset += (*_reel)->main_picture()->duration();
        ++_reel;
        get_readers ();
}

void
DCPDecoder::get_readers ()
{
        if (_reel == _reels.end() || !_dcp_content->can_be_played ()) {
                _mono_reader.reset ();
                _stereo_reader.reset ();
                _sound_reader.reset ();
                return;
        }

        if ((*_reel)->main_picture()) {
                shared_ptr<dcp::PictureAsset> asset = (*_reel)->main_picture()->asset ();
                shared_ptr<dcp::MonoPictureAsset> mono = dynamic_pointer_cast<dcp::MonoPictureAsset> (asset);
                shared_ptr<dcp::StereoPictureAsset> stereo = dynamic_pointer_cast<dcp::StereoPictureAsset> (asset);
                DCPOMATIC_ASSERT (mono || stereo);
                if (mono) {
                        _mono_reader = mono->start_read ();
                        _stereo_reader.reset ();
                } else {
                        _stereo_reader = stereo->start_read ();
                        _mono_reader.reset ();
                }
        } else {
                _mono_reader.reset ();
                _stereo_reader.reset ();
        }

        if ((*_reel)->main_sound()) {
                _sound_reader = (*_reel)->main_sound()->asset()->start_read ();
        } else {
                _sound_reader.reset ();
        }
}

void
DCPDecoder::seek (ContentTime t, bool accurate)
{
        if (!_dcp_content->can_be_played ()) {
                return;
        }

        Decoder::seek (t, accurate);

        _reel = _reels.begin ();
        _offset = 0;
        get_readers ();

        int const pre_roll_seconds = 2;

        /* Pre-roll for subs */

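        /* Pass texts from a little before the seek point so that a subtitle which starts
           before the seek, but should still be on screen at it, is emitted.
        */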
        ContentTime pre = t - ContentTime::from_seconds (pre_roll_seconds);
        if (pre < ContentTime()) {
                pre = ContentTime ();
        }

        /* Seek to pre-roll position */

        while (_reel != _reels.end() && pre >= ContentTime::from_frames ((*_reel)->main_picture()->duration(), _dcp_content->active_video_frame_rate ())) {
                ContentTime rd = ContentTime::from_frames ((*_reel)->main_picture()->duration(), _dcp_content->active_video_frame_rate ());
                pre -= rd;
                t -= rd;
                next_reel ();
        }

        /* Pass texts in the pre-roll */

        double const vfr = _dcp_content->active_video_frame_rate ();
        for (int i = 0; i < pre_roll_seconds * vfr; ++i) {
                pass_texts (pre, (*_reel)->main_picture()->asset()->size());
                pre += ContentTime::from_frames (1, vfr);
        }

        /* Seek to correct position */

        while (_reel != _reels.end() && t >= ContentTime::from_frames ((*_reel)->main_picture()->duration(), _dcp_content->active_video_frame_rate ())) {
                t -= ContentTime::from_frames ((*_reel)->main_picture()->duration(), _dcp_content->active_video_frame_rate ());
                next_reel ();
        }

        _next = t;
}

void
DCPDecoder::set_decode_referenced (bool r)
{
        _decode_referenced = r;

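        /* If this content's video or audio is being referenced directly from the DCP
           (rather than being decoded and re-encoded) there is no need to decode it,
           unless decoding of referenced material has been explicitly requested.
        */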
        if (video) {
                video->set_ignore (_dcp_content->reference_video() && !_decode_referenced);
        }
        if (audio) {
                audio->set_ignore (_dcp_content->reference_audio() && !_decode_referenced);
        }
}

void
DCPDecoder::set_forced_reduction (optional<int> reduction)
{
        _forced_reduction = reduction;
}