Remove some unnecessary std:: qualifiers.
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2021 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34 #include "verify.h"
35 #include "dcp.h"
36 #include "cpl.h"
37 #include "reel.h"
38 #include "reel_closed_caption_asset.h"
39 #include "reel_picture_asset.h"
40 #include "reel_sound_asset.h"
41 #include "reel_subtitle_asset.h"
42 #include "interop_subtitle_asset.h"
43 #include "mono_picture_asset.h"
44 #include "mono_picture_frame.h"
45 #include "stereo_picture_asset.h"
46 #include "stereo_picture_frame.h"
47 #include "exceptions.h"
48 #include "compose.hpp"
49 #include "raw_convert.h"
50 #include "smpte_subtitle_asset.h"
51 #include <xercesc/util/PlatformUtils.hpp>
52 #include <xercesc/parsers/XercesDOMParser.hpp>
53 #include <xercesc/parsers/AbstractDOMParser.hpp>
54 #include <xercesc/sax/HandlerBase.hpp>
55 #include <xercesc/dom/DOMImplementation.hpp>
56 #include <xercesc/dom/DOMImplementationLS.hpp>
57 #include <xercesc/dom/DOMImplementationRegistry.hpp>
58 #include <xercesc/dom/DOMLSParser.hpp>
59 #include <xercesc/dom/DOMException.hpp>
60 #include <xercesc/dom/DOMDocument.hpp>
61 #include <xercesc/dom/DOMNodeList.hpp>
62 #include <xercesc/dom/DOMError.hpp>
63 #include <xercesc/dom/DOMLocator.hpp>
64 #include <xercesc/dom/DOMNamedNodeMap.hpp>
65 #include <xercesc/dom/DOMAttr.hpp>
66 #include <xercesc/dom/DOMErrorHandler.hpp>
67 #include <xercesc/framework/LocalFileInputSource.hpp>
68 #include <xercesc/framework/MemBufInputSource.hpp>
69 #include <boost/noncopyable.hpp>
70 #include <boost/algorithm/string.hpp>
71 #include <map>
72 #include <vector>
73 #include <iostream>
74
75 using std::list;
76 using std::vector;
77 using std::string;
78 using std::cout;
79 using std::map;
80 using std::max;
81 using std::shared_ptr;
82 using std::make_shared;
83 using boost::optional;
84 using boost::function;
85 using std::dynamic_pointer_cast;
86
87 using namespace dcp;
88 using namespace xercesc;
89
90 static
91 string
92 xml_ch_to_string (XMLCh const * a)
93 {
94         char* x = XMLString::transcode(a);
95         string const o(x);
96         XMLString::release(&x);
97         return o;
98 }
99
100 class XMLValidationError
101 {
102 public:
103         XMLValidationError (SAXParseException const & e)
104                 : _message (xml_ch_to_string(e.getMessage()))
105                 , _line (e.getLineNumber())
106                 , _column (e.getColumnNumber())
107                 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
108                 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
109         {
110
111         }
112
113         string message () const {
114                 return _message;
115         }
116
117         uint64_t line () const {
118                 return _line;
119         }
120
121         uint64_t column () const {
122                 return _column;
123         }
124
125         string public_id () const {
126                 return _public_id;
127         }
128
129         string system_id () const {
130                 return _system_id;
131         }
132
133 private:
134         string _message;
135         uint64_t _line;
136         uint64_t _column;
137         string _public_id;
138         string _system_id;
139 };
140
141
142 class DCPErrorHandler : public ErrorHandler
143 {
144 public:
145         void warning(const SAXParseException& e)
146         {
147                 maybe_add (XMLValidationError(e));
148         }
149
150         void error(const SAXParseException& e)
151         {
152                 maybe_add (XMLValidationError(e));
153         }
154
155         void fatalError(const SAXParseException& e)
156         {
157                 maybe_add (XMLValidationError(e));
158         }
159
160         void resetErrors() {
161                 _errors.clear ();
162         }
163
164         list<XMLValidationError> errors () const {
165                 return _errors;
166         }
167
168 private:
169         void maybe_add (XMLValidationError e)
170         {
171                 /* XXX: nasty hack */
172                 if (
173                         e.message().find("schema document") != string::npos &&
174                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
175                         ) {
176                         return;
177                 }
178
179                 _errors.push_back (e);
180         }
181
182         list<XMLValidationError> _errors;
183 };
184
185 class StringToXMLCh : public boost::noncopyable
186 {
187 public:
188         StringToXMLCh (string a)
189         {
190                 _buffer = XMLString::transcode(a.c_str());
191         }
192
193         ~StringToXMLCh ()
194         {
195                 XMLString::release (&_buffer);
196         }
197
198         XMLCh const * get () const {
199                 return _buffer;
200         }
201
202 private:
203         XMLCh* _buffer;
204 };
205
206 class LocalFileResolver : public EntityResolver
207 {
208 public:
209         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
210                 : _xsd_dtd_directory (xsd_dtd_directory)
211         {
212                 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
213                  * found without being here.
214                  */
215                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
216                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
217                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
218                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
219                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
220                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
221                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
222                 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
223                 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
224                 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
225                 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
226                 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
227                 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
228         }
229
230         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
231         {
232                 if (!system_id) {
233                         return 0;
234                 }
235                 auto system_id_str = xml_ch_to_string (system_id);
236                 auto p = _xsd_dtd_directory;
237                 if (_files.find(system_id_str) == _files.end()) {
238                         p /= system_id_str;
239                 } else {
240                         p /= _files[system_id_str];
241                 }
242                 StringToXMLCh ch (p.string());
243                 return new LocalFileInputSource(ch.get());
244         }
245
246 private:
247         void add (string uri, string file)
248         {
249                 _files[uri] = file;
250         }
251
252         std::map<string, string> _files;
253         boost::filesystem::path _xsd_dtd_directory;
254 };
255
256
257 static void
258 parse (XercesDOMParser& parser, boost::filesystem::path xml)
259 {
260         parser.parse(xml.string().c_str());
261 }
262
263
264 static void
265 parse (XercesDOMParser& parser, string xml)
266 {
267         xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
268         parser.parse(buf);
269 }
270
271
272 template <class T>
273 void
274 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, vector<VerificationNote>& notes)
275 {
276         try {
277                 XMLPlatformUtils::Initialize ();
278         } catch (XMLException& e) {
279                 throw MiscError ("Failed to initialise xerces library");
280         }
281
282         DCPErrorHandler error_handler;
283
284         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
285         {
286                 XercesDOMParser parser;
287                 parser.setValidationScheme(XercesDOMParser::Val_Always);
288                 parser.setDoNamespaces(true);
289                 parser.setDoSchema(true);
290
291                 vector<string> schema;
292                 schema.push_back("xml.xsd");
293                 schema.push_back("xmldsig-core-schema.xsd");
294                 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
295                 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
296                 schema.push_back("SMPTE-429-9-2007-AM.xsd");
297                 schema.push_back("Main-Stereo-Picture-CPL.xsd");
298                 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
299                 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
300                 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
301                 schema.push_back("DCSubtitle.v1.mattsson.xsd");
302                 schema.push_back("DCDMSubtitle-2010.xsd");
303                 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
304                 schema.push_back("SMPTE-429-16.xsd");
305                 schema.push_back("Dolby-2012-AD.xsd");
306                 schema.push_back("SMPTE-429-10-2008.xsd");
307                 schema.push_back("xlink.xsd");
308                 schema.push_back("SMPTE-335-2012.xsd");
309                 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
310                 schema.push_back("isdcf-mca.xsd");
311                 schema.push_back("SMPTE-429-12-2008.xsd");
312
313                 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
314                  * Schemas that are not mentioned in this list are not read, and the things
315                  * they describe are not checked.
316                  */
317                 string locations;
318                 for (auto i: schema) {
319                         locations += String::compose("%1 %1 ", i, i);
320                 }
321
322                 parser.setExternalSchemaLocation(locations.c_str());
323                 parser.setValidationSchemaFullChecking(true);
324                 parser.setErrorHandler(&error_handler);
325
326                 LocalFileResolver resolver (xsd_dtd_directory);
327                 parser.setEntityResolver(&resolver);
328
329                 try {
330                         parser.resetDocumentPool();
331                         parse(parser, xml);
332                 } catch (XMLException& e) {
333                         throw MiscError(xml_ch_to_string(e.getMessage()));
334                 } catch (DOMException& e) {
335                         throw MiscError(xml_ch_to_string(e.getMessage()));
336                 } catch (...) {
337                         throw MiscError("Unknown exception from xerces");
338                 }
339         }
340
341         XMLPlatformUtils::Terminate ();
342
343         for (auto i: error_handler.errors()) {
344                 notes.push_back (
345                         VerificationNote(
346                                 VerificationNote::VERIFY_ERROR,
347                                 VerificationNote::XML_VALIDATION_ERROR,
348                                 i.message(),
349                                 boost::trim_copy(i.public_id() + " " + i.system_id()),
350                                 i.line()
351                                 )
352                         );
353         }
354 }
355
356
/** Possible results of verify_asset() */
enum VerifyAssetResult
{
	/** no problem was found */
	VERIFY_ASSET_RESULT_GOOD,
	/** the hash in the CPL is not the same as the one in the PKL */
	VERIFY_ASSET_RESULT_CPL_PKL_DIFFER,
	/** the hash calculated from the asset is not the same as the one in the PKL */
	VERIFY_ASSET_RESULT_BAD
};
362
363
364 static VerifyAssetResult
365 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
366 {
367         auto const actual_hash = reel_mxf->asset_ref()->hash(progress);
368
369         auto pkls = dcp->pkls();
370         /* We've read this DCP in so it must have at least one PKL */
371         DCP_ASSERT (!pkls.empty());
372
373         auto asset = reel_mxf->asset_ref().asset();
374
375         optional<string> pkl_hash;
376         for (auto i: pkls) {
377                 pkl_hash = i->hash (reel_mxf->asset_ref()->id());
378                 if (pkl_hash) {
379                         break;
380                 }
381         }
382
383         DCP_ASSERT (pkl_hash);
384
385         auto cpl_hash = reel_mxf->hash();
386         if (cpl_hash && *cpl_hash != *pkl_hash) {
387                 return VERIFY_ASSET_RESULT_CPL_PKL_DIFFER;
388         }
389
390         if (actual_hash != *pkl_hash) {
391                 return VERIFY_ASSET_RESULT_BAD;
392         }
393
394         return VERIFY_ASSET_RESULT_GOOD;
395 }
396
397
398 void
399 verify_language_tag (string tag, vector<VerificationNote>& notes)
400 {
401         try {
402                 dcp::LanguageTag test (tag);
403         } catch (dcp::LanguageTagError &) {
404                 notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::BAD_LANGUAGE, tag));
405         }
406 }
407
408
/** Possible results of checking the frame sizes of a picture asset */
enum VerifyPictureAssetResult {
	/** no frame is bigger than either limit */
	VERIFY_PICTURE_ASSET_RESULT_GOOD,
	/** some frame is bigger than the lower ("risky") limit, but none is bigger than the maximum */
	VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE,
	/** some frame is bigger than the maximum */
	VERIFY_PICTURE_ASSET_RESULT_BAD
};
415
416
417 int
418 biggest_frame_size (shared_ptr<const MonoPictureFrame> frame)
419 {
420         return frame->size ();
421 }
422
423 int
424 biggest_frame_size (shared_ptr<const StereoPictureFrame> frame)
425 {
426         return max(frame->left()->size(), frame->right()->size());
427 }
428
429
430 template <class A, class R, class F>
431 optional<VerifyPictureAssetResult>
432 verify_picture_asset_type (shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
433 {
434         auto asset = dynamic_pointer_cast<A>(reel_mxf->asset_ref().asset());
435         if (!asset) {
436                 return optional<VerifyPictureAssetResult>();
437         }
438
439         int biggest_frame = 0;
440         auto reader = asset->start_read ();
441         auto const duration = asset->intrinsic_duration ();
442         for (int64_t i = 0; i < duration; ++i) {
443                 shared_ptr<const F> frame = reader->get_frame (i);
444                 biggest_frame = max(biggest_frame, biggest_frame_size(frame));
445                 progress (float(i) / duration);
446         }
447
448         static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
449         static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
450         if (biggest_frame > max_frame) {
451                 return VERIFY_PICTURE_ASSET_RESULT_BAD;
452         } else if (biggest_frame > risky_frame) {
453                 return VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE;
454         }
455
456         return VERIFY_PICTURE_ASSET_RESULT_GOOD;
457 }
458
459
460 static VerifyPictureAssetResult
461 verify_picture_asset (shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
462 {
463         auto r = verify_picture_asset_type<MonoPictureAsset, MonoPictureAssetReader, MonoPictureFrame>(reel_mxf, progress);
464         if (!r) {
465                 r = verify_picture_asset_type<StereoPictureAsset, StereoPictureAssetReader, StereoPictureFrame>(reel_mxf, progress);
466         }
467
468         DCP_ASSERT (r);
469         return *r;
470 }
471
472
473 static void
474 verify_main_picture_asset (
475         shared_ptr<const DCP> dcp,
476         shared_ptr<const ReelPictureAsset> reel_asset,
477         function<void (string, optional<boost::filesystem::path>)> stage,
478         function<void (float)> progress,
479         vector<VerificationNote>& notes
480         )
481 {
482         auto asset = reel_asset->asset();
483         auto const file = *asset->file();
484         stage ("Checking picture asset hash", file);
485         auto const r = verify_asset (dcp, reel_asset, progress);
486         switch (r) {
487                 case VERIFY_ASSET_RESULT_BAD:
488                         notes.push_back (
489                                 VerificationNote(
490                                         VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_HASH_INCORRECT, file
491                                         )
492                                 );
493                         break;
494                 case VERIFY_ASSET_RESULT_CPL_PKL_DIFFER:
495                         notes.push_back (
496                                 VerificationNote(
497                                         VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_PICTURE_HASHES_DIFFER, file
498                                         )
499                                 );
500                         break;
501                 default:
502                         break;
503         }
504         stage ("Checking picture frame sizes", asset->file());
505         auto const pr = verify_picture_asset (reel_asset, progress);
506         switch (pr) {
507                 case VERIFY_PICTURE_ASSET_RESULT_BAD:
508                         notes.push_back (
509                                 VerificationNote(
510                                         VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_FRAME_TOO_LARGE_IN_BYTES, file
511                                         )
512                                 );
513                         break;
514                 case VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE:
515                         notes.push_back (
516                                 VerificationNote(
517                                         VerificationNote::VERIFY_WARNING, VerificationNote::PICTURE_FRAME_NEARLY_TOO_LARGE_IN_BYTES, file
518                                         )
519                                 );
520                         break;
521                 default:
522                         break;
523         }
524
525         /* Only flat/scope allowed by Bv2.1 */
526         if (
527                 asset->size() != dcp::Size(2048, 858) &&
528                 asset->size() != dcp::Size(1998, 1080) &&
529                 asset->size() != dcp::Size(4096, 1716) &&
530                 asset->size() != dcp::Size(3996, 2160)) {
531                 notes.push_back(
532                         VerificationNote(
533                                 VerificationNote::VERIFY_BV21_ERROR,
534                                 VerificationNote::PICTURE_ASSET_INVALID_SIZE_IN_PIXELS,
535                                 String::compose("%1x%2", asset->size().width, asset->size().height),
536                                 file
537                                 )
538                         );
539         }
540
541         /* Only 24, 25, 48fps allowed for 2K */
542         if (
543                 (asset->size() == dcp::Size(2048, 858) || asset->size() == dcp::Size(1998, 1080)) &&
544                 (asset->edit_rate() != dcp::Fraction(24, 1) && asset->edit_rate() != dcp::Fraction(25, 1) && asset->edit_rate() != dcp::Fraction(48, 1))
545            ) {
546                 notes.push_back(
547                         VerificationNote(
548                                 VerificationNote::VERIFY_BV21_ERROR,
549                                 VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_2K,
550                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
551                                 file
552                                 )
553                         );
554         }
555
556         if (asset->size() == dcp::Size(4096, 1716) || asset->size() == dcp::Size(3996, 2160)) {
557                 /* Only 24fps allowed for 4K */
558                 if (asset->edit_rate() != dcp::Fraction(24, 1)) {
559                         notes.push_back(
560                                 VerificationNote(
561                                         VerificationNote::VERIFY_BV21_ERROR,
562                                         VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_4K,
563                                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
564                                         file
565                                         )
566                                 );
567                 }
568
569                 /* Only 2D allowed for 4K */
570                 if (dynamic_pointer_cast<const StereoPictureAsset>(asset)) {
571                         notes.push_back(
572                                 VerificationNote(
573                                         VerificationNote::VERIFY_BV21_ERROR,
574                                         VerificationNote::PICTURE_ASSET_4K_3D,
575                                         file
576                                         )
577                                 );
578
579                 }
580         }
581
582 }
583
584
585 static void
586 verify_main_sound_asset (
587         shared_ptr<const DCP> dcp,
588         shared_ptr<const ReelSoundAsset> reel_asset,
589         function<void (string, optional<boost::filesystem::path>)> stage,
590         function<void (float)> progress,
591         vector<VerificationNote>& notes
592         )
593 {
594         auto asset = reel_asset->asset();
595         stage ("Checking sound asset hash", asset->file());
596         auto const r = verify_asset (dcp, reel_asset, progress);
597         switch (r) {
598                 case VERIFY_ASSET_RESULT_BAD:
599                         notes.push_back (
600                                 VerificationNote(
601                                         VerificationNote::VERIFY_ERROR, VerificationNote::SOUND_HASH_INCORRECT, *asset->file()
602                                         )
603                                 );
604                         break;
605                 case VERIFY_ASSET_RESULT_CPL_PKL_DIFFER:
606                         notes.push_back (
607                                 VerificationNote(
608                                         VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_SOUND_HASHES_DIFFER, *asset->file()
609                                         )
610                                 );
611                         break;
612                 default:
613                         break;
614         }
615
616         stage ("Checking sound asset metadata", asset->file());
617
618         verify_language_tag (asset->language(), notes);
619 }
620
621
622 static void
623 verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, vector<VerificationNote>& notes)
624 {
625         /* XXX: is Language compulsory? */
626         if (reel_asset->language()) {
627                 verify_language_tag (*reel_asset->language(), notes);
628         }
629 }
630
631
632 static void
633 verify_closed_caption_reel (shared_ptr<const ReelClosedCaptionAsset> reel_asset, vector<VerificationNote>& notes)
634 {
635         /* XXX: is Language compulsory? */
636         if (reel_asset->language()) {
637                 verify_language_tag (*reel_asset->language(), notes);
638         }
639 }
640
641
642 struct State
643 {
644         boost::optional<string> subtitle_language;
645 };
646
647
648
649 void
650 verify_smpte_subtitle_asset (
651         shared_ptr<const dcp::SMPTESubtitleAsset> asset,
652         vector<VerificationNote>& notes,
653         State& state
654         )
655 {
656         if (asset->language()) {
657                 auto const language = *asset->language();
658                 verify_language_tag (language, notes);
659                 if (!state.subtitle_language) {
660                         state.subtitle_language = language;
661                 } else if (state.subtitle_language != language) {
662                         notes.push_back (
663                                 VerificationNote(
664                                         VerificationNote::VERIFY_BV21_ERROR, VerificationNote::SUBTITLE_LANGUAGES_DIFFER, *asset->file()
665                                         )
666                                 );
667                 }
668         } else {
669                 notes.push_back (
670                         VerificationNote(
671                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_SUBTITLE_LANGUAGE, *asset->file()
672                                 )
673                         );
674         }
675         if (boost::filesystem::file_size(*asset->file()) > 115 * 1024 * 1024) {
676                 notes.push_back (
677                         VerificationNote(
678                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::TIMED_TEXT_ASSET_TOO_LARGE_IN_BYTES, *asset->file()
679                                 )
680                         );
681         }
682         /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
683          * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
684          */
685         auto fonts = asset->font_data ();
686         int total_size = 0;
687         for (auto i: fonts) {
688                 total_size += i.second.size();
689         }
690         if (total_size > 10 * 1024 * 1024) {
691                 notes.push_back (
692                         VerificationNote(
693                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::TIMED_TEXT_FONTS_TOO_LARGE_IN_BYTES, *asset->file()
694                                 )
695                         );
696         }
697
698         if (!asset->start_time()) {
699                 notes.push_back (
700                         VerificationNote(
701                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_SUBTITLE_START_TIME, *asset->file())
702                         );
703         } else if (asset->start_time() != dcp::Time()) {
704                 notes.push_back (
705                         VerificationNote(
706                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::SUBTITLE_START_TIME_NON_ZERO, *asset->file())
707                         );
708         }
709 }
710
711
712 static void
713 verify_subtitle_asset (
714         shared_ptr<const SubtitleAsset> asset,
715         function<void (string, optional<boost::filesystem::path>)> stage,
716         boost::filesystem::path xsd_dtd_directory,
717         vector<VerificationNote>& notes,
718         State& state
719         )
720 {
721         stage ("Checking subtitle XML", asset->file());
722         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
723          * gets passed through libdcp which may clean up and therefore hide errors.
724          */
725         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
726
727         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
728         if (smpte) {
729                 verify_smpte_subtitle_asset (smpte, notes, state);
730         }
731 }
732
733
734 static void
735 verify_closed_caption_asset (
736         shared_ptr<const SubtitleAsset> asset,
737         function<void (string, optional<boost::filesystem::path>)> stage,
738         boost::filesystem::path xsd_dtd_directory,
739         vector<VerificationNote>& notes,
740         State& state
741         )
742 {
743         verify_subtitle_asset (asset, stage, xsd_dtd_directory, notes, state);
744
745         if (asset->raw_xml().size() > 256 * 1024) {
746                 notes.push_back (
747                         VerificationNote(
748                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::CLOSED_CAPTION_XML_TOO_LARGE_IN_BYTES, *asset->file()
749                                 )
750                         );
751         }
752 }
753
754
755 static
756 void
757 check_text_timing (
758         vector<shared_ptr<dcp::Reel>> reels,
759         optional<int> picture_frame_rate,
760         vector<VerificationNote>& notes,
761         std::function<string (shared_ptr<dcp::Reel>)> xml,
762         std::function<int64_t (shared_ptr<dcp::Reel>)> duration
763         )
764 {
765         /* end of last subtitle (in editable units) */
766         optional<int64_t> last_out;
767         auto too_short = false;
768         auto too_close = false;
769         auto too_early = false;
770         /* current reel start time (in editable units) */
771         int64_t reel_offset = 0;
772
773         std::function<void (cxml::ConstNodePtr, int, int, bool)> parse;
774         parse = [&parse, &last_out, &too_short, &too_close, &too_early, &reel_offset](cxml::ConstNodePtr node, int tcr, int pfr, bool first_reel) {
775                 if (node->name() == "Subtitle") {
776                         dcp::Time in (node->string_attribute("TimeIn"), tcr);
777                         dcp::Time out (node->string_attribute("TimeOut"), tcr);
778                         if (first_reel && in < dcp::Time(0, 0, 4, 0, tcr)) {
779                                 too_early = true;
780                         }
781                         auto length = out - in;
782                         if (length.as_editable_units(pfr) < 15) {
783                                 too_short = true;
784                         }
785                         if (last_out) {
786                                 /* XXX: this feels dubious - is it really what Bv2.1 means? */
787                                 auto distance = reel_offset + in.as_editable_units(pfr) - *last_out;
788                                 if (distance >= 0 && distance < 2) {
789                                         too_close = true;
790                                 }
791                         }
792                         last_out = reel_offset + out.as_editable_units(pfr);
793                 } else {
794                         for (auto i: node->node_children()) {
795                                 parse(i, tcr, pfr, first_reel);
796                         }
797                 }
798         };
799
800         for (auto i = 0U; i < reels.size(); ++i) {
801                 /* We need to look at <Subtitle> instances in the XML being checked, so we can't use the subtitles
802                  * read in by libdcp's parser.
803                  */
804
805                 auto doc = make_shared<cxml::Document>("SubtitleReel");
806                 doc->read_string (xml(reels[i]));
807                 auto const tcr = doc->number_child<int>("TimeCodeRate");
808                 parse (doc, tcr, picture_frame_rate.get_value_or(24), i == 0);
809                 reel_offset += duration(reels[i]);
810         }
811
812         if (too_early) {
813                 notes.push_back(
814                         VerificationNote(
815                                 VerificationNote::VERIFY_WARNING, VerificationNote::FIRST_TEXT_TOO_EARLY
816                                 )
817                         );
818         }
819
820         if (too_short) {
821                 notes.push_back (
822                         VerificationNote(
823                                 VerificationNote::VERIFY_WARNING, VerificationNote::SUBTITLE_TOO_SHORT
824                                 )
825                         );
826         }
827
828         if (too_close) {
829                 notes.push_back (
830                         VerificationNote(
831                                 VerificationNote::VERIFY_WARNING, VerificationNote::SUBTITLE_TOO_CLOSE
832                                 )
833                         );
834         }
835 }
836
837
838 static
839 void
840 check_text_timing (vector<shared_ptr<dcp::Reel>> reels, vector<VerificationNote>& notes)
841 {
842         if (reels.empty()) {
843                 return;
844         }
845
846         optional<int> picture_frame_rate;
847         if (reels[0]->main_picture()) {
848                 picture_frame_rate = reels[0]->main_picture()->frame_rate().numerator;
849         }
850
851         if (reels[0]->main_subtitle()) {
852                 check_text_timing (reels, picture_frame_rate, notes,
853                         [](shared_ptr<dcp::Reel> reel) {
854                                 return reel->main_subtitle()->asset()->raw_xml();
855                         },
856                         [](shared_ptr<dcp::Reel> reel) {
857                                 return reel->main_subtitle()->actual_duration();
858                         }
859                 );
860         }
861
862         for (auto i = 0U; i < reels[0]->closed_captions().size(); ++i) {
863                 check_text_timing (reels, picture_frame_rate, notes,
864                         [i](shared_ptr<dcp::Reel> reel) {
865                                 return reel->closed_captions()[i]->asset()->raw_xml();
866                         },
867                         [i](shared_ptr<dcp::Reel> reel) {
868                                 return reel->closed_captions()[i]->actual_duration();
869                         }
870                 );
871         }
872 }
873
874
875 vector<VerificationNote>
876 dcp::verify (
877         vector<boost::filesystem::path> directories,
878         function<void (string, optional<boost::filesystem::path>)> stage,
879         function<void (float)> progress,
880         boost::filesystem::path xsd_dtd_directory
881         )
882 {
883         xsd_dtd_directory = boost::filesystem::canonical (xsd_dtd_directory);
884
885         vector<VerificationNote> notes;
886         State state;
887
888         vector<shared_ptr<DCP>> dcps;
889         for (auto i: directories) {
890                 dcps.push_back (shared_ptr<DCP> (new DCP (i)));
891         }
892
893         for (auto dcp: dcps) {
894                 stage ("Checking DCP", dcp->directory());
895                 try {
896                         dcp->read (&notes);
897                 } catch (ReadError& e) {
898                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
899                 } catch (XMLError& e) {
900                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
901                 } catch (MXFFileError& e) {
902                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
903                 } catch (cxml::Error& e) {
904                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
905                 }
906
907                 if (dcp->standard() != dcp::SMPTE) {
908                         notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::NOT_SMPTE));
909                 }
910
911                 for (auto cpl: dcp->cpls()) {
912                         stage ("Checking CPL", cpl->file());
913                         validate_xml (cpl->file().get(), xsd_dtd_directory, notes);
914
915                         for (auto const& i: cpl->additional_subtitle_languages()) {
916                                 verify_language_tag (i, notes);
917                         }
918
919                         if (cpl->release_territory()) {
920                                 verify_language_tag (cpl->release_territory().get(), notes);
921                         }
922
923                         /* Check that the CPL's hash corresponds to the PKL */
924                         for (auto i: dcp->pkls()) {
925                                 optional<string> h = i->hash(cpl->id());
926                                 if (h && make_digest(ArrayData(*cpl->file())) != *h) {
927                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::CPL_HASH_INCORRECT));
928                                 }
929                         }
930
931                         for (auto reel: cpl->reels()) {
932                                 stage ("Checking reel", optional<boost::filesystem::path>());
933
934                                 for (auto i: reel->assets()) {
935                                         if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
936                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::DURATION_TOO_SMALL, i->id()));
937                                         }
938                                         if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
939                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INTRINSIC_DURATION_TOO_SMALL, i->id()));
940                                         }
941                                 }
942
943                                 if (reel->main_picture()) {
944                                         /* Check reel stuff */
945                                         auto const frame_rate = reel->main_picture()->frame_rate();
946                                         if (frame_rate.denominator != 1 ||
947                                             (frame_rate.numerator != 24 &&
948                                              frame_rate.numerator != 25 &&
949                                              frame_rate.numerator != 30 &&
950                                              frame_rate.numerator != 48 &&
951                                              frame_rate.numerator != 50 &&
952                                              frame_rate.numerator != 60 &&
953                                              frame_rate.numerator != 96)) {
954                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INVALID_PICTURE_FRAME_RATE));
955                                         }
956                                         /* Check asset */
957                                         if (reel->main_picture()->asset_ref().resolved()) {
958                                                 verify_main_picture_asset (dcp, reel->main_picture(), stage, progress, notes);
959                                         }
960                                 }
961
962                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
963                                         verify_main_sound_asset (dcp, reel->main_sound(), stage, progress, notes);
964                                 }
965
966                                 if (reel->main_subtitle()) {
967                                         verify_main_subtitle_reel (reel->main_subtitle(), notes);
968                                         if (reel->main_subtitle()->asset_ref().resolved()) {
969                                                 verify_subtitle_asset (reel->main_subtitle()->asset(), stage, xsd_dtd_directory, notes, state);
970                                         }
971                                 }
972
973                                 for (auto i: reel->closed_captions()) {
974                                         verify_closed_caption_reel (i, notes);
975                                         if (i->asset_ref().resolved()) {
976                                                 verify_closed_caption_asset (i->asset(), stage, xsd_dtd_directory, notes, state);
977                                         }
978                                 }
979                         }
980
981                         if (dcp->standard() == dcp::SMPTE) {
982                                 check_text_timing (cpl->reels(), notes);
983                         }
984                 }
985
986                 for (auto pkl: dcp->pkls()) {
987                         stage ("Checking PKL", pkl->file());
988                         validate_xml (pkl->file().get(), xsd_dtd_directory, notes);
989                 }
990
991                 if (dcp->asset_map_path()) {
992                         stage ("Checking ASSETMAP", dcp->asset_map_path().get());
993                         validate_xml (dcp->asset_map_path().get(), xsd_dtd_directory, notes);
994                 } else {
995                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::MISSING_ASSETMAP));
996                 }
997         }
998
999         return notes;
1000 }
1001
1002 string
1003 dcp::note_to_string (dcp::VerificationNote note)
1004 {
1005         switch (note.code()) {
1006         case dcp::VerificationNote::GENERAL_READ:
1007                 return *note.note();
1008         case dcp::VerificationNote::CPL_HASH_INCORRECT:
1009                 return "The hash of the CPL in the PKL does not agree with the CPL file.";
1010         case dcp::VerificationNote::INVALID_PICTURE_FRAME_RATE:
1011                 return "The picture in a reel has an invalid frame rate.";
1012         case dcp::VerificationNote::PICTURE_HASH_INCORRECT:
1013                 return dcp::String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
1014         case dcp::VerificationNote::PKL_CPL_PICTURE_HASHES_DIFFER:
1015                 return dcp::String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
1016         case dcp::VerificationNote::SOUND_HASH_INCORRECT:
1017                 return dcp::String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
1018         case dcp::VerificationNote::PKL_CPL_SOUND_HASHES_DIFFER:
1019                 return dcp::String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
1020         case dcp::VerificationNote::EMPTY_ASSET_PATH:
1021                 return "The asset map contains an empty asset path.";
1022         case dcp::VerificationNote::MISSING_ASSET:
1023                 return String::compose("The file for an asset in the asset map cannot be found; missing file is %1.", note.file()->filename());
1024         case dcp::VerificationNote::MISMATCHED_STANDARD:
1025                 return "The DCP contains both SMPTE and Interop parts.";
1026         case dcp::VerificationNote::XML_VALIDATION_ERROR:
1027                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
1028         case dcp::VerificationNote::MISSING_ASSETMAP:
1029                 return "No ASSETMAP or ASSETMAP.xml was found.";
1030         case dcp::VerificationNote::INTRINSIC_DURATION_TOO_SMALL:
1031                 return String::compose("The intrinsic duration of an asset is less than 1 second long: %1", note.note().get());
1032         case dcp::VerificationNote::DURATION_TOO_SMALL:
1033                 return String::compose("The duration of an asset is less than 1 second long: %1", note.note().get());
1034         case dcp::VerificationNote::PICTURE_FRAME_TOO_LARGE_IN_BYTES:
1035                 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
1036         case dcp::VerificationNote::PICTURE_FRAME_NEARLY_TOO_LARGE_IN_BYTES:
1037                 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
1038         case dcp::VerificationNote::EXTERNAL_ASSET:
1039                 return String::compose("An asset that this DCP refers to is not included in the DCP.  It may be a VF.  Missing asset is %1.", note.note().get());
1040         case dcp::VerificationNote::NOT_SMPTE:
1041                 return "This DCP does not use the SMPTE standard, which is required for Bv2.1 compliance.";
1042         case dcp::VerificationNote::BAD_LANGUAGE:
1043                 return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
1044         case dcp::VerificationNote::PICTURE_ASSET_INVALID_SIZE_IN_PIXELS:
1045                 return String::compose("A picture asset's size (%1) is not one of those allowed by Bv2.1 (2048x858, 1998x1080, 4096x1716 or 3996x2160)", note.note().get());
1046         case dcp::VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_2K:
1047                 return String::compose("A picture asset's frame rate (%1) is not one of those allowed for 2K DCPs by Bv2.1 (24, 25 or 48fps)", note.note().get());
1048         case dcp::VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_4K:
1049                 return String::compose("A picture asset's frame rate (%1) is not 24fps as required for 4K DCPs by Bv2.1", note.note().get());
1050         case dcp::VerificationNote::PICTURE_ASSET_4K_3D:
1051                 return "3D 4K DCPs are not allowed by Bv2.1";
1052         case dcp::VerificationNote::CLOSED_CAPTION_XML_TOO_LARGE_IN_BYTES:
1053                 return String::compose("The XML for the closed caption asset %1 is longer than the 256KB maximum required by Bv2.1", note.file()->filename());
1054         case dcp::VerificationNote::TIMED_TEXT_ASSET_TOO_LARGE_IN_BYTES:
1055                 return String::compose("The total size of the timed text asset %1 is larger than the 115MB maximum required by Bv2.1", note.file()->filename());
1056         case dcp::VerificationNote::TIMED_TEXT_FONTS_TOO_LARGE_IN_BYTES:
1057                 return String::compose("The total size of the fonts in timed text asset %1 is larger than the 10MB maximum required by Bv2.1", note.file()->filename());
1058         case dcp::VerificationNote::MISSING_SUBTITLE_LANGUAGE:
1059                 return String::compose("The XML for a SMPTE subtitle asset has no <Language> tag, which is required by Bv2.1", note.file()->filename());
1060         case dcp::VerificationNote::SUBTITLE_LANGUAGES_DIFFER:
1061                 return String::compose("Some subtitle assets have different <Language> tags than others", note.file()->filename());
1062         case dcp::VerificationNote::MISSING_SUBTITLE_START_TIME:
1063                 return String::compose("The XML for a SMPTE subtitle asset has no <StartTime> tag, which is required by Bv2.1", note.file()->filename());
1064         case dcp::VerificationNote::SUBTITLE_START_TIME_NON_ZERO:
1065                 return String::compose("The XML for a SMPTE subtitle asset has a non-zero <StartTime> tag, which is disallowed by Bv2.1", note.file()->filename());
1066         case dcp::VerificationNote::FIRST_TEXT_TOO_EARLY:
1067                 return "The first subtitle or closed caption is less than 4 seconds from the start of the DCP.";
1068         case dcp::VerificationNote::SUBTITLE_TOO_SHORT:
1069                 return "At least one subtitle is less than the minimum of 15 frames suggested by Bv2.1";
1070         case dcp::VerificationNote::SUBTITLE_TOO_CLOSE:
1071                 return "At least one pair of subtitles are separated by less than the the minimum of 2 frames suggested by Bv2.1";
1072         }
1073
1074         return "";
1075 }