6a9967e60f4d35096c3cd382aa2458f571317d42
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2021 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34 #include "verify.h"
35 #include "dcp.h"
36 #include "cpl.h"
37 #include "reel.h"
38 #include "reel_closed_caption_asset.h"
39 #include "reel_picture_asset.h"
40 #include "reel_sound_asset.h"
41 #include "reel_subtitle_asset.h"
42 #include "interop_subtitle_asset.h"
43 #include "mono_picture_asset.h"
44 #include "mono_picture_frame.h"
45 #include "stereo_picture_asset.h"
46 #include "stereo_picture_frame.h"
47 #include "exceptions.h"
48 #include "compose.hpp"
49 #include "raw_convert.h"
50 #include "smpte_subtitle_asset.h"
51 #include <xercesc/util/PlatformUtils.hpp>
52 #include <xercesc/parsers/XercesDOMParser.hpp>
53 #include <xercesc/parsers/AbstractDOMParser.hpp>
54 #include <xercesc/sax/HandlerBase.hpp>
55 #include <xercesc/dom/DOMImplementation.hpp>
56 #include <xercesc/dom/DOMImplementationLS.hpp>
57 #include <xercesc/dom/DOMImplementationRegistry.hpp>
58 #include <xercesc/dom/DOMLSParser.hpp>
59 #include <xercesc/dom/DOMException.hpp>
60 #include <xercesc/dom/DOMDocument.hpp>
61 #include <xercesc/dom/DOMNodeList.hpp>
62 #include <xercesc/dom/DOMError.hpp>
63 #include <xercesc/dom/DOMLocator.hpp>
64 #include <xercesc/dom/DOMNamedNodeMap.hpp>
65 #include <xercesc/dom/DOMAttr.hpp>
66 #include <xercesc/dom/DOMErrorHandler.hpp>
67 #include <xercesc/framework/LocalFileInputSource.hpp>
68 #include <xercesc/framework/MemBufInputSource.hpp>
69 #include <boost/noncopyable.hpp>
70 #include <boost/algorithm/string.hpp>
71 #include <map>
72 #include <list>
73 #include <vector>
74 #include <iostream>
75
76 using std::list;
77 using std::vector;
78 using std::string;
79 using std::cout;
80 using std::map;
81 using std::max;
82 using std::shared_ptr;
83 using boost::optional;
84 using boost::function;
85 using std::dynamic_pointer_cast;
86
87 using namespace dcp;
88 using namespace xercesc;
89
90 static
91 string
92 xml_ch_to_string (XMLCh const * a)
93 {
94         char* x = XMLString::transcode(a);
95         string const o(x);
96         XMLString::release(&x);
97         return o;
98 }
99
100 class XMLValidationError
101 {
102 public:
103         XMLValidationError (SAXParseException const & e)
104                 : _message (xml_ch_to_string(e.getMessage()))
105                 , _line (e.getLineNumber())
106                 , _column (e.getColumnNumber())
107                 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
108                 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
109         {
110
111         }
112
113         string message () const {
114                 return _message;
115         }
116
117         uint64_t line () const {
118                 return _line;
119         }
120
121         uint64_t column () const {
122                 return _column;
123         }
124
125         string public_id () const {
126                 return _public_id;
127         }
128
129         string system_id () const {
130                 return _system_id;
131         }
132
133 private:
134         string _message;
135         uint64_t _line;
136         uint64_t _column;
137         string _public_id;
138         string _system_id;
139 };
140
141
142 class DCPErrorHandler : public ErrorHandler
143 {
144 public:
145         void warning(const SAXParseException& e)
146         {
147                 maybe_add (XMLValidationError(e));
148         }
149
150         void error(const SAXParseException& e)
151         {
152                 maybe_add (XMLValidationError(e));
153         }
154
155         void fatalError(const SAXParseException& e)
156         {
157                 maybe_add (XMLValidationError(e));
158         }
159
160         void resetErrors() {
161                 _errors.clear ();
162         }
163
164         list<XMLValidationError> errors () const {
165                 return _errors;
166         }
167
168 private:
169         void maybe_add (XMLValidationError e)
170         {
171                 /* XXX: nasty hack */
172                 if (
173                         e.message().find("schema document") != string::npos &&
174                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
175                         ) {
176                         return;
177                 }
178
179                 _errors.push_back (e);
180         }
181
182         list<XMLValidationError> _errors;
183 };
184
185 class StringToXMLCh : public boost::noncopyable
186 {
187 public:
188         StringToXMLCh (string a)
189         {
190                 _buffer = XMLString::transcode(a.c_str());
191         }
192
193         ~StringToXMLCh ()
194         {
195                 XMLString::release (&_buffer);
196         }
197
198         XMLCh const * get () const {
199                 return _buffer;
200         }
201
202 private:
203         XMLCh* _buffer;
204 };
205
206 class LocalFileResolver : public EntityResolver
207 {
208 public:
209         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
210                 : _xsd_dtd_directory (xsd_dtd_directory)
211         {
212                 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
213                  * found without being here.
214                  */
215                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
216                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
217                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
218                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
219                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
220                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
221                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
222                 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
223                 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
224                 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
225                 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
226                 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
227                 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
228         }
229
230         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
231         {
232                 if (!system_id) {
233                         return 0;
234                 }
235                 auto system_id_str = xml_ch_to_string (system_id);
236                 auto p = _xsd_dtd_directory;
237                 if (_files.find(system_id_str) == _files.end()) {
238                         p /= system_id_str;
239                 } else {
240                         p /= _files[system_id_str];
241                 }
242                 StringToXMLCh ch (p.string());
243                 return new LocalFileInputSource(ch.get());
244         }
245
246 private:
247         void add (string uri, string file)
248         {
249                 _files[uri] = file;
250         }
251
252         std::map<string, string> _files;
253         boost::filesystem::path _xsd_dtd_directory;
254 };
255
256
257 static void
258 parse (XercesDOMParser& parser, boost::filesystem::path xml)
259 {
260         parser.parse(xml.string().c_str());
261 }
262
263
264 static void
265 parse (XercesDOMParser& parser, std::string xml)
266 {
267         xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
268         parser.parse(buf);
269 }
270
271
272 template <class T>
273 void
274 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, list<VerificationNote>& notes)
275 {
276         try {
277                 XMLPlatformUtils::Initialize ();
278         } catch (XMLException& e) {
279                 throw MiscError ("Failed to initialise xerces library");
280         }
281
282         DCPErrorHandler error_handler;
283
284         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
285         {
286                 XercesDOMParser parser;
287                 parser.setValidationScheme(XercesDOMParser::Val_Always);
288                 parser.setDoNamespaces(true);
289                 parser.setDoSchema(true);
290
291                 vector<string> schema;
292                 schema.push_back("xml.xsd");
293                 schema.push_back("xmldsig-core-schema.xsd");
294                 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
295                 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
296                 schema.push_back("SMPTE-429-9-2007-AM.xsd");
297                 schema.push_back("Main-Stereo-Picture-CPL.xsd");
298                 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
299                 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
300                 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
301                 schema.push_back("DCSubtitle.v1.mattsson.xsd");
302                 schema.push_back("DCDMSubtitle-2010.xsd");
303                 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
304                 schema.push_back("SMPTE-429-16.xsd");
305                 schema.push_back("Dolby-2012-AD.xsd");
306                 schema.push_back("SMPTE-429-10-2008.xsd");
307                 schema.push_back("xlink.xsd");
308                 schema.push_back("SMPTE-335-2012.xsd");
309                 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
310                 schema.push_back("isdcf-mca.xsd");
311                 schema.push_back("SMPTE-429-12-2008.xsd");
312
313                 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
314                  * Schemas that are not mentioned in this list are not read, and the things
315                  * they describe are not checked.
316                  */
317                 string locations;
318                 for (auto i: schema) {
319                         locations += String::compose("%1 %1 ", i, i);
320                 }
321
322                 parser.setExternalSchemaLocation(locations.c_str());
323                 parser.setValidationSchemaFullChecking(true);
324                 parser.setErrorHandler(&error_handler);
325
326                 LocalFileResolver resolver (xsd_dtd_directory);
327                 parser.setEntityResolver(&resolver);
328
329                 try {
330                         parser.resetDocumentPool();
331                         parse(parser, xml);
332                 } catch (XMLException& e) {
333                         throw MiscError(xml_ch_to_string(e.getMessage()));
334                 } catch (DOMException& e) {
335                         throw MiscError(xml_ch_to_string(e.getMessage()));
336                 } catch (...) {
337                         throw MiscError("Unknown exception from xerces");
338                 }
339         }
340
341         XMLPlatformUtils::Terminate ();
342
343         for (auto i: error_handler.errors()) {
344                 notes.push_back (
345                         VerificationNote(
346                                 VerificationNote::VERIFY_ERROR,
347                                 VerificationNote::XML_VALIDATION_ERROR,
348                                 i.message(),
349                                 boost::trim_copy(i.public_id() + " " + i.system_id()),
350                                 i.line()
351                                 )
352                         );
353         }
354 }
355
356
/** Result of the hash checks carried out by verify_asset() */
enum VerifyAssetResult {
        /** neither of the other two results applied */
        VERIFY_ASSET_RESULT_GOOD,
        /** the CPL gives a hash for the asset, and it differs from the one in the PKL */
        VERIFY_ASSET_RESULT_CPL_PKL_DIFFER,
        /** the hash computed from the asset differs from the one in the PKL */
        VERIFY_ASSET_RESULT_BAD
};
362
363
364 static VerifyAssetResult
365 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
366 {
367         auto const actual_hash = reel_mxf->asset_ref()->hash(progress);
368
369         auto pkls = dcp->pkls();
370         /* We've read this DCP in so it must have at least one PKL */
371         DCP_ASSERT (!pkls.empty());
372
373         auto asset = reel_mxf->asset_ref().asset();
374
375         optional<string> pkl_hash;
376         for (auto i: pkls) {
377                 pkl_hash = i->hash (reel_mxf->asset_ref()->id());
378                 if (pkl_hash) {
379                         break;
380                 }
381         }
382
383         DCP_ASSERT (pkl_hash);
384
385         auto cpl_hash = reel_mxf->hash();
386         if (cpl_hash && *cpl_hash != *pkl_hash) {
387                 return VERIFY_ASSET_RESULT_CPL_PKL_DIFFER;
388         }
389
390         if (actual_hash != *pkl_hash) {
391                 return VERIFY_ASSET_RESULT_BAD;
392         }
393
394         return VERIFY_ASSET_RESULT_GOOD;
395 }
396
397
398 void
399 verify_language_tag (string tag, list<VerificationNote>& notes)
400 {
401         try {
402                 dcp::LanguageTag test (tag);
403         } catch (dcp::LanguageTagError &) {
404                 notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::BAD_LANGUAGE, tag));
405         }
406 }
407
408
/** Result of the frame size check carried out by verify_picture_asset_type() */
enum VerifyPictureAssetResult
{
        /** the biggest frame is within both limits */
        VERIFY_PICTURE_ASSET_RESULT_GOOD,
        /** the biggest frame is over the lower (230Mbit/s-derived) limit but not the upper one */
        VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE,
        /** the biggest frame is over the upper (250Mbit/s-derived) limit */
        VERIFY_PICTURE_ASSET_RESULT_BAD,
};
415
416
417 int
418 biggest_frame_size (shared_ptr<const MonoPictureFrame> frame)
419 {
420         return frame->size ();
421 }
422
423 int
424 biggest_frame_size (shared_ptr<const StereoPictureFrame> frame)
425 {
426         return max(frame->left()->size(), frame->right()->size());
427 }
428
429
430 template <class A, class R, class F>
431 optional<VerifyPictureAssetResult>
432 verify_picture_asset_type (shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
433 {
434         auto asset = dynamic_pointer_cast<A>(reel_mxf->asset_ref().asset());
435         if (!asset) {
436                 return optional<VerifyPictureAssetResult>();
437         }
438
439         int biggest_frame = 0;
440         auto reader = asset->start_read ();
441         auto const duration = asset->intrinsic_duration ();
442         for (int64_t i = 0; i < duration; ++i) {
443                 shared_ptr<const F> frame = reader->get_frame (i);
444                 biggest_frame = max(biggest_frame, biggest_frame_size(frame));
445                 progress (float(i) / duration);
446         }
447
448         static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
449         static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
450         if (biggest_frame > max_frame) {
451                 return VERIFY_PICTURE_ASSET_RESULT_BAD;
452         } else if (biggest_frame > risky_frame) {
453                 return VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE;
454         }
455
456         return VERIFY_PICTURE_ASSET_RESULT_GOOD;
457 }
458
459
460 static VerifyPictureAssetResult
461 verify_picture_asset (shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
462 {
463         auto r = verify_picture_asset_type<MonoPictureAsset, MonoPictureAssetReader, MonoPictureFrame>(reel_mxf, progress);
464         if (!r) {
465                 r = verify_picture_asset_type<StereoPictureAsset, StereoPictureAssetReader, StereoPictureFrame>(reel_mxf, progress);
466         }
467
468         DCP_ASSERT (r);
469         return *r;
470 }
471
472
473 static void
474 verify_main_picture_asset (
475         shared_ptr<const DCP> dcp,
476         shared_ptr<const ReelPictureAsset> reel_asset,
477         function<void (string, optional<boost::filesystem::path>)> stage,
478         function<void (float)> progress,
479         list<VerificationNote>& notes
480         )
481 {
482         auto asset = reel_asset->asset();
483         auto const file = *asset->file();
484         stage ("Checking picture asset hash", file);
485         auto const r = verify_asset (dcp, reel_asset, progress);
486         switch (r) {
487                 case VERIFY_ASSET_RESULT_BAD:
488                         notes.push_back (
489                                 VerificationNote(
490                                         VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_HASH_INCORRECT, file
491                                         )
492                                 );
493                         break;
494                 case VERIFY_ASSET_RESULT_CPL_PKL_DIFFER:
495                         notes.push_back (
496                                 VerificationNote(
497                                         VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_PICTURE_HASHES_DIFFER, file
498                                         )
499                                 );
500                         break;
501                 default:
502                         break;
503         }
504         stage ("Checking picture frame sizes", asset->file());
505         auto const pr = verify_picture_asset (reel_asset, progress);
506         switch (pr) {
507                 case VERIFY_PICTURE_ASSET_RESULT_BAD:
508                         notes.push_back (
509                                 VerificationNote(
510                                         VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_FRAME_TOO_LARGE_IN_BYTES, file
511                                         )
512                                 );
513                         break;
514                 case VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE:
515                         notes.push_back (
516                                 VerificationNote(
517                                         VerificationNote::VERIFY_WARNING, VerificationNote::PICTURE_FRAME_NEARLY_TOO_LARGE_IN_BYTES, file
518                                         )
519                                 );
520                         break;
521                 default:
522                         break;
523         }
524
525         /* Only flat/scope allowed by Bv2.1 */
526         if (
527                 asset->size() != dcp::Size(2048, 858) &&
528                 asset->size() != dcp::Size(1998, 1080) &&
529                 asset->size() != dcp::Size(4096, 1716) &&
530                 asset->size() != dcp::Size(3996, 2160)) {
531                 notes.push_back(
532                         VerificationNote(
533                                 VerificationNote::VERIFY_BV21_ERROR,
534                                 VerificationNote::PICTURE_ASSET_INVALID_SIZE_IN_PIXELS,
535                                 String::compose("%1x%2", asset->size().width, asset->size().height),
536                                 file
537                                 )
538                         );
539         }
540
541         /* Only 24, 25, 48fps allowed for 2K */
542         if (
543                 (asset->size() == dcp::Size(2048, 858) || asset->size() == dcp::Size(1998, 1080)) &&
544                 (asset->edit_rate() != dcp::Fraction(24, 1) && asset->edit_rate() != dcp::Fraction(25, 1) && asset->edit_rate() != dcp::Fraction(48, 1))
545            ) {
546                 notes.push_back(
547                         VerificationNote(
548                                 VerificationNote::VERIFY_BV21_ERROR,
549                                 VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_2K,
550                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
551                                 file
552                                 )
553                         );
554         }
555
556         if (asset->size() == dcp::Size(4096, 1716) || asset->size() == dcp::Size(3996, 2160)) {
557                 /* Only 24fps allowed for 4K */
558                 if (asset->edit_rate() != dcp::Fraction(24, 1)) {
559                         notes.push_back(
560                                 VerificationNote(
561                                         VerificationNote::VERIFY_BV21_ERROR,
562                                         VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_4K,
563                                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
564                                         file
565                                         )
566                                 );
567                 }
568
569                 /* Only 2D allowed for 4K */
570                 if (dynamic_pointer_cast<const StereoPictureAsset>(asset)) {
571                         notes.push_back(
572                                 VerificationNote(
573                                         VerificationNote::VERIFY_BV21_ERROR,
574                                         VerificationNote::PICTURE_ASSET_4K_3D,
575                                         file
576                                         )
577                                 );
578
579                 }
580         }
581
582 }
583
584
585 static void
586 verify_main_sound_asset (
587         shared_ptr<const DCP> dcp,
588         shared_ptr<const ReelSoundAsset> reel_asset,
589         function<void (string, optional<boost::filesystem::path>)> stage,
590         function<void (float)> progress,
591         list<VerificationNote>& notes
592         )
593 {
594         auto asset = reel_asset->asset();
595         stage ("Checking sound asset hash", asset->file());
596         auto const r = verify_asset (dcp, reel_asset, progress);
597         switch (r) {
598                 case VERIFY_ASSET_RESULT_BAD:
599                         notes.push_back (
600                                 VerificationNote(
601                                         VerificationNote::VERIFY_ERROR, VerificationNote::SOUND_HASH_INCORRECT, *asset->file()
602                                         )
603                                 );
604                         break;
605                 case VERIFY_ASSET_RESULT_CPL_PKL_DIFFER:
606                         notes.push_back (
607                                 VerificationNote(
608                                         VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_SOUND_HASHES_DIFFER, *asset->file()
609                                         )
610                                 );
611                         break;
612                 default:
613                         break;
614         }
615
616         stage ("Checking sound asset metadata", asset->file());
617
618         verify_language_tag (asset->language(), notes);
619 }
620
621
622 static void
623 verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, list<VerificationNote>& notes)
624 {
625         /* XXX: is Language compulsory? */
626         if (reel_asset->language()) {
627                 verify_language_tag (*reel_asset->language(), notes);
628         }
629 }
630
631
632 static void
633 verify_closed_caption_reel (shared_ptr<const ReelClosedCaptionAsset> reel_asset, list<VerificationNote>& notes)
634 {
635         /* XXX: is Language compulsory? */
636         if (reel_asset->language()) {
637                 verify_language_tag (*reel_asset->language(), notes);
638         }
639 }
640
641
/** Things that must be remembered from one call of verify_subtitle_asset() to the next */
struct State
{
        /** language of the first SMPTE subtitle asset which specified one (unset until then);
         *  later assets' languages are compared with this.
         */
        boost::optional<string> subtitle_language;
};
646
647
648 static void
649 verify_subtitle_asset (
650         shared_ptr<const SubtitleAsset> asset,
651         function<void (string, optional<boost::filesystem::path>)> stage,
652         boost::filesystem::path xsd_dtd_directory,
653         list<VerificationNote>& notes,
654         State& state,
655         bool first_reel
656         )
657 {
658         stage ("Checking subtitle XML", asset->file());
659         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
660          * gets passed through libdcp which may clean up and therefore hide errors.
661          */
662         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
663
664         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
665         if (smpte) {
666                 if (smpte->language()) {
667                         auto const language = *smpte->language();
668                         verify_language_tag (language, notes);
669                         if (!state.subtitle_language) {
670                                 state.subtitle_language = language;
671                         } else if (state.subtitle_language != language) {
672                                 notes.push_back (
673                                         VerificationNote(
674                                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::SUBTITLE_LANGUAGES_DIFFER, *asset->file()
675                                                 )
676                                         );
677                         }
678                 } else {
679                         notes.push_back (
680                                 VerificationNote(
681                                         VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_SUBTITLE_LANGUAGE, *asset->file()
682                                         )
683                                 );
684                 }
685                 if (boost::filesystem::file_size(*asset->file()) > 115 * 1024 * 1024) {
686                         notes.push_back (
687                                 VerificationNote(
688                                         VerificationNote::VERIFY_BV21_ERROR, VerificationNote::TIMED_TEXT_ASSET_TOO_LARGE_IN_BYTES, *asset->file()
689                                         )
690                                 );
691                 }
692                 /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
693                  * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
694                  */
695                 auto fonts = asset->font_data ();
696                 int total_size = 0;
697                 for (auto i: fonts) {
698                         total_size += i.second.size();
699                 }
700                 if (total_size > 10 * 1024 * 1024) {
701                         notes.push_back (
702                                 VerificationNote(
703                                         VerificationNote::VERIFY_BV21_ERROR, VerificationNote::TIMED_TEXT_FONTS_TOO_LARGE_IN_BYTES, *asset->file()
704                                         )
705                                 );
706                 }
707
708                 if (!smpte->start_time()) {
709                         notes.push_back (
710                                 VerificationNote(
711                                         VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_SUBTITLE_START_TIME, *asset->file())
712                                 );
713                 } else if (smpte->start_time() != dcp::Time()) {
714                         notes.push_back (
715                                 VerificationNote(
716                                         VerificationNote::VERIFY_BV21_ERROR, VerificationNote::SUBTITLE_START_TIME_NON_ZERO, *asset->file())
717                                 );
718                 }
719
720                 if (first_reel) {
721                         auto subs = smpte->subtitles();
722                         subs.sort ([](shared_ptr<Subtitle> a, shared_ptr<Subtitle> b) {
723                                 return a->in() < b->in();
724                         });
725                         if (!subs.empty() && subs.front()->in() < dcp::Time(0, 0, 4, 0, 24)) {
726                                 notes.push_back(
727                                         VerificationNote(
728                                                 VerificationNote::VERIFY_WARNING, VerificationNote::FIRST_TEXT_TOO_EARLY
729                                                 )
730                                         );
731                         }
732                 }
733         }
734 }
735
736
737 static void
738 verify_closed_caption_asset (
739         shared_ptr<const SubtitleAsset> asset,
740         function<void (string, optional<boost::filesystem::path>)> stage,
741         boost::filesystem::path xsd_dtd_directory,
742         list<VerificationNote>& notes,
743         State& state,
744         bool first_reel
745         )
746 {
747         verify_subtitle_asset (asset, stage, xsd_dtd_directory, notes, state, first_reel);
748
749         if (asset->raw_xml().size() > 256 * 1024) {
750                 notes.push_back (
751                         VerificationNote(
752                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::CLOSED_CAPTION_XML_TOO_LARGE_IN_BYTES, *asset->file()
753                                 )
754                         );
755         }
756 }
757
758
759 list<VerificationNote>
760 dcp::verify (
761         vector<boost::filesystem::path> directories,
762         function<void (string, optional<boost::filesystem::path>)> stage,
763         function<void (float)> progress,
764         boost::filesystem::path xsd_dtd_directory
765         )
766 {
767         xsd_dtd_directory = boost::filesystem::canonical (xsd_dtd_directory);
768
769         list<VerificationNote> notes;
770         State state;
771
772         list<shared_ptr<DCP>> dcps;
773         for (auto i: directories) {
774                 dcps.push_back (shared_ptr<DCP> (new DCP (i)));
775         }
776
777         for (auto dcp: dcps) {
778                 stage ("Checking DCP", dcp->directory());
779                 try {
780                         dcp->read (&notes);
781                 } catch (ReadError& e) {
782                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
783                 } catch (XMLError& e) {
784                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
785                 } catch (MXFFileError& e) {
786                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
787                 } catch (cxml::Error& e) {
788                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
789                 }
790
791                 if (dcp->standard() != dcp::SMPTE) {
792                         notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::NOT_SMPTE));
793                 }
794
795                 for (auto cpl: dcp->cpls()) {
796                         stage ("Checking CPL", cpl->file());
797                         validate_xml (cpl->file().get(), xsd_dtd_directory, notes);
798
799                         for (auto const& i: cpl->additional_subtitle_languages()) {
800                                 verify_language_tag (i, notes);
801                         }
802
803                         if (cpl->release_territory()) {
804                                 verify_language_tag (cpl->release_territory().get(), notes);
805                         }
806
807                         /* Check that the CPL's hash corresponds to the PKL */
808                         for (auto i: dcp->pkls()) {
809                                 optional<string> h = i->hash(cpl->id());
810                                 if (h && make_digest(ArrayData(*cpl->file())) != *h) {
811                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::CPL_HASH_INCORRECT));
812                                 }
813                         }
814
815                         bool first_reel = true;
816                         for (auto reel: cpl->reels()) {
817                                 stage ("Checking reel", optional<boost::filesystem::path>());
818
819                                 for (auto i: reel->assets()) {
820                                         if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
821                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::DURATION_TOO_SMALL, i->id()));
822                                         }
823                                         if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
824                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INTRINSIC_DURATION_TOO_SMALL, i->id()));
825                                         }
826                                 }
827
828                                 if (reel->main_picture()) {
829                                         /* Check reel stuff */
830                                         auto const frame_rate = reel->main_picture()->frame_rate();
831                                         if (frame_rate.denominator != 1 ||
832                                             (frame_rate.numerator != 24 &&
833                                              frame_rate.numerator != 25 &&
834                                              frame_rate.numerator != 30 &&
835                                              frame_rate.numerator != 48 &&
836                                              frame_rate.numerator != 50 &&
837                                              frame_rate.numerator != 60 &&
838                                              frame_rate.numerator != 96)) {
839                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INVALID_PICTURE_FRAME_RATE));
840                                         }
841                                         /* Check asset */
842                                         if (reel->main_picture()->asset_ref().resolved()) {
843                                                 verify_main_picture_asset (dcp, reel->main_picture(), stage, progress, notes);
844                                         }
845                                 }
846
847                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
848                                         verify_main_sound_asset (dcp, reel->main_sound(), stage, progress, notes);
849                                 }
850
851                                 if (reel->main_subtitle()) {
852                                         verify_main_subtitle_reel (reel->main_subtitle(), notes);
853                                         if (reel->main_subtitle()->asset_ref().resolved()) {
854                                                 verify_subtitle_asset (reel->main_subtitle()->asset(), stage, xsd_dtd_directory, notes, state, first_reel);
855                                         }
856                                 }
857
858                                 for (auto i: reel->closed_captions()) {
859                                         verify_closed_caption_reel (i, notes);
860                                         if (i->asset_ref().resolved()) {
861                                                 verify_closed_caption_asset (i->asset(), stage, xsd_dtd_directory, notes, state, first_reel);
862                                         }
863                                 }
864
865                                 first_reel = false;
866                         }
867                 }
868
869                 for (auto pkl: dcp->pkls()) {
870                         stage ("Checking PKL", pkl->file());
871                         validate_xml (pkl->file().get(), xsd_dtd_directory, notes);
872                 }
873
874                 if (dcp->asset_map_path()) {
875                         stage ("Checking ASSETMAP", dcp->asset_map_path().get());
876                         validate_xml (dcp->asset_map_path().get(), xsd_dtd_directory, notes);
877                 } else {
878                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::MISSING_ASSETMAP));
879                 }
880         }
881
882         return notes;
883 }
884
885 string
886 dcp::note_to_string (dcp::VerificationNote note)
887 {
888         switch (note.code()) {
889         case dcp::VerificationNote::GENERAL_READ:
890                 return *note.note();
891         case dcp::VerificationNote::CPL_HASH_INCORRECT:
892                 return "The hash of the CPL in the PKL does not agree with the CPL file.";
893         case dcp::VerificationNote::INVALID_PICTURE_FRAME_RATE:
894                 return "The picture in a reel has an invalid frame rate.";
895         case dcp::VerificationNote::PICTURE_HASH_INCORRECT:
896                 return dcp::String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
897         case dcp::VerificationNote::PKL_CPL_PICTURE_HASHES_DIFFER:
898                 return dcp::String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
899         case dcp::VerificationNote::SOUND_HASH_INCORRECT:
900                 return dcp::String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
901         case dcp::VerificationNote::PKL_CPL_SOUND_HASHES_DIFFER:
902                 return dcp::String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
903         case dcp::VerificationNote::EMPTY_ASSET_PATH:
904                 return "The asset map contains an empty asset path.";
905         case dcp::VerificationNote::MISSING_ASSET:
906                 return String::compose("The file for an asset in the asset map cannot be found; missing file is %1.", note.file()->filename());
907         case dcp::VerificationNote::MISMATCHED_STANDARD:
908                 return "The DCP contains both SMPTE and Interop parts.";
909         case dcp::VerificationNote::XML_VALIDATION_ERROR:
910                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
911         case dcp::VerificationNote::MISSING_ASSETMAP:
912                 return "No ASSETMAP or ASSETMAP.xml was found.";
913         case dcp::VerificationNote::INTRINSIC_DURATION_TOO_SMALL:
914                 return String::compose("The intrinsic duration of an asset is less than 1 second long: %1", note.note().get());
915         case dcp::VerificationNote::DURATION_TOO_SMALL:
916                 return String::compose("The duration of an asset is less than 1 second long: %1", note.note().get());
917         case dcp::VerificationNote::PICTURE_FRAME_TOO_LARGE_IN_BYTES:
918                 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
919         case dcp::VerificationNote::PICTURE_FRAME_NEARLY_TOO_LARGE_IN_BYTES:
920                 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
921         case dcp::VerificationNote::EXTERNAL_ASSET:
922                 return String::compose("An asset that this DCP refers to is not included in the DCP.  It may be a VF.  Missing asset is %1.", note.note().get());
923         case dcp::VerificationNote::NOT_SMPTE:
924                 return "This DCP does not use the SMPTE standard, which is required for Bv2.1 compliance.";
925         case dcp::VerificationNote::BAD_LANGUAGE:
926                 return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
927         case dcp::VerificationNote::PICTURE_ASSET_INVALID_SIZE_IN_PIXELS:
928                 return String::compose("A picture asset's size (%1) is not one of those allowed by Bv2.1 (2048x858, 1998x1080, 4096x1716 or 3996x2160)", note.note().get());
929         case dcp::VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_2K:
930                 return String::compose("A picture asset's frame rate (%1) is not one of those allowed for 2K DCPs by Bv2.1 (24, 25 or 48fps)", note.note().get());
931         case dcp::VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_4K:
932                 return String::compose("A picture asset's frame rate (%1) is not 24fps as required for 4K DCPs by Bv2.1", note.note().get());
933         case dcp::VerificationNote::PICTURE_ASSET_4K_3D:
934                 return "3D 4K DCPs are not allowed by Bv2.1";
935         case dcp::VerificationNote::CLOSED_CAPTION_XML_TOO_LARGE_IN_BYTES:
936                 return String::compose("The XML for the closed caption asset %1 is longer than the 256KB maximum required by Bv2.1", note.file()->filename());
937         case dcp::VerificationNote::TIMED_TEXT_ASSET_TOO_LARGE_IN_BYTES:
938                 return String::compose("The total size of the timed text asset %1 is larger than the 115MB maximum required by Bv2.1", note.file()->filename());
939         case dcp::VerificationNote::TIMED_TEXT_FONTS_TOO_LARGE_IN_BYTES:
940                 return String::compose("The total size of the fonts in timed text asset %1 is larger than the 10MB maximum required by Bv2.1", note.file()->filename());
941         case dcp::VerificationNote::MISSING_SUBTITLE_LANGUAGE:
942                 return String::compose("The XML for a SMPTE subtitle asset has no <Language> tag, which is required by Bv2.1", note.file()->filename());
943         case dcp::VerificationNote::SUBTITLE_LANGUAGES_DIFFER:
944                 return String::compose("Some subtitle assets have different <Language> tags than others", note.file()->filename());
945         case dcp::VerificationNote::MISSING_SUBTITLE_START_TIME:
946                 return String::compose("The XML for a SMPTE subtitle asset has no <StartTime> tag, which is required by Bv2.1", note.file()->filename());
947         case dcp::VerificationNote::SUBTITLE_START_TIME_NON_ZERO:
948                 return String::compose("The XML for a SMPTE subtitle asset has a non-zero <StartTime> tag, which is disallowed by Bv2.1", note.file()->filename());
949         case dcp::VerificationNote::FIRST_TEXT_TOO_EARLY:
950                 return "The first subtitle or closed caption is less than 4 seconds from the start of the DCP.";
951         }
952
953         return "";
954 }