Bv2.1 7.2.2: Check that subtitle languages are the same for all reels.
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2020 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34 #include "verify.h"
35 #include "dcp.h"
36 #include "cpl.h"
37 #include "reel.h"
38 #include "reel_closed_caption_asset.h"
39 #include "reel_picture_asset.h"
40 #include "reel_sound_asset.h"
41 #include "reel_subtitle_asset.h"
42 #include "interop_subtitle_asset.h"
43 #include "mono_picture_asset.h"
44 #include "mono_picture_frame.h"
45 #include "stereo_picture_asset.h"
46 #include "stereo_picture_frame.h"
47 #include "exceptions.h"
48 #include "compose.hpp"
49 #include "raw_convert.h"
50 #include "smpte_subtitle_asset.h"
51 #include <xercesc/util/PlatformUtils.hpp>
52 #include <xercesc/parsers/XercesDOMParser.hpp>
53 #include <xercesc/parsers/AbstractDOMParser.hpp>
54 #include <xercesc/sax/HandlerBase.hpp>
55 #include <xercesc/dom/DOMImplementation.hpp>
56 #include <xercesc/dom/DOMImplementationLS.hpp>
57 #include <xercesc/dom/DOMImplementationRegistry.hpp>
58 #include <xercesc/dom/DOMLSParser.hpp>
59 #include <xercesc/dom/DOMException.hpp>
60 #include <xercesc/dom/DOMDocument.hpp>
61 #include <xercesc/dom/DOMNodeList.hpp>
62 #include <xercesc/dom/DOMError.hpp>
63 #include <xercesc/dom/DOMLocator.hpp>
64 #include <xercesc/dom/DOMNamedNodeMap.hpp>
65 #include <xercesc/dom/DOMAttr.hpp>
66 #include <xercesc/dom/DOMErrorHandler.hpp>
67 #include <xercesc/framework/LocalFileInputSource.hpp>
68 #include <xercesc/framework/MemBufInputSource.hpp>
69 #include <boost/noncopyable.hpp>
70 #include <boost/foreach.hpp>
71 #include <boost/algorithm/string.hpp>
72 #include <map>
73 #include <list>
74 #include <vector>
75 #include <iostream>
76
77 using std::list;
78 using std::vector;
79 using std::string;
80 using std::cout;
81 using std::map;
82 using std::max;
83 using std::shared_ptr;
84 using boost::optional;
85 using boost::function;
86 using std::dynamic_pointer_cast;
87
88 using namespace dcp;
89 using namespace xercesc;
90
91 static
92 string
93 xml_ch_to_string (XMLCh const * a)
94 {
95         char* x = XMLString::transcode(a);
96         string const o(x);
97         XMLString::release(&x);
98         return o;
99 }
100
101 class XMLValidationError
102 {
103 public:
104         XMLValidationError (SAXParseException const & e)
105                 : _message (xml_ch_to_string(e.getMessage()))
106                 , _line (e.getLineNumber())
107                 , _column (e.getColumnNumber())
108                 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
109                 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
110         {
111
112         }
113
114         string message () const {
115                 return _message;
116         }
117
118         uint64_t line () const {
119                 return _line;
120         }
121
122         uint64_t column () const {
123                 return _column;
124         }
125
126         string public_id () const {
127                 return _public_id;
128         }
129
130         string system_id () const {
131                 return _system_id;
132         }
133
134 private:
135         string _message;
136         uint64_t _line;
137         uint64_t _column;
138         string _public_id;
139         string _system_id;
140 };
141
142
143 class DCPErrorHandler : public ErrorHandler
144 {
145 public:
146         void warning(const SAXParseException& e)
147         {
148                 maybe_add (XMLValidationError(e));
149         }
150
151         void error(const SAXParseException& e)
152         {
153                 maybe_add (XMLValidationError(e));
154         }
155
156         void fatalError(const SAXParseException& e)
157         {
158                 maybe_add (XMLValidationError(e));
159         }
160
161         void resetErrors() {
162                 _errors.clear ();
163         }
164
165         list<XMLValidationError> errors () const {
166                 return _errors;
167         }
168
169 private:
170         void maybe_add (XMLValidationError e)
171         {
172                 /* XXX: nasty hack */
173                 if (
174                         e.message().find("schema document") != string::npos &&
175                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
176                         ) {
177                         return;
178                 }
179
180                 _errors.push_back (e);
181         }
182
183         list<XMLValidationError> _errors;
184 };
185
186 class StringToXMLCh : public boost::noncopyable
187 {
188 public:
189         StringToXMLCh (string a)
190         {
191                 _buffer = XMLString::transcode(a.c_str());
192         }
193
194         ~StringToXMLCh ()
195         {
196                 XMLString::release (&_buffer);
197         }
198
199         XMLCh const * get () const {
200                 return _buffer;
201         }
202
203 private:
204         XMLCh* _buffer;
205 };
206
207 class LocalFileResolver : public EntityResolver
208 {
209 public:
210         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
211                 : _xsd_dtd_directory (xsd_dtd_directory)
212         {
213                 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
214                  * found without being here.
215                  */
216                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
217                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
218                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
219                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
220                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
221                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
222                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
223                 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
224                 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
225                 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
226                 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
227                 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
228                 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
229         }
230
231         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
232         {
233                 if (!system_id) {
234                         return 0;
235                 }
236                 string system_id_str = xml_ch_to_string (system_id);
237                 boost::filesystem::path p = _xsd_dtd_directory;
238                 if (_files.find(system_id_str) == _files.end()) {
239                         p /= system_id_str;
240                 } else {
241                         p /= _files[system_id_str];
242                 }
243                 StringToXMLCh ch (p.string());
244                 return new LocalFileInputSource(ch.get());
245         }
246
247 private:
248         void add (string uri, string file)
249         {
250                 _files[uri] = file;
251         }
252
253         std::map<string, string> _files;
254         boost::filesystem::path _xsd_dtd_directory;
255 };
256
257
258 static void
259 parse (XercesDOMParser& parser, boost::filesystem::path xml)
260 {
261         parser.parse(xml.string().c_str());
262 }
263
264
265 static void
266 parse (XercesDOMParser& parser, std::string xml)
267 {
268         xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
269         parser.parse(buf);
270 }
271
272
273 template <class T>
274 void
275 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, list<VerificationNote>& notes)
276 {
277         try {
278                 XMLPlatformUtils::Initialize ();
279         } catch (XMLException& e) {
280                 throw MiscError ("Failed to initialise xerces library");
281         }
282
283         DCPErrorHandler error_handler;
284
285         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
286         {
287                 XercesDOMParser parser;
288                 parser.setValidationScheme(XercesDOMParser::Val_Always);
289                 parser.setDoNamespaces(true);
290                 parser.setDoSchema(true);
291
292                 vector<string> schema;
293                 schema.push_back("xml.xsd");
294                 schema.push_back("xmldsig-core-schema.xsd");
295                 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
296                 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
297                 schema.push_back("SMPTE-429-9-2007-AM.xsd");
298                 schema.push_back("Main-Stereo-Picture-CPL.xsd");
299                 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
300                 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
301                 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
302                 schema.push_back("DCSubtitle.v1.mattsson.xsd");
303                 schema.push_back("DCDMSubtitle-2010.xsd");
304                 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
305                 schema.push_back("SMPTE-429-16.xsd");
306                 schema.push_back("Dolby-2012-AD.xsd");
307                 schema.push_back("SMPTE-429-10-2008.xsd");
308                 schema.push_back("xlink.xsd");
309                 schema.push_back("SMPTE-335-2012.xsd");
310                 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
311                 schema.push_back("isdcf-mca.xsd");
312                 schema.push_back("SMPTE-429-12-2008.xsd");
313
314                 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
315                  * Schemas that are not mentioned in this list are not read, and the things
316                  * they describe are not checked.
317                  */
318                 string locations;
319                 BOOST_FOREACH (string i, schema) {
320                         locations += String::compose("%1 %1 ", i, i);
321                 }
322
323                 parser.setExternalSchemaLocation(locations.c_str());
324                 parser.setValidationSchemaFullChecking(true);
325                 parser.setErrorHandler(&error_handler);
326
327                 LocalFileResolver resolver (xsd_dtd_directory);
328                 parser.setEntityResolver(&resolver);
329
330                 try {
331                         parser.resetDocumentPool();
332                         parse(parser, xml);
333                 } catch (XMLException& e) {
334                         throw MiscError(xml_ch_to_string(e.getMessage()));
335                 } catch (DOMException& e) {
336                         throw MiscError(xml_ch_to_string(e.getMessage()));
337                 } catch (...) {
338                         throw MiscError("Unknown exception from xerces");
339                 }
340         }
341
342         XMLPlatformUtils::Terminate ();
343
344         BOOST_FOREACH (XMLValidationError i, error_handler.errors()) {
345                 notes.push_back (
346                         VerificationNote(
347                                 VerificationNote::VERIFY_ERROR,
348                                 VerificationNote::XML_VALIDATION_ERROR,
349                                 i.message(),
350                                 boost::trim_copy(i.public_id() + " " + i.system_id()),
351                                 i.line()
352                                 )
353                         );
354         }
355 }
356
357
/** Outcome of verify_asset() */
enum VerifyAssetResult
{
	/** the hashes that were checked all agree */
	VERIFY_ASSET_RESULT_GOOD,
	/** the hash in the CPL is not the same as the one in the PKL */
	VERIFY_ASSET_RESULT_CPL_PKL_DIFFER,
	/** the hash calculated from the asset is not the same as the one in the PKL */
	VERIFY_ASSET_RESULT_BAD
};
363
364
365 static VerifyAssetResult
366 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
367 {
368         string const actual_hash = reel_mxf->asset_ref()->hash(progress);
369
370         list<shared_ptr<PKL> > pkls = dcp->pkls();
371         /* We've read this DCP in so it must have at least one PKL */
372         DCP_ASSERT (!pkls.empty());
373
374         shared_ptr<Asset> asset = reel_mxf->asset_ref().asset();
375
376         optional<string> pkl_hash;
377         BOOST_FOREACH (shared_ptr<PKL> i, pkls) {
378                 pkl_hash = i->hash (reel_mxf->asset_ref()->id());
379                 if (pkl_hash) {
380                         break;
381                 }
382         }
383
384         DCP_ASSERT (pkl_hash);
385
386         optional<string> cpl_hash = reel_mxf->hash();
387         if (cpl_hash && *cpl_hash != *pkl_hash) {
388                 return VERIFY_ASSET_RESULT_CPL_PKL_DIFFER;
389         }
390
391         if (actual_hash != *pkl_hash) {
392                 return VERIFY_ASSET_RESULT_BAD;
393         }
394
395         return VERIFY_ASSET_RESULT_GOOD;
396 }
397
398
399 void
400 verify_language_tag (string tag, list<VerificationNote>& notes)
401 {
402         try {
403                 dcp::LanguageTag test (tag);
404         } catch (dcp::LanguageTagError &) {
405                 notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::BAD_LANGUAGE, tag));
406         }
407 }
408
409
/** Outcome of verify_picture_asset() */
enum VerifyPictureAssetResult {
	/** the largest frame is no bigger than the "risky" threshold */
	VERIFY_PICTURE_ASSET_RESULT_GOOD,
	/** the largest frame is over the "risky" threshold but not over the maximum */
	VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE,
	/** the largest frame is over the maximum */
	VERIFY_PICTURE_ASSET_RESULT_BAD,
};
416
417
418 int
419 biggest_frame_size (shared_ptr<const MonoPictureFrame> frame)
420 {
421         return frame->size ();
422 }
423
424 int
425 biggest_frame_size (shared_ptr<const StereoPictureFrame> frame)
426 {
427         return max(frame->left()->size(), frame->right()->size());
428 }
429
430
431 template <class A, class R, class F>
432 optional<VerifyPictureAssetResult>
433 verify_picture_asset_type (shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
434 {
435         shared_ptr<A> asset = dynamic_pointer_cast<A>(reel_mxf->asset_ref().asset());
436         if (!asset) {
437                 return optional<VerifyPictureAssetResult>();
438         }
439
440         int biggest_frame = 0;
441         shared_ptr<R> reader = asset->start_read ();
442         int64_t const duration = asset->intrinsic_duration ();
443         for (int64_t i = 0; i < duration; ++i) {
444                 shared_ptr<const F> frame = reader->get_frame (i);
445                 biggest_frame = max(biggest_frame, biggest_frame_size(frame));
446                 progress (float(i) / duration);
447         }
448
449         static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
450         static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
451         if (biggest_frame > max_frame) {
452                 return VERIFY_PICTURE_ASSET_RESULT_BAD;
453         } else if (biggest_frame > risky_frame) {
454                 return VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE;
455         }
456
457         return VERIFY_PICTURE_ASSET_RESULT_GOOD;
458 }
459
460
461 static VerifyPictureAssetResult
462 verify_picture_asset (shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
463 {
464         optional<VerifyPictureAssetResult> r = verify_picture_asset_type<MonoPictureAsset, MonoPictureAssetReader, MonoPictureFrame>(reel_mxf, progress);
465         if (!r) {
466                 r = verify_picture_asset_type<StereoPictureAsset, StereoPictureAssetReader, StereoPictureFrame>(reel_mxf, progress);
467         }
468
469         DCP_ASSERT (r);
470         return *r;
471 }
472
473
474 static void
475 verify_main_picture_asset (
476         shared_ptr<const DCP> dcp,
477         shared_ptr<const ReelPictureAsset> reel_asset,
478         function<void (string, optional<boost::filesystem::path>)> stage,
479         function<void (float)> progress,
480         list<VerificationNote>& notes
481         )
482 {
483         shared_ptr<const PictureAsset> asset = reel_asset->asset();
484         boost::filesystem::path const file = *asset->file();
485         stage ("Checking picture asset hash", file);
486         VerifyAssetResult const r = verify_asset (dcp, reel_asset, progress);
487         switch (r) {
488                 case VERIFY_ASSET_RESULT_BAD:
489                         notes.push_back (
490                                 VerificationNote(
491                                         VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_HASH_INCORRECT, file
492                                         )
493                                 );
494                         break;
495                 case VERIFY_ASSET_RESULT_CPL_PKL_DIFFER:
496                         notes.push_back (
497                                 VerificationNote(
498                                         VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_PICTURE_HASHES_DIFFER, file
499                                         )
500                                 );
501                         break;
502                 default:
503                         break;
504         }
505         stage ("Checking picture frame sizes", asset->file());
506         VerifyPictureAssetResult const pr = verify_picture_asset (reel_asset, progress);
507         switch (pr) {
508                 case VERIFY_PICTURE_ASSET_RESULT_BAD:
509                         notes.push_back (
510                                 VerificationNote(
511                                         VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_FRAME_TOO_LARGE_IN_BYTES, file
512                                         )
513                                 );
514                         break;
515                 case VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE:
516                         notes.push_back (
517                                 VerificationNote(
518                                         VerificationNote::VERIFY_WARNING, VerificationNote::PICTURE_FRAME_NEARLY_TOO_LARGE_IN_BYTES, file
519                                         )
520                                 );
521                         break;
522                 default:
523                         break;
524         }
525
526         /* Only flat/scope allowed by Bv2.1 */
527         if (
528                 asset->size() != dcp::Size(2048, 858) &&
529                 asset->size() != dcp::Size(1998, 1080) &&
530                 asset->size() != dcp::Size(4096, 1716) &&
531                 asset->size() != dcp::Size(3996, 2160)) {
532                 notes.push_back(
533                         VerificationNote(
534                                 VerificationNote::VERIFY_BV21_ERROR,
535                                 VerificationNote::PICTURE_ASSET_INVALID_SIZE_IN_PIXELS,
536                                 String::compose("%1x%2", asset->size().width, asset->size().height),
537                                 file
538                                 )
539                         );
540         }
541
542         /* Only 24, 25, 48fps allowed for 2K */
543         if (
544                 (asset->size() == dcp::Size(2048, 858) || asset->size() == dcp::Size(1998, 1080)) &&
545                 (asset->edit_rate() != dcp::Fraction(24, 1) && asset->edit_rate() != dcp::Fraction(25, 1) && asset->edit_rate() != dcp::Fraction(48, 1))
546            ) {
547                 notes.push_back(
548                         VerificationNote(
549                                 VerificationNote::VERIFY_BV21_ERROR,
550                                 VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_2K,
551                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
552                                 file
553                                 )
554                         );
555         }
556
557         if (asset->size() == dcp::Size(4096, 1716) || asset->size() == dcp::Size(3996, 2160)) {
558                 /* Only 24fps allowed for 4K */
559                 if (asset->edit_rate() != dcp::Fraction(24, 1)) {
560                         notes.push_back(
561                                 VerificationNote(
562                                         VerificationNote::VERIFY_BV21_ERROR,
563                                         VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_4K,
564                                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
565                                         file
566                                         )
567                                 );
568                 }
569
570                 /* Only 2D allowed for 4K */
571                 if (dynamic_pointer_cast<const StereoPictureAsset>(asset)) {
572                         notes.push_back(
573                                 VerificationNote(
574                                         VerificationNote::VERIFY_BV21_ERROR,
575                                         VerificationNote::PICTURE_ASSET_4K_3D,
576                                         file
577                                         )
578                                 );
579
580                 }
581         }
582
583 }
584
585
586 static void
587 verify_main_sound_asset (
588         shared_ptr<const DCP> dcp,
589         shared_ptr<const ReelSoundAsset> reel_asset,
590         function<void (string, optional<boost::filesystem::path>)> stage,
591         function<void (float)> progress,
592         list<VerificationNote>& notes
593         )
594 {
595         shared_ptr<const dcp::SoundAsset> asset = reel_asset->asset();
596         stage ("Checking sound asset hash", asset->file());
597         VerifyAssetResult const r = verify_asset (dcp, reel_asset, progress);
598         switch (r) {
599                 case VERIFY_ASSET_RESULT_BAD:
600                         notes.push_back (
601                                 VerificationNote(
602                                         VerificationNote::VERIFY_ERROR, VerificationNote::SOUND_HASH_INCORRECT, *asset->file()
603                                         )
604                                 );
605                         break;
606                 case VERIFY_ASSET_RESULT_CPL_PKL_DIFFER:
607                         notes.push_back (
608                                 VerificationNote(
609                                         VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_SOUND_HASHES_DIFFER, *asset->file()
610                                         )
611                                 );
612                         break;
613                 default:
614                         break;
615         }
616
617         stage ("Checking sound asset metadata", asset->file());
618
619         verify_language_tag (asset->language(), notes);
620 }
621
622
623 static void
624 verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, list<VerificationNote>& notes)
625 {
626         /* XXX: is Language compulsory? */
627         if (reel_asset->language()) {
628                 verify_language_tag (*reel_asset->language(), notes);
629         }
630 }
631
632
633 static void
634 verify_closed_caption_reel (shared_ptr<const ReelClosedCaptionAsset> reel_asset, list<VerificationNote>& notes)
635 {
636         /* XXX: is Language compulsory? */
637         if (reel_asset->language()) {
638                 verify_language_tag (*reel_asset->language(), notes);
639         }
640 }
641
642
643 struct State
644 {
645         boost::optional<string> subtitle_language;
646 };
647
648
649 static void
650 verify_subtitle_asset (
651         shared_ptr<const SubtitleAsset> asset,
652         function<void (string, optional<boost::filesystem::path>)> stage,
653         boost::filesystem::path xsd_dtd_directory,
654         list<VerificationNote>& notes,
655         State& state
656         )
657 {
658         stage ("Checking subtitle XML", asset->file());
659         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
660          * gets passed through libdcp which may clean up and therefore hide errors.
661          */
662         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
663
664         shared_ptr<const SMPTESubtitleAsset> smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
665         if (smpte) {
666                 if (smpte->language()) {
667                         string const language = *smpte->language();
668                         verify_language_tag (language, notes);
669                         if (!state.subtitle_language) {
670                                 state.subtitle_language = language;
671                         } else if (state.subtitle_language != language) {
672                                 notes.push_back (
673                                         VerificationNote(
674                                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::SUBTITLE_LANGUAGES_DIFFER, *asset->file()
675                                                 )
676                                         );
677                         }
678                 } else {
679                         notes.push_back (
680                                 VerificationNote(
681                                         VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_SUBTITLE_LANGUAGE, *asset->file()
682                                         )
683                                 );
684                 }
685                 if (boost::filesystem::file_size(*asset->file()) > 115 * 1024 * 1024) {
686                         notes.push_back (
687                                 VerificationNote(
688                                         VerificationNote::VERIFY_BV21_ERROR, VerificationNote::TIMED_TEXT_ASSET_TOO_LARGE_IN_BYTES, *asset->file()
689                                         )
690                                 );
691                 }
692                 /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
693                  * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
694                  */
695                 map<string, ArrayData> fonts = asset->font_data ();
696                 int total_size = 0;
697                 for (map<string, ArrayData>::const_iterator i = fonts.begin(); i != fonts.end(); ++i) {
698                         total_size += i->second.size();
699                 }
700                 if (total_size > 10 * 1024 * 1024) {
701                         notes.push_back (
702                                 VerificationNote(
703                                         VerificationNote::VERIFY_BV21_ERROR, VerificationNote::TIMED_TEXT_FONTS_TOO_LARGE_IN_BYTES, *asset->file()
704                                         )
705                                 );
706                 }
707         }
708 }
709
710
711 static void
712 verify_closed_caption_asset (
713         shared_ptr<const SubtitleAsset> asset,
714         function<void (string, optional<boost::filesystem::path>)> stage,
715         boost::filesystem::path xsd_dtd_directory,
716         list<VerificationNote>& notes,
717         State& state
718         )
719 {
720         verify_subtitle_asset (asset, stage, xsd_dtd_directory, notes, state);
721
722         if (asset->raw_xml().size() > 256 * 1024) {
723                 notes.push_back (
724                         VerificationNote(
725                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::CLOSED_CAPTION_XML_TOO_LARGE_IN_BYTES, *asset->file()
726                                 )
727                         );
728         }
729 }
730
731
732 list<VerificationNote>
733 dcp::verify (
734         vector<boost::filesystem::path> directories,
735         function<void (string, optional<boost::filesystem::path>)> stage,
736         function<void (float)> progress,
737         boost::filesystem::path xsd_dtd_directory
738         )
739 {
740         xsd_dtd_directory = boost::filesystem::canonical (xsd_dtd_directory);
741
742         list<VerificationNote> notes;
743         State state;
744
745         list<shared_ptr<DCP> > dcps;
746         BOOST_FOREACH (boost::filesystem::path i, directories) {
747                 dcps.push_back (shared_ptr<DCP> (new DCP (i)));
748         }
749
750         BOOST_FOREACH (shared_ptr<DCP> dcp, dcps) {
751                 stage ("Checking DCP", dcp->directory());
752                 try {
753                         dcp->read (&notes);
754                 } catch (ReadError& e) {
755                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
756                 } catch (XMLError& e) {
757                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
758                 } catch (MXFFileError& e) {
759                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
760                 } catch (cxml::Error& e) {
761                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
762                 }
763
764                 if (dcp->standard() != dcp::SMPTE) {
765                         notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::NOT_SMPTE));
766                 }
767
768                 BOOST_FOREACH (shared_ptr<CPL> cpl, dcp->cpls()) {
769                         stage ("Checking CPL", cpl->file());
770                         validate_xml (cpl->file().get(), xsd_dtd_directory, notes);
771
772                         BOOST_FOREACH (string const& i, cpl->additional_subtitle_languages()) {
773                                 verify_language_tag (i, notes);
774                         }
775
776                         if (cpl->release_territory()) {
777                                 verify_language_tag (cpl->release_territory().get(), notes);
778                         }
779
780                         /* Check that the CPL's hash corresponds to the PKL */
781                         BOOST_FOREACH (shared_ptr<PKL> i, dcp->pkls()) {
782                                 optional<string> h = i->hash(cpl->id());
783                                 if (h && make_digest(ArrayData(*cpl->file())) != *h) {
784                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::CPL_HASH_INCORRECT));
785                                 }
786                         }
787
788                         BOOST_FOREACH (shared_ptr<Reel> reel, cpl->reels()) {
789                                 stage ("Checking reel", optional<boost::filesystem::path>());
790
791                                 BOOST_FOREACH (shared_ptr<ReelAsset> i, reel->assets()) {
792                                         if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
793                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::DURATION_TOO_SMALL, i->id()));
794                                         }
795                                         if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
796                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INTRINSIC_DURATION_TOO_SMALL, i->id()));
797                                         }
798                                 }
799
800                                 if (reel->main_picture()) {
801                                         /* Check reel stuff */
802                                         Fraction const frame_rate = reel->main_picture()->frame_rate();
803                                         if (frame_rate.denominator != 1 ||
804                                             (frame_rate.numerator != 24 &&
805                                              frame_rate.numerator != 25 &&
806                                              frame_rate.numerator != 30 &&
807                                              frame_rate.numerator != 48 &&
808                                              frame_rate.numerator != 50 &&
809                                              frame_rate.numerator != 60 &&
810                                              frame_rate.numerator != 96)) {
811                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INVALID_PICTURE_FRAME_RATE));
812                                         }
813                                         /* Check asset */
814                                         if (reel->main_picture()->asset_ref().resolved()) {
815                                                 verify_main_picture_asset (dcp, reel->main_picture(), stage, progress, notes);
816                                         }
817                                 }
818
819                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
820                                         verify_main_sound_asset (dcp, reel->main_sound(), stage, progress, notes);
821                                 }
822
823                                 if (reel->main_subtitle()) {
824                                         verify_main_subtitle_reel (reel->main_subtitle(), notes);
825                                         if (reel->main_subtitle()->asset_ref().resolved()) {
826                                                 verify_subtitle_asset (reel->main_subtitle()->asset(), stage, xsd_dtd_directory, notes, state);
827                                         }
828                                 }
829
830                                 BOOST_FOREACH (shared_ptr<dcp::ReelClosedCaptionAsset> i, reel->closed_captions()) {
831                                         verify_closed_caption_reel (i, notes);
832                                         if (i->asset_ref().resolved()) {
833                                                 verify_closed_caption_asset (i->asset(), stage, xsd_dtd_directory, notes, state);
834                                         }
835                                 }
836                         }
837                 }
838
839                 BOOST_FOREACH (shared_ptr<PKL> pkl, dcp->pkls()) {
840                         stage ("Checking PKL", pkl->file());
841                         validate_xml (pkl->file().get(), xsd_dtd_directory, notes);
842                 }
843
844                 if (dcp->asset_map_path()) {
845                         stage ("Checking ASSETMAP", dcp->asset_map_path().get());
846                         validate_xml (dcp->asset_map_path().get(), xsd_dtd_directory, notes);
847                 } else {
848                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::MISSING_ASSETMAP));
849                 }
850         }
851
852         return notes;
853 }
854
855 string
856 dcp::note_to_string (dcp::VerificationNote note)
857 {
858         switch (note.code()) {
859         case dcp::VerificationNote::GENERAL_READ:
860                 return *note.note();
861         case dcp::VerificationNote::CPL_HASH_INCORRECT:
862                 return "The hash of the CPL in the PKL does not agree with the CPL file.";
863         case dcp::VerificationNote::INVALID_PICTURE_FRAME_RATE:
864                 return "The picture in a reel has an invalid frame rate.";
865         case dcp::VerificationNote::PICTURE_HASH_INCORRECT:
866                 return dcp::String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
867         case dcp::VerificationNote::PKL_CPL_PICTURE_HASHES_DIFFER:
868                 return dcp::String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
869         case dcp::VerificationNote::SOUND_HASH_INCORRECT:
870                 return dcp::String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
871         case dcp::VerificationNote::PKL_CPL_SOUND_HASHES_DIFFER:
872                 return dcp::String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
873         case dcp::VerificationNote::EMPTY_ASSET_PATH:
874                 return "The asset map contains an empty asset path.";
875         case dcp::VerificationNote::MISSING_ASSET:
876                 return String::compose("The file for an asset in the asset map cannot be found; missing file is %1.", note.file()->filename());
877         case dcp::VerificationNote::MISMATCHED_STANDARD:
878                 return "The DCP contains both SMPTE and Interop parts.";
879         case dcp::VerificationNote::XML_VALIDATION_ERROR:
880                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
881         case dcp::VerificationNote::MISSING_ASSETMAP:
882                 return "No ASSETMAP or ASSETMAP.xml was found.";
883         case dcp::VerificationNote::INTRINSIC_DURATION_TOO_SMALL:
884                 return String::compose("The intrinsic duration of an asset is less than 1 second long: %1", note.note().get());
885         case dcp::VerificationNote::DURATION_TOO_SMALL:
886                 return String::compose("The duration of an asset is less than 1 second long: %1", note.note().get());
887         case dcp::VerificationNote::PICTURE_FRAME_TOO_LARGE_IN_BYTES:
888                 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
889         case dcp::VerificationNote::PICTURE_FRAME_NEARLY_TOO_LARGE_IN_BYTES:
890                 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
891         case dcp::VerificationNote::EXTERNAL_ASSET:
892                 return String::compose("An asset that this DCP refers to is not included in the DCP.  It may be a VF.  Missing asset is %1.", note.note().get());
893         case dcp::VerificationNote::NOT_SMPTE:
894                 return "This DCP does not use the SMPTE standard, which is required for Bv2.1 compliance.";
895         case dcp::VerificationNote::BAD_LANGUAGE:
896                 return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
897         case dcp::VerificationNote::PICTURE_ASSET_INVALID_SIZE_IN_PIXELS:
898                 return String::compose("A picture asset's size (%1) is not one of those allowed by Bv2.1 (2048x858, 1998x1080, 4096x1716 or 3996x2160)", note.note().get());
899         case dcp::VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_2K:
900                 return String::compose("A picture asset's frame rate (%1) is not one of those allowed for 2K DCPs by Bv2.1 (24, 25 or 48fps)", note.note().get());
901         case dcp::VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_4K:
902                 return String::compose("A picture asset's frame rate (%1) is not 24fps as required for 4K DCPs by Bv2.1", note.note().get());
903         case dcp::VerificationNote::PICTURE_ASSET_4K_3D:
904                 return "3D 4K DCPs are not allowed by Bv2.1";
905         case dcp::VerificationNote::CLOSED_CAPTION_XML_TOO_LARGE_IN_BYTES:
906                 return String::compose("The XML for the closed caption asset %1 is longer than the 256KB maximum required by Bv2.1", note.file()->filename());
907         case dcp::VerificationNote::TIMED_TEXT_ASSET_TOO_LARGE_IN_BYTES:
908                 return String::compose("The total size of the timed text asset %1 is larger than the 115MB maximum required by Bv2.1", note.file()->filename());
909         case dcp::VerificationNote::TIMED_TEXT_FONTS_TOO_LARGE_IN_BYTES:
910                 return String::compose("The total size of the fonts in timed text asset %1 is larger than the 10MB maximum required by Bv2.1", note.file()->filename());
911         case dcp::VerificationNote::MISSING_SUBTITLE_LANGUAGE:
912                 return String::compose("The XML for a SMPTE subtitle asset has no <Language> tag, which is required by Bv2.1", note.file()->filename());
913         case dcp::VerificationNote::SUBTITLE_LANGUAGES_DIFFER:
914                 return String::compose("Some subtitle assets have different <Language> tags than others", note.file()->filename());
915         }
916
917         return "";
918 }