d8a4f37f81c8ac081739dd49da51100e5c8aef00
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2021 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34
35 /** @file  src/verify.cc
36  *  @brief dcp::verify() method and associated code
37  */
38
39
40 #include "verify.h"
41 #include "dcp.h"
42 #include "cpl.h"
43 #include "reel.h"
44 #include "reel_closed_caption_asset.h"
45 #include "reel_picture_asset.h"
46 #include "reel_sound_asset.h"
47 #include "reel_subtitle_asset.h"
48 #include "interop_subtitle_asset.h"
49 #include "mono_picture_asset.h"
50 #include "mono_picture_frame.h"
51 #include "stereo_picture_asset.h"
52 #include "stereo_picture_frame.h"
53 #include "exceptions.h"
54 #include "compose.hpp"
55 #include "raw_convert.h"
56 #include "reel_markers_asset.h"
57 #include "smpte_subtitle_asset.h"
58 #include <xercesc/util/PlatformUtils.hpp>
59 #include <xercesc/parsers/XercesDOMParser.hpp>
60 #include <xercesc/parsers/AbstractDOMParser.hpp>
61 #include <xercesc/sax/HandlerBase.hpp>
62 #include <xercesc/dom/DOMImplementation.hpp>
63 #include <xercesc/dom/DOMImplementationLS.hpp>
64 #include <xercesc/dom/DOMImplementationRegistry.hpp>
65 #include <xercesc/dom/DOMLSParser.hpp>
66 #include <xercesc/dom/DOMException.hpp>
67 #include <xercesc/dom/DOMDocument.hpp>
68 #include <xercesc/dom/DOMNodeList.hpp>
69 #include <xercesc/dom/DOMError.hpp>
70 #include <xercesc/dom/DOMLocator.hpp>
71 #include <xercesc/dom/DOMNamedNodeMap.hpp>
72 #include <xercesc/dom/DOMAttr.hpp>
73 #include <xercesc/dom/DOMErrorHandler.hpp>
74 #include <xercesc/framework/LocalFileInputSource.hpp>
75 #include <xercesc/framework/MemBufInputSource.hpp>
76 #include <boost/algorithm/string.hpp>
77 #include <map>
78 #include <vector>
79 #include <iostream>
80
81
82 using std::list;
83 using std::vector;
84 using std::string;
85 using std::cout;
86 using std::map;
87 using std::max;
88 using std::set;
89 using std::shared_ptr;
90 using std::make_shared;
91 using boost::optional;
92 using boost::function;
93 using std::dynamic_pointer_cast;
94
95
96 using namespace dcp;
97 using namespace xercesc;
98
99
100 static
101 string
102 xml_ch_to_string (XMLCh const * a)
103 {
104         char* x = XMLString::transcode(a);
105         string const o(x);
106         XMLString::release(&x);
107         return o;
108 }
109
110
111 class XMLValidationError
112 {
113 public:
114         XMLValidationError (SAXParseException const & e)
115                 : _message (xml_ch_to_string(e.getMessage()))
116                 , _line (e.getLineNumber())
117                 , _column (e.getColumnNumber())
118                 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
119                 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
120         {
121
122         }
123
124         string message () const {
125                 return _message;
126         }
127
128         uint64_t line () const {
129                 return _line;
130         }
131
132         uint64_t column () const {
133                 return _column;
134         }
135
136         string public_id () const {
137                 return _public_id;
138         }
139
140         string system_id () const {
141                 return _system_id;
142         }
143
144 private:
145         string _message;
146         uint64_t _line;
147         uint64_t _column;
148         string _public_id;
149         string _system_id;
150 };
151
152
153 class DCPErrorHandler : public ErrorHandler
154 {
155 public:
156         void warning(const SAXParseException& e)
157         {
158                 maybe_add (XMLValidationError(e));
159         }
160
161         void error(const SAXParseException& e)
162         {
163                 maybe_add (XMLValidationError(e));
164         }
165
166         void fatalError(const SAXParseException& e)
167         {
168                 maybe_add (XMLValidationError(e));
169         }
170
171         void resetErrors() {
172                 _errors.clear ();
173         }
174
175         list<XMLValidationError> errors () const {
176                 return _errors;
177         }
178
179 private:
180         void maybe_add (XMLValidationError e)
181         {
182                 /* XXX: nasty hack */
183                 if (
184                         e.message().find("schema document") != string::npos &&
185                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
186                         ) {
187                         return;
188                 }
189
190                 _errors.push_back (e);
191         }
192
193         list<XMLValidationError> _errors;
194 };
195
196
197 class StringToXMLCh
198 {
199 public:
200         StringToXMLCh (string a)
201         {
202                 _buffer = XMLString::transcode(a.c_str());
203         }
204
205         StringToXMLCh (StringToXMLCh const&) = delete;
206         StringToXMLCh& operator= (StringToXMLCh const&) = delete;
207
208         ~StringToXMLCh ()
209         {
210                 XMLString::release (&_buffer);
211         }
212
213         XMLCh const * get () const {
214                 return _buffer;
215         }
216
217 private:
218         XMLCh* _buffer;
219 };
220
221
222 class LocalFileResolver : public EntityResolver
223 {
224 public:
225         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
226                 : _xsd_dtd_directory (xsd_dtd_directory)
227         {
228                 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
229                  * found without being here.
230                  */
231                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
232                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
233                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
234                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
235                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
236                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
237                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
238                 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
239                 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
240                 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
241                 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
242                 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
243                 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
244         }
245
246         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
247         {
248                 if (!system_id) {
249                         return 0;
250                 }
251                 auto system_id_str = xml_ch_to_string (system_id);
252                 auto p = _xsd_dtd_directory;
253                 if (_files.find(system_id_str) == _files.end()) {
254                         p /= system_id_str;
255                 } else {
256                         p /= _files[system_id_str];
257                 }
258                 StringToXMLCh ch (p.string());
259                 return new LocalFileInputSource(ch.get());
260         }
261
262 private:
263         void add (string uri, string file)
264         {
265                 _files[uri] = file;
266         }
267
268         std::map<string, string> _files;
269         boost::filesystem::path _xsd_dtd_directory;
270 };
271
272
273 static void
274 parse (XercesDOMParser& parser, boost::filesystem::path xml)
275 {
276         parser.parse(xml.string().c_str());
277 }
278
279
280 static void
281 parse (XercesDOMParser& parser, string xml)
282 {
283         xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
284         parser.parse(buf);
285 }
286
287
288 template <class T>
289 void
290 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, vector<VerificationNote>& notes)
291 {
292         try {
293                 XMLPlatformUtils::Initialize ();
294         } catch (XMLException& e) {
295                 throw MiscError ("Failed to initialise xerces library");
296         }
297
298         DCPErrorHandler error_handler;
299
300         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
301         {
302                 XercesDOMParser parser;
303                 parser.setValidationScheme(XercesDOMParser::Val_Always);
304                 parser.setDoNamespaces(true);
305                 parser.setDoSchema(true);
306
307                 vector<string> schema;
308                 schema.push_back("xml.xsd");
309                 schema.push_back("xmldsig-core-schema.xsd");
310                 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
311                 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
312                 schema.push_back("SMPTE-429-9-2007-AM.xsd");
313                 schema.push_back("Main-Stereo-Picture-CPL.xsd");
314                 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
315                 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
316                 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
317                 schema.push_back("DCSubtitle.v1.mattsson.xsd");
318                 schema.push_back("DCDMSubtitle-2010.xsd");
319                 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
320                 schema.push_back("SMPTE-429-16.xsd");
321                 schema.push_back("Dolby-2012-AD.xsd");
322                 schema.push_back("SMPTE-429-10-2008.xsd");
323                 schema.push_back("xlink.xsd");
324                 schema.push_back("SMPTE-335-2012.xsd");
325                 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
326                 schema.push_back("isdcf-mca.xsd");
327                 schema.push_back("SMPTE-429-12-2008.xsd");
328
329                 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
330                  * Schemas that are not mentioned in this list are not read, and the things
331                  * they describe are not checked.
332                  */
333                 string locations;
334                 for (auto i: schema) {
335                         locations += String::compose("%1 %1 ", i, i);
336                 }
337
338                 parser.setExternalSchemaLocation(locations.c_str());
339                 parser.setValidationSchemaFullChecking(true);
340                 parser.setErrorHandler(&error_handler);
341
342                 LocalFileResolver resolver (xsd_dtd_directory);
343                 parser.setEntityResolver(&resolver);
344
345                 try {
346                         parser.resetDocumentPool();
347                         parse(parser, xml);
348                 } catch (XMLException& e) {
349                         throw MiscError(xml_ch_to_string(e.getMessage()));
350                 } catch (DOMException& e) {
351                         throw MiscError(xml_ch_to_string(e.getMessage()));
352                 } catch (...) {
353                         throw MiscError("Unknown exception from xerces");
354                 }
355         }
356
357         XMLPlatformUtils::Terminate ();
358
359         for (auto i: error_handler.errors()) {
360                 notes.push_back ({
361                         VerificationNote::Type::ERROR,
362                         VerificationNote::Code::INVALID_XML,
363                         i.message(),
364                         boost::trim_copy(i.public_id() + " " + i.system_id()),
365                         i.line()
366                 });
367         }
368 }
369
370
/** Result of checking an asset's hashes with verify_asset() */
enum class VerifyAssetResult
{
	/** no hash problem was found */
	GOOD,
	/** the CPL gives a hash which is different to the one in the PKL */
	CPL_PKL_DIFFER,
	/** the hash calculated from the asset is different to the one in the PKL */
	BAD
};
376
377
378 static VerifyAssetResult
379 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
380 {
381         auto const actual_hash = reel_file_asset->asset_ref()->hash(progress);
382
383         auto pkls = dcp->pkls();
384         /* We've read this DCP in so it must have at least one PKL */
385         DCP_ASSERT (!pkls.empty());
386
387         auto asset = reel_file_asset->asset_ref().asset();
388
389         optional<string> pkl_hash;
390         for (auto i: pkls) {
391                 pkl_hash = i->hash (reel_file_asset->asset_ref()->id());
392                 if (pkl_hash) {
393                         break;
394                 }
395         }
396
397         DCP_ASSERT (pkl_hash);
398
399         auto cpl_hash = reel_file_asset->hash();
400         if (cpl_hash && *cpl_hash != *pkl_hash) {
401                 return VerifyAssetResult::CPL_PKL_DIFFER;
402         }
403
404         if (actual_hash != *pkl_hash) {
405                 return VerifyAssetResult::BAD;
406         }
407
408         return VerifyAssetResult::GOOD;
409 }
410
411
412 void
413 verify_language_tag (string tag, vector<VerificationNote>& notes)
414 {
415         try {
416                 LanguageTag test (tag);
417         } catch (LanguageTagError &) {
418                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, tag});
419         }
420 }
421
422
/** Result of checking the sizes of the frames in a picture asset */
enum class VerifyPictureAssetResult {
	/** no frame is over either limit */
	GOOD,
	/** the biggest frame is over the "risky" limit but not over the maximum */
	FRAME_NEARLY_TOO_LARGE,
	/** the biggest frame is over the maximum */
	BAD,
};
429
430
431 int
432 biggest_frame_size (shared_ptr<const MonoPictureFrame> frame)
433 {
434         return frame->size ();
435 }
436
437 int
438 biggest_frame_size (shared_ptr<const StereoPictureFrame> frame)
439 {
440         return max(frame->left()->size(), frame->right()->size());
441 }
442
443
444 template <class A, class R, class F>
445 optional<VerifyPictureAssetResult>
446 verify_picture_asset_type (shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
447 {
448         auto asset = dynamic_pointer_cast<A>(reel_file_asset->asset_ref().asset());
449         if (!asset) {
450                 return optional<VerifyPictureAssetResult>();
451         }
452
453         int biggest_frame = 0;
454         auto reader = asset->start_read ();
455         auto const duration = asset->intrinsic_duration ();
456         for (int64_t i = 0; i < duration; ++i) {
457                 shared_ptr<const F> frame = reader->get_frame (i);
458                 biggest_frame = max(biggest_frame, biggest_frame_size(frame));
459                 progress (float(i) / duration);
460         }
461
462         static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
463         static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
464         if (biggest_frame > max_frame) {
465                 return VerifyPictureAssetResult::BAD;
466         } else if (biggest_frame > risky_frame) {
467                 return VerifyPictureAssetResult::FRAME_NEARLY_TOO_LARGE;
468         }
469
470         return VerifyPictureAssetResult::GOOD;
471 }
472
473
474 static VerifyPictureAssetResult
475 verify_picture_asset (shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
476 {
477         auto r = verify_picture_asset_type<MonoPictureAsset, MonoPictureAssetReader, MonoPictureFrame>(reel_file_asset, progress);
478         if (!r) {
479                 r = verify_picture_asset_type<StereoPictureAsset, StereoPictureAssetReader, StereoPictureFrame>(reel_file_asset, progress);
480         }
481
482         DCP_ASSERT (r);
483         return *r;
484 }
485
486
487 static void
488 verify_main_picture_asset (
489         shared_ptr<const DCP> dcp,
490         shared_ptr<const ReelPictureAsset> reel_asset,
491         function<void (string, optional<boost::filesystem::path>)> stage,
492         function<void (float)> progress,
493         vector<VerificationNote>& notes
494         )
495 {
496         auto asset = reel_asset->asset();
497         auto const file = *asset->file();
498         stage ("Checking picture asset hash", file);
499         auto const r = verify_asset (dcp, reel_asset, progress);
500         switch (r) {
501                 case VerifyAssetResult::BAD:
502                         notes.push_back ({
503                                 VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_PICTURE_HASH, file
504                         });
505                         break;
506                 case VerifyAssetResult::CPL_PKL_DIFFER:
507                         notes.push_back ({
508                                 VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_PICTURE_HASHES, file
509                         });
510                         break;
511                 default:
512                         break;
513         }
514         stage ("Checking picture frame sizes", asset->file());
515         auto const pr = verify_picture_asset (reel_asset, progress);
516         switch (pr) {
517                 case VerifyPictureAssetResult::BAD:
518                         notes.push_back ({
519                                 VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
520                         });
521                         break;
522                 case VerifyPictureAssetResult::FRAME_NEARLY_TOO_LARGE:
523                         notes.push_back ({
524                                 VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
525                         });
526                         break;
527                 default:
528                         break;
529         }
530
531         /* Only flat/scope allowed by Bv2.1 */
532         if (
533                 asset->size() != Size(2048, 858) &&
534                 asset->size() != Size(1998, 1080) &&
535                 asset->size() != Size(4096, 1716) &&
536                 asset->size() != Size(3996, 2160)) {
537                 notes.push_back({
538                         VerificationNote::Type::BV21_ERROR,
539                         VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS,
540                         String::compose("%1x%2", asset->size().width, asset->size().height),
541                         file
542                 });
543         }
544
545         /* Only 24, 25, 48fps allowed for 2K */
546         if (
547                 (asset->size() == Size(2048, 858) || asset->size() == Size(1998, 1080)) &&
548                 (asset->edit_rate() != Fraction(24, 1) && asset->edit_rate() != Fraction(25, 1) && asset->edit_rate() != Fraction(48, 1))
549            ) {
550                 notes.push_back({
551                         VerificationNote::Type::BV21_ERROR,
552                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K,
553                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
554                         file
555                 });
556         }
557
558         if (asset->size() == Size(4096, 1716) || asset->size() == Size(3996, 2160)) {
559                 /* Only 24fps allowed for 4K */
560                 if (asset->edit_rate() != Fraction(24, 1)) {
561                         notes.push_back({
562                                 VerificationNote::Type::BV21_ERROR,
563                                 VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K,
564                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
565                                 file
566                         });
567                 }
568
569                 /* Only 2D allowed for 4K */
570                 if (dynamic_pointer_cast<const StereoPictureAsset>(asset)) {
571                         notes.push_back({
572                                 VerificationNote::Type::BV21_ERROR,
573                                 VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D,
574                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
575                                 file
576                         });
577
578                 }
579         }
580
581 }
582
583
584 static void
585 verify_main_sound_asset (
586         shared_ptr<const DCP> dcp,
587         shared_ptr<const ReelSoundAsset> reel_asset,
588         function<void (string, optional<boost::filesystem::path>)> stage,
589         function<void (float)> progress,
590         vector<VerificationNote>& notes
591         )
592 {
593         auto asset = reel_asset->asset();
594         stage ("Checking sound asset hash", asset->file());
595         auto const r = verify_asset (dcp, reel_asset, progress);
596         switch (r) {
597                 case VerifyAssetResult::BAD:
598                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_SOUND_HASH, *asset->file()});
599                         break;
600                 case VerifyAssetResult::CPL_PKL_DIFFER:
601                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_SOUND_HASHES, *asset->file()});
602                         break;
603                 default:
604                         break;
605         }
606
607         stage ("Checking sound asset metadata", asset->file());
608
609         verify_language_tag (asset->language(), notes);
610         if (asset->sampling_rate() != 48000) {
611                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SOUND_FRAME_RATE, raw_convert<string>(asset->sampling_rate()), *asset->file()});
612         }
613 }
614
615
616 static void
617 verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, vector<VerificationNote>& notes)
618 {
619         /* XXX: is Language compulsory? */
620         if (reel_asset->language()) {
621                 verify_language_tag (*reel_asset->language(), notes);
622         }
623
624         if (!reel_asset->entry_point()) {
625                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT, reel_asset->id() });
626         } else if (reel_asset->entry_point().get()) {
627                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT, reel_asset->id() });
628         }
629 }
630
631
632 static void
633 verify_closed_caption_reel (shared_ptr<const ReelClosedCaptionAsset> reel_asset, vector<VerificationNote>& notes)
634 {
635         /* XXX: is Language compulsory? */
636         if (reel_asset->language()) {
637                 verify_language_tag (*reel_asset->language(), notes);
638         }
639
640         if (!reel_asset->entry_point()) {
641                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
642         } else if (reel_asset->entry_point().get()) {
643                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
644         }
645 }
646
647
648 struct State
649 {
650         boost::optional<string> subtitle_language;
651 };
652
653
654 /** Verify stuff that is common to both subtitles and closed captions */
655 void
656 verify_smpte_timed_text_asset (
657         shared_ptr<const SMPTESubtitleAsset> asset,
658         vector<VerificationNote>& notes
659         )
660 {
661         if (asset->language()) {
662                 verify_language_tag (*asset->language(), notes);
663         } else {
664                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE, *asset->file() });
665         }
666
667         auto const size = boost::filesystem::file_size(asset->file().get());
668         if (size > 115 * 1024 * 1024) {
669                 notes.push_back (
670                         { VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES, raw_convert<string>(size), *asset->file() }
671                         );
672         }
673
674         /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
675          * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
676          */
677         auto fonts = asset->font_data ();
678         int total_size = 0;
679         for (auto i: fonts) {
680                 total_size += i.second.size();
681         }
682         if (total_size > 10 * 1024 * 1024) {
683                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES, raw_convert<string>(total_size), asset->file().get() });
684         }
685
686         if (!asset->start_time()) {
687                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_START_TIME, asset->file().get() });
688         } else if (asset->start_time() != Time()) {
689                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SUBTITLE_START_TIME, asset->file().get() });
690         }
691 }
692
693
694 /** Verify SMPTE subtitle-only stuff */
695 void
696 verify_smpte_subtitle_asset (
697         shared_ptr<const SMPTESubtitleAsset> asset,
698         vector<VerificationNote>& notes,
699         State& state
700         )
701 {
702         if (asset->language()) {
703                 if (!state.subtitle_language) {
704                         state.subtitle_language = *asset->language();
705                 } else if (state.subtitle_language != *asset->language()) {
706                         notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES });
707                 }
708         }
709 }
710
711
712 /** Verify all subtitle stuff */
713 static void
714 verify_subtitle_asset (
715         shared_ptr<const SubtitleAsset> asset,
716         function<void (string, optional<boost::filesystem::path>)> stage,
717         boost::filesystem::path xsd_dtd_directory,
718         vector<VerificationNote>& notes,
719         State& state
720         )
721 {
722         stage ("Checking subtitle XML", asset->file());
723         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
724          * gets passed through libdcp which may clean up and therefore hide errors.
725          */
726         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
727
728         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
729         if (smpte) {
730                 verify_smpte_timed_text_asset (smpte, notes);
731                 verify_smpte_subtitle_asset (smpte, notes, state);
732         }
733 }
734
735
736 /** Verify all closed caption stuff */
737 static void
738 verify_closed_caption_asset (
739         shared_ptr<const SubtitleAsset> asset,
740         function<void (string, optional<boost::filesystem::path>)> stage,
741         boost::filesystem::path xsd_dtd_directory,
742         vector<VerificationNote>& notes
743         )
744 {
745         stage ("Checking closed caption XML", asset->file());
746         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
747          * gets passed through libdcp which may clean up and therefore hide errors.
748          */
749         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
750
751         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
752         if (smpte) {
753                 verify_smpte_timed_text_asset (smpte, notes);
754         }
755
756         if (asset->raw_xml().size() > 256 * 1024) {
757                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES, raw_convert<string>(asset->raw_xml().size()), *asset->file()});
758         }
759 }
760
761
762 static
763 void
764 verify_text_timing (
765         vector<shared_ptr<Reel>> reels,
766         int edit_rate,
767         vector<VerificationNote>& notes,
768         std::function<bool (shared_ptr<Reel>)> check,
769         std::function<string (shared_ptr<Reel>)> xml,
770         std::function<int64_t (shared_ptr<Reel>)> duration
771         )
772 {
773         /* end of last subtitle (in editable units) */
774         optional<int64_t> last_out;
775         auto too_short = false;
776         auto too_close = false;
777         auto too_early = false;
778         auto reel_overlap = false;
779         /* current reel start time (in editable units) */
780         int64_t reel_offset = 0;
781
782         std::function<void (cxml::ConstNodePtr, optional<int>, optional<Time>, int, bool)> parse;
783         parse = [&parse, &last_out, &too_short, &too_close, &too_early, &reel_offset](cxml::ConstNodePtr node, optional<int> tcr, optional<Time> start_time, int er, bool first_reel) {
784                 if (node->name() == "Subtitle") {
785                         Time in (node->string_attribute("TimeIn"), tcr);
786                         if (start_time) {
787                                 in -= *start_time;
788                         }
789                         Time out (node->string_attribute("TimeOut"), tcr);
790                         if (start_time) {
791                                 out -= *start_time;
792                         }
793                         if (first_reel && tcr && in < Time(0, 0, 4, 0, *tcr)) {
794                                 too_early = true;
795                         }
796                         auto length = out - in;
797                         if (length.as_editable_units_ceil(er) < 15) {
798                                 too_short = true;
799                         }
800                         if (last_out) {
801                                 /* XXX: this feels dubious - is it really what Bv2.1 means? */
802                                 auto distance = reel_offset + in.as_editable_units_ceil(er) - *last_out;
803                                 if (distance >= 0 && distance < 2) {
804                                         too_close = true;
805                                 }
806                         }
807                         last_out = reel_offset + out.as_editable_units_floor(er);
808                 } else {
809                         for (auto i: node->node_children()) {
810                                 parse(i, tcr, start_time, er, first_reel);
811                         }
812                 }
813         };
814
815         for (auto i = 0U; i < reels.size(); ++i) {
816                 if (!check(reels[i])) {
817                         continue;
818                 }
819
820                 /* We need to look at <Subtitle> instances in the XML being checked, so we can't use the subtitles
821                  * read in by libdcp's parser.
822                  */
823
824                 shared_ptr<cxml::Document> doc;
825                 optional<int> tcr;
826                 optional<Time> start_time;
827                 try {
828                         doc = make_shared<cxml::Document>("SubtitleReel");
829                         doc->read_string (xml(reels[i]));
830                         tcr = doc->number_child<int>("TimeCodeRate");
831                         auto start_time_string = doc->optional_string_child("StartTime");
832                         if (start_time_string) {
833                                 start_time = Time(*start_time_string, tcr);
834                         }
835                 } catch (...) {
836                         doc = make_shared<cxml::Document>("DCSubtitle");
837                         doc->read_string (xml(reels[i]));
838                 }
839                 parse (doc, tcr, start_time, edit_rate, i == 0);
840                 auto end = reel_offset + duration(reels[i]);
841                 if (last_out && *last_out > end) {
842                         reel_overlap = true;
843                 }
844                 reel_offset = end;
845         }
846
847         if (last_out && *last_out > reel_offset) {
848                 reel_overlap = true;
849         }
850
851         if (too_early) {
852                 notes.push_back({
853                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME
854                 });
855         }
856
857         if (too_short) {
858                 notes.push_back ({
859                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_DURATION
860                 });
861         }
862
863         if (too_close) {
864                 notes.push_back ({
865                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_SPACING
866                 });
867         }
868
869         if (reel_overlap) {
870                 notes.push_back ({
871                         VerificationNote::Type::ERROR, VerificationNote::Code::SUBTITLE_OVERLAPS_REEL_BOUNDARY
872                 });
873         }
874 }
875
876
/** Flags filled in by verify_text_lines_and_characters().  That function only ever
 *  sets them to true, so one instance can collect results from several assets.
 */
struct LinesCharactersResult
{
        /** true if a line was seen whose character count was >= the warning length */
        bool warning_length_exceeded {false};
        /** true if a line was seen whose character count was >= the error length */
        bool error_length_exceeded {false};
        /** true if more than 3 lines were seen on screen at once */
        bool line_count_exceeded {false};
};
883
884
885 static
886 void
887 verify_text_lines_and_characters (
888         shared_ptr<SubtitleAsset> asset,
889         int warning_length,
890         int error_length,
891         LinesCharactersResult* result
892         )
893 {
894         class Event
895         {
896         public:
897                 Event (Time time_, float position_, int characters_)
898                         : time (time_)
899                         , position (position_)
900                         , characters (characters_)
901                 {}
902
903                 Event (Time time_, shared_ptr<Event> start_)
904                         : time (time_)
905                         , start (start_)
906                 {}
907
908                 Time time;
909                 int position; //< position from 0 at top of screen to 100 at bottom
910                 int characters;
911                 shared_ptr<Event> start;
912         };
913
914         vector<shared_ptr<Event>> events;
915
916         auto position = [](shared_ptr<const SubtitleString> sub) {
917                 switch (sub->v_align()) {
918                 case VAlign::TOP:
919                         return lrintf(sub->v_position() * 100);
920                 case VAlign::CENTER:
921                         return lrintf((0.5f + sub->v_position()) * 100);
922                 case VAlign::BOTTOM:
923                         return lrintf((1.0f - sub->v_position()) * 100);
924                 }
925
926                 return 0L;
927         };
928
929         for (auto j: asset->subtitles()) {
930                 auto text = dynamic_pointer_cast<const SubtitleString>(j);
931                 if (text) {
932                         auto in = make_shared<Event>(text->in(), position(text), text->text().length());
933                         events.push_back(in);
934                         events.push_back(make_shared<Event>(text->out(), in));
935                 }
936         }
937
938         std::sort(events.begin(), events.end(), [](shared_ptr<Event> const& a, shared_ptr<Event>const& b) {
939                 return a->time < b->time;
940         });
941
942         map<int, int> current;
943         for (auto i: events) {
944                 if (current.size() > 3) {
945                         result->line_count_exceeded = true;
946                 }
947                 for (auto j: current) {
948                         if (j.second >= warning_length) {
949                                 result->warning_length_exceeded = true;
950                         }
951                         if (j.second >= error_length) {
952                                 result->error_length_exceeded = true;
953                         }
954                 }
955
956                 if (i->start) {
957                         /* end of a subtitle */
958                         DCP_ASSERT (current.find(i->start->position) != current.end());
959                         if (current[i->start->position] == i->start->characters) {
960                                 current.erase(i->start->position);
961                         } else {
962                                 current[i->start->position] -= i->start->characters;
963                         }
964                 } else {
965                         /* start of a subtitle */
966                         if (current.find(i->position) == current.end()) {
967                                 current[i->position] = i->characters;
968                         } else {
969                                 current[i->position] += i->characters;
970                         }
971                 }
972         }
973 }
974
975
976 static
977 void
978 verify_text_timing (vector<shared_ptr<Reel>> reels, vector<VerificationNote>& notes)
979 {
980         if (reels.empty()) {
981                 return;
982         }
983
984         if (reels[0]->main_subtitle()) {
985                 verify_text_timing (reels, reels[0]->main_subtitle()->edit_rate().numerator, notes,
986                         [](shared_ptr<Reel> reel) {
987                                 return static_cast<bool>(reel->main_subtitle());
988                         },
989                         [](shared_ptr<Reel> reel) {
990                                 return reel->main_subtitle()->asset()->raw_xml();
991                         },
992                         [](shared_ptr<Reel> reel) {
993                                 return reel->main_subtitle()->actual_duration();
994                         }
995                 );
996         }
997
998         for (auto i = 0U; i < reels[0]->closed_captions().size(); ++i) {
999                 verify_text_timing (reels, reels[0]->closed_captions()[i]->edit_rate().numerator, notes,
1000                         [i](shared_ptr<Reel> reel) {
1001                                 return i < reel->closed_captions().size();
1002                         },
1003                         [i](shared_ptr<Reel> reel) {
1004                                 return reel->closed_captions()[i]->asset()->raw_xml();
1005                         },
1006                         [i](shared_ptr<Reel> reel) {
1007                                 return reel->closed_captions()[i]->actual_duration();
1008                         }
1009                 );
1010         }
1011 }
1012
1013
1014 void
1015 verify_extension_metadata (shared_ptr<CPL> cpl, vector<VerificationNote>& notes)
1016 {
1017         DCP_ASSERT (cpl->file());
1018         cxml::Document doc ("CompositionPlaylist");
1019         doc.read_file (cpl->file().get());
1020
1021         auto missing = false;
1022         string malformed;
1023
1024         if (auto reel_list = doc.node_child("ReelList")) {
1025                 auto reels = reel_list->node_children("Reel");
1026                 if (!reels.empty()) {
1027                         if (auto asset_list = reels[0]->optional_node_child("AssetList")) {
1028                                 if (auto metadata = asset_list->optional_node_child("CompositionMetadataAsset")) {
1029                                         if (auto extension_list = metadata->optional_node_child("ExtensionMetadataList")) {
1030                                                 missing = true;
1031                                                 for (auto extension: extension_list->node_children("ExtensionMetadata")) {
1032                                                         if (extension->optional_string_attribute("scope").get_value_or("") != "http://isdcf.com/ns/cplmd/app") {
1033                                                                 continue;
1034                                                         }
1035                                                         missing = false;
1036                                                         if (auto name = extension->optional_node_child("Name")) {
1037                                                                 if (name->content() != "Application") {
1038                                                                         malformed = "<Name> should be 'Application'";
1039                                                                 }
1040                                                         }
1041                                                         if (auto property_list = extension->optional_node_child("PropertyList")) {
1042                                                                 if (auto property = property_list->optional_node_child("Property")) {
1043                                                                         if (auto name = property->optional_node_child("Name")) {
1044                                                                                 if (name->content() != "DCP Constraints Profile") {
1045                                                                                         malformed = "<Name> property should be 'DCP Constraints Profile'";
1046                                                                                 }
1047                                                                         }
1048                                                                         if (auto value = property->optional_node_child("Value")) {
1049                                                                                 if (value->content() != "SMPTE-RDD-52:2020-Bv2.1") {
1050                                                                                         malformed = "<Value> property should be 'SMPTE-RDD-52:2020-Bv2.1'";
1051                                                                                 }
1052                                                                         }
1053                                                                 }
1054                                                         }
1055                                                 }
1056                                         } else {
1057                                                 missing = true;
1058                                         }
1059                                 }
1060                         }
1061                 }
1062         }
1063
1064         if (missing) {
1065                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_EXTENSION_METADATA, cpl->id(), cpl->file().get()});
1066         } else if (!malformed.empty()) {
1067                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_EXTENSION_METADATA, malformed, cpl->file().get()});
1068         }
1069 }
1070
1071
1072 bool
1073 pkl_has_encrypted_assets (shared_ptr<DCP> dcp, shared_ptr<PKL> pkl)
1074 {
1075         vector<string> encrypted;
1076         for (auto i: dcp->cpls()) {
1077                 for (auto j: i->reel_file_assets()) {
1078                         if (j->asset_ref().resolved()) {
1079                                 /* It's a bit surprising / broken but Interop subtitle assets are represented
1080                                  * in reels by ReelSubtitleAsset which inherits ReelFileAsset, so it's possible for
1081                                  * ReelFileAssets to have assets which are not MXFs.
1082                                  */
1083                                 if (auto asset = dynamic_pointer_cast<MXF>(j->asset_ref().asset())) {
1084                                         if (asset->encrypted()) {
1085                                                 encrypted.push_back(j->asset_ref().id());
1086                                         }
1087                                 }
1088                         }
1089                 }
1090         }
1091
1092         for (auto i: pkl->asset_list()) {
1093                 if (find(encrypted.begin(), encrypted.end(), i->id()) != encrypted.end()) {
1094                         return true;
1095                 }
1096         }
1097
1098         return false;
1099 }
1100
1101
1102 vector<VerificationNote>
1103 dcp::verify (
1104         vector<boost::filesystem::path> directories,
1105         function<void (string, optional<boost::filesystem::path>)> stage,
1106         function<void (float)> progress,
1107         optional<boost::filesystem::path> xsd_dtd_directory
1108         )
1109 {
1110         if (!xsd_dtd_directory) {
1111                 xsd_dtd_directory = resources_directory() / "xsd";
1112         }
1113         *xsd_dtd_directory = boost::filesystem::canonical (*xsd_dtd_directory);
1114
1115         vector<VerificationNote> notes;
1116         State state{};
1117
1118         vector<shared_ptr<DCP>> dcps;
1119         for (auto i: directories) {
1120                 dcps.push_back (make_shared<DCP>(i));
1121         }
1122
1123         for (auto dcp: dcps) {
1124                 stage ("Checking DCP", dcp->directory());
1125                 bool carry_on = true;
1126                 try {
1127                         dcp->read (&notes);
1128                 } catch (MissingAssetmapError& e) {
1129                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1130                         carry_on = false;
1131                 } catch (ReadError& e) {
1132                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1133                 } catch (XMLError& e) {
1134                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1135                 } catch (MXFFileError& e) {
1136                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1137                 } catch (cxml::Error& e) {
1138                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1139                 }
1140
1141                 if (!carry_on) {
1142                         continue;
1143                 }
1144
1145                 if (dcp->standard() != Standard::SMPTE) {
1146                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_STANDARD});
1147                 }
1148
1149                 for (auto cpl: dcp->cpls()) {
1150                         stage ("Checking CPL", cpl->file());
1151                         validate_xml (cpl->file().get(), *xsd_dtd_directory, notes);
1152
1153                         if (cpl->any_encrypted() && !cpl->all_encrypted()) {
1154                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::PARTIALLY_ENCRYPTED});
1155                         }
1156
1157                         for (auto const& i: cpl->additional_subtitle_languages()) {
1158                                 verify_language_tag (i, notes);
1159                         }
1160
1161                         if (cpl->release_territory()) {
1162                                 if (!cpl->release_territory_scope() || cpl->release_territory_scope().get() != "http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata#scope/release-territory/UNM49") {
1163                                         auto terr = cpl->release_territory().get();
1164                                         /* Must be a valid region tag, or "001" */
1165                                         try {
1166                                                 LanguageTag::RegionSubtag test (terr);
1167                                         } catch (...) {
1168                                                 if (terr != "001") {
1169                                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, terr});
1170                                                 }
1171                                         }
1172                                 }
1173                         }
1174
1175                         if (dcp->standard() == Standard::SMPTE) {
1176                                 if (!cpl->annotation_text()) {
1177                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1178                                 } else if (cpl->annotation_text().get() != cpl->content_title_text()) {
1179                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1180                                 }
1181                         }
1182
1183                         for (auto i: dcp->pkls()) {
1184                                 /* Check that the CPL's hash corresponds to the PKL */
1185                                 optional<string> h = i->hash(cpl->id());
1186                                 if (h && make_digest(ArrayData(*cpl->file())) != *h) {
1187                                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_CPL_HASHES, cpl->id(), cpl->file().get()});
1188                                 }
1189
1190                                 /* Check that any PKL with a single CPL has its AnnotationText the same as the CPL's ContentTitleText */
1191                                 optional<string> required_annotation_text;
1192                                 for (auto j: i->asset_list()) {
1193                                         /* See if this is a CPL */
1194                                         for (auto k: dcp->cpls()) {
1195                                                 if (j->id() == k->id()) {
1196                                                         if (!required_annotation_text) {
1197                                                                 /* First CPL we have found; this is the required AnnotationText unless we find another */
1198                                                                 required_annotation_text = cpl->content_title_text();
1199                                                         } else {
1200                                                                 /* There's more than one CPL so we don't care what the PKL's AnnotationText is */
1201                                                                 required_annotation_text = boost::none;
1202                                                         }
1203                                                 }
1204                                         }
1205                                 }
1206
1207                                 if (required_annotation_text && i->annotation_text() != required_annotation_text) {
1208                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL, i->id(), i->file().get()});
1209                                 }
1210                         }
1211
1212                         /* set to true if any reel has a MainSubtitle */
1213                         auto have_main_subtitle = false;
1214                         /* set to true if any reel has no MainSubtitle */
1215                         auto have_no_main_subtitle = false;
1216                         /* fewest number of closed caption assets seen in a reel */
1217                         size_t fewest_closed_captions = SIZE_MAX;
1218                         /* most number of closed caption assets seen in a reel */
1219                         size_t most_closed_captions = 0;
1220                         map<Marker, Time> markers_seen;
1221
1222                         for (auto reel: cpl->reels()) {
1223                                 stage ("Checking reel", optional<boost::filesystem::path>());
1224
1225                                 for (auto i: reel->assets()) {
1226                                         if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1227                                                 notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_DURATION, i->id()});
1228                                         }
1229                                         if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1230                                                 notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_INTRINSIC_DURATION, i->id()});
1231                                         }
1232                                         auto file_asset = dynamic_pointer_cast<ReelFileAsset>(i);
1233                                         if (file_asset && !file_asset->hash()) {
1234                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_HASH, i->id()});
1235                                         }
1236                                 }
1237
1238                                 if (dcp->standard() == Standard::SMPTE) {
1239                                         boost::optional<int64_t> duration;
1240                                         for (auto i: reel->assets()) {
1241                                                 if (!duration) {
1242                                                         duration = i->actual_duration();
1243                                                 } else if (*duration != i->actual_duration()) {
1244                                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_ASSET_DURATION});
1245                                                         break;
1246                                                 }
1247                                         }
1248                                 }
1249
1250                                 if (reel->main_picture()) {
1251                                         /* Check reel stuff */
1252                                         auto const frame_rate = reel->main_picture()->frame_rate();
1253                                         if (frame_rate.denominator != 1 ||
1254                                             (frame_rate.numerator != 24 &&
1255                                              frame_rate.numerator != 25 &&
1256                                              frame_rate.numerator != 30 &&
1257                                              frame_rate.numerator != 48 &&
1258                                              frame_rate.numerator != 50 &&
1259                                              frame_rate.numerator != 60 &&
1260                                              frame_rate.numerator != 96)) {
1261                                                 notes.push_back ({
1262                                                         VerificationNote::Type::ERROR,
1263                                                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE,
1264                                                         String::compose("%1/%2", frame_rate.numerator, frame_rate.denominator)
1265                                                 });
1266                                         }
1267                                         /* Check asset */
1268                                         if (reel->main_picture()->asset_ref().resolved()) {
1269                                                 verify_main_picture_asset (dcp, reel->main_picture(), stage, progress, notes);
1270                                         }
1271                                 }
1272
1273                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
1274                                         verify_main_sound_asset (dcp, reel->main_sound(), stage, progress, notes);
1275                                 }
1276
1277                                 if (reel->main_subtitle()) {
1278                                         verify_main_subtitle_reel (reel->main_subtitle(), notes);
1279                                         if (reel->main_subtitle()->asset_ref().resolved()) {
1280                                                 verify_subtitle_asset (reel->main_subtitle()->asset(), stage, *xsd_dtd_directory, notes, state);
1281                                         }
1282                                         have_main_subtitle = true;
1283                                 } else {
1284                                         have_no_main_subtitle = true;
1285                                 }
1286
1287                                 for (auto i: reel->closed_captions()) {
1288                                         verify_closed_caption_reel (i, notes);
1289                                         if (i->asset_ref().resolved()) {
1290                                                 verify_closed_caption_asset (i->asset(), stage, *xsd_dtd_directory, notes);
1291                                         }
1292                                 }
1293
1294                                 if (reel->main_markers()) {
1295                                         for (auto const& i: reel->main_markers()->get()) {
1296                                                 markers_seen.insert (i);
1297                                         }
1298                                 }
1299
1300                                 fewest_closed_captions = std::min (fewest_closed_captions, reel->closed_captions().size());
1301                                 most_closed_captions = std::max (most_closed_captions, reel->closed_captions().size());
1302                         }
1303
1304                         verify_text_timing (cpl->reels(), notes);
1305
1306                         if (dcp->standard() == Standard::SMPTE) {
1307
1308                                 if (have_main_subtitle && have_no_main_subtitle) {
1309                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS});
1310                                 }
1311
1312                                 if (fewest_closed_captions != most_closed_captions) {
1313                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS});
1314                                 }
1315
1316                                 if (cpl->content_kind() == ContentKind::FEATURE) {
1317                                         if (markers_seen.find(Marker::FFEC) == markers_seen.end()) {
1318                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFEC_IN_FEATURE});
1319                                         }
1320                                         if (markers_seen.find(Marker::FFMC) == markers_seen.end()) {
1321                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFMC_IN_FEATURE});
1322                                         }
1323                                 }
1324
1325                                 auto ffoc = markers_seen.find(Marker::FFOC);
1326                                 if (ffoc == markers_seen.end()) {
1327                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_FFOC});
1328                                 } else if (ffoc->second.e != 1) {
1329                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_FFOC, raw_convert<string>(ffoc->second.e)});
1330                                 }
1331
1332                                 auto lfoc = markers_seen.find(Marker::LFOC);
1333                                 if (lfoc == markers_seen.end()) {
1334                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_LFOC});
1335                                 } else {
1336                                         auto lfoc_time = lfoc->second.as_editable_units_ceil(lfoc->second.tcr);
1337                                         if (lfoc_time != (cpl->reels().back()->duration() - 1)) {
1338                                                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_LFOC, raw_convert<string>(lfoc_time)});
1339                                         }
1340                                 }
1341
1342                                 LinesCharactersResult result;
1343                                 for (auto reel: cpl->reels()) {
1344                                         if (reel->main_subtitle() && reel->main_subtitle()->asset()) {
1345                                                 verify_text_lines_and_characters (reel->main_subtitle()->asset(), 52, 79, &result);
1346                                         }
1347                                 }
1348
1349                                 if (result.line_count_exceeded) {
1350                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT});
1351                                 }
1352                                 if (result.error_length_exceeded) {
1353                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH});
1354                                 } else if (result.warning_length_exceeded) {
1355                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH});
1356                                 }
1357
1358                                 result = LinesCharactersResult();
1359                                 for (auto reel: cpl->reels()) {
1360                                         for (auto i: reel->closed_captions()) {
1361                                                 if (i->asset()) {
1362                                                         verify_text_lines_and_characters (i->asset(), 32, 32, &result);
1363                                                 }
1364                                         }
1365                                 }
1366
1367                                 if (result.line_count_exceeded) {
1368                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT});
1369                                 }
1370                                 if (result.error_length_exceeded) {
1371                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH});
1372                                 }
1373
1374                                 if (!cpl->full_content_title_text()) {
1375                                         /* Since FullContentTitleText is assumed always to exist if there's a CompositionMetadataAsset we
1376                                          * can use it as a proxy for CompositionMetadataAsset's existence.
1377                                          */
1378                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA, cpl->id(), cpl->file().get()});
1379                                 } else if (!cpl->version_number()) {
1380                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER, cpl->id(), cpl->file().get()});
1381                                 }
1382
1383                                 verify_extension_metadata (cpl, notes);
1384
1385                                 if (cpl->any_encrypted()) {
1386                                         cxml::Document doc ("CompositionPlaylist");
1387                                         DCP_ASSERT (cpl->file());
1388                                         doc.read_file (cpl->file().get());
1389                                         if (!doc.optional_node_child("Signature")) {
1390                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT, cpl->id(), cpl->file().get()});
1391                                         }
1392                                 }
1393                         }
1394                 }
1395
1396                 for (auto pkl: dcp->pkls()) {
1397                         stage ("Checking PKL", pkl->file());
1398                         validate_xml (pkl->file().get(), *xsd_dtd_directory, notes);
1399                         if (pkl_has_encrypted_assets(dcp, pkl)) {
1400                                 cxml::Document doc ("PackingList");
1401                                 doc.read_file (pkl->file().get());
1402                                 if (!doc.optional_node_child("Signature")) {
1403                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT, pkl->id(), pkl->file().get()});
1404                                 }
1405                         }
1406                 }
1407
1408                 if (dcp->asset_map_path()) {
1409                         stage ("Checking ASSETMAP", dcp->asset_map_path().get());
1410                         validate_xml (dcp->asset_map_path().get(), *xsd_dtd_directory, notes);
1411                 } else {
1412                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_ASSETMAP});
1413                 }
1414         }
1415
1416         return notes;
1417 }
1418
1419
1420 string
1421 dcp::note_to_string (VerificationNote note)
1422 {
1423         /** These strings should say what is wrong, incorporating any extra details (ID, filenames etc.).
1424          *
1425          *  e.g. "ClosedCaption asset has no <EntryPoint> tag.",
1426          *  not "ClosedCaption assets must have an <EntryPoint> tag."
1427          *
1428          *  It's OK to use XML tag names where they are clear.
1429          *  If both ID and filename are available, use only the ID.
1430          *  End messages with a full stop.
1431          *  Messages should not mention whether or not their errors are a part of Bv2.1.
1432          */
1433         switch (note.code()) {
1434         case VerificationNote::Code::FAILED_READ:
1435                 return *note.note();
1436         case VerificationNote::Code::MISMATCHED_CPL_HASHES:
1437                 return String::compose("The hash of the CPL %1 in the PKL does not agree with the CPL file.", note.note().get());
1438         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE:
1439                 return String::compose("The picture in a reel has an invalid frame rate %1.", note.note().get());
1440         case VerificationNote::Code::INCORRECT_PICTURE_HASH:
1441                 return String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
1442         case VerificationNote::Code::MISMATCHED_PICTURE_HASHES:
1443                 return String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
1444         case VerificationNote::Code::INCORRECT_SOUND_HASH:
1445                 return String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
1446         case VerificationNote::Code::MISMATCHED_SOUND_HASHES:
1447                 return String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
1448         case VerificationNote::Code::EMPTY_ASSET_PATH:
1449                 return "The asset map contains an empty asset path.";
1450         case VerificationNote::Code::MISSING_ASSET:
1451                 return String::compose("The file %1 for an asset in the asset map cannot be found.", note.file()->filename());
1452         case VerificationNote::Code::MISMATCHED_STANDARD:
1453                 return "The DCP contains both SMPTE and Interop parts.";
1454         case VerificationNote::Code::INVALID_XML:
1455                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
1456         case VerificationNote::Code::MISSING_ASSETMAP:
1457                 return "No ASSETMAP or ASSETMAP.xml was found.";
1458         case VerificationNote::Code::INVALID_INTRINSIC_DURATION:
1459                 return String::compose("The intrinsic duration of the asset %1 is less than 1 second.", note.note().get());
1460         case VerificationNote::Code::INVALID_DURATION:
1461                 return String::compose("The duration of the asset %1 is less than 1 second.", note.note().get());
1462         case VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1463                 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
1464         case VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1465                 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
1466         case VerificationNote::Code::EXTERNAL_ASSET:
1467                 return String::compose("The asset %1 that this DCP refers to is not included in the DCP.  It may be a VF.", note.note().get());
1468         case VerificationNote::Code::INVALID_STANDARD:
1469                 return "This DCP does not use the SMPTE standard.";
1470         case VerificationNote::Code::INVALID_LANGUAGE:
1471                 return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
1472         case VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS:
1473                 return String::compose("The size %1 of picture asset %2 is not allowed.", note.note().get(), note.file()->filename());
1474         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K:
1475                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 2K DCPs.", note.note().get(), note.file()->filename());
1476         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K:
1477                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 4K DCPs.", note.note().get(), note.file()->filename());
1478         case VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D:
1479                 return "3D 4K DCPs are not allowed.";
1480         case VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES:
1481                 return String::compose("The size %1 of the closed caption asset %2 is larger than the 256KB maximum.", note.note().get(), note.file()->filename());
1482         case VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES:
1483                 return String::compose("The size %1 of the timed text asset %2 is larger than the 115MB maximum.", note.note().get(), note.file()->filename());
1484         case VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES:
1485                 return String::compose("The size %1 of the fonts in timed text asset %2 is larger than the 10MB maximum.", note.note().get(), note.file()->filename());
1486         case VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE:
1487                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <Language> tag.", note.file()->filename());
1488         case VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES:
1489                 return "Some subtitle assets have different <Language> tags than others";
1490         case VerificationNote::Code::MISSING_SUBTITLE_START_TIME:
1491                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <StartTime> tag.", note.file()->filename());
1492         case VerificationNote::Code::INVALID_SUBTITLE_START_TIME:
1493                 return String::compose("The XML for a SMPTE subtitle asset %1 has a non-zero <StartTime> tag.", note.file()->filename());
1494         case VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME:
1495                 return "The first subtitle or closed caption is less than 4 seconds from the start of the DCP.";
1496         case VerificationNote::Code::INVALID_SUBTITLE_DURATION:
1497                 return "At least one subtitle lasts less than 15 frames.";
1498         case VerificationNote::Code::INVALID_SUBTITLE_SPACING:
1499                 return "At least one pair of subtitles is separated by less than 2 frames.";
1500         case VerificationNote::Code::SUBTITLE_OVERLAPS_REEL_BOUNDARY:
1501                 return "At least one subtitle extends outside of its reel.";
1502         case VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT:
1503                 return "There are more than 3 subtitle lines in at least one place in the DCP.";
1504         case VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH:
1505                 return "There are more than 52 characters in at least one subtitle line.";
1506         case VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH:
1507                 return "There are more than 79 characters in at least one subtitle line.";
1508         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT:
1509                 return "There are more than 3 closed caption lines in at least one place.";
1510         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH:
1511                 return "There are more than 32 characters in at least one closed caption line.";
1512         case VerificationNote::Code::INVALID_SOUND_FRAME_RATE:
1513                 return String::compose("The sound asset %1 has a sampling rate of %2", note.file()->filename(), note.note().get());
1514         case VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT:
1515                 return String::compose("The CPL %1 has no <AnnotationText> tag.", note.note().get());
1516         case VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT:
1517                 return String::compose("The CPL %1 has an <AnnotationText> which differs from its <ContentTitleText>", note.note().get());
1518         case VerificationNote::Code::MISMATCHED_ASSET_DURATION:
1519                 return "All assets in a reel do not have the same duration.";
1520         case VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS:
1521                 return "At least one reel contains a subtitle asset, but some reel(s) do not";
1522         case VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS:
1523                 return "At least one reel has closed captions, but reels have different numbers of closed caption assets.";
1524         case VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT:
1525                 return String::compose("The subtitle asset %1 has no <EntryPoint> tag.", note.note().get());
1526         case VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT:
1527                 return String::compose("The subtitle asset %1 has an <EntryPoint> other than 0.", note.note().get());
1528         case VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT:
1529                 return String::compose("The closed caption asset %1 has no <EntryPoint> tag.", note.note().get());
1530         case VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT:
1531                 return String::compose("The closed caption asset %1 has an <EntryPoint> other than 0.", note.note().get());
1532         case VerificationNote::Code::MISSING_HASH:
1533                 return String::compose("The asset %1 has no <Hash> tag in the CPL.", note.note().get());
1534         case VerificationNote::Code::MISSING_FFEC_IN_FEATURE:
1535                 return "The DCP is marked as a Feature but there is no FFEC (first frame of end credits) marker";
1536         case VerificationNote::Code::MISSING_FFMC_IN_FEATURE:
1537                 return "The DCP is marked as a Feature but there is no FFMC (first frame of moving credits) marker";
1538         case VerificationNote::Code::MISSING_FFOC:
1539                 return "There should be a FFOC (first frame of content) marker";
1540         case VerificationNote::Code::MISSING_LFOC:
1541                 return "There should be a LFOC (last frame of content) marker";
1542         case VerificationNote::Code::INCORRECT_FFOC:
1543                 return String::compose("The FFOC marker is %1 instead of 1", note.note().get());
1544         case VerificationNote::Code::INCORRECT_LFOC:
1545                 return String::compose("The LFOC marker is %1 instead of 1 less than the duration of the last reel.", note.note().get());
1546         case VerificationNote::Code::MISSING_CPL_METADATA:
1547                 return String::compose("The CPL %1 has no <CompositionMetadataAsset> tag.", note.note().get());
1548         case VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER:
1549                 return String::compose("The CPL %1 has no <VersionNumber> in its <CompositionMetadataAsset>.", note.note().get());
1550         case VerificationNote::Code::MISSING_EXTENSION_METADATA:
1551                 return String::compose("The CPL %1 has no <ExtensionMetadata> in its <CompositionMetadataAsset>.", note.note().get());
1552         case VerificationNote::Code::INVALID_EXTENSION_METADATA:
1553                 return String::compose("The CPL %1 has a malformed <ExtensionMetadata> (%2).", note.file()->filename(), note.note().get());
1554         case VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT:
1555                 return String::compose("The CPL %1, which has encrypted content, is not signed.", note.note().get());
1556         case VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT:
1557                 return String::compose("The PKL %1, which has encrypted content, is not signed.", note.note().get());
1558         case VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL:
1559                 return String::compose("The PKL %1 has only one CPL but its <AnnotationText> does not match the CPL's <ContentTitleText>.", note.note().get());
1560         case VerificationNote::Code::PARTIALLY_ENCRYPTED:
1561                 return "Some assets are encrypted but some are not.";
1562         case VerificationNote::Code::INVALID_JPEG2000_CODESTREAM:
1563                 return String::compose("The JPEG2000 codestream for at least one frame is invalid (%1)", note.note().get());
1564         case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_2K:
1565                 return String::compose("The JPEG2000 codestream uses %1 guard bits in a 2K image instead of 1.", note.note().get());
1566         case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_4K:
1567                 return String::compose("The JPEG2000 codestream uses %1 guard bits in a 4K image instead of 2.", note.note().get());
1568         case VerificationNote::Code::INVALID_JPEG2000_TILE_SIZE:
1569                 return "The JPEG2000 tile size is not the same as the image size.";
1570         case VerificationNote::Code::INVALID_JPEG2000_CODE_BLOCK_WIDTH:
1571                 return String::compose("The JPEG2000 codestream uses a code block width of %1 instead of 32.", note.note().get());
1572         case VerificationNote::Code::INVALID_JPEG2000_CODE_BLOCK_HEIGHT:
1573                 return String::compose("The JPEG2000 codestream uses a code block height of %1 instead of 32.", note.note().get());
1574         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER_COUNT_FOR_2K:
1575                 return String::compose("%1 POC markers found in 2K JPEG2000 codestream instead of 0.", note.note().get());
1576         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER_COUNT_FOR_4K:
1577                 return String::compose("%1 POC markers found in 4K JPEG2000 codestream instead of 1.", note.note().get());
1578         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER:
1579                 return String::compose("Incorrect POC marker content found (%1)", note.note().get());
1580         case VerificationNote::Code::INVALID_JPEG2000_POC_MARKER_LOCATION:
1581                 return "POC marker found outside main header";
1582         case VerificationNote::Code::INVALID_JPEG2000_TILE_PARTS_FOR_2K:
1583                 return String::compose("The JPEG2000 codestream has %1 tile parts in a 2K image instead of 3.", note.note().get());
1584         case VerificationNote::Code::INVALID_JPEG2000_TILE_PARTS_FOR_4K:
1585                 return String::compose("The JPEG2000 codestream has %1 tile parts in a 4K image instead of 6.", note.note().get());
1586         case VerificationNote::Code::MISSING_JPEG200_TLM_MARKER:
1587                 return "No TLM marker was found in a JPEG2000 codestream.";
1588         }
1589
1590         return "";
1591 }
1592
1593
1594 bool
1595 dcp::operator== (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
1596 {
1597         return a.type() == b.type() && a.code() == b.code() && a.note() == b.note() && a.file() == b.file() && a.line() == b.line();
1598 }
1599
1600
1601 std::ostream&
1602 dcp::operator<< (std::ostream& s, dcp::VerificationNote const& note)
1603 {
1604         s << note_to_string (note);
1605         if (note.note()) {
1606                 s << " [" << note.note().get() << "]";
1607         }
1608         if (note.file()) {
1609                 s << " [" << note.file().get() << "]";
1610         }
1611         if (note.line()) {
1612                 s << " [" << note.line().get() << "]";
1613         }
1614         return s;
1615 }
1616