5644d36998c69d2eb5acbe7f6367dee08d49e764
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2021 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34 #include "verify.h"
35 #include "dcp.h"
36 #include "cpl.h"
37 #include "reel.h"
38 #include "reel_closed_caption_asset.h"
39 #include "reel_picture_asset.h"
40 #include "reel_sound_asset.h"
41 #include "reel_subtitle_asset.h"
42 #include "interop_subtitle_asset.h"
43 #include "mono_picture_asset.h"
44 #include "mono_picture_frame.h"
45 #include "stereo_picture_asset.h"
46 #include "stereo_picture_frame.h"
47 #include "exceptions.h"
48 #include "compose.hpp"
49 #include "raw_convert.h"
50 #include "reel_markers_asset.h"
51 #include "smpte_subtitle_asset.h"
52 #include <xercesc/util/PlatformUtils.hpp>
53 #include <xercesc/parsers/XercesDOMParser.hpp>
54 #include <xercesc/parsers/AbstractDOMParser.hpp>
55 #include <xercesc/sax/HandlerBase.hpp>
56 #include <xercesc/dom/DOMImplementation.hpp>
57 #include <xercesc/dom/DOMImplementationLS.hpp>
58 #include <xercesc/dom/DOMImplementationRegistry.hpp>
59 #include <xercesc/dom/DOMLSParser.hpp>
60 #include <xercesc/dom/DOMException.hpp>
61 #include <xercesc/dom/DOMDocument.hpp>
62 #include <xercesc/dom/DOMNodeList.hpp>
63 #include <xercesc/dom/DOMError.hpp>
64 #include <xercesc/dom/DOMLocator.hpp>
65 #include <xercesc/dom/DOMNamedNodeMap.hpp>
66 #include <xercesc/dom/DOMAttr.hpp>
67 #include <xercesc/dom/DOMErrorHandler.hpp>
68 #include <xercesc/framework/LocalFileInputSource.hpp>
69 #include <xercesc/framework/MemBufInputSource.hpp>
70 #include <boost/noncopyable.hpp>
71 #include <boost/algorithm/string.hpp>
72 #include <map>
73 #include <vector>
74 #include <iostream>
75
76 using std::list;
77 using std::vector;
78 using std::string;
79 using std::cout;
80 using std::map;
81 using std::max;
82 using std::set;
83 using std::shared_ptr;
84 using std::make_shared;
85 using boost::optional;
86 using boost::function;
87 using std::dynamic_pointer_cast;
88
89 using namespace dcp;
90 using namespace xercesc;
91
92 static
93 string
94 xml_ch_to_string (XMLCh const * a)
95 {
96         char* x = XMLString::transcode(a);
97         string const o(x);
98         XMLString::release(&x);
99         return o;
100 }
101
102 class XMLValidationError
103 {
104 public:
105         XMLValidationError (SAXParseException const & e)
106                 : _message (xml_ch_to_string(e.getMessage()))
107                 , _line (e.getLineNumber())
108                 , _column (e.getColumnNumber())
109                 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
110                 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
111         {
112
113         }
114
115         string message () const {
116                 return _message;
117         }
118
119         uint64_t line () const {
120                 return _line;
121         }
122
123         uint64_t column () const {
124                 return _column;
125         }
126
127         string public_id () const {
128                 return _public_id;
129         }
130
131         string system_id () const {
132                 return _system_id;
133         }
134
135 private:
136         string _message;
137         uint64_t _line;
138         uint64_t _column;
139         string _public_id;
140         string _system_id;
141 };
142
143
144 class DCPErrorHandler : public ErrorHandler
145 {
146 public:
147         void warning(const SAXParseException& e)
148         {
149                 maybe_add (XMLValidationError(e));
150         }
151
152         void error(const SAXParseException& e)
153         {
154                 maybe_add (XMLValidationError(e));
155         }
156
157         void fatalError(const SAXParseException& e)
158         {
159                 maybe_add (XMLValidationError(e));
160         }
161
162         void resetErrors() {
163                 _errors.clear ();
164         }
165
166         list<XMLValidationError> errors () const {
167                 return _errors;
168         }
169
170 private:
171         void maybe_add (XMLValidationError e)
172         {
173                 /* XXX: nasty hack */
174                 if (
175                         e.message().find("schema document") != string::npos &&
176                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
177                         ) {
178                         return;
179                 }
180
181                 _errors.push_back (e);
182         }
183
184         list<XMLValidationError> _errors;
185 };
186
187 class StringToXMLCh : public boost::noncopyable
188 {
189 public:
190         StringToXMLCh (string a)
191         {
192                 _buffer = XMLString::transcode(a.c_str());
193         }
194
195         ~StringToXMLCh ()
196         {
197                 XMLString::release (&_buffer);
198         }
199
200         XMLCh const * get () const {
201                 return _buffer;
202         }
203
204 private:
205         XMLCh* _buffer;
206 };
207
208 class LocalFileResolver : public EntityResolver
209 {
210 public:
211         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
212                 : _xsd_dtd_directory (xsd_dtd_directory)
213         {
214                 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
215                  * found without being here.
216                  */
217                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
218                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
219                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
220                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
221                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
222                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
223                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
224                 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
225                 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
226                 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
227                 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
228                 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
229                 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
230         }
231
232         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
233         {
234                 if (!system_id) {
235                         return 0;
236                 }
237                 auto system_id_str = xml_ch_to_string (system_id);
238                 auto p = _xsd_dtd_directory;
239                 if (_files.find(system_id_str) == _files.end()) {
240                         p /= system_id_str;
241                 } else {
242                         p /= _files[system_id_str];
243                 }
244                 StringToXMLCh ch (p.string());
245                 return new LocalFileInputSource(ch.get());
246         }
247
248 private:
249         void add (string uri, string file)
250         {
251                 _files[uri] = file;
252         }
253
254         std::map<string, string> _files;
255         boost::filesystem::path _xsd_dtd_directory;
256 };
257
258
259 static void
260 parse (XercesDOMParser& parser, boost::filesystem::path xml)
261 {
262         parser.parse(xml.string().c_str());
263 }
264
265
266 static void
267 parse (XercesDOMParser& parser, string xml)
268 {
269         xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
270         parser.parse(buf);
271 }
272
273
274 template <class T>
275 void
276 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, vector<VerificationNote>& notes)
277 {
278         try {
279                 XMLPlatformUtils::Initialize ();
280         } catch (XMLException& e) {
281                 throw MiscError ("Failed to initialise xerces library");
282         }
283
284         DCPErrorHandler error_handler;
285
286         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
287         {
288                 XercesDOMParser parser;
289                 parser.setValidationScheme(XercesDOMParser::Val_Always);
290                 parser.setDoNamespaces(true);
291                 parser.setDoSchema(true);
292
293                 vector<string> schema;
294                 schema.push_back("xml.xsd");
295                 schema.push_back("xmldsig-core-schema.xsd");
296                 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
297                 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
298                 schema.push_back("SMPTE-429-9-2007-AM.xsd");
299                 schema.push_back("Main-Stereo-Picture-CPL.xsd");
300                 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
301                 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
302                 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
303                 schema.push_back("DCSubtitle.v1.mattsson.xsd");
304                 schema.push_back("DCDMSubtitle-2010.xsd");
305                 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
306                 schema.push_back("SMPTE-429-16.xsd");
307                 schema.push_back("Dolby-2012-AD.xsd");
308                 schema.push_back("SMPTE-429-10-2008.xsd");
309                 schema.push_back("xlink.xsd");
310                 schema.push_back("SMPTE-335-2012.xsd");
311                 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
312                 schema.push_back("isdcf-mca.xsd");
313                 schema.push_back("SMPTE-429-12-2008.xsd");
314
315                 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
316                  * Schemas that are not mentioned in this list are not read, and the things
317                  * they describe are not checked.
318                  */
319                 string locations;
320                 for (auto i: schema) {
321                         locations += String::compose("%1 %1 ", i, i);
322                 }
323
324                 parser.setExternalSchemaLocation(locations.c_str());
325                 parser.setValidationSchemaFullChecking(true);
326                 parser.setErrorHandler(&error_handler);
327
328                 LocalFileResolver resolver (xsd_dtd_directory);
329                 parser.setEntityResolver(&resolver);
330
331                 try {
332                         parser.resetDocumentPool();
333                         parse(parser, xml);
334                 } catch (XMLException& e) {
335                         throw MiscError(xml_ch_to_string(e.getMessage()));
336                 } catch (DOMException& e) {
337                         throw MiscError(xml_ch_to_string(e.getMessage()));
338                 } catch (...) {
339                         throw MiscError("Unknown exception from xerces");
340                 }
341         }
342
343         XMLPlatformUtils::Terminate ();
344
345         for (auto i: error_handler.errors()) {
346                 notes.push_back (
347                         VerificationNote(
348                                 VerificationNote::VERIFY_ERROR,
349                                 VerificationNote::XML_VALIDATION_ERROR,
350                                 i.message(),
351                                 boost::trim_copy(i.public_id() + " " + i.system_id()),
352                                 i.line()
353                                 )
354                         );
355         }
356 }
357
358
/** Possible results of verify_asset() */
enum VerifyAssetResult
{
	/** none of the hash comparisons found a difference */
	VERIFY_ASSET_RESULT_GOOD,
	/** the hash in the CPL is not the same as the one in the PKL */
	VERIFY_ASSET_RESULT_CPL_PKL_DIFFER,
	/** the hash of the asset itself is not the same as the one in the PKL */
	VERIFY_ASSET_RESULT_BAD
};
364
365
366 static VerifyAssetResult
367 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
368 {
369         auto const actual_hash = reel_mxf->asset_ref()->hash(progress);
370
371         auto pkls = dcp->pkls();
372         /* We've read this DCP in so it must have at least one PKL */
373         DCP_ASSERT (!pkls.empty());
374
375         auto asset = reel_mxf->asset_ref().asset();
376
377         optional<string> pkl_hash;
378         for (auto i: pkls) {
379                 pkl_hash = i->hash (reel_mxf->asset_ref()->id());
380                 if (pkl_hash) {
381                         break;
382                 }
383         }
384
385         DCP_ASSERT (pkl_hash);
386
387         auto cpl_hash = reel_mxf->hash();
388         if (cpl_hash && *cpl_hash != *pkl_hash) {
389                 return VERIFY_ASSET_RESULT_CPL_PKL_DIFFER;
390         }
391
392         if (actual_hash != *pkl_hash) {
393                 return VERIFY_ASSET_RESULT_BAD;
394         }
395
396         return VERIFY_ASSET_RESULT_GOOD;
397 }
398
399
400 void
401 verify_language_tag (string tag, vector<VerificationNote>& notes)
402 {
403         try {
404                 dcp::LanguageTag test (tag);
405         } catch (dcp::LanguageTagError &) {
406                 notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::BAD_LANGUAGE, tag));
407         }
408 }
409
410
/** Possible results of checking the frame sizes of a picture asset */
enum VerifyPictureAssetResult {
	/** no frame was bigger than either limit */
	VERIFY_PICTURE_ASSET_RESULT_GOOD,
	/** the biggest frame was over the "risky" limit but not over the maximum */
	VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE,
	/** the biggest frame was over the maximum */
	VERIFY_PICTURE_ASSET_RESULT_BAD,
};
417
418
419 int
420 biggest_frame_size (shared_ptr<const MonoPictureFrame> frame)
421 {
422         return frame->size ();
423 }
424
425 int
426 biggest_frame_size (shared_ptr<const StereoPictureFrame> frame)
427 {
428         return max(frame->left()->size(), frame->right()->size());
429 }
430
431
432 template <class A, class R, class F>
433 optional<VerifyPictureAssetResult>
434 verify_picture_asset_type (shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
435 {
436         auto asset = dynamic_pointer_cast<A>(reel_mxf->asset_ref().asset());
437         if (!asset) {
438                 return optional<VerifyPictureAssetResult>();
439         }
440
441         int biggest_frame = 0;
442         auto reader = asset->start_read ();
443         auto const duration = asset->intrinsic_duration ();
444         for (int64_t i = 0; i < duration; ++i) {
445                 shared_ptr<const F> frame = reader->get_frame (i);
446                 biggest_frame = max(biggest_frame, biggest_frame_size(frame));
447                 progress (float(i) / duration);
448         }
449
450         static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
451         static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
452         if (biggest_frame > max_frame) {
453                 return VERIFY_PICTURE_ASSET_RESULT_BAD;
454         } else if (biggest_frame > risky_frame) {
455                 return VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE;
456         }
457
458         return VERIFY_PICTURE_ASSET_RESULT_GOOD;
459 }
460
461
462 static VerifyPictureAssetResult
463 verify_picture_asset (shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
464 {
465         auto r = verify_picture_asset_type<MonoPictureAsset, MonoPictureAssetReader, MonoPictureFrame>(reel_mxf, progress);
466         if (!r) {
467                 r = verify_picture_asset_type<StereoPictureAsset, StereoPictureAssetReader, StereoPictureFrame>(reel_mxf, progress);
468         }
469
470         DCP_ASSERT (r);
471         return *r;
472 }
473
474
475 static void
476 verify_main_picture_asset (
477         shared_ptr<const DCP> dcp,
478         shared_ptr<const ReelPictureAsset> reel_asset,
479         function<void (string, optional<boost::filesystem::path>)> stage,
480         function<void (float)> progress,
481         vector<VerificationNote>& notes
482         )
483 {
484         auto asset = reel_asset->asset();
485         auto const file = *asset->file();
486         stage ("Checking picture asset hash", file);
487         auto const r = verify_asset (dcp, reel_asset, progress);
488         switch (r) {
489                 case VERIFY_ASSET_RESULT_BAD:
490                         notes.push_back (
491                                 VerificationNote(
492                                         VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_HASH_INCORRECT, file
493                                         )
494                                 );
495                         break;
496                 case VERIFY_ASSET_RESULT_CPL_PKL_DIFFER:
497                         notes.push_back (
498                                 VerificationNote(
499                                         VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_PICTURE_HASHES_DIFFER, file
500                                         )
501                                 );
502                         break;
503                 default:
504                         break;
505         }
506         stage ("Checking picture frame sizes", asset->file());
507         auto const pr = verify_picture_asset (reel_asset, progress);
508         switch (pr) {
509                 case VERIFY_PICTURE_ASSET_RESULT_BAD:
510                         notes.push_back (
511                                 VerificationNote(
512                                         VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_FRAME_TOO_LARGE_IN_BYTES, file
513                                         )
514                                 );
515                         break;
516                 case VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE:
517                         notes.push_back (
518                                 VerificationNote(
519                                         VerificationNote::VERIFY_WARNING, VerificationNote::PICTURE_FRAME_NEARLY_TOO_LARGE_IN_BYTES, file
520                                         )
521                                 );
522                         break;
523                 default:
524                         break;
525         }
526
527         /* Only flat/scope allowed by Bv2.1 */
528         if (
529                 asset->size() != dcp::Size(2048, 858) &&
530                 asset->size() != dcp::Size(1998, 1080) &&
531                 asset->size() != dcp::Size(4096, 1716) &&
532                 asset->size() != dcp::Size(3996, 2160)) {
533                 notes.push_back(
534                         VerificationNote(
535                                 VerificationNote::VERIFY_BV21_ERROR,
536                                 VerificationNote::PICTURE_ASSET_INVALID_SIZE_IN_PIXELS,
537                                 String::compose("%1x%2", asset->size().width, asset->size().height),
538                                 file
539                                 )
540                         );
541         }
542
543         /* Only 24, 25, 48fps allowed for 2K */
544         if (
545                 (asset->size() == dcp::Size(2048, 858) || asset->size() == dcp::Size(1998, 1080)) &&
546                 (asset->edit_rate() != dcp::Fraction(24, 1) && asset->edit_rate() != dcp::Fraction(25, 1) && asset->edit_rate() != dcp::Fraction(48, 1))
547            ) {
548                 notes.push_back(
549                         VerificationNote(
550                                 VerificationNote::VERIFY_BV21_ERROR,
551                                 VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_2K,
552                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
553                                 file
554                                 )
555                         );
556         }
557
558         if (asset->size() == dcp::Size(4096, 1716) || asset->size() == dcp::Size(3996, 2160)) {
559                 /* Only 24fps allowed for 4K */
560                 if (asset->edit_rate() != dcp::Fraction(24, 1)) {
561                         notes.push_back(
562                                 VerificationNote(
563                                         VerificationNote::VERIFY_BV21_ERROR,
564                                         VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_4K,
565                                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
566                                         file
567                                         )
568                                 );
569                 }
570
571                 /* Only 2D allowed for 4K */
572                 if (dynamic_pointer_cast<const StereoPictureAsset>(asset)) {
573                         notes.push_back(
574                                 VerificationNote(
575                                         VerificationNote::VERIFY_BV21_ERROR,
576                                         VerificationNote::PICTURE_ASSET_4K_3D,
577                                         file
578                                         )
579                                 );
580
581                 }
582         }
583
584 }
585
586
587 static void
588 verify_main_sound_asset (
589         shared_ptr<const DCP> dcp,
590         shared_ptr<const ReelSoundAsset> reel_asset,
591         function<void (string, optional<boost::filesystem::path>)> stage,
592         function<void (float)> progress,
593         vector<VerificationNote>& notes
594         )
595 {
596         auto asset = reel_asset->asset();
597         stage ("Checking sound asset hash", asset->file());
598         auto const r = verify_asset (dcp, reel_asset, progress);
599         switch (r) {
600                 case VERIFY_ASSET_RESULT_BAD:
601                         notes.push_back (
602                                 VerificationNote(
603                                         VerificationNote::VERIFY_ERROR, VerificationNote::SOUND_HASH_INCORRECT, *asset->file()
604                                         )
605                                 );
606                         break;
607                 case VERIFY_ASSET_RESULT_CPL_PKL_DIFFER:
608                         notes.push_back (
609                                 VerificationNote(
610                                         VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_SOUND_HASHES_DIFFER, *asset->file()
611                                         )
612                                 );
613                         break;
614                 default:
615                         break;
616         }
617
618         stage ("Checking sound asset metadata", asset->file());
619
620         verify_language_tag (asset->language(), notes);
621         if (asset->sampling_rate() != 48000) {
622                 notes.push_back (
623                         VerificationNote(
624                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::INVALID_SOUND_FRAME_RATE, *asset->file()
625                                 )
626                         );
627         }
628 }
629
630
631 static void
632 verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, vector<VerificationNote>& notes)
633 {
634         /* XXX: is Language compulsory? */
635         if (reel_asset->language()) {
636                 verify_language_tag (*reel_asset->language(), notes);
637         }
638
639         if (!reel_asset->entry_point()) {
640                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_SUBTITLE_ENTRY_POINT });
641         } else if (reel_asset->entry_point().get()) {
642                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::SUBTITLE_ENTRY_POINT_NON_ZERO });
643         }
644 }
645
646
647 static void
648 verify_closed_caption_reel (shared_ptr<const ReelClosedCaptionAsset> reel_asset, vector<VerificationNote>& notes)
649 {
650         /* XXX: is Language compulsory? */
651         if (reel_asset->language()) {
652                 verify_language_tag (*reel_asset->language(), notes);
653         }
654
655         if (!reel_asset->entry_point()) {
656                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_CLOSED_CAPTION_ENTRY_POINT });
657         } else if (reel_asset->entry_point().get()) {
658                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::CLOSED_CAPTION_ENTRY_POINT_NON_ZERO });
659         }
660 }
661
662
663 struct State
664 {
665         boost::optional<string> subtitle_language;
666 };
667
668
669
670 void
671 verify_smpte_subtitle_asset (
672         shared_ptr<const dcp::SMPTESubtitleAsset> asset,
673         vector<VerificationNote>& notes,
674         State& state
675         )
676 {
677         if (asset->language()) {
678                 auto const language = *asset->language();
679                 verify_language_tag (language, notes);
680                 if (!state.subtitle_language) {
681                         state.subtitle_language = language;
682                 } else if (state.subtitle_language != language) {
683                         notes.push_back (
684                                 VerificationNote(
685                                         VerificationNote::VERIFY_BV21_ERROR, VerificationNote::SUBTITLE_LANGUAGES_DIFFER, *asset->file()
686                                         )
687                                 );
688                 }
689         } else {
690                 notes.push_back (
691                         VerificationNote(
692                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_SUBTITLE_LANGUAGE, *asset->file()
693                                 )
694                         );
695         }
696         if (boost::filesystem::file_size(*asset->file()) > 115 * 1024 * 1024) {
697                 notes.push_back (
698                         VerificationNote(
699                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::TIMED_TEXT_ASSET_TOO_LARGE_IN_BYTES, *asset->file()
700                                 )
701                         );
702         }
703         /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
704          * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
705          */
706         auto fonts = asset->font_data ();
707         int total_size = 0;
708         for (auto i: fonts) {
709                 total_size += i.second.size();
710         }
711         if (total_size > 10 * 1024 * 1024) {
712                 notes.push_back (
713                         VerificationNote(
714                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::TIMED_TEXT_FONTS_TOO_LARGE_IN_BYTES, *asset->file()
715                                 )
716                         );
717         }
718
719         if (!asset->start_time()) {
720                 notes.push_back (
721                         VerificationNote(
722                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_SUBTITLE_START_TIME, *asset->file())
723                         );
724         } else if (asset->start_time() != dcp::Time()) {
725                 notes.push_back (
726                         VerificationNote(
727                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::SUBTITLE_START_TIME_NON_ZERO, *asset->file())
728                         );
729         }
730 }
731
732
733 static void
734 verify_subtitle_asset (
735         shared_ptr<const SubtitleAsset> asset,
736         function<void (string, optional<boost::filesystem::path>)> stage,
737         boost::filesystem::path xsd_dtd_directory,
738         vector<VerificationNote>& notes,
739         State& state
740         )
741 {
742         stage ("Checking subtitle XML", asset->file());
743         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
744          * gets passed through libdcp which may clean up and therefore hide errors.
745          */
746         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
747
748         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
749         if (smpte) {
750                 verify_smpte_subtitle_asset (smpte, notes, state);
751         }
752 }
753
754
755 static void
756 verify_closed_caption_asset (
757         shared_ptr<const SubtitleAsset> asset,
758         function<void (string, optional<boost::filesystem::path>)> stage,
759         boost::filesystem::path xsd_dtd_directory,
760         vector<VerificationNote>& notes,
761         State& state
762         )
763 {
764         verify_subtitle_asset (asset, stage, xsd_dtd_directory, notes, state);
765
766         if (asset->raw_xml().size() > 256 * 1024) {
767                 notes.push_back (
768                         VerificationNote(
769                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::CLOSED_CAPTION_XML_TOO_LARGE_IN_BYTES, *asset->file()
770                                 )
771                         );
772         }
773 }
774
775
776 static
777 void
778 check_text_timing (
779         vector<shared_ptr<dcp::Reel>> reels,
780         optional<int> picture_frame_rate,
781         vector<VerificationNote>& notes,
782         std::function<bool (shared_ptr<dcp::Reel>)> check,
783         std::function<string (shared_ptr<dcp::Reel>)> xml,
784         std::function<int64_t (shared_ptr<dcp::Reel>)> duration
785         )
786 {
787         /* end of last subtitle (in editable units) */
788         optional<int64_t> last_out;
789         auto too_short = false;
790         auto too_close = false;
791         auto too_early = false;
792         /* current reel start time (in editable units) */
793         int64_t reel_offset = 0;
794
795         std::function<void (cxml::ConstNodePtr, int, int, bool)> parse;
796         parse = [&parse, &last_out, &too_short, &too_close, &too_early, &reel_offset](cxml::ConstNodePtr node, int tcr, int pfr, bool first_reel) {
797                 if (node->name() == "Subtitle") {
798                         dcp::Time in (node->string_attribute("TimeIn"), tcr);
799                         dcp::Time out (node->string_attribute("TimeOut"), tcr);
800                         if (first_reel && in < dcp::Time(0, 0, 4, 0, tcr)) {
801                                 too_early = true;
802                         }
803                         auto length = out - in;
804                         if (length.as_editable_units(pfr) < 15) {
805                                 too_short = true;
806                         }
807                         if (last_out) {
808                                 /* XXX: this feels dubious - is it really what Bv2.1 means? */
809                                 auto distance = reel_offset + in.as_editable_units(pfr) - *last_out;
810                                 if (distance >= 0 && distance < 2) {
811                                         too_close = true;
812                                 }
813                         }
814                         last_out = reel_offset + out.as_editable_units(pfr);
815                 } else {
816                         for (auto i: node->node_children()) {
817                                 parse(i, tcr, pfr, first_reel);
818                         }
819                 }
820         };
821
822         for (auto i = 0U; i < reels.size(); ++i) {
823                 if (!check(reels[i])) {
824                         continue;
825                 }
826
827                 /* We need to look at <Subtitle> instances in the XML being checked, so we can't use the subtitles
828                  * read in by libdcp's parser.
829                  */
830
831                 auto doc = make_shared<cxml::Document>("SubtitleReel");
832                 doc->read_string (xml(reels[i]));
833                 auto const tcr = doc->number_child<int>("TimeCodeRate");
834                 parse (doc, tcr, picture_frame_rate.get_value_or(24), i == 0);
835                 reel_offset += duration(reels[i]);
836         }
837
838         if (too_early) {
839                 notes.push_back(
840                         VerificationNote(
841                                 VerificationNote::VERIFY_WARNING, VerificationNote::FIRST_TEXT_TOO_EARLY
842                                 )
843                         );
844         }
845
846         if (too_short) {
847                 notes.push_back (
848                         VerificationNote(
849                                 VerificationNote::VERIFY_WARNING, VerificationNote::SUBTITLE_TOO_SHORT
850                                 )
851                         );
852         }
853
854         if (too_close) {
855                 notes.push_back (
856                         VerificationNote(
857                                 VerificationNote::VERIFY_WARNING, VerificationNote::SUBTITLE_TOO_CLOSE
858                                 )
859                         );
860         }
861 }
862
863
/** Results collected by check_text_lines_and_characters().  Every flag starts off false;
 *  the same object can be passed to several calls to accumulate results over many assets.
 */
struct LinesCharactersResult
{
        /** true if some line tripped the warning_length threshold */
        bool warning_length_exceeded { false };
        /** true if some line tripped the error_length threshold */
        bool error_length_exceeded { false };
        /** true if too many lines were found to be on screen at the same time */
        bool line_count_exceeded { false };
};
870
871
872 static
873 void
874 check_text_lines_and_characters (
875         shared_ptr<SubtitleAsset> asset,
876         int warning_length,
877         int error_length,
878         LinesCharactersResult* result
879         )
880 {
881         class Event
882         {
883         public:
884                 Event (dcp::Time time_, float position_, int characters_)
885                         : time (time_)
886                         , position (position_)
887                         , characters (characters_)
888                 {}
889
890                 Event (dcp::Time time_, shared_ptr<Event> start_)
891                         : time (time_)
892                         , start (start_)
893                 {}
894
895                 dcp::Time time;
896                 int position; //< position from 0 at top of screen to 100 at bottom
897                 int characters;
898                 shared_ptr<Event> start;
899         };
900
901         vector<shared_ptr<Event>> events;
902
903         auto position = [](shared_ptr<const SubtitleString> sub) {
904                 switch (sub->v_align()) {
905                 case VALIGN_TOP:
906                         return lrintf(sub->v_position() * 100);
907                 case VALIGN_CENTER:
908                         return lrintf((0.5f + sub->v_position()) * 100);
909                 case VALIGN_BOTTOM:
910                         return lrintf((1.0f - sub->v_position()) * 100);
911                 }
912
913                 return 0L;
914         };
915
916         for (auto j: asset->subtitles()) {
917                 auto text = dynamic_pointer_cast<const SubtitleString>(j);
918                 if (text) {
919                         auto in = make_shared<Event>(text->in(), position(text), text->text().length());
920                         events.push_back(in);
921                         events.push_back(make_shared<Event>(text->out(), in));
922                 }
923         }
924
925         std::sort(events.begin(), events.end(), [](shared_ptr<Event> const& a, shared_ptr<Event>const& b) {
926                 return a->time < b->time;
927         });
928
929         map<int, int> current;
930         for (auto i: events) {
931                 if (current.size() > 3) {
932                         result->line_count_exceeded = true;
933                 }
934                 for (auto j: current) {
935                         if (j.second >= warning_length) {
936                                 result->warning_length_exceeded = true;
937                         }
938                         if (j.second >= error_length) {
939                                 result->error_length_exceeded = true;
940                         }
941                 }
942
943                 if (i->start) {
944                         /* end of a subtitle */
945                         DCP_ASSERT (current.find(i->start->position) != current.end());
946                         if (current[i->start->position] == i->start->characters) {
947                                 current.erase(i->start->position);
948                         } else {
949                                 current[i->start->position] -= i->start->characters;
950                         }
951                 } else {
952                         /* start of a subtitle */
953                         if (current.find(i->position) == current.end()) {
954                                 current[i->position] = i->characters;
955                         } else {
956                                 current[i->position] += i->characters;
957                         }
958                 }
959         }
960 }
961
962
963 static
964 void
965 check_text_timing (vector<shared_ptr<dcp::Reel>> reels, vector<VerificationNote>& notes)
966 {
967         if (reels.empty()) {
968                 return;
969         }
970
971         optional<int> picture_frame_rate;
972         if (reels[0]->main_picture()) {
973                 picture_frame_rate = reels[0]->main_picture()->frame_rate().numerator;
974         }
975
976         if (reels[0]->main_subtitle()) {
977                 check_text_timing (reels, picture_frame_rate, notes,
978                         [](shared_ptr<dcp::Reel> reel) {
979                                 return static_cast<bool>(reel->main_subtitle());
980                         },
981                         [](shared_ptr<dcp::Reel> reel) {
982                                 return reel->main_subtitle()->asset()->raw_xml();
983                         },
984                         [](shared_ptr<dcp::Reel> reel) {
985                                 return reel->main_subtitle()->actual_duration();
986                         }
987                 );
988         }
989
990         for (auto i = 0U; i < reels[0]->closed_captions().size(); ++i) {
991                 check_text_timing (reels, picture_frame_rate, notes,
992                         [i](shared_ptr<dcp::Reel> reel) {
993                                 return i < reel->closed_captions().size();
994                         },
995                         [i](shared_ptr<dcp::Reel> reel) {
996                                 return reel->closed_captions()[i]->asset()->raw_xml();
997                         },
998                         [i](shared_ptr<dcp::Reel> reel) {
999                                 return reel->closed_captions()[i]->actual_duration();
1000                         }
1001                 );
1002         }
1003 }
1004
1005
1006 void
1007 check_extension_metadata (shared_ptr<dcp::CPL> cpl, vector<VerificationNote>& notes)
1008 {
1009         DCP_ASSERT (cpl->file());
1010         cxml::Document doc ("CompositionPlaylist");
1011         doc.read_file (cpl->file().get());
1012
1013         auto missing = false;
1014         string malformed;
1015
1016         if (auto reel_list = doc.node_child("ReelList")) {
1017                 auto reels = reel_list->node_children("Reel");
1018                 if (!reels.empty()) {
1019                         if (auto asset_list = reels[0]->optional_node_child("AssetList")) {
1020                                 if (auto metadata = asset_list->optional_node_child("CompositionMetadataAsset")) {
1021                                         if (auto extension_list = metadata->optional_node_child("ExtensionMetadataList")) {
1022                                                 missing = true;
1023                                                 for (auto extension: extension_list->node_children("ExtensionMetadata")) {
1024                                                         if (extension->optional_string_attribute("scope").get_value_or("") != "http://isdcf.com/ns/cplmd/app") {
1025                                                                 continue;
1026                                                         }
1027                                                         missing = false;
1028                                                         if (auto name = extension->optional_node_child("Name")) {
1029                                                                 if (name->content() != "Application") {
1030                                                                         malformed = "<Name> should be 'Application'";
1031                                                                 }
1032                                                         }
1033                                                         if (auto property_list = extension->optional_node_child("PropertyList")) {
1034                                                                 if (auto property = property_list->optional_node_child("Property")) {
1035                                                                         if (auto name = property->optional_node_child("Name")) {
1036                                                                                 if (name->content() != "DCP Constraints Profile") {
1037                                                                                         malformed = "<Name> property should be 'DCP Constraints Profile'";
1038                                                                                 }
1039                                                                         }
1040                                                                         if (auto value = property->optional_node_child("Value")) {
1041                                                                                 if (value->content() != "SMPTE-RDD-52:2020-Bv2.1") {
1042                                                                                         malformed = "<Value> property should be 'SMPTE-RDD-52:2020-Bv2.1'";
1043                                                                                 }
1044                                                                         }
1045                                                                 }
1046                                                         }
1047                                                 }
1048                                         } else {
1049                                                 missing = true;
1050                                         }
1051                                 }
1052                         }
1053                 }
1054         }
1055
1056         if (missing) {
1057                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_EXTENSION_METADATA});
1058         } else if (!malformed.empty()) {
1059                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::INVALID_EXTENSION_METADATA, malformed});
1060         }
1061 }
1062
1063
1064 bool
1065 pkl_has_encrypted_assets (shared_ptr<DCP> dcp, shared_ptr<PKL> pkl)
1066 {
1067         vector<string> encrypted;
1068         for (auto i: dcp->cpls()) {
1069                 for (auto j: i->reel_mxfs()) {
1070                         if (j->asset_ref().resolved()) {
1071                                 /* It's a bit surprising / broken but Interop subtitle assets are represented
1072                                  * in reels by ReelSubtitleAsset which inherits ReelMXF, so it's possible for
1073                                  * ReelMXFs to have assets which are not MXFs.
1074                                  */
1075                                 if (auto asset = dynamic_pointer_cast<MXF>(j->asset_ref().asset())) {
1076                                         if (asset->encrypted()) {
1077                                                 encrypted.push_back(j->asset_ref().id());
1078                                         }
1079                                 }
1080                         }
1081                 }
1082         }
1083
1084         for (auto i: pkl->asset_list()) {
1085                 if (find(encrypted.begin(), encrypted.end(), i->id()) != encrypted.end()) {
1086                         return true;
1087                 }
1088         }
1089
1090         return false;
1091 }
1092
1093
1094 vector<VerificationNote>
1095 dcp::verify (
1096         vector<boost::filesystem::path> directories,
1097         function<void (string, optional<boost::filesystem::path>)> stage,
1098         function<void (float)> progress,
1099         boost::filesystem::path xsd_dtd_directory
1100         )
1101 {
1102         xsd_dtd_directory = boost::filesystem::canonical (xsd_dtd_directory);
1103
1104         vector<VerificationNote> notes;
1105         State state;
1106
1107         vector<shared_ptr<DCP>> dcps;
1108         for (auto i: directories) {
1109                 dcps.push_back (shared_ptr<DCP> (new DCP (i)));
1110         }
1111
1112         for (auto dcp: dcps) {
1113                 stage ("Checking DCP", dcp->directory());
1114                 try {
1115                         dcp->read (&notes);
1116                 } catch (ReadError& e) {
1117                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
1118                 } catch (XMLError& e) {
1119                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
1120                 } catch (MXFFileError& e) {
1121                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
1122                 } catch (cxml::Error& e) {
1123                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
1124                 }
1125
1126                 if (dcp->standard() != dcp::SMPTE) {
1127                         notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::NOT_SMPTE));
1128                 }
1129
1130                 for (auto cpl: dcp->cpls()) {
1131                         stage ("Checking CPL", cpl->file());
1132                         validate_xml (cpl->file().get(), xsd_dtd_directory, notes);
1133
1134                         if (cpl->any_encrypted() && !cpl->all_encrypted()) {
1135                                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::PARTIALLY_ENCRYPTED});
1136                         }
1137
1138                         for (auto const& i: cpl->additional_subtitle_languages()) {
1139                                 verify_language_tag (i, notes);
1140                         }
1141
1142                         if (cpl->release_territory()) {
1143                                 if (!cpl->release_territory_scope() || cpl->release_territory_scope().get() != "http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata#scope/release-territory/UNM49") {
1144                                         auto terr = cpl->release_territory().get();
1145                                         /* Must be a valid region tag, or "001" */
1146                                         try {
1147                                                 LanguageTag::RegionSubtag test (terr);
1148                                         } catch (...) {
1149                                                 if (terr != "001") {
1150                                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::BAD_LANGUAGE, terr});
1151                                                 }
1152                                         }
1153                                 }
1154                         }
1155
1156                         if (dcp->standard() == dcp::SMPTE) {
1157                                 if (!cpl->annotation_text()) {
1158                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_ANNOTATION_TEXT_IN_CPL));
1159                                 } else if (cpl->annotation_text().get() != cpl->content_title_text()) {
1160                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_WARNING, VerificationNote::CPL_ANNOTATION_TEXT_DIFFERS_FROM_CONTENT_TITLE_TEXT));
1161                                 }
1162                         }
1163
1164                         for (auto i: dcp->pkls()) {
1165                                 /* Check that the CPL's hash corresponds to the PKL */
1166                                 optional<string> h = i->hash(cpl->id());
1167                                 if (h && make_digest(ArrayData(*cpl->file())) != *h) {
1168                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::CPL_HASH_INCORRECT));
1169                                 }
1170
1171                                 /* Check that any PKL with a single CPL has its AnnotationText the same as the CPL's ContentTitleText */
1172                                 optional<string> required_annotation_text;
1173                                 for (auto j: i->asset_list()) {
1174                                         /* See if this is a CPL */
1175                                         for (auto k: dcp->cpls()) {
1176                                                 if (j->id() == k->id()) {
1177                                                         if (!required_annotation_text) {
1178                                                                 /* First CPL we have found; this is the required AnnotationText unless we find another */
1179                                                                 required_annotation_text = cpl->content_title_text();
1180                                                         } else {
1181                                                                 /* There's more than one CPL so we don't care what the PKL's AnnotationText is */
1182                                                                 required_annotation_text = boost::none;
1183                                                         }
1184                                                 }
1185                                         }
1186                                 }
1187
1188                                 if (required_annotation_text && i->annotation_text() != required_annotation_text) {
1189                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::PKL_ANNOTATION_TEXT_DOES_NOT_MATCH_CPL_CONTENT_TITLE_TEXT, i->file().get()});
1190                                 }
1191                         }
1192
1193                         /* set to true if any reel has a MainSubtitle */
1194                         auto have_main_subtitle = false;
1195                         /* set to true if any reel has no MainSubtitle */
1196                         auto have_no_main_subtitle = false;
1197                         /* fewest number of closed caption assets seen in a reel */
1198                         size_t fewest_closed_captions = SIZE_MAX;
1199                         /* most number of closed caption assets seen in a reel */
1200                         size_t most_closed_captions = 0;
1201                         map<Marker, Time> markers_seen;
1202
1203                         for (auto reel: cpl->reels()) {
1204                                 stage ("Checking reel", optional<boost::filesystem::path>());
1205
1206                                 for (auto i: reel->assets()) {
1207                                         if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1208                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::DURATION_TOO_SMALL, i->id()));
1209                                         }
1210                                         if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1211                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INTRINSIC_DURATION_TOO_SMALL, i->id()));
1212                                         }
1213                                         auto mxf = dynamic_pointer_cast<ReelMXF>(i);
1214                                         if (mxf && !mxf->hash()) {
1215                                                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_HASH, i->id()});
1216                                         }
1217                                 }
1218
1219                                 if (dcp->standard() == dcp::SMPTE) {
1220                                         boost::optional<int64_t> duration;
1221                                         for (auto i: reel->assets()) {
1222                                                 if (!duration) {
1223                                                         duration = i->actual_duration();
1224                                                 } else if (*duration != i->actual_duration()) {
1225                                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISMATCHED_ASSET_DURATION, i->id()));
1226                                                         break;
1227                                                 }
1228                                         }
1229                                 }
1230
1231                                 if (reel->main_picture()) {
1232                                         /* Check reel stuff */
1233                                         auto const frame_rate = reel->main_picture()->frame_rate();
1234                                         if (frame_rate.denominator != 1 ||
1235                                             (frame_rate.numerator != 24 &&
1236                                              frame_rate.numerator != 25 &&
1237                                              frame_rate.numerator != 30 &&
1238                                              frame_rate.numerator != 48 &&
1239                                              frame_rate.numerator != 50 &&
1240                                              frame_rate.numerator != 60 &&
1241                                              frame_rate.numerator != 96)) {
1242                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INVALID_PICTURE_FRAME_RATE));
1243                                         }
1244                                         /* Check asset */
1245                                         if (reel->main_picture()->asset_ref().resolved()) {
1246                                                 verify_main_picture_asset (dcp, reel->main_picture(), stage, progress, notes);
1247                                         }
1248                                 }
1249
1250                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
1251                                         verify_main_sound_asset (dcp, reel->main_sound(), stage, progress, notes);
1252                                 }
1253
1254                                 if (reel->main_subtitle()) {
1255                                         verify_main_subtitle_reel (reel->main_subtitle(), notes);
1256                                         if (reel->main_subtitle()->asset_ref().resolved()) {
1257                                                 verify_subtitle_asset (reel->main_subtitle()->asset(), stage, xsd_dtd_directory, notes, state);
1258                                         }
1259                                         have_main_subtitle = true;
1260                                 } else {
1261                                         have_no_main_subtitle = true;
1262                                 }
1263
1264                                 for (auto i: reel->closed_captions()) {
1265                                         verify_closed_caption_reel (i, notes);
1266                                         if (i->asset_ref().resolved()) {
1267                                                 verify_closed_caption_asset (i->asset(), stage, xsd_dtd_directory, notes, state);
1268                                         }
1269                                 }
1270
1271                                 if (reel->main_markers()) {
1272                                         for (auto const& i: reel->main_markers()->get()) {
1273                                                 markers_seen.insert (i);
1274                                         }
1275                                 }
1276
1277                                 fewest_closed_captions = std::min (fewest_closed_captions, reel->closed_captions().size());
1278                                 most_closed_captions = std::max (most_closed_captions, reel->closed_captions().size());
1279                         }
1280
1281                         if (dcp->standard() == dcp::SMPTE) {
1282
1283                                 if (have_main_subtitle && have_no_main_subtitle) {
1284                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MAIN_SUBTITLE_NOT_IN_ALL_REELS});
1285                                 }
1286
1287                                 if (fewest_closed_captions != most_closed_captions) {
1288                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::CLOSED_CAPTION_ASSET_COUNTS_DIFFER});
1289                                 }
1290
1291                                 if (cpl->content_kind() == FEATURE) {
1292                                         if (markers_seen.find(dcp::Marker::FFEC) == markers_seen.end()) {
1293                                                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_FFEC_IN_FEATURE});
1294                                         }
1295                                         if (markers_seen.find(dcp::Marker::FFMC) == markers_seen.end()) {
1296                                                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_FFMC_IN_FEATURE});
1297                                         }
1298                                 }
1299
1300                                 auto ffoc = markers_seen.find(dcp::Marker::FFOC);
1301                                 if (ffoc == markers_seen.end()) {
1302                                         notes.push_back ({VerificationNote::VERIFY_WARNING, VerificationNote::MISSING_FFOC});
1303                                 } else if (ffoc->second.e != 1) {
1304                                         notes.push_back ({VerificationNote::VERIFY_WARNING, VerificationNote::INCORRECT_FFOC});
1305                                 }
1306
1307                                 auto lfoc = markers_seen.find(dcp::Marker::LFOC);
1308                                 if (lfoc == markers_seen.end()) {
1309                                         notes.push_back ({VerificationNote::VERIFY_WARNING, VerificationNote::MISSING_LFOC});
1310                                 } else if (lfoc->second.as_editable_units(lfoc->second.tcr) != (cpl->reels().back()->duration() - 1)) {
1311                                         notes.push_back ({VerificationNote::VERIFY_WARNING, VerificationNote::INCORRECT_LFOC});
1312                                 }
1313
1314                                 check_text_timing (cpl->reels(), notes);
1315
1316                                 LinesCharactersResult result;
1317                                 for (auto reel: cpl->reels()) {
1318                                         if (reel->main_subtitle() && reel->main_subtitle()->asset()) {
1319                                                 check_text_lines_and_characters (reel->main_subtitle()->asset(), 52, 79, &result);
1320                                         }
1321                                 }
1322
1323                                 if (result.line_count_exceeded) {
1324                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_WARNING, VerificationNote::TOO_MANY_SUBTITLE_LINES));
1325                                 }
1326                                 if (result.error_length_exceeded) {
1327                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_WARNING, VerificationNote::SUBTITLE_LINE_TOO_LONG));
1328                                 } else if (result.warning_length_exceeded) {
1329                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_WARNING, VerificationNote::SUBTITLE_LINE_LONGER_THAN_RECOMMENDED));
1330                                 }
1331
1332                                 result = LinesCharactersResult();
1333                                 for (auto reel: cpl->reels()) {
1334                                         for (auto i: reel->closed_captions()) {
1335                                                 if (i->asset()) {
1336                                                         check_text_lines_and_characters (i->asset(), 32, 32, &result);
1337                                                 }
1338                                         }
1339                                 }
1340
1341                                 if (result.line_count_exceeded) {
1342                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::TOO_MANY_CLOSED_CAPTION_LINES));
1343                                 }
1344                                 if (result.error_length_exceeded) {
1345                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::CLOSED_CAPTION_LINE_TOO_LONG));
1346                                 }
1347
1348                                 if (!cpl->full_content_title_text()) {
1349                                         /* Since FullContentTitleText is assumed always to exist if there's a CompositionMetadataAsset we
1350                                          * can use it as a proxy for CompositionMetadataAsset's existence.
1351                                          */
1352                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_CPL_METADATA});
1353                                 } else if (!cpl->version_number()) {
1354                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_CPL_METADATA_VERSION_NUMBER});
1355                                 }
1356
1357                                 check_extension_metadata (cpl, notes);
1358
1359                                 if (cpl->any_encrypted()) {
1360                                         cxml::Document doc ("CompositionPlaylist");
1361                                         DCP_ASSERT (cpl->file());
1362                                         doc.read_file (cpl->file().get());
1363                                         if (!doc.optional_node_child("Signature")) {
1364                                                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::CPL_WITH_ENCRYPTED_CONTENT_NOT_SIGNED, cpl->file().get()});
1365                                         }
1366                                 }
1367                         }
1368                 }
1369
1370                 for (auto pkl: dcp->pkls()) {
1371                         stage ("Checking PKL", pkl->file());
1372                         validate_xml (pkl->file().get(), xsd_dtd_directory, notes);
1373                         if (pkl_has_encrypted_assets(dcp, pkl)) {
1374                                 cxml::Document doc ("PackingList");
1375                                 doc.read_file (pkl->file().get());
1376                                 if (!doc.optional_node_child("Signature")) {
1377                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::PKL_WITH_ENCRYPTED_CONTENT_NOT_SIGNED, pkl->file().get()});
1378                                 }
1379                         }
1380                 }
1381
1382                 if (dcp->asset_map_path()) {
1383                         stage ("Checking ASSETMAP", dcp->asset_map_path().get());
1384                         validate_xml (dcp->asset_map_path().get(), xsd_dtd_directory, notes);
1385                 } else {
1386                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::MISSING_ASSETMAP));
1387                 }
1388         }
1389
1390         return notes;
1391 }
1392
1393 string
1394 dcp::note_to_string (dcp::VerificationNote note)
1395 {
1396         switch (note.code()) {
1397         case dcp::VerificationNote::GENERAL_READ:
1398                 return *note.note();
1399         case dcp::VerificationNote::CPL_HASH_INCORRECT:
1400                 return "The hash of the CPL in the PKL does not agree with the CPL file.";
1401         case dcp::VerificationNote::INVALID_PICTURE_FRAME_RATE:
1402                 return "The picture in a reel has an invalid frame rate.";
1403         case dcp::VerificationNote::PICTURE_HASH_INCORRECT:
1404                 return dcp::String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
1405         case dcp::VerificationNote::PKL_CPL_PICTURE_HASHES_DIFFER:
1406                 return dcp::String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
1407         case dcp::VerificationNote::SOUND_HASH_INCORRECT:
1408                 return dcp::String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
1409         case dcp::VerificationNote::PKL_CPL_SOUND_HASHES_DIFFER:
1410                 return dcp::String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
1411         case dcp::VerificationNote::EMPTY_ASSET_PATH:
1412                 return "The asset map contains an empty asset path.";
1413         case dcp::VerificationNote::MISSING_ASSET:
1414                 return String::compose("The file for an asset in the asset map cannot be found; missing file is %1.", note.file()->filename());
1415         case dcp::VerificationNote::MISMATCHED_STANDARD:
1416                 return "The DCP contains both SMPTE and Interop parts.";
1417         case dcp::VerificationNote::XML_VALIDATION_ERROR:
1418                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
1419         case dcp::VerificationNote::MISSING_ASSETMAP:
1420                 return "No ASSETMAP or ASSETMAP.xml was found.";
1421         case dcp::VerificationNote::INTRINSIC_DURATION_TOO_SMALL:
1422                 return String::compose("The intrinsic duration of an asset is less than 1 second long: %1", note.note().get());
1423         case dcp::VerificationNote::DURATION_TOO_SMALL:
1424                 return String::compose("The duration of an asset is less than 1 second long: %1", note.note().get());
1425         case dcp::VerificationNote::PICTURE_FRAME_TOO_LARGE_IN_BYTES:
1426                 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
1427         case dcp::VerificationNote::PICTURE_FRAME_NEARLY_TOO_LARGE_IN_BYTES:
1428                 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
1429         case dcp::VerificationNote::EXTERNAL_ASSET:
1430                 return String::compose("An asset that this DCP refers to is not included in the DCP.  It may be a VF.  Missing asset is %1.", note.note().get());
1431         case dcp::VerificationNote::NOT_SMPTE:
1432                 return "This DCP does not use the SMPTE standard, which is required for Bv2.1 compliance.";
1433         case dcp::VerificationNote::BAD_LANGUAGE:
1434                 return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
1435         case dcp::VerificationNote::PICTURE_ASSET_INVALID_SIZE_IN_PIXELS:
1436                 return String::compose("A picture asset's size (%1) is not one of those allowed by Bv2.1 (2048x858, 1998x1080, 4096x1716 or 3996x2160)", note.note().get());
1437         case dcp::VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_2K:
1438                 return String::compose("A picture asset's frame rate (%1) is not one of those allowed for 2K DCPs by Bv2.1 (24, 25 or 48fps)", note.note().get());
1439         case dcp::VerificationNote::PICTURE_ASSET_INVALID_FRAME_RATE_FOR_4K:
1440                 return String::compose("A picture asset's frame rate (%1) is not 24fps as required for 4K DCPs by Bv2.1", note.note().get());
1441         case dcp::VerificationNote::PICTURE_ASSET_4K_3D:
1442                 return "3D 4K DCPs are not allowed by Bv2.1";
1443         case dcp::VerificationNote::CLOSED_CAPTION_XML_TOO_LARGE_IN_BYTES:
1444                 return String::compose("The XML for the closed caption asset %1 is longer than the 256KB maximum required by Bv2.1", note.file()->filename());
1445         case dcp::VerificationNote::TIMED_TEXT_ASSET_TOO_LARGE_IN_BYTES:
1446                 return String::compose("The total size of the timed text asset %1 is larger than the 115MB maximum required by Bv2.1", note.file()->filename());
1447         case dcp::VerificationNote::TIMED_TEXT_FONTS_TOO_LARGE_IN_BYTES:
1448                 return String::compose("The total size of the fonts in timed text asset %1 is larger than the 10MB maximum required by Bv2.1", note.file()->filename());
1449         case dcp::VerificationNote::MISSING_SUBTITLE_LANGUAGE:
1450                 return String::compose("The XML for a SMPTE subtitle asset has no <Language> tag, which is required by Bv2.1", note.file()->filename());
1451         case dcp::VerificationNote::SUBTITLE_LANGUAGES_DIFFER:
1452                 return String::compose("Some subtitle assets have different <Language> tags than others", note.file()->filename());
1453         case dcp::VerificationNote::MISSING_SUBTITLE_START_TIME:
1454                 return String::compose("The XML for a SMPTE subtitle asset has no <StartTime> tag, which is required by Bv2.1", note.file()->filename());
1455         case dcp::VerificationNote::SUBTITLE_START_TIME_NON_ZERO:
1456                 return String::compose("The XML for a SMPTE subtitle asset has a non-zero <StartTime> tag, which is disallowed by Bv2.1", note.file()->filename());
1457         case dcp::VerificationNote::FIRST_TEXT_TOO_EARLY:
1458                 return "The first subtitle or closed caption is less than 4 seconds from the start of the DCP.";
1459         case dcp::VerificationNote::SUBTITLE_TOO_SHORT:
1460                 return "At least one subtitle is less than the minimum of 15 frames suggested by Bv2.1";
1461         case dcp::VerificationNote::SUBTITLE_TOO_CLOSE:
1462                 return "At least one pair of subtitles are separated by less than the the minimum of 2 frames suggested by Bv2.1";
1463         case dcp::VerificationNote::TOO_MANY_SUBTITLE_LINES:
1464                 return "There are more than 3 subtitle lines in at least one place in the DCP, which Bv2.1 advises against.";
1465         case dcp::VerificationNote::SUBTITLE_LINE_LONGER_THAN_RECOMMENDED:
1466                 return "There are more than 52 characters in at least one subtitle line, which Bv2.1 advises against.";
1467         case dcp::VerificationNote::SUBTITLE_LINE_TOO_LONG:
1468                 return "There are more than 79 characters in at least one subtitle line, which Bv2.1 strongly advises against.";
1469         case dcp::VerificationNote::TOO_MANY_CLOSED_CAPTION_LINES:
1470                 return "There are more than 3 closed caption lines in at least one place, which is disallowed by Bv2.1";
1471         case dcp::VerificationNote::CLOSED_CAPTION_LINE_TOO_LONG:
1472                 return "There are more than 32 characters in at least one closed caption line, which is disallowed by Bv2.1";
1473         case dcp::VerificationNote::INVALID_SOUND_FRAME_RATE:
1474                 return "A sound asset has a sampling rate other than 48kHz, which is disallowed by Bv2.1";
1475         case dcp::VerificationNote::MISSING_ANNOTATION_TEXT_IN_CPL:
1476                 return "The CPL has no <AnnotationText> tag, which is required by Bv2.1";
1477         case dcp::VerificationNote::CPL_ANNOTATION_TEXT_DIFFERS_FROM_CONTENT_TITLE_TEXT:
1478                 return "The CPL's <AnnotationText> differs from its <ContentTitleText>, which Bv2.1 advises against.";
1479         case dcp::VerificationNote::MISMATCHED_ASSET_DURATION:
1480                 return "All assets in a reel do not have the same duration, which is required by Bv2.1";
1481         case dcp::VerificationNote::MAIN_SUBTITLE_NOT_IN_ALL_REELS:
1482                 return "At least one reel contains a subtitle asset, but some reel(s) do not";
1483         case dcp::VerificationNote::CLOSED_CAPTION_ASSET_COUNTS_DIFFER:
1484                 return "At least one reel has closed captions, but reels have different numbers of closed caption assets.";
1485         case dcp::VerificationNote::MISSING_SUBTITLE_ENTRY_POINT:
1486                 return "Subtitle assets must have an <EntryPoint> tag.";
1487         case dcp::VerificationNote::SUBTITLE_ENTRY_POINT_NON_ZERO:
1488                 return "Subtitle assets must have an <EntryPoint> of 0.";
1489         case dcp::VerificationNote::MISSING_CLOSED_CAPTION_ENTRY_POINT:
1490                 return "Closed caption assets must have an <EntryPoint> tag.";
1491         case dcp::VerificationNote::CLOSED_CAPTION_ENTRY_POINT_NON_ZERO:
1492                 return "Closed caption assets must have an <EntryPoint> of 0.";
1493         case dcp::VerificationNote::MISSING_HASH:
1494                 return String::compose("An asset is missing a <Hash> tag: %1", note.note().get());
1495         case dcp::VerificationNote::MISSING_FFEC_IN_FEATURE:
1496                 return "The DCP is marked as a Feature but there is no FFEC (first frame of end credits) marker";
1497         case dcp::VerificationNote::MISSING_FFMC_IN_FEATURE:
1498                 return "The DCP is marked as a Feature but there is no FFMC (first frame of moving credits) marker";
1499         case dcp::VerificationNote::MISSING_FFOC:
1500                 return "There should be a FFOC (first frame of content) marker";
1501         case dcp::VerificationNote::MISSING_LFOC:
1502                 return "There should be a LFOC (last frame of content) marker";
1503         case dcp::VerificationNote::INCORRECT_FFOC:
1504                 return "The FFOC marker should bet set to 1";
1505         case dcp::VerificationNote::INCORRECT_LFOC:
1506                 return "The LFOC marker should be set to 1 less than the duration of the last reel";
1507         case dcp::VerificationNote::MISSING_CPL_METADATA:
1508                 return "There should be a <CompositionMetadataAsset> tag";
1509         case dcp::VerificationNote::MISSING_CPL_METADATA_VERSION_NUMBER:
1510                 return "The CPL metadata must contain a <VersionNumber>";
1511         case dcp::VerificationNote::MISSING_EXTENSION_METADATA:
1512                 return "The CPL metadata must contain <ExtensionMetadata>";
1513         case dcp::VerificationNote::INVALID_EXTENSION_METADATA:
1514                 return String::compose("The <ExtensionMetadata> is malformed in some way: %1", note.note().get());
1515         case dcp::VerificationNote::CPL_WITH_ENCRYPTED_CONTENT_NOT_SIGNED:
1516                 return String::compose("The CPL %1, which has encrypted content, is not signed", note.file()->filename());
1517         case dcp::VerificationNote::PKL_WITH_ENCRYPTED_CONTENT_NOT_SIGNED:
1518                 return String::compose("The PKL %1, which has encrypted content, is not signed", note.file()->filename());
1519         case dcp::VerificationNote::PKL_ANNOTATION_TEXT_DOES_NOT_MATCH_CPL_CONTENT_TITLE_TEXT:
1520                 return String::compose("The PKL %1 has only one CPL but its <AnnotationText> does not match the CPL's <ContentTitleText>", note.file()->filename());
1521         case dcp::VerificationNote::PARTIALLY_ENCRYPTED:
1522                 return "Some assets are encrypted but some are not";
1523         }
1524
1525         return "";
1526 }