382a67ba9e71a9517e480a9c6b234e1f1984143c
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2021 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34
35 /** @file  src/verify.cc
36  *  @brief dcp::verify() method and associated code
37  */
38
39
40 #include "compose.hpp"
41 #include "cpl.h"
42 #include "dcp.h"
43 #include "exceptions.h"
44 #include "interop_subtitle_asset.h"
45 #include "mono_picture_asset.h"
46 #include "mono_picture_frame.h"
47 #include "raw_convert.h"
48 #include "reel.h"
49 #include "reel_closed_caption_asset.h"
50 #include "reel_markers_asset.h"
51 #include "reel_picture_asset.h"
52 #include "reel_sound_asset.h"
53 #include "reel_subtitle_asset.h"
54 #include "smpte_subtitle_asset.h"
55 #include "stereo_picture_asset.h"
56 #include "stereo_picture_frame.h"
57 #include "verify.h"
58 #include "verify_j2k.h"
59 #include <xercesc/dom/DOMAttr.hpp>
60 #include <xercesc/dom/DOMDocument.hpp>
61 #include <xercesc/dom/DOMError.hpp>
62 #include <xercesc/dom/DOMErrorHandler.hpp>
63 #include <xercesc/dom/DOMException.hpp>
64 #include <xercesc/dom/DOMImplementation.hpp>
65 #include <xercesc/dom/DOMImplementationLS.hpp>
66 #include <xercesc/dom/DOMImplementationRegistry.hpp>
67 #include <xercesc/dom/DOMLSParser.hpp>
68 #include <xercesc/dom/DOMLocator.hpp>
69 #include <xercesc/dom/DOMNamedNodeMap.hpp>
70 #include <xercesc/dom/DOMNodeList.hpp>
71 #include <xercesc/framework/LocalFileInputSource.hpp>
72 #include <xercesc/framework/MemBufInputSource.hpp>
73 #include <xercesc/parsers/AbstractDOMParser.hpp>
74 #include <xercesc/parsers/XercesDOMParser.hpp>
75 #include <xercesc/sax/HandlerBase.hpp>
76 #include <xercesc/util/PlatformUtils.hpp>
77 #include <boost/algorithm/string.hpp>
78 #include <iostream>
79 #include <map>
80 #include <vector>
81
82
83 using std::list;
84 using std::vector;
85 using std::string;
86 using std::cout;
87 using std::map;
88 using std::max;
89 using std::set;
90 using std::shared_ptr;
91 using std::make_shared;
92 using boost::optional;
93 using boost::function;
94 using std::dynamic_pointer_cast;
95
96
97 using namespace dcp;
98 using namespace xercesc;
99
100
101 static
102 string
103 xml_ch_to_string (XMLCh const * a)
104 {
105         char* x = XMLString::transcode(a);
106         string const o(x);
107         XMLString::release(&x);
108         return o;
109 }
110
111
112 class XMLValidationError
113 {
114 public:
115         XMLValidationError (SAXParseException const & e)
116                 : _message (xml_ch_to_string(e.getMessage()))
117                 , _line (e.getLineNumber())
118                 , _column (e.getColumnNumber())
119                 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
120                 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
121         {
122
123         }
124
125         string message () const {
126                 return _message;
127         }
128
129         uint64_t line () const {
130                 return _line;
131         }
132
133         uint64_t column () const {
134                 return _column;
135         }
136
137         string public_id () const {
138                 return _public_id;
139         }
140
141         string system_id () const {
142                 return _system_id;
143         }
144
145 private:
146         string _message;
147         uint64_t _line;
148         uint64_t _column;
149         string _public_id;
150         string _system_id;
151 };
152
153
154 class DCPErrorHandler : public ErrorHandler
155 {
156 public:
157         void warning(const SAXParseException& e)
158         {
159                 maybe_add (XMLValidationError(e));
160         }
161
162         void error(const SAXParseException& e)
163         {
164                 maybe_add (XMLValidationError(e));
165         }
166
167         void fatalError(const SAXParseException& e)
168         {
169                 maybe_add (XMLValidationError(e));
170         }
171
172         void resetErrors() {
173                 _errors.clear ();
174         }
175
176         list<XMLValidationError> errors () const {
177                 return _errors;
178         }
179
180 private:
181         void maybe_add (XMLValidationError e)
182         {
183                 /* XXX: nasty hack */
184                 if (
185                         e.message().find("schema document") != string::npos &&
186                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
187                         ) {
188                         return;
189                 }
190
191                 _errors.push_back (e);
192         }
193
194         list<XMLValidationError> _errors;
195 };
196
197
198 class StringToXMLCh
199 {
200 public:
201         StringToXMLCh (string a)
202         {
203                 _buffer = XMLString::transcode(a.c_str());
204         }
205
206         StringToXMLCh (StringToXMLCh const&) = delete;
207         StringToXMLCh& operator= (StringToXMLCh const&) = delete;
208
209         ~StringToXMLCh ()
210         {
211                 XMLString::release (&_buffer);
212         }
213
214         XMLCh const * get () const {
215                 return _buffer;
216         }
217
218 private:
219         XMLCh* _buffer;
220 };
221
222
223 class LocalFileResolver : public EntityResolver
224 {
225 public:
226         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
227                 : _xsd_dtd_directory (xsd_dtd_directory)
228         {
229                 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
230                  * found without being here.
231                  */
232                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
233                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
234                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
235                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
236                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
237                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
238                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
239                 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
240                 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
241                 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
242                 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
243                 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
244                 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
245         }
246
247         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
248         {
249                 if (!system_id) {
250                         return 0;
251                 }
252                 auto system_id_str = xml_ch_to_string (system_id);
253                 auto p = _xsd_dtd_directory;
254                 if (_files.find(system_id_str) == _files.end()) {
255                         p /= system_id_str;
256                 } else {
257                         p /= _files[system_id_str];
258                 }
259                 StringToXMLCh ch (p.string());
260                 return new LocalFileInputSource(ch.get());
261         }
262
263 private:
264         void add (string uri, string file)
265         {
266                 _files[uri] = file;
267         }
268
269         std::map<string, string> _files;
270         boost::filesystem::path _xsd_dtd_directory;
271 };
272
273
274 static void
275 parse (XercesDOMParser& parser, boost::filesystem::path xml)
276 {
277         parser.parse(xml.string().c_str());
278 }
279
280
281 static void
282 parse (XercesDOMParser& parser, string xml)
283 {
284         xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
285         parser.parse(buf);
286 }
287
288
289 template <class T>
290 void
291 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, vector<VerificationNote>& notes)
292 {
293         try {
294                 XMLPlatformUtils::Initialize ();
295         } catch (XMLException& e) {
296                 throw MiscError ("Failed to initialise xerces library");
297         }
298
299         DCPErrorHandler error_handler;
300
301         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
302         {
303                 XercesDOMParser parser;
304                 parser.setValidationScheme(XercesDOMParser::Val_Always);
305                 parser.setDoNamespaces(true);
306                 parser.setDoSchema(true);
307
308                 vector<string> schema;
309                 schema.push_back("xml.xsd");
310                 schema.push_back("xmldsig-core-schema.xsd");
311                 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
312                 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
313                 schema.push_back("SMPTE-429-9-2007-AM.xsd");
314                 schema.push_back("Main-Stereo-Picture-CPL.xsd");
315                 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
316                 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
317                 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
318                 schema.push_back("DCSubtitle.v1.mattsson.xsd");
319                 schema.push_back("DCDMSubtitle-2010.xsd");
320                 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
321                 schema.push_back("SMPTE-429-16.xsd");
322                 schema.push_back("Dolby-2012-AD.xsd");
323                 schema.push_back("SMPTE-429-10-2008.xsd");
324                 schema.push_back("xlink.xsd");
325                 schema.push_back("SMPTE-335-2012.xsd");
326                 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
327                 schema.push_back("isdcf-mca.xsd");
328                 schema.push_back("SMPTE-429-12-2008.xsd");
329
330                 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
331                  * Schemas that are not mentioned in this list are not read, and the things
332                  * they describe are not checked.
333                  */
334                 string locations;
335                 for (auto i: schema) {
336                         locations += String::compose("%1 %1 ", i, i);
337                 }
338
339                 parser.setExternalSchemaLocation(locations.c_str());
340                 parser.setValidationSchemaFullChecking(true);
341                 parser.setErrorHandler(&error_handler);
342
343                 LocalFileResolver resolver (xsd_dtd_directory);
344                 parser.setEntityResolver(&resolver);
345
346                 try {
347                         parser.resetDocumentPool();
348                         parse(parser, xml);
349                 } catch (XMLException& e) {
350                         throw MiscError(xml_ch_to_string(e.getMessage()));
351                 } catch (DOMException& e) {
352                         throw MiscError(xml_ch_to_string(e.getMessage()));
353                 } catch (...) {
354                         throw MiscError("Unknown exception from xerces");
355                 }
356         }
357
358         XMLPlatformUtils::Terminate ();
359
360         for (auto i: error_handler.errors()) {
361                 notes.push_back ({
362                         VerificationNote::Type::ERROR,
363                         VerificationNote::Code::INVALID_XML,
364                         i.message(),
365                         boost::trim_copy(i.public_id() + " " + i.system_id()),
366                         i.line()
367                 });
368         }
369 }
370
371
/** Result of verify_asset() */
enum class VerifyAssetResult {
        /** None of the problems below were found */
        GOOD,
        /** The CPL gives a hash for the asset and it is different to the one in the PKL */
        CPL_PKL_DIFFER,
        /** The hash calculated from the asset is different to the one in the PKL */
        BAD
};
377
378
379 static VerifyAssetResult
380 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
381 {
382         auto const actual_hash = reel_file_asset->asset_ref()->hash(progress);
383
384         auto pkls = dcp->pkls();
385         /* We've read this DCP in so it must have at least one PKL */
386         DCP_ASSERT (!pkls.empty());
387
388         auto asset = reel_file_asset->asset_ref().asset();
389
390         optional<string> pkl_hash;
391         for (auto i: pkls) {
392                 pkl_hash = i->hash (reel_file_asset->asset_ref()->id());
393                 if (pkl_hash) {
394                         break;
395                 }
396         }
397
398         DCP_ASSERT (pkl_hash);
399
400         auto cpl_hash = reel_file_asset->hash();
401         if (cpl_hash && *cpl_hash != *pkl_hash) {
402                 return VerifyAssetResult::CPL_PKL_DIFFER;
403         }
404
405         if (actual_hash != *pkl_hash) {
406                 return VerifyAssetResult::BAD;
407         }
408
409         return VerifyAssetResult::GOOD;
410 }
411
412
413 void
414 verify_language_tag (string tag, vector<VerificationNote>& notes)
415 {
416         try {
417                 LanguageTag test (tag);
418         } catch (LanguageTagError &) {
419                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, tag});
420         }
421 }
422
423
424 static void
425 verify_picture_asset (shared_ptr<const ReelFileAsset> reel_file_asset, boost::filesystem::path file, vector<VerificationNote>& notes, function<void (float)> progress)
426 {
427         int biggest_frame = 0;
428         auto asset = dynamic_pointer_cast<PictureAsset>(reel_file_asset->asset_ref().asset());
429         auto const duration = asset->intrinsic_duration ();
430
431         auto check_and_add = [&notes](vector<VerificationNote> const& j2k_notes) {
432                 for (auto i: j2k_notes) {
433                         if (find(notes.begin(), notes.end(), i) == notes.end()) {
434                                 notes.push_back (i);
435                         }
436                 }
437         };
438
439         if (auto mono_asset = dynamic_pointer_cast<MonoPictureAsset>(reel_file_asset->asset_ref().asset())) {
440                 auto reader = mono_asset->start_read ();
441                 for (int64_t i = 0; i < duration; ++i) {
442                         auto frame = reader->get_frame (i);
443                         biggest_frame = max(biggest_frame, frame->size());
444                         if (!mono_asset->encrypted() || mono_asset->key()) {
445                                 vector<VerificationNote> j2k_notes;
446                                 verify_j2k (frame, j2k_notes);
447                                 check_and_add (j2k_notes);
448                         }
449                         progress (float(i) / duration);
450                 }
451         } else if (auto stereo_asset = dynamic_pointer_cast<StereoPictureAsset>(asset)) {
452                 auto reader = stereo_asset->start_read ();
453                 for (int64_t i = 0; i < duration; ++i) {
454                         auto frame = reader->get_frame (i);
455                         biggest_frame = max(biggest_frame, max(frame->left()->size(), frame->right()->size()));
456                         if (!stereo_asset->encrypted() || mono_asset->key()) {
457                                 vector<VerificationNote> j2k_notes;
458                                 verify_j2k (frame->left(), j2k_notes);
459                                 verify_j2k (frame->right(), j2k_notes);
460                                 check_and_add (j2k_notes);
461                         }
462                         progress (float(i) / duration);
463                 }
464
465         }
466
467         static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
468         static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
469         if (biggest_frame > max_frame) {
470                 notes.push_back ({
471                         VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
472                 });
473         } else if (biggest_frame > risky_frame) {
474                 notes.push_back ({
475                         VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
476                 });
477         }
478 }
479
480
481 static void
482 verify_main_picture_asset (
483         shared_ptr<const DCP> dcp,
484         shared_ptr<const ReelPictureAsset> reel_asset,
485         function<void (string, optional<boost::filesystem::path>)> stage,
486         function<void (float)> progress,
487         vector<VerificationNote>& notes
488         )
489 {
490         auto asset = reel_asset->asset();
491         auto const file = *asset->file();
492         stage ("Checking picture asset hash", file);
493         auto const r = verify_asset (dcp, reel_asset, progress);
494         switch (r) {
495                 case VerifyAssetResult::BAD:
496                         notes.push_back ({
497                                 VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_PICTURE_HASH, file
498                         });
499                         break;
500                 case VerifyAssetResult::CPL_PKL_DIFFER:
501                         notes.push_back ({
502                                 VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_PICTURE_HASHES, file
503                         });
504                         break;
505                 default:
506                         break;
507         }
508         stage ("Checking picture frame sizes", asset->file());
509         verify_picture_asset (reel_asset, file, notes, progress);
510
511         /* Only flat/scope allowed by Bv2.1 */
512         if (
513                 asset->size() != Size(2048, 858) &&
514                 asset->size() != Size(1998, 1080) &&
515                 asset->size() != Size(4096, 1716) &&
516                 asset->size() != Size(3996, 2160)) {
517                 notes.push_back({
518                         VerificationNote::Type::BV21_ERROR,
519                         VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS,
520                         String::compose("%1x%2", asset->size().width, asset->size().height),
521                         file
522                 });
523         }
524
525         /* Only 24, 25, 48fps allowed for 2K */
526         if (
527                 (asset->size() == Size(2048, 858) || asset->size() == Size(1998, 1080)) &&
528                 (asset->edit_rate() != Fraction(24, 1) && asset->edit_rate() != Fraction(25, 1) && asset->edit_rate() != Fraction(48, 1))
529            ) {
530                 notes.push_back({
531                         VerificationNote::Type::BV21_ERROR,
532                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K,
533                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
534                         file
535                 });
536         }
537
538         if (asset->size() == Size(4096, 1716) || asset->size() == Size(3996, 2160)) {
539                 /* Only 24fps allowed for 4K */
540                 if (asset->edit_rate() != Fraction(24, 1)) {
541                         notes.push_back({
542                                 VerificationNote::Type::BV21_ERROR,
543                                 VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K,
544                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
545                                 file
546                         });
547                 }
548
549                 /* Only 2D allowed for 4K */
550                 if (dynamic_pointer_cast<const StereoPictureAsset>(asset)) {
551                         notes.push_back({
552                                 VerificationNote::Type::BV21_ERROR,
553                                 VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D,
554                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
555                                 file
556                         });
557
558                 }
559         }
560
561 }
562
563
564 static void
565 verify_main_sound_asset (
566         shared_ptr<const DCP> dcp,
567         shared_ptr<const ReelSoundAsset> reel_asset,
568         function<void (string, optional<boost::filesystem::path>)> stage,
569         function<void (float)> progress,
570         vector<VerificationNote>& notes
571         )
572 {
573         auto asset = reel_asset->asset();
574         stage ("Checking sound asset hash", asset->file());
575         auto const r = verify_asset (dcp, reel_asset, progress);
576         switch (r) {
577                 case VerifyAssetResult::BAD:
578                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_SOUND_HASH, *asset->file()});
579                         break;
580                 case VerifyAssetResult::CPL_PKL_DIFFER:
581                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_SOUND_HASHES, *asset->file()});
582                         break;
583                 default:
584                         break;
585         }
586
587         stage ("Checking sound asset metadata", asset->file());
588
589         verify_language_tag (asset->language(), notes);
590         if (asset->sampling_rate() != 48000) {
591                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SOUND_FRAME_RATE, raw_convert<string>(asset->sampling_rate()), *asset->file()});
592         }
593 }
594
595
596 static void
597 verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, vector<VerificationNote>& notes)
598 {
599         /* XXX: is Language compulsory? */
600         if (reel_asset->language()) {
601                 verify_language_tag (*reel_asset->language(), notes);
602         }
603
604         if (!reel_asset->entry_point()) {
605                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT, reel_asset->id() });
606         } else if (reel_asset->entry_point().get()) {
607                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT, reel_asset->id() });
608         }
609 }
610
611
612 static void
613 verify_closed_caption_reel (shared_ptr<const ReelClosedCaptionAsset> reel_asset, vector<VerificationNote>& notes)
614 {
615         /* XXX: is Language compulsory? */
616         if (reel_asset->language()) {
617                 verify_language_tag (*reel_asset->language(), notes);
618         }
619
620         if (!reel_asset->entry_point()) {
621                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
622         } else if (reel_asset->entry_point().get()) {
623                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
624         }
625 }
626
627
/** State which is carried from one subtitle asset check to the next */
struct State
{
        /** Language of the first SMPTE subtitle asset that had one; verify_smpte_subtitle_asset()
         *  sets this and compares the language of later assets against it.
         */
        boost::optional<string> subtitle_language;
};
632
633
634 /** Verify stuff that is common to both subtitles and closed captions */
635 void
636 verify_smpte_timed_text_asset (
637         shared_ptr<const SMPTESubtitleAsset> asset,
638         vector<VerificationNote>& notes
639         )
640 {
641         if (asset->language()) {
642                 verify_language_tag (*asset->language(), notes);
643         } else {
644                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE, *asset->file() });
645         }
646
647         auto const size = boost::filesystem::file_size(asset->file().get());
648         if (size > 115 * 1024 * 1024) {
649                 notes.push_back (
650                         { VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES, raw_convert<string>(size), *asset->file() }
651                         );
652         }
653
654         /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
655          * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
656          */
657         auto fonts = asset->font_data ();
658         int total_size = 0;
659         for (auto i: fonts) {
660                 total_size += i.second.size();
661         }
662         if (total_size > 10 * 1024 * 1024) {
663                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES, raw_convert<string>(total_size), asset->file().get() });
664         }
665
666         if (!asset->start_time()) {
667                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_START_TIME, asset->file().get() });
668         } else if (asset->start_time() != Time()) {
669                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SUBTITLE_START_TIME, asset->file().get() });
670         }
671 }
672
673
674 /** Verify SMPTE subtitle-only stuff */
675 void
676 verify_smpte_subtitle_asset (
677         shared_ptr<const SMPTESubtitleAsset> asset,
678         vector<VerificationNote>& notes,
679         State& state
680         )
681 {
682         if (asset->language()) {
683                 if (!state.subtitle_language) {
684                         state.subtitle_language = *asset->language();
685                 } else if (state.subtitle_language != *asset->language()) {
686                         notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES });
687                 }
688         }
689 }
690
691
692 /** Verify all subtitle stuff */
693 static void
694 verify_subtitle_asset (
695         shared_ptr<const SubtitleAsset> asset,
696         function<void (string, optional<boost::filesystem::path>)> stage,
697         boost::filesystem::path xsd_dtd_directory,
698         vector<VerificationNote>& notes,
699         State& state
700         )
701 {
702         stage ("Checking subtitle XML", asset->file());
703         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
704          * gets passed through libdcp which may clean up and therefore hide errors.
705          */
706         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
707
708         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
709         if (smpte) {
710                 verify_smpte_timed_text_asset (smpte, notes);
711                 verify_smpte_subtitle_asset (smpte, notes, state);
712         }
713 }
714
715
716 /** Verify all closed caption stuff */
717 static void
718 verify_closed_caption_asset (
719         shared_ptr<const SubtitleAsset> asset,
720         function<void (string, optional<boost::filesystem::path>)> stage,
721         boost::filesystem::path xsd_dtd_directory,
722         vector<VerificationNote>& notes
723         )
724 {
725         stage ("Checking closed caption XML", asset->file());
726         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
727          * gets passed through libdcp which may clean up and therefore hide errors.
728          */
729         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
730
731         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
732         if (smpte) {
733                 verify_smpte_timed_text_asset (smpte, notes);
734         }
735
736         if (asset->raw_xml().size() > 256 * 1024) {
737                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES, raw_convert<string>(asset->raw_xml().size()), *asset->file()});
738         }
739 }
740
741
742 static
743 void
744 verify_text_timing (
745         vector<shared_ptr<Reel>> reels,
746         int edit_rate,
747         vector<VerificationNote>& notes,
748         std::function<bool (shared_ptr<Reel>)> check,
749         std::function<string (shared_ptr<Reel>)> xml,
750         std::function<int64_t (shared_ptr<Reel>)> duration
751         )
752 {
753         /* end of last subtitle (in editable units) */
754         optional<int64_t> last_out;
755         auto too_short = false;
756         auto too_close = false;
757         auto too_early = false;
758         auto reel_overlap = false;
759         /* current reel start time (in editable units) */
760         int64_t reel_offset = 0;
761
762         std::function<void (cxml::ConstNodePtr, optional<int>, optional<Time>, int, bool)> parse;
763         parse = [&parse, &last_out, &too_short, &too_close, &too_early, &reel_offset](cxml::ConstNodePtr node, optional<int> tcr, optional<Time> start_time, int er, bool first_reel) {
764                 if (node->name() == "Subtitle") {
765                         Time in (node->string_attribute("TimeIn"), tcr);
766                         if (start_time) {
767                                 in -= *start_time;
768                         }
769                         Time out (node->string_attribute("TimeOut"), tcr);
770                         if (start_time) {
771                                 out -= *start_time;
772                         }
773                         if (first_reel && tcr && in < Time(0, 0, 4, 0, *tcr)) {
774                                 too_early = true;
775                         }
776                         auto length = out - in;
777                         if (length.as_editable_units_ceil(er) < 15) {
778                                 too_short = true;
779                         }
780                         if (last_out) {
781                                 /* XXX: this feels dubious - is it really what Bv2.1 means? */
782                                 auto distance = reel_offset + in.as_editable_units_ceil(er) - *last_out;
783                                 if (distance >= 0 && distance < 2) {
784                                         too_close = true;
785                                 }
786                         }
787                         last_out = reel_offset + out.as_editable_units_floor(er);
788                 } else {
789                         for (auto i: node->node_children()) {
790                                 parse(i, tcr, start_time, er, first_reel);
791                         }
792                 }
793         };
794
795         for (auto i = 0U; i < reels.size(); ++i) {
796                 if (!check(reels[i])) {
797                         continue;
798                 }
799
800                 /* We need to look at <Subtitle> instances in the XML being checked, so we can't use the subtitles
801                  * read in by libdcp's parser.
802                  */
803
804                 shared_ptr<cxml::Document> doc;
805                 optional<int> tcr;
806                 optional<Time> start_time;
807                 try {
808                         doc = make_shared<cxml::Document>("SubtitleReel");
809                         doc->read_string (xml(reels[i]));
810                         tcr = doc->number_child<int>("TimeCodeRate");
811                         auto start_time_string = doc->optional_string_child("StartTime");
812                         if (start_time_string) {
813                                 start_time = Time(*start_time_string, tcr);
814                         }
815                 } catch (...) {
816                         doc = make_shared<cxml::Document>("DCSubtitle");
817                         doc->read_string (xml(reels[i]));
818                 }
819                 parse (doc, tcr, start_time, edit_rate, i == 0);
820                 auto end = reel_offset + duration(reels[i]);
821                 if (last_out && *last_out > end) {
822                         reel_overlap = true;
823                 }
824                 reel_offset = end;
825         }
826
827         if (last_out && *last_out > reel_offset) {
828                 reel_overlap = true;
829         }
830
831         if (too_early) {
832                 notes.push_back({
833                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME
834                 });
835         }
836
837         if (too_short) {
838                 notes.push_back ({
839                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_DURATION
840                 });
841         }
842
843         if (too_close) {
844                 notes.push_back ({
845                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_SPACING
846                 });
847         }
848
849         if (reel_overlap) {
850                 notes.push_back ({
851                         VerificationNote::Type::ERROR, VerificationNote::Code::SUBTITLE_OVERLAPS_REEL_BOUNDARY
852                 });
853         }
854 }
855
856
/** Flags filled in by verify_text_lines_and_characters(); all of them start out false
 *  and that function only ever sets them to true, so one instance can collect the
 *  results from several assets.
 */
struct LinesCharactersResult
{
        /** set if a line's length tripped the "warning" threshold */
        bool warning_length_exceeded { false };
        /** set if a line's length tripped the "error" threshold */
        bool error_length_exceeded { false };
        /** set if more than 3 lines were found to be on screen at once */
        bool line_count_exceeded { false };
};
863
864
865 static
866 void
867 verify_text_lines_and_characters (
868         shared_ptr<SubtitleAsset> asset,
869         int warning_length,
870         int error_length,
871         LinesCharactersResult* result
872         )
873 {
874         class Event
875         {
876         public:
877                 Event (Time time_, float position_, int characters_)
878                         : time (time_)
879                         , position (position_)
880                         , characters (characters_)
881                 {}
882
883                 Event (Time time_, shared_ptr<Event> start_)
884                         : time (time_)
885                         , start (start_)
886                 {}
887
888                 Time time;
889                 int position; //< position from 0 at top of screen to 100 at bottom
890                 int characters;
891                 shared_ptr<Event> start;
892         };
893
894         vector<shared_ptr<Event>> events;
895
896         auto position = [](shared_ptr<const SubtitleString> sub) {
897                 switch (sub->v_align()) {
898                 case VAlign::TOP:
899                         return lrintf(sub->v_position() * 100);
900                 case VAlign::CENTER:
901                         return lrintf((0.5f + sub->v_position()) * 100);
902                 case VAlign::BOTTOM:
903                         return lrintf((1.0f - sub->v_position()) * 100);
904                 }
905
906                 return 0L;
907         };
908
909         for (auto j: asset->subtitles()) {
910                 auto text = dynamic_pointer_cast<const SubtitleString>(j);
911                 if (text) {
912                         auto in = make_shared<Event>(text->in(), position(text), text->text().length());
913                         events.push_back(in);
914                         events.push_back(make_shared<Event>(text->out(), in));
915                 }
916         }
917
918         std::sort(events.begin(), events.end(), [](shared_ptr<Event> const& a, shared_ptr<Event>const& b) {
919                 return a->time < b->time;
920         });
921
922         map<int, int> current;
923         for (auto i: events) {
924                 if (current.size() > 3) {
925                         result->line_count_exceeded = true;
926                 }
927                 for (auto j: current) {
928                         if (j.second >= warning_length) {
929                                 result->warning_length_exceeded = true;
930                         }
931                         if (j.second >= error_length) {
932                                 result->error_length_exceeded = true;
933                         }
934                 }
935
936                 if (i->start) {
937                         /* end of a subtitle */
938                         DCP_ASSERT (current.find(i->start->position) != current.end());
939                         if (current[i->start->position] == i->start->characters) {
940                                 current.erase(i->start->position);
941                         } else {
942                                 current[i->start->position] -= i->start->characters;
943                         }
944                 } else {
945                         /* start of a subtitle */
946                         if (current.find(i->position) == current.end()) {
947                                 current[i->position] = i->characters;
948                         } else {
949                                 current[i->position] += i->characters;
950                         }
951                 }
952         }
953 }
954
955
956 static
957 void
958 verify_text_timing (vector<shared_ptr<Reel>> reels, vector<VerificationNote>& notes)
959 {
960         if (reels.empty()) {
961                 return;
962         }
963
964         if (reels[0]->main_subtitle()) {
965                 verify_text_timing (reels, reels[0]->main_subtitle()->edit_rate().numerator, notes,
966                         [](shared_ptr<Reel> reel) {
967                                 return static_cast<bool>(reel->main_subtitle());
968                         },
969                         [](shared_ptr<Reel> reel) {
970                                 return reel->main_subtitle()->asset()->raw_xml();
971                         },
972                         [](shared_ptr<Reel> reel) {
973                                 return reel->main_subtitle()->actual_duration();
974                         }
975                 );
976         }
977
978         for (auto i = 0U; i < reels[0]->closed_captions().size(); ++i) {
979                 verify_text_timing (reels, reels[0]->closed_captions()[i]->edit_rate().numerator, notes,
980                         [i](shared_ptr<Reel> reel) {
981                                 return i < reel->closed_captions().size();
982                         },
983                         [i](shared_ptr<Reel> reel) {
984                                 return reel->closed_captions()[i]->asset()->raw_xml();
985                         },
986                         [i](shared_ptr<Reel> reel) {
987                                 return reel->closed_captions()[i]->actual_duration();
988                         }
989                 );
990         }
991 }
992
993
994 void
995 verify_extension_metadata (shared_ptr<CPL> cpl, vector<VerificationNote>& notes)
996 {
997         DCP_ASSERT (cpl->file());
998         cxml::Document doc ("CompositionPlaylist");
999         doc.read_file (cpl->file().get());
1000
1001         auto missing = false;
1002         string malformed;
1003
1004         if (auto reel_list = doc.node_child("ReelList")) {
1005                 auto reels = reel_list->node_children("Reel");
1006                 if (!reels.empty()) {
1007                         if (auto asset_list = reels[0]->optional_node_child("AssetList")) {
1008                                 if (auto metadata = asset_list->optional_node_child("CompositionMetadataAsset")) {
1009                                         if (auto extension_list = metadata->optional_node_child("ExtensionMetadataList")) {
1010                                                 missing = true;
1011                                                 for (auto extension: extension_list->node_children("ExtensionMetadata")) {
1012                                                         if (extension->optional_string_attribute("scope").get_value_or("") != "http://isdcf.com/ns/cplmd/app") {
1013                                                                 continue;
1014                                                         }
1015                                                         missing = false;
1016                                                         if (auto name = extension->optional_node_child("Name")) {
1017                                                                 if (name->content() != "Application") {
1018                                                                         malformed = "<Name> should be 'Application'";
1019                                                                 }
1020                                                         }
1021                                                         if (auto property_list = extension->optional_node_child("PropertyList")) {
1022                                                                 if (auto property = property_list->optional_node_child("Property")) {
1023                                                                         if (auto name = property->optional_node_child("Name")) {
1024                                                                                 if (name->content() != "DCP Constraints Profile") {
1025                                                                                         malformed = "<Name> property should be 'DCP Constraints Profile'";
1026                                                                                 }
1027                                                                         }
1028                                                                         if (auto value = property->optional_node_child("Value")) {
1029                                                                                 if (value->content() != "SMPTE-RDD-52:2020-Bv2.1") {
1030                                                                                         malformed = "<Value> property should be 'SMPTE-RDD-52:2020-Bv2.1'";
1031                                                                                 }
1032                                                                         }
1033                                                                 }
1034                                                         }
1035                                                 }
1036                                         } else {
1037                                                 missing = true;
1038                                         }
1039                                 }
1040                         }
1041                 }
1042         }
1043
1044         if (missing) {
1045                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_EXTENSION_METADATA, cpl->id(), cpl->file().get()});
1046         } else if (!malformed.empty()) {
1047                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_EXTENSION_METADATA, malformed, cpl->file().get()});
1048         }
1049 }
1050
1051
1052 bool
1053 pkl_has_encrypted_assets (shared_ptr<DCP> dcp, shared_ptr<PKL> pkl)
1054 {
1055         vector<string> encrypted;
1056         for (auto i: dcp->cpls()) {
1057                 for (auto j: i->reel_file_assets()) {
1058                         if (j->asset_ref().resolved()) {
1059                                 /* It's a bit surprising / broken but Interop subtitle assets are represented
1060                                  * in reels by ReelSubtitleAsset which inherits ReelFileAsset, so it's possible for
1061                                  * ReelFileAssets to have assets which are not MXFs.
1062                                  */
1063                                 if (auto asset = dynamic_pointer_cast<MXF>(j->asset_ref().asset())) {
1064                                         if (asset->encrypted()) {
1065                                                 encrypted.push_back(j->asset_ref().id());
1066                                         }
1067                                 }
1068                         }
1069                 }
1070         }
1071
1072         for (auto i: pkl->asset_list()) {
1073                 if (find(encrypted.begin(), encrypted.end(), i->id()) != encrypted.end()) {
1074                         return true;
1075                 }
1076         }
1077
1078         return false;
1079 }
1080
1081
1082 vector<VerificationNote>
1083 dcp::verify (
1084         vector<boost::filesystem::path> directories,
1085         function<void (string, optional<boost::filesystem::path>)> stage,
1086         function<void (float)> progress,
1087         optional<boost::filesystem::path> xsd_dtd_directory
1088         )
1089 {
1090         if (!xsd_dtd_directory) {
1091                 xsd_dtd_directory = resources_directory() / "xsd";
1092         }
1093         *xsd_dtd_directory = boost::filesystem::canonical (*xsd_dtd_directory);
1094
1095         vector<VerificationNote> notes;
1096         State state{};
1097
1098         vector<shared_ptr<DCP>> dcps;
1099         for (auto i: directories) {
1100                 dcps.push_back (make_shared<DCP>(i));
1101         }
1102
1103         for (auto dcp: dcps) {
1104                 stage ("Checking DCP", dcp->directory());
1105                 bool carry_on = true;
1106                 try {
1107                         dcp->read (&notes);
1108                 } catch (MissingAssetmapError& e) {
1109                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1110                         carry_on = false;
1111                 } catch (ReadError& e) {
1112                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1113                 } catch (XMLError& e) {
1114                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1115                 } catch (MXFFileError& e) {
1116                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1117                 } catch (cxml::Error& e) {
1118                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1119                 }
1120
1121                 if (!carry_on) {
1122                         continue;
1123                 }
1124
1125                 if (dcp->standard() != Standard::SMPTE) {
1126                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_STANDARD});
1127                 }
1128
1129                 for (auto cpl: dcp->cpls()) {
1130                         stage ("Checking CPL", cpl->file());
1131                         validate_xml (cpl->file().get(), *xsd_dtd_directory, notes);
1132
1133                         if (cpl->any_encrypted() && !cpl->all_encrypted()) {
1134                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::PARTIALLY_ENCRYPTED});
1135                         }
1136
1137                         for (auto const& i: cpl->additional_subtitle_languages()) {
1138                                 verify_language_tag (i, notes);
1139                         }
1140
1141                         if (cpl->release_territory()) {
1142                                 if (!cpl->release_territory_scope() || cpl->release_territory_scope().get() != "http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata#scope/release-territory/UNM49") {
1143                                         auto terr = cpl->release_territory().get();
1144                                         /* Must be a valid region tag, or "001" */
1145                                         try {
1146                                                 LanguageTag::RegionSubtag test (terr);
1147                                         } catch (...) {
1148                                                 if (terr != "001") {
1149                                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, terr});
1150                                                 }
1151                                         }
1152                                 }
1153                         }
1154
1155                         if (dcp->standard() == Standard::SMPTE) {
1156                                 if (!cpl->annotation_text()) {
1157                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1158                                 } else if (cpl->annotation_text().get() != cpl->content_title_text()) {
1159                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1160                                 }
1161                         }
1162
1163                         for (auto i: dcp->pkls()) {
1164                                 /* Check that the CPL's hash corresponds to the PKL */
1165                                 optional<string> h = i->hash(cpl->id());
1166                                 if (h && make_digest(ArrayData(*cpl->file())) != *h) {
1167                                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_CPL_HASHES, cpl->id(), cpl->file().get()});
1168                                 }
1169
1170                                 /* Check that any PKL with a single CPL has its AnnotationText the same as the CPL's ContentTitleText */
1171                                 optional<string> required_annotation_text;
1172                                 for (auto j: i->asset_list()) {
1173                                         /* See if this is a CPL */
1174                                         for (auto k: dcp->cpls()) {
1175                                                 if (j->id() == k->id()) {
1176                                                         if (!required_annotation_text) {
1177                                                                 /* First CPL we have found; this is the required AnnotationText unless we find another */
1178                                                                 required_annotation_text = cpl->content_title_text();
1179                                                         } else {
1180                                                                 /* There's more than one CPL so we don't care what the PKL's AnnotationText is */
1181                                                                 required_annotation_text = boost::none;
1182                                                         }
1183                                                 }
1184                                         }
1185                                 }
1186
1187                                 if (required_annotation_text && i->annotation_text() != required_annotation_text) {
1188                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL, i->id(), i->file().get()});
1189                                 }
1190                         }
1191
1192                         /* set to true if any reel has a MainSubtitle */
1193                         auto have_main_subtitle = false;
1194                         /* set to true if any reel has no MainSubtitle */
1195                         auto have_no_main_subtitle = false;
1196                         /* fewest number of closed caption assets seen in a reel */
1197                         size_t fewest_closed_captions = SIZE_MAX;
1198                         /* most number of closed caption assets seen in a reel */
1199                         size_t most_closed_captions = 0;
1200                         map<Marker, Time> markers_seen;
1201
1202                         for (auto reel: cpl->reels()) {
1203                                 stage ("Checking reel", optional<boost::filesystem::path>());
1204
1205                                 for (auto i: reel->assets()) {
1206                                         if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1207                                                 notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_DURATION, i->id()});
1208                                         }
1209                                         if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1210                                                 notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_INTRINSIC_DURATION, i->id()});
1211                                         }
1212                                         auto file_asset = dynamic_pointer_cast<ReelFileAsset>(i);
1213                                         if (file_asset && !file_asset->hash()) {
1214                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_HASH, i->id()});
1215                                         }
1216                                 }
1217
1218                                 if (dcp->standard() == Standard::SMPTE) {
1219                                         boost::optional<int64_t> duration;
1220                                         for (auto i: reel->assets()) {
1221                                                 if (!duration) {
1222                                                         duration = i->actual_duration();
1223                                                 } else if (*duration != i->actual_duration()) {
1224                                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_ASSET_DURATION});
1225                                                         break;
1226                                                 }
1227                                         }
1228                                 }
1229
1230                                 if (reel->main_picture()) {
1231                                         /* Check reel stuff */
1232                                         auto const frame_rate = reel->main_picture()->frame_rate();
1233                                         if (frame_rate.denominator != 1 ||
1234                                             (frame_rate.numerator != 24 &&
1235                                              frame_rate.numerator != 25 &&
1236                                              frame_rate.numerator != 30 &&
1237                                              frame_rate.numerator != 48 &&
1238                                              frame_rate.numerator != 50 &&
1239                                              frame_rate.numerator != 60 &&
1240                                              frame_rate.numerator != 96)) {
1241                                                 notes.push_back ({
1242                                                         VerificationNote::Type::ERROR,
1243                                                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE,
1244                                                         String::compose("%1/%2", frame_rate.numerator, frame_rate.denominator)
1245                                                 });
1246                                         }
1247                                         /* Check asset */
1248                                         if (reel->main_picture()->asset_ref().resolved()) {
1249                                                 verify_main_picture_asset (dcp, reel->main_picture(), stage, progress, notes);
1250                                         }
1251                                 }
1252
1253                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
1254                                         verify_main_sound_asset (dcp, reel->main_sound(), stage, progress, notes);
1255                                 }
1256
1257                                 if (reel->main_subtitle()) {
1258                                         verify_main_subtitle_reel (reel->main_subtitle(), notes);
1259                                         if (reel->main_subtitle()->asset_ref().resolved()) {
1260                                                 verify_subtitle_asset (reel->main_subtitle()->asset(), stage, *xsd_dtd_directory, notes, state);
1261                                         }
1262                                         have_main_subtitle = true;
1263                                 } else {
1264                                         have_no_main_subtitle = true;
1265                                 }
1266
1267                                 for (auto i: reel->closed_captions()) {
1268                                         verify_closed_caption_reel (i, notes);
1269                                         if (i->asset_ref().resolved()) {
1270                                                 verify_closed_caption_asset (i->asset(), stage, *xsd_dtd_directory, notes);
1271                                         }
1272                                 }
1273
1274                                 if (reel->main_markers()) {
1275                                         for (auto const& i: reel->main_markers()->get()) {
1276                                                 markers_seen.insert (i);
1277                                         }
1278                                 }
1279
1280                                 fewest_closed_captions = std::min (fewest_closed_captions, reel->closed_captions().size());
1281                                 most_closed_captions = std::max (most_closed_captions, reel->closed_captions().size());
1282                         }
1283
1284                         verify_text_timing (cpl->reels(), notes);
1285
1286                         if (dcp->standard() == Standard::SMPTE) {
1287
1288                                 if (have_main_subtitle && have_no_main_subtitle) {
1289                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS});
1290                                 }
1291
1292                                 if (fewest_closed_captions != most_closed_captions) {
1293                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS});
1294                                 }
1295
1296                                 if (cpl->content_kind() == ContentKind::FEATURE) {
1297                                         if (markers_seen.find(Marker::FFEC) == markers_seen.end()) {
1298                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFEC_IN_FEATURE});
1299                                         }
1300                                         if (markers_seen.find(Marker::FFMC) == markers_seen.end()) {
1301                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFMC_IN_FEATURE});
1302                                         }
1303                                 }
1304
1305                                 auto ffoc = markers_seen.find(Marker::FFOC);
1306                                 if (ffoc == markers_seen.end()) {
1307                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_FFOC});
1308                                 } else if (ffoc->second.e != 1) {
1309                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_FFOC, raw_convert<string>(ffoc->second.e)});
1310                                 }
1311
1312                                 auto lfoc = markers_seen.find(Marker::LFOC);
1313                                 if (lfoc == markers_seen.end()) {
1314                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_LFOC});
1315                                 } else {
1316                                         auto lfoc_time = lfoc->second.as_editable_units_ceil(lfoc->second.tcr);
1317                                         if (lfoc_time != (cpl->reels().back()->duration() - 1)) {
1318                                                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_LFOC, raw_convert<string>(lfoc_time)});
1319                                         }
1320                                 }
1321
1322                                 LinesCharactersResult result;
1323                                 for (auto reel: cpl->reels()) {
1324                                         if (reel->main_subtitle() && reel->main_subtitle()->asset()) {
1325                                                 verify_text_lines_and_characters (reel->main_subtitle()->asset(), 52, 79, &result);
1326                                         }
1327                                 }
1328
1329                                 if (result.line_count_exceeded) {
1330                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT});
1331                                 }
1332                                 if (result.error_length_exceeded) {
1333                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH});
1334                                 } else if (result.warning_length_exceeded) {
1335                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH});
1336                                 }
1337
1338                                 result = LinesCharactersResult();
1339                                 for (auto reel: cpl->reels()) {
1340                                         for (auto i: reel->closed_captions()) {
1341                                                 if (i->asset()) {
1342                                                         verify_text_lines_and_characters (i->asset(), 32, 32, &result);
1343                                                 }
1344                                         }
1345                                 }
1346
1347                                 if (result.line_count_exceeded) {
1348                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT});
1349                                 }
1350                                 if (result.error_length_exceeded) {
1351                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH});
1352                                 }
1353
1354                                 if (!cpl->full_content_title_text()) {
1355                                         /* Since FullContentTitleText is assumed always to exist if there's a CompositionMetadataAsset we
1356                                          * can use it as a proxy for CompositionMetadataAsset's existence.
1357                                          */
1358                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA, cpl->id(), cpl->file().get()});
1359                                 } else if (!cpl->version_number()) {
1360                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER, cpl->id(), cpl->file().get()});
1361                                 }
1362
1363                                 verify_extension_metadata (cpl, notes);
1364
1365                                 if (cpl->any_encrypted()) {
1366                                         cxml::Document doc ("CompositionPlaylist");
1367                                         DCP_ASSERT (cpl->file());
1368                                         doc.read_file (cpl->file().get());
1369                                         if (!doc.optional_node_child("Signature")) {
1370                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT, cpl->id(), cpl->file().get()});
1371                                         }
1372                                 }
1373                         }
1374                 }
1375
1376                 for (auto pkl: dcp->pkls()) {
1377                         stage ("Checking PKL", pkl->file());
1378                         validate_xml (pkl->file().get(), *xsd_dtd_directory, notes);
1379                         if (pkl_has_encrypted_assets(dcp, pkl)) {
1380                                 cxml::Document doc ("PackingList");
1381                                 doc.read_file (pkl->file().get());
1382                                 if (!doc.optional_node_child("Signature")) {
1383                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT, pkl->id(), pkl->file().get()});
1384                                 }
1385                         }
1386                 }
1387
1388                 if (dcp->asset_map_path()) {
1389                         stage ("Checking ASSETMAP", dcp->asset_map_path().get());
1390                         validate_xml (dcp->asset_map_path().get(), *xsd_dtd_directory, notes);
1391                 } else {
1392                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_ASSETMAP});
1393                 }
1394         }
1395
1396         return notes;
1397 }
1398
1399
1400 string
1401 dcp::note_to_string (VerificationNote note)
1402 {
1403         /** These strings should say what is wrong, incorporating any extra details (ID, filenames etc.).
1404          *
1405          *  e.g. "ClosedCaption asset has no <EntryPoint> tag.",
1406          *  not "ClosedCaption assets must have an <EntryPoint> tag."
1407          *
1408          *  It's OK to use XML tag names where they are clear.
1409          *  If both ID and filename are available, use only the ID.
1410          *  End messages with a full stop.
1411          *  Messages should not mention whether or not their errors are a part of Bv2.1.
1412          */
1413         switch (note.code()) {
1414         case VerificationNote::Code::FAILED_READ:
1415                 return *note.note();
1416         case VerificationNote::Code::MISMATCHED_CPL_HASHES:
1417                 return String::compose("The hash of the CPL %1 in the PKL does not agree with the CPL file.", note.note().get());
1418         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE:
1419                 return String::compose("The picture in a reel has an invalid frame rate %1.", note.note().get());
1420         case VerificationNote::Code::INCORRECT_PICTURE_HASH:
1421                 return String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
1422         case VerificationNote::Code::MISMATCHED_PICTURE_HASHES:
1423                 return String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
1424         case VerificationNote::Code::INCORRECT_SOUND_HASH:
1425                 return String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
1426         case VerificationNote::Code::MISMATCHED_SOUND_HASHES:
1427                 return String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
1428         case VerificationNote::Code::EMPTY_ASSET_PATH:
1429                 return "The asset map contains an empty asset path.";
1430         case VerificationNote::Code::MISSING_ASSET:
1431                 return String::compose("The file %1 for an asset in the asset map cannot be found.", note.file()->filename());
1432         case VerificationNote::Code::MISMATCHED_STANDARD:
1433                 return "The DCP contains both SMPTE and Interop parts.";
1434         case VerificationNote::Code::INVALID_XML:
1435                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
1436         case VerificationNote::Code::MISSING_ASSETMAP:
1437                 return "No ASSETMAP or ASSETMAP.xml was found.";
1438         case VerificationNote::Code::INVALID_INTRINSIC_DURATION:
1439                 return String::compose("The intrinsic duration of the asset %1 is less than 1 second.", note.note().get());
1440         case VerificationNote::Code::INVALID_DURATION:
1441                 return String::compose("The duration of the asset %1 is less than 1 second.", note.note().get());
1442         case VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1443                 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
1444         case VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1445                 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
1446         case VerificationNote::Code::EXTERNAL_ASSET:
1447                 return String::compose("The asset %1 that this DCP refers to is not included in the DCP.  It may be a VF.", note.note().get());
1448         case VerificationNote::Code::INVALID_STANDARD:
1449                 return "This DCP does not use the SMPTE standard.";
1450         case VerificationNote::Code::INVALID_LANGUAGE:
1451                 return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
1452         case VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS:
1453                 return String::compose("The size %1 of picture asset %2 is not allowed.", note.note().get(), note.file()->filename());
1454         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K:
1455                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 2K DCPs.", note.note().get(), note.file()->filename());
1456         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K:
1457                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 4K DCPs.", note.note().get(), note.file()->filename());
1458         case VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D:
1459                 return "3D 4K DCPs are not allowed.";
1460         case VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES:
1461                 return String::compose("The size %1 of the closed caption asset %2 is larger than the 256KB maximum.", note.note().get(), note.file()->filename());
1462         case VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES:
1463                 return String::compose("The size %1 of the timed text asset %2 is larger than the 115MB maximum.", note.note().get(), note.file()->filename());
1464         case VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES:
1465                 return String::compose("The size %1 of the fonts in timed text asset %2 is larger than the 10MB maximum.", note.note().get(), note.file()->filename());
1466         case VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE:
1467                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <Language> tag.", note.file()->filename());
1468         case VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES:
1469                 return "Some subtitle assets have different <Language> tags than others";
1470         case VerificationNote::Code::MISSING_SUBTITLE_START_TIME:
1471                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <StartTime> tag.", note.file()->filename());
1472         case VerificationNote::Code::INVALID_SUBTITLE_START_TIME:
1473                 return String::compose("The XML for a SMPTE subtitle asset %1 has a non-zero <StartTime> tag.", note.file()->filename());
1474         case VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME:
1475                 return "The first subtitle or closed caption is less than 4 seconds from the start of the DCP.";
1476         case VerificationNote::Code::INVALID_SUBTITLE_DURATION:
1477                 return "At least one subtitle lasts less than 15 frames.";
1478         case VerificationNote::Code::INVALID_SUBTITLE_SPACING:
1479                 return "At least one pair of subtitles is separated by less than 2 frames.";
1480         case VerificationNote::Code::SUBTITLE_OVERLAPS_REEL_BOUNDARY:
1481                 return "At least one subtitle extends outside of its reel.";
1482         case VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT:
1483                 return "There are more than 3 subtitle lines in at least one place in the DCP.";
1484         case VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH:
1485                 return "There are more than 52 characters in at least one subtitle line.";
1486         case VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH:
1487                 return "There are more than 79 characters in at least one subtitle line.";
1488         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT:
1489                 return "There are more than 3 closed caption lines in at least one place.";
1490         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH:
1491                 return "There are more than 32 characters in at least one closed caption line.";
1492         case VerificationNote::Code::INVALID_SOUND_FRAME_RATE:
1493                 return String::compose("The sound asset %1 has a sampling rate of %2", note.file()->filename(), note.note().get());
1494         case VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT:
1495                 return String::compose("The CPL %1 has no <AnnotationText> tag.", note.note().get());
1496         case VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT:
1497                 return String::compose("The CPL %1 has an <AnnotationText> which differs from its <ContentTitleText>", note.note().get());
1498         case VerificationNote::Code::MISMATCHED_ASSET_DURATION:
1499                 return "All assets in a reel do not have the same duration.";
1500         case VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS:
1501                 return "At least one reel contains a subtitle asset, but some reel(s) do not";
1502         case VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS:
1503                 return "At least one reel has closed captions, but reels have different numbers of closed caption assets.";
1504         case VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT:
1505                 return String::compose("The subtitle asset %1 has no <EntryPoint> tag.", note.note().get());
1506         case VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT:
1507                 return String::compose("The subtitle asset %1 has an <EntryPoint> other than 0.", note.note().get());
1508         case VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT:
1509                 return String::compose("The closed caption asset %1 has no <EntryPoint> tag.", note.note().get());
1510         case VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT:
1511                 return String::compose("The closed caption asset %1 has an <EntryPoint> other than 0.", note.note().get());
1512         case VerificationNote::Code::MISSING_HASH:
1513                 return String::compose("The asset %1 has no <Hash> tag in the CPL.", note.note().get());
1514         case VerificationNote::Code::MISSING_FFEC_IN_FEATURE:
1515                 return "The DCP is marked as a Feature but there is no FFEC (first frame of end credits) marker";
1516         case VerificationNote::Code::MISSING_FFMC_IN_FEATURE:
1517                 return "The DCP is marked as a Feature but there is no FFMC (first frame of moving credits) marker";
1518         case VerificationNote::Code::MISSING_FFOC:
1519                 return "There should be a FFOC (first frame of content) marker";
1520         case VerificationNote::Code::MISSING_LFOC:
1521                 return "There should be a LFOC (last frame of content) marker";
1522         case VerificationNote::Code::INCORRECT_FFOC:
1523                 return String::compose("The FFOC marker is %1 instead of 1", note.note().get());
1524         case VerificationNote::Code::INCORRECT_LFOC:
1525                 return String::compose("The LFOC marker is %1 instead of 1 less than the duration of the last reel.", note.note().get());
1526         case VerificationNote::Code::MISSING_CPL_METADATA:
1527                 return String::compose("The CPL %1 has no <CompositionMetadataAsset> tag.", note.note().get());
1528         case VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER:
1529                 return String::compose("The CPL %1 has no <VersionNumber> in its <CompositionMetadataAsset>.", note.note().get());
1530         case VerificationNote::Code::MISSING_EXTENSION_METADATA:
1531                 return String::compose("The CPL %1 has no <ExtensionMetadata> in its <CompositionMetadataAsset>.", note.note().get());
1532         case VerificationNote::Code::INVALID_EXTENSION_METADATA:
1533                 return String::compose("The CPL %1 has a malformed <ExtensionMetadata> (%2).", note.file()->filename(), note.note().get());
1534         case VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT:
1535                 return String::compose("The CPL %1, which has encrypted content, is not signed.", note.note().get());
1536         case VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT:
1537                 return String::compose("The PKL %1, which has encrypted content, is not signed.", note.note().get());
1538         case VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL:
1539                 return String::compose("The PKL %1 has only one CPL but its <AnnotationText> does not match the CPL's <ContentTitleText>.", note.note().get());
1540         case VerificationNote::Code::PARTIALLY_ENCRYPTED:
1541                 return "Some assets are encrypted but some are not.";
1542         case VerificationNote::Code::INVALID_JPEG2000_CODESTREAM:
1543                 return String::compose("The JPEG2000 codestream for at least one frame is invalid (%1)", note.note().get());
1544         case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_2K:
1545                 return String::compose("The JPEG2000 codestream uses %1 guard bits in a 2K image instead of 1.", note.note().get());
1546         case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_4K:
1547                 return String::compose("The JPEG2000 codestream uses %1 guard bits in a 4K image instead of 2.", note.note().get());
1548         case VerificationNote::Code::INVALID_JPEG2000_TILE_SIZE:
1549                 return "The JPEG2000 tile size is not the same as the image size.";
1550         case VerificationNote::Code::INVALID_JPEG2000_CODE_BLOCK_WIDTH:
1551                 return String::compose("The JPEG2000 codestream uses a code block width of %1 instead of 32.", note.note().get());
1552         case VerificationNote::Code::INVALID_JPEG2000_CODE_BLOCK_HEIGHT:
1553                 return String::compose("The JPEG2000 codestream uses a code block height of %1 instead of 32.", note.note().get());
1554         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER_COUNT_FOR_2K:
1555                 return String::compose("%1 POC markers found in 2K JPEG2000 codestream instead of 0.", note.note().get());
1556         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER_COUNT_FOR_4K:
1557                 return String::compose("%1 POC markers found in 4K JPEG2000 codestream instead of 1.", note.note().get());
1558         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER:
1559                 return String::compose("Incorrect POC marker content found (%1)", note.note().get());
1560         case VerificationNote::Code::INVALID_JPEG2000_POC_MARKER_LOCATION:
1561                 return "POC marker found outside main header";
1562         case VerificationNote::Code::INVALID_JPEG2000_TILE_PARTS_FOR_2K:
1563                 return String::compose("The JPEG2000 codestream has %1 tile parts in a 2K image instead of 3.", note.note().get());
1564         case VerificationNote::Code::INVALID_JPEG2000_TILE_PARTS_FOR_4K:
1565                 return String::compose("The JPEG2000 codestream has %1 tile parts in a 4K image instead of 6.", note.note().get());
1566         case VerificationNote::Code::MISSING_JPEG200_TLM_MARKER:
1567                 return "No TLM marker was found in a JPEG2000 codestream.";
1568         }
1569
1570         return "";
1571 }
1572
1573
1574 bool
1575 dcp::operator== (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
1576 {
1577         return a.type() == b.type() && a.code() == b.code() && a.note() == b.note() && a.file() == b.file() && a.line() == b.line();
1578 }
1579
1580
1581 std::ostream&
1582 dcp::operator<< (std::ostream& s, dcp::VerificationNote const& note)
1583 {
1584         s << note_to_string (note);
1585         if (note.note()) {
1586                 s << " [" << note.note().get() << "]";
1587         }
1588         if (note.file()) {
1589                 s << " [" << note.file().get() << "]";
1590         }
1591         if (note.line()) {
1592                 s << " [" << note.line().get() << "]";
1593         }
1594         return s;
1595 }
1596