Call verify_j2k when verifying DCPs.
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2021 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34
35 /** @file  src/verify.cc
36  *  @brief dcp::verify() method and associated code
37  */
38
39
40 #include "compose.hpp"
41 #include "cpl.h"
42 #include "dcp.h"
43 #include "exceptions.h"
44 #include "interop_subtitle_asset.h"
45 #include "mono_picture_asset.h"
46 #include "mono_picture_frame.h"
47 #include "raw_convert.h"
48 #include "reel.h"
49 #include "reel_closed_caption_asset.h"
50 #include "reel_markers_asset.h"
51 #include "reel_picture_asset.h"
52 #include "reel_sound_asset.h"
53 #include "reel_subtitle_asset.h"
54 #include "smpte_subtitle_asset.h"
55 #include "stereo_picture_asset.h"
56 #include "stereo_picture_frame.h"
57 #include "verify.h"
58 #include "verify_j2k.h"
59 #include <xercesc/dom/DOMAttr.hpp>
60 #include <xercesc/dom/DOMDocument.hpp>
61 #include <xercesc/dom/DOMError.hpp>
62 #include <xercesc/dom/DOMErrorHandler.hpp>
63 #include <xercesc/dom/DOMException.hpp>
64 #include <xercesc/dom/DOMImplementation.hpp>
65 #include <xercesc/dom/DOMImplementationLS.hpp>
66 #include <xercesc/dom/DOMImplementationRegistry.hpp>
67 #include <xercesc/dom/DOMLSParser.hpp>
68 #include <xercesc/dom/DOMLocator.hpp>
69 #include <xercesc/dom/DOMNamedNodeMap.hpp>
70 #include <xercesc/dom/DOMNodeList.hpp>
71 #include <xercesc/framework/LocalFileInputSource.hpp>
72 #include <xercesc/framework/MemBufInputSource.hpp>
73 #include <xercesc/parsers/AbstractDOMParser.hpp>
74 #include <xercesc/parsers/XercesDOMParser.hpp>
75 #include <xercesc/sax/HandlerBase.hpp>
76 #include <xercesc/util/PlatformUtils.hpp>
77 #include <boost/algorithm/string.hpp>
78 #include <iostream>
79 #include <map>
80 #include <vector>
81
82
83 using std::list;
84 using std::vector;
85 using std::string;
86 using std::cout;
87 using std::map;
88 using std::max;
89 using std::set;
90 using std::shared_ptr;
91 using std::make_shared;
92 using boost::optional;
93 using boost::function;
94 using std::dynamic_pointer_cast;
95
96
97 using namespace dcp;
98 using namespace xercesc;
99
100
101 static
102 string
103 xml_ch_to_string (XMLCh const * a)
104 {
105         char* x = XMLString::transcode(a);
106         string const o(x);
107         XMLString::release(&x);
108         return o;
109 }
110
111
112 class XMLValidationError
113 {
114 public:
115         XMLValidationError (SAXParseException const & e)
116                 : _message (xml_ch_to_string(e.getMessage()))
117                 , _line (e.getLineNumber())
118                 , _column (e.getColumnNumber())
119                 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
120                 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
121         {
122
123         }
124
125         string message () const {
126                 return _message;
127         }
128
129         uint64_t line () const {
130                 return _line;
131         }
132
133         uint64_t column () const {
134                 return _column;
135         }
136
137         string public_id () const {
138                 return _public_id;
139         }
140
141         string system_id () const {
142                 return _system_id;
143         }
144
145 private:
146         string _message;
147         uint64_t _line;
148         uint64_t _column;
149         string _public_id;
150         string _system_id;
151 };
152
153
154 class DCPErrorHandler : public ErrorHandler
155 {
156 public:
157         void warning(const SAXParseException& e)
158         {
159                 maybe_add (XMLValidationError(e));
160         }
161
162         void error(const SAXParseException& e)
163         {
164                 maybe_add (XMLValidationError(e));
165         }
166
167         void fatalError(const SAXParseException& e)
168         {
169                 maybe_add (XMLValidationError(e));
170         }
171
172         void resetErrors() {
173                 _errors.clear ();
174         }
175
176         list<XMLValidationError> errors () const {
177                 return _errors;
178         }
179
180 private:
181         void maybe_add (XMLValidationError e)
182         {
183                 /* XXX: nasty hack */
184                 if (
185                         e.message().find("schema document") != string::npos &&
186                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
187                         ) {
188                         return;
189                 }
190
191                 _errors.push_back (e);
192         }
193
194         list<XMLValidationError> _errors;
195 };
196
197
198 class StringToXMLCh
199 {
200 public:
201         StringToXMLCh (string a)
202         {
203                 _buffer = XMLString::transcode(a.c_str());
204         }
205
206         StringToXMLCh (StringToXMLCh const&) = delete;
207         StringToXMLCh& operator= (StringToXMLCh const&) = delete;
208
209         ~StringToXMLCh ()
210         {
211                 XMLString::release (&_buffer);
212         }
213
214         XMLCh const * get () const {
215                 return _buffer;
216         }
217
218 private:
219         XMLCh* _buffer;
220 };
221
222
223 class LocalFileResolver : public EntityResolver
224 {
225 public:
226         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
227                 : _xsd_dtd_directory (xsd_dtd_directory)
228         {
229                 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
230                  * found without being here.
231                  */
232                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
233                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
234                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
235                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
236                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
237                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
238                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
239                 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
240                 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
241                 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
242                 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
243                 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
244                 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
245         }
246
247         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
248         {
249                 if (!system_id) {
250                         return 0;
251                 }
252                 auto system_id_str = xml_ch_to_string (system_id);
253                 auto p = _xsd_dtd_directory;
254                 if (_files.find(system_id_str) == _files.end()) {
255                         p /= system_id_str;
256                 } else {
257                         p /= _files[system_id_str];
258                 }
259                 StringToXMLCh ch (p.string());
260                 return new LocalFileInputSource(ch.get());
261         }
262
263 private:
264         void add (string uri, string file)
265         {
266                 _files[uri] = file;
267         }
268
269         std::map<string, string> _files;
270         boost::filesystem::path _xsd_dtd_directory;
271 };
272
273
274 static void
275 parse (XercesDOMParser& parser, boost::filesystem::path xml)
276 {
277         parser.parse(xml.string().c_str());
278 }
279
280
281 static void
282 parse (XercesDOMParser& parser, string xml)
283 {
284         xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
285         parser.parse(buf);
286 }
287
288
289 template <class T>
290 void
291 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, vector<VerificationNote>& notes)
292 {
293         try {
294                 XMLPlatformUtils::Initialize ();
295         } catch (XMLException& e) {
296                 throw MiscError ("Failed to initialise xerces library");
297         }
298
299         DCPErrorHandler error_handler;
300
301         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
302         {
303                 XercesDOMParser parser;
304                 parser.setValidationScheme(XercesDOMParser::Val_Always);
305                 parser.setDoNamespaces(true);
306                 parser.setDoSchema(true);
307
308                 vector<string> schema;
309                 schema.push_back("xml.xsd");
310                 schema.push_back("xmldsig-core-schema.xsd");
311                 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
312                 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
313                 schema.push_back("SMPTE-429-9-2007-AM.xsd");
314                 schema.push_back("Main-Stereo-Picture-CPL.xsd");
315                 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
316                 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
317                 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
318                 schema.push_back("DCSubtitle.v1.mattsson.xsd");
319                 schema.push_back("DCDMSubtitle-2010.xsd");
320                 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
321                 schema.push_back("SMPTE-429-16.xsd");
322                 schema.push_back("Dolby-2012-AD.xsd");
323                 schema.push_back("SMPTE-429-10-2008.xsd");
324                 schema.push_back("xlink.xsd");
325                 schema.push_back("SMPTE-335-2012.xsd");
326                 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
327                 schema.push_back("isdcf-mca.xsd");
328                 schema.push_back("SMPTE-429-12-2008.xsd");
329
330                 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
331                  * Schemas that are not mentioned in this list are not read, and the things
332                  * they describe are not checked.
333                  */
334                 string locations;
335                 for (auto i: schema) {
336                         locations += String::compose("%1 %1 ", i, i);
337                 }
338
339                 parser.setExternalSchemaLocation(locations.c_str());
340                 parser.setValidationSchemaFullChecking(true);
341                 parser.setErrorHandler(&error_handler);
342
343                 LocalFileResolver resolver (xsd_dtd_directory);
344                 parser.setEntityResolver(&resolver);
345
346                 try {
347                         parser.resetDocumentPool();
348                         parse(parser, xml);
349                 } catch (XMLException& e) {
350                         throw MiscError(xml_ch_to_string(e.getMessage()));
351                 } catch (DOMException& e) {
352                         throw MiscError(xml_ch_to_string(e.getMessage()));
353                 } catch (...) {
354                         throw MiscError("Unknown exception from xerces");
355                 }
356         }
357
358         XMLPlatformUtils::Terminate ();
359
360         for (auto i: error_handler.errors()) {
361                 notes.push_back ({
362                         VerificationNote::Type::ERROR,
363                         VerificationNote::Code::INVALID_XML,
364                         i.message(),
365                         boost::trim_copy(i.public_id() + " " + i.system_id()),
366                         i.line()
367                 });
368         }
369 }
370
371
/** Outcome of verify_asset() */
enum class VerifyAssetResult
{
	/** none of the hash comparisons found a difference */
	GOOD,
	/** the CPL gives a hash for the asset and it differs from the one in the PKL */
	CPL_PKL_DIFFER,
	/** the hash calculated from the asset differs from the one in the PKL */
	BAD
};
377
378
379 static VerifyAssetResult
380 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
381 {
382         auto const actual_hash = reel_file_asset->asset_ref()->hash(progress);
383
384         auto pkls = dcp->pkls();
385         /* We've read this DCP in so it must have at least one PKL */
386         DCP_ASSERT (!pkls.empty());
387
388         auto asset = reel_file_asset->asset_ref().asset();
389
390         optional<string> pkl_hash;
391         for (auto i: pkls) {
392                 pkl_hash = i->hash (reel_file_asset->asset_ref()->id());
393                 if (pkl_hash) {
394                         break;
395                 }
396         }
397
398         DCP_ASSERT (pkl_hash);
399
400         auto cpl_hash = reel_file_asset->hash();
401         if (cpl_hash && *cpl_hash != *pkl_hash) {
402                 return VerifyAssetResult::CPL_PKL_DIFFER;
403         }
404
405         if (actual_hash != *pkl_hash) {
406                 return VerifyAssetResult::BAD;
407         }
408
409         return VerifyAssetResult::GOOD;
410 }
411
412
413 void
414 verify_language_tag (string tag, vector<VerificationNote>& notes)
415 {
416         try {
417                 LanguageTag test (tag);
418         } catch (LanguageTagError &) {
419                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, tag});
420         }
421 }
422
423
424 static void
425 verify_picture_asset (shared_ptr<const ReelFileAsset> reel_file_asset, boost::filesystem::path file, vector<VerificationNote>& notes, function<void (float)> progress)
426 {
427         int biggest_frame = 0;
428         auto asset = dynamic_pointer_cast<PictureAsset>(reel_file_asset->asset_ref().asset());
429         auto const duration = asset->intrinsic_duration ();
430
431         auto check_and_add = [&notes](vector<VerificationNote> const& j2k_notes) {
432                 for (auto i: j2k_notes) {
433                         if (find(notes.begin(), notes.end(), i) == notes.end()) {
434                                 notes.push_back (i);
435                         }
436                 }
437         };
438
439         if (auto mono_asset = dynamic_pointer_cast<MonoPictureAsset>(reel_file_asset->asset_ref().asset())) {
440                 auto reader = mono_asset->start_read ();
441                 for (int64_t i = 0; i < duration; ++i) {
442                         auto frame = reader->get_frame (i);
443                         biggest_frame = max(biggest_frame, frame->size());
444                         vector<VerificationNote> j2k_notes;
445                         verify_j2k (frame, j2k_notes);
446                         check_and_add (j2k_notes);
447                         progress (float(i) / duration);
448                 }
449         } else if (auto stereo_asset = dynamic_pointer_cast<StereoPictureAsset>(asset)) {
450                 auto reader = stereo_asset->start_read ();
451                 for (int64_t i = 0; i < duration; ++i) {
452                         auto frame = reader->get_frame (i);
453                         biggest_frame = max(biggest_frame, max(frame->left()->size(), frame->right()->size()));
454                         vector<VerificationNote> j2k_notes;
455                         verify_j2k (frame->left(), j2k_notes);
456                         verify_j2k (frame->right(), j2k_notes);
457                         check_and_add (j2k_notes);
458                         progress (float(i) / duration);
459                 }
460
461         }
462
463         static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
464         static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
465         if (biggest_frame > max_frame) {
466                 notes.push_back ({
467                         VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
468                 });
469         } else if (biggest_frame > risky_frame) {
470                 notes.push_back ({
471                         VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
472                 });
473         }
474 }
475
476
477 static void
478 verify_main_picture_asset (
479         shared_ptr<const DCP> dcp,
480         shared_ptr<const ReelPictureAsset> reel_asset,
481         function<void (string, optional<boost::filesystem::path>)> stage,
482         function<void (float)> progress,
483         vector<VerificationNote>& notes
484         )
485 {
486         auto asset = reel_asset->asset();
487         auto const file = *asset->file();
488         stage ("Checking picture asset hash", file);
489         auto const r = verify_asset (dcp, reel_asset, progress);
490         switch (r) {
491                 case VerifyAssetResult::BAD:
492                         notes.push_back ({
493                                 VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_PICTURE_HASH, file
494                         });
495                         break;
496                 case VerifyAssetResult::CPL_PKL_DIFFER:
497                         notes.push_back ({
498                                 VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_PICTURE_HASHES, file
499                         });
500                         break;
501                 default:
502                         break;
503         }
504         stage ("Checking picture frame sizes", asset->file());
505         verify_picture_asset (reel_asset, file, notes, progress);
506
507         /* Only flat/scope allowed by Bv2.1 */
508         if (
509                 asset->size() != Size(2048, 858) &&
510                 asset->size() != Size(1998, 1080) &&
511                 asset->size() != Size(4096, 1716) &&
512                 asset->size() != Size(3996, 2160)) {
513                 notes.push_back({
514                         VerificationNote::Type::BV21_ERROR,
515                         VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS,
516                         String::compose("%1x%2", asset->size().width, asset->size().height),
517                         file
518                 });
519         }
520
521         /* Only 24, 25, 48fps allowed for 2K */
522         if (
523                 (asset->size() == Size(2048, 858) || asset->size() == Size(1998, 1080)) &&
524                 (asset->edit_rate() != Fraction(24, 1) && asset->edit_rate() != Fraction(25, 1) && asset->edit_rate() != Fraction(48, 1))
525            ) {
526                 notes.push_back({
527                         VerificationNote::Type::BV21_ERROR,
528                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K,
529                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
530                         file
531                 });
532         }
533
534         if (asset->size() == Size(4096, 1716) || asset->size() == Size(3996, 2160)) {
535                 /* Only 24fps allowed for 4K */
536                 if (asset->edit_rate() != Fraction(24, 1)) {
537                         notes.push_back({
538                                 VerificationNote::Type::BV21_ERROR,
539                                 VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K,
540                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
541                                 file
542                         });
543                 }
544
545                 /* Only 2D allowed for 4K */
546                 if (dynamic_pointer_cast<const StereoPictureAsset>(asset)) {
547                         notes.push_back({
548                                 VerificationNote::Type::BV21_ERROR,
549                                 VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D,
550                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
551                                 file
552                         });
553
554                 }
555         }
556
557 }
558
559
560 static void
561 verify_main_sound_asset (
562         shared_ptr<const DCP> dcp,
563         shared_ptr<const ReelSoundAsset> reel_asset,
564         function<void (string, optional<boost::filesystem::path>)> stage,
565         function<void (float)> progress,
566         vector<VerificationNote>& notes
567         )
568 {
569         auto asset = reel_asset->asset();
570         stage ("Checking sound asset hash", asset->file());
571         auto const r = verify_asset (dcp, reel_asset, progress);
572         switch (r) {
573                 case VerifyAssetResult::BAD:
574                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_SOUND_HASH, *asset->file()});
575                         break;
576                 case VerifyAssetResult::CPL_PKL_DIFFER:
577                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_SOUND_HASHES, *asset->file()});
578                         break;
579                 default:
580                         break;
581         }
582
583         stage ("Checking sound asset metadata", asset->file());
584
585         verify_language_tag (asset->language(), notes);
586         if (asset->sampling_rate() != 48000) {
587                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SOUND_FRAME_RATE, raw_convert<string>(asset->sampling_rate()), *asset->file()});
588         }
589 }
590
591
592 static void
593 verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, vector<VerificationNote>& notes)
594 {
595         /* XXX: is Language compulsory? */
596         if (reel_asset->language()) {
597                 verify_language_tag (*reel_asset->language(), notes);
598         }
599
600         if (!reel_asset->entry_point()) {
601                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT, reel_asset->id() });
602         } else if (reel_asset->entry_point().get()) {
603                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT, reel_asset->id() });
604         }
605 }
606
607
608 static void
609 verify_closed_caption_reel (shared_ptr<const ReelClosedCaptionAsset> reel_asset, vector<VerificationNote>& notes)
610 {
611         /* XXX: is Language compulsory? */
612         if (reel_asset->language()) {
613                 verify_language_tag (*reel_asset->language(), notes);
614         }
615
616         if (!reel_asset->entry_point()) {
617                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
618         } else if (reel_asset->entry_point().get()) {
619                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
620         }
621 }
622
623
624 struct State
625 {
626         boost::optional<string> subtitle_language;
627 };
628
629
630 /** Verify stuff that is common to both subtitles and closed captions */
631 void
632 verify_smpte_timed_text_asset (
633         shared_ptr<const SMPTESubtitleAsset> asset,
634         vector<VerificationNote>& notes
635         )
636 {
637         if (asset->language()) {
638                 verify_language_tag (*asset->language(), notes);
639         } else {
640                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE, *asset->file() });
641         }
642
643         auto const size = boost::filesystem::file_size(asset->file().get());
644         if (size > 115 * 1024 * 1024) {
645                 notes.push_back (
646                         { VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES, raw_convert<string>(size), *asset->file() }
647                         );
648         }
649
650         /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
651          * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
652          */
653         auto fonts = asset->font_data ();
654         int total_size = 0;
655         for (auto i: fonts) {
656                 total_size += i.second.size();
657         }
658         if (total_size > 10 * 1024 * 1024) {
659                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES, raw_convert<string>(total_size), asset->file().get() });
660         }
661
662         if (!asset->start_time()) {
663                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_START_TIME, asset->file().get() });
664         } else if (asset->start_time() != Time()) {
665                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SUBTITLE_START_TIME, asset->file().get() });
666         }
667 }
668
669
670 /** Verify SMPTE subtitle-only stuff */
671 void
672 verify_smpte_subtitle_asset (
673         shared_ptr<const SMPTESubtitleAsset> asset,
674         vector<VerificationNote>& notes,
675         State& state
676         )
677 {
678         if (asset->language()) {
679                 if (!state.subtitle_language) {
680                         state.subtitle_language = *asset->language();
681                 } else if (state.subtitle_language != *asset->language()) {
682                         notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES });
683                 }
684         }
685 }
686
687
688 /** Verify all subtitle stuff */
689 static void
690 verify_subtitle_asset (
691         shared_ptr<const SubtitleAsset> asset,
692         function<void (string, optional<boost::filesystem::path>)> stage,
693         boost::filesystem::path xsd_dtd_directory,
694         vector<VerificationNote>& notes,
695         State& state
696         )
697 {
698         stage ("Checking subtitle XML", asset->file());
699         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
700          * gets passed through libdcp which may clean up and therefore hide errors.
701          */
702         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
703
704         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
705         if (smpte) {
706                 verify_smpte_timed_text_asset (smpte, notes);
707                 verify_smpte_subtitle_asset (smpte, notes, state);
708         }
709 }
710
711
712 /** Verify all closed caption stuff */
713 static void
714 verify_closed_caption_asset (
715         shared_ptr<const SubtitleAsset> asset,
716         function<void (string, optional<boost::filesystem::path>)> stage,
717         boost::filesystem::path xsd_dtd_directory,
718         vector<VerificationNote>& notes
719         )
720 {
721         stage ("Checking closed caption XML", asset->file());
722         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
723          * gets passed through libdcp which may clean up and therefore hide errors.
724          */
725         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
726
727         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
728         if (smpte) {
729                 verify_smpte_timed_text_asset (smpte, notes);
730         }
731
732         if (asset->raw_xml().size() > 256 * 1024) {
733                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES, raw_convert<string>(asset->raw_xml().size()), *asset->file()});
734         }
735 }
736
737
738 static
739 void
740 verify_text_timing (
741         vector<shared_ptr<Reel>> reels,
742         int edit_rate,
743         vector<VerificationNote>& notes,
744         std::function<bool (shared_ptr<Reel>)> check,
745         std::function<string (shared_ptr<Reel>)> xml,
746         std::function<int64_t (shared_ptr<Reel>)> duration
747         )
748 {
749         /* end of last subtitle (in editable units) */
750         optional<int64_t> last_out;
751         auto too_short = false;
752         auto too_close = false;
753         auto too_early = false;
754         auto reel_overlap = false;
755         /* current reel start time (in editable units) */
756         int64_t reel_offset = 0;
757
758         std::function<void (cxml::ConstNodePtr, optional<int>, optional<Time>, int, bool)> parse;
759         parse = [&parse, &last_out, &too_short, &too_close, &too_early, &reel_offset](cxml::ConstNodePtr node, optional<int> tcr, optional<Time> start_time, int er, bool first_reel) {
760                 if (node->name() == "Subtitle") {
761                         Time in (node->string_attribute("TimeIn"), tcr);
762                         if (start_time) {
763                                 in -= *start_time;
764                         }
765                         Time out (node->string_attribute("TimeOut"), tcr);
766                         if (start_time) {
767                                 out -= *start_time;
768                         }
769                         if (first_reel && tcr && in < Time(0, 0, 4, 0, *tcr)) {
770                                 too_early = true;
771                         }
772                         auto length = out - in;
773                         if (length.as_editable_units_ceil(er) < 15) {
774                                 too_short = true;
775                         }
776                         if (last_out) {
777                                 /* XXX: this feels dubious - is it really what Bv2.1 means? */
778                                 auto distance = reel_offset + in.as_editable_units_ceil(er) - *last_out;
779                                 if (distance >= 0 && distance < 2) {
780                                         too_close = true;
781                                 }
782                         }
783                         last_out = reel_offset + out.as_editable_units_floor(er);
784                 } else {
785                         for (auto i: node->node_children()) {
786                                 parse(i, tcr, start_time, er, first_reel);
787                         }
788                 }
789         };
790
791         for (auto i = 0U; i < reels.size(); ++i) {
792                 if (!check(reels[i])) {
793                         continue;
794                 }
795
796                 /* We need to look at <Subtitle> instances in the XML being checked, so we can't use the subtitles
797                  * read in by libdcp's parser.
798                  */
799
800                 shared_ptr<cxml::Document> doc;
801                 optional<int> tcr;
802                 optional<Time> start_time;
803                 try {
804                         doc = make_shared<cxml::Document>("SubtitleReel");
805                         doc->read_string (xml(reels[i]));
806                         tcr = doc->number_child<int>("TimeCodeRate");
807                         auto start_time_string = doc->optional_string_child("StartTime");
808                         if (start_time_string) {
809                                 start_time = Time(*start_time_string, tcr);
810                         }
811                 } catch (...) {
812                         doc = make_shared<cxml::Document>("DCSubtitle");
813                         doc->read_string (xml(reels[i]));
814                 }
815                 parse (doc, tcr, start_time, edit_rate, i == 0);
816                 auto end = reel_offset + duration(reels[i]);
817                 if (last_out && *last_out > end) {
818                         reel_overlap = true;
819                 }
820                 reel_offset = end;
821         }
822
823         if (last_out && *last_out > reel_offset) {
824                 reel_overlap = true;
825         }
826
827         if (too_early) {
828                 notes.push_back({
829                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME
830                 });
831         }
832
833         if (too_short) {
834                 notes.push_back ({
835                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_DURATION
836                 });
837         }
838
839         if (too_close) {
840                 notes.push_back ({
841                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_SPACING
842                 });
843         }
844
845         if (reel_overlap) {
846                 notes.push_back ({
847                         VerificationNote::Type::ERROR, VerificationNote::Code::SUBTITLE_OVERLAPS_REEL_BOUNDARY
848                 });
849         }
850 }
851
852
/** Flags filled in by verify_text_lines_and_characters(); all start out false
 *  and are only ever set to true by that function.
 */
struct LinesCharactersResult
{
        /** a line's length tripped the warning threshold */
        bool warning_length_exceeded { false };
        /** a line's length tripped the error threshold */
        bool error_length_exceeded { false };
        /** too many lines were on screen at the same time */
        bool line_count_exceeded { false };
};
859
860
861 static
862 void
863 verify_text_lines_and_characters (
864         shared_ptr<SubtitleAsset> asset,
865         int warning_length,
866         int error_length,
867         LinesCharactersResult* result
868         )
869 {
870         class Event
871         {
872         public:
873                 Event (Time time_, float position_, int characters_)
874                         : time (time_)
875                         , position (position_)
876                         , characters (characters_)
877                 {}
878
879                 Event (Time time_, shared_ptr<Event> start_)
880                         : time (time_)
881                         , start (start_)
882                 {}
883
884                 Time time;
885                 int position; //< position from 0 at top of screen to 100 at bottom
886                 int characters;
887                 shared_ptr<Event> start;
888         };
889
890         vector<shared_ptr<Event>> events;
891
892         auto position = [](shared_ptr<const SubtitleString> sub) {
893                 switch (sub->v_align()) {
894                 case VAlign::TOP:
895                         return lrintf(sub->v_position() * 100);
896                 case VAlign::CENTER:
897                         return lrintf((0.5f + sub->v_position()) * 100);
898                 case VAlign::BOTTOM:
899                         return lrintf((1.0f - sub->v_position()) * 100);
900                 }
901
902                 return 0L;
903         };
904
905         for (auto j: asset->subtitles()) {
906                 auto text = dynamic_pointer_cast<const SubtitleString>(j);
907                 if (text) {
908                         auto in = make_shared<Event>(text->in(), position(text), text->text().length());
909                         events.push_back(in);
910                         events.push_back(make_shared<Event>(text->out(), in));
911                 }
912         }
913
914         std::sort(events.begin(), events.end(), [](shared_ptr<Event> const& a, shared_ptr<Event>const& b) {
915                 return a->time < b->time;
916         });
917
918         map<int, int> current;
919         for (auto i: events) {
920                 if (current.size() > 3) {
921                         result->line_count_exceeded = true;
922                 }
923                 for (auto j: current) {
924                         if (j.second >= warning_length) {
925                                 result->warning_length_exceeded = true;
926                         }
927                         if (j.second >= error_length) {
928                                 result->error_length_exceeded = true;
929                         }
930                 }
931
932                 if (i->start) {
933                         /* end of a subtitle */
934                         DCP_ASSERT (current.find(i->start->position) != current.end());
935                         if (current[i->start->position] == i->start->characters) {
936                                 current.erase(i->start->position);
937                         } else {
938                                 current[i->start->position] -= i->start->characters;
939                         }
940                 } else {
941                         /* start of a subtitle */
942                         if (current.find(i->position) == current.end()) {
943                                 current[i->position] = i->characters;
944                         } else {
945                                 current[i->position] += i->characters;
946                         }
947                 }
948         }
949 }
950
951
952 static
953 void
954 verify_text_timing (vector<shared_ptr<Reel>> reels, vector<VerificationNote>& notes)
955 {
956         if (reels.empty()) {
957                 return;
958         }
959
960         if (reels[0]->main_subtitle()) {
961                 verify_text_timing (reels, reels[0]->main_subtitle()->edit_rate().numerator, notes,
962                         [](shared_ptr<Reel> reel) {
963                                 return static_cast<bool>(reel->main_subtitle());
964                         },
965                         [](shared_ptr<Reel> reel) {
966                                 return reel->main_subtitle()->asset()->raw_xml();
967                         },
968                         [](shared_ptr<Reel> reel) {
969                                 return reel->main_subtitle()->actual_duration();
970                         }
971                 );
972         }
973
974         for (auto i = 0U; i < reels[0]->closed_captions().size(); ++i) {
975                 verify_text_timing (reels, reels[0]->closed_captions()[i]->edit_rate().numerator, notes,
976                         [i](shared_ptr<Reel> reel) {
977                                 return i < reel->closed_captions().size();
978                         },
979                         [i](shared_ptr<Reel> reel) {
980                                 return reel->closed_captions()[i]->asset()->raw_xml();
981                         },
982                         [i](shared_ptr<Reel> reel) {
983                                 return reel->closed_captions()[i]->actual_duration();
984                         }
985                 );
986         }
987 }
988
989
990 void
991 verify_extension_metadata (shared_ptr<CPL> cpl, vector<VerificationNote>& notes)
992 {
993         DCP_ASSERT (cpl->file());
994         cxml::Document doc ("CompositionPlaylist");
995         doc.read_file (cpl->file().get());
996
997         auto missing = false;
998         string malformed;
999
1000         if (auto reel_list = doc.node_child("ReelList")) {
1001                 auto reels = reel_list->node_children("Reel");
1002                 if (!reels.empty()) {
1003                         if (auto asset_list = reels[0]->optional_node_child("AssetList")) {
1004                                 if (auto metadata = asset_list->optional_node_child("CompositionMetadataAsset")) {
1005                                         if (auto extension_list = metadata->optional_node_child("ExtensionMetadataList")) {
1006                                                 missing = true;
1007                                                 for (auto extension: extension_list->node_children("ExtensionMetadata")) {
1008                                                         if (extension->optional_string_attribute("scope").get_value_or("") != "http://isdcf.com/ns/cplmd/app") {
1009                                                                 continue;
1010                                                         }
1011                                                         missing = false;
1012                                                         if (auto name = extension->optional_node_child("Name")) {
1013                                                                 if (name->content() != "Application") {
1014                                                                         malformed = "<Name> should be 'Application'";
1015                                                                 }
1016                                                         }
1017                                                         if (auto property_list = extension->optional_node_child("PropertyList")) {
1018                                                                 if (auto property = property_list->optional_node_child("Property")) {
1019                                                                         if (auto name = property->optional_node_child("Name")) {
1020                                                                                 if (name->content() != "DCP Constraints Profile") {
1021                                                                                         malformed = "<Name> property should be 'DCP Constraints Profile'";
1022                                                                                 }
1023                                                                         }
1024                                                                         if (auto value = property->optional_node_child("Value")) {
1025                                                                                 if (value->content() != "SMPTE-RDD-52:2020-Bv2.1") {
1026                                                                                         malformed = "<Value> property should be 'SMPTE-RDD-52:2020-Bv2.1'";
1027                                                                                 }
1028                                                                         }
1029                                                                 }
1030                                                         }
1031                                                 }
1032                                         } else {
1033                                                 missing = true;
1034                                         }
1035                                 }
1036                         }
1037                 }
1038         }
1039
1040         if (missing) {
1041                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_EXTENSION_METADATA, cpl->id(), cpl->file().get()});
1042         } else if (!malformed.empty()) {
1043                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_EXTENSION_METADATA, malformed, cpl->file().get()});
1044         }
1045 }
1046
1047
1048 bool
1049 pkl_has_encrypted_assets (shared_ptr<DCP> dcp, shared_ptr<PKL> pkl)
1050 {
1051         vector<string> encrypted;
1052         for (auto i: dcp->cpls()) {
1053                 for (auto j: i->reel_file_assets()) {
1054                         if (j->asset_ref().resolved()) {
1055                                 /* It's a bit surprising / broken but Interop subtitle assets are represented
1056                                  * in reels by ReelSubtitleAsset which inherits ReelFileAsset, so it's possible for
1057                                  * ReelFileAssets to have assets which are not MXFs.
1058                                  */
1059                                 if (auto asset = dynamic_pointer_cast<MXF>(j->asset_ref().asset())) {
1060                                         if (asset->encrypted()) {
1061                                                 encrypted.push_back(j->asset_ref().id());
1062                                         }
1063                                 }
1064                         }
1065                 }
1066         }
1067
1068         for (auto i: pkl->asset_list()) {
1069                 if (find(encrypted.begin(), encrypted.end(), i->id()) != encrypted.end()) {
1070                         return true;
1071                 }
1072         }
1073
1074         return false;
1075 }
1076
1077
1078 vector<VerificationNote>
1079 dcp::verify (
1080         vector<boost::filesystem::path> directories,
1081         function<void (string, optional<boost::filesystem::path>)> stage,
1082         function<void (float)> progress,
1083         optional<boost::filesystem::path> xsd_dtd_directory
1084         )
1085 {
1086         if (!xsd_dtd_directory) {
1087                 xsd_dtd_directory = resources_directory() / "xsd";
1088         }
1089         *xsd_dtd_directory = boost::filesystem::canonical (*xsd_dtd_directory);
1090
1091         vector<VerificationNote> notes;
1092         State state{};
1093
1094         vector<shared_ptr<DCP>> dcps;
1095         for (auto i: directories) {
1096                 dcps.push_back (make_shared<DCP>(i));
1097         }
1098
1099         for (auto dcp: dcps) {
1100                 stage ("Checking DCP", dcp->directory());
1101                 bool carry_on = true;
1102                 try {
1103                         dcp->read (&notes);
1104                 } catch (MissingAssetmapError& e) {
1105                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1106                         carry_on = false;
1107                 } catch (ReadError& e) {
1108                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1109                 } catch (XMLError& e) {
1110                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1111                 } catch (MXFFileError& e) {
1112                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1113                 } catch (cxml::Error& e) {
1114                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1115                 }
1116
1117                 if (!carry_on) {
1118                         continue;
1119                 }
1120
1121                 if (dcp->standard() != Standard::SMPTE) {
1122                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_STANDARD});
1123                 }
1124
1125                 for (auto cpl: dcp->cpls()) {
1126                         stage ("Checking CPL", cpl->file());
1127                         validate_xml (cpl->file().get(), *xsd_dtd_directory, notes);
1128
1129                         if (cpl->any_encrypted() && !cpl->all_encrypted()) {
1130                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::PARTIALLY_ENCRYPTED});
1131                         }
1132
1133                         for (auto const& i: cpl->additional_subtitle_languages()) {
1134                                 verify_language_tag (i, notes);
1135                         }
1136
1137                         if (cpl->release_territory()) {
1138                                 if (!cpl->release_territory_scope() || cpl->release_territory_scope().get() != "http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata#scope/release-territory/UNM49") {
1139                                         auto terr = cpl->release_territory().get();
1140                                         /* Must be a valid region tag, or "001" */
1141                                         try {
1142                                                 LanguageTag::RegionSubtag test (terr);
1143                                         } catch (...) {
1144                                                 if (terr != "001") {
1145                                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, terr});
1146                                                 }
1147                                         }
1148                                 }
1149                         }
1150
1151                         if (dcp->standard() == Standard::SMPTE) {
1152                                 if (!cpl->annotation_text()) {
1153                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1154                                 } else if (cpl->annotation_text().get() != cpl->content_title_text()) {
1155                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1156                                 }
1157                         }
1158
1159                         for (auto i: dcp->pkls()) {
1160                                 /* Check that the CPL's hash corresponds to the PKL */
1161                                 optional<string> h = i->hash(cpl->id());
1162                                 if (h && make_digest(ArrayData(*cpl->file())) != *h) {
1163                                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_CPL_HASHES, cpl->id(), cpl->file().get()});
1164                                 }
1165
1166                                 /* Check that any PKL with a single CPL has its AnnotationText the same as the CPL's ContentTitleText */
1167                                 optional<string> required_annotation_text;
1168                                 for (auto j: i->asset_list()) {
1169                                         /* See if this is a CPL */
1170                                         for (auto k: dcp->cpls()) {
1171                                                 if (j->id() == k->id()) {
1172                                                         if (!required_annotation_text) {
1173                                                                 /* First CPL we have found; this is the required AnnotationText unless we find another */
1174                                                                 required_annotation_text = cpl->content_title_text();
1175                                                         } else {
1176                                                                 /* There's more than one CPL so we don't care what the PKL's AnnotationText is */
1177                                                                 required_annotation_text = boost::none;
1178                                                         }
1179                                                 }
1180                                         }
1181                                 }
1182
1183                                 if (required_annotation_text && i->annotation_text() != required_annotation_text) {
1184                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL, i->id(), i->file().get()});
1185                                 }
1186                         }
1187
1188                         /* set to true if any reel has a MainSubtitle */
1189                         auto have_main_subtitle = false;
1190                         /* set to true if any reel has no MainSubtitle */
1191                         auto have_no_main_subtitle = false;
1192                         /* fewest number of closed caption assets seen in a reel */
1193                         size_t fewest_closed_captions = SIZE_MAX;
1194                         /* most number of closed caption assets seen in a reel */
1195                         size_t most_closed_captions = 0;
1196                         map<Marker, Time> markers_seen;
1197
1198                         for (auto reel: cpl->reels()) {
1199                                 stage ("Checking reel", optional<boost::filesystem::path>());
1200
1201                                 for (auto i: reel->assets()) {
1202                                         if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1203                                                 notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_DURATION, i->id()});
1204                                         }
1205                                         if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1206                                                 notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_INTRINSIC_DURATION, i->id()});
1207                                         }
1208                                         auto file_asset = dynamic_pointer_cast<ReelFileAsset>(i);
1209                                         if (file_asset && !file_asset->hash()) {
1210                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_HASH, i->id()});
1211                                         }
1212                                 }
1213
1214                                 if (dcp->standard() == Standard::SMPTE) {
1215                                         boost::optional<int64_t> duration;
1216                                         for (auto i: reel->assets()) {
1217                                                 if (!duration) {
1218                                                         duration = i->actual_duration();
1219                                                 } else if (*duration != i->actual_duration()) {
1220                                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_ASSET_DURATION});
1221                                                         break;
1222                                                 }
1223                                         }
1224                                 }
1225
1226                                 if (reel->main_picture()) {
1227                                         /* Check reel stuff */
1228                                         auto const frame_rate = reel->main_picture()->frame_rate();
1229                                         if (frame_rate.denominator != 1 ||
1230                                             (frame_rate.numerator != 24 &&
1231                                              frame_rate.numerator != 25 &&
1232                                              frame_rate.numerator != 30 &&
1233                                              frame_rate.numerator != 48 &&
1234                                              frame_rate.numerator != 50 &&
1235                                              frame_rate.numerator != 60 &&
1236                                              frame_rate.numerator != 96)) {
1237                                                 notes.push_back ({
1238                                                         VerificationNote::Type::ERROR,
1239                                                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE,
1240                                                         String::compose("%1/%2", frame_rate.numerator, frame_rate.denominator)
1241                                                 });
1242                                         }
1243                                         /* Check asset */
1244                                         if (reel->main_picture()->asset_ref().resolved()) {
1245                                                 verify_main_picture_asset (dcp, reel->main_picture(), stage, progress, notes);
1246                                         }
1247                                 }
1248
1249                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
1250                                         verify_main_sound_asset (dcp, reel->main_sound(), stage, progress, notes);
1251                                 }
1252
1253                                 if (reel->main_subtitle()) {
1254                                         verify_main_subtitle_reel (reel->main_subtitle(), notes);
1255                                         if (reel->main_subtitle()->asset_ref().resolved()) {
1256                                                 verify_subtitle_asset (reel->main_subtitle()->asset(), stage, *xsd_dtd_directory, notes, state);
1257                                         }
1258                                         have_main_subtitle = true;
1259                                 } else {
1260                                         have_no_main_subtitle = true;
1261                                 }
1262
1263                                 for (auto i: reel->closed_captions()) {
1264                                         verify_closed_caption_reel (i, notes);
1265                                         if (i->asset_ref().resolved()) {
1266                                                 verify_closed_caption_asset (i->asset(), stage, *xsd_dtd_directory, notes);
1267                                         }
1268                                 }
1269
1270                                 if (reel->main_markers()) {
1271                                         for (auto const& i: reel->main_markers()->get()) {
1272                                                 markers_seen.insert (i);
1273                                         }
1274                                 }
1275
1276                                 fewest_closed_captions = std::min (fewest_closed_captions, reel->closed_captions().size());
1277                                 most_closed_captions = std::max (most_closed_captions, reel->closed_captions().size());
1278                         }
1279
1280                         verify_text_timing (cpl->reels(), notes);
1281
1282                         if (dcp->standard() == Standard::SMPTE) {
1283
1284                                 if (have_main_subtitle && have_no_main_subtitle) {
1285                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS});
1286                                 }
1287
1288                                 if (fewest_closed_captions != most_closed_captions) {
1289                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS});
1290                                 }
1291
1292                                 if (cpl->content_kind() == ContentKind::FEATURE) {
1293                                         if (markers_seen.find(Marker::FFEC) == markers_seen.end()) {
1294                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFEC_IN_FEATURE});
1295                                         }
1296                                         if (markers_seen.find(Marker::FFMC) == markers_seen.end()) {
1297                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFMC_IN_FEATURE});
1298                                         }
1299                                 }
1300
1301                                 auto ffoc = markers_seen.find(Marker::FFOC);
1302                                 if (ffoc == markers_seen.end()) {
1303                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_FFOC});
1304                                 } else if (ffoc->second.e != 1) {
1305                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_FFOC, raw_convert<string>(ffoc->second.e)});
1306                                 }
1307
1308                                 auto lfoc = markers_seen.find(Marker::LFOC);
1309                                 if (lfoc == markers_seen.end()) {
1310                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_LFOC});
1311                                 } else {
1312                                         auto lfoc_time = lfoc->second.as_editable_units_ceil(lfoc->second.tcr);
1313                                         if (lfoc_time != (cpl->reels().back()->duration() - 1)) {
1314                                                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_LFOC, raw_convert<string>(lfoc_time)});
1315                                         }
1316                                 }
1317
1318                                 LinesCharactersResult result;
1319                                 for (auto reel: cpl->reels()) {
1320                                         if (reel->main_subtitle() && reel->main_subtitle()->asset()) {
1321                                                 verify_text_lines_and_characters (reel->main_subtitle()->asset(), 52, 79, &result);
1322                                         }
1323                                 }
1324
1325                                 if (result.line_count_exceeded) {
1326                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT});
1327                                 }
1328                                 if (result.error_length_exceeded) {
1329                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH});
1330                                 } else if (result.warning_length_exceeded) {
1331                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH});
1332                                 }
1333
1334                                 result = LinesCharactersResult();
1335                                 for (auto reel: cpl->reels()) {
1336                                         for (auto i: reel->closed_captions()) {
1337                                                 if (i->asset()) {
1338                                                         verify_text_lines_and_characters (i->asset(), 32, 32, &result);
1339                                                 }
1340                                         }
1341                                 }
1342
1343                                 if (result.line_count_exceeded) {
1344                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT});
1345                                 }
1346                                 if (result.error_length_exceeded) {
1347                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH});
1348                                 }
1349
1350                                 if (!cpl->full_content_title_text()) {
1351                                         /* Since FullContentTitleText is assumed always to exist if there's a CompositionMetadataAsset we
1352                                          * can use it as a proxy for CompositionMetadataAsset's existence.
1353                                          */
1354                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA, cpl->id(), cpl->file().get()});
1355                                 } else if (!cpl->version_number()) {
1356                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER, cpl->id(), cpl->file().get()});
1357                                 }
1358
1359                                 verify_extension_metadata (cpl, notes);
1360
1361                                 if (cpl->any_encrypted()) {
1362                                         cxml::Document doc ("CompositionPlaylist");
1363                                         DCP_ASSERT (cpl->file());
1364                                         doc.read_file (cpl->file().get());
1365                                         if (!doc.optional_node_child("Signature")) {
1366                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT, cpl->id(), cpl->file().get()});
1367                                         }
1368                                 }
1369                         }
1370                 }
1371
1372                 for (auto pkl: dcp->pkls()) {
1373                         stage ("Checking PKL", pkl->file());
1374                         validate_xml (pkl->file().get(), *xsd_dtd_directory, notes);
1375                         if (pkl_has_encrypted_assets(dcp, pkl)) {
1376                                 cxml::Document doc ("PackingList");
1377                                 doc.read_file (pkl->file().get());
1378                                 if (!doc.optional_node_child("Signature")) {
1379                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT, pkl->id(), pkl->file().get()});
1380                                 }
1381                         }
1382                 }
1383
1384                 if (dcp->asset_map_path()) {
1385                         stage ("Checking ASSETMAP", dcp->asset_map_path().get());
1386                         validate_xml (dcp->asset_map_path().get(), *xsd_dtd_directory, notes);
1387                 } else {
1388                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_ASSETMAP});
1389                 }
1390         }
1391
1392         return notes;
1393 }
1394
1395
1396 string
1397 dcp::note_to_string (VerificationNote note)
1398 {
1399         /** These strings should say what is wrong, incorporating any extra details (ID, filenames etc.).
1400          *
1401          *  e.g. "ClosedCaption asset has no <EntryPoint> tag.",
1402          *  not "ClosedCaption assets must have an <EntryPoint> tag."
1403          *
1404          *  It's OK to use XML tag names where they are clear.
1405          *  If both ID and filename are available, use only the ID.
1406          *  End messages with a full stop.
1407          *  Messages should not mention whether or not their errors are a part of Bv2.1.
1408          */
1409         switch (note.code()) {
1410         case VerificationNote::Code::FAILED_READ:
1411                 return *note.note();
1412         case VerificationNote::Code::MISMATCHED_CPL_HASHES:
1413                 return String::compose("The hash of the CPL %1 in the PKL does not agree with the CPL file.", note.note().get());
1414         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE:
1415                 return String::compose("The picture in a reel has an invalid frame rate %1.", note.note().get());
1416         case VerificationNote::Code::INCORRECT_PICTURE_HASH:
1417                 return String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
1418         case VerificationNote::Code::MISMATCHED_PICTURE_HASHES:
1419                 return String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
1420         case VerificationNote::Code::INCORRECT_SOUND_HASH:
1421                 return String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
1422         case VerificationNote::Code::MISMATCHED_SOUND_HASHES:
1423                 return String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
1424         case VerificationNote::Code::EMPTY_ASSET_PATH:
1425                 return "The asset map contains an empty asset path.";
1426         case VerificationNote::Code::MISSING_ASSET:
1427                 return String::compose("The file %1 for an asset in the asset map cannot be found.", note.file()->filename());
1428         case VerificationNote::Code::MISMATCHED_STANDARD:
1429                 return "The DCP contains both SMPTE and Interop parts.";
1430         case VerificationNote::Code::INVALID_XML:
1431                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
1432         case VerificationNote::Code::MISSING_ASSETMAP:
1433                 return "No ASSETMAP or ASSETMAP.xml was found.";
1434         case VerificationNote::Code::INVALID_INTRINSIC_DURATION:
1435                 return String::compose("The intrinsic duration of the asset %1 is less than 1 second.", note.note().get());
1436         case VerificationNote::Code::INVALID_DURATION:
1437                 return String::compose("The duration of the asset %1 is less than 1 second.", note.note().get());
1438         case VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1439                 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
1440         case VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1441                 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
1442         case VerificationNote::Code::EXTERNAL_ASSET:
1443                 return String::compose("The asset %1 that this DCP refers to is not included in the DCP.  It may be a VF.", note.note().get());
1444         case VerificationNote::Code::INVALID_STANDARD:
1445                 return "This DCP does not use the SMPTE standard.";
1446         case VerificationNote::Code::INVALID_LANGUAGE:
1447                 return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
1448         case VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS:
1449                 return String::compose("The size %1 of picture asset %2 is not allowed.", note.note().get(), note.file()->filename());
1450         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K:
1451                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 2K DCPs.", note.note().get(), note.file()->filename());
1452         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K:
1453                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 4K DCPs.", note.note().get(), note.file()->filename());
1454         case VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D:
1455                 return "3D 4K DCPs are not allowed.";
1456         case VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES:
1457                 return String::compose("The size %1 of the closed caption asset %2 is larger than the 256KB maximum.", note.note().get(), note.file()->filename());
1458         case VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES:
1459                 return String::compose("The size %1 of the timed text asset %2 is larger than the 115MB maximum.", note.note().get(), note.file()->filename());
1460         case VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES:
1461                 return String::compose("The size %1 of the fonts in timed text asset %2 is larger than the 10MB maximum.", note.note().get(), note.file()->filename());
1462         case VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE:
1463                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <Language> tag.", note.file()->filename());
1464         case VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES:
1465                 return "Some subtitle assets have different <Language> tags than others";
1466         case VerificationNote::Code::MISSING_SUBTITLE_START_TIME:
1467                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <StartTime> tag.", note.file()->filename());
1468         case VerificationNote::Code::INVALID_SUBTITLE_START_TIME:
1469                 return String::compose("The XML for a SMPTE subtitle asset %1 has a non-zero <StartTime> tag.", note.file()->filename());
1470         case VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME:
1471                 return "The first subtitle or closed caption is less than 4 seconds from the start of the DCP.";
1472         case VerificationNote::Code::INVALID_SUBTITLE_DURATION:
1473                 return "At least one subtitle lasts less than 15 frames.";
1474         case VerificationNote::Code::INVALID_SUBTITLE_SPACING:
1475                 return "At least one pair of subtitles is separated by less than 2 frames.";
1476         case VerificationNote::Code::SUBTITLE_OVERLAPS_REEL_BOUNDARY:
1477                 return "At least one subtitle extends outside of its reel.";
1478         case VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT:
1479                 return "There are more than 3 subtitle lines in at least one place in the DCP.";
1480         case VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH:
1481                 return "There are more than 52 characters in at least one subtitle line.";
1482         case VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH:
1483                 return "There are more than 79 characters in at least one subtitle line.";
1484         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT:
1485                 return "There are more than 3 closed caption lines in at least one place.";
1486         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH:
1487                 return "There are more than 32 characters in at least one closed caption line.";
1488         case VerificationNote::Code::INVALID_SOUND_FRAME_RATE:
1489                 return String::compose("The sound asset %1 has a sampling rate of %2", note.file()->filename(), note.note().get());
1490         case VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT:
1491                 return String::compose("The CPL %1 has no <AnnotationText> tag.", note.note().get());
1492         case VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT:
1493                 return String::compose("The CPL %1 has an <AnnotationText> which differs from its <ContentTitleText>", note.note().get());
1494         case VerificationNote::Code::MISMATCHED_ASSET_DURATION:
1495                 return "All assets in a reel do not have the same duration.";
1496         case VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS:
1497                 return "At least one reel contains a subtitle asset, but some reel(s) do not";
1498         case VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS:
1499                 return "At least one reel has closed captions, but reels have different numbers of closed caption assets.";
1500         case VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT:
1501                 return String::compose("The subtitle asset %1 has no <EntryPoint> tag.", note.note().get());
1502         case VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT:
1503                 return String::compose("The subtitle asset %1 has an <EntryPoint> other than 0.", note.note().get());
1504         case VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT:
1505                 return String::compose("The closed caption asset %1 has no <EntryPoint> tag.", note.note().get());
1506         case VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT:
1507                 return String::compose("The closed caption asset %1 has an <EntryPoint> other than 0.", note.note().get());
1508         case VerificationNote::Code::MISSING_HASH:
1509                 return String::compose("The asset %1 has no <Hash> tag in the CPL.", note.note().get());
1510         case VerificationNote::Code::MISSING_FFEC_IN_FEATURE:
1511                 return "The DCP is marked as a Feature but there is no FFEC (first frame of end credits) marker";
1512         case VerificationNote::Code::MISSING_FFMC_IN_FEATURE:
1513                 return "The DCP is marked as a Feature but there is no FFMC (first frame of moving credits) marker";
1514         case VerificationNote::Code::MISSING_FFOC:
1515                 return "There should be a FFOC (first frame of content) marker";
1516         case VerificationNote::Code::MISSING_LFOC:
1517                 return "There should be a LFOC (last frame of content) marker";
1518         case VerificationNote::Code::INCORRECT_FFOC:
1519                 return String::compose("The FFOC marker is %1 instead of 1", note.note().get());
1520         case VerificationNote::Code::INCORRECT_LFOC:
1521                 return String::compose("The LFOC marker is %1 instead of 1 less than the duration of the last reel.", note.note().get());
1522         case VerificationNote::Code::MISSING_CPL_METADATA:
1523                 return String::compose("The CPL %1 has no <CompositionMetadataAsset> tag.", note.note().get());
1524         case VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER:
1525                 return String::compose("The CPL %1 has no <VersionNumber> in its <CompositionMetadataAsset>.", note.note().get());
1526         case VerificationNote::Code::MISSING_EXTENSION_METADATA:
1527                 return String::compose("The CPL %1 has no <ExtensionMetadata> in its <CompositionMetadataAsset>.", note.note().get());
1528         case VerificationNote::Code::INVALID_EXTENSION_METADATA:
1529                 return String::compose("The CPL %1 has a malformed <ExtensionMetadata> (%2).", note.file()->filename(), note.note().get());
1530         case VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT:
1531                 return String::compose("The CPL %1, which has encrypted content, is not signed.", note.note().get());
1532         case VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT:
1533                 return String::compose("The PKL %1, which has encrypted content, is not signed.", note.note().get());
1534         case VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL:
1535                 return String::compose("The PKL %1 has only one CPL but its <AnnotationText> does not match the CPL's <ContentTitleText>.", note.note().get());
1536         case VerificationNote::Code::PARTIALLY_ENCRYPTED:
1537                 return "Some assets are encrypted but some are not.";
1538         case VerificationNote::Code::INVALID_JPEG2000_CODESTREAM:
1539                 return String::compose("The JPEG2000 codestream for at least one frame is invalid (%1)", note.note().get());
1540         case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_2K:
1541                 return String::compose("The JPEG2000 codestream uses %1 guard bits in a 2K image instead of 1.", note.note().get());
1542         case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_4K:
1543                 return String::compose("The JPEG2000 codestream uses %1 guard bits in a 4K image instead of 2.", note.note().get());
1544         case VerificationNote::Code::INVALID_JPEG2000_TILE_SIZE:
1545                 return "The JPEG2000 tile size is not the same as the image size.";
1546         case VerificationNote::Code::INVALID_JPEG2000_CODE_BLOCK_WIDTH:
1547                 return String::compose("The JPEG2000 codestream uses a code block width of %1 instead of 32.", note.note().get());
1548         case VerificationNote::Code::INVALID_JPEG2000_CODE_BLOCK_HEIGHT:
1549                 return String::compose("The JPEG2000 codestream uses a code block height of %1 instead of 32.", note.note().get());
1550         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER_COUNT_FOR_2K:
1551                 return String::compose("%1 POC markers found in 2K JPEG2000 codestream instead of 0.", note.note().get());
1552         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER_COUNT_FOR_4K:
1553                 return String::compose("%1 POC markers found in 4K JPEG2000 codestream instead of 1.", note.note().get());
1554         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER:
1555                 return String::compose("Incorrect POC marker content found (%1)", note.note().get());
1556         case VerificationNote::Code::INVALID_JPEG2000_POC_MARKER_LOCATION:
1557                 return "POC marker found outside main header";
1558         case VerificationNote::Code::INVALID_JPEG2000_TILE_PARTS_FOR_2K:
1559                 return String::compose("The JPEG2000 codestream has %1 tile parts in a 2K image instead of 3.", note.note().get());
1560         case VerificationNote::Code::INVALID_JPEG2000_TILE_PARTS_FOR_4K:
1561                 return String::compose("The JPEG2000 codestream has %1 tile parts in a 4K image instead of 6.", note.note().get());
1562         case VerificationNote::Code::MISSING_JPEG200_TLM_MARKER:
1563                 return "No TLM marker was found in a JPEG2000 codestream.";
1564         }
1565
1566         return "";
1567 }
1568
1569
1570 bool
1571 dcp::operator== (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
1572 {
1573         return a.type() == b.type() && a.code() == b.code() && a.note() == b.note() && a.file() == b.file() && a.line() == b.line();
1574 }
1575
1576
1577 std::ostream&
1578 dcp::operator<< (std::ostream& s, dcp::VerificationNote const& note)
1579 {
1580         s << note_to_string (note);
1581         if (note.note()) {
1582                 s << " [" << note.note().get() << "]";
1583         }
1584         if (note.file()) {
1585                 s << " [" << note.file().get() << "]";
1586         }
1587         if (note.line()) {
1588                 s << " [" << note.line().get() << "]";
1589         }
1590         return s;
1591 }
1592