Rename ReelMXF -> ReelFileAsset.
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2021 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34
35 /** @file  src/verify.cc
36  *  @brief dcp::verify() method and associated code
37  */
38
39
40 #include "verify.h"
41 #include "dcp.h"
42 #include "cpl.h"
43 #include "reel.h"
44 #include "reel_closed_caption_asset.h"
45 #include "reel_picture_asset.h"
46 #include "reel_sound_asset.h"
47 #include "reel_subtitle_asset.h"
48 #include "interop_subtitle_asset.h"
49 #include "mono_picture_asset.h"
50 #include "mono_picture_frame.h"
51 #include "stereo_picture_asset.h"
52 #include "stereo_picture_frame.h"
53 #include "exceptions.h"
54 #include "compose.hpp"
55 #include "raw_convert.h"
56 #include "reel_markers_asset.h"
57 #include "smpte_subtitle_asset.h"
58 #include <xercesc/util/PlatformUtils.hpp>
59 #include <xercesc/parsers/XercesDOMParser.hpp>
60 #include <xercesc/parsers/AbstractDOMParser.hpp>
61 #include <xercesc/sax/HandlerBase.hpp>
62 #include <xercesc/dom/DOMImplementation.hpp>
63 #include <xercesc/dom/DOMImplementationLS.hpp>
64 #include <xercesc/dom/DOMImplementationRegistry.hpp>
65 #include <xercesc/dom/DOMLSParser.hpp>
66 #include <xercesc/dom/DOMException.hpp>
67 #include <xercesc/dom/DOMDocument.hpp>
68 #include <xercesc/dom/DOMNodeList.hpp>
69 #include <xercesc/dom/DOMError.hpp>
70 #include <xercesc/dom/DOMLocator.hpp>
71 #include <xercesc/dom/DOMNamedNodeMap.hpp>
72 #include <xercesc/dom/DOMAttr.hpp>
73 #include <xercesc/dom/DOMErrorHandler.hpp>
74 #include <xercesc/framework/LocalFileInputSource.hpp>
75 #include <xercesc/framework/MemBufInputSource.hpp>
76 #include <boost/algorithm/string.hpp>
77 #include <map>
78 #include <vector>
79 #include <iostream>
80
81
82 using std::list;
83 using std::vector;
84 using std::string;
85 using std::cout;
86 using std::map;
87 using std::max;
88 using std::set;
89 using std::shared_ptr;
90 using std::make_shared;
91 using boost::optional;
92 using boost::function;
93 using std::dynamic_pointer_cast;
94
95
96 using namespace dcp;
97 using namespace xercesc;
98
99
100 static
101 string
102 xml_ch_to_string (XMLCh const * a)
103 {
104         char* x = XMLString::transcode(a);
105         string const o(x);
106         XMLString::release(&x);
107         return o;
108 }
109
110
111 class XMLValidationError
112 {
113 public:
114         XMLValidationError (SAXParseException const & e)
115                 : _message (xml_ch_to_string(e.getMessage()))
116                 , _line (e.getLineNumber())
117                 , _column (e.getColumnNumber())
118                 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
119                 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
120         {
121
122         }
123
124         string message () const {
125                 return _message;
126         }
127
128         uint64_t line () const {
129                 return _line;
130         }
131
132         uint64_t column () const {
133                 return _column;
134         }
135
136         string public_id () const {
137                 return _public_id;
138         }
139
140         string system_id () const {
141                 return _system_id;
142         }
143
144 private:
145         string _message;
146         uint64_t _line;
147         uint64_t _column;
148         string _public_id;
149         string _system_id;
150 };
151
152
153 class DCPErrorHandler : public ErrorHandler
154 {
155 public:
156         void warning(const SAXParseException& e)
157         {
158                 maybe_add (XMLValidationError(e));
159         }
160
161         void error(const SAXParseException& e)
162         {
163                 maybe_add (XMLValidationError(e));
164         }
165
166         void fatalError(const SAXParseException& e)
167         {
168                 maybe_add (XMLValidationError(e));
169         }
170
171         void resetErrors() {
172                 _errors.clear ();
173         }
174
175         list<XMLValidationError> errors () const {
176                 return _errors;
177         }
178
179 private:
180         void maybe_add (XMLValidationError e)
181         {
182                 /* XXX: nasty hack */
183                 if (
184                         e.message().find("schema document") != string::npos &&
185                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
186                         ) {
187                         return;
188                 }
189
190                 _errors.push_back (e);
191         }
192
193         list<XMLValidationError> _errors;
194 };
195
196
197 class StringToXMLCh
198 {
199 public:
200         StringToXMLCh (string a)
201         {
202                 _buffer = XMLString::transcode(a.c_str());
203         }
204
205         StringToXMLCh (StringToXMLCh const&) = delete;
206         StringToXMLCh& operator= (StringToXMLCh const&) = delete;
207
208         ~StringToXMLCh ()
209         {
210                 XMLString::release (&_buffer);
211         }
212
213         XMLCh const * get () const {
214                 return _buffer;
215         }
216
217 private:
218         XMLCh* _buffer;
219 };
220
221
222 class LocalFileResolver : public EntityResolver
223 {
224 public:
225         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
226                 : _xsd_dtd_directory (xsd_dtd_directory)
227         {
228                 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
229                  * found without being here.
230                  */
231                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
232                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
233                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
234                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
235                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
236                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
237                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
238                 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
239                 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
240                 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
241                 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
242                 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
243                 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
244         }
245
246         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
247         {
248                 if (!system_id) {
249                         return 0;
250                 }
251                 auto system_id_str = xml_ch_to_string (system_id);
252                 auto p = _xsd_dtd_directory;
253                 if (_files.find(system_id_str) == _files.end()) {
254                         p /= system_id_str;
255                 } else {
256                         p /= _files[system_id_str];
257                 }
258                 StringToXMLCh ch (p.string());
259                 return new LocalFileInputSource(ch.get());
260         }
261
262 private:
263         void add (string uri, string file)
264         {
265                 _files[uri] = file;
266         }
267
268         std::map<string, string> _files;
269         boost::filesystem::path _xsd_dtd_directory;
270 };
271
272
273 static void
274 parse (XercesDOMParser& parser, boost::filesystem::path xml)
275 {
276         parser.parse(xml.string().c_str());
277 }
278
279
280 static void
281 parse (XercesDOMParser& parser, string xml)
282 {
283         xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
284         parser.parse(buf);
285 }
286
287
288 template <class T>
289 void
290 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, vector<VerificationNote>& notes)
291 {
292         try {
293                 XMLPlatformUtils::Initialize ();
294         } catch (XMLException& e) {
295                 throw MiscError ("Failed to initialise xerces library");
296         }
297
298         DCPErrorHandler error_handler;
299
300         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
301         {
302                 XercesDOMParser parser;
303                 parser.setValidationScheme(XercesDOMParser::Val_Always);
304                 parser.setDoNamespaces(true);
305                 parser.setDoSchema(true);
306
307                 vector<string> schema;
308                 schema.push_back("xml.xsd");
309                 schema.push_back("xmldsig-core-schema.xsd");
310                 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
311                 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
312                 schema.push_back("SMPTE-429-9-2007-AM.xsd");
313                 schema.push_back("Main-Stereo-Picture-CPL.xsd");
314                 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
315                 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
316                 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
317                 schema.push_back("DCSubtitle.v1.mattsson.xsd");
318                 schema.push_back("DCDMSubtitle-2010.xsd");
319                 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
320                 schema.push_back("SMPTE-429-16.xsd");
321                 schema.push_back("Dolby-2012-AD.xsd");
322                 schema.push_back("SMPTE-429-10-2008.xsd");
323                 schema.push_back("xlink.xsd");
324                 schema.push_back("SMPTE-335-2012.xsd");
325                 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
326                 schema.push_back("isdcf-mca.xsd");
327                 schema.push_back("SMPTE-429-12-2008.xsd");
328
329                 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
330                  * Schemas that are not mentioned in this list are not read, and the things
331                  * they describe are not checked.
332                  */
333                 string locations;
334                 for (auto i: schema) {
335                         locations += String::compose("%1 %1 ", i, i);
336                 }
337
338                 parser.setExternalSchemaLocation(locations.c_str());
339                 parser.setValidationSchemaFullChecking(true);
340                 parser.setErrorHandler(&error_handler);
341
342                 LocalFileResolver resolver (xsd_dtd_directory);
343                 parser.setEntityResolver(&resolver);
344
345                 try {
346                         parser.resetDocumentPool();
347                         parse(parser, xml);
348                 } catch (XMLException& e) {
349                         throw MiscError(xml_ch_to_string(e.getMessage()));
350                 } catch (DOMException& e) {
351                         throw MiscError(xml_ch_to_string(e.getMessage()));
352                 } catch (...) {
353                         throw MiscError("Unknown exception from xerces");
354                 }
355         }
356
357         XMLPlatformUtils::Terminate ();
358
359         for (auto i: error_handler.errors()) {
360                 notes.push_back ({
361                         VerificationNote::Type::ERROR,
362                         VerificationNote::Code::INVALID_XML,
363                         i.message(),
364                         boost::trim_copy(i.public_id() + " " + i.system_id()),
365                         i.line()
366                 });
367         }
368 }
369
370
/** Possible outcomes of checking an asset's hashes */
enum class VerifyAssetResult
{
        /** no problem was found */
        GOOD,
        /** the hash in the CPL is different to the one in the PKL */
        CPL_PKL_DIFFER,
        /** the hash calculated from the asset is different to the one in the PKL */
        BAD
};
376
377
378 static VerifyAssetResult
379 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
380 {
381         auto const actual_hash = reel_file_asset->asset_ref()->hash(progress);
382
383         auto pkls = dcp->pkls();
384         /* We've read this DCP in so it must have at least one PKL */
385         DCP_ASSERT (!pkls.empty());
386
387         auto asset = reel_file_asset->asset_ref().asset();
388
389         optional<string> pkl_hash;
390         for (auto i: pkls) {
391                 pkl_hash = i->hash (reel_file_asset->asset_ref()->id());
392                 if (pkl_hash) {
393                         break;
394                 }
395         }
396
397         DCP_ASSERT (pkl_hash);
398
399         auto cpl_hash = reel_file_asset->hash();
400         if (cpl_hash && *cpl_hash != *pkl_hash) {
401                 return VerifyAssetResult::CPL_PKL_DIFFER;
402         }
403
404         if (actual_hash != *pkl_hash) {
405                 return VerifyAssetResult::BAD;
406         }
407
408         return VerifyAssetResult::GOOD;
409 }
410
411
412 void
413 verify_language_tag (string tag, vector<VerificationNote>& notes)
414 {
415         try {
416                 LanguageTag test (tag);
417         } catch (LanguageTagError &) {
418                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, tag});
419         }
420 }
421
422
/** Possible outcomes of checking the sizes of the frames in a picture asset */
enum class VerifyPictureAssetResult {
        /** no frame is bigger than the "risky" size */
        GOOD,
        /** the biggest frame is over the "risky" size but not over the maximum */
        FRAME_NEARLY_TOO_LARGE,
        /** the biggest frame is over the maximum size */
        BAD,
};
429
430
431 int
432 biggest_frame_size (shared_ptr<const MonoPictureFrame> frame)
433 {
434         return frame->size ();
435 }
436
437 int
438 biggest_frame_size (shared_ptr<const StereoPictureFrame> frame)
439 {
440         return max(frame->left()->size(), frame->right()->size());
441 }
442
443
444 template <class A, class R, class F>
445 optional<VerifyPictureAssetResult>
446 verify_picture_asset_type (shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
447 {
448         auto asset = dynamic_pointer_cast<A>(reel_file_asset->asset_ref().asset());
449         if (!asset) {
450                 return optional<VerifyPictureAssetResult>();
451         }
452
453         int biggest_frame = 0;
454         auto reader = asset->start_read ();
455         auto const duration = asset->intrinsic_duration ();
456         for (int64_t i = 0; i < duration; ++i) {
457                 shared_ptr<const F> frame = reader->get_frame (i);
458                 biggest_frame = max(biggest_frame, biggest_frame_size(frame));
459                 progress (float(i) / duration);
460         }
461
462         static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
463         static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
464         if (biggest_frame > max_frame) {
465                 return VerifyPictureAssetResult::BAD;
466         } else if (biggest_frame > risky_frame) {
467                 return VerifyPictureAssetResult::FRAME_NEARLY_TOO_LARGE;
468         }
469
470         return VerifyPictureAssetResult::GOOD;
471 }
472
473
474 static VerifyPictureAssetResult
475 verify_picture_asset (shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
476 {
477         auto r = verify_picture_asset_type<MonoPictureAsset, MonoPictureAssetReader, MonoPictureFrame>(reel_file_asset, progress);
478         if (!r) {
479                 r = verify_picture_asset_type<StereoPictureAsset, StereoPictureAssetReader, StereoPictureFrame>(reel_file_asset, progress);
480         }
481
482         DCP_ASSERT (r);
483         return *r;
484 }
485
486
487 static void
488 verify_main_picture_asset (
489         shared_ptr<const DCP> dcp,
490         shared_ptr<const ReelPictureAsset> reel_asset,
491         function<void (string, optional<boost::filesystem::path>)> stage,
492         function<void (float)> progress,
493         vector<VerificationNote>& notes
494         )
495 {
496         auto asset = reel_asset->asset();
497         auto const file = *asset->file();
498         stage ("Checking picture asset hash", file);
499         auto const r = verify_asset (dcp, reel_asset, progress);
500         switch (r) {
501                 case VerifyAssetResult::BAD:
502                         notes.push_back ({
503                                 VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_PICTURE_HASH, file
504                         });
505                         break;
506                 case VerifyAssetResult::CPL_PKL_DIFFER:
507                         notes.push_back ({
508                                 VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_PICTURE_HASHES, file
509                         });
510                         break;
511                 default:
512                         break;
513         }
514         stage ("Checking picture frame sizes", asset->file());
515         auto const pr = verify_picture_asset (reel_asset, progress);
516         switch (pr) {
517                 case VerifyPictureAssetResult::BAD:
518                         notes.push_back ({
519                                 VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
520                         });
521                         break;
522                 case VerifyPictureAssetResult::FRAME_NEARLY_TOO_LARGE:
523                         notes.push_back ({
524                                 VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
525                         });
526                         break;
527                 default:
528                         break;
529         }
530
531         /* Only flat/scope allowed by Bv2.1 */
532         if (
533                 asset->size() != Size(2048, 858) &&
534                 asset->size() != Size(1998, 1080) &&
535                 asset->size() != Size(4096, 1716) &&
536                 asset->size() != Size(3996, 2160)) {
537                 notes.push_back({
538                         VerificationNote::Type::BV21_ERROR,
539                         VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS,
540                         String::compose("%1x%2", asset->size().width, asset->size().height),
541                         file
542                 });
543         }
544
545         /* Only 24, 25, 48fps allowed for 2K */
546         if (
547                 (asset->size() == Size(2048, 858) || asset->size() == Size(1998, 1080)) &&
548                 (asset->edit_rate() != Fraction(24, 1) && asset->edit_rate() != Fraction(25, 1) && asset->edit_rate() != Fraction(48, 1))
549            ) {
550                 notes.push_back({
551                         VerificationNote::Type::BV21_ERROR,
552                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K,
553                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
554                         file
555                 });
556         }
557
558         if (asset->size() == Size(4096, 1716) || asset->size() == Size(3996, 2160)) {
559                 /* Only 24fps allowed for 4K */
560                 if (asset->edit_rate() != Fraction(24, 1)) {
561                         notes.push_back({
562                                 VerificationNote::Type::BV21_ERROR,
563                                 VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K,
564                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
565                                 file
566                         });
567                 }
568
569                 /* Only 2D allowed for 4K */
570                 if (dynamic_pointer_cast<const StereoPictureAsset>(asset)) {
571                         notes.push_back({
572                                 VerificationNote::Type::BV21_ERROR,
573                                 VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D,
574                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
575                                 file
576                         });
577
578                 }
579         }
580
581 }
582
583
584 static void
585 verify_main_sound_asset (
586         shared_ptr<const DCP> dcp,
587         shared_ptr<const ReelSoundAsset> reel_asset,
588         function<void (string, optional<boost::filesystem::path>)> stage,
589         function<void (float)> progress,
590         vector<VerificationNote>& notes
591         )
592 {
593         auto asset = reel_asset->asset();
594         stage ("Checking sound asset hash", asset->file());
595         auto const r = verify_asset (dcp, reel_asset, progress);
596         switch (r) {
597                 case VerifyAssetResult::BAD:
598                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_SOUND_HASH, *asset->file()});
599                         break;
600                 case VerifyAssetResult::CPL_PKL_DIFFER:
601                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_SOUND_HASHES, *asset->file()});
602                         break;
603                 default:
604                         break;
605         }
606
607         stage ("Checking sound asset metadata", asset->file());
608
609         verify_language_tag (asset->language(), notes);
610         if (asset->sampling_rate() != 48000) {
611                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SOUND_FRAME_RATE, raw_convert<string>(asset->sampling_rate()), *asset->file()});
612         }
613 }
614
615
616 static void
617 verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, vector<VerificationNote>& notes)
618 {
619         /* XXX: is Language compulsory? */
620         if (reel_asset->language()) {
621                 verify_language_tag (*reel_asset->language(), notes);
622         }
623
624         if (!reel_asset->entry_point()) {
625                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT, reel_asset->id() });
626         } else if (reel_asset->entry_point().get()) {
627                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT, reel_asset->id() });
628         }
629 }
630
631
632 static void
633 verify_closed_caption_reel (shared_ptr<const ReelClosedCaptionAsset> reel_asset, vector<VerificationNote>& notes)
634 {
635         /* XXX: is Language compulsory? */
636         if (reel_asset->language()) {
637                 verify_language_tag (*reel_asset->language(), notes);
638         }
639
640         if (!reel_asset->entry_point()) {
641                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
642         } else if (reel_asset->entry_point().get()) {
643                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
644         }
645 }
646
647
648 struct State
649 {
650         boost::optional<string> subtitle_language;
651 };
652
653
654
655 void
656 verify_smpte_subtitle_asset (
657         shared_ptr<const SMPTESubtitleAsset> asset,
658         vector<VerificationNote>& notes,
659         State& state
660         )
661 {
662         if (asset->language()) {
663                 auto const language = *asset->language();
664                 verify_language_tag (language, notes);
665                 if (!state.subtitle_language) {
666                         state.subtitle_language = language;
667                 } else if (state.subtitle_language != language) {
668                         notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES });
669                 }
670         } else {
671                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE, *asset->file() });
672         }
673         auto const size = boost::filesystem::file_size(asset->file().get());
674         if (size > 115 * 1024 * 1024) {
675                 notes.push_back (
676                         { VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES, raw_convert<string>(size), *asset->file() }
677                         );
678         }
679         /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
680          * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
681          */
682         auto fonts = asset->font_data ();
683         int total_size = 0;
684         for (auto i: fonts) {
685                 total_size += i.second.size();
686         }
687         if (total_size > 10 * 1024 * 1024) {
688                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES, raw_convert<string>(total_size), asset->file().get() });
689         }
690
691         if (!asset->start_time()) {
692                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_START_TIME, asset->file().get() });
693         } else if (asset->start_time() != Time()) {
694                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SUBTITLE_START_TIME, asset->file().get() });
695         }
696 }
697
698
699 static void
700 verify_subtitle_asset (
701         shared_ptr<const SubtitleAsset> asset,
702         function<void (string, optional<boost::filesystem::path>)> stage,
703         boost::filesystem::path xsd_dtd_directory,
704         vector<VerificationNote>& notes,
705         State& state
706         )
707 {
708         stage ("Checking subtitle XML", asset->file());
709         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
710          * gets passed through libdcp which may clean up and therefore hide errors.
711          */
712         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
713
714         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
715         if (smpte) {
716                 verify_smpte_subtitle_asset (smpte, notes, state);
717         }
718 }
719
720
721 static void
722 verify_closed_caption_asset (
723         shared_ptr<const SubtitleAsset> asset,
724         function<void (string, optional<boost::filesystem::path>)> stage,
725         boost::filesystem::path xsd_dtd_directory,
726         vector<VerificationNote>& notes,
727         State& state
728         )
729 {
730         verify_subtitle_asset (asset, stage, xsd_dtd_directory, notes, state);
731
732         if (asset->raw_xml().size() > 256 * 1024) {
733                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES, raw_convert<string>(asset->raw_xml().size()), *asset->file()});
734         }
735 }
736
737
738 static
739 void
740 verify_text_timing (
741         vector<shared_ptr<Reel>> reels,
742         optional<int> picture_frame_rate,
743         vector<VerificationNote>& notes,
744         std::function<bool (shared_ptr<Reel>)> check,
745         std::function<string (shared_ptr<Reel>)> xml,
746         std::function<int64_t (shared_ptr<Reel>)> duration
747         )
748 {
749         /* end of last subtitle (in editable units) */
750         optional<int64_t> last_out;
751         auto too_short = false;
752         auto too_close = false;
753         auto too_early = false;
754         /* current reel start time (in editable units) */
755         int64_t reel_offset = 0;
756
757         std::function<void (cxml::ConstNodePtr, int, int, bool)> parse;
758         parse = [&parse, &last_out, &too_short, &too_close, &too_early, &reel_offset](cxml::ConstNodePtr node, int tcr, int pfr, bool first_reel) {
759                 if (node->name() == "Subtitle") {
760                         Time in (node->string_attribute("TimeIn"), tcr);
761                         Time out (node->string_attribute("TimeOut"), tcr);
762                         if (first_reel && in < Time(0, 0, 4, 0, tcr)) {
763                                 too_early = true;
764                         }
765                         auto length = out - in;
766                         if (length.as_editable_units(pfr) < 15) {
767                                 too_short = true;
768                         }
769                         if (last_out) {
770                                 /* XXX: this feels dubious - is it really what Bv2.1 means? */
771                                 auto distance = reel_offset + in.as_editable_units(pfr) - *last_out;
772                                 if (distance >= 0 && distance < 2) {
773                                         too_close = true;
774                                 }
775                         }
776                         last_out = reel_offset + out.as_editable_units(pfr);
777                 } else {
778                         for (auto i: node->node_children()) {
779                                 parse(i, tcr, pfr, first_reel);
780                         }
781                 }
782         };
783
784         for (auto i = 0U; i < reels.size(); ++i) {
785                 if (!check(reels[i])) {
786                         continue;
787                 }
788
789                 /* We need to look at <Subtitle> instances in the XML being checked, so we can't use the subtitles
790                  * read in by libdcp's parser.
791                  */
792
793                 auto doc = make_shared<cxml::Document>("SubtitleReel");
794                 doc->read_string (xml(reels[i]));
795                 auto const tcr = doc->number_child<int>("TimeCodeRate");
796                 parse (doc, tcr, picture_frame_rate.get_value_or(24), i == 0);
797                 reel_offset += duration(reels[i]);
798         }
799
800         if (too_early) {
801                 notes.push_back({
802                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME
803                 });
804         }
805
806         if (too_short) {
807                 notes.push_back ({
808                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_DURATION
809                 });
810         }
811
812         if (too_close) {
813                 notes.push_back ({
814                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_SPACING
815                 });
816         }
817 }
818
819
/** Flags accumulated by verify_text_lines_and_characters().  Every flag starts out
 *  false and is only ever switched on by that function, so a single instance can
 *  collect the outcome of checking several assets.
 */
struct LinesCharactersResult
{
	/** switched on when a line's character count trips the warning_length check */
	bool warning_length_exceeded{false};
	/** switched on when a line's character count trips the error_length check */
	bool error_length_exceeded{false};
	/** switched on when more than 3 lines are found on screen at the same time */
	bool line_count_exceeded{false};
};
826
827
828 static
829 void
830 verify_text_lines_and_characters (
831         shared_ptr<SubtitleAsset> asset,
832         int warning_length,
833         int error_length,
834         LinesCharactersResult* result
835         )
836 {
837         class Event
838         {
839         public:
840                 Event (Time time_, float position_, int characters_)
841                         : time (time_)
842                         , position (position_)
843                         , characters (characters_)
844                 {}
845
846                 Event (Time time_, shared_ptr<Event> start_)
847                         : time (time_)
848                         , start (start_)
849                 {}
850
851                 Time time;
852                 int position; //< position from 0 at top of screen to 100 at bottom
853                 int characters;
854                 shared_ptr<Event> start;
855         };
856
857         vector<shared_ptr<Event>> events;
858
859         auto position = [](shared_ptr<const SubtitleString> sub) {
860                 switch (sub->v_align()) {
861                 case VAlign::TOP:
862                         return lrintf(sub->v_position() * 100);
863                 case VAlign::CENTER:
864                         return lrintf((0.5f + sub->v_position()) * 100);
865                 case VAlign::BOTTOM:
866                         return lrintf((1.0f - sub->v_position()) * 100);
867                 }
868
869                 return 0L;
870         };
871
872         for (auto j: asset->subtitles()) {
873                 auto text = dynamic_pointer_cast<const SubtitleString>(j);
874                 if (text) {
875                         auto in = make_shared<Event>(text->in(), position(text), text->text().length());
876                         events.push_back(in);
877                         events.push_back(make_shared<Event>(text->out(), in));
878                 }
879         }
880
881         std::sort(events.begin(), events.end(), [](shared_ptr<Event> const& a, shared_ptr<Event>const& b) {
882                 return a->time < b->time;
883         });
884
885         map<int, int> current;
886         for (auto i: events) {
887                 if (current.size() > 3) {
888                         result->line_count_exceeded = true;
889                 }
890                 for (auto j: current) {
891                         if (j.second >= warning_length) {
892                                 result->warning_length_exceeded = true;
893                         }
894                         if (j.second >= error_length) {
895                                 result->error_length_exceeded = true;
896                         }
897                 }
898
899                 if (i->start) {
900                         /* end of a subtitle */
901                         DCP_ASSERT (current.find(i->start->position) != current.end());
902                         if (current[i->start->position] == i->start->characters) {
903                                 current.erase(i->start->position);
904                         } else {
905                                 current[i->start->position] -= i->start->characters;
906                         }
907                 } else {
908                         /* start of a subtitle */
909                         if (current.find(i->position) == current.end()) {
910                                 current[i->position] = i->characters;
911                         } else {
912                                 current[i->position] += i->characters;
913                         }
914                 }
915         }
916 }
917
918
919 static
920 void
921 verify_text_timing (vector<shared_ptr<Reel>> reels, vector<VerificationNote>& notes)
922 {
923         if (reels.empty()) {
924                 return;
925         }
926
927         optional<int> picture_frame_rate;
928         if (reels[0]->main_picture()) {
929                 picture_frame_rate = reels[0]->main_picture()->frame_rate().numerator;
930         }
931
932         if (reels[0]->main_subtitle()) {
933                 verify_text_timing (reels, picture_frame_rate, notes,
934                         [](shared_ptr<Reel> reel) {
935                                 return static_cast<bool>(reel->main_subtitle());
936                         },
937                         [](shared_ptr<Reel> reel) {
938                                 return reel->main_subtitle()->asset()->raw_xml();
939                         },
940                         [](shared_ptr<Reel> reel) {
941                                 return reel->main_subtitle()->actual_duration();
942                         }
943                 );
944         }
945
946         for (auto i = 0U; i < reels[0]->closed_captions().size(); ++i) {
947                 verify_text_timing (reels, picture_frame_rate, notes,
948                         [i](shared_ptr<Reel> reel) {
949                                 return i < reel->closed_captions().size();
950                         },
951                         [i](shared_ptr<Reel> reel) {
952                                 return reel->closed_captions()[i]->asset()->raw_xml();
953                         },
954                         [i](shared_ptr<Reel> reel) {
955                                 return reel->closed_captions()[i]->actual_duration();
956                         }
957                 );
958         }
959 }
960
961
962 void
963 verify_extension_metadata (shared_ptr<CPL> cpl, vector<VerificationNote>& notes)
964 {
965         DCP_ASSERT (cpl->file());
966         cxml::Document doc ("CompositionPlaylist");
967         doc.read_file (cpl->file().get());
968
969         auto missing = false;
970         string malformed;
971
972         if (auto reel_list = doc.node_child("ReelList")) {
973                 auto reels = reel_list->node_children("Reel");
974                 if (!reels.empty()) {
975                         if (auto asset_list = reels[0]->optional_node_child("AssetList")) {
976                                 if (auto metadata = asset_list->optional_node_child("CompositionMetadataAsset")) {
977                                         if (auto extension_list = metadata->optional_node_child("ExtensionMetadataList")) {
978                                                 missing = true;
979                                                 for (auto extension: extension_list->node_children("ExtensionMetadata")) {
980                                                         if (extension->optional_string_attribute("scope").get_value_or("") != "http://isdcf.com/ns/cplmd/app") {
981                                                                 continue;
982                                                         }
983                                                         missing = false;
984                                                         if (auto name = extension->optional_node_child("Name")) {
985                                                                 if (name->content() != "Application") {
986                                                                         malformed = "<Name> should be 'Application'";
987                                                                 }
988                                                         }
989                                                         if (auto property_list = extension->optional_node_child("PropertyList")) {
990                                                                 if (auto property = property_list->optional_node_child("Property")) {
991                                                                         if (auto name = property->optional_node_child("Name")) {
992                                                                                 if (name->content() != "DCP Constraints Profile") {
993                                                                                         malformed = "<Name> property should be 'DCP Constraints Profile'";
994                                                                                 }
995                                                                         }
996                                                                         if (auto value = property->optional_node_child("Value")) {
997                                                                                 if (value->content() != "SMPTE-RDD-52:2020-Bv2.1") {
998                                                                                         malformed = "<Value> property should be 'SMPTE-RDD-52:2020-Bv2.1'";
999                                                                                 }
1000                                                                         }
1001                                                                 }
1002                                                         }
1003                                                 }
1004                                         } else {
1005                                                 missing = true;
1006                                         }
1007                                 }
1008                         }
1009                 }
1010         }
1011
1012         if (missing) {
1013                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_EXTENSION_METADATA, cpl->id(), cpl->file().get()});
1014         } else if (!malformed.empty()) {
1015                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_EXTENSION_METADATA, malformed, cpl->file().get()});
1016         }
1017 }
1018
1019
1020 bool
1021 pkl_has_encrypted_assets (shared_ptr<DCP> dcp, shared_ptr<PKL> pkl)
1022 {
1023         vector<string> encrypted;
1024         for (auto i: dcp->cpls()) {
1025                 for (auto j: i->reel_file_assets()) {
1026                         if (j->asset_ref().resolved()) {
1027                                 /* It's a bit surprising / broken but Interop subtitle assets are represented
1028                                  * in reels by ReelSubtitleAsset which inherits ReelFileAsset, so it's possible for
1029                                  * ReelFileAssets to have assets which are not MXFs.
1030                                  */
1031                                 if (auto asset = dynamic_pointer_cast<MXF>(j->asset_ref().asset())) {
1032                                         if (asset->encrypted()) {
1033                                                 encrypted.push_back(j->asset_ref().id());
1034                                         }
1035                                 }
1036                         }
1037                 }
1038         }
1039
1040         for (auto i: pkl->asset_list()) {
1041                 if (find(encrypted.begin(), encrypted.end(), i->id()) != encrypted.end()) {
1042                         return true;
1043                 }
1044         }
1045
1046         return false;
1047 }
1048
1049
1050 vector<VerificationNote>
1051 dcp::verify (
1052         vector<boost::filesystem::path> directories,
1053         function<void (string, optional<boost::filesystem::path>)> stage,
1054         function<void (float)> progress,
1055         boost::filesystem::path xsd_dtd_directory
1056         )
1057 {
1058         xsd_dtd_directory = boost::filesystem::canonical (xsd_dtd_directory);
1059
1060         vector<VerificationNote> notes;
1061         State state{};
1062
1063         vector<shared_ptr<DCP>> dcps;
1064         for (auto i: directories) {
1065                 dcps.push_back (shared_ptr<DCP> (new DCP (i)));
1066         }
1067
1068         for (auto dcp: dcps) {
1069                 stage ("Checking DCP", dcp->directory());
1070                 try {
1071                         dcp->read (&notes);
1072                 } catch (ReadError& e) {
1073                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1074                 } catch (XMLError& e) {
1075                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1076                 } catch (MXFFileError& e) {
1077                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1078                 } catch (cxml::Error& e) {
1079                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1080                 }
1081
1082                 if (dcp->standard() != Standard::SMPTE) {
1083                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_STANDARD});
1084                 }
1085
1086                 for (auto cpl: dcp->cpls()) {
1087                         stage ("Checking CPL", cpl->file());
1088                         validate_xml (cpl->file().get(), xsd_dtd_directory, notes);
1089
1090                         if (cpl->any_encrypted() && !cpl->all_encrypted()) {
1091                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::PARTIALLY_ENCRYPTED});
1092                         }
1093
1094                         for (auto const& i: cpl->additional_subtitle_languages()) {
1095                                 verify_language_tag (i, notes);
1096                         }
1097
1098                         if (cpl->release_territory()) {
1099                                 if (!cpl->release_territory_scope() || cpl->release_territory_scope().get() != "http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata#scope/release-territory/UNM49") {
1100                                         auto terr = cpl->release_territory().get();
1101                                         /* Must be a valid region tag, or "001" */
1102                                         try {
1103                                                 LanguageTag::RegionSubtag test (terr);
1104                                         } catch (...) {
1105                                                 if (terr != "001") {
1106                                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, terr});
1107                                                 }
1108                                         }
1109                                 }
1110                         }
1111
1112                         if (dcp->standard() == Standard::SMPTE) {
1113                                 if (!cpl->annotation_text()) {
1114                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1115                                 } else if (cpl->annotation_text().get() != cpl->content_title_text()) {
1116                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1117                                 }
1118                         }
1119
1120                         for (auto i: dcp->pkls()) {
1121                                 /* Check that the CPL's hash corresponds to the PKL */
1122                                 optional<string> h = i->hash(cpl->id());
1123                                 if (h && make_digest(ArrayData(*cpl->file())) != *h) {
1124                                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_CPL_HASHES, cpl->id(), cpl->file().get()});
1125                                 }
1126
1127                                 /* Check that any PKL with a single CPL has its AnnotationText the same as the CPL's ContentTitleText */
1128                                 optional<string> required_annotation_text;
1129                                 for (auto j: i->asset_list()) {
1130                                         /* See if this is a CPL */
1131                                         for (auto k: dcp->cpls()) {
1132                                                 if (j->id() == k->id()) {
1133                                                         if (!required_annotation_text) {
1134                                                                 /* First CPL we have found; this is the required AnnotationText unless we find another */
1135                                                                 required_annotation_text = cpl->content_title_text();
1136                                                         } else {
1137                                                                 /* There's more than one CPL so we don't care what the PKL's AnnotationText is */
1138                                                                 required_annotation_text = boost::none;
1139                                                         }
1140                                                 }
1141                                         }
1142                                 }
1143
1144                                 if (required_annotation_text && i->annotation_text() != required_annotation_text) {
1145                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL, i->id(), i->file().get()});
1146                                 }
1147                         }
1148
1149                         /* set to true if any reel has a MainSubtitle */
1150                         auto have_main_subtitle = false;
1151                         /* set to true if any reel has no MainSubtitle */
1152                         auto have_no_main_subtitle = false;
1153                         /* fewest number of closed caption assets seen in a reel */
1154                         size_t fewest_closed_captions = SIZE_MAX;
1155                         /* most number of closed caption assets seen in a reel */
1156                         size_t most_closed_captions = 0;
1157                         map<Marker, Time> markers_seen;
1158
1159                         for (auto reel: cpl->reels()) {
1160                                 stage ("Checking reel", optional<boost::filesystem::path>());
1161
1162                                 for (auto i: reel->assets()) {
1163                                         if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1164                                                 notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_DURATION, i->id()});
1165                                         }
1166                                         if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1167                                                 notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_INTRINSIC_DURATION, i->id()});
1168                                         }
1169                                         auto file_asset = dynamic_pointer_cast<ReelFileAsset>(i);
1170                                         if (file_asset && !file_asset->hash()) {
1171                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_HASH, i->id()});
1172                                         }
1173                                 }
1174
1175                                 if (dcp->standard() == Standard::SMPTE) {
1176                                         boost::optional<int64_t> duration;
1177                                         for (auto i: reel->assets()) {
1178                                                 if (!duration) {
1179                                                         duration = i->actual_duration();
1180                                                 } else if (*duration != i->actual_duration()) {
1181                                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_ASSET_DURATION});
1182                                                         break;
1183                                                 }
1184                                         }
1185                                 }
1186
1187                                 if (reel->main_picture()) {
1188                                         /* Check reel stuff */
1189                                         auto const frame_rate = reel->main_picture()->frame_rate();
1190                                         if (frame_rate.denominator != 1 ||
1191                                             (frame_rate.numerator != 24 &&
1192                                              frame_rate.numerator != 25 &&
1193                                              frame_rate.numerator != 30 &&
1194                                              frame_rate.numerator != 48 &&
1195                                              frame_rate.numerator != 50 &&
1196                                              frame_rate.numerator != 60 &&
1197                                              frame_rate.numerator != 96)) {
1198                                                 notes.push_back ({
1199                                                         VerificationNote::Type::ERROR,
1200                                                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE,
1201                                                         String::compose("%1/%2", frame_rate.numerator, frame_rate.denominator)
1202                                                 });
1203                                         }
1204                                         /* Check asset */
1205                                         if (reel->main_picture()->asset_ref().resolved()) {
1206                                                 verify_main_picture_asset (dcp, reel->main_picture(), stage, progress, notes);
1207                                         }
1208                                 }
1209
1210                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
1211                                         verify_main_sound_asset (dcp, reel->main_sound(), stage, progress, notes);
1212                                 }
1213
1214                                 if (reel->main_subtitle()) {
1215                                         verify_main_subtitle_reel (reel->main_subtitle(), notes);
1216                                         if (reel->main_subtitle()->asset_ref().resolved()) {
1217                                                 verify_subtitle_asset (reel->main_subtitle()->asset(), stage, xsd_dtd_directory, notes, state);
1218                                         }
1219                                         have_main_subtitle = true;
1220                                 } else {
1221                                         have_no_main_subtitle = true;
1222                                 }
1223
1224                                 for (auto i: reel->closed_captions()) {
1225                                         verify_closed_caption_reel (i, notes);
1226                                         if (i->asset_ref().resolved()) {
1227                                                 verify_closed_caption_asset (i->asset(), stage, xsd_dtd_directory, notes, state);
1228                                         }
1229                                 }
1230
1231                                 if (reel->main_markers()) {
1232                                         for (auto const& i: reel->main_markers()->get()) {
1233                                                 markers_seen.insert (i);
1234                                         }
1235                                 }
1236
1237                                 fewest_closed_captions = std::min (fewest_closed_captions, reel->closed_captions().size());
1238                                 most_closed_captions = std::max (most_closed_captions, reel->closed_captions().size());
1239                         }
1240
1241                         if (dcp->standard() == Standard::SMPTE) {
1242
1243                                 if (have_main_subtitle && have_no_main_subtitle) {
1244                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS});
1245                                 }
1246
1247                                 if (fewest_closed_captions != most_closed_captions) {
1248                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS});
1249                                 }
1250
1251                                 if (cpl->content_kind() == ContentKind::FEATURE) {
1252                                         if (markers_seen.find(Marker::FFEC) == markers_seen.end()) {
1253                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFEC_IN_FEATURE});
1254                                         }
1255                                         if (markers_seen.find(Marker::FFMC) == markers_seen.end()) {
1256                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFMC_IN_FEATURE});
1257                                         }
1258                                 }
1259
1260                                 auto ffoc = markers_seen.find(Marker::FFOC);
1261                                 if (ffoc == markers_seen.end()) {
1262                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_FFOC});
1263                                 } else if (ffoc->second.e != 1) {
1264                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_FFOC, raw_convert<string>(ffoc->second.e)});
1265                                 }
1266
1267                                 auto lfoc = markers_seen.find(Marker::LFOC);
1268                                 if (lfoc == markers_seen.end()) {
1269                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_LFOC});
1270                                 } else {
1271                                         auto lfoc_time = lfoc->second.as_editable_units(lfoc->second.tcr);
1272                                         if (lfoc_time != (cpl->reels().back()->duration() - 1)) {
1273                                                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_LFOC, raw_convert<string>(lfoc_time)});
1274                                         }
1275                                 }
1276
1277                                 verify_text_timing (cpl->reels(), notes);
1278
1279                                 LinesCharactersResult result;
1280                                 for (auto reel: cpl->reels()) {
1281                                         if (reel->main_subtitle() && reel->main_subtitle()->asset()) {
1282                                                 verify_text_lines_and_characters (reel->main_subtitle()->asset(), 52, 79, &result);
1283                                         }
1284                                 }
1285
1286                                 if (result.line_count_exceeded) {
1287                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT});
1288                                 }
1289                                 if (result.error_length_exceeded) {
1290                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH});
1291                                 } else if (result.warning_length_exceeded) {
1292                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH});
1293                                 }
1294
1295                                 result = LinesCharactersResult();
1296                                 for (auto reel: cpl->reels()) {
1297                                         for (auto i: reel->closed_captions()) {
1298                                                 if (i->asset()) {
1299                                                         verify_text_lines_and_characters (i->asset(), 32, 32, &result);
1300                                                 }
1301                                         }
1302                                 }
1303
1304                                 if (result.line_count_exceeded) {
1305                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT});
1306                                 }
1307                                 if (result.error_length_exceeded) {
1308                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH});
1309                                 }
1310
1311                                 if (!cpl->full_content_title_text()) {
1312                                         /* Since FullContentTitleText is assumed always to exist if there's a CompositionMetadataAsset we
1313                                          * can use it as a proxy for CompositionMetadataAsset's existence.
1314                                          */
1315                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA, cpl->id(), cpl->file().get()});
1316                                 } else if (!cpl->version_number()) {
1317                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER, cpl->id(), cpl->file().get()});
1318                                 }
1319
1320                                 verify_extension_metadata (cpl, notes);
1321
1322                                 if (cpl->any_encrypted()) {
1323                                         cxml::Document doc ("CompositionPlaylist");
1324                                         DCP_ASSERT (cpl->file());
1325                                         doc.read_file (cpl->file().get());
1326                                         if (!doc.optional_node_child("Signature")) {
1327                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT, cpl->id(), cpl->file().get()});
1328                                         }
1329                                 }
1330                         }
1331                 }
1332
1333                 for (auto pkl: dcp->pkls()) {
1334                         stage ("Checking PKL", pkl->file());
1335                         validate_xml (pkl->file().get(), xsd_dtd_directory, notes);
1336                         if (pkl_has_encrypted_assets(dcp, pkl)) {
1337                                 cxml::Document doc ("PackingList");
1338                                 doc.read_file (pkl->file().get());
1339                                 if (!doc.optional_node_child("Signature")) {
1340                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT, pkl->id(), pkl->file().get()});
1341                                 }
1342                         }
1343                 }
1344
1345                 if (dcp->asset_map_path()) {
1346                         stage ("Checking ASSETMAP", dcp->asset_map_path().get());
1347                         validate_xml (dcp->asset_map_path().get(), xsd_dtd_directory, notes);
1348                 } else {
1349                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_ASSETMAP});
1350                 }
1351         }
1352
1353         return notes;
1354 }
1355
1356
1357 string
1358 dcp::note_to_string (VerificationNote note)
1359 {
1360         /** These strings should say what is wrong, incorporating any extra details (ID, filenames etc.).
1361          *
1362          *  e.g. "ClosedCaption asset has no <EntryPoint> tag.",
1363          *  not "ClosedCaption assets must have an <EntryPoint> tag."
1364          *
1365          *  It's OK to use XML tag names where they are clear.
1366          *  If both ID and filename are available, use only the ID.
1367          *  End messages with a full stop.
1368          *  Messages should not mention whether or not their errors are a part of Bv2.1.
1369          */
1370         switch (note.code()) {
1371         case VerificationNote::Code::FAILED_READ:
1372                 return *note.note();
1373         case VerificationNote::Code::MISMATCHED_CPL_HASHES:
1374                 return String::compose("The hash of the CPL %1 in the PKL does not agree with the CPL file.", note.note().get());
1375         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE:
1376                 return String::compose("The picture in a reel has an invalid frame rate %1.", note.note().get());
1377         case VerificationNote::Code::INCORRECT_PICTURE_HASH:
1378                 return String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
1379         case VerificationNote::Code::MISMATCHED_PICTURE_HASHES:
1380                 return String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
1381         case VerificationNote::Code::INCORRECT_SOUND_HASH:
1382                 return String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
1383         case VerificationNote::Code::MISMATCHED_SOUND_HASHES:
1384                 return String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
1385         case VerificationNote::Code::EMPTY_ASSET_PATH:
1386                 return "The asset map contains an empty asset path.";
1387         case VerificationNote::Code::MISSING_ASSET:
1388                 return String::compose("The file %1 for an asset in the asset map cannot be found.", note.file()->filename());
1389         case VerificationNote::Code::MISMATCHED_STANDARD:
1390                 return "The DCP contains both SMPTE and Interop parts.";
1391         case VerificationNote::Code::INVALID_XML:
1392                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
1393         case VerificationNote::Code::MISSING_ASSETMAP:
1394                 return "No ASSETMAP or ASSETMAP.xml was found.";
1395         case VerificationNote::Code::INVALID_INTRINSIC_DURATION:
1396                 return String::compose("The intrinsic duration of the asset %1 is less than 1 second long.", note.note().get());
1397         case VerificationNote::Code::INVALID_DURATION:
1398                 return String::compose("The duration of the asset %1 is less than 1 second long.", note.note().get());
1399         case VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1400                 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
1401         case VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1402                 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
1403         case VerificationNote::Code::EXTERNAL_ASSET:
1404                 return String::compose("The asset %1 that this DCP refers to is not included in the DCP.  It may be a VF.", note.note().get());
1405         case VerificationNote::Code::INVALID_STANDARD:
1406                 return "This DCP does not use the SMPTE standard.";
1407         case VerificationNote::Code::INVALID_LANGUAGE:
1408                 return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
1409         case VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS:
1410                 return String::compose("The size %1 of picture asset %2 is not allowed.", note.note().get(), note.file()->filename());
1411         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K:
1412                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 2K DCPs.", note.note().get(), note.file()->filename());
1413         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K:
1414                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 4K DCPs.", note.note().get(), note.file()->filename());
1415         case VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D:
1416                 return "3D 4K DCPs are not allowed.";
1417         case VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES:
1418                 return String::compose("The size %1 of the closed caption asset %2 is larger than the 256KB maximum.", note.note().get(), note.file()->filename());
1419         case VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES:
1420                 return String::compose("The size %1 of the timed text asset %2 is larger than the 115MB maximum.", note.note().get(), note.file()->filename());
1421         case VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES:
1422                 return String::compose("The size %1 of the fonts in timed text asset %2 is larger than the 10MB maximum.", note.note().get(), note.file()->filename());
1423         case VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE:
1424                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <Language> tag.", note.file()->filename());
1425         case VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES:
1426                 return "Some subtitle assets have different <Language> tags than others";
1427         case VerificationNote::Code::MISSING_SUBTITLE_START_TIME:
1428                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <StartTime> tag.", note.file()->filename());
1429         case VerificationNote::Code::INVALID_SUBTITLE_START_TIME:
1430                 return String::compose("The XML for a SMPTE subtitle asset %1 has a non-zero <StartTime> tag.", note.file()->filename());
1431         case VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME:
1432                 return "The first subtitle or closed caption is less than 4 seconds from the start of the DCP.";
1433         case VerificationNote::Code::INVALID_SUBTITLE_DURATION:
1434                 return "At least one subtitle lasts less than 15 frames.";
1435         case VerificationNote::Code::INVALID_SUBTITLE_SPACING:
1436                 return "At least one pair of subtitles is separated by less than 2 frames.";
1437         case VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT:
1438                 return "There are more than 3 subtitle lines in at least one place in the DCP.";
1439         case VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH:
1440                 return "There are more than 52 characters in at least one subtitle line.";
1441         case VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH:
1442                 return "There are more than 79 characters in at least one subtitle line.";
1443         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT:
1444                 return "There are more than 3 closed caption lines in at least one place.";
1445         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH:
1446                 return "There are more than 32 characters in at least one closed caption line.";
1447         case VerificationNote::Code::INVALID_SOUND_FRAME_RATE:
1448                 return String::compose("The sound asset %1 has a sampling rate of %2", note.file()->filename(), note.note().get());
1449         case VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT:
1450                 return String::compose("The CPL %1 has no <AnnotationText> tag.", note.note().get());
1451         case VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT:
1452                 return String::compose("The CPL %1 has an <AnnotationText> which differs from its <ContentTitleText>", note.note().get());
1453         case VerificationNote::Code::MISMATCHED_ASSET_DURATION:
1454                 return "All assets in a reel do not have the same duration.";
1455         case VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS:
1456                 return "At least one reel contains a subtitle asset, but some reel(s) do not";
1457         case VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS:
1458                 return "At least one reel has closed captions, but reels have different numbers of closed caption assets.";
1459         case VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT:
1460                 return String::compose("The subtitle asset %1 has no <EntryPoint> tag.", note.note().get());
1461         case VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT:
1462                 return String::compose("The subtitle asset %1 has an <EntryPoint> other than 0.", note.note().get());
1463         case VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT:
1464                 return String::compose("The closed caption asset %1 has no <EntryPoint> tag.", note.note().get());
1465         case VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT:
1466                 return String::compose("The closed caption asset %1 has an <EntryPoint> other than 0.", note.note().get());
1467         case VerificationNote::Code::MISSING_HASH:
1468                 return String::compose("The asset %1 has no <Hash> tag in the CPL.", note.note().get());
1469         case VerificationNote::Code::MISSING_FFEC_IN_FEATURE:
1470                 return "The DCP is marked as a Feature but there is no FFEC (first frame of end credits) marker";
1471         case VerificationNote::Code::MISSING_FFMC_IN_FEATURE:
1472                 return "The DCP is marked as a Feature but there is no FFMC (first frame of moving credits) marker";
1473         case VerificationNote::Code::MISSING_FFOC:
1474                 return "There should be a FFOC (first frame of content) marker";
1475         case VerificationNote::Code::MISSING_LFOC:
1476                 return "There should be a LFOC (last frame of content) marker";
1477         case VerificationNote::Code::INCORRECT_FFOC:
1478                 return String::compose("The FFOC marker is %1 instead of 1", note.note().get());
1479         case VerificationNote::Code::INCORRECT_LFOC:
1480                 return String::compose("The LFOC marker is %1 instead of 1 less than the duration of the last reel.", note.note().get());
1481         case VerificationNote::Code::MISSING_CPL_METADATA:
1482                 return String::compose("The CPL %1 has no <CompositionMetadataAsset> tag.", note.note().get());
1483         case VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER:
1484                 return String::compose("The CPL %1 has no <VersionNumber> in its <CompositionMetadataAsset>.", note.note().get());
1485         case VerificationNote::Code::MISSING_EXTENSION_METADATA:
1486                 return String::compose("The CPL %1 has no <ExtensionMetadata> in its <CompositionMetadataAsset>.", note.note().get());
1487         case VerificationNote::Code::INVALID_EXTENSION_METADATA:
1488                 return String::compose("The CPL %1 has a malformed <ExtensionMetadata> (%2).", note.file()->filename(), note.note().get());
1489         case VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT:
1490                 return String::compose("The CPL %1, which has encrypted content, is not signed.", note.note().get());
1491         case VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT:
1492                 return String::compose("The PKL %1, which has encrypted content, is not signed.", note.note().get());
1493         case VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL:
1494                 return String::compose("The PKL %1 has only one CPL but its <AnnotationText> does not match the CPL's <ContentTitleText>", note.note().get());
1495         case VerificationNote::Code::PARTIALLY_ENCRYPTED:
1496                 return "Some assets are encrypted but some are not";
1497         }
1498
1499         return "";
1500 }
1501
1502
1503 bool
1504 dcp::operator== (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
1505 {
1506         return a.type() == b.type() && a.code() == b.code() && a.note() == b.note() && a.file() == b.file() && a.line() == b.line();
1507 }
1508
1509
1510 std::ostream&
1511 dcp::operator<< (std::ostream& s, dcp::VerificationNote const& note)
1512 {
1513         s << note_to_string (note);
1514         if (note.note()) {
1515                 s << " [" << note.note().get() << "]";
1516         }
1517         if (note.file()) {
1518                 s << " [" << note.file().get() << "]";
1519         }
1520         if (note.line()) {
1521                 s << " [" << note.line().get() << "]";
1522         }
1523         return s;
1524 }
1525