bddf1683cc1c8b926fcfdf216da2d212f21933ae
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2021 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34
35 /** @file  src/verify.cc
36  *  @brief dcp::verify() method and associated code
37  */
38
39
40 #include "verify.h"
41 #include "dcp.h"
42 #include "cpl.h"
43 #include "reel.h"
44 #include "reel_closed_caption_asset.h"
45 #include "reel_picture_asset.h"
46 #include "reel_sound_asset.h"
47 #include "reel_subtitle_asset.h"
48 #include "interop_subtitle_asset.h"
49 #include "mono_picture_asset.h"
50 #include "mono_picture_frame.h"
51 #include "stereo_picture_asset.h"
52 #include "stereo_picture_frame.h"
53 #include "exceptions.h"
54 #include "compose.hpp"
55 #include "raw_convert.h"
56 #include "reel_markers_asset.h"
57 #include "smpte_subtitle_asset.h"
58 #include <xercesc/util/PlatformUtils.hpp>
59 #include <xercesc/parsers/XercesDOMParser.hpp>
60 #include <xercesc/parsers/AbstractDOMParser.hpp>
61 #include <xercesc/sax/HandlerBase.hpp>
62 #include <xercesc/dom/DOMImplementation.hpp>
63 #include <xercesc/dom/DOMImplementationLS.hpp>
64 #include <xercesc/dom/DOMImplementationRegistry.hpp>
65 #include <xercesc/dom/DOMLSParser.hpp>
66 #include <xercesc/dom/DOMException.hpp>
67 #include <xercesc/dom/DOMDocument.hpp>
68 #include <xercesc/dom/DOMNodeList.hpp>
69 #include <xercesc/dom/DOMError.hpp>
70 #include <xercesc/dom/DOMLocator.hpp>
71 #include <xercesc/dom/DOMNamedNodeMap.hpp>
72 #include <xercesc/dom/DOMAttr.hpp>
73 #include <xercesc/dom/DOMErrorHandler.hpp>
74 #include <xercesc/framework/LocalFileInputSource.hpp>
75 #include <xercesc/framework/MemBufInputSource.hpp>
76 #include <boost/algorithm/string.hpp>
77 #include <map>
78 #include <vector>
79 #include <iostream>
80
81
82 using std::list;
83 using std::vector;
84 using std::string;
85 using std::cout;
86 using std::map;
87 using std::max;
88 using std::set;
89 using std::shared_ptr;
90 using std::make_shared;
91 using boost::optional;
92 using boost::function;
93 using std::dynamic_pointer_cast;
94
95
96 using namespace dcp;
97 using namespace xercesc;
98
99
100 static
101 string
102 xml_ch_to_string (XMLCh const * a)
103 {
104         char* x = XMLString::transcode(a);
105         string const o(x);
106         XMLString::release(&x);
107         return o;
108 }
109
110
111 class XMLValidationError
112 {
113 public:
114         XMLValidationError (SAXParseException const & e)
115                 : _message (xml_ch_to_string(e.getMessage()))
116                 , _line (e.getLineNumber())
117                 , _column (e.getColumnNumber())
118                 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
119                 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
120         {
121
122         }
123
124         string message () const {
125                 return _message;
126         }
127
128         uint64_t line () const {
129                 return _line;
130         }
131
132         uint64_t column () const {
133                 return _column;
134         }
135
136         string public_id () const {
137                 return _public_id;
138         }
139
140         string system_id () const {
141                 return _system_id;
142         }
143
144 private:
145         string _message;
146         uint64_t _line;
147         uint64_t _column;
148         string _public_id;
149         string _system_id;
150 };
151
152
153 class DCPErrorHandler : public ErrorHandler
154 {
155 public:
156         void warning(const SAXParseException& e)
157         {
158                 maybe_add (XMLValidationError(e));
159         }
160
161         void error(const SAXParseException& e)
162         {
163                 maybe_add (XMLValidationError(e));
164         }
165
166         void fatalError(const SAXParseException& e)
167         {
168                 maybe_add (XMLValidationError(e));
169         }
170
171         void resetErrors() {
172                 _errors.clear ();
173         }
174
175         list<XMLValidationError> errors () const {
176                 return _errors;
177         }
178
179 private:
180         void maybe_add (XMLValidationError e)
181         {
182                 /* XXX: nasty hack */
183                 if (
184                         e.message().find("schema document") != string::npos &&
185                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
186                         ) {
187                         return;
188                 }
189
190                 _errors.push_back (e);
191         }
192
193         list<XMLValidationError> _errors;
194 };
195
196
197 class StringToXMLCh
198 {
199 public:
200         StringToXMLCh (string a)
201         {
202                 _buffer = XMLString::transcode(a.c_str());
203         }
204
205         StringToXMLCh (StringToXMLCh const&) = delete;
206         StringToXMLCh& operator= (StringToXMLCh const&) = delete;
207
208         ~StringToXMLCh ()
209         {
210                 XMLString::release (&_buffer);
211         }
212
213         XMLCh const * get () const {
214                 return _buffer;
215         }
216
217 private:
218         XMLCh* _buffer;
219 };
220
221
222 class LocalFileResolver : public EntityResolver
223 {
224 public:
225         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
226                 : _xsd_dtd_directory (xsd_dtd_directory)
227         {
228                 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
229                  * found without being here.
230                  */
231                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
232                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
233                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
234                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
235                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
236                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
237                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
238                 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
239                 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
240                 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
241                 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
242                 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
243                 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
244         }
245
246         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
247         {
248                 if (!system_id) {
249                         return 0;
250                 }
251                 auto system_id_str = xml_ch_to_string (system_id);
252                 auto p = _xsd_dtd_directory;
253                 if (_files.find(system_id_str) == _files.end()) {
254                         p /= system_id_str;
255                 } else {
256                         p /= _files[system_id_str];
257                 }
258                 StringToXMLCh ch (p.string());
259                 return new LocalFileInputSource(ch.get());
260         }
261
262 private:
263         void add (string uri, string file)
264         {
265                 _files[uri] = file;
266         }
267
268         std::map<string, string> _files;
269         boost::filesystem::path _xsd_dtd_directory;
270 };
271
272
273 static void
274 parse (XercesDOMParser& parser, boost::filesystem::path xml)
275 {
276         parser.parse(xml.string().c_str());
277 }
278
279
280 static void
281 parse (XercesDOMParser& parser, string xml)
282 {
283         xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
284         parser.parse(buf);
285 }
286
287
288 template <class T>
289 void
290 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, vector<VerificationNote>& notes)
291 {
292         try {
293                 XMLPlatformUtils::Initialize ();
294         } catch (XMLException& e) {
295                 throw MiscError ("Failed to initialise xerces library");
296         }
297
298         DCPErrorHandler error_handler;
299
300         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
301         {
302                 XercesDOMParser parser;
303                 parser.setValidationScheme(XercesDOMParser::Val_Always);
304                 parser.setDoNamespaces(true);
305                 parser.setDoSchema(true);
306
307                 vector<string> schema;
308                 schema.push_back("xml.xsd");
309                 schema.push_back("xmldsig-core-schema.xsd");
310                 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
311                 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
312                 schema.push_back("SMPTE-429-9-2007-AM.xsd");
313                 schema.push_back("Main-Stereo-Picture-CPL.xsd");
314                 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
315                 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
316                 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
317                 schema.push_back("DCSubtitle.v1.mattsson.xsd");
318                 schema.push_back("DCDMSubtitle-2010.xsd");
319                 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
320                 schema.push_back("SMPTE-429-16.xsd");
321                 schema.push_back("Dolby-2012-AD.xsd");
322                 schema.push_back("SMPTE-429-10-2008.xsd");
323                 schema.push_back("xlink.xsd");
324                 schema.push_back("SMPTE-335-2012.xsd");
325                 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
326                 schema.push_back("isdcf-mca.xsd");
327                 schema.push_back("SMPTE-429-12-2008.xsd");
328
329                 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
330                  * Schemas that are not mentioned in this list are not read, and the things
331                  * they describe are not checked.
332                  */
333                 string locations;
334                 for (auto i: schema) {
335                         locations += String::compose("%1 %1 ", i, i);
336                 }
337
338                 parser.setExternalSchemaLocation(locations.c_str());
339                 parser.setValidationSchemaFullChecking(true);
340                 parser.setErrorHandler(&error_handler);
341
342                 LocalFileResolver resolver (xsd_dtd_directory);
343                 parser.setEntityResolver(&resolver);
344
345                 try {
346                         parser.resetDocumentPool();
347                         parse(parser, xml);
348                 } catch (XMLException& e) {
349                         throw MiscError(xml_ch_to_string(e.getMessage()));
350                 } catch (DOMException& e) {
351                         throw MiscError(xml_ch_to_string(e.getMessage()));
352                 } catch (...) {
353                         throw MiscError("Unknown exception from xerces");
354                 }
355         }
356
357         XMLPlatformUtils::Terminate ();
358
359         for (auto i: error_handler.errors()) {
360                 notes.push_back ({
361                         VerificationNote::Type::ERROR,
362                         VerificationNote::Code::INVALID_XML,
363                         i.message(),
364                         boost::trim_copy(i.public_id() + " " + i.system_id()),
365                         i.line()
366                 });
367         }
368 }
369
370
/** Possible outcomes of verify_asset() */
enum class VerifyAssetResult
{
	/** neither of the problems below was found */
	GOOD,
	/** the hash from the reel asset and the hash from the PKL are different */
	CPL_PKL_DIFFER,
	/** the hash calculated from the asset is different from the one in the PKL */
	BAD
};
376
377
378 static VerifyAssetResult
379 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
380 {
381         auto const actual_hash = reel_file_asset->asset_ref()->hash(progress);
382
383         auto pkls = dcp->pkls();
384         /* We've read this DCP in so it must have at least one PKL */
385         DCP_ASSERT (!pkls.empty());
386
387         auto asset = reel_file_asset->asset_ref().asset();
388
389         optional<string> pkl_hash;
390         for (auto i: pkls) {
391                 pkl_hash = i->hash (reel_file_asset->asset_ref()->id());
392                 if (pkl_hash) {
393                         break;
394                 }
395         }
396
397         DCP_ASSERT (pkl_hash);
398
399         auto cpl_hash = reel_file_asset->hash();
400         if (cpl_hash && *cpl_hash != *pkl_hash) {
401                 return VerifyAssetResult::CPL_PKL_DIFFER;
402         }
403
404         if (actual_hash != *pkl_hash) {
405                 return VerifyAssetResult::BAD;
406         }
407
408         return VerifyAssetResult::GOOD;
409 }
410
411
412 void
413 verify_language_tag (string tag, vector<VerificationNote>& notes)
414 {
415         try {
416                 LanguageTag test (tag);
417         } catch (LanguageTagError &) {
418                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, tag});
419         }
420 }
421
422
/** Possible outcomes of the picture frame size check */
enum class VerifyPictureAssetResult
{
	/** no frame is over either limit */
	GOOD,
	/** the biggest frame is over the lower ("risky") limit but not the upper one */
	FRAME_NEARLY_TOO_LARGE,
	/** the biggest frame is over the upper limit */
	BAD
};
429
430
431 int
432 biggest_frame_size (shared_ptr<const MonoPictureFrame> frame)
433 {
434         return frame->size ();
435 }
436
437 int
438 biggest_frame_size (shared_ptr<const StereoPictureFrame> frame)
439 {
440         return max(frame->left()->size(), frame->right()->size());
441 }
442
443
444 template <class A, class R, class F>
445 optional<VerifyPictureAssetResult>
446 verify_picture_asset_type (shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
447 {
448         auto asset = dynamic_pointer_cast<A>(reel_file_asset->asset_ref().asset());
449         if (!asset) {
450                 return optional<VerifyPictureAssetResult>();
451         }
452
453         int biggest_frame = 0;
454         auto reader = asset->start_read ();
455         auto const duration = asset->intrinsic_duration ();
456         for (int64_t i = 0; i < duration; ++i) {
457                 shared_ptr<const F> frame = reader->get_frame (i);
458                 biggest_frame = max(biggest_frame, biggest_frame_size(frame));
459                 progress (float(i) / duration);
460         }
461
462         static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
463         static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
464         if (biggest_frame > max_frame) {
465                 return VerifyPictureAssetResult::BAD;
466         } else if (biggest_frame > risky_frame) {
467                 return VerifyPictureAssetResult::FRAME_NEARLY_TOO_LARGE;
468         }
469
470         return VerifyPictureAssetResult::GOOD;
471 }
472
473
474 static VerifyPictureAssetResult
475 verify_picture_asset (shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
476 {
477         auto r = verify_picture_asset_type<MonoPictureAsset, MonoPictureAssetReader, MonoPictureFrame>(reel_file_asset, progress);
478         if (!r) {
479                 r = verify_picture_asset_type<StereoPictureAsset, StereoPictureAssetReader, StereoPictureFrame>(reel_file_asset, progress);
480         }
481
482         DCP_ASSERT (r);
483         return *r;
484 }
485
486
487 static void
488 verify_main_picture_asset (
489         shared_ptr<const DCP> dcp,
490         shared_ptr<const ReelPictureAsset> reel_asset,
491         function<void (string, optional<boost::filesystem::path>)> stage,
492         function<void (float)> progress,
493         vector<VerificationNote>& notes
494         )
495 {
496         auto asset = reel_asset->asset();
497         auto const file = *asset->file();
498         stage ("Checking picture asset hash", file);
499         auto const r = verify_asset (dcp, reel_asset, progress);
500         switch (r) {
501                 case VerifyAssetResult::BAD:
502                         notes.push_back ({
503                                 VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_PICTURE_HASH, file
504                         });
505                         break;
506                 case VerifyAssetResult::CPL_PKL_DIFFER:
507                         notes.push_back ({
508                                 VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_PICTURE_HASHES, file
509                         });
510                         break;
511                 default:
512                         break;
513         }
514         stage ("Checking picture frame sizes", asset->file());
515         auto const pr = verify_picture_asset (reel_asset, progress);
516         switch (pr) {
517                 case VerifyPictureAssetResult::BAD:
518                         notes.push_back ({
519                                 VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
520                         });
521                         break;
522                 case VerifyPictureAssetResult::FRAME_NEARLY_TOO_LARGE:
523                         notes.push_back ({
524                                 VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
525                         });
526                         break;
527                 default:
528                         break;
529         }
530
531         /* Only flat/scope allowed by Bv2.1 */
532         if (
533                 asset->size() != Size(2048, 858) &&
534                 asset->size() != Size(1998, 1080) &&
535                 asset->size() != Size(4096, 1716) &&
536                 asset->size() != Size(3996, 2160)) {
537                 notes.push_back({
538                         VerificationNote::Type::BV21_ERROR,
539                         VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS,
540                         String::compose("%1x%2", asset->size().width, asset->size().height),
541                         file
542                 });
543         }
544
545         /* Only 24, 25, 48fps allowed for 2K */
546         if (
547                 (asset->size() == Size(2048, 858) || asset->size() == Size(1998, 1080)) &&
548                 (asset->edit_rate() != Fraction(24, 1) && asset->edit_rate() != Fraction(25, 1) && asset->edit_rate() != Fraction(48, 1))
549            ) {
550                 notes.push_back({
551                         VerificationNote::Type::BV21_ERROR,
552                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K,
553                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
554                         file
555                 });
556         }
557
558         if (asset->size() == Size(4096, 1716) || asset->size() == Size(3996, 2160)) {
559                 /* Only 24fps allowed for 4K */
560                 if (asset->edit_rate() != Fraction(24, 1)) {
561                         notes.push_back({
562                                 VerificationNote::Type::BV21_ERROR,
563                                 VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K,
564                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
565                                 file
566                         });
567                 }
568
569                 /* Only 2D allowed for 4K */
570                 if (dynamic_pointer_cast<const StereoPictureAsset>(asset)) {
571                         notes.push_back({
572                                 VerificationNote::Type::BV21_ERROR,
573                                 VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D,
574                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
575                                 file
576                         });
577
578                 }
579         }
580
581 }
582
583
584 static void
585 verify_main_sound_asset (
586         shared_ptr<const DCP> dcp,
587         shared_ptr<const ReelSoundAsset> reel_asset,
588         function<void (string, optional<boost::filesystem::path>)> stage,
589         function<void (float)> progress,
590         vector<VerificationNote>& notes
591         )
592 {
593         auto asset = reel_asset->asset();
594         stage ("Checking sound asset hash", asset->file());
595         auto const r = verify_asset (dcp, reel_asset, progress);
596         switch (r) {
597                 case VerifyAssetResult::BAD:
598                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_SOUND_HASH, *asset->file()});
599                         break;
600                 case VerifyAssetResult::CPL_PKL_DIFFER:
601                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_SOUND_HASHES, *asset->file()});
602                         break;
603                 default:
604                         break;
605         }
606
607         stage ("Checking sound asset metadata", asset->file());
608
609         verify_language_tag (asset->language(), notes);
610         if (asset->sampling_rate() != 48000) {
611                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SOUND_FRAME_RATE, raw_convert<string>(asset->sampling_rate()), *asset->file()});
612         }
613 }
614
615
616 static void
617 verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, vector<VerificationNote>& notes)
618 {
619         /* XXX: is Language compulsory? */
620         if (reel_asset->language()) {
621                 verify_language_tag (*reel_asset->language(), notes);
622         }
623
624         if (!reel_asset->entry_point()) {
625                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT, reel_asset->id() });
626         } else if (reel_asset->entry_point().get()) {
627                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT, reel_asset->id() });
628         }
629 }
630
631
632 static void
633 verify_closed_caption_reel (shared_ptr<const ReelClosedCaptionAsset> reel_asset, vector<VerificationNote>& notes)
634 {
635         /* XXX: is Language compulsory? */
636         if (reel_asset->language()) {
637                 verify_language_tag (*reel_asset->language(), notes);
638         }
639
640         if (!reel_asset->entry_point()) {
641                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
642         } else if (reel_asset->entry_point().get()) {
643                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
644         }
645 }
646
647
648 struct State
649 {
650         boost::optional<string> subtitle_language;
651 };
652
653
654 /** Verify stuff that is common to both subtitles and closed captions */
655 void
656 verify_smpte_timed_text_asset (
657         shared_ptr<const SMPTESubtitleAsset> asset,
658         vector<VerificationNote>& notes
659         )
660 {
661         if (asset->language()) {
662                 verify_language_tag (*asset->language(), notes);
663         } else {
664                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE, *asset->file() });
665         }
666
667         auto const size = boost::filesystem::file_size(asset->file().get());
668         if (size > 115 * 1024 * 1024) {
669                 notes.push_back (
670                         { VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES, raw_convert<string>(size), *asset->file() }
671                         );
672         }
673
674         /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
675          * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
676          */
677         auto fonts = asset->font_data ();
678         int total_size = 0;
679         for (auto i: fonts) {
680                 total_size += i.second.size();
681         }
682         if (total_size > 10 * 1024 * 1024) {
683                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES, raw_convert<string>(total_size), asset->file().get() });
684         }
685
686         if (!asset->start_time()) {
687                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_START_TIME, asset->file().get() });
688         } else if (asset->start_time() != Time()) {
689                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SUBTITLE_START_TIME, asset->file().get() });
690         }
691 }
692
693
694 /** Verify SMPTE subtitle-only stuff */
695 void
696 verify_smpte_subtitle_asset (
697         shared_ptr<const SMPTESubtitleAsset> asset,
698         vector<VerificationNote>& notes,
699         State& state
700         )
701 {
702         if (asset->language()) {
703                 if (!state.subtitle_language) {
704                         state.subtitle_language = *asset->language();
705                 } else if (state.subtitle_language != *asset->language()) {
706                         notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES });
707                 }
708         }
709 }
710
711
712 /** Verify all subtitle stuff */
713 static void
714 verify_subtitle_asset (
715         shared_ptr<const SubtitleAsset> asset,
716         function<void (string, optional<boost::filesystem::path>)> stage,
717         boost::filesystem::path xsd_dtd_directory,
718         vector<VerificationNote>& notes,
719         State& state
720         )
721 {
722         stage ("Checking subtitle XML", asset->file());
723         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
724          * gets passed through libdcp which may clean up and therefore hide errors.
725          */
726         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
727
728         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
729         if (smpte) {
730                 verify_smpte_timed_text_asset (smpte, notes);
731                 verify_smpte_subtitle_asset (smpte, notes, state);
732         }
733 }
734
735
736 /** Verify all closed caption stuff */
737 static void
738 verify_closed_caption_asset (
739         shared_ptr<const SubtitleAsset> asset,
740         function<void (string, optional<boost::filesystem::path>)> stage,
741         boost::filesystem::path xsd_dtd_directory,
742         vector<VerificationNote>& notes
743         )
744 {
745         stage ("Checking closed caption XML", asset->file());
746         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
747          * gets passed through libdcp which may clean up and therefore hide errors.
748          */
749         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
750
751         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
752         if (smpte) {
753                 verify_smpte_timed_text_asset (smpte, notes);
754         }
755
756         if (asset->raw_xml().size() > 256 * 1024) {
757                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES, raw_convert<string>(asset->raw_xml().size()), *asset->file()});
758         }
759 }
760
761
762 static
763 void
764 verify_text_timing (
765         vector<shared_ptr<Reel>> reels,
766         optional<int> picture_frame_rate,
767         vector<VerificationNote>& notes,
768         std::function<bool (shared_ptr<Reel>)> check,
769         std::function<string (shared_ptr<Reel>)> xml,
770         std::function<int64_t (shared_ptr<Reel>)> duration
771         )
772 {
773         /* end of last subtitle (in editable units) */
774         optional<int64_t> last_out;
775         auto too_short = false;
776         auto too_close = false;
777         auto too_early = false;
778         /* current reel start time (in editable units) */
779         int64_t reel_offset = 0;
780
781         std::function<void (cxml::ConstNodePtr, int, int, bool)> parse;
782         parse = [&parse, &last_out, &too_short, &too_close, &too_early, &reel_offset](cxml::ConstNodePtr node, int tcr, int pfr, bool first_reel) {
783                 if (node->name() == "Subtitle") {
784                         Time in (node->string_attribute("TimeIn"), tcr);
785                         Time out (node->string_attribute("TimeOut"), tcr);
786                         if (first_reel && in < Time(0, 0, 4, 0, tcr)) {
787                                 too_early = true;
788                         }
789                         auto length = out - in;
790                         if (length.as_editable_units(pfr) < 15) {
791                                 too_short = true;
792                         }
793                         if (last_out) {
794                                 /* XXX: this feels dubious - is it really what Bv2.1 means? */
795                                 auto distance = reel_offset + in.as_editable_units(pfr) - *last_out;
796                                 if (distance >= 0 && distance < 2) {
797                                         too_close = true;
798                                 }
799                         }
800                         last_out = reel_offset + out.as_editable_units(pfr);
801                 } else {
802                         for (auto i: node->node_children()) {
803                                 parse(i, tcr, pfr, first_reel);
804                         }
805                 }
806         };
807
808         for (auto i = 0U; i < reels.size(); ++i) {
809                 if (!check(reels[i])) {
810                         continue;
811                 }
812
813                 /* We need to look at <Subtitle> instances in the XML being checked, so we can't use the subtitles
814                  * read in by libdcp's parser.
815                  */
816
817                 auto doc = make_shared<cxml::Document>("SubtitleReel");
818                 doc->read_string (xml(reels[i]));
819                 auto const tcr = doc->number_child<int>("TimeCodeRate");
820                 parse (doc, tcr, picture_frame_rate.get_value_or(24), i == 0);
821                 reel_offset += duration(reels[i]);
822         }
823
824         if (too_early) {
825                 notes.push_back({
826                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME
827                 });
828         }
829
830         if (too_short) {
831                 notes.push_back ({
832                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_DURATION
833                 });
834         }
835
836         if (too_close) {
837                 notes.push_back ({
838                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_SPACING
839                 });
840         }
841 }
842
843
/** Flags filled in by verify_text_lines_and_characters(); every flag starts out false */
struct LinesCharactersResult
{
        /** true if some line's character count tripped the warning threshold */
        bool warning_length_exceeded { false };
        /** true if some line's character count tripped the error threshold */
        bool error_length_exceeded { false };
        /** true if too many lines were on screen at once */
        bool line_count_exceeded { false };
};
850
851
852 static
853 void
854 verify_text_lines_and_characters (
855         shared_ptr<SubtitleAsset> asset,
856         int warning_length,
857         int error_length,
858         LinesCharactersResult* result
859         )
860 {
861         class Event
862         {
863         public:
864                 Event (Time time_, float position_, int characters_)
865                         : time (time_)
866                         , position (position_)
867                         , characters (characters_)
868                 {}
869
870                 Event (Time time_, shared_ptr<Event> start_)
871                         : time (time_)
872                         , start (start_)
873                 {}
874
875                 Time time;
876                 int position; //< position from 0 at top of screen to 100 at bottom
877                 int characters;
878                 shared_ptr<Event> start;
879         };
880
881         vector<shared_ptr<Event>> events;
882
883         auto position = [](shared_ptr<const SubtitleString> sub) {
884                 switch (sub->v_align()) {
885                 case VAlign::TOP:
886                         return lrintf(sub->v_position() * 100);
887                 case VAlign::CENTER:
888                         return lrintf((0.5f + sub->v_position()) * 100);
889                 case VAlign::BOTTOM:
890                         return lrintf((1.0f - sub->v_position()) * 100);
891                 }
892
893                 return 0L;
894         };
895
896         for (auto j: asset->subtitles()) {
897                 auto text = dynamic_pointer_cast<const SubtitleString>(j);
898                 if (text) {
899                         auto in = make_shared<Event>(text->in(), position(text), text->text().length());
900                         events.push_back(in);
901                         events.push_back(make_shared<Event>(text->out(), in));
902                 }
903         }
904
905         std::sort(events.begin(), events.end(), [](shared_ptr<Event> const& a, shared_ptr<Event>const& b) {
906                 return a->time < b->time;
907         });
908
909         map<int, int> current;
910         for (auto i: events) {
911                 if (current.size() > 3) {
912                         result->line_count_exceeded = true;
913                 }
914                 for (auto j: current) {
915                         if (j.second >= warning_length) {
916                                 result->warning_length_exceeded = true;
917                         }
918                         if (j.second >= error_length) {
919                                 result->error_length_exceeded = true;
920                         }
921                 }
922
923                 if (i->start) {
924                         /* end of a subtitle */
925                         DCP_ASSERT (current.find(i->start->position) != current.end());
926                         if (current[i->start->position] == i->start->characters) {
927                                 current.erase(i->start->position);
928                         } else {
929                                 current[i->start->position] -= i->start->characters;
930                         }
931                 } else {
932                         /* start of a subtitle */
933                         if (current.find(i->position) == current.end()) {
934                                 current[i->position] = i->characters;
935                         } else {
936                                 current[i->position] += i->characters;
937                         }
938                 }
939         }
940 }
941
942
943 static
944 void
945 verify_text_timing (vector<shared_ptr<Reel>> reels, vector<VerificationNote>& notes)
946 {
947         if (reels.empty()) {
948                 return;
949         }
950
951         optional<int> picture_frame_rate;
952         if (reels[0]->main_picture()) {
953                 picture_frame_rate = reels[0]->main_picture()->frame_rate().numerator;
954         }
955
956         if (reels[0]->main_subtitle()) {
957                 verify_text_timing (reels, picture_frame_rate, notes,
958                         [](shared_ptr<Reel> reel) {
959                                 return static_cast<bool>(reel->main_subtitle());
960                         },
961                         [](shared_ptr<Reel> reel) {
962                                 return reel->main_subtitle()->asset()->raw_xml();
963                         },
964                         [](shared_ptr<Reel> reel) {
965                                 return reel->main_subtitle()->actual_duration();
966                         }
967                 );
968         }
969
970         for (auto i = 0U; i < reels[0]->closed_captions().size(); ++i) {
971                 verify_text_timing (reels, picture_frame_rate, notes,
972                         [i](shared_ptr<Reel> reel) {
973                                 return i < reel->closed_captions().size();
974                         },
975                         [i](shared_ptr<Reel> reel) {
976                                 return reel->closed_captions()[i]->asset()->raw_xml();
977                         },
978                         [i](shared_ptr<Reel> reel) {
979                                 return reel->closed_captions()[i]->actual_duration();
980                         }
981                 );
982         }
983 }
984
985
986 void
987 verify_extension_metadata (shared_ptr<CPL> cpl, vector<VerificationNote>& notes)
988 {
989         DCP_ASSERT (cpl->file());
990         cxml::Document doc ("CompositionPlaylist");
991         doc.read_file (cpl->file().get());
992
993         auto missing = false;
994         string malformed;
995
996         if (auto reel_list = doc.node_child("ReelList")) {
997                 auto reels = reel_list->node_children("Reel");
998                 if (!reels.empty()) {
999                         if (auto asset_list = reels[0]->optional_node_child("AssetList")) {
1000                                 if (auto metadata = asset_list->optional_node_child("CompositionMetadataAsset")) {
1001                                         if (auto extension_list = metadata->optional_node_child("ExtensionMetadataList")) {
1002                                                 missing = true;
1003                                                 for (auto extension: extension_list->node_children("ExtensionMetadata")) {
1004                                                         if (extension->optional_string_attribute("scope").get_value_or("") != "http://isdcf.com/ns/cplmd/app") {
1005                                                                 continue;
1006                                                         }
1007                                                         missing = false;
1008                                                         if (auto name = extension->optional_node_child("Name")) {
1009                                                                 if (name->content() != "Application") {
1010                                                                         malformed = "<Name> should be 'Application'";
1011                                                                 }
1012                                                         }
1013                                                         if (auto property_list = extension->optional_node_child("PropertyList")) {
1014                                                                 if (auto property = property_list->optional_node_child("Property")) {
1015                                                                         if (auto name = property->optional_node_child("Name")) {
1016                                                                                 if (name->content() != "DCP Constraints Profile") {
1017                                                                                         malformed = "<Name> property should be 'DCP Constraints Profile'";
1018                                                                                 }
1019                                                                         }
1020                                                                         if (auto value = property->optional_node_child("Value")) {
1021                                                                                 if (value->content() != "SMPTE-RDD-52:2020-Bv2.1") {
1022                                                                                         malformed = "<Value> property should be 'SMPTE-RDD-52:2020-Bv2.1'";
1023                                                                                 }
1024                                                                         }
1025                                                                 }
1026                                                         }
1027                                                 }
1028                                         } else {
1029                                                 missing = true;
1030                                         }
1031                                 }
1032                         }
1033                 }
1034         }
1035
1036         if (missing) {
1037                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_EXTENSION_METADATA, cpl->id(), cpl->file().get()});
1038         } else if (!malformed.empty()) {
1039                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_EXTENSION_METADATA, malformed, cpl->file().get()});
1040         }
1041 }
1042
1043
1044 bool
1045 pkl_has_encrypted_assets (shared_ptr<DCP> dcp, shared_ptr<PKL> pkl)
1046 {
1047         vector<string> encrypted;
1048         for (auto i: dcp->cpls()) {
1049                 for (auto j: i->reel_file_assets()) {
1050                         if (j->asset_ref().resolved()) {
1051                                 /* It's a bit surprising / broken but Interop subtitle assets are represented
1052                                  * in reels by ReelSubtitleAsset which inherits ReelFileAsset, so it's possible for
1053                                  * ReelFileAssets to have assets which are not MXFs.
1054                                  */
1055                                 if (auto asset = dynamic_pointer_cast<MXF>(j->asset_ref().asset())) {
1056                                         if (asset->encrypted()) {
1057                                                 encrypted.push_back(j->asset_ref().id());
1058                                         }
1059                                 }
1060                         }
1061                 }
1062         }
1063
1064         for (auto i: pkl->asset_list()) {
1065                 if (find(encrypted.begin(), encrypted.end(), i->id()) != encrypted.end()) {
1066                         return true;
1067                 }
1068         }
1069
1070         return false;
1071 }
1072
1073
1074 vector<VerificationNote>
1075 dcp::verify (
1076         vector<boost::filesystem::path> directories,
1077         function<void (string, optional<boost::filesystem::path>)> stage,
1078         function<void (float)> progress,
1079         boost::filesystem::path xsd_dtd_directory
1080         )
1081 {
1082         xsd_dtd_directory = boost::filesystem::canonical (xsd_dtd_directory);
1083
1084         vector<VerificationNote> notes;
1085         State state{};
1086
1087         vector<shared_ptr<DCP>> dcps;
1088         for (auto i: directories) {
1089                 dcps.push_back (shared_ptr<DCP> (new DCP (i)));
1090         }
1091
1092         for (auto dcp: dcps) {
1093                 stage ("Checking DCP", dcp->directory());
1094                 try {
1095                         dcp->read (&notes);
1096                 } catch (ReadError& e) {
1097                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1098                 } catch (XMLError& e) {
1099                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1100                 } catch (MXFFileError& e) {
1101                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1102                 } catch (cxml::Error& e) {
1103                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1104                 }
1105
1106                 if (dcp->standard() != Standard::SMPTE) {
1107                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_STANDARD});
1108                 }
1109
1110                 for (auto cpl: dcp->cpls()) {
1111                         stage ("Checking CPL", cpl->file());
1112                         validate_xml (cpl->file().get(), xsd_dtd_directory, notes);
1113
1114                         if (cpl->any_encrypted() && !cpl->all_encrypted()) {
1115                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::PARTIALLY_ENCRYPTED});
1116                         }
1117
1118                         for (auto const& i: cpl->additional_subtitle_languages()) {
1119                                 verify_language_tag (i, notes);
1120                         }
1121
1122                         if (cpl->release_territory()) {
1123                                 if (!cpl->release_territory_scope() || cpl->release_territory_scope().get() != "http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata#scope/release-territory/UNM49") {
1124                                         auto terr = cpl->release_territory().get();
1125                                         /* Must be a valid region tag, or "001" */
1126                                         try {
1127                                                 LanguageTag::RegionSubtag test (terr);
1128                                         } catch (...) {
1129                                                 if (terr != "001") {
1130                                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, terr});
1131                                                 }
1132                                         }
1133                                 }
1134                         }
1135
1136                         if (dcp->standard() == Standard::SMPTE) {
1137                                 if (!cpl->annotation_text()) {
1138                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1139                                 } else if (cpl->annotation_text().get() != cpl->content_title_text()) {
1140                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1141                                 }
1142                         }
1143
1144                         for (auto i: dcp->pkls()) {
1145                                 /* Check that the CPL's hash corresponds to the PKL */
1146                                 optional<string> h = i->hash(cpl->id());
1147                                 if (h && make_digest(ArrayData(*cpl->file())) != *h) {
1148                                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_CPL_HASHES, cpl->id(), cpl->file().get()});
1149                                 }
1150
1151                                 /* Check that any PKL with a single CPL has its AnnotationText the same as the CPL's ContentTitleText */
1152                                 optional<string> required_annotation_text;
1153                                 for (auto j: i->asset_list()) {
1154                                         /* See if this is a CPL */
1155                                         for (auto k: dcp->cpls()) {
1156                                                 if (j->id() == k->id()) {
1157                                                         if (!required_annotation_text) {
1158                                                                 /* First CPL we have found; this is the required AnnotationText unless we find another */
1159                                                                 required_annotation_text = cpl->content_title_text();
1160                                                         } else {
1161                                                                 /* There's more than one CPL so we don't care what the PKL's AnnotationText is */
1162                                                                 required_annotation_text = boost::none;
1163                                                         }
1164                                                 }
1165                                         }
1166                                 }
1167
1168                                 if (required_annotation_text && i->annotation_text() != required_annotation_text) {
1169                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL, i->id(), i->file().get()});
1170                                 }
1171                         }
1172
1173                         /* set to true if any reel has a MainSubtitle */
1174                         auto have_main_subtitle = false;
1175                         /* set to true if any reel has no MainSubtitle */
1176                         auto have_no_main_subtitle = false;
1177                         /* fewest number of closed caption assets seen in a reel */
1178                         size_t fewest_closed_captions = SIZE_MAX;
1179                         /* most number of closed caption assets seen in a reel */
1180                         size_t most_closed_captions = 0;
1181                         map<Marker, Time> markers_seen;
1182
1183                         for (auto reel: cpl->reels()) {
1184                                 stage ("Checking reel", optional<boost::filesystem::path>());
1185
1186                                 for (auto i: reel->assets()) {
1187                                         if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1188                                                 notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_DURATION, i->id()});
1189                                         }
1190                                         if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1191                                                 notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_INTRINSIC_DURATION, i->id()});
1192                                         }
1193                                         auto file_asset = dynamic_pointer_cast<ReelFileAsset>(i);
1194                                         if (file_asset && !file_asset->hash()) {
1195                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_HASH, i->id()});
1196                                         }
1197                                 }
1198
1199                                 if (dcp->standard() == Standard::SMPTE) {
1200                                         boost::optional<int64_t> duration;
1201                                         for (auto i: reel->assets()) {
1202                                                 if (!duration) {
1203                                                         duration = i->actual_duration();
1204                                                 } else if (*duration != i->actual_duration()) {
1205                                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_ASSET_DURATION});
1206                                                         break;
1207                                                 }
1208                                         }
1209                                 }
1210
1211                                 if (reel->main_picture()) {
1212                                         /* Check reel stuff */
1213                                         auto const frame_rate = reel->main_picture()->frame_rate();
1214                                         if (frame_rate.denominator != 1 ||
1215                                             (frame_rate.numerator != 24 &&
1216                                              frame_rate.numerator != 25 &&
1217                                              frame_rate.numerator != 30 &&
1218                                              frame_rate.numerator != 48 &&
1219                                              frame_rate.numerator != 50 &&
1220                                              frame_rate.numerator != 60 &&
1221                                              frame_rate.numerator != 96)) {
1222                                                 notes.push_back ({
1223                                                         VerificationNote::Type::ERROR,
1224                                                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE,
1225                                                         String::compose("%1/%2", frame_rate.numerator, frame_rate.denominator)
1226                                                 });
1227                                         }
1228                                         /* Check asset */
1229                                         if (reel->main_picture()->asset_ref().resolved()) {
1230                                                 verify_main_picture_asset (dcp, reel->main_picture(), stage, progress, notes);
1231                                         }
1232                                 }
1233
1234                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
1235                                         verify_main_sound_asset (dcp, reel->main_sound(), stage, progress, notes);
1236                                 }
1237
1238                                 if (reel->main_subtitle()) {
1239                                         verify_main_subtitle_reel (reel->main_subtitle(), notes);
1240                                         if (reel->main_subtitle()->asset_ref().resolved()) {
1241                                                 verify_subtitle_asset (reel->main_subtitle()->asset(), stage, xsd_dtd_directory, notes, state);
1242                                         }
1243                                         have_main_subtitle = true;
1244                                 } else {
1245                                         have_no_main_subtitle = true;
1246                                 }
1247
1248                                 for (auto i: reel->closed_captions()) {
1249                                         verify_closed_caption_reel (i, notes);
1250                                         if (i->asset_ref().resolved()) {
1251                                                 verify_closed_caption_asset (i->asset(), stage, xsd_dtd_directory, notes);
1252                                         }
1253                                 }
1254
1255                                 if (reel->main_markers()) {
1256                                         for (auto const& i: reel->main_markers()->get()) {
1257                                                 markers_seen.insert (i);
1258                                         }
1259                                 }
1260
1261                                 fewest_closed_captions = std::min (fewest_closed_captions, reel->closed_captions().size());
1262                                 most_closed_captions = std::max (most_closed_captions, reel->closed_captions().size());
1263                         }
1264
1265                         if (dcp->standard() == Standard::SMPTE) {
1266
1267                                 if (have_main_subtitle && have_no_main_subtitle) {
1268                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS});
1269                                 }
1270
1271                                 if (fewest_closed_captions != most_closed_captions) {
1272                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS});
1273                                 }
1274
1275                                 if (cpl->content_kind() == ContentKind::FEATURE) {
1276                                         if (markers_seen.find(Marker::FFEC) == markers_seen.end()) {
1277                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFEC_IN_FEATURE});
1278                                         }
1279                                         if (markers_seen.find(Marker::FFMC) == markers_seen.end()) {
1280                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFMC_IN_FEATURE});
1281                                         }
1282                                 }
1283
1284                                 auto ffoc = markers_seen.find(Marker::FFOC);
1285                                 if (ffoc == markers_seen.end()) {
1286                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_FFOC});
1287                                 } else if (ffoc->second.e != 1) {
1288                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_FFOC, raw_convert<string>(ffoc->second.e)});
1289                                 }
1290
1291                                 auto lfoc = markers_seen.find(Marker::LFOC);
1292                                 if (lfoc == markers_seen.end()) {
1293                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_LFOC});
1294                                 } else {
1295                                         auto lfoc_time = lfoc->second.as_editable_units(lfoc->second.tcr);
1296                                         if (lfoc_time != (cpl->reels().back()->duration() - 1)) {
1297                                                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_LFOC, raw_convert<string>(lfoc_time)});
1298                                         }
1299                                 }
1300
1301                                 verify_text_timing (cpl->reels(), notes);
1302
1303                                 LinesCharactersResult result;
1304                                 for (auto reel: cpl->reels()) {
1305                                         if (reel->main_subtitle() && reel->main_subtitle()->asset()) {
1306                                                 verify_text_lines_and_characters (reel->main_subtitle()->asset(), 52, 79, &result);
1307                                         }
1308                                 }
1309
1310                                 if (result.line_count_exceeded) {
1311                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT});
1312                                 }
1313                                 if (result.error_length_exceeded) {
1314                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH});
1315                                 } else if (result.warning_length_exceeded) {
1316                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH});
1317                                 }
1318
1319                                 result = LinesCharactersResult();
1320                                 for (auto reel: cpl->reels()) {
1321                                         for (auto i: reel->closed_captions()) {
1322                                                 if (i->asset()) {
1323                                                         verify_text_lines_and_characters (i->asset(), 32, 32, &result);
1324                                                 }
1325                                         }
1326                                 }
1327
1328                                 if (result.line_count_exceeded) {
1329                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT});
1330                                 }
1331                                 if (result.error_length_exceeded) {
1332                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH});
1333                                 }
1334
1335                                 if (!cpl->full_content_title_text()) {
1336                                         /* Since FullContentTitleText is assumed always to exist if there's a CompositionMetadataAsset we
1337                                          * can use it as a proxy for CompositionMetadataAsset's existence.
1338                                          */
1339                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA, cpl->id(), cpl->file().get()});
1340                                 } else if (!cpl->version_number()) {
1341                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER, cpl->id(), cpl->file().get()});
1342                                 }
1343
1344                                 verify_extension_metadata (cpl, notes);
1345
1346                                 if (cpl->any_encrypted()) {
1347                                         cxml::Document doc ("CompositionPlaylist");
1348                                         DCP_ASSERT (cpl->file());
1349                                         doc.read_file (cpl->file().get());
1350                                         if (!doc.optional_node_child("Signature")) {
1351                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT, cpl->id(), cpl->file().get()});
1352                                         }
1353                                 }
1354                         }
1355                 }
1356
1357                 for (auto pkl: dcp->pkls()) {
1358                         stage ("Checking PKL", pkl->file());
1359                         validate_xml (pkl->file().get(), xsd_dtd_directory, notes);
1360                         if (pkl_has_encrypted_assets(dcp, pkl)) {
1361                                 cxml::Document doc ("PackingList");
1362                                 doc.read_file (pkl->file().get());
1363                                 if (!doc.optional_node_child("Signature")) {
1364                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT, pkl->id(), pkl->file().get()});
1365                                 }
1366                         }
1367                 }
1368
1369                 if (dcp->asset_map_path()) {
1370                         stage ("Checking ASSETMAP", dcp->asset_map_path().get());
1371                         validate_xml (dcp->asset_map_path().get(), xsd_dtd_directory, notes);
1372                 } else {
1373                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_ASSETMAP});
1374                 }
1375         }
1376
1377         return notes;
1378 }
1379
1380
1381 string
1382 dcp::note_to_string (VerificationNote note)
1383 {
1384         /** These strings should say what is wrong, incorporating any extra details (ID, filenames etc.).
1385          *
1386          *  e.g. "ClosedCaption asset has no <EntryPoint> tag.",
1387          *  not "ClosedCaption assets must have an <EntryPoint> tag."
1388          *
1389          *  It's OK to use XML tag names where they are clear.
1390          *  If both ID and filename are available, use only the ID.
1391          *  End messages with a full stop.
1392          *  Messages should not mention whether or not their errors are a part of Bv2.1.
1393          */
1394         switch (note.code()) {
1395         case VerificationNote::Code::FAILED_READ:
1396                 return *note.note();
1397         case VerificationNote::Code::MISMATCHED_CPL_HASHES:
1398                 return String::compose("The hash of the CPL %1 in the PKL does not agree with the CPL file.", note.note().get());
1399         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE:
1400                 return String::compose("The picture in a reel has an invalid frame rate %1.", note.note().get());
1401         case VerificationNote::Code::INCORRECT_PICTURE_HASH:
1402                 return String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
1403         case VerificationNote::Code::MISMATCHED_PICTURE_HASHES:
1404                 return String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
1405         case VerificationNote::Code::INCORRECT_SOUND_HASH:
1406                 return String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
1407         case VerificationNote::Code::MISMATCHED_SOUND_HASHES:
1408                 return String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
1409         case VerificationNote::Code::EMPTY_ASSET_PATH:
1410                 return "The asset map contains an empty asset path.";
1411         case VerificationNote::Code::MISSING_ASSET:
1412                 return String::compose("The file %1 for an asset in the asset map cannot be found.", note.file()->filename());
1413         case VerificationNote::Code::MISMATCHED_STANDARD:
1414                 return "The DCP contains both SMPTE and Interop parts.";
1415         case VerificationNote::Code::INVALID_XML:
1416                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
1417         case VerificationNote::Code::MISSING_ASSETMAP:
1418                 return "No ASSETMAP or ASSETMAP.xml was found.";
1419         case VerificationNote::Code::INVALID_INTRINSIC_DURATION:
1420                 return String::compose("The intrinsic duration of the asset %1 is less than 1 second.", note.note().get());
1421         case VerificationNote::Code::INVALID_DURATION:
1422                 return String::compose("The duration of the asset %1 is less than 1 second.", note.note().get());
1423         case VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1424                 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
1425         case VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1426                 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
1427         case VerificationNote::Code::EXTERNAL_ASSET:
1428                 return String::compose("The asset %1 that this DCP refers to is not included in the DCP.  It may be a VF.", note.note().get());
1429         case VerificationNote::Code::INVALID_STANDARD:
1430                 return "This DCP does not use the SMPTE standard.";
1431         case VerificationNote::Code::INVALID_LANGUAGE:
1432                 return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
1433         case VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS:
1434                 return String::compose("The size %1 of picture asset %2 is not allowed.", note.note().get(), note.file()->filename());
1435         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K:
1436                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 2K DCPs.", note.note().get(), note.file()->filename());
1437         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K:
1438                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 4K DCPs.", note.note().get(), note.file()->filename());
1439         case VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D:
1440                 return "3D 4K DCPs are not allowed.";
1441         case VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES:
1442                 return String::compose("The size %1 of the closed caption asset %2 is larger than the 256KB maximum.", note.note().get(), note.file()->filename());
1443         case VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES:
1444                 return String::compose("The size %1 of the timed text asset %2 is larger than the 115MB maximum.", note.note().get(), note.file()->filename());
1445         case VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES:
1446                 return String::compose("The size %1 of the fonts in timed text asset %2 is larger than the 10MB maximum.", note.note().get(), note.file()->filename());
1447         case VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE:
1448                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <Language> tag.", note.file()->filename());
1449         case VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES:
1450                 return "Some subtitle assets have different <Language> tags than others";
1451         case VerificationNote::Code::MISSING_SUBTITLE_START_TIME:
1452                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <StartTime> tag.", note.file()->filename());
1453         case VerificationNote::Code::INVALID_SUBTITLE_START_TIME:
1454                 return String::compose("The XML for a SMPTE subtitle asset %1 has a non-zero <StartTime> tag.", note.file()->filename());
1455         case VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME:
1456                 return "The first subtitle or closed caption is less than 4 seconds from the start of the DCP.";
1457         case VerificationNote::Code::INVALID_SUBTITLE_DURATION:
1458                 return "At least one subtitle lasts less than 15 frames.";
1459         case VerificationNote::Code::INVALID_SUBTITLE_SPACING:
1460                 return "At least one pair of subtitles is separated by less than 2 frames.";
1461         case VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT:
1462                 return "There are more than 3 subtitle lines in at least one place in the DCP.";
1463         case VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH:
1464                 return "There are more than 52 characters in at least one subtitle line.";
1465         case VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH:
1466                 return "There are more than 79 characters in at least one subtitle line.";
1467         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT:
1468                 return "There are more than 3 closed caption lines in at least one place.";
1469         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH:
1470                 return "There are more than 32 characters in at least one closed caption line.";
1471         case VerificationNote::Code::INVALID_SOUND_FRAME_RATE:
1472                 return String::compose("The sound asset %1 has a sampling rate of %2", note.file()->filename(), note.note().get());
1473         case VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT:
1474                 return String::compose("The CPL %1 has no <AnnotationText> tag.", note.note().get());
1475         case VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT:
1476                 return String::compose("The CPL %1 has an <AnnotationText> which differs from its <ContentTitleText>", note.note().get());
1477         case VerificationNote::Code::MISMATCHED_ASSET_DURATION:
1478                 return "All assets in a reel do not have the same duration.";
1479         case VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS:
1480                 return "At least one reel contains a subtitle asset, but some reel(s) do not";
1481         case VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS:
1482                 return "At least one reel has closed captions, but reels have different numbers of closed caption assets.";
1483         case VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT:
1484                 return String::compose("The subtitle asset %1 has no <EntryPoint> tag.", note.note().get());
1485         case VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT:
1486                 return String::compose("The subtitle asset %1 has an <EntryPoint> other than 0.", note.note().get());
1487         case VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT:
1488                 return String::compose("The closed caption asset %1 has no <EntryPoint> tag.", note.note().get());
1489         case VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT:
1490                 return String::compose("The closed caption asset %1 has an <EntryPoint> other than 0.", note.note().get());
1491         case VerificationNote::Code::MISSING_HASH:
1492                 return String::compose("The asset %1 has no <Hash> tag in the CPL.", note.note().get());
1493         case VerificationNote::Code::MISSING_FFEC_IN_FEATURE:
1494                 return "The DCP is marked as a Feature but there is no FFEC (first frame of end credits) marker";
1495         case VerificationNote::Code::MISSING_FFMC_IN_FEATURE:
1496                 return "The DCP is marked as a Feature but there is no FFMC (first frame of moving credits) marker";
1497         case VerificationNote::Code::MISSING_FFOC:
1498                 return "There should be a FFOC (first frame of content) marker";
1499         case VerificationNote::Code::MISSING_LFOC:
1500                 return "There should be a LFOC (last frame of content) marker";
1501         case VerificationNote::Code::INCORRECT_FFOC:
1502                 return String::compose("The FFOC marker is %1 instead of 1", note.note().get());
1503         case VerificationNote::Code::INCORRECT_LFOC:
1504                 return String::compose("The LFOC marker is %1 instead of 1 less than the duration of the last reel.", note.note().get());
1505         case VerificationNote::Code::MISSING_CPL_METADATA:
1506                 return String::compose("The CPL %1 has no <CompositionMetadataAsset> tag.", note.note().get());
1507         case VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER:
1508                 return String::compose("The CPL %1 has no <VersionNumber> in its <CompositionMetadataAsset>.", note.note().get());
1509         case VerificationNote::Code::MISSING_EXTENSION_METADATA:
1510                 return String::compose("The CPL %1 has no <ExtensionMetadata> in its <CompositionMetadataAsset>.", note.note().get());
1511         case VerificationNote::Code::INVALID_EXTENSION_METADATA:
1512                 return String::compose("The CPL %1 has a malformed <ExtensionMetadata> (%2).", note.file()->filename(), note.note().get());
1513         case VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT:
1514                 return String::compose("The CPL %1, which has encrypted content, is not signed.", note.note().get());
1515         case VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT:
1516                 return String::compose("The PKL %1, which has encrypted content, is not signed.", note.note().get());
1517         case VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL:
1518                 return String::compose("The PKL %1 has only one CPL but its <AnnotationText> does not match the CPL's <ContentTitleText>.", note.note().get());
1519         case VerificationNote::Code::PARTIALLY_ENCRYPTED:
1520                 return "Some assets are encrypted but some are not.";
1521         case VerificationNote::Code::INVALID_JPEG2000_CODESTREAM:
1522                 return String::compose("The JPEG2000 codestream for at least one frame is invalid (%1)", note.note().get());
1523         case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_2K:
1524                 return String::compose("The JPEG2000 codestream uses %1 guard bits in a 2K image instead of 1.", note.note().get());
1525         case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_4K:
1526                 return String::compose("The JPEG2000 codestream uses %1 guard bits in a 4K image instead of 2.", note.note().get());
1527         case VerificationNote::Code::INVALID_JPEG2000_TILE_SIZE:
1528                 return "The JPEG2000 tile size is not the same as the image size.";
1529         case VerificationNote::Code::INVALID_JPEG2000_CODE_BLOCK_WIDTH:
1530                 return String::compose("The JPEG2000 codestream uses a code block width of %1 instead of 32.", note.note().get());
1531         case VerificationNote::Code::INVALID_JPEG2000_CODE_BLOCK_HEIGHT:
1532                 return String::compose("The JPEG2000 codestream uses a code block height of %1 instead of 32.", note.note().get());
1533         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER_COUNT_FOR_2K:
1534                 return String::compose("%1 POC markers found in 2K JPEG2000 codestream instead of 0.", note.note().get());
1535         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER_COUNT_FOR_4K:
1536                 return String::compose("%1 POC markers found in 4K JPEG2000 codestream instead of 1.", note.note().get());
1537         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER:
1538                 return String::compose("Incorrect POC marker content found (%1)", note.note().get());
1539         case VerificationNote::Code::INVALID_JPEG2000_POC_MARKER_LOCATION:
1540                 return "POC marker found outside main header";
1541         case VerificationNote::Code::INVALID_JPEG2000_TILE_PARTS_FOR_2K:
1542                 return String::compose("The JPEG2000 codestream has %1 tile parts in a 2K image instead of 3.", note.note().get());
1543         case VerificationNote::Code::INVALID_JPEG2000_TILE_PARTS_FOR_4K:
1544                 return String::compose("The JPEG2000 codestream has %1 tile parts in a 4K image instead of 6.", note.note().get());
1545         case VerificationNote::Code::MISSING_JPEG200_TLM_MARKER:
1546                 return "No TLM marker was found in a JPEG2000 codestream.";
1547         }
1548
1549         return "";
1550 }
1551
1552
1553 bool
1554 dcp::operator== (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
1555 {
1556         return a.type() == b.type() && a.code() == b.code() && a.note() == b.note() && a.file() == b.file() && a.line() == b.line();
1557 }
1558
1559
1560 std::ostream&
1561 dcp::operator<< (std::ostream& s, dcp::VerificationNote const& note)
1562 {
1563         s << note_to_string (note);
1564         if (note.note()) {
1565                 s << " [" << note.note().get() << "]";
1566         }
1567         if (note.file()) {
1568                 s << " [" << note.file().get() << "]";
1569         }
1570         if (note.line()) {
1571                 s << " [" << note.line().get() << "]";
1572         }
1573         return s;
1574 }
1575