Tidying.
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2021 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34 #include "verify.h"
35 #include "dcp.h"
36 #include "cpl.h"
37 #include "reel.h"
38 #include "reel_closed_caption_asset.h"
39 #include "reel_picture_asset.h"
40 #include "reel_sound_asset.h"
41 #include "reel_subtitle_asset.h"
42 #include "interop_subtitle_asset.h"
43 #include "mono_picture_asset.h"
44 #include "mono_picture_frame.h"
45 #include "stereo_picture_asset.h"
46 #include "stereo_picture_frame.h"
47 #include "exceptions.h"
48 #include "compose.hpp"
49 #include "raw_convert.h"
50 #include "reel_markers_asset.h"
51 #include "smpte_subtitle_asset.h"
52 #include <xercesc/util/PlatformUtils.hpp>
53 #include <xercesc/parsers/XercesDOMParser.hpp>
54 #include <xercesc/parsers/AbstractDOMParser.hpp>
55 #include <xercesc/sax/HandlerBase.hpp>
56 #include <xercesc/dom/DOMImplementation.hpp>
57 #include <xercesc/dom/DOMImplementationLS.hpp>
58 #include <xercesc/dom/DOMImplementationRegistry.hpp>
59 #include <xercesc/dom/DOMLSParser.hpp>
60 #include <xercesc/dom/DOMException.hpp>
61 #include <xercesc/dom/DOMDocument.hpp>
62 #include <xercesc/dom/DOMNodeList.hpp>
63 #include <xercesc/dom/DOMError.hpp>
64 #include <xercesc/dom/DOMLocator.hpp>
65 #include <xercesc/dom/DOMNamedNodeMap.hpp>
66 #include <xercesc/dom/DOMAttr.hpp>
67 #include <xercesc/dom/DOMErrorHandler.hpp>
68 #include <xercesc/framework/LocalFileInputSource.hpp>
69 #include <xercesc/framework/MemBufInputSource.hpp>
70 #include <boost/algorithm/string.hpp>
71 #include <map>
72 #include <vector>
73 #include <iostream>
74
75 using std::list;
76 using std::vector;
77 using std::string;
78 using std::cout;
79 using std::map;
80 using std::max;
81 using std::set;
82 using std::shared_ptr;
83 using std::make_shared;
84 using boost::optional;
85 using boost::function;
86 using std::dynamic_pointer_cast;
87
88 using namespace dcp;
89 using namespace xercesc;
90
91 static
92 string
93 xml_ch_to_string (XMLCh const * a)
94 {
95         char* x = XMLString::transcode(a);
96         string const o(x);
97         XMLString::release(&x);
98         return o;
99 }
100
101 class XMLValidationError
102 {
103 public:
104         XMLValidationError (SAXParseException const & e)
105                 : _message (xml_ch_to_string(e.getMessage()))
106                 , _line (e.getLineNumber())
107                 , _column (e.getColumnNumber())
108                 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
109                 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
110         {
111
112         }
113
114         string message () const {
115                 return _message;
116         }
117
118         uint64_t line () const {
119                 return _line;
120         }
121
122         uint64_t column () const {
123                 return _column;
124         }
125
126         string public_id () const {
127                 return _public_id;
128         }
129
130         string system_id () const {
131                 return _system_id;
132         }
133
134 private:
135         string _message;
136         uint64_t _line;
137         uint64_t _column;
138         string _public_id;
139         string _system_id;
140 };
141
142
143 class DCPErrorHandler : public ErrorHandler
144 {
145 public:
146         void warning(const SAXParseException& e)
147         {
148                 maybe_add (XMLValidationError(e));
149         }
150
151         void error(const SAXParseException& e)
152         {
153                 maybe_add (XMLValidationError(e));
154         }
155
156         void fatalError(const SAXParseException& e)
157         {
158                 maybe_add (XMLValidationError(e));
159         }
160
161         void resetErrors() {
162                 _errors.clear ();
163         }
164
165         list<XMLValidationError> errors () const {
166                 return _errors;
167         }
168
169 private:
170         void maybe_add (XMLValidationError e)
171         {
172                 /* XXX: nasty hack */
173                 if (
174                         e.message().find("schema document") != string::npos &&
175                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
176                         ) {
177                         return;
178                 }
179
180                 _errors.push_back (e);
181         }
182
183         list<XMLValidationError> _errors;
184 };
185
186 class StringToXMLCh
187 {
188 public:
189         StringToXMLCh (string a)
190         {
191                 _buffer = XMLString::transcode(a.c_str());
192         }
193
194         StringToXMLCh (StringToXMLCh const&) = delete;
195         StringToXMLCh& operator= (StringToXMLCh const&) = delete;
196
197         ~StringToXMLCh ()
198         {
199                 XMLString::release (&_buffer);
200         }
201
202         XMLCh const * get () const {
203                 return _buffer;
204         }
205
206 private:
207         XMLCh* _buffer;
208 };
209
210 class LocalFileResolver : public EntityResolver
211 {
212 public:
213         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
214                 : _xsd_dtd_directory (xsd_dtd_directory)
215         {
216                 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
217                  * found without being here.
218                  */
219                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
220                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
221                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
222                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
223                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
224                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
225                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
226                 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
227                 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
228                 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
229                 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
230                 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
231                 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
232         }
233
234         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
235         {
236                 if (!system_id) {
237                         return 0;
238                 }
239                 auto system_id_str = xml_ch_to_string (system_id);
240                 auto p = _xsd_dtd_directory;
241                 if (_files.find(system_id_str) == _files.end()) {
242                         p /= system_id_str;
243                 } else {
244                         p /= _files[system_id_str];
245                 }
246                 StringToXMLCh ch (p.string());
247                 return new LocalFileInputSource(ch.get());
248         }
249
250 private:
251         void add (string uri, string file)
252         {
253                 _files[uri] = file;
254         }
255
256         std::map<string, string> _files;
257         boost::filesystem::path _xsd_dtd_directory;
258 };
259
260
261 static void
262 parse (XercesDOMParser& parser, boost::filesystem::path xml)
263 {
264         parser.parse(xml.string().c_str());
265 }
266
267
268 static void
269 parse (XercesDOMParser& parser, string xml)
270 {
271         xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
272         parser.parse(buf);
273 }
274
275
276 template <class T>
277 void
278 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, vector<VerificationNote>& notes)
279 {
280         try {
281                 XMLPlatformUtils::Initialize ();
282         } catch (XMLException& e) {
283                 throw MiscError ("Failed to initialise xerces library");
284         }
285
286         DCPErrorHandler error_handler;
287
288         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
289         {
290                 XercesDOMParser parser;
291                 parser.setValidationScheme(XercesDOMParser::Val_Always);
292                 parser.setDoNamespaces(true);
293                 parser.setDoSchema(true);
294
295                 vector<string> schema;
296                 schema.push_back("xml.xsd");
297                 schema.push_back("xmldsig-core-schema.xsd");
298                 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
299                 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
300                 schema.push_back("SMPTE-429-9-2007-AM.xsd");
301                 schema.push_back("Main-Stereo-Picture-CPL.xsd");
302                 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
303                 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
304                 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
305                 schema.push_back("DCSubtitle.v1.mattsson.xsd");
306                 schema.push_back("DCDMSubtitle-2010.xsd");
307                 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
308                 schema.push_back("SMPTE-429-16.xsd");
309                 schema.push_back("Dolby-2012-AD.xsd");
310                 schema.push_back("SMPTE-429-10-2008.xsd");
311                 schema.push_back("xlink.xsd");
312                 schema.push_back("SMPTE-335-2012.xsd");
313                 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
314                 schema.push_back("isdcf-mca.xsd");
315                 schema.push_back("SMPTE-429-12-2008.xsd");
316
317                 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
318                  * Schemas that are not mentioned in this list are not read, and the things
319                  * they describe are not checked.
320                  */
321                 string locations;
322                 for (auto i: schema) {
323                         locations += String::compose("%1 %1 ", i, i);
324                 }
325
326                 parser.setExternalSchemaLocation(locations.c_str());
327                 parser.setValidationSchemaFullChecking(true);
328                 parser.setErrorHandler(&error_handler);
329
330                 LocalFileResolver resolver (xsd_dtd_directory);
331                 parser.setEntityResolver(&resolver);
332
333                 try {
334                         parser.resetDocumentPool();
335                         parse(parser, xml);
336                 } catch (XMLException& e) {
337                         throw MiscError(xml_ch_to_string(e.getMessage()));
338                 } catch (DOMException& e) {
339                         throw MiscError(xml_ch_to_string(e.getMessage()));
340                 } catch (...) {
341                         throw MiscError("Unknown exception from xerces");
342                 }
343         }
344
345         XMLPlatformUtils::Terminate ();
346
347         for (auto i: error_handler.errors()) {
348                 notes.push_back ({
349                         VerificationNote::Type::ERROR,
350                         VerificationNote::Code::INVALID_XML,
351                         i.message(),
352                         boost::trim_copy(i.public_id() + " " + i.system_id()),
353                         i.line()
354                 });
355         }
356 }
357
358
/** Result of the hash check carried out by verify_asset() */
enum class VerifyAssetResult {
        /** neither of the problems below was found */
        GOOD,
        /** the CPL gives a hash for the asset and it differs from the one in the PKL */
        CPL_PKL_DIFFER,
        /** the hash calculated from the asset differs from the one in the PKL */
        BAD
};
364
365
366 static VerifyAssetResult
367 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
368 {
369         auto const actual_hash = reel_mxf->asset_ref()->hash(progress);
370
371         auto pkls = dcp->pkls();
372         /* We've read this DCP in so it must have at least one PKL */
373         DCP_ASSERT (!pkls.empty());
374
375         auto asset = reel_mxf->asset_ref().asset();
376
377         optional<string> pkl_hash;
378         for (auto i: pkls) {
379                 pkl_hash = i->hash (reel_mxf->asset_ref()->id());
380                 if (pkl_hash) {
381                         break;
382                 }
383         }
384
385         DCP_ASSERT (pkl_hash);
386
387         auto cpl_hash = reel_mxf->hash();
388         if (cpl_hash && *cpl_hash != *pkl_hash) {
389                 return VerifyAssetResult::CPL_PKL_DIFFER;
390         }
391
392         if (actual_hash != *pkl_hash) {
393                 return VerifyAssetResult::BAD;
394         }
395
396         return VerifyAssetResult::GOOD;
397 }
398
399
400 void
401 verify_language_tag (string tag, vector<VerificationNote>& notes)
402 {
403         try {
404                 LanguageTag test (tag);
405         } catch (LanguageTagError &) {
406                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, tag});
407         }
408 }
409
410
/** Result of the frame size check carried out by verify_picture_asset_type() */
enum class VerifyPictureAssetResult
{
        /** the biggest frame is no larger than either limit */
        GOOD,
        /** the biggest frame is larger than the lower ("risky") limit but not the upper one */
        FRAME_NEARLY_TOO_LARGE,
        /** the biggest frame is larger than the upper limit */
        BAD,
};
417
418
419 int
420 biggest_frame_size (shared_ptr<const MonoPictureFrame> frame)
421 {
422         return frame->size ();
423 }
424
425 int
426 biggest_frame_size (shared_ptr<const StereoPictureFrame> frame)
427 {
428         return max(frame->left()->size(), frame->right()->size());
429 }
430
431
432 template <class A, class R, class F>
433 optional<VerifyPictureAssetResult>
434 verify_picture_asset_type (shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
435 {
436         auto asset = dynamic_pointer_cast<A>(reel_mxf->asset_ref().asset());
437         if (!asset) {
438                 return optional<VerifyPictureAssetResult>();
439         }
440
441         int biggest_frame = 0;
442         auto reader = asset->start_read ();
443         auto const duration = asset->intrinsic_duration ();
444         for (int64_t i = 0; i < duration; ++i) {
445                 shared_ptr<const F> frame = reader->get_frame (i);
446                 biggest_frame = max(biggest_frame, biggest_frame_size(frame));
447                 progress (float(i) / duration);
448         }
449
450         static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
451         static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
452         if (biggest_frame > max_frame) {
453                 return VerifyPictureAssetResult::BAD;
454         } else if (biggest_frame > risky_frame) {
455                 return VerifyPictureAssetResult::FRAME_NEARLY_TOO_LARGE;
456         }
457
458         return VerifyPictureAssetResult::GOOD;
459 }
460
461
462 static VerifyPictureAssetResult
463 verify_picture_asset (shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
464 {
465         auto r = verify_picture_asset_type<MonoPictureAsset, MonoPictureAssetReader, MonoPictureFrame>(reel_mxf, progress);
466         if (!r) {
467                 r = verify_picture_asset_type<StereoPictureAsset, StereoPictureAssetReader, StereoPictureFrame>(reel_mxf, progress);
468         }
469
470         DCP_ASSERT (r);
471         return *r;
472 }
473
474
475 static void
476 verify_main_picture_asset (
477         shared_ptr<const DCP> dcp,
478         shared_ptr<const ReelPictureAsset> reel_asset,
479         function<void (string, optional<boost::filesystem::path>)> stage,
480         function<void (float)> progress,
481         vector<VerificationNote>& notes
482         )
483 {
484         auto asset = reel_asset->asset();
485         auto const file = *asset->file();
486         stage ("Checking picture asset hash", file);
487         auto const r = verify_asset (dcp, reel_asset, progress);
488         switch (r) {
489                 case VerifyAssetResult::BAD:
490                         notes.push_back ({
491                                 VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_PICTURE_HASH, file
492                         });
493                         break;
494                 case VerifyAssetResult::CPL_PKL_DIFFER:
495                         notes.push_back ({
496                                 VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_PICTURE_HASHES, file
497                         });
498                         break;
499                 default:
500                         break;
501         }
502         stage ("Checking picture frame sizes", asset->file());
503         auto const pr = verify_picture_asset (reel_asset, progress);
504         switch (pr) {
505                 case VerifyPictureAssetResult::BAD:
506                         notes.push_back ({
507                                 VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
508                         });
509                         break;
510                 case VerifyPictureAssetResult::FRAME_NEARLY_TOO_LARGE:
511                         notes.push_back ({
512                                 VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
513                         });
514                         break;
515                 default:
516                         break;
517         }
518
519         /* Only flat/scope allowed by Bv2.1 */
520         if (
521                 asset->size() != Size(2048, 858) &&
522                 asset->size() != Size(1998, 1080) &&
523                 asset->size() != Size(4096, 1716) &&
524                 asset->size() != Size(3996, 2160)) {
525                 notes.push_back({
526                         VerificationNote::Type::BV21_ERROR,
527                         VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS,
528                         String::compose("%1x%2", asset->size().width, asset->size().height),
529                         file
530                 });
531         }
532
533         /* Only 24, 25, 48fps allowed for 2K */
534         if (
535                 (asset->size() == Size(2048, 858) || asset->size() == Size(1998, 1080)) &&
536                 (asset->edit_rate() != Fraction(24, 1) && asset->edit_rate() != Fraction(25, 1) && asset->edit_rate() != Fraction(48, 1))
537            ) {
538                 notes.push_back({
539                         VerificationNote::Type::BV21_ERROR,
540                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K,
541                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
542                         file
543                 });
544         }
545
546         if (asset->size() == Size(4096, 1716) || asset->size() == Size(3996, 2160)) {
547                 /* Only 24fps allowed for 4K */
548                 if (asset->edit_rate() != Fraction(24, 1)) {
549                         notes.push_back({
550                                 VerificationNote::Type::BV21_ERROR,
551                                 VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K,
552                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
553                                 file
554                         });
555                 }
556
557                 /* Only 2D allowed for 4K */
558                 if (dynamic_pointer_cast<const StereoPictureAsset>(asset)) {
559                         notes.push_back({
560                                 VerificationNote::Type::BV21_ERROR,
561                                 VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D,
562                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
563                                 file
564                         });
565
566                 }
567         }
568
569 }
570
571
572 static void
573 verify_main_sound_asset (
574         shared_ptr<const DCP> dcp,
575         shared_ptr<const ReelSoundAsset> reel_asset,
576         function<void (string, optional<boost::filesystem::path>)> stage,
577         function<void (float)> progress,
578         vector<VerificationNote>& notes
579         )
580 {
581         auto asset = reel_asset->asset();
582         stage ("Checking sound asset hash", asset->file());
583         auto const r = verify_asset (dcp, reel_asset, progress);
584         switch (r) {
585                 case VerifyAssetResult::BAD:
586                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_SOUND_HASH, *asset->file()});
587                         break;
588                 case VerifyAssetResult::CPL_PKL_DIFFER:
589                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_SOUND_HASHES, *asset->file()});
590                         break;
591                 default:
592                         break;
593         }
594
595         stage ("Checking sound asset metadata", asset->file());
596
597         verify_language_tag (asset->language(), notes);
598         if (asset->sampling_rate() != 48000) {
599                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SOUND_FRAME_RATE, raw_convert<string>(asset->sampling_rate()), *asset->file()});
600         }
601 }
602
603
604 static void
605 verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, vector<VerificationNote>& notes)
606 {
607         /* XXX: is Language compulsory? */
608         if (reel_asset->language()) {
609                 verify_language_tag (*reel_asset->language(), notes);
610         }
611
612         if (!reel_asset->entry_point()) {
613                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT, reel_asset->id() });
614         } else if (reel_asset->entry_point().get()) {
615                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT, reel_asset->id() });
616         }
617 }
618
619
620 static void
621 verify_closed_caption_reel (shared_ptr<const ReelClosedCaptionAsset> reel_asset, vector<VerificationNote>& notes)
622 {
623         /* XXX: is Language compulsory? */
624         if (reel_asset->language()) {
625                 verify_language_tag (*reel_asset->language(), notes);
626         }
627
628         if (!reel_asset->entry_point()) {
629                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
630         } else if (reel_asset->entry_point().get()) {
631                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
632         }
633 }
634
635
/** State which is carried from one subtitle asset check to the next */
struct State
{
        /** Language of the first SMPTE subtitle asset, with a language, that
         *  verify_smpte_subtitle_asset() was given; the languages of later assets
         *  are compared with it.  Empty until such an asset has been seen.
         */
        boost::optional<string> subtitle_language;
};
640
641
642
643 void
644 verify_smpte_subtitle_asset (
645         shared_ptr<const SMPTESubtitleAsset> asset,
646         vector<VerificationNote>& notes,
647         State& state
648         )
649 {
650         if (asset->language()) {
651                 auto const language = *asset->language();
652                 verify_language_tag (language, notes);
653                 if (!state.subtitle_language) {
654                         state.subtitle_language = language;
655                 } else if (state.subtitle_language != language) {
656                         notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES });
657                 }
658         } else {
659                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE, *asset->file() });
660         }
661         auto const size = boost::filesystem::file_size(asset->file().get());
662         if (size > 115 * 1024 * 1024) {
663                 notes.push_back (
664                         { VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES, raw_convert<string>(size), *asset->file() }
665                         );
666         }
667         /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
668          * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
669          */
670         auto fonts = asset->font_data ();
671         int total_size = 0;
672         for (auto i: fonts) {
673                 total_size += i.second.size();
674         }
675         if (total_size > 10 * 1024 * 1024) {
676                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES, raw_convert<string>(total_size), asset->file().get() });
677         }
678
679         if (!asset->start_time()) {
680                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_START_TIME, asset->file().get() });
681         } else if (asset->start_time() != Time()) {
682                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SUBTITLE_START_TIME, asset->file().get() });
683         }
684 }
685
686
687 static void
688 verify_subtitle_asset (
689         shared_ptr<const SubtitleAsset> asset,
690         function<void (string, optional<boost::filesystem::path>)> stage,
691         boost::filesystem::path xsd_dtd_directory,
692         vector<VerificationNote>& notes,
693         State& state
694         )
695 {
696         stage ("Checking subtitle XML", asset->file());
697         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
698          * gets passed through libdcp which may clean up and therefore hide errors.
699          */
700         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
701
702         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
703         if (smpte) {
704                 verify_smpte_subtitle_asset (smpte, notes, state);
705         }
706 }
707
708
709 static void
710 verify_closed_caption_asset (
711         shared_ptr<const SubtitleAsset> asset,
712         function<void (string, optional<boost::filesystem::path>)> stage,
713         boost::filesystem::path xsd_dtd_directory,
714         vector<VerificationNote>& notes,
715         State& state
716         )
717 {
718         verify_subtitle_asset (asset, stage, xsd_dtd_directory, notes, state);
719
720         if (asset->raw_xml().size() > 256 * 1024) {
721                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES, raw_convert<string>(asset->raw_xml().size()), *asset->file()});
722         }
723 }
724
725
/** Check the timing of the <Subtitle> nodes in the XML of some reels, adding a WARNING
 *  note for each kind of problem found (at most one note of each kind, however many
 *  times the problem occurs).
 *  @param reels Reels to look at, in order.
 *  @param picture_frame_rate Rate used to convert times to editable units; 24 is used if it is not set.
 *  @param notes Notes are appended here.
 *  @param check Called for each reel; reels for which it returns false are skipped (and
 *  do not contribute to the running reel offset).
 *  @param xml Called for each checked reel to get the XML to examine.
 *  @param duration Called for each checked reel to get the amount to add to the reel offset.
 */
static
void
verify_text_timing (
        vector<shared_ptr<Reel>> reels,
        optional<int> picture_frame_rate,
        vector<VerificationNote>& notes,
        std::function<bool (shared_ptr<Reel>)> check,
        std::function<string (shared_ptr<Reel>)> xml,
        std::function<int64_t (shared_ptr<Reel>)> duration
        )
{
        /* end of last subtitle (in editable units) */
        optional<int64_t> last_out;
        auto too_short = false;
        auto too_close = false;
        auto too_early = false;
        /* current reel start time (in editable units) */
        int64_t reel_offset = 0;

        /* Recursive walk of an XML tree: each <Subtitle> node is checked, and the children
         * of any other node are visited.  tcr is the time code rate used to read
         * TimeIn/TimeOut and pfr the rate used to convert to editable units.  Declared as a
         * std::function before being assigned so that the lambda can call itself.
         */
        std::function<void (cxml::ConstNodePtr, int, int, bool)> parse;
        parse = [&parse, &last_out, &too_short, &too_close, &too_early, &reel_offset](cxml::ConstNodePtr node, int tcr, int pfr, bool first_reel) {
                if (node->name() == "Subtitle") {
                        Time in (node->string_attribute("TimeIn"), tcr);
                        Time out (node->string_attribute("TimeOut"), tcr);
                        /* NOTE(review): Time(0, 0, 4, 0, tcr) is presumably 4 seconds - confirm against Time's constructor */
                        if (first_reel && in < Time(0, 0, 4, 0, tcr)) {
                                too_early = true;
                        }
                        auto length = out - in;
                        /* shorter than 15 editable units */
                        if (length.as_editable_units(pfr) < 15) {
                                too_short = true;
                        }
                        if (last_out) {
                                /* XXX: this feels dubious - is it really what Bv2.1 means? */
                                auto distance = reel_offset + in.as_editable_units(pfr) - *last_out;
                                /* starts less than 2 editable units after the previous one ended;
                                 * a start before the previous end (negative distance) is not flagged.
                                 */
                                if (distance >= 0 && distance < 2) {
                                        too_close = true;
                                }
                        }
                        last_out = reel_offset + out.as_editable_units(pfr);
                } else {
                        for (auto i: node->node_children()) {
                                parse(i, tcr, pfr, first_reel);
                        }
                }
        };

        for (auto i = 0U; i < reels.size(); ++i) {
                if (!check(reels[i])) {
                        continue;
                }

                /* We need to look at <Subtitle> instances in the XML being checked, so we can't use the subtitles
                 * read in by libdcp's parser.
                 */

                auto doc = make_shared<cxml::Document>("SubtitleReel");
                doc->read_string (xml(reels[i]));
                auto const tcr = doc->number_child<int>("TimeCodeRate");
                /* "first reel" here means index 0 in reels, whether or not earlier reels were skipped */
                parse (doc, tcr, picture_frame_rate.get_value_or(24), i == 0);
                reel_offset += duration(reels[i]);
        }

        if (too_early) {
                notes.push_back({
                        VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME
                });
        }

        if (too_short) {
                notes.push_back ({
                        VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_DURATION
                });
        }

        if (too_close) {
                notes.push_back ({
                        VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_SPACING
                });
        }
}
806
807
/** Flags filled in by verify_text_lines_and_characters().  Each one starts out
 *  false; that function only ever sets them to true, so one instance can be
 *  shared across several assets to accumulate an overall result.
 */
struct LinesCharactersResult
{
	/** a line tripped the `warning_length` limit given to the check */
	bool warning_length_exceeded { false };
	/** a line tripped the `error_length` limit given to the check */
	bool error_length_exceeded { false };
	/** too many lines were on screen at the same time */
	bool line_count_exceeded { false };
};
814
815
816 static
817 void
818 verify_text_lines_and_characters (
819         shared_ptr<SubtitleAsset> asset,
820         int warning_length,
821         int error_length,
822         LinesCharactersResult* result
823         )
824 {
825         class Event
826         {
827         public:
828                 Event (Time time_, float position_, int characters_)
829                         : time (time_)
830                         , position (position_)
831                         , characters (characters_)
832                 {}
833
834                 Event (Time time_, shared_ptr<Event> start_)
835                         : time (time_)
836                         , start (start_)
837                 {}
838
839                 Time time;
840                 int position; //< position from 0 at top of screen to 100 at bottom
841                 int characters;
842                 shared_ptr<Event> start;
843         };
844
845         vector<shared_ptr<Event>> events;
846
847         auto position = [](shared_ptr<const SubtitleString> sub) {
848                 switch (sub->v_align()) {
849                 case VAlign::TOP:
850                         return lrintf(sub->v_position() * 100);
851                 case VAlign::CENTER:
852                         return lrintf((0.5f + sub->v_position()) * 100);
853                 case VAlign::BOTTOM:
854                         return lrintf((1.0f - sub->v_position()) * 100);
855                 }
856
857                 return 0L;
858         };
859
860         for (auto j: asset->subtitles()) {
861                 auto text = dynamic_pointer_cast<const SubtitleString>(j);
862                 if (text) {
863                         auto in = make_shared<Event>(text->in(), position(text), text->text().length());
864                         events.push_back(in);
865                         events.push_back(make_shared<Event>(text->out(), in));
866                 }
867         }
868
869         std::sort(events.begin(), events.end(), [](shared_ptr<Event> const& a, shared_ptr<Event>const& b) {
870                 return a->time < b->time;
871         });
872
873         map<int, int> current;
874         for (auto i: events) {
875                 if (current.size() > 3) {
876                         result->line_count_exceeded = true;
877                 }
878                 for (auto j: current) {
879                         if (j.second >= warning_length) {
880                                 result->warning_length_exceeded = true;
881                         }
882                         if (j.second >= error_length) {
883                                 result->error_length_exceeded = true;
884                         }
885                 }
886
887                 if (i->start) {
888                         /* end of a subtitle */
889                         DCP_ASSERT (current.find(i->start->position) != current.end());
890                         if (current[i->start->position] == i->start->characters) {
891                                 current.erase(i->start->position);
892                         } else {
893                                 current[i->start->position] -= i->start->characters;
894                         }
895                 } else {
896                         /* start of a subtitle */
897                         if (current.find(i->position) == current.end()) {
898                                 current[i->position] = i->characters;
899                         } else {
900                                 current[i->position] += i->characters;
901                         }
902                 }
903         }
904 }
905
906
907 static
908 void
909 verify_text_timing (vector<shared_ptr<Reel>> reels, vector<VerificationNote>& notes)
910 {
911         if (reels.empty()) {
912                 return;
913         }
914
915         optional<int> picture_frame_rate;
916         if (reels[0]->main_picture()) {
917                 picture_frame_rate = reels[0]->main_picture()->frame_rate().numerator;
918         }
919
920         if (reels[0]->main_subtitle()) {
921                 verify_text_timing (reels, picture_frame_rate, notes,
922                         [](shared_ptr<Reel> reel) {
923                                 return static_cast<bool>(reel->main_subtitle());
924                         },
925                         [](shared_ptr<Reel> reel) {
926                                 return reel->main_subtitle()->asset()->raw_xml();
927                         },
928                         [](shared_ptr<Reel> reel) {
929                                 return reel->main_subtitle()->actual_duration();
930                         }
931                 );
932         }
933
934         for (auto i = 0U; i < reels[0]->closed_captions().size(); ++i) {
935                 verify_text_timing (reels, picture_frame_rate, notes,
936                         [i](shared_ptr<Reel> reel) {
937                                 return i < reel->closed_captions().size();
938                         },
939                         [i](shared_ptr<Reel> reel) {
940                                 return reel->closed_captions()[i]->asset()->raw_xml();
941                         },
942                         [i](shared_ptr<Reel> reel) {
943                                 return reel->closed_captions()[i]->actual_duration();
944                         }
945                 );
946         }
947 }
948
949
950 void
951 verify_extension_metadata (shared_ptr<CPL> cpl, vector<VerificationNote>& notes)
952 {
953         DCP_ASSERT (cpl->file());
954         cxml::Document doc ("CompositionPlaylist");
955         doc.read_file (cpl->file().get());
956
957         auto missing = false;
958         string malformed;
959
960         if (auto reel_list = doc.node_child("ReelList")) {
961                 auto reels = reel_list->node_children("Reel");
962                 if (!reels.empty()) {
963                         if (auto asset_list = reels[0]->optional_node_child("AssetList")) {
964                                 if (auto metadata = asset_list->optional_node_child("CompositionMetadataAsset")) {
965                                         if (auto extension_list = metadata->optional_node_child("ExtensionMetadataList")) {
966                                                 missing = true;
967                                                 for (auto extension: extension_list->node_children("ExtensionMetadata")) {
968                                                         if (extension->optional_string_attribute("scope").get_value_or("") != "http://isdcf.com/ns/cplmd/app") {
969                                                                 continue;
970                                                         }
971                                                         missing = false;
972                                                         if (auto name = extension->optional_node_child("Name")) {
973                                                                 if (name->content() != "Application") {
974                                                                         malformed = "<Name> should be 'Application'";
975                                                                 }
976                                                         }
977                                                         if (auto property_list = extension->optional_node_child("PropertyList")) {
978                                                                 if (auto property = property_list->optional_node_child("Property")) {
979                                                                         if (auto name = property->optional_node_child("Name")) {
980                                                                                 if (name->content() != "DCP Constraints Profile") {
981                                                                                         malformed = "<Name> property should be 'DCP Constraints Profile'";
982                                                                                 }
983                                                                         }
984                                                                         if (auto value = property->optional_node_child("Value")) {
985                                                                                 if (value->content() != "SMPTE-RDD-52:2020-Bv2.1") {
986                                                                                         malformed = "<Value> property should be 'SMPTE-RDD-52:2020-Bv2.1'";
987                                                                                 }
988                                                                         }
989                                                                 }
990                                                         }
991                                                 }
992                                         } else {
993                                                 missing = true;
994                                         }
995                                 }
996                         }
997                 }
998         }
999
1000         if (missing) {
1001                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_EXTENSION_METADATA, cpl->id(), cpl->file().get()});
1002         } else if (!malformed.empty()) {
1003                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_EXTENSION_METADATA, malformed, cpl->file().get()});
1004         }
1005 }
1006
1007
1008 bool
1009 pkl_has_encrypted_assets (shared_ptr<DCP> dcp, shared_ptr<PKL> pkl)
1010 {
1011         vector<string> encrypted;
1012         for (auto i: dcp->cpls()) {
1013                 for (auto j: i->reel_mxfs()) {
1014                         if (j->asset_ref().resolved()) {
1015                                 /* It's a bit surprising / broken but Interop subtitle assets are represented
1016                                  * in reels by ReelSubtitleAsset which inherits ReelMXF, so it's possible for
1017                                  * ReelMXFs to have assets which are not MXFs.
1018                                  */
1019                                 if (auto asset = dynamic_pointer_cast<MXF>(j->asset_ref().asset())) {
1020                                         if (asset->encrypted()) {
1021                                                 encrypted.push_back(j->asset_ref().id());
1022                                         }
1023                                 }
1024                         }
1025                 }
1026         }
1027
1028         for (auto i: pkl->asset_list()) {
1029                 if (find(encrypted.begin(), encrypted.end(), i->id()) != encrypted.end()) {
1030                         return true;
1031                 }
1032         }
1033
1034         return false;
1035 }
1036
1037
1038 vector<VerificationNote>
1039 dcp::verify (
1040         vector<boost::filesystem::path> directories,
1041         function<void (string, optional<boost::filesystem::path>)> stage,
1042         function<void (float)> progress,
1043         boost::filesystem::path xsd_dtd_directory
1044         )
1045 {
1046         xsd_dtd_directory = boost::filesystem::canonical (xsd_dtd_directory);
1047
1048         vector<VerificationNote> notes;
1049         State state{};
1050
1051         vector<shared_ptr<DCP>> dcps;
1052         for (auto i: directories) {
1053                 dcps.push_back (shared_ptr<DCP> (new DCP (i)));
1054         }
1055
1056         for (auto dcp: dcps) {
1057                 stage ("Checking DCP", dcp->directory());
1058                 try {
1059                         dcp->read (&notes);
1060                 } catch (ReadError& e) {
1061                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1062                 } catch (XMLError& e) {
1063                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1064                 } catch (MXFFileError& e) {
1065                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1066                 } catch (cxml::Error& e) {
1067                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1068                 }
1069
1070                 if (dcp->standard() != Standard::SMPTE) {
1071                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_STANDARD});
1072                 }
1073
1074                 for (auto cpl: dcp->cpls()) {
1075                         stage ("Checking CPL", cpl->file());
1076                         validate_xml (cpl->file().get(), xsd_dtd_directory, notes);
1077
1078                         if (cpl->any_encrypted() && !cpl->all_encrypted()) {
1079                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::PARTIALLY_ENCRYPTED});
1080                         }
1081
1082                         for (auto const& i: cpl->additional_subtitle_languages()) {
1083                                 verify_language_tag (i, notes);
1084                         }
1085
1086                         if (cpl->release_territory()) {
1087                                 if (!cpl->release_territory_scope() || cpl->release_territory_scope().get() != "http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata#scope/release-territory/UNM49") {
1088                                         auto terr = cpl->release_territory().get();
1089                                         /* Must be a valid region tag, or "001" */
1090                                         try {
1091                                                 LanguageTag::RegionSubtag test (terr);
1092                                         } catch (...) {
1093                                                 if (terr != "001") {
1094                                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, terr});
1095                                                 }
1096                                         }
1097                                 }
1098                         }
1099
1100                         if (dcp->standard() == Standard::SMPTE) {
1101                                 if (!cpl->annotation_text()) {
1102                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1103                                 } else if (cpl->annotation_text().get() != cpl->content_title_text()) {
1104                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1105                                 }
1106                         }
1107
1108                         for (auto i: dcp->pkls()) {
1109                                 /* Check that the CPL's hash corresponds to the PKL */
1110                                 optional<string> h = i->hash(cpl->id());
1111                                 if (h && make_digest(ArrayData(*cpl->file())) != *h) {
1112                                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_CPL_HASHES, cpl->id(), cpl->file().get()});
1113                                 }
1114
1115                                 /* Check that any PKL with a single CPL has its AnnotationText the same as the CPL's ContentTitleText */
1116                                 optional<string> required_annotation_text;
1117                                 for (auto j: i->asset_list()) {
1118                                         /* See if this is a CPL */
1119                                         for (auto k: dcp->cpls()) {
1120                                                 if (j->id() == k->id()) {
1121                                                         if (!required_annotation_text) {
1122                                                                 /* First CPL we have found; this is the required AnnotationText unless we find another */
1123                                                                 required_annotation_text = cpl->content_title_text();
1124                                                         } else {
1125                                                                 /* There's more than one CPL so we don't care what the PKL's AnnotationText is */
1126                                                                 required_annotation_text = boost::none;
1127                                                         }
1128                                                 }
1129                                         }
1130                                 }
1131
1132                                 if (required_annotation_text && i->annotation_text() != required_annotation_text) {
1133                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL, i->id(), i->file().get()});
1134                                 }
1135                         }
1136
1137                         /* set to true if any reel has a MainSubtitle */
1138                         auto have_main_subtitle = false;
1139                         /* set to true if any reel has no MainSubtitle */
1140                         auto have_no_main_subtitle = false;
1141                         /* fewest number of closed caption assets seen in a reel */
1142                         size_t fewest_closed_captions = SIZE_MAX;
1143                         /* most number of closed caption assets seen in a reel */
1144                         size_t most_closed_captions = 0;
1145                         map<Marker, Time> markers_seen;
1146
1147                         for (auto reel: cpl->reels()) {
1148                                 stage ("Checking reel", optional<boost::filesystem::path>());
1149
1150                                 for (auto i: reel->assets()) {
1151                                         if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1152                                                 notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_DURATION, i->id()});
1153                                         }
1154                                         if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1155                                                 notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_INTRINSIC_DURATION, i->id()});
1156                                         }
1157                                         auto mxf = dynamic_pointer_cast<ReelMXF>(i);
1158                                         if (mxf && !mxf->hash()) {
1159                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_HASH, i->id()});
1160                                         }
1161                                 }
1162
1163                                 if (dcp->standard() == Standard::SMPTE) {
1164                                         boost::optional<int64_t> duration;
1165                                         for (auto i: reel->assets()) {
1166                                                 if (!duration) {
1167                                                         duration = i->actual_duration();
1168                                                 } else if (*duration != i->actual_duration()) {
1169                                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_ASSET_DURATION});
1170                                                         break;
1171                                                 }
1172                                         }
1173                                 }
1174
1175                                 if (reel->main_picture()) {
1176                                         /* Check reel stuff */
1177                                         auto const frame_rate = reel->main_picture()->frame_rate();
1178                                         if (frame_rate.denominator != 1 ||
1179                                             (frame_rate.numerator != 24 &&
1180                                              frame_rate.numerator != 25 &&
1181                                              frame_rate.numerator != 30 &&
1182                                              frame_rate.numerator != 48 &&
1183                                              frame_rate.numerator != 50 &&
1184                                              frame_rate.numerator != 60 &&
1185                                              frame_rate.numerator != 96)) {
1186                                                 notes.push_back ({
1187                                                         VerificationNote::Type::ERROR,
1188                                                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE,
1189                                                         String::compose("%1/%2", frame_rate.numerator, frame_rate.denominator)
1190                                                 });
1191                                         }
1192                                         /* Check asset */
1193                                         if (reel->main_picture()->asset_ref().resolved()) {
1194                                                 verify_main_picture_asset (dcp, reel->main_picture(), stage, progress, notes);
1195                                         }
1196                                 }
1197
1198                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
1199                                         verify_main_sound_asset (dcp, reel->main_sound(), stage, progress, notes);
1200                                 }
1201
1202                                 if (reel->main_subtitle()) {
1203                                         verify_main_subtitle_reel (reel->main_subtitle(), notes);
1204                                         if (reel->main_subtitle()->asset_ref().resolved()) {
1205                                                 verify_subtitle_asset (reel->main_subtitle()->asset(), stage, xsd_dtd_directory, notes, state);
1206                                         }
1207                                         have_main_subtitle = true;
1208                                 } else {
1209                                         have_no_main_subtitle = true;
1210                                 }
1211
1212                                 for (auto i: reel->closed_captions()) {
1213                                         verify_closed_caption_reel (i, notes);
1214                                         if (i->asset_ref().resolved()) {
1215                                                 verify_closed_caption_asset (i->asset(), stage, xsd_dtd_directory, notes, state);
1216                                         }
1217                                 }
1218
1219                                 if (reel->main_markers()) {
1220                                         for (auto const& i: reel->main_markers()->get()) {
1221                                                 markers_seen.insert (i);
1222                                         }
1223                                 }
1224
1225                                 fewest_closed_captions = std::min (fewest_closed_captions, reel->closed_captions().size());
1226                                 most_closed_captions = std::max (most_closed_captions, reel->closed_captions().size());
1227                         }
1228
1229                         if (dcp->standard() == Standard::SMPTE) {
1230
1231                                 if (have_main_subtitle && have_no_main_subtitle) {
1232                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS});
1233                                 }
1234
1235                                 if (fewest_closed_captions != most_closed_captions) {
1236                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS});
1237                                 }
1238
1239                                 if (cpl->content_kind() == ContentKind::FEATURE) {
1240                                         if (markers_seen.find(Marker::FFEC) == markers_seen.end()) {
1241                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFEC_IN_FEATURE});
1242                                         }
1243                                         if (markers_seen.find(Marker::FFMC) == markers_seen.end()) {
1244                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFMC_IN_FEATURE});
1245                                         }
1246                                 }
1247
1248                                 auto ffoc = markers_seen.find(Marker::FFOC);
1249                                 if (ffoc == markers_seen.end()) {
1250                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_FFOC});
1251                                 } else if (ffoc->second.e != 1) {
1252                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_FFOC, raw_convert<string>(ffoc->second.e)});
1253                                 }
1254
1255                                 auto lfoc = markers_seen.find(Marker::LFOC);
1256                                 if (lfoc == markers_seen.end()) {
1257                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_LFOC});
1258                                 } else {
1259                                         auto lfoc_time = lfoc->second.as_editable_units(lfoc->second.tcr);
1260                                         if (lfoc_time != (cpl->reels().back()->duration() - 1)) {
1261                                                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_LFOC, raw_convert<string>(lfoc_time)});
1262                                         }
1263                                 }
1264
1265                                 verify_text_timing (cpl->reels(), notes);
1266
1267                                 LinesCharactersResult result;
1268                                 for (auto reel: cpl->reels()) {
1269                                         if (reel->main_subtitle() && reel->main_subtitle()->asset()) {
1270                                                 verify_text_lines_and_characters (reel->main_subtitle()->asset(), 52, 79, &result);
1271                                         }
1272                                 }
1273
1274                                 if (result.line_count_exceeded) {
1275                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT});
1276                                 }
1277                                 if (result.error_length_exceeded) {
1278                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH});
1279                                 } else if (result.warning_length_exceeded) {
1280                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH});
1281                                 }
1282
1283                                 result = LinesCharactersResult();
1284                                 for (auto reel: cpl->reels()) {
1285                                         for (auto i: reel->closed_captions()) {
1286                                                 if (i->asset()) {
1287                                                         verify_text_lines_and_characters (i->asset(), 32, 32, &result);
1288                                                 }
1289                                         }
1290                                 }
1291
1292                                 if (result.line_count_exceeded) {
1293                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT});
1294                                 }
1295                                 if (result.error_length_exceeded) {
1296                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH});
1297                                 }
1298
1299                                 if (!cpl->full_content_title_text()) {
1300                                         /* Since FullContentTitleText is assumed always to exist if there's a CompositionMetadataAsset we
1301                                          * can use it as a proxy for CompositionMetadataAsset's existence.
1302                                          */
1303                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA, cpl->id(), cpl->file().get()});
1304                                 } else if (!cpl->version_number()) {
1305                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER, cpl->id(), cpl->file().get()});
1306                                 }
1307
1308                                 verify_extension_metadata (cpl, notes);
1309
1310                                 if (cpl->any_encrypted()) {
1311                                         cxml::Document doc ("CompositionPlaylist");
1312                                         DCP_ASSERT (cpl->file());
1313                                         doc.read_file (cpl->file().get());
1314                                         if (!doc.optional_node_child("Signature")) {
1315                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT, cpl->id(), cpl->file().get()});
1316                                         }
1317                                 }
1318                         }
1319                 }
1320
1321                 for (auto pkl: dcp->pkls()) {
1322                         stage ("Checking PKL", pkl->file());
1323                         validate_xml (pkl->file().get(), xsd_dtd_directory, notes);
1324                         if (pkl_has_encrypted_assets(dcp, pkl)) {
1325                                 cxml::Document doc ("PackingList");
1326                                 doc.read_file (pkl->file().get());
1327                                 if (!doc.optional_node_child("Signature")) {
1328                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT, pkl->id(), pkl->file().get()});
1329                                 }
1330                         }
1331                 }
1332
1333                 if (dcp->asset_map_path()) {
1334                         stage ("Checking ASSETMAP", dcp->asset_map_path().get());
1335                         validate_xml (dcp->asset_map_path().get(), xsd_dtd_directory, notes);
1336                 } else {
1337                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_ASSETMAP});
1338                 }
1339         }
1340
1341         return notes;
1342 }
1343
1344 string
1345 dcp::note_to_string (VerificationNote note)
1346 {
1347         /** These strings should say what is wrong, incorporating any extra details (ID, filenames etc.).
1348          *
1349          *  e.g. "ClosedCaption asset has no <EntryPoint> tag.",
1350          *  not "ClosedCaption assets must have an <EntryPoint> tag."
1351          *
1352          *  It's OK to use XML tag names where they are clear.
1353          *  If both ID and filename are available, use only the ID.
1354          *  End messages with a full stop.
1355          *  Messages should not mention whether or not their errors are a part of Bv2.1.
1356          */
1357         switch (note.code()) {
1358         case VerificationNote::Code::FAILED_READ:
1359                 return *note.note();
1360         case VerificationNote::Code::MISMATCHED_CPL_HASHES:
1361                 return String::compose("The hash of the CPL %1 in the PKL does not agree with the CPL file.", note.note().get());
1362         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE:
1363                 return String::compose("The picture in a reel has an invalid frame rate %1.", note.note().get());
1364         case VerificationNote::Code::INCORRECT_PICTURE_HASH:
1365                 return String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
1366         case VerificationNote::Code::MISMATCHED_PICTURE_HASHES:
1367                 return String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
1368         case VerificationNote::Code::INCORRECT_SOUND_HASH:
1369                 return String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
1370         case VerificationNote::Code::MISMATCHED_SOUND_HASHES:
1371                 return String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
1372         case VerificationNote::Code::EMPTY_ASSET_PATH:
1373                 return "The asset map contains an empty asset path.";
1374         case VerificationNote::Code::MISSING_ASSET:
1375                 return String::compose("The file %1 for an asset in the asset map cannot be found.", note.file()->filename());
1376         case VerificationNote::Code::MISMATCHED_STANDARD:
1377                 return "The DCP contains both SMPTE and Interop parts.";
1378         case VerificationNote::Code::INVALID_XML:
1379                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
1380         case VerificationNote::Code::MISSING_ASSETMAP:
1381                 return "No ASSETMAP or ASSETMAP.xml was found.";
1382         case VerificationNote::Code::INVALID_INTRINSIC_DURATION:
1383                 return String::compose("The intrinsic duration of the asset %1 is less than 1 second long.", note.note().get());
1384         case VerificationNote::Code::INVALID_DURATION:
1385                 return String::compose("The duration of the asset %1 is less than 1 second long.", note.note().get());
1386         case VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1387                 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
1388         case VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1389                 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
1390         case VerificationNote::Code::EXTERNAL_ASSET:
1391                 return String::compose("The asset %1 that this DCP refers to is not included in the DCP.  It may be a VF.", note.note().get());
1392         case VerificationNote::Code::INVALID_STANDARD:
1393                 return "This DCP does not use the SMPTE standard.";
1394         case VerificationNote::Code::INVALID_LANGUAGE:
1395                 return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
1396         case VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS:
1397                 return String::compose("The size %1 of picture asset %2 is not allowed.", note.note().get(), note.file()->filename());
1398         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K:
1399                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 2K DCPs.", note.note().get(), note.file()->filename());
1400         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K:
1401                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 4K DCPs.", note.note().get(), note.file()->filename());
1402         case VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D:
1403                 return "3D 4K DCPs are not allowed.";
1404         case VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES:
1405                 return String::compose("The size %1 of the closed caption asset %2 is larger than the 256KB maximum.", note.note().get(), note.file()->filename());
1406         case VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES:
1407                 return String::compose("The size %1 of the timed text asset %2 is larger than the 115MB maximum.", note.note().get(), note.file()->filename());
1408         case VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES:
1409                 return String::compose("The size %1 of the fonts in timed text asset %2 is larger than the 10MB maximum.", note.note().get(), note.file()->filename());
1410         case VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE:
1411                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <Language> tag.", note.file()->filename());
1412         case VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES:
1413                 return "Some subtitle assets have different <Language> tags than others";
1414         case VerificationNote::Code::MISSING_SUBTITLE_START_TIME:
1415                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <StartTime> tag.", note.file()->filename());
1416         case VerificationNote::Code::INVALID_SUBTITLE_START_TIME:
1417                 return String::compose("The XML for a SMPTE subtitle asset %1 has a non-zero <StartTime> tag.", note.file()->filename());
1418         case VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME:
1419                 return "The first subtitle or closed caption is less than 4 seconds from the start of the DCP.";
1420         case VerificationNote::Code::INVALID_SUBTITLE_DURATION:
1421                 return "At least one subtitle lasts less than 15 frames.";
1422         case VerificationNote::Code::INVALID_SUBTITLE_SPACING:
1423                 return "At least one pair of subtitles is separated by less than 2 frames.";
1424         case VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT:
1425                 return "There are more than 3 subtitle lines in at least one place in the DCP.";
1426         case VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH:
1427                 return "There are more than 52 characters in at least one subtitle line.";
1428         case VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH:
1429                 return "There are more than 79 characters in at least one subtitle line.";
1430         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT:
1431                 return "There are more than 3 closed caption lines in at least one place.";
1432         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH:
1433                 return "There are more than 32 characters in at least one closed caption line.";
1434         case VerificationNote::Code::INVALID_SOUND_FRAME_RATE:
1435                 return String::compose("The sound asset %1 has a sampling rate of %2", note.file()->filename(), note.note().get());
1436         case VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT:
1437                 return String::compose("The CPL %1 has no <AnnotationText> tag.", note.note().get());
1438         case VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT:
1439                 return String::compose("The CPL %1 has an <AnnotationText> which differs from its <ContentTitleText>", note.note().get());
1440         case VerificationNote::Code::MISMATCHED_ASSET_DURATION:
1441                 return "All assets in a reel do not have the same duration.";
1442         case VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS:
1443                 return "At least one reel contains a subtitle asset, but some reel(s) do not";
1444         case VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS:
1445                 return "At least one reel has closed captions, but reels have different numbers of closed caption assets.";
1446         case VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT:
1447                 return String::compose("The subtitle asset %1 has no <EntryPoint> tag.", note.note().get());
1448         case VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT:
1449                 return String::compose("The subtitle asset %1 has an <EntryPoint> other than 0.", note.note().get());
1450         case VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT:
1451                 return String::compose("The closed caption asset %1 has no <EntryPoint> tag.", note.note().get());
1452         case VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT:
1453                 return String::compose("The closed caption asset %1 has an <EntryPoint> other than 0.", note.note().get());
1454         case VerificationNote::Code::MISSING_HASH:
1455                 return String::compose("The asset %1 has no <Hash> tag in the CPL.", note.note().get());
1456         case VerificationNote::Code::MISSING_FFEC_IN_FEATURE:
1457                 return "The DCP is marked as a Feature but there is no FFEC (first frame of end credits) marker";
1458         case VerificationNote::Code::MISSING_FFMC_IN_FEATURE:
1459                 return "The DCP is marked as a Feature but there is no FFMC (first frame of moving credits) marker";
1460         case VerificationNote::Code::MISSING_FFOC:
1461                 return "There should be a FFOC (first frame of content) marker";
1462         case VerificationNote::Code::MISSING_LFOC:
1463                 return "There should be a LFOC (last frame of content) marker";
1464         case VerificationNote::Code::INCORRECT_FFOC:
1465                 return String::compose("The FFOC marker is %1 instead of 1", note.note().get());
1466         case VerificationNote::Code::INCORRECT_LFOC:
1467                 return String::compose("The LFOC marker is %1 instead of 1 less than the duration of the last reel.", note.note().get());
1468         case VerificationNote::Code::MISSING_CPL_METADATA:
1469                 return String::compose("The CPL %1 has no <CompositionMetadataAsset> tag.", note.note().get());
1470         case VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER:
1471                 return String::compose("The CPL %1 has no <VersionNumber> in its <CompositionMetadataAsset>.", note.note().get());
1472         case VerificationNote::Code::MISSING_EXTENSION_METADATA:
1473                 return String::compose("The CPL %1 has no <ExtensionMetadata> in its <CompositionMetadataAsset>.", note.note().get());
1474         case VerificationNote::Code::INVALID_EXTENSION_METADATA:
1475                 return String::compose("The CPL %1 has a malformed <ExtensionMetadata> (%2).", note.file()->filename(), note.note().get());
1476         case VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT:
1477                 return String::compose("The CPL %1, which has encrypted content, is not signed.", note.note().get());
1478         case VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT:
1479                 return String::compose("The PKL %1, which has encrypted content, is not signed.", note.note().get());
1480         case VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL:
1481                 return String::compose("The PKL %1 has only one CPL but its <AnnotationText> does not match the CPL's <ContentTitleText>", note.note().get());
1482         case VerificationNote::Code::PARTIALLY_ENCRYPTED:
1483                 return "Some assets are encrypted but some are not";
1484         }
1485
1486         return "";
1487 }
1488
1489
1490 bool
1491 dcp::operator== (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
1492 {
1493         return a.type() == b.type() && a.code() == b.code() && a.note() == b.note() && a.file() == b.file() && a.line() == b.line();
1494 }
1495
1496 std::ostream&
1497 dcp::operator<< (std::ostream& s, dcp::VerificationNote const& note)
1498 {
1499         s << note_to_string (note);
1500         if (note.note()) {
1501                 s << " [" << note.note().get() << "]";
1502         }
1503         if (note.file()) {
1504                 s << " [" << note.file().get() << "]";
1505         }
1506         if (note.line()) {
1507                 s << " [" << note.line().get() << "]";
1508         }
1509         return s;
1510 }
1511