Fix various bugs in subtitle/ccap verification.
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2021 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34
35 /** @file  src/verify.cc
36  *  @brief dcp::verify() method and associated code
37  */
38
39
40 #include "verify.h"
41 #include "dcp.h"
42 #include "cpl.h"
43 #include "reel.h"
44 #include "reel_closed_caption_asset.h"
45 #include "reel_picture_asset.h"
46 #include "reel_sound_asset.h"
47 #include "reel_subtitle_asset.h"
48 #include "interop_subtitle_asset.h"
49 #include "mono_picture_asset.h"
50 #include "mono_picture_frame.h"
51 #include "stereo_picture_asset.h"
52 #include "stereo_picture_frame.h"
53 #include "exceptions.h"
54 #include "compose.hpp"
55 #include "raw_convert.h"
56 #include "reel_markers_asset.h"
57 #include "smpte_subtitle_asset.h"
58 #include <xercesc/util/PlatformUtils.hpp>
59 #include <xercesc/parsers/XercesDOMParser.hpp>
60 #include <xercesc/parsers/AbstractDOMParser.hpp>
61 #include <xercesc/sax/HandlerBase.hpp>
62 #include <xercesc/dom/DOMImplementation.hpp>
63 #include <xercesc/dom/DOMImplementationLS.hpp>
64 #include <xercesc/dom/DOMImplementationRegistry.hpp>
65 #include <xercesc/dom/DOMLSParser.hpp>
66 #include <xercesc/dom/DOMException.hpp>
67 #include <xercesc/dom/DOMDocument.hpp>
68 #include <xercesc/dom/DOMNodeList.hpp>
69 #include <xercesc/dom/DOMError.hpp>
70 #include <xercesc/dom/DOMLocator.hpp>
71 #include <xercesc/dom/DOMNamedNodeMap.hpp>
72 #include <xercesc/dom/DOMAttr.hpp>
73 #include <xercesc/dom/DOMErrorHandler.hpp>
74 #include <xercesc/framework/LocalFileInputSource.hpp>
75 #include <xercesc/framework/MemBufInputSource.hpp>
76 #include <boost/algorithm/string.hpp>
77 #include <map>
78 #include <vector>
79 #include <iostream>
80
81
82 using std::list;
83 using std::vector;
84 using std::string;
85 using std::cout;
86 using std::map;
87 using std::max;
88 using std::set;
89 using std::shared_ptr;
90 using std::make_shared;
91 using boost::optional;
92 using boost::function;
93 using std::dynamic_pointer_cast;
94
95
96 using namespace dcp;
97 using namespace xercesc;
98
99
100 static
101 string
102 xml_ch_to_string (XMLCh const * a)
103 {
104         char* x = XMLString::transcode(a);
105         string const o(x);
106         XMLString::release(&x);
107         return o;
108 }
109
110
111 class XMLValidationError
112 {
113 public:
114         XMLValidationError (SAXParseException const & e)
115                 : _message (xml_ch_to_string(e.getMessage()))
116                 , _line (e.getLineNumber())
117                 , _column (e.getColumnNumber())
118                 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
119                 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
120         {
121
122         }
123
124         string message () const {
125                 return _message;
126         }
127
128         uint64_t line () const {
129                 return _line;
130         }
131
132         uint64_t column () const {
133                 return _column;
134         }
135
136         string public_id () const {
137                 return _public_id;
138         }
139
140         string system_id () const {
141                 return _system_id;
142         }
143
144 private:
145         string _message;
146         uint64_t _line;
147         uint64_t _column;
148         string _public_id;
149         string _system_id;
150 };
151
152
153 class DCPErrorHandler : public ErrorHandler
154 {
155 public:
156         void warning(const SAXParseException& e)
157         {
158                 maybe_add (XMLValidationError(e));
159         }
160
161         void error(const SAXParseException& e)
162         {
163                 maybe_add (XMLValidationError(e));
164         }
165
166         void fatalError(const SAXParseException& e)
167         {
168                 maybe_add (XMLValidationError(e));
169         }
170
171         void resetErrors() {
172                 _errors.clear ();
173         }
174
175         list<XMLValidationError> errors () const {
176                 return _errors;
177         }
178
179 private:
180         void maybe_add (XMLValidationError e)
181         {
182                 /* XXX: nasty hack */
183                 if (
184                         e.message().find("schema document") != string::npos &&
185                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
186                         ) {
187                         return;
188                 }
189
190                 _errors.push_back (e);
191         }
192
193         list<XMLValidationError> _errors;
194 };
195
196
197 class StringToXMLCh
198 {
199 public:
200         StringToXMLCh (string a)
201         {
202                 _buffer = XMLString::transcode(a.c_str());
203         }
204
205         StringToXMLCh (StringToXMLCh const&) = delete;
206         StringToXMLCh& operator= (StringToXMLCh const&) = delete;
207
208         ~StringToXMLCh ()
209         {
210                 XMLString::release (&_buffer);
211         }
212
213         XMLCh const * get () const {
214                 return _buffer;
215         }
216
217 private:
218         XMLCh* _buffer;
219 };
220
221
222 class LocalFileResolver : public EntityResolver
223 {
224 public:
225         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
226                 : _xsd_dtd_directory (xsd_dtd_directory)
227         {
228                 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
229                  * found without being here.
230                  */
231                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
232                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
233                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
234                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
235                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
236                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
237                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
238                 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
239                 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
240                 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
241                 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
242                 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
243                 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
244         }
245
246         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
247         {
248                 if (!system_id) {
249                         return 0;
250                 }
251                 auto system_id_str = xml_ch_to_string (system_id);
252                 auto p = _xsd_dtd_directory;
253                 if (_files.find(system_id_str) == _files.end()) {
254                         p /= system_id_str;
255                 } else {
256                         p /= _files[system_id_str];
257                 }
258                 StringToXMLCh ch (p.string());
259                 return new LocalFileInputSource(ch.get());
260         }
261
262 private:
263         void add (string uri, string file)
264         {
265                 _files[uri] = file;
266         }
267
268         std::map<string, string> _files;
269         boost::filesystem::path _xsd_dtd_directory;
270 };
271
272
273 static void
274 parse (XercesDOMParser& parser, boost::filesystem::path xml)
275 {
276         parser.parse(xml.string().c_str());
277 }
278
279
280 static void
281 parse (XercesDOMParser& parser, string xml)
282 {
283         xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
284         parser.parse(buf);
285 }
286
287
288 template <class T>
289 void
290 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, vector<VerificationNote>& notes)
291 {
292         try {
293                 XMLPlatformUtils::Initialize ();
294         } catch (XMLException& e) {
295                 throw MiscError ("Failed to initialise xerces library");
296         }
297
298         DCPErrorHandler error_handler;
299
300         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
301         {
302                 XercesDOMParser parser;
303                 parser.setValidationScheme(XercesDOMParser::Val_Always);
304                 parser.setDoNamespaces(true);
305                 parser.setDoSchema(true);
306
307                 vector<string> schema;
308                 schema.push_back("xml.xsd");
309                 schema.push_back("xmldsig-core-schema.xsd");
310                 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
311                 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
312                 schema.push_back("SMPTE-429-9-2007-AM.xsd");
313                 schema.push_back("Main-Stereo-Picture-CPL.xsd");
314                 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
315                 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
316                 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
317                 schema.push_back("DCSubtitle.v1.mattsson.xsd");
318                 schema.push_back("DCDMSubtitle-2010.xsd");
319                 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
320                 schema.push_back("SMPTE-429-16.xsd");
321                 schema.push_back("Dolby-2012-AD.xsd");
322                 schema.push_back("SMPTE-429-10-2008.xsd");
323                 schema.push_back("xlink.xsd");
324                 schema.push_back("SMPTE-335-2012.xsd");
325                 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
326                 schema.push_back("isdcf-mca.xsd");
327                 schema.push_back("SMPTE-429-12-2008.xsd");
328
329                 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
330                  * Schemas that are not mentioned in this list are not read, and the things
331                  * they describe are not checked.
332                  */
333                 string locations;
334                 for (auto i: schema) {
335                         locations += String::compose("%1 %1 ", i, i);
336                 }
337
338                 parser.setExternalSchemaLocation(locations.c_str());
339                 parser.setValidationSchemaFullChecking(true);
340                 parser.setErrorHandler(&error_handler);
341
342                 LocalFileResolver resolver (xsd_dtd_directory);
343                 parser.setEntityResolver(&resolver);
344
345                 try {
346                         parser.resetDocumentPool();
347                         parse(parser, xml);
348                 } catch (XMLException& e) {
349                         throw MiscError(xml_ch_to_string(e.getMessage()));
350                 } catch (DOMException& e) {
351                         throw MiscError(xml_ch_to_string(e.getMessage()));
352                 } catch (...) {
353                         throw MiscError("Unknown exception from xerces");
354                 }
355         }
356
357         XMLPlatformUtils::Terminate ();
358
359         for (auto i: error_handler.errors()) {
360                 notes.push_back ({
361                         VerificationNote::Type::ERROR,
362                         VerificationNote::Code::INVALID_XML,
363                         i.message(),
364                         boost::trim_copy(i.public_id() + " " + i.system_id()),
365                         i.line()
366                 });
367         }
368 }
369
370
/** Possible results from verify_asset() */
enum class VerifyAssetResult
{
	/** nothing wrong was found with the hashes */
	GOOD,
	/** the CPL gives a hash, and it is different to the one in the PKL */
	CPL_PKL_DIFFER,
	/** the hash calculated from the asset is different to the one in the PKL */
	BAD,
};
376
377
378 static VerifyAssetResult
379 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
380 {
381         auto const actual_hash = reel_file_asset->asset_ref()->hash(progress);
382
383         auto pkls = dcp->pkls();
384         /* We've read this DCP in so it must have at least one PKL */
385         DCP_ASSERT (!pkls.empty());
386
387         auto asset = reel_file_asset->asset_ref().asset();
388
389         optional<string> pkl_hash;
390         for (auto i: pkls) {
391                 pkl_hash = i->hash (reel_file_asset->asset_ref()->id());
392                 if (pkl_hash) {
393                         break;
394                 }
395         }
396
397         DCP_ASSERT (pkl_hash);
398
399         auto cpl_hash = reel_file_asset->hash();
400         if (cpl_hash && *cpl_hash != *pkl_hash) {
401                 return VerifyAssetResult::CPL_PKL_DIFFER;
402         }
403
404         if (actual_hash != *pkl_hash) {
405                 return VerifyAssetResult::BAD;
406         }
407
408         return VerifyAssetResult::GOOD;
409 }
410
411
412 void
413 verify_language_tag (string tag, vector<VerificationNote>& notes)
414 {
415         try {
416                 LanguageTag test (tag);
417         } catch (LanguageTagError &) {
418                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, tag});
419         }
420 }
421
422
/** Possible results from checking the size in bytes of the frames in a picture asset */
enum class VerifyPictureAssetResult {
	/** no frame is bigger than the "risky" threshold */
	GOOD,
	/** the biggest frame is over the "risky" threshold but not over the maximum */
	FRAME_NEARLY_TOO_LARGE,
	/** the biggest frame is over the maximum */
	BAD
};
429
430
431 int
432 biggest_frame_size (shared_ptr<const MonoPictureFrame> frame)
433 {
434         return frame->size ();
435 }
436
437 int
438 biggest_frame_size (shared_ptr<const StereoPictureFrame> frame)
439 {
440         return max(frame->left()->size(), frame->right()->size());
441 }
442
443
444 template <class A, class R, class F>
445 optional<VerifyPictureAssetResult>
446 verify_picture_asset_type (shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
447 {
448         auto asset = dynamic_pointer_cast<A>(reel_file_asset->asset_ref().asset());
449         if (!asset) {
450                 return optional<VerifyPictureAssetResult>();
451         }
452
453         int biggest_frame = 0;
454         auto reader = asset->start_read ();
455         auto const duration = asset->intrinsic_duration ();
456         for (int64_t i = 0; i < duration; ++i) {
457                 shared_ptr<const F> frame = reader->get_frame (i);
458                 biggest_frame = max(biggest_frame, biggest_frame_size(frame));
459                 progress (float(i) / duration);
460         }
461
462         static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
463         static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
464         if (biggest_frame > max_frame) {
465                 return VerifyPictureAssetResult::BAD;
466         } else if (biggest_frame > risky_frame) {
467                 return VerifyPictureAssetResult::FRAME_NEARLY_TOO_LARGE;
468         }
469
470         return VerifyPictureAssetResult::GOOD;
471 }
472
473
474 static VerifyPictureAssetResult
475 verify_picture_asset (shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
476 {
477         auto r = verify_picture_asset_type<MonoPictureAsset, MonoPictureAssetReader, MonoPictureFrame>(reel_file_asset, progress);
478         if (!r) {
479                 r = verify_picture_asset_type<StereoPictureAsset, StereoPictureAssetReader, StereoPictureFrame>(reel_file_asset, progress);
480         }
481
482         DCP_ASSERT (r);
483         return *r;
484 }
485
486
487 static void
488 verify_main_picture_asset (
489         shared_ptr<const DCP> dcp,
490         shared_ptr<const ReelPictureAsset> reel_asset,
491         function<void (string, optional<boost::filesystem::path>)> stage,
492         function<void (float)> progress,
493         vector<VerificationNote>& notes
494         )
495 {
496         auto asset = reel_asset->asset();
497         auto const file = *asset->file();
498         stage ("Checking picture asset hash", file);
499         auto const r = verify_asset (dcp, reel_asset, progress);
500         switch (r) {
501                 case VerifyAssetResult::BAD:
502                         notes.push_back ({
503                                 VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_PICTURE_HASH, file
504                         });
505                         break;
506                 case VerifyAssetResult::CPL_PKL_DIFFER:
507                         notes.push_back ({
508                                 VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_PICTURE_HASHES, file
509                         });
510                         break;
511                 default:
512                         break;
513         }
514         stage ("Checking picture frame sizes", asset->file());
515         auto const pr = verify_picture_asset (reel_asset, progress);
516         switch (pr) {
517                 case VerifyPictureAssetResult::BAD:
518                         notes.push_back ({
519                                 VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
520                         });
521                         break;
522                 case VerifyPictureAssetResult::FRAME_NEARLY_TOO_LARGE:
523                         notes.push_back ({
524                                 VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
525                         });
526                         break;
527                 default:
528                         break;
529         }
530
531         /* Only flat/scope allowed by Bv2.1 */
532         if (
533                 asset->size() != Size(2048, 858) &&
534                 asset->size() != Size(1998, 1080) &&
535                 asset->size() != Size(4096, 1716) &&
536                 asset->size() != Size(3996, 2160)) {
537                 notes.push_back({
538                         VerificationNote::Type::BV21_ERROR,
539                         VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS,
540                         String::compose("%1x%2", asset->size().width, asset->size().height),
541                         file
542                 });
543         }
544
545         /* Only 24, 25, 48fps allowed for 2K */
546         if (
547                 (asset->size() == Size(2048, 858) || asset->size() == Size(1998, 1080)) &&
548                 (asset->edit_rate() != Fraction(24, 1) && asset->edit_rate() != Fraction(25, 1) && asset->edit_rate() != Fraction(48, 1))
549            ) {
550                 notes.push_back({
551                         VerificationNote::Type::BV21_ERROR,
552                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K,
553                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
554                         file
555                 });
556         }
557
558         if (asset->size() == Size(4096, 1716) || asset->size() == Size(3996, 2160)) {
559                 /* Only 24fps allowed for 4K */
560                 if (asset->edit_rate() != Fraction(24, 1)) {
561                         notes.push_back({
562                                 VerificationNote::Type::BV21_ERROR,
563                                 VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K,
564                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
565                                 file
566                         });
567                 }
568
569                 /* Only 2D allowed for 4K */
570                 if (dynamic_pointer_cast<const StereoPictureAsset>(asset)) {
571                         notes.push_back({
572                                 VerificationNote::Type::BV21_ERROR,
573                                 VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D,
574                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
575                                 file
576                         });
577
578                 }
579         }
580
581 }
582
583
584 static void
585 verify_main_sound_asset (
586         shared_ptr<const DCP> dcp,
587         shared_ptr<const ReelSoundAsset> reel_asset,
588         function<void (string, optional<boost::filesystem::path>)> stage,
589         function<void (float)> progress,
590         vector<VerificationNote>& notes
591         )
592 {
593         auto asset = reel_asset->asset();
594         stage ("Checking sound asset hash", asset->file());
595         auto const r = verify_asset (dcp, reel_asset, progress);
596         switch (r) {
597                 case VerifyAssetResult::BAD:
598                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_SOUND_HASH, *asset->file()});
599                         break;
600                 case VerifyAssetResult::CPL_PKL_DIFFER:
601                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_SOUND_HASHES, *asset->file()});
602                         break;
603                 default:
604                         break;
605         }
606
607         stage ("Checking sound asset metadata", asset->file());
608
609         verify_language_tag (asset->language(), notes);
610         if (asset->sampling_rate() != 48000) {
611                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SOUND_FRAME_RATE, raw_convert<string>(asset->sampling_rate()), *asset->file()});
612         }
613 }
614
615
616 static void
617 verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, vector<VerificationNote>& notes)
618 {
619         /* XXX: is Language compulsory? */
620         if (reel_asset->language()) {
621                 verify_language_tag (*reel_asset->language(), notes);
622         }
623
624         if (!reel_asset->entry_point()) {
625                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT, reel_asset->id() });
626         } else if (reel_asset->entry_point().get()) {
627                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT, reel_asset->id() });
628         }
629 }
630
631
632 static void
633 verify_closed_caption_reel (shared_ptr<const ReelClosedCaptionAsset> reel_asset, vector<VerificationNote>& notes)
634 {
635         /* XXX: is Language compulsory? */
636         if (reel_asset->language()) {
637                 verify_language_tag (*reel_asset->language(), notes);
638         }
639
640         if (!reel_asset->entry_point()) {
641                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
642         } else if (reel_asset->entry_point().get()) {
643                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
644         }
645 }
646
647
648 struct State
649 {
650         boost::optional<string> subtitle_language;
651 };
652
653
654 /** Verify stuff that is common to both subtitles and closed captions */
655 void
656 verify_smpte_timed_text_asset (
657         shared_ptr<const SMPTESubtitleAsset> asset,
658         vector<VerificationNote>& notes
659         )
660 {
661         if (asset->language()) {
662                 verify_language_tag (*asset->language(), notes);
663         } else {
664                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE, *asset->file() });
665         }
666
667         auto const size = boost::filesystem::file_size(asset->file().get());
668         if (size > 115 * 1024 * 1024) {
669                 notes.push_back (
670                         { VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES, raw_convert<string>(size), *asset->file() }
671                         );
672         }
673
674         /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
675          * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
676          */
677         auto fonts = asset->font_data ();
678         int total_size = 0;
679         for (auto i: fonts) {
680                 total_size += i.second.size();
681         }
682         if (total_size > 10 * 1024 * 1024) {
683                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES, raw_convert<string>(total_size), asset->file().get() });
684         }
685
686         if (!asset->start_time()) {
687                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_START_TIME, asset->file().get() });
688         } else if (asset->start_time() != Time()) {
689                 notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SUBTITLE_START_TIME, asset->file().get() });
690         }
691 }
692
693
694 /** Verify SMPTE subtitle-only stuff */
695 void
696 verify_smpte_subtitle_asset (
697         shared_ptr<const SMPTESubtitleAsset> asset,
698         vector<VerificationNote>& notes,
699         State& state
700         )
701 {
702         if (asset->language()) {
703                 if (!state.subtitle_language) {
704                         state.subtitle_language = *asset->language();
705                 } else if (state.subtitle_language != *asset->language()) {
706                         notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES });
707                 }
708         }
709 }
710
711
712 /** Verify all subtitle stuff */
713 static void
714 verify_subtitle_asset (
715         shared_ptr<const SubtitleAsset> asset,
716         function<void (string, optional<boost::filesystem::path>)> stage,
717         boost::filesystem::path xsd_dtd_directory,
718         vector<VerificationNote>& notes,
719         State& state
720         )
721 {
722         stage ("Checking subtitle XML", asset->file());
723         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
724          * gets passed through libdcp which may clean up and therefore hide errors.
725          */
726         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
727
728         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
729         if (smpte) {
730                 verify_smpte_timed_text_asset (smpte, notes);
731                 verify_smpte_subtitle_asset (smpte, notes, state);
732         }
733 }
734
735
736 /** Verify all closed caption stuff */
737 static void
738 verify_closed_caption_asset (
739         shared_ptr<const SubtitleAsset> asset,
740         function<void (string, optional<boost::filesystem::path>)> stage,
741         boost::filesystem::path xsd_dtd_directory,
742         vector<VerificationNote>& notes
743         )
744 {
745         stage ("Checking closed caption XML", asset->file());
746         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
747          * gets passed through libdcp which may clean up and therefore hide errors.
748          */
749         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
750
751         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
752         if (smpte) {
753                 verify_smpte_timed_text_asset (smpte, notes);
754         }
755
756         if (asset->raw_xml().size() > 256 * 1024) {
757                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES, raw_convert<string>(asset->raw_xml().size()), *asset->file()});
758         }
759 }
760
761
762 static
763 void
764 verify_text_timing (
765         vector<shared_ptr<Reel>> reels,
766         int edit_rate,
767         vector<VerificationNote>& notes,
768         std::function<bool (shared_ptr<Reel>)> check,
769         std::function<string (shared_ptr<Reel>)> xml,
770         std::function<int64_t (shared_ptr<Reel>)> duration
771         )
772 {
773         /* end of last subtitle (in editable units) */
774         optional<int64_t> last_out;
775         auto too_short = false;
776         auto too_close = false;
777         auto too_early = false;
778         auto reel_overlap = false;
779         /* current reel start time (in editable units) */
780         int64_t reel_offset = 0;
781
782         std::function<void (cxml::ConstNodePtr, optional<int>, optional<Time>, int, bool)> parse;
783         parse = [&parse, &last_out, &too_short, &too_close, &too_early, &reel_offset](cxml::ConstNodePtr node, optional<int> tcr, optional<Time> start_time, int er, bool first_reel) {
784                 if (node->name() == "Subtitle") {
785                         Time in (node->string_attribute("TimeIn"), tcr);
786                         if (start_time) {
787                                 in -= *start_time;
788                         }
789                         Time out (node->string_attribute("TimeOut"), tcr);
790                         if (start_time) {
791                                 out -= *start_time;
792                         }
793                         if (first_reel && tcr && in < Time(0, 0, 4, 0, *tcr)) {
794                                 too_early = true;
795                         }
796                         auto length = out - in;
797                         if (length.as_editable_units_ceil(er) < 15) {
798                                 too_short = true;
799                         }
800                         if (last_out) {
801                                 /* XXX: this feels dubious - is it really what Bv2.1 means? */
802                                 auto distance = reel_offset + in.as_editable_units_ceil(er) - *last_out;
803                                 if (distance >= 0 && distance < 2) {
804                                         too_close = true;
805                                 }
806                         }
807                         last_out = reel_offset + out.as_editable_units_floor(er);
808                 } else {
809                         for (auto i: node->node_children()) {
810                                 parse(i, tcr, start_time, er, first_reel);
811                         }
812                 }
813         };
814
815         for (auto i = 0U; i < reels.size(); ++i) {
816                 if (!check(reels[i])) {
817                         continue;
818                 }
819
820                 /* We need to look at <Subtitle> instances in the XML being checked, so we can't use the subtitles
821                  * read in by libdcp's parser.
822                  */
823
824                 shared_ptr<cxml::Document> doc;
825                 optional<int> tcr;
826                 optional<Time> start_time;
827                 try {
828                         doc = make_shared<cxml::Document>("SubtitleReel");
829                         doc->read_string (xml(reels[i]));
830                         tcr = doc->number_child<int>("TimeCodeRate");
831                         auto start_time_string = doc->optional_string_child("StartTime");
832                         if (start_time_string) {
833                                 start_time = Time(*start_time_string, tcr);
834                         }
835                 } catch (...) {
836                         doc = make_shared<cxml::Document>("DCSubtitle");
837                         doc->read_string (xml(reels[i]));
838                 }
839                 parse (doc, tcr, start_time, edit_rate, i == 0);
840                 auto end = reel_offset + duration(reels[i]);
841                 if (last_out && *last_out > end) {
842                         reel_overlap = true;
843                 }
844                 reel_offset = end;
845         }
846
847         if (last_out && *last_out > reel_offset) {
848                 reel_overlap = true;
849         }
850
851         if (too_early) {
852                 notes.push_back({
853                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME
854                 });
855         }
856
857         if (too_short) {
858                 notes.push_back ({
859                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_DURATION
860                 });
861         }
862
863         if (too_close) {
864                 notes.push_back ({
865                         VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_SPACING
866                 });
867         }
868
869         if (reel_overlap) {
870                 notes.push_back ({
871                         VerificationNote::Type::ERROR, VerificationNote::Code::SUBTITLE_OVERLAPS_REEL_BOUNDARY
872                 });
873         }
874 }
875
876
/** Flags filled in by verify_text_lines_and_characters().  Every flag starts out
 *  false and is only ever set by that function, so one result can be shared
 *  between calls for several assets.
 */
struct LinesCharactersResult
{
        /** set if a line was found to be too long with respect to the warning length */
        bool warning_length_exceeded { false };
        /** set if a line was found to be too long with respect to the error length */
        bool error_length_exceeded { false };
        /** set if too many lines were found to be on screen at the same time */
        bool line_count_exceeded { false };
};
883
884
885 static
886 void
887 verify_text_lines_and_characters (
888         shared_ptr<SubtitleAsset> asset,
889         int warning_length,
890         int error_length,
891         LinesCharactersResult* result
892         )
893 {
894         class Event
895         {
896         public:
897                 Event (Time time_, float position_, int characters_)
898                         : time (time_)
899                         , position (position_)
900                         , characters (characters_)
901                 {}
902
903                 Event (Time time_, shared_ptr<Event> start_)
904                         : time (time_)
905                         , start (start_)
906                 {}
907
908                 Time time;
909                 int position; //< position from 0 at top of screen to 100 at bottom
910                 int characters;
911                 shared_ptr<Event> start;
912         };
913
914         vector<shared_ptr<Event>> events;
915
916         auto position = [](shared_ptr<const SubtitleString> sub) {
917                 switch (sub->v_align()) {
918                 case VAlign::TOP:
919                         return lrintf(sub->v_position() * 100);
920                 case VAlign::CENTER:
921                         return lrintf((0.5f + sub->v_position()) * 100);
922                 case VAlign::BOTTOM:
923                         return lrintf((1.0f - sub->v_position()) * 100);
924                 }
925
926                 return 0L;
927         };
928
929         for (auto j: asset->subtitles()) {
930                 auto text = dynamic_pointer_cast<const SubtitleString>(j);
931                 if (text) {
932                         auto in = make_shared<Event>(text->in(), position(text), text->text().length());
933                         events.push_back(in);
934                         events.push_back(make_shared<Event>(text->out(), in));
935                 }
936         }
937
938         std::sort(events.begin(), events.end(), [](shared_ptr<Event> const& a, shared_ptr<Event>const& b) {
939                 return a->time < b->time;
940         });
941
942         map<int, int> current;
943         for (auto i: events) {
944                 if (current.size() > 3) {
945                         result->line_count_exceeded = true;
946                 }
947                 for (auto j: current) {
948                         if (j.second >= warning_length) {
949                                 result->warning_length_exceeded = true;
950                         }
951                         if (j.second >= error_length) {
952                                 result->error_length_exceeded = true;
953                         }
954                 }
955
956                 if (i->start) {
957                         /* end of a subtitle */
958                         DCP_ASSERT (current.find(i->start->position) != current.end());
959                         if (current[i->start->position] == i->start->characters) {
960                                 current.erase(i->start->position);
961                         } else {
962                                 current[i->start->position] -= i->start->characters;
963                         }
964                 } else {
965                         /* start of a subtitle */
966                         if (current.find(i->position) == current.end()) {
967                                 current[i->position] = i->characters;
968                         } else {
969                                 current[i->position] += i->characters;
970                         }
971                 }
972         }
973 }
974
975
976 static
977 void
978 verify_text_timing (vector<shared_ptr<Reel>> reels, vector<VerificationNote>& notes)
979 {
980         if (reels.empty()) {
981                 return;
982         }
983
984         if (reels[0]->main_subtitle()) {
985                 verify_text_timing (reels, reels[0]->main_subtitle()->edit_rate().numerator, notes,
986                         [](shared_ptr<Reel> reel) {
987                                 return static_cast<bool>(reel->main_subtitle());
988                         },
989                         [](shared_ptr<Reel> reel) {
990                                 return reel->main_subtitle()->asset()->raw_xml();
991                         },
992                         [](shared_ptr<Reel> reel) {
993                                 return reel->main_subtitle()->actual_duration();
994                         }
995                 );
996         }
997
998         for (auto i = 0U; i < reels[0]->closed_captions().size(); ++i) {
999                 verify_text_timing (reels, reels[0]->closed_captions()[i]->edit_rate().numerator, notes,
1000                         [i](shared_ptr<Reel> reel) {
1001                                 return i < reel->closed_captions().size();
1002                         },
1003                         [i](shared_ptr<Reel> reel) {
1004                                 return reel->closed_captions()[i]->asset()->raw_xml();
1005                         },
1006                         [i](shared_ptr<Reel> reel) {
1007                                 return reel->closed_captions()[i]->actual_duration();
1008                         }
1009                 );
1010         }
1011 }
1012
1013
1014 void
1015 verify_extension_metadata (shared_ptr<CPL> cpl, vector<VerificationNote>& notes)
1016 {
1017         DCP_ASSERT (cpl->file());
1018         cxml::Document doc ("CompositionPlaylist");
1019         doc.read_file (cpl->file().get());
1020
1021         auto missing = false;
1022         string malformed;
1023
1024         if (auto reel_list = doc.node_child("ReelList")) {
1025                 auto reels = reel_list->node_children("Reel");
1026                 if (!reels.empty()) {
1027                         if (auto asset_list = reels[0]->optional_node_child("AssetList")) {
1028                                 if (auto metadata = asset_list->optional_node_child("CompositionMetadataAsset")) {
1029                                         if (auto extension_list = metadata->optional_node_child("ExtensionMetadataList")) {
1030                                                 missing = true;
1031                                                 for (auto extension: extension_list->node_children("ExtensionMetadata")) {
1032                                                         if (extension->optional_string_attribute("scope").get_value_or("") != "http://isdcf.com/ns/cplmd/app") {
1033                                                                 continue;
1034                                                         }
1035                                                         missing = false;
1036                                                         if (auto name = extension->optional_node_child("Name")) {
1037                                                                 if (name->content() != "Application") {
1038                                                                         malformed = "<Name> should be 'Application'";
1039                                                                 }
1040                                                         }
1041                                                         if (auto property_list = extension->optional_node_child("PropertyList")) {
1042                                                                 if (auto property = property_list->optional_node_child("Property")) {
1043                                                                         if (auto name = property->optional_node_child("Name")) {
1044                                                                                 if (name->content() != "DCP Constraints Profile") {
1045                                                                                         malformed = "<Name> property should be 'DCP Constraints Profile'";
1046                                                                                 }
1047                                                                         }
1048                                                                         if (auto value = property->optional_node_child("Value")) {
1049                                                                                 if (value->content() != "SMPTE-RDD-52:2020-Bv2.1") {
1050                                                                                         malformed = "<Value> property should be 'SMPTE-RDD-52:2020-Bv2.1'";
1051                                                                                 }
1052                                                                         }
1053                                                                 }
1054                                                         }
1055                                                 }
1056                                         } else {
1057                                                 missing = true;
1058                                         }
1059                                 }
1060                         }
1061                 }
1062         }
1063
1064         if (missing) {
1065                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_EXTENSION_METADATA, cpl->id(), cpl->file().get()});
1066         } else if (!malformed.empty()) {
1067                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_EXTENSION_METADATA, malformed, cpl->file().get()});
1068         }
1069 }
1070
1071
1072 bool
1073 pkl_has_encrypted_assets (shared_ptr<DCP> dcp, shared_ptr<PKL> pkl)
1074 {
1075         vector<string> encrypted;
1076         for (auto i: dcp->cpls()) {
1077                 for (auto j: i->reel_file_assets()) {
1078                         if (j->asset_ref().resolved()) {
1079                                 /* It's a bit surprising / broken but Interop subtitle assets are represented
1080                                  * in reels by ReelSubtitleAsset which inherits ReelFileAsset, so it's possible for
1081                                  * ReelFileAssets to have assets which are not MXFs.
1082                                  */
1083                                 if (auto asset = dynamic_pointer_cast<MXF>(j->asset_ref().asset())) {
1084                                         if (asset->encrypted()) {
1085                                                 encrypted.push_back(j->asset_ref().id());
1086                                         }
1087                                 }
1088                         }
1089                 }
1090         }
1091
1092         for (auto i: pkl->asset_list()) {
1093                 if (find(encrypted.begin(), encrypted.end(), i->id()) != encrypted.end()) {
1094                         return true;
1095                 }
1096         }
1097
1098         return false;
1099 }
1100
1101
1102 vector<VerificationNote>
1103 dcp::verify (
1104         vector<boost::filesystem::path> directories,
1105         function<void (string, optional<boost::filesystem::path>)> stage,
1106         function<void (float)> progress,
1107         boost::filesystem::path xsd_dtd_directory
1108         )
1109 {
1110         xsd_dtd_directory = boost::filesystem::canonical (xsd_dtd_directory);
1111
1112         vector<VerificationNote> notes;
1113         State state{};
1114
1115         vector<shared_ptr<DCP>> dcps;
1116         for (auto i: directories) {
1117                 dcps.push_back (shared_ptr<DCP> (new DCP (i)));
1118         }
1119
1120         for (auto dcp: dcps) {
1121                 stage ("Checking DCP", dcp->directory());
1122                 try {
1123                         dcp->read (&notes);
1124                 } catch (ReadError& e) {
1125                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1126                 } catch (XMLError& e) {
1127                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1128                 } catch (MXFFileError& e) {
1129                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1130                 } catch (cxml::Error& e) {
1131                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
1132                 }
1133
1134                 if (dcp->standard() != Standard::SMPTE) {
1135                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_STANDARD});
1136                 }
1137
1138                 for (auto cpl: dcp->cpls()) {
1139                         stage ("Checking CPL", cpl->file());
1140                         validate_xml (cpl->file().get(), xsd_dtd_directory, notes);
1141
1142                         if (cpl->any_encrypted() && !cpl->all_encrypted()) {
1143                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::PARTIALLY_ENCRYPTED});
1144                         }
1145
1146                         for (auto const& i: cpl->additional_subtitle_languages()) {
1147                                 verify_language_tag (i, notes);
1148                         }
1149
1150                         if (cpl->release_territory()) {
1151                                 if (!cpl->release_territory_scope() || cpl->release_territory_scope().get() != "http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata#scope/release-territory/UNM49") {
1152                                         auto terr = cpl->release_territory().get();
1153                                         /* Must be a valid region tag, or "001" */
1154                                         try {
1155                                                 LanguageTag::RegionSubtag test (terr);
1156                                         } catch (...) {
1157                                                 if (terr != "001") {
1158                                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, terr});
1159                                                 }
1160                                         }
1161                                 }
1162                         }
1163
1164                         if (dcp->standard() == Standard::SMPTE) {
1165                                 if (!cpl->annotation_text()) {
1166                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1167                                 } else if (cpl->annotation_text().get() != cpl->content_title_text()) {
1168                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1169                                 }
1170                         }
1171
1172                         for (auto i: dcp->pkls()) {
1173                                 /* Check that the CPL's hash corresponds to the PKL */
1174                                 optional<string> h = i->hash(cpl->id());
1175                                 if (h && make_digest(ArrayData(*cpl->file())) != *h) {
1176                                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_CPL_HASHES, cpl->id(), cpl->file().get()});
1177                                 }
1178
1179                                 /* Check that any PKL with a single CPL has its AnnotationText the same as the CPL's ContentTitleText */
1180                                 optional<string> required_annotation_text;
1181                                 for (auto j: i->asset_list()) {
1182                                         /* See if this is a CPL */
1183                                         for (auto k: dcp->cpls()) {
1184                                                 if (j->id() == k->id()) {
1185                                                         if (!required_annotation_text) {
1186                                                                 /* First CPL we have found; this is the required AnnotationText unless we find another */
1187                                                                 required_annotation_text = cpl->content_title_text();
1188                                                         } else {
1189                                                                 /* There's more than one CPL so we don't care what the PKL's AnnotationText is */
1190                                                                 required_annotation_text = boost::none;
1191                                                         }
1192                                                 }
1193                                         }
1194                                 }
1195
1196                                 if (required_annotation_text && i->annotation_text() != required_annotation_text) {
1197                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL, i->id(), i->file().get()});
1198                                 }
1199                         }
1200
1201                         /* set to true if any reel has a MainSubtitle */
1202                         auto have_main_subtitle = false;
1203                         /* set to true if any reel has no MainSubtitle */
1204                         auto have_no_main_subtitle = false;
1205                         /* fewest number of closed caption assets seen in a reel */
1206                         size_t fewest_closed_captions = SIZE_MAX;
1207                         /* most number of closed caption assets seen in a reel */
1208                         size_t most_closed_captions = 0;
1209                         map<Marker, Time> markers_seen;
1210
1211                         for (auto reel: cpl->reels()) {
1212                                 stage ("Checking reel", optional<boost::filesystem::path>());
1213
1214                                 for (auto i: reel->assets()) {
1215                                         if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1216                                                 notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_DURATION, i->id()});
1217                                         }
1218                                         if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1219                                                 notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_INTRINSIC_DURATION, i->id()});
1220                                         }
1221                                         auto file_asset = dynamic_pointer_cast<ReelFileAsset>(i);
1222                                         if (file_asset && !file_asset->hash()) {
1223                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_HASH, i->id()});
1224                                         }
1225                                 }
1226
1227                                 if (dcp->standard() == Standard::SMPTE) {
1228                                         boost::optional<int64_t> duration;
1229                                         for (auto i: reel->assets()) {
1230                                                 if (!duration) {
1231                                                         duration = i->actual_duration();
1232                                                 } else if (*duration != i->actual_duration()) {
1233                                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_ASSET_DURATION});
1234                                                         break;
1235                                                 }
1236                                         }
1237                                 }
1238
1239                                 if (reel->main_picture()) {
1240                                         /* Check reel stuff */
1241                                         auto const frame_rate = reel->main_picture()->frame_rate();
1242                                         if (frame_rate.denominator != 1 ||
1243                                             (frame_rate.numerator != 24 &&
1244                                              frame_rate.numerator != 25 &&
1245                                              frame_rate.numerator != 30 &&
1246                                              frame_rate.numerator != 48 &&
1247                                              frame_rate.numerator != 50 &&
1248                                              frame_rate.numerator != 60 &&
1249                                              frame_rate.numerator != 96)) {
1250                                                 notes.push_back ({
1251                                                         VerificationNote::Type::ERROR,
1252                                                         VerificationNote::Code::INVALID_PICTURE_FRAME_RATE,
1253                                                         String::compose("%1/%2", frame_rate.numerator, frame_rate.denominator)
1254                                                 });
1255                                         }
1256                                         /* Check asset */
1257                                         if (reel->main_picture()->asset_ref().resolved()) {
1258                                                 verify_main_picture_asset (dcp, reel->main_picture(), stage, progress, notes);
1259                                         }
1260                                 }
1261
1262                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
1263                                         verify_main_sound_asset (dcp, reel->main_sound(), stage, progress, notes);
1264                                 }
1265
1266                                 if (reel->main_subtitle()) {
1267                                         verify_main_subtitle_reel (reel->main_subtitle(), notes);
1268                                         if (reel->main_subtitle()->asset_ref().resolved()) {
1269                                                 verify_subtitle_asset (reel->main_subtitle()->asset(), stage, xsd_dtd_directory, notes, state);
1270                                         }
1271                                         have_main_subtitle = true;
1272                                 } else {
1273                                         have_no_main_subtitle = true;
1274                                 }
1275
1276                                 for (auto i: reel->closed_captions()) {
1277                                         verify_closed_caption_reel (i, notes);
1278                                         if (i->asset_ref().resolved()) {
1279                                                 verify_closed_caption_asset (i->asset(), stage, xsd_dtd_directory, notes);
1280                                         }
1281                                 }
1282
1283                                 if (reel->main_markers()) {
1284                                         for (auto const& i: reel->main_markers()->get()) {
1285                                                 markers_seen.insert (i);
1286                                         }
1287                                 }
1288
1289                                 fewest_closed_captions = std::min (fewest_closed_captions, reel->closed_captions().size());
1290                                 most_closed_captions = std::max (most_closed_captions, reel->closed_captions().size());
1291                         }
1292
1293                         verify_text_timing (cpl->reels(), notes);
1294
1295                         if (dcp->standard() == Standard::SMPTE) {
1296
1297                                 if (have_main_subtitle && have_no_main_subtitle) {
1298                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS});
1299                                 }
1300
1301                                 if (fewest_closed_captions != most_closed_captions) {
1302                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS});
1303                                 }
1304
1305                                 if (cpl->content_kind() == ContentKind::FEATURE) {
1306                                         if (markers_seen.find(Marker::FFEC) == markers_seen.end()) {
1307                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFEC_IN_FEATURE});
1308                                         }
1309                                         if (markers_seen.find(Marker::FFMC) == markers_seen.end()) {
1310                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_FFMC_IN_FEATURE});
1311                                         }
1312                                 }
1313
1314                                 auto ffoc = markers_seen.find(Marker::FFOC);
1315                                 if (ffoc == markers_seen.end()) {
1316                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_FFOC});
1317                                 } else if (ffoc->second.e != 1) {
1318                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_FFOC, raw_convert<string>(ffoc->second.e)});
1319                                 }
1320
1321                                 auto lfoc = markers_seen.find(Marker::LFOC);
1322                                 if (lfoc == markers_seen.end()) {
1323                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSING_LFOC});
1324                                 } else {
1325                                         auto lfoc_time = lfoc->second.as_editable_units_ceil(lfoc->second.tcr);
1326                                         if (lfoc_time != (cpl->reels().back()->duration() - 1)) {
1327                                                 notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_LFOC, raw_convert<string>(lfoc_time)});
1328                                         }
1329                                 }
1330
1331                                 LinesCharactersResult result;
1332                                 for (auto reel: cpl->reels()) {
1333                                         if (reel->main_subtitle() && reel->main_subtitle()->asset()) {
1334                                                 verify_text_lines_and_characters (reel->main_subtitle()->asset(), 52, 79, &result);
1335                                         }
1336                                 }
1337
1338                                 if (result.line_count_exceeded) {
1339                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT});
1340                                 }
1341                                 if (result.error_length_exceeded) {
1342                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH});
1343                                 } else if (result.warning_length_exceeded) {
1344                                         notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH});
1345                                 }
1346
1347                                 result = LinesCharactersResult();
1348                                 for (auto reel: cpl->reels()) {
1349                                         for (auto i: reel->closed_captions()) {
1350                                                 if (i->asset()) {
1351                                                         verify_text_lines_and_characters (i->asset(), 32, 32, &result);
1352                                                 }
1353                                         }
1354                                 }
1355
1356                                 if (result.line_count_exceeded) {
1357                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT});
1358                                 }
1359                                 if (result.error_length_exceeded) {
1360                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH});
1361                                 }
1362
1363                                 if (!cpl->full_content_title_text()) {
1364                                         /* Since FullContentTitleText is assumed always to exist if there's a CompositionMetadataAsset we
1365                                          * can use it as a proxy for CompositionMetadataAsset's existence.
1366                                          */
1367                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA, cpl->id(), cpl->file().get()});
1368                                 } else if (!cpl->version_number()) {
1369                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER, cpl->id(), cpl->file().get()});
1370                                 }
1371
1372                                 verify_extension_metadata (cpl, notes);
1373
1374                                 if (cpl->any_encrypted()) {
1375                                         cxml::Document doc ("CompositionPlaylist");
1376                                         DCP_ASSERT (cpl->file());
1377                                         doc.read_file (cpl->file().get());
1378                                         if (!doc.optional_node_child("Signature")) {
1379                                                 notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT, cpl->id(), cpl->file().get()});
1380                                         }
1381                                 }
1382                         }
1383                 }
1384
1385                 for (auto pkl: dcp->pkls()) {
1386                         stage ("Checking PKL", pkl->file());
1387                         validate_xml (pkl->file().get(), xsd_dtd_directory, notes);
1388                         if (pkl_has_encrypted_assets(dcp, pkl)) {
1389                                 cxml::Document doc ("PackingList");
1390                                 doc.read_file (pkl->file().get());
1391                                 if (!doc.optional_node_child("Signature")) {
1392                                         notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT, pkl->id(), pkl->file().get()});
1393                                 }
1394                         }
1395                 }
1396
1397                 if (dcp->asset_map_path()) {
1398                         stage ("Checking ASSETMAP", dcp->asset_map_path().get());
1399                         validate_xml (dcp->asset_map_path().get(), xsd_dtd_directory, notes);
1400                 } else {
1401                         notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_ASSETMAP});
1402                 }
1403         }
1404
1405         return notes;
1406 }
1407
1408
1409 string
1410 dcp::note_to_string (VerificationNote note)
1411 {
1412         /** These strings should say what is wrong, incorporating any extra details (ID, filenames etc.).
1413          *
1414          *  e.g. "ClosedCaption asset has no <EntryPoint> tag.",
1415          *  not "ClosedCaption assets must have an <EntryPoint> tag."
1416          *
1417          *  It's OK to use XML tag names where they are clear.
1418          *  If both ID and filename are available, use only the ID.
1419          *  End messages with a full stop.
1420          *  Messages should not mention whether or not their errors are a part of Bv2.1.
1421          */
1422         switch (note.code()) {
1423         case VerificationNote::Code::FAILED_READ:
1424                 return *note.note();
1425         case VerificationNote::Code::MISMATCHED_CPL_HASHES:
1426                 return String::compose("The hash of the CPL %1 in the PKL does not agree with the CPL file.", note.note().get());
1427         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE:
1428                 return String::compose("The picture in a reel has an invalid frame rate %1.", note.note().get());
1429         case VerificationNote::Code::INCORRECT_PICTURE_HASH:
1430                 return String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
1431         case VerificationNote::Code::MISMATCHED_PICTURE_HASHES:
1432                 return String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
1433         case VerificationNote::Code::INCORRECT_SOUND_HASH:
1434                 return String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
1435         case VerificationNote::Code::MISMATCHED_SOUND_HASHES:
1436                 return String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
1437         case VerificationNote::Code::EMPTY_ASSET_PATH:
1438                 return "The asset map contains an empty asset path.";
1439         case VerificationNote::Code::MISSING_ASSET:
1440                 return String::compose("The file %1 for an asset in the asset map cannot be found.", note.file()->filename());
1441         case VerificationNote::Code::MISMATCHED_STANDARD:
1442                 return "The DCP contains both SMPTE and Interop parts.";
1443         case VerificationNote::Code::INVALID_XML:
1444                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
1445         case VerificationNote::Code::MISSING_ASSETMAP:
1446                 return "No ASSETMAP or ASSETMAP.xml was found.";
1447         case VerificationNote::Code::INVALID_INTRINSIC_DURATION:
1448                 return String::compose("The intrinsic duration of the asset %1 is less than 1 second.", note.note().get());
1449         case VerificationNote::Code::INVALID_DURATION:
1450                 return String::compose("The duration of the asset %1 is less than 1 second.", note.note().get());
1451         case VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1452                 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
1453         case VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1454                 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
1455         case VerificationNote::Code::EXTERNAL_ASSET:
1456                 return String::compose("The asset %1 that this DCP refers to is not included in the DCP.  It may be a VF.", note.note().get());
1457         case VerificationNote::Code::INVALID_STANDARD:
1458                 return "This DCP does not use the SMPTE standard.";
1459         case VerificationNote::Code::INVALID_LANGUAGE:
1460                 return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
1461         case VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS:
1462                 return String::compose("The size %1 of picture asset %2 is not allowed.", note.note().get(), note.file()->filename());
1463         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K:
1464                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 2K DCPs.", note.note().get(), note.file()->filename());
1465         case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K:
1466                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 4K DCPs.", note.note().get(), note.file()->filename());
1467         case VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D:
1468                 return "3D 4K DCPs are not allowed.";
1469         case VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES:
1470                 return String::compose("The size %1 of the closed caption asset %2 is larger than the 256KB maximum.", note.note().get(), note.file()->filename());
1471         case VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES:
1472                 return String::compose("The size %1 of the timed text asset %2 is larger than the 115MB maximum.", note.note().get(), note.file()->filename());
1473         case VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES:
1474                 return String::compose("The size %1 of the fonts in timed text asset %2 is larger than the 10MB maximum.", note.note().get(), note.file()->filename());
1475         case VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE:
1476                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <Language> tag.", note.file()->filename());
1477         case VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES:
1478                 return "Some subtitle assets have different <Language> tags than others";
1479         case VerificationNote::Code::MISSING_SUBTITLE_START_TIME:
1480                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <StartTime> tag.", note.file()->filename());
1481         case VerificationNote::Code::INVALID_SUBTITLE_START_TIME:
1482                 return String::compose("The XML for a SMPTE subtitle asset %1 has a non-zero <StartTime> tag.", note.file()->filename());
1483         case VerificationNote::Code::INVALID_SUBTITLE_FIRST_TEXT_TIME:
1484                 return "The first subtitle or closed caption is less than 4 seconds from the start of the DCP.";
1485         case VerificationNote::Code::INVALID_SUBTITLE_DURATION:
1486                 return "At least one subtitle lasts less than 15 frames.";
1487         case VerificationNote::Code::INVALID_SUBTITLE_SPACING:
1488                 return "At least one pair of subtitles is separated by less than 2 frames.";
1489         case VerificationNote::Code::SUBTITLE_OVERLAPS_REEL_BOUNDARY:
1490                 return "At least one subtitle extends outside of its reel.";
1491         case VerificationNote::Code::INVALID_SUBTITLE_LINE_COUNT:
1492                 return "There are more than 3 subtitle lines in at least one place in the DCP.";
1493         case VerificationNote::Code::NEARLY_INVALID_SUBTITLE_LINE_LENGTH:
1494                 return "There are more than 52 characters in at least one subtitle line.";
1495         case VerificationNote::Code::INVALID_SUBTITLE_LINE_LENGTH:
1496                 return "There are more than 79 characters in at least one subtitle line.";
1497         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_COUNT:
1498                 return "There are more than 3 closed caption lines in at least one place.";
1499         case VerificationNote::Code::INVALID_CLOSED_CAPTION_LINE_LENGTH:
1500                 return "There are more than 32 characters in at least one closed caption line.";
1501         case VerificationNote::Code::INVALID_SOUND_FRAME_RATE:
1502                 return String::compose("The sound asset %1 has a sampling rate of %2", note.file()->filename(), note.note().get());
1503         case VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT:
1504                 return String::compose("The CPL %1 has no <AnnotationText> tag.", note.note().get());
1505         case VerificationNote::Code::MISMATCHED_CPL_ANNOTATION_TEXT:
1506                 return String::compose("The CPL %1 has an <AnnotationText> which differs from its <ContentTitleText>", note.note().get());
1507         case VerificationNote::Code::MISMATCHED_ASSET_DURATION:
1508                 return "All assets in a reel do not have the same duration.";
1509         case VerificationNote::Code::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS:
1510                 return "At least one reel contains a subtitle asset, but some reel(s) do not";
1511         case VerificationNote::Code::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS:
1512                 return "At least one reel has closed captions, but reels have different numbers of closed caption assets.";
1513         case VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT:
1514                 return String::compose("The subtitle asset %1 has no <EntryPoint> tag.", note.note().get());
1515         case VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT:
1516                 return String::compose("The subtitle asset %1 has an <EntryPoint> other than 0.", note.note().get());
1517         case VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT:
1518                 return String::compose("The closed caption asset %1 has no <EntryPoint> tag.", note.note().get());
1519         case VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT:
1520                 return String::compose("The closed caption asset %1 has an <EntryPoint> other than 0.", note.note().get());
1521         case VerificationNote::Code::MISSING_HASH:
1522                 return String::compose("The asset %1 has no <Hash> tag in the CPL.", note.note().get());
1523         case VerificationNote::Code::MISSING_FFEC_IN_FEATURE:
1524                 return "The DCP is marked as a Feature but there is no FFEC (first frame of end credits) marker";
1525         case VerificationNote::Code::MISSING_FFMC_IN_FEATURE:
1526                 return "The DCP is marked as a Feature but there is no FFMC (first frame of moving credits) marker";
1527         case VerificationNote::Code::MISSING_FFOC:
1528                 return "There should be a FFOC (first frame of content) marker";
1529         case VerificationNote::Code::MISSING_LFOC:
1530                 return "There should be a LFOC (last frame of content) marker";
1531         case VerificationNote::Code::INCORRECT_FFOC:
1532                 return String::compose("The FFOC marker is %1 instead of 1", note.note().get());
1533         case VerificationNote::Code::INCORRECT_LFOC:
1534                 return String::compose("The LFOC marker is %1 instead of 1 less than the duration of the last reel.", note.note().get());
1535         case VerificationNote::Code::MISSING_CPL_METADATA:
1536                 return String::compose("The CPL %1 has no <CompositionMetadataAsset> tag.", note.note().get());
1537         case VerificationNote::Code::MISSING_CPL_METADATA_VERSION_NUMBER:
1538                 return String::compose("The CPL %1 has no <VersionNumber> in its <CompositionMetadataAsset>.", note.note().get());
1539         case VerificationNote::Code::MISSING_EXTENSION_METADATA:
1540                 return String::compose("The CPL %1 has no <ExtensionMetadata> in its <CompositionMetadataAsset>.", note.note().get());
1541         case VerificationNote::Code::INVALID_EXTENSION_METADATA:
1542                 return String::compose("The CPL %1 has a malformed <ExtensionMetadata> (%2).", note.file()->filename(), note.note().get());
1543         case VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT:
1544                 return String::compose("The CPL %1, which has encrypted content, is not signed.", note.note().get());
1545         case VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT:
1546                 return String::compose("The PKL %1, which has encrypted content, is not signed.", note.note().get());
1547         case VerificationNote::Code::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL:
1548                 return String::compose("The PKL %1 has only one CPL but its <AnnotationText> does not match the CPL's <ContentTitleText>.", note.note().get());
1549         case VerificationNote::Code::PARTIALLY_ENCRYPTED:
1550                 return "Some assets are encrypted but some are not.";
1551         case VerificationNote::Code::INVALID_JPEG2000_CODESTREAM:
1552                 return String::compose("The JPEG2000 codestream for at least one frame is invalid (%1)", note.note().get());
1553         case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_2K:
1554                 return String::compose("The JPEG2000 codestream uses %1 guard bits in a 2K image instead of 1.", note.note().get());
1555         case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_4K:
1556                 return String::compose("The JPEG2000 codestream uses %1 guard bits in a 4K image instead of 2.", note.note().get());
1557         case VerificationNote::Code::INVALID_JPEG2000_TILE_SIZE:
1558                 return "The JPEG2000 tile size is not the same as the image size.";
1559         case VerificationNote::Code::INVALID_JPEG2000_CODE_BLOCK_WIDTH:
1560                 return String::compose("The JPEG2000 codestream uses a code block width of %1 instead of 32.", note.note().get());
1561         case VerificationNote::Code::INVALID_JPEG2000_CODE_BLOCK_HEIGHT:
1562                 return String::compose("The JPEG2000 codestream uses a code block height of %1 instead of 32.", note.note().get());
1563         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER_COUNT_FOR_2K:
1564                 return String::compose("%1 POC markers found in 2K JPEG2000 codestream instead of 0.", note.note().get());
1565         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER_COUNT_FOR_4K:
1566                 return String::compose("%1 POC markers found in 4K JPEG2000 codestream instead of 1.", note.note().get());
1567         case VerificationNote::Code::INCORRECT_JPEG2000_POC_MARKER:
1568                 return String::compose("Incorrect POC marker content found (%1)", note.note().get());
1569         case VerificationNote::Code::INVALID_JPEG2000_POC_MARKER_LOCATION:
1570                 return "POC marker found outside main header";
1571         case VerificationNote::Code::INVALID_JPEG2000_TILE_PARTS_FOR_2K:
1572                 return String::compose("The JPEG2000 codestream has %1 tile parts in a 2K image instead of 3.", note.note().get());
1573         case VerificationNote::Code::INVALID_JPEG2000_TILE_PARTS_FOR_4K:
1574                 return String::compose("The JPEG2000 codestream has %1 tile parts in a 4K image instead of 6.", note.note().get());
1575         case VerificationNote::Code::MISSING_JPEG200_TLM_MARKER:
1576                 return "No TLM marker was found in a JPEG2000 codestream.";
1577         }
1578
1579         return "";
1580 }
1581
1582
1583 bool
1584 dcp::operator== (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
1585 {
1586         return a.type() == b.type() && a.code() == b.code() && a.note() == b.note() && a.file() == b.file() && a.line() == b.line();
1587 }
1588
1589
1590 std::ostream&
1591 dcp::operator<< (std::ostream& s, dcp::VerificationNote const& note)
1592 {
1593         s << note_to_string (note);
1594         if (note.note()) {
1595                 s << " [" << note.note().get() << "]";
1596         }
1597         if (note.file()) {
1598                 s << " [" << note.file().get() << "]";
1599         }
1600         if (note.line()) {
1601                 s << " [" << note.line().get() << "]";
1602         }
1603         return s;
1604 }
1605