54b0a26afa321799952987194b77430813dfb3a5
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2021 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34 #include "verify.h"
35 #include "dcp.h"
36 #include "cpl.h"
37 #include "reel.h"
38 #include "reel_closed_caption_asset.h"
39 #include "reel_picture_asset.h"
40 #include "reel_sound_asset.h"
41 #include "reel_subtitle_asset.h"
42 #include "interop_subtitle_asset.h"
43 #include "mono_picture_asset.h"
44 #include "mono_picture_frame.h"
45 #include "stereo_picture_asset.h"
46 #include "stereo_picture_frame.h"
47 #include "exceptions.h"
48 #include "compose.hpp"
49 #include "raw_convert.h"
50 #include "reel_markers_asset.h"
51 #include "smpte_subtitle_asset.h"
52 #include <xercesc/util/PlatformUtils.hpp>
53 #include <xercesc/parsers/XercesDOMParser.hpp>
54 #include <xercesc/parsers/AbstractDOMParser.hpp>
55 #include <xercesc/sax/HandlerBase.hpp>
56 #include <xercesc/dom/DOMImplementation.hpp>
57 #include <xercesc/dom/DOMImplementationLS.hpp>
58 #include <xercesc/dom/DOMImplementationRegistry.hpp>
59 #include <xercesc/dom/DOMLSParser.hpp>
60 #include <xercesc/dom/DOMException.hpp>
61 #include <xercesc/dom/DOMDocument.hpp>
62 #include <xercesc/dom/DOMNodeList.hpp>
63 #include <xercesc/dom/DOMError.hpp>
64 #include <xercesc/dom/DOMLocator.hpp>
65 #include <xercesc/dom/DOMNamedNodeMap.hpp>
66 #include <xercesc/dom/DOMAttr.hpp>
67 #include <xercesc/dom/DOMErrorHandler.hpp>
68 #include <xercesc/framework/LocalFileInputSource.hpp>
69 #include <xercesc/framework/MemBufInputSource.hpp>
70 #include <boost/noncopyable.hpp>
71 #include <boost/algorithm/string.hpp>
72 #include <map>
73 #include <vector>
74 #include <iostream>
75
76 using std::list;
77 using std::vector;
78 using std::string;
79 using std::cout;
80 using std::map;
81 using std::max;
82 using std::set;
83 using std::shared_ptr;
84 using std::make_shared;
85 using boost::optional;
86 using boost::function;
87 using std::dynamic_pointer_cast;
88
89 using namespace dcp;
90 using namespace xercesc;
91
92 static
93 string
94 xml_ch_to_string (XMLCh const * a)
95 {
96         char* x = XMLString::transcode(a);
97         string const o(x);
98         XMLString::release(&x);
99         return o;
100 }
101
102 class XMLValidationError
103 {
104 public:
105         XMLValidationError (SAXParseException const & e)
106                 : _message (xml_ch_to_string(e.getMessage()))
107                 , _line (e.getLineNumber())
108                 , _column (e.getColumnNumber())
109                 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
110                 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
111         {
112
113         }
114
115         string message () const {
116                 return _message;
117         }
118
119         uint64_t line () const {
120                 return _line;
121         }
122
123         uint64_t column () const {
124                 return _column;
125         }
126
127         string public_id () const {
128                 return _public_id;
129         }
130
131         string system_id () const {
132                 return _system_id;
133         }
134
135 private:
136         string _message;
137         uint64_t _line;
138         uint64_t _column;
139         string _public_id;
140         string _system_id;
141 };
142
143
144 class DCPErrorHandler : public ErrorHandler
145 {
146 public:
147         void warning(const SAXParseException& e)
148         {
149                 maybe_add (XMLValidationError(e));
150         }
151
152         void error(const SAXParseException& e)
153         {
154                 maybe_add (XMLValidationError(e));
155         }
156
157         void fatalError(const SAXParseException& e)
158         {
159                 maybe_add (XMLValidationError(e));
160         }
161
162         void resetErrors() {
163                 _errors.clear ();
164         }
165
166         list<XMLValidationError> errors () const {
167                 return _errors;
168         }
169
170 private:
171         void maybe_add (XMLValidationError e)
172         {
173                 /* XXX: nasty hack */
174                 if (
175                         e.message().find("schema document") != string::npos &&
176                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
177                         ) {
178                         return;
179                 }
180
181                 _errors.push_back (e);
182         }
183
184         list<XMLValidationError> _errors;
185 };
186
187 class StringToXMLCh : public boost::noncopyable
188 {
189 public:
190         StringToXMLCh (string a)
191         {
192                 _buffer = XMLString::transcode(a.c_str());
193         }
194
195         ~StringToXMLCh ()
196         {
197                 XMLString::release (&_buffer);
198         }
199
200         XMLCh const * get () const {
201                 return _buffer;
202         }
203
204 private:
205         XMLCh* _buffer;
206 };
207
208 class LocalFileResolver : public EntityResolver
209 {
210 public:
211         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
212                 : _xsd_dtd_directory (xsd_dtd_directory)
213         {
214                 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
215                  * found without being here.
216                  */
217                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
218                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
219                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
220                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
221                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
222                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
223                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
224                 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
225                 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
226                 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
227                 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
228                 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
229                 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
230         }
231
232         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
233         {
234                 if (!system_id) {
235                         return 0;
236                 }
237                 auto system_id_str = xml_ch_to_string (system_id);
238                 auto p = _xsd_dtd_directory;
239                 if (_files.find(system_id_str) == _files.end()) {
240                         p /= system_id_str;
241                 } else {
242                         p /= _files[system_id_str];
243                 }
244                 StringToXMLCh ch (p.string());
245                 return new LocalFileInputSource(ch.get());
246         }
247
248 private:
249         void add (string uri, string file)
250         {
251                 _files[uri] = file;
252         }
253
254         std::map<string, string> _files;
255         boost::filesystem::path _xsd_dtd_directory;
256 };
257
258
259 static void
260 parse (XercesDOMParser& parser, boost::filesystem::path xml)
261 {
262         parser.parse(xml.string().c_str());
263 }
264
265
266 static void
267 parse (XercesDOMParser& parser, string xml)
268 {
269         xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
270         parser.parse(buf);
271 }
272
273
274 template <class T>
275 void
276 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, vector<VerificationNote>& notes)
277 {
278         try {
279                 XMLPlatformUtils::Initialize ();
280         } catch (XMLException& e) {
281                 throw MiscError ("Failed to initialise xerces library");
282         }
283
284         DCPErrorHandler error_handler;
285
286         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
287         {
288                 XercesDOMParser parser;
289                 parser.setValidationScheme(XercesDOMParser::Val_Always);
290                 parser.setDoNamespaces(true);
291                 parser.setDoSchema(true);
292
293                 vector<string> schema;
294                 schema.push_back("xml.xsd");
295                 schema.push_back("xmldsig-core-schema.xsd");
296                 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
297                 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
298                 schema.push_back("SMPTE-429-9-2007-AM.xsd");
299                 schema.push_back("Main-Stereo-Picture-CPL.xsd");
300                 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
301                 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
302                 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
303                 schema.push_back("DCSubtitle.v1.mattsson.xsd");
304                 schema.push_back("DCDMSubtitle-2010.xsd");
305                 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
306                 schema.push_back("SMPTE-429-16.xsd");
307                 schema.push_back("Dolby-2012-AD.xsd");
308                 schema.push_back("SMPTE-429-10-2008.xsd");
309                 schema.push_back("xlink.xsd");
310                 schema.push_back("SMPTE-335-2012.xsd");
311                 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
312                 schema.push_back("isdcf-mca.xsd");
313                 schema.push_back("SMPTE-429-12-2008.xsd");
314
315                 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
316                  * Schemas that are not mentioned in this list are not read, and the things
317                  * they describe are not checked.
318                  */
319                 string locations;
320                 for (auto i: schema) {
321                         locations += String::compose("%1 %1 ", i, i);
322                 }
323
324                 parser.setExternalSchemaLocation(locations.c_str());
325                 parser.setValidationSchemaFullChecking(true);
326                 parser.setErrorHandler(&error_handler);
327
328                 LocalFileResolver resolver (xsd_dtd_directory);
329                 parser.setEntityResolver(&resolver);
330
331                 try {
332                         parser.resetDocumentPool();
333                         parse(parser, xml);
334                 } catch (XMLException& e) {
335                         throw MiscError(xml_ch_to_string(e.getMessage()));
336                 } catch (DOMException& e) {
337                         throw MiscError(xml_ch_to_string(e.getMessage()));
338                 } catch (...) {
339                         throw MiscError("Unknown exception from xerces");
340                 }
341         }
342
343         XMLPlatformUtils::Terminate ();
344
345         for (auto i: error_handler.errors()) {
346                 notes.push_back ({
347                         VerificationNote::VERIFY_ERROR,
348                         VerificationNote::INVALID_XML,
349                         i.message(),
350                         boost::trim_copy(i.public_id() + " " + i.system_id()),
351                         i.line()
352                 });
353         }
354 }
355
356
/** Outcome of the hash comparisons made by verify_asset() */
enum VerifyAssetResult
{
        /** the hash calculated from the asset matches the PKL, and the CPL's hash (if it has one) agrees with the PKL */
        VERIFY_ASSET_RESULT_GOOD,
        /** the hash given by the CPL is different from the one given by the PKL */
        VERIFY_ASSET_RESULT_CPL_PKL_DIFFER,
        /** the hash calculated from the asset is different from the one given by the PKL */
        VERIFY_ASSET_RESULT_BAD,
};
362
363
364 static VerifyAssetResult
365 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
366 {
367         auto const actual_hash = reel_mxf->asset_ref()->hash(progress);
368
369         auto pkls = dcp->pkls();
370         /* We've read this DCP in so it must have at least one PKL */
371         DCP_ASSERT (!pkls.empty());
372
373         auto asset = reel_mxf->asset_ref().asset();
374
375         optional<string> pkl_hash;
376         for (auto i: pkls) {
377                 pkl_hash = i->hash (reel_mxf->asset_ref()->id());
378                 if (pkl_hash) {
379                         break;
380                 }
381         }
382
383         DCP_ASSERT (pkl_hash);
384
385         auto cpl_hash = reel_mxf->hash();
386         if (cpl_hash && *cpl_hash != *pkl_hash) {
387                 return VERIFY_ASSET_RESULT_CPL_PKL_DIFFER;
388         }
389
390         if (actual_hash != *pkl_hash) {
391                 return VERIFY_ASSET_RESULT_BAD;
392         }
393
394         return VERIFY_ASSET_RESULT_GOOD;
395 }
396
397
398 void
399 verify_language_tag (string tag, vector<VerificationNote>& notes)
400 {
401         try {
402                 LanguageTag test (tag);
403         } catch (LanguageTagError &) {
404                 notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::INVALID_LANGUAGE, tag));
405         }
406 }
407
408
/** Outcome of the frame size check made by verify_picture_asset_type() */
enum VerifyPictureAssetResult {
        /** the biggest frame is no bigger than the lower ("risky") threshold */
        VERIFY_PICTURE_ASSET_RESULT_GOOD,
        /** the biggest frame is over the lower threshold but not over the maximum */
        VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE,
        /** the biggest frame is over the maximum */
        VERIFY_PICTURE_ASSET_RESULT_BAD
};
415
416
417 int
418 biggest_frame_size (shared_ptr<const MonoPictureFrame> frame)
419 {
420         return frame->size ();
421 }
422
423 int
424 biggest_frame_size (shared_ptr<const StereoPictureFrame> frame)
425 {
426         return max(frame->left()->size(), frame->right()->size());
427 }
428
429
430 template <class A, class R, class F>
431 optional<VerifyPictureAssetResult>
432 verify_picture_asset_type (shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
433 {
434         auto asset = dynamic_pointer_cast<A>(reel_mxf->asset_ref().asset());
435         if (!asset) {
436                 return optional<VerifyPictureAssetResult>();
437         }
438
439         int biggest_frame = 0;
440         auto reader = asset->start_read ();
441         auto const duration = asset->intrinsic_duration ();
442         for (int64_t i = 0; i < duration; ++i) {
443                 shared_ptr<const F> frame = reader->get_frame (i);
444                 biggest_frame = max(biggest_frame, biggest_frame_size(frame));
445                 progress (float(i) / duration);
446         }
447
448         static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
449         static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
450         if (biggest_frame > max_frame) {
451                 return VERIFY_PICTURE_ASSET_RESULT_BAD;
452         } else if (biggest_frame > risky_frame) {
453                 return VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE;
454         }
455
456         return VERIFY_PICTURE_ASSET_RESULT_GOOD;
457 }
458
459
460 static VerifyPictureAssetResult
461 verify_picture_asset (shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
462 {
463         auto r = verify_picture_asset_type<MonoPictureAsset, MonoPictureAssetReader, MonoPictureFrame>(reel_mxf, progress);
464         if (!r) {
465                 r = verify_picture_asset_type<StereoPictureAsset, StereoPictureAssetReader, StereoPictureFrame>(reel_mxf, progress);
466         }
467
468         DCP_ASSERT (r);
469         return *r;
470 }
471
472
473 static void
474 verify_main_picture_asset (
475         shared_ptr<const DCP> dcp,
476         shared_ptr<const ReelPictureAsset> reel_asset,
477         function<void (string, optional<boost::filesystem::path>)> stage,
478         function<void (float)> progress,
479         vector<VerificationNote>& notes
480         )
481 {
482         auto asset = reel_asset->asset();
483         auto const file = *asset->file();
484         stage ("Checking picture asset hash", file);
485         auto const r = verify_asset (dcp, reel_asset, progress);
486         switch (r) {
487                 case VERIFY_ASSET_RESULT_BAD:
488                         notes.push_back (
489                                 VerificationNote(
490                                         VerificationNote::VERIFY_ERROR, VerificationNote::INCORRECT_PICTURE_HASH, file
491                                         )
492                                 );
493                         break;
494                 case VERIFY_ASSET_RESULT_CPL_PKL_DIFFER:
495                         notes.push_back (
496                                 VerificationNote(
497                                         VerificationNote::VERIFY_ERROR, VerificationNote::MISMATCHED_PICTURE_HASHES, file
498                                         )
499                                 );
500                         break;
501                 default:
502                         break;
503         }
504         stage ("Checking picture frame sizes", asset->file());
505         auto const pr = verify_picture_asset (reel_asset, progress);
506         switch (pr) {
507                 case VERIFY_PICTURE_ASSET_RESULT_BAD:
508                         notes.push_back ({
509                                 VerificationNote::VERIFY_ERROR, VerificationNote::INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
510                         });
511                         break;
512                 case VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE:
513                         notes.push_back ({
514                                 VerificationNote::VERIFY_WARNING, VerificationNote::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
515                         });
516                         break;
517                 default:
518                         break;
519         }
520
521         /* Only flat/scope allowed by Bv2.1 */
522         if (
523                 asset->size() != Size(2048, 858) &&
524                 asset->size() != Size(1998, 1080) &&
525                 asset->size() != Size(4096, 1716) &&
526                 asset->size() != Size(3996, 2160)) {
527                 notes.push_back(
528                         VerificationNote(
529                                 VerificationNote::VERIFY_BV21_ERROR,
530                                 VerificationNote::INVALID_PICTURE_SIZE_IN_PIXELS,
531                                 String::compose("%1x%2", asset->size().width, asset->size().height),
532                                 file
533                                 )
534                         );
535         }
536
537         /* Only 24, 25, 48fps allowed for 2K */
538         if (
539                 (asset->size() == Size(2048, 858) || asset->size() == Size(1998, 1080)) &&
540                 (asset->edit_rate() != Fraction(24, 1) && asset->edit_rate() != Fraction(25, 1) && asset->edit_rate() != Fraction(48, 1))
541            ) {
542                 notes.push_back(
543                         VerificationNote(
544                                 VerificationNote::VERIFY_BV21_ERROR,
545                                 VerificationNote::INVALID_PICTURE_FRAME_RATE_FOR_2K,
546                                 String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
547                                 file
548                                 )
549                         );
550         }
551
552         if (asset->size() == Size(4096, 1716) || asset->size() == Size(3996, 2160)) {
553                 /* Only 24fps allowed for 4K */
554                 if (asset->edit_rate() != Fraction(24, 1)) {
555                         notes.push_back(
556                                 VerificationNote(
557                                         VerificationNote::VERIFY_BV21_ERROR,
558                                         VerificationNote::INVALID_PICTURE_FRAME_RATE_FOR_4K,
559                                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
560                                         file
561                                         )
562                                 );
563                 }
564
565                 /* Only 2D allowed for 4K */
566                 if (dynamic_pointer_cast<const StereoPictureAsset>(asset)) {
567                         notes.push_back(
568                                 VerificationNote(
569                                         VerificationNote::VERIFY_BV21_ERROR,
570                                         VerificationNote::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D,
571                                         String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator),
572                                         file
573                                         )
574                                 );
575
576                 }
577         }
578
579 }
580
581
582 static void
583 verify_main_sound_asset (
584         shared_ptr<const DCP> dcp,
585         shared_ptr<const ReelSoundAsset> reel_asset,
586         function<void (string, optional<boost::filesystem::path>)> stage,
587         function<void (float)> progress,
588         vector<VerificationNote>& notes
589         )
590 {
591         auto asset = reel_asset->asset();
592         stage ("Checking sound asset hash", asset->file());
593         auto const r = verify_asset (dcp, reel_asset, progress);
594         switch (r) {
595                 case VERIFY_ASSET_RESULT_BAD:
596                         notes.push_back (
597                                 VerificationNote(
598                                         VerificationNote::VERIFY_ERROR, VerificationNote::INCORRECT_SOUND_HASH, *asset->file()
599                                         )
600                                 );
601                         break;
602                 case VERIFY_ASSET_RESULT_CPL_PKL_DIFFER:
603                         notes.push_back (
604                                 VerificationNote(
605                                         VerificationNote::VERIFY_ERROR, VerificationNote::MISMATCHED_SOUND_HASHES, *asset->file()
606                                         )
607                                 );
608                         break;
609                 default:
610                         break;
611         }
612
613         stage ("Checking sound asset metadata", asset->file());
614
615         verify_language_tag (asset->language(), notes);
616         if (asset->sampling_rate() != 48000) {
617                 notes.push_back (
618                         VerificationNote(
619                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::INVALID_SOUND_FRAME_RATE, raw_convert<string>(asset->sampling_rate()), *asset->file()
620                                 )
621                         );
622         }
623 }
624
625
626 static void
627 verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, vector<VerificationNote>& notes)
628 {
629         /* XXX: is Language compulsory? */
630         if (reel_asset->language()) {
631                 verify_language_tag (*reel_asset->language(), notes);
632         }
633
634         if (!reel_asset->entry_point()) {
635                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_SUBTITLE_ENTRY_POINT, reel_asset->id() });
636         } else if (reel_asset->entry_point().get()) {
637                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::INCORRECT_SUBTITLE_ENTRY_POINT, reel_asset->id() });
638         }
639 }
640
641
642 static void
643 verify_closed_caption_reel (shared_ptr<const ReelClosedCaptionAsset> reel_asset, vector<VerificationNote>& notes)
644 {
645         /* XXX: is Language compulsory? */
646         if (reel_asset->language()) {
647                 verify_language_tag (*reel_asset->language(), notes);
648         }
649
650         if (!reel_asset->entry_point()) {
651                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
652         } else if (reel_asset->entry_point().get()) {
653                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::INCORRECT_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() });
654         }
655 }
656
657
658 struct State
659 {
660         boost::optional<string> subtitle_language;
661 };
662
663
664
665 void
666 verify_smpte_subtitle_asset (
667         shared_ptr<const SMPTESubtitleAsset> asset,
668         vector<VerificationNote>& notes,
669         State& state
670         )
671 {
672         if (asset->language()) {
673                 auto const language = *asset->language();
674                 verify_language_tag (language, notes);
675                 if (!state.subtitle_language) {
676                         state.subtitle_language = language;
677                 } else if (state.subtitle_language != language) {
678                         notes.push_back ({ VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISMATCHED_SUBTITLE_LANGUAGES });
679                 }
680         } else {
681                 notes.push_back ({ VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_SUBTITLE_LANGUAGE, *asset->file() });
682         }
683         auto const size = boost::filesystem::file_size(asset->file().get());
684         if (size > 115 * 1024 * 1024) {
685                 notes.push_back (
686                         { VerificationNote::VERIFY_BV21_ERROR, VerificationNote::INVALID_TIMED_TEXT_SIZE_IN_BYTES, raw_convert<string>(size), *asset->file() }
687                         );
688         }
689         /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB"
690          * but I'm hoping that checking for the total size of all fonts being <= 10MB will do.
691          */
692         auto fonts = asset->font_data ();
693         int total_size = 0;
694         for (auto i: fonts) {
695                 total_size += i.second.size();
696         }
697         if (total_size > 10 * 1024 * 1024) {
698                 notes.push_back ({ VerificationNote::VERIFY_BV21_ERROR, VerificationNote::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES, raw_convert<string>(total_size), asset->file().get() });
699         }
700
701         if (!asset->start_time()) {
702                 notes.push_back ({ VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_SUBTITLE_START_TIME, asset->file().get() });
703         } else if (asset->start_time() != Time()) {
704                 notes.push_back ({ VerificationNote::VERIFY_BV21_ERROR, VerificationNote::INVALID_SUBTITLE_START_TIME, asset->file().get() });
705         }
706 }
707
708
709 static void
710 verify_subtitle_asset (
711         shared_ptr<const SubtitleAsset> asset,
712         function<void (string, optional<boost::filesystem::path>)> stage,
713         boost::filesystem::path xsd_dtd_directory,
714         vector<VerificationNote>& notes,
715         State& state
716         )
717 {
718         stage ("Checking subtitle XML", asset->file());
719         /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
720          * gets passed through libdcp which may clean up and therefore hide errors.
721          */
722         validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
723
724         auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
725         if (smpte) {
726                 verify_smpte_subtitle_asset (smpte, notes, state);
727         }
728 }
729
730
731 static void
732 verify_closed_caption_asset (
733         shared_ptr<const SubtitleAsset> asset,
734         function<void (string, optional<boost::filesystem::path>)> stage,
735         boost::filesystem::path xsd_dtd_directory,
736         vector<VerificationNote>& notes,
737         State& state
738         )
739 {
740         verify_subtitle_asset (asset, stage, xsd_dtd_directory, notes, state);
741
742         if (asset->raw_xml().size() > 256 * 1024) {
743                 notes.push_back (
744                         VerificationNote(
745                                 VerificationNote::VERIFY_BV21_ERROR, VerificationNote::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES, raw_convert<string>(asset->raw_xml().size()), *asset->file()
746                                 )
747                         );
748         }
749 }
750
751
752 static
753 void
754 check_text_timing (
755         vector<shared_ptr<Reel>> reels,
756         optional<int> picture_frame_rate,
757         vector<VerificationNote>& notes,
758         std::function<bool (shared_ptr<Reel>)> check,
759         std::function<string (shared_ptr<Reel>)> xml,
760         std::function<int64_t (shared_ptr<Reel>)> duration
761         )
762 {
763         /* end of last subtitle (in editable units) */
764         optional<int64_t> last_out;
765         auto too_short = false;
766         auto too_close = false;
767         auto too_early = false;
768         /* current reel start time (in editable units) */
769         int64_t reel_offset = 0;
770
771         std::function<void (cxml::ConstNodePtr, int, int, bool)> parse;
772         parse = [&parse, &last_out, &too_short, &too_close, &too_early, &reel_offset](cxml::ConstNodePtr node, int tcr, int pfr, bool first_reel) {
773                 if (node->name() == "Subtitle") {
774                         Time in (node->string_attribute("TimeIn"), tcr);
775                         Time out (node->string_attribute("TimeOut"), tcr);
776                         if (first_reel && in < Time(0, 0, 4, 0, tcr)) {
777                                 too_early = true;
778                         }
779                         auto length = out - in;
780                         if (length.as_editable_units(pfr) < 15) {
781                                 too_short = true;
782                         }
783                         if (last_out) {
784                                 /* XXX: this feels dubious - is it really what Bv2.1 means? */
785                                 auto distance = reel_offset + in.as_editable_units(pfr) - *last_out;
786                                 if (distance >= 0 && distance < 2) {
787                                         too_close = true;
788                                 }
789                         }
790                         last_out = reel_offset + out.as_editable_units(pfr);
791                 } else {
792                         for (auto i: node->node_children()) {
793                                 parse(i, tcr, pfr, first_reel);
794                         }
795                 }
796         };
797
798         for (auto i = 0U; i < reels.size(); ++i) {
799                 if (!check(reels[i])) {
800                         continue;
801                 }
802
803                 /* We need to look at <Subtitle> instances in the XML being checked, so we can't use the subtitles
804                  * read in by libdcp's parser.
805                  */
806
807                 auto doc = make_shared<cxml::Document>("SubtitleReel");
808                 doc->read_string (xml(reels[i]));
809                 auto const tcr = doc->number_child<int>("TimeCodeRate");
810                 parse (doc, tcr, picture_frame_rate.get_value_or(24), i == 0);
811                 reel_offset += duration(reels[i]);
812         }
813
814         if (too_early) {
815                 notes.push_back({
816                         VerificationNote::VERIFY_WARNING, VerificationNote::INVALID_SUBTITLE_FIRST_TEXT_TIME
817                 });
818         }
819
820         if (too_short) {
821                 notes.push_back ({
822                         VerificationNote::VERIFY_WARNING, VerificationNote::INVALID_SUBTITLE_DURATION
823                 });
824         }
825
826         if (too_close) {
827                 notes.push_back ({
828                         VerificationNote::VERIFY_WARNING, VerificationNote::INVALID_SUBTITLE_SPACING
829                 });
830         }
831 }
832
833
/** Flags which check_text_lines_and_characters() sets as it finds problems; a flag is
 *  never cleared by that function, so one instance can accumulate results over several assets.
 */
struct LinesCharactersResult
{
        /** true if a line was found to be too long when compared with the warning length */
        bool warning_length_exceeded { false };
        /** true if a line was found to be too long when compared with the error length */
        bool error_length_exceeded { false };
        /** true if too many lines were found to be present at the same time */
        bool line_count_exceeded { false };
};
840
841
842 static
843 void
844 check_text_lines_and_characters (
845         shared_ptr<SubtitleAsset> asset,
846         int warning_length,
847         int error_length,
848         LinesCharactersResult* result
849         )
850 {
851         class Event
852         {
853         public:
854                 Event (Time time_, float position_, int characters_)
855                         : time (time_)
856                         , position (position_)
857                         , characters (characters_)
858                 {}
859
860                 Event (Time time_, shared_ptr<Event> start_)
861                         : time (time_)
862                         , start (start_)
863                 {}
864
865                 Time time;
866                 int position; //< position from 0 at top of screen to 100 at bottom
867                 int characters;
868                 shared_ptr<Event> start;
869         };
870
871         vector<shared_ptr<Event>> events;
872
873         auto position = [](shared_ptr<const SubtitleString> sub) {
874                 switch (sub->v_align()) {
875                 case VALIGN_TOP:
876                         return lrintf(sub->v_position() * 100);
877                 case VALIGN_CENTER:
878                         return lrintf((0.5f + sub->v_position()) * 100);
879                 case VALIGN_BOTTOM:
880                         return lrintf((1.0f - sub->v_position()) * 100);
881                 }
882
883                 return 0L;
884         };
885
886         for (auto j: asset->subtitles()) {
887                 auto text = dynamic_pointer_cast<const SubtitleString>(j);
888                 if (text) {
889                         auto in = make_shared<Event>(text->in(), position(text), text->text().length());
890                         events.push_back(in);
891                         events.push_back(make_shared<Event>(text->out(), in));
892                 }
893         }
894
895         std::sort(events.begin(), events.end(), [](shared_ptr<Event> const& a, shared_ptr<Event>const& b) {
896                 return a->time < b->time;
897         });
898
899         map<int, int> current;
900         for (auto i: events) {
901                 if (current.size() > 3) {
902                         result->line_count_exceeded = true;
903                 }
904                 for (auto j: current) {
905                         if (j.second >= warning_length) {
906                                 result->warning_length_exceeded = true;
907                         }
908                         if (j.second >= error_length) {
909                                 result->error_length_exceeded = true;
910                         }
911                 }
912
913                 if (i->start) {
914                         /* end of a subtitle */
915                         DCP_ASSERT (current.find(i->start->position) != current.end());
916                         if (current[i->start->position] == i->start->characters) {
917                                 current.erase(i->start->position);
918                         } else {
919                                 current[i->start->position] -= i->start->characters;
920                         }
921                 } else {
922                         /* start of a subtitle */
923                         if (current.find(i->position) == current.end()) {
924                                 current[i->position] = i->characters;
925                         } else {
926                                 current[i->position] += i->characters;
927                         }
928                 }
929         }
930 }
931
932
933 static
934 void
935 check_text_timing (vector<shared_ptr<Reel>> reels, vector<VerificationNote>& notes)
936 {
937         if (reels.empty()) {
938                 return;
939         }
940
941         optional<int> picture_frame_rate;
942         if (reels[0]->main_picture()) {
943                 picture_frame_rate = reels[0]->main_picture()->frame_rate().numerator;
944         }
945
946         if (reels[0]->main_subtitle()) {
947                 check_text_timing (reels, picture_frame_rate, notes,
948                         [](shared_ptr<Reel> reel) {
949                                 return static_cast<bool>(reel->main_subtitle());
950                         },
951                         [](shared_ptr<Reel> reel) {
952                                 return reel->main_subtitle()->asset()->raw_xml();
953                         },
954                         [](shared_ptr<Reel> reel) {
955                                 return reel->main_subtitle()->actual_duration();
956                         }
957                 );
958         }
959
960         for (auto i = 0U; i < reels[0]->closed_captions().size(); ++i) {
961                 check_text_timing (reels, picture_frame_rate, notes,
962                         [i](shared_ptr<Reel> reel) {
963                                 return i < reel->closed_captions().size();
964                         },
965                         [i](shared_ptr<Reel> reel) {
966                                 return reel->closed_captions()[i]->asset()->raw_xml();
967                         },
968                         [i](shared_ptr<Reel> reel) {
969                                 return reel->closed_captions()[i]->actual_duration();
970                         }
971                 );
972         }
973 }
974
975
976 void
977 check_extension_metadata (shared_ptr<CPL> cpl, vector<VerificationNote>& notes)
978 {
979         DCP_ASSERT (cpl->file());
980         cxml::Document doc ("CompositionPlaylist");
981         doc.read_file (cpl->file().get());
982
983         auto missing = false;
984         string malformed;
985
986         if (auto reel_list = doc.node_child("ReelList")) {
987                 auto reels = reel_list->node_children("Reel");
988                 if (!reels.empty()) {
989                         if (auto asset_list = reels[0]->optional_node_child("AssetList")) {
990                                 if (auto metadata = asset_list->optional_node_child("CompositionMetadataAsset")) {
991                                         if (auto extension_list = metadata->optional_node_child("ExtensionMetadataList")) {
992                                                 missing = true;
993                                                 for (auto extension: extension_list->node_children("ExtensionMetadata")) {
994                                                         if (extension->optional_string_attribute("scope").get_value_or("") != "http://isdcf.com/ns/cplmd/app") {
995                                                                 continue;
996                                                         }
997                                                         missing = false;
998                                                         if (auto name = extension->optional_node_child("Name")) {
999                                                                 if (name->content() != "Application") {
1000                                                                         malformed = "<Name> should be 'Application'";
1001                                                                 }
1002                                                         }
1003                                                         if (auto property_list = extension->optional_node_child("PropertyList")) {
1004                                                                 if (auto property = property_list->optional_node_child("Property")) {
1005                                                                         if (auto name = property->optional_node_child("Name")) {
1006                                                                                 if (name->content() != "DCP Constraints Profile") {
1007                                                                                         malformed = "<Name> property should be 'DCP Constraints Profile'";
1008                                                                                 }
1009                                                                         }
1010                                                                         if (auto value = property->optional_node_child("Value")) {
1011                                                                                 if (value->content() != "SMPTE-RDD-52:2020-Bv2.1") {
1012                                                                                         malformed = "<Value> property should be 'SMPTE-RDD-52:2020-Bv2.1'";
1013                                                                                 }
1014                                                                         }
1015                                                                 }
1016                                                         }
1017                                                 }
1018                                         } else {
1019                                                 missing = true;
1020                                         }
1021                                 }
1022                         }
1023                 }
1024         }
1025
1026         if (missing) {
1027                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_EXTENSION_METADATA, cpl->id(), cpl->file().get()});
1028         } else if (!malformed.empty()) {
1029                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::INVALID_EXTENSION_METADATA, malformed, cpl->file().get()});
1030         }
1031 }
1032
1033
1034 bool
1035 pkl_has_encrypted_assets (shared_ptr<DCP> dcp, shared_ptr<PKL> pkl)
1036 {
1037         vector<string> encrypted;
1038         for (auto i: dcp->cpls()) {
1039                 for (auto j: i->reel_mxfs()) {
1040                         if (j->asset_ref().resolved()) {
1041                                 /* It's a bit surprising / broken but Interop subtitle assets are represented
1042                                  * in reels by ReelSubtitleAsset which inherits ReelMXF, so it's possible for
1043                                  * ReelMXFs to have assets which are not MXFs.
1044                                  */
1045                                 if (auto asset = dynamic_pointer_cast<MXF>(j->asset_ref().asset())) {
1046                                         if (asset->encrypted()) {
1047                                                 encrypted.push_back(j->asset_ref().id());
1048                                         }
1049                                 }
1050                         }
1051                 }
1052         }
1053
1054         for (auto i: pkl->asset_list()) {
1055                 if (find(encrypted.begin(), encrypted.end(), i->id()) != encrypted.end()) {
1056                         return true;
1057                 }
1058         }
1059
1060         return false;
1061 }
1062
1063
1064 vector<VerificationNote>
1065 dcp::verify (
1066         vector<boost::filesystem::path> directories,
1067         function<void (string, optional<boost::filesystem::path>)> stage,
1068         function<void (float)> progress,
1069         boost::filesystem::path xsd_dtd_directory
1070         )
1071 {
1072         xsd_dtd_directory = boost::filesystem::canonical (xsd_dtd_directory);
1073
1074         vector<VerificationNote> notes;
1075         State state;
1076
1077         vector<shared_ptr<DCP>> dcps;
1078         for (auto i: directories) {
1079                 dcps.push_back (shared_ptr<DCP> (new DCP (i)));
1080         }
1081
1082         for (auto dcp: dcps) {
1083                 stage ("Checking DCP", dcp->directory());
1084                 try {
1085                         dcp->read (&notes);
1086                 } catch (ReadError& e) {
1087                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::FAILED_READ, string(e.what())));
1088                 } catch (XMLError& e) {
1089                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::FAILED_READ, string(e.what())));
1090                 } catch (MXFFileError& e) {
1091                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::FAILED_READ, string(e.what())));
1092                 } catch (cxml::Error& e) {
1093                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::FAILED_READ, string(e.what())));
1094                 }
1095
1096                 if (dcp->standard() != SMPTE) {
1097                         notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::INVALID_STANDARD));
1098                 }
1099
1100                 for (auto cpl: dcp->cpls()) {
1101                         stage ("Checking CPL", cpl->file());
1102                         validate_xml (cpl->file().get(), xsd_dtd_directory, notes);
1103
1104                         if (cpl->any_encrypted() && !cpl->all_encrypted()) {
1105                                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::PARTIALLY_ENCRYPTED});
1106                         }
1107
1108                         for (auto const& i: cpl->additional_subtitle_languages()) {
1109                                 verify_language_tag (i, notes);
1110                         }
1111
1112                         if (cpl->release_territory()) {
1113                                 if (!cpl->release_territory_scope() || cpl->release_territory_scope().get() != "http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata#scope/release-territory/UNM49") {
1114                                         auto terr = cpl->release_territory().get();
1115                                         /* Must be a valid region tag, or "001" */
1116                                         try {
1117                                                 LanguageTag::RegionSubtag test (terr);
1118                                         } catch (...) {
1119                                                 if (terr != "001") {
1120                                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::INVALID_LANGUAGE, terr});
1121                                                 }
1122                                         }
1123                                 }
1124                         }
1125
1126                         if (dcp->standard() == SMPTE) {
1127                                 if (!cpl->annotation_text()) {
1128                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1129                                 } else if (cpl->annotation_text().get() != cpl->content_title_text()) {
1130                                         notes.push_back ({VerificationNote::VERIFY_WARNING, VerificationNote::MISMATCHED_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
1131                                 }
1132                         }
1133
1134                         for (auto i: dcp->pkls()) {
1135                                 /* Check that the CPL's hash corresponds to the PKL */
1136                                 optional<string> h = i->hash(cpl->id());
1137                                 if (h && make_digest(ArrayData(*cpl->file())) != *h) {
1138                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::MISMATCHED_CPL_HASHES, cpl->id(), cpl->file().get()));
1139                                 }
1140
1141                                 /* Check that any PKL with a single CPL has its AnnotationText the same as the CPL's ContentTitleText */
1142                                 optional<string> required_annotation_text;
1143                                 for (auto j: i->asset_list()) {
1144                                         /* See if this is a CPL */
1145                                         for (auto k: dcp->cpls()) {
1146                                                 if (j->id() == k->id()) {
1147                                                         if (!required_annotation_text) {
1148                                                                 /* First CPL we have found; this is the required AnnotationText unless we find another */
1149                                                                 required_annotation_text = cpl->content_title_text();
1150                                                         } else {
1151                                                                 /* There's more than one CPL so we don't care what the PKL's AnnotationText is */
1152                                                                 required_annotation_text = boost::none;
1153                                                         }
1154                                                 }
1155                                         }
1156                                 }
1157
1158                                 if (required_annotation_text && i->annotation_text() != required_annotation_text) {
1159                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL, i->id(), i->file().get()});
1160                                 }
1161                         }
1162
1163                         /* set to true if any reel has a MainSubtitle */
1164                         auto have_main_subtitle = false;
1165                         /* set to true if any reel has no MainSubtitle */
1166                         auto have_no_main_subtitle = false;
1167                         /* fewest number of closed caption assets seen in a reel */
1168                         size_t fewest_closed_captions = SIZE_MAX;
1169                         /* most number of closed caption assets seen in a reel */
1170                         size_t most_closed_captions = 0;
1171                         map<Marker, Time> markers_seen;
1172
1173                         for (auto reel: cpl->reels()) {
1174                                 stage ("Checking reel", optional<boost::filesystem::path>());
1175
1176                                 for (auto i: reel->assets()) {
1177                                         if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1178                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INVALID_DURATION, i->id()));
1179                                         }
1180                                         if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
1181                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INVALID_INTRINSIC_DURATION, i->id()));
1182                                         }
1183                                         auto mxf = dynamic_pointer_cast<ReelMXF>(i);
1184                                         if (mxf && !mxf->hash()) {
1185                                                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_HASH, i->id()});
1186                                         }
1187                                 }
1188
1189                                 if (dcp->standard() == SMPTE) {
1190                                         boost::optional<int64_t> duration;
1191                                         for (auto i: reel->assets()) {
1192                                                 if (!duration) {
1193                                                         duration = i->actual_duration();
1194                                                 } else if (*duration != i->actual_duration()) {
1195                                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISMATCHED_ASSET_DURATION});
1196                                                         break;
1197                                                 }
1198                                         }
1199                                 }
1200
1201                                 if (reel->main_picture()) {
1202                                         /* Check reel stuff */
1203                                         auto const frame_rate = reel->main_picture()->frame_rate();
1204                                         if (frame_rate.denominator != 1 ||
1205                                             (frame_rate.numerator != 24 &&
1206                                              frame_rate.numerator != 25 &&
1207                                              frame_rate.numerator != 30 &&
1208                                              frame_rate.numerator != 48 &&
1209                                              frame_rate.numerator != 50 &&
1210                                              frame_rate.numerator != 60 &&
1211                                              frame_rate.numerator != 96)) {
1212                                                 notes.push_back ({
1213                                                         VerificationNote::VERIFY_ERROR,
1214                                                         VerificationNote::INVALID_PICTURE_FRAME_RATE,
1215                                                         String::compose("%1/%2", frame_rate.numerator, frame_rate.denominator)
1216                                                 });
1217                                         }
1218                                         /* Check asset */
1219                                         if (reel->main_picture()->asset_ref().resolved()) {
1220                                                 verify_main_picture_asset (dcp, reel->main_picture(), stage, progress, notes);
1221                                         }
1222                                 }
1223
1224                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
1225                                         verify_main_sound_asset (dcp, reel->main_sound(), stage, progress, notes);
1226                                 }
1227
1228                                 if (reel->main_subtitle()) {
1229                                         verify_main_subtitle_reel (reel->main_subtitle(), notes);
1230                                         if (reel->main_subtitle()->asset_ref().resolved()) {
1231                                                 verify_subtitle_asset (reel->main_subtitle()->asset(), stage, xsd_dtd_directory, notes, state);
1232                                         }
1233                                         have_main_subtitle = true;
1234                                 } else {
1235                                         have_no_main_subtitle = true;
1236                                 }
1237
1238                                 for (auto i: reel->closed_captions()) {
1239                                         verify_closed_caption_reel (i, notes);
1240                                         if (i->asset_ref().resolved()) {
1241                                                 verify_closed_caption_asset (i->asset(), stage, xsd_dtd_directory, notes, state);
1242                                         }
1243                                 }
1244
1245                                 if (reel->main_markers()) {
1246                                         for (auto const& i: reel->main_markers()->get()) {
1247                                                 markers_seen.insert (i);
1248                                         }
1249                                 }
1250
1251                                 fewest_closed_captions = std::min (fewest_closed_captions, reel->closed_captions().size());
1252                                 most_closed_captions = std::max (most_closed_captions, reel->closed_captions().size());
1253                         }
1254
1255                         if (dcp->standard() == SMPTE) {
1256
1257                                 if (have_main_subtitle && have_no_main_subtitle) {
1258                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS});
1259                                 }
1260
1261                                 if (fewest_closed_captions != most_closed_captions) {
1262                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS});
1263                                 }
1264
1265                                 if (cpl->content_kind() == FEATURE) {
1266                                         if (markers_seen.find(Marker::FFEC) == markers_seen.end()) {
1267                                                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_FFEC_IN_FEATURE});
1268                                         }
1269                                         if (markers_seen.find(Marker::FFMC) == markers_seen.end()) {
1270                                                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_FFMC_IN_FEATURE});
1271                                         }
1272                                 }
1273
1274                                 auto ffoc = markers_seen.find(Marker::FFOC);
1275                                 if (ffoc == markers_seen.end()) {
1276                                         notes.push_back ({VerificationNote::VERIFY_WARNING, VerificationNote::MISSING_FFOC});
1277                                 } else if (ffoc->second.e != 1) {
1278                                         notes.push_back ({VerificationNote::VERIFY_WARNING, VerificationNote::INCORRECT_FFOC, raw_convert<string>(ffoc->second.e)});
1279                                 }
1280
1281                                 auto lfoc = markers_seen.find(Marker::LFOC);
1282                                 if (lfoc == markers_seen.end()) {
1283                                         notes.push_back ({VerificationNote::VERIFY_WARNING, VerificationNote::MISSING_LFOC});
1284                                 } else {
1285                                         auto lfoc_time = lfoc->second.as_editable_units(lfoc->second.tcr);
1286                                         if (lfoc_time != (cpl->reels().back()->duration() - 1)) {
1287                                                 notes.push_back ({VerificationNote::VERIFY_WARNING, VerificationNote::INCORRECT_LFOC, raw_convert<string>(lfoc_time)});
1288                                         }
1289                                 }
1290
1291                                 check_text_timing (cpl->reels(), notes);
1292
1293                                 LinesCharactersResult result;
1294                                 for (auto reel: cpl->reels()) {
1295                                         if (reel->main_subtitle() && reel->main_subtitle()->asset()) {
1296                                                 check_text_lines_and_characters (reel->main_subtitle()->asset(), 52, 79, &result);
1297                                         }
1298                                 }
1299
1300                                 if (result.line_count_exceeded) {
1301                                         notes.push_back ({VerificationNote::VERIFY_WARNING, VerificationNote::INVALID_SUBTITLE_LINE_COUNT});
1302                                 }
1303                                 if (result.error_length_exceeded) {
1304                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_WARNING, VerificationNote::INVALID_SUBTITLE_LINE_LENGTH));
1305                                 } else if (result.warning_length_exceeded) {
1306                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_WARNING, VerificationNote::NEARLY_INVALID_SUBTITLE_LINE_LENGTH));
1307                                 }
1308
1309                                 result = LinesCharactersResult();
1310                                 for (auto reel: cpl->reels()) {
1311                                         for (auto i: reel->closed_captions()) {
1312                                                 if (i->asset()) {
1313                                                         check_text_lines_and_characters (i->asset(), 32, 32, &result);
1314                                                 }
1315                                         }
1316                                 }
1317
1318                                 if (result.line_count_exceeded) {
1319                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::INVALID_CLOSED_CAPTION_LINE_COUNT});
1320                                 }
1321                                 if (result.error_length_exceeded) {
1322                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::INVALID_CLOSED_CAPTION_LINE_LENGTH});
1323                                 }
1324
1325                                 if (!cpl->full_content_title_text()) {
1326                                         /* Since FullContentTitleText is assumed always to exist if there's a CompositionMetadataAsset we
1327                                          * can use it as a proxy for CompositionMetadataAsset's existence.
1328                                          */
1329                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_CPL_METADATA, cpl->id(), cpl->file().get()});
1330                                 } else if (!cpl->version_number()) {
1331                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::MISSING_CPL_METADATA_VERSION_NUMBER, cpl->id(), cpl->file().get()});
1332                                 }
1333
1334                                 check_extension_metadata (cpl, notes);
1335
1336                                 if (cpl->any_encrypted()) {
1337                                         cxml::Document doc ("CompositionPlaylist");
1338                                         DCP_ASSERT (cpl->file());
1339                                         doc.read_file (cpl->file().get());
1340                                         if (!doc.optional_node_child("Signature")) {
1341                                                 notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT, cpl->id(), cpl->file().get()});
1342                                         }
1343                                 }
1344                         }
1345                 }
1346
1347                 for (auto pkl: dcp->pkls()) {
1348                         stage ("Checking PKL", pkl->file());
1349                         validate_xml (pkl->file().get(), xsd_dtd_directory, notes);
1350                         if (pkl_has_encrypted_assets(dcp, pkl)) {
1351                                 cxml::Document doc ("PackingList");
1352                                 doc.read_file (pkl->file().get());
1353                                 if (!doc.optional_node_child("Signature")) {
1354                                         notes.push_back ({VerificationNote::VERIFY_BV21_ERROR, VerificationNote::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT, pkl->id(), pkl->file().get()});
1355                                 }
1356                         }
1357                 }
1358
1359                 if (dcp->asset_map_path()) {
1360                         stage ("Checking ASSETMAP", dcp->asset_map_path().get());
1361                         validate_xml (dcp->asset_map_path().get(), xsd_dtd_directory, notes);
1362                 } else {
1363                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::MISSING_ASSETMAP));
1364                 }
1365         }
1366
1367         return notes;
1368 }
1369
1370 string
1371 dcp::note_to_string (VerificationNote note)
1372 {
1373         /** These strings should say what is wrong, incorporating any extra details (ID, filenames etc.).
1374          *
1375          *  e.g. "ClosedCaption asset has no <EntryPoint> tag.",
1376          *  not "ClosedCaption assets must have an <EntryPoint> tag."
1377          *
1378          *  It's OK to use XML tag names where they are clear.
1379          *  If both ID and filename are available, use only the ID.
1380          *  End messages with a full stop.
1381          *  Messages should not mention whether or not their errors are a part of Bv2.1.
1382          */
1383         switch (note.code()) {
1384         case VerificationNote::FAILED_READ:
1385                 return *note.note();
1386         case VerificationNote::MISMATCHED_CPL_HASHES:
1387                 return String::compose("The hash of the CPL %1 in the PKL does not agree with the CPL file.", note.note().get());
1388         case VerificationNote::INVALID_PICTURE_FRAME_RATE:
1389                 return String::compose("The picture in a reel has an invalid frame rate %1.", note.note().get());
1390         case VerificationNote::INCORRECT_PICTURE_HASH:
1391                 return String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
1392         case VerificationNote::MISMATCHED_PICTURE_HASHES:
1393                 return String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
1394         case VerificationNote::INCORRECT_SOUND_HASH:
1395                 return String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
1396         case VerificationNote::MISMATCHED_SOUND_HASHES:
1397                 return String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
1398         case VerificationNote::EMPTY_ASSET_PATH:
1399                 return "The asset map contains an empty asset path.";
1400         case VerificationNote::MISSING_ASSET:
1401                 return String::compose("The file %1 for an asset in the asset map cannot be found.", note.file()->filename());
1402         case VerificationNote::MISMATCHED_STANDARD:
1403                 return "The DCP contains both SMPTE and Interop parts.";
1404         case VerificationNote::INVALID_XML:
1405                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
1406         case VerificationNote::MISSING_ASSETMAP:
1407                 return "No ASSETMAP or ASSETMAP.xml was found.";
1408         case VerificationNote::INVALID_INTRINSIC_DURATION:
1409                 return String::compose("The intrinsic duration of the asset %1 is less than 1 second long.", note.note().get());
1410         case VerificationNote::INVALID_DURATION:
1411                 return String::compose("The duration of the asset %1 is less than 1 second long.", note.note().get());
1412         case VerificationNote::INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1413                 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
1414         case VerificationNote::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
1415                 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
1416         case VerificationNote::EXTERNAL_ASSET:
1417                 return String::compose("The asset %1 that this DCP refers to is not included in the DCP.  It may be a VF.", note.note().get());
1418         case VerificationNote::INVALID_STANDARD:
1419                 return "This DCP does not use the SMPTE standard.";
1420         case VerificationNote::INVALID_LANGUAGE:
1421                 return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
1422         case VerificationNote::INVALID_PICTURE_SIZE_IN_PIXELS:
1423                 return String::compose("The size %1 of picture asset %2 is not allowed.", note.note().get(), note.file()->filename());
1424         case VerificationNote::INVALID_PICTURE_FRAME_RATE_FOR_2K:
1425                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 2K DCPs.", note.note().get(), note.file()->filename());
1426         case VerificationNote::INVALID_PICTURE_FRAME_RATE_FOR_4K:
1427                 return String::compose("The frame rate %1 of picture asset %2 is not allowed for 4K DCPs.", note.note().get(), note.file()->filename());
1428         case VerificationNote::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D:
1429                 return "3D 4K DCPs are not allowed.";
1430         case VerificationNote::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES:
1431                 return String::compose("The size %1 of the closed caption asset %2 is larger than the 256KB maximum.", note.note().get(), note.file()->filename());
1432         case VerificationNote::INVALID_TIMED_TEXT_SIZE_IN_BYTES:
1433                 return String::compose("The size %1 of the timed text asset %2 is larger than the 115MB maximum.", note.note().get(), note.file()->filename());
1434         case VerificationNote::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES:
1435                 return String::compose("The size %1 of the fonts in timed text asset %2 is larger than the 10MB maximum.", note.note().get(), note.file()->filename());
1436         case VerificationNote::MISSING_SUBTITLE_LANGUAGE:
1437                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <Language> tag.", note.file()->filename());
1438         case VerificationNote::MISMATCHED_SUBTITLE_LANGUAGES:
1439                 return "Some subtitle assets have different <Language> tags than others";
1440         case VerificationNote::MISSING_SUBTITLE_START_TIME:
1441                 return String::compose("The XML for the SMPTE subtitle asset %1 has no <StartTime> tag.", note.file()->filename());
1442         case VerificationNote::INVALID_SUBTITLE_START_TIME:
1443                 return String::compose("The XML for a SMPTE subtitle asset %1 has a non-zero <StartTime> tag.", note.file()->filename());
1444         case VerificationNote::INVALID_SUBTITLE_FIRST_TEXT_TIME:
1445                 return "The first subtitle or closed caption is less than 4 seconds from the start of the DCP.";
1446         case VerificationNote::INVALID_SUBTITLE_DURATION:
1447                 return "At least one subtitle lasts less than 15 frames.";
1448         case VerificationNote::INVALID_SUBTITLE_SPACING:
1449                 return "At least one pair of subtitles is separated by less than 2 frames.";
1450         case VerificationNote::INVALID_SUBTITLE_LINE_COUNT:
1451                 return "There are more than 3 subtitle lines in at least one place in the DCP.";
1452         case VerificationNote::NEARLY_INVALID_SUBTITLE_LINE_LENGTH:
1453                 return "There are more than 52 characters in at least one subtitle line.";
1454         case VerificationNote::INVALID_SUBTITLE_LINE_LENGTH:
1455                 return "There are more than 79 characters in at least one subtitle line.";
1456         case VerificationNote::INVALID_CLOSED_CAPTION_LINE_COUNT:
1457                 return "There are more than 3 closed caption lines in at least one place.";
1458         case VerificationNote::INVALID_CLOSED_CAPTION_LINE_LENGTH:
1459                 return "There are more than 32 characters in at least one closed caption line.";
1460         case VerificationNote::INVALID_SOUND_FRAME_RATE:
1461                 return String::compose("The sound asset %1 has a sampling rate of %2", note.file()->filename(), note.note().get());
1462         case VerificationNote::MISSING_CPL_ANNOTATION_TEXT:
1463                 return String::compose("The CPL %1 has no <AnnotationText> tag.", note.note().get());
1464         case VerificationNote::MISMATCHED_CPL_ANNOTATION_TEXT:
1465                 return String::compose("The CPL %1 has an <AnnotationText> which differs from its <ContentTitleText>", note.note().get());
1466         case VerificationNote::MISMATCHED_ASSET_DURATION:
1467                 return "All assets in a reel do not have the same duration.";
1468         case VerificationNote::MISSING_MAIN_SUBTITLE_FROM_SOME_REELS:
1469                 return "At least one reel contains a subtitle asset, but some reel(s) do not";
1470         case VerificationNote::MISMATCHED_CLOSED_CAPTION_ASSET_COUNTS:
1471                 return "At least one reel has closed captions, but reels have different numbers of closed caption assets.";
1472         case VerificationNote::MISSING_SUBTITLE_ENTRY_POINT:
1473                 return String::compose("The subtitle asset %1 has no <EntryPoint> tag.", note.note().get());
1474         case VerificationNote::INCORRECT_SUBTITLE_ENTRY_POINT:
1475                 return String::compose("The subtitle asset %1 has an <EntryPoint> other than 0.", note.note().get());
1476         case VerificationNote::MISSING_CLOSED_CAPTION_ENTRY_POINT:
1477                 return String::compose("The closed caption asset %1 has no <EntryPoint> tag.", note.note().get());
1478         case VerificationNote::INCORRECT_CLOSED_CAPTION_ENTRY_POINT:
1479                 return String::compose("The closed caption asset %1 has an <EntryPoint> other than 0.", note.note().get());
1480         case VerificationNote::MISSING_HASH:
1481                 return String::compose("The asset %1 has no <Hash> tag in the CPL.", note.note().get());
1482         case VerificationNote::MISSING_FFEC_IN_FEATURE:
1483                 return "The DCP is marked as a Feature but there is no FFEC (first frame of end credits) marker";
1484         case VerificationNote::MISSING_FFMC_IN_FEATURE:
1485                 return "The DCP is marked as a Feature but there is no FFMC (first frame of moving credits) marker";
1486         case VerificationNote::MISSING_FFOC:
1487                 return "There should be a FFOC (first frame of content) marker";
1488         case VerificationNote::MISSING_LFOC:
1489                 return "There should be a LFOC (last frame of content) marker";
1490         case VerificationNote::INCORRECT_FFOC:
1491                 return String::compose("The FFOC marker is %1 instead of 1", note.note().get());
1492         case VerificationNote::INCORRECT_LFOC:
1493                 return String::compose("The LFOC marker is %1 instead of 1 less than the duration of the last reel.", note.note().get());
1494         case VerificationNote::MISSING_CPL_METADATA:
1495                 return String::compose("The CPL %1 has no <CompositionMetadataAsset> tag.", note.note().get());
1496         case VerificationNote::MISSING_CPL_METADATA_VERSION_NUMBER:
1497                 return String::compose("The CPL %1 has no <VersionNumber> in its <CompositionMetadataAsset>.", note.note().get());
1498         case VerificationNote::MISSING_EXTENSION_METADATA:
1499                 return String::compose("The CPL %1 has no <ExtensionMetadata> in its <CompositionMetadataAsset>.", note.note().get());
1500         case VerificationNote::INVALID_EXTENSION_METADATA:
1501                 return String::compose("The CPL %1 has a malformed <ExtensionMetadata> (%2).", note.file()->filename(), note.note().get());
1502         case VerificationNote::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT:
1503                 return String::compose("The CPL %1, which has encrypted content, is not signed.", note.note().get());
1504         case VerificationNote::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT:
1505                 return String::compose("The PKL %1, which has encrypted content, is not signed.", note.note().get());
1506         case VerificationNote::MISMATCHED_PKL_ANNOTATION_TEXT_WITH_CPL:
1507                 return String::compose("The PKL %1 has only one CPL but its <AnnotationText> does not match the CPL's <ContentTitleText>", note.note().get());
1508         case VerificationNote::PARTIALLY_ENCRYPTED:
1509                 return "Some assets are encrypted but some are not";
1510         }
1511
1512         return "";
1513 }
1514
1515
1516 bool
1517 dcp::operator== (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
1518 {
1519         return a.type() == b.type() && a.code() == b.code() && a.note() == b.note() && a.file() == b.file() && a.line() == b.line();
1520 }
1521
1522 std::ostream&
1523 dcp::operator<< (std::ostream& s, dcp::VerificationNote const& note)
1524 {
1525         s << note_to_string (note);
1526         if (note.note()) {
1527                 s << " [" << note.note().get() << "]";
1528         }
1529         if (note.file()) {
1530                 s << " [" << note.file().get() << "]";
1531         }
1532         if (note.line()) {
1533                 s << " [" << note.line().get() << "]";
1534         }
1535         return s;
1536 }
1537