2 Copyright (C) 2018-2020 Carl Hetherington <cth@carlh.net>
4 This file is part of libdcp.
6 libdcp is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
11 libdcp is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with libdcp. If not, see <http://www.gnu.org/licenses/>.
19 In addition, as a special exception, the copyright holders give
20 permission to link the code of portions of this program with the
21 OpenSSL library under certain conditions as described in each
22 individual source file, and distribute linked combinations
25 You must obey the GNU General Public License in all respects
26 for all of the code used other than OpenSSL. If you modify
27 file(s) with this exception, you may extend this exception to your
28 version of the file(s), but you are not obligated to do so. If you
29 do not wish to do so, delete this exception statement from your
30 version. If you delete this exception statement from all source
31 files in the program, then also delete it here.
38 #include "reel_picture_asset.h"
39 #include "reel_sound_asset.h"
40 #include "reel_subtitle_asset.h"
41 #include "interop_subtitle_asset.h"
42 #include "mono_picture_asset.h"
43 #include "mono_picture_frame.h"
44 #include "stereo_picture_asset.h"
45 #include "stereo_picture_frame.h"
46 #include "exceptions.h"
47 #include "compose.hpp"
48 #include "raw_convert.h"
49 #include <xercesc/util/PlatformUtils.hpp>
50 #include <xercesc/parsers/XercesDOMParser.hpp>
51 #include <xercesc/parsers/AbstractDOMParser.hpp>
52 #include <xercesc/sax/HandlerBase.hpp>
53 #include <xercesc/dom/DOMImplementation.hpp>
54 #include <xercesc/dom/DOMImplementationLS.hpp>
55 #include <xercesc/dom/DOMImplementationRegistry.hpp>
56 #include <xercesc/dom/DOMLSParser.hpp>
57 #include <xercesc/dom/DOMException.hpp>
58 #include <xercesc/dom/DOMDocument.hpp>
59 #include <xercesc/dom/DOMNodeList.hpp>
60 #include <xercesc/dom/DOMError.hpp>
61 #include <xercesc/dom/DOMLocator.hpp>
62 #include <xercesc/dom/DOMNamedNodeMap.hpp>
63 #include <xercesc/dom/DOMAttr.hpp>
64 #include <xercesc/dom/DOMErrorHandler.hpp>
65 #include <xercesc/framework/LocalFileInputSource.hpp>
66 #include <xercesc/framework/MemBufInputSource.hpp>
67 #include <boost/noncopyable.hpp>
68 #include <boost/foreach.hpp>
69 #include <boost/algorithm/string.hpp>
81 using boost::shared_ptr;
82 using boost::optional;
83 using boost::function;
84 using boost::dynamic_pointer_cast;
87 using namespace xercesc;
91 xml_ch_to_string (XMLCh const * a)
93 char* x = XMLString::transcode(a);
95 XMLString::release(&x);
99 class XMLValidationError
102 XMLValidationError (SAXParseException const & e)
103 : _message (xml_ch_to_string(e.getMessage()))
104 , _line (e.getLineNumber())
105 , _column (e.getColumnNumber())
106 , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
107 , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
112 string message () const {
116 uint64_t line () const {
120 uint64_t column () const {
124 string public_id () const {
128 string system_id () const {
141 class DCPErrorHandler : public ErrorHandler
144 void warning(const SAXParseException& e)
146 maybe_add (XMLValidationError(e));
149 void error(const SAXParseException& e)
151 maybe_add (XMLValidationError(e));
154 void fatalError(const SAXParseException& e)
156 maybe_add (XMLValidationError(e));
163 list<XMLValidationError> errors () const {
168 void maybe_add (XMLValidationError e)
170 /* XXX: nasty hack */
172 e.message().find("schema document") != string::npos &&
173 e.message().find("has different target namespace from the one specified in instance document") != string::npos
178 _errors.push_back (e);
181 list<XMLValidationError> _errors;
184 class StringToXMLCh : public boost::noncopyable
187 StringToXMLCh (string a)
189 _buffer = XMLString::transcode(a.c_str());
194 XMLString::release (&_buffer);
197 XMLCh const * get () const {
205 class LocalFileResolver : public EntityResolver
208 LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
209 : _xsd_dtd_directory (xsd_dtd_directory)
211 /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically
212 * found without being here.
214 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
215 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
216 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
217 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
218 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
219 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
220 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
221 add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
222 add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
223 add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
224 add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
225 add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
226 add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
229 InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
234 string system_id_str = xml_ch_to_string (system_id);
235 boost::filesystem::path p = _xsd_dtd_directory;
236 if (_files.find(system_id_str) == _files.end()) {
239 p /= _files[system_id_str];
241 StringToXMLCh ch (p.string());
242 return new LocalFileInputSource(ch.get());
246 void add (string uri, string file)
251 std::map<string, string> _files;
252 boost::filesystem::path _xsd_dtd_directory;
257 parse (XercesDOMParser& parser, boost::filesystem::path xml)
259 parser.parse(xml.string().c_str());
264 parse (XercesDOMParser& parser, std::string xml)
266 xercesc::MemBufInputSource buf(reinterpret_cast<unsigned char const*>(xml.c_str()), xml.size(), "");
273 validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, list<VerificationNote>& notes)
276 XMLPlatformUtils::Initialize ();
277 } catch (XMLException& e) {
278 throw MiscError ("Failed to initialise xerces library");
281 DCPErrorHandler error_handler;
283 /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
285 XercesDOMParser parser;
286 parser.setValidationScheme(XercesDOMParser::Val_Always);
287 parser.setDoNamespaces(true);
288 parser.setDoSchema(true);
290 vector<string> schema;
291 schema.push_back("xml.xsd");
292 schema.push_back("xmldsig-core-schema.xsd");
293 schema.push_back("SMPTE-429-7-2006-CPL.xsd");
294 schema.push_back("SMPTE-429-8-2006-PKL.xsd");
295 schema.push_back("SMPTE-429-9-2007-AM.xsd");
296 schema.push_back("Main-Stereo-Picture-CPL.xsd");
297 schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
298 schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
299 schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
300 schema.push_back("DCSubtitle.v1.mattsson.xsd");
301 schema.push_back("DCDMSubtitle-2010.xsd");
302 schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
303 schema.push_back("SMPTE-429-16.xsd");
304 schema.push_back("Dolby-2012-AD.xsd");
305 schema.push_back("SMPTE-429-10-2008.xsd");
306 schema.push_back("xlink.xsd");
307 schema.push_back("SMPTE-335-2012.xsd");
308 schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
309 schema.push_back("isdcf-mca.xsd");
310 schema.push_back("SMPTE-429-12-2008.xsd");
312 /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
313 * Schemas that are not mentioned in this list are not read, and the things
314 * they describe are not checked.
317 BOOST_FOREACH (string i, schema) {
318 locations += String::compose("%1 %1 ", i, i);
321 parser.setExternalSchemaLocation(locations.c_str());
322 parser.setValidationSchemaFullChecking(true);
323 parser.setErrorHandler(&error_handler);
325 LocalFileResolver resolver (xsd_dtd_directory);
326 parser.setEntityResolver(&resolver);
329 parser.resetDocumentPool();
331 } catch (XMLException& e) {
332 throw MiscError(xml_ch_to_string(e.getMessage()));
333 } catch (DOMException& e) {
334 throw MiscError(xml_ch_to_string(e.getMessage()));
336 throw MiscError("Unknown exception from xerces");
340 XMLPlatformUtils::Terminate ();
342 BOOST_FOREACH (XMLValidationError i, error_handler.errors()) {
345 VerificationNote::VERIFY_ERROR,
346 VerificationNote::XML_VALIDATION_ERROR,
348 boost::trim_copy(i.public_id() + " " + i.system_id()),
/** Outcome of verify_asset() */
enum VerifyAssetResult
{
	/** the hash computed from the asset equals the hash in the PKL */
	VERIFY_ASSET_RESULT_GOOD = 0,
	/** the hash in the CPL is not the same as the hash in the PKL */
	VERIFY_ASSET_RESULT_CPL_PKL_DIFFER = 1,
	/** the hash computed from the asset is not the same as the hash in the PKL */
	VERIFY_ASSET_RESULT_BAD = 2
};
363 static VerifyAssetResult
364 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelMXF> reel_mxf, function<void (float)> progress)
366 string const actual_hash = reel_mxf->asset_ref()->hash(progress);
368 list<shared_ptr<PKL> > pkls = dcp->pkls();
369 /* We've read this DCP in so it must have at least one PKL */
370 DCP_ASSERT (!pkls.empty());
372 shared_ptr<Asset> asset = reel_mxf->asset_ref().asset();
374 optional<string> pkl_hash;
375 BOOST_FOREACH (shared_ptr<PKL> i, pkls) {
376 pkl_hash = i->hash (reel_mxf->asset_ref()->id());
382 DCP_ASSERT (pkl_hash);
384 optional<string> cpl_hash = reel_mxf->hash();
385 if (cpl_hash && *cpl_hash != *pkl_hash) {
386 return VERIFY_ASSET_RESULT_CPL_PKL_DIFFER;
389 if (actual_hash != *pkl_hash) {
390 return VERIFY_ASSET_RESULT_BAD;
393 return VERIFY_ASSET_RESULT_GOOD;
/** Outcome of the picture frame size check */
enum VerifyPictureAssetResult
{
	/** no frame is bigger than the "risky" threshold */
	VERIFY_PICTURE_ASSET_RESULT_GOOD = 0,
	/** the biggest frame is above the "risky" threshold but not above the maximum */
	VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_BIG = 1,
	/** the biggest frame is above the maximum */
	VERIFY_PICTURE_ASSET_RESULT_BAD = 2
};
406 biggest_frame_size (shared_ptr<const MonoPictureFrame> frame)
408 return frame->j2k_size ();
412 biggest_frame_size (shared_ptr<const StereoPictureFrame> frame)
414 return max(frame->left_j2k_size(), frame->right_j2k_size());
418 template <class A, class R, class F>
419 optional<VerifyPictureAssetResult>
420 verify_picture_asset_type (shared_ptr<ReelMXF> reel_mxf, function<void (float)> progress)
422 shared_ptr<A> asset = dynamic_pointer_cast<A>(reel_mxf->asset_ref().asset());
424 return optional<VerifyPictureAssetResult>();
427 int biggest_frame = 0;
428 shared_ptr<R> reader = asset->start_read ();
429 int64_t const duration = asset->intrinsic_duration ();
430 for (int64_t i = 0; i < duration; ++i) {
431 shared_ptr<const F> frame = reader->get_frame (i);
432 biggest_frame = max(biggest_frame, biggest_frame_size(frame));
433 progress (float(i) / duration);
436 static const int max_frame = rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
437 static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
438 if (biggest_frame > max_frame) {
439 return VERIFY_PICTURE_ASSET_RESULT_BAD;
440 } else if (biggest_frame > risky_frame) {
441 return VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_BIG;
444 return VERIFY_PICTURE_ASSET_RESULT_GOOD;
448 static VerifyPictureAssetResult
449 verify_picture_asset (shared_ptr<ReelMXF> reel_mxf, function<void (float)> progress)
451 optional<VerifyPictureAssetResult> r = verify_picture_asset_type<MonoPictureAsset, MonoPictureAssetReader, MonoPictureFrame>(reel_mxf, progress);
453 r = verify_picture_asset_type<StereoPictureAsset, StereoPictureAssetReader, StereoPictureFrame>(reel_mxf, progress);
462 verify_main_picture_asset (
463 shared_ptr<const DCP> dcp,
464 shared_ptr<const Reel> reel,
465 function<void (string, optional<boost::filesystem::path>)> stage,
466 function<void (float)> progress,
467 list<VerificationNote>& notes
470 boost::filesystem::path const file = *reel->main_picture()->asset()->file();
471 stage ("Checking picture asset hash", file);
472 VerifyAssetResult const r = verify_asset (dcp, reel->main_picture(), progress);
474 case VERIFY_ASSET_RESULT_BAD:
477 VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_HASH_INCORRECT, file
481 case VERIFY_ASSET_RESULT_CPL_PKL_DIFFER:
484 VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_PICTURE_HASHES_DISAGREE, file
491 stage ("Checking picture frame sizes", reel->main_picture()->asset()->file());
492 VerifyPictureAssetResult const pr = verify_picture_asset (reel->main_picture(), progress);
494 case VERIFY_PICTURE_ASSET_RESULT_BAD:
497 VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_FRAME_TOO_LARGE, file
501 case VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_BIG:
504 VerificationNote::VERIFY_WARNING, VerificationNote::PICTURE_FRAME_NEARLY_TOO_LARGE, file
515 verify_main_sound_asset (
516 shared_ptr<const DCP> dcp,
517 shared_ptr<const Reel> reel,
518 function<void (string, optional<boost::filesystem::path>)> stage,
519 function<void (float)> progress,
520 list<VerificationNote>& notes
523 stage ("Checking sound asset hash", reel->main_sound()->asset()->file());
524 VerifyAssetResult const r = verify_asset (dcp, reel->main_sound(), progress);
526 case VERIFY_ASSET_RESULT_BAD:
529 VerificationNote::VERIFY_ERROR, VerificationNote::SOUND_HASH_INCORRECT, *reel->main_sound()->asset()->file()
533 case VERIFY_ASSET_RESULT_CPL_PKL_DIFFER:
536 VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_SOUND_HASHES_DISAGREE, *reel->main_sound()->asset()->file()
547 verify_main_subtitle_asset (
548 shared_ptr<const Reel> reel,
549 function<void (string, optional<boost::filesystem::path>)> stage,
550 boost::filesystem::path xsd_dtd_directory,
551 list<VerificationNote>& notes
554 shared_ptr<ReelSubtitleAsset> reel_asset = reel->main_subtitle ();
555 stage ("Checking subtitle XML", reel->main_subtitle()->asset()->file());
556 /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
557 * gets passed through libdcp which may clean up and therefore hide errors.
559 validate_xml (reel->main_subtitle()->asset()->raw_xml(), xsd_dtd_directory, notes);
563 list<VerificationNote>
565 vector<boost::filesystem::path> directories,
566 function<void (string, optional<boost::filesystem::path>)> stage,
567 function<void (float)> progress,
568 boost::filesystem::path xsd_dtd_directory
571 xsd_dtd_directory = boost::filesystem::canonical (xsd_dtd_directory);
573 list<VerificationNote> notes;
575 list<shared_ptr<DCP> > dcps;
576 BOOST_FOREACH (boost::filesystem::path i, directories) {
577 dcps.push_back (shared_ptr<DCP> (new DCP (i)));
580 BOOST_FOREACH (shared_ptr<DCP> dcp, dcps) {
581 stage ("Checking DCP", dcp->directory());
584 } catch (ReadError& e) {
585 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
586 } catch (XMLError& e) {
587 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
588 } catch (MXFFileError& e) {
589 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
590 } catch (cxml::Error& e) {
591 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
594 BOOST_FOREACH (shared_ptr<CPL> cpl, dcp->cpls()) {
595 stage ("Checking CPL", cpl->file());
596 validate_xml (cpl->file().get(), xsd_dtd_directory, notes);
598 /* Check that the CPL's hash corresponds to the PKL */
599 BOOST_FOREACH (shared_ptr<PKL> i, dcp->pkls()) {
600 optional<string> h = i->hash(cpl->id());
601 if (h && make_digest(Data(*cpl->file())) != *h) {
602 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::CPL_HASH_INCORRECT));
606 BOOST_FOREACH (shared_ptr<Reel> reel, cpl->reels()) {
607 stage ("Checking reel", optional<boost::filesystem::path>());
609 BOOST_FOREACH (shared_ptr<ReelAsset> i, reel->assets()) {
610 if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
611 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::DURATION_TOO_SMALL, i->id()));
613 if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
614 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INTRINSIC_DURATION_TOO_SMALL, i->id()));
618 if (reel->main_picture()) {
619 /* Check reel stuff */
620 Fraction const frame_rate = reel->main_picture()->frame_rate();
621 if (frame_rate.denominator != 1 ||
622 (frame_rate.numerator != 24 &&
623 frame_rate.numerator != 25 &&
624 frame_rate.numerator != 30 &&
625 frame_rate.numerator != 48 &&
626 frame_rate.numerator != 50 &&
627 frame_rate.numerator != 60 &&
628 frame_rate.numerator != 96)) {
629 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INVALID_PICTURE_FRAME_RATE));
632 if (reel->main_picture()->asset_ref().resolved()) {
633 verify_main_picture_asset (dcp, reel, stage, progress, notes);
637 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
638 verify_main_sound_asset (dcp, reel, stage, progress, notes);
641 if (reel->main_subtitle() && reel->main_subtitle()->asset_ref().resolved()) {
642 verify_main_subtitle_asset (reel, stage, xsd_dtd_directory, notes);
647 BOOST_FOREACH (shared_ptr<PKL> pkl, dcp->pkls()) {
648 stage ("Checking PKL", pkl->file());
649 validate_xml (pkl->file().get(), xsd_dtd_directory, notes);
652 if (dcp->asset_map_path()) {
653 stage ("Checking ASSETMAP", dcp->asset_map_path().get());
654 validate_xml (dcp->asset_map_path().get(), xsd_dtd_directory, notes);
656 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::MISSING_ASSETMAP));
664 dcp::note_to_string (dcp::VerificationNote note)
666 switch (note.code()) {
667 case dcp::VerificationNote::GENERAL_READ:
669 case dcp::VerificationNote::CPL_HASH_INCORRECT:
670 return "The hash of the CPL in the PKL does not agree with the CPL file.";
671 case dcp::VerificationNote::INVALID_PICTURE_FRAME_RATE:
672 return "The picture in a reel has an invalid frame rate.";
673 case dcp::VerificationNote::PICTURE_HASH_INCORRECT:
674 return dcp::String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
675 case dcp::VerificationNote::PKL_CPL_PICTURE_HASHES_DISAGREE:
676 return dcp::String::compose("The PKL and CPL hashes disagree for the picture asset %1.", note.file()->filename());
677 case dcp::VerificationNote::SOUND_HASH_INCORRECT:
678 return dcp::String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
679 case dcp::VerificationNote::PKL_CPL_SOUND_HASHES_DISAGREE:
680 return dcp::String::compose("The PKL and CPL hashes disagree for the sound asset %1.", note.file()->filename());
681 case dcp::VerificationNote::EMPTY_ASSET_PATH:
682 return "The asset map contains an empty asset path.";
683 case dcp::VerificationNote::MISSING_ASSET:
684 return String::compose("The file for an asset in the asset map cannot be found; missing file is %1.", note.file()->filename());
685 case dcp::VerificationNote::MISMATCHED_STANDARD:
686 return "The DCP contains both SMPTE and Interop parts.";
687 case dcp::VerificationNote::XML_VALIDATION_ERROR:
688 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
689 case dcp::VerificationNote::MISSING_ASSETMAP:
690 return "No ASSETMAP or ASSETMAP.xml was found.";
691 case dcp::VerificationNote::INTRINSIC_DURATION_TOO_SMALL:
692 return String::compose("The intrinsic duration of an asset is less than 1 second long: %1", note.note().get());
693 case dcp::VerificationNote::DURATION_TOO_SMALL:
694 return String::compose("The duration of an asset is less than 1 second long: %1", note.note().get());
695 case dcp::VerificationNote::PICTURE_FRAME_TOO_LARGE:
696 return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
697 case dcp::VerificationNote::PICTURE_FRAME_NEARLY_TOO_LARGE:
698 return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
699 case dcp::VerificationNote::EXTERNAL_ASSET:
700 return String::compose("An asset that this DCP refers to is not included in the DCP. It may be a VF. Missing asset is %1.", note.note().get());