X-Git-Url: https://main.carlh.net/gitweb/?a=blobdiff_plain;f=src%2Fverify.cc;h=97758e6192e632aae3bf4c498c167a90120818ad;hb=2c1faeb15715794525f48110c2b8a9df96b387c1;hp=79663450d99a3d05a99ef20d4cf468b7d439fd36;hpb=4d6c8aaf0167cde3bb63e9014604243bdc47b1a1;p=libdcp.git diff --git a/src/verify.cc b/src/verify.cc index 79663450..97758e61 100644 --- a/src/verify.cc +++ b/src/verify.cc @@ -1,5 +1,5 @@ /* - Copyright (C) 2018-2020 Carl Hetherington + Copyright (C) 2018-2021 Carl Hetherington This file is part of libdcp. @@ -31,15 +31,30 @@ files in the program, then also delete it here. */ + +/** @file src/verify.cc + * @brief dcp::verify() method and associated code + */ + + #include "verify.h" #include "dcp.h" #include "cpl.h" #include "reel.h" +#include "reel_closed_caption_asset.h" #include "reel_picture_asset.h" #include "reel_sound_asset.h" +#include "reel_subtitle_asset.h" +#include "interop_subtitle_asset.h" +#include "mono_picture_asset.h" +#include "mono_picture_frame.h" +#include "stereo_picture_asset.h" +#include "stereo_picture_frame.h" #include "exceptions.h" #include "compose.hpp" #include "raw_convert.h" +#include "reel_markers_asset.h" +#include "smpte_subtitle_asset.h" #include #include #include @@ -57,31 +72,30 @@ #include #include #include -#include -#include +#include #include #include -#include #include #include + using std::list; using std::vector; using std::string; using std::cout; using std::map; -using boost::shared_ptr; +using std::max; +using std::set; +using std::shared_ptr; +using std::make_shared; using boost::optional; using boost::function; +using std::dynamic_pointer_cast; + using namespace dcp; using namespace xercesc; -enum Result { - RESULT_GOOD, - RESULT_CPL_PKL_DIFFER, - RESULT_BAD -}; static string @@ -93,6 +107,7 @@ xml_ch_to_string (XMLCh const * a) return o; } + class XMLValidationError { public: @@ -100,6 +115,8 @@ public: : _message (xml_ch_to_string(e.getMessage())) , _line (e.getLineNumber()) , _column (e.getColumnNumber()) + , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "") + , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "") { } @@ -116,10 +133,20 @@ public: return _column; } + string public_id () const { + return _public_id; + } + + string system_id () const { + return _system_id; + } + private: string _message; uint64_t _line; uint64_t _column; + string _public_id; + string _system_id; }; @@ -166,7 +193,8 @@ private: list _errors; }; -class StringToXMLCh : public boost::noncopyable + +class StringToXMLCh { public: StringToXMLCh (string a) @@ -174,6 +202,9 @@ public: _buffer = XMLString::transcode(a.c_str()); } + StringToXMLCh (StringToXMLCh const&) = delete; + StringToXMLCh& operator= (StringToXMLCh const&) = delete; + ~StringToXMLCh () { XMLString::release (&_buffer); @@ -187,12 +218,16 @@ private: XMLCh* _buffer; }; + class LocalFileResolver : public EntityResolver { public: LocalFileResolver (boost::filesystem::path xsd_dtd_directory) : _xsd_dtd_directory (xsd_dtd_directory) { + /* XXX: I'm not clear on what things need to be in this list; some XSDs are apparently, magically + * found without being here. + */ add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd"); add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd"); add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd"); @@ -200,16 +235,26 @@ public: add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd"); add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd"); add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd"); + add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd"); + add("interop-subs", "DCSubtitle.v1.mattsson.xsd"); + add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd"); + add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd"); + add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd"); + add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd"); } InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id) { - string system_id_str = xml_ch_to_string (system_id); - if (_files.find(system_id_str) == _files.end()) { + if (!system_id) { return 0; } - - boost::filesystem::path p = _xsd_dtd_directory / _files[system_id_str]; + auto system_id_str = xml_ch_to_string (system_id); + auto p = _xsd_dtd_directory; + if (_files.find(system_id_str) == _files.end()) { + p /= system_id_str; + } else { + p /= _files[system_id_str]; + } StringToXMLCh ch (p.string()); return new LocalFileInputSource(ch.get()); } @@ -224,9 +269,25 @@ private: boost::filesystem::path _xsd_dtd_directory; }; -static + +static void +parse (XercesDOMParser& parser, boost::filesystem::path xml) +{ + parser.parse(xml.string().c_str()); +} + + +static void +parse (XercesDOMParser& parser, string xml) +{ + xercesc::MemBufInputSource buf(reinterpret_cast(xml.c_str()), xml.size(), ""); + parser.parse(buf); +} + + +template void -validate_xml (boost::filesystem::path xml_file, boost::filesystem::path xsd_dtd_directory, list& notes) +validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, vector& notes) { try { XMLPlatformUtils::Initialize (); @@ -243,23 +304,35 @@ validate_xml (boost::filesystem::path xml_file, boost::filesystem::path xsd_dtd_ parser.setDoNamespaces(true); parser.setDoSchema(true); - map schema; - schema["http://www.w3.org/2000/09/xmldsig#"] = "xmldsig-core-schema.xsd"; - schema["http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd"] = "xmldsig-core-schema.xsd"; - schema["http://www.smpte-ra.org/schemas/429-7/2006/CPL"] = "SMPTE-429-7-2006-CPL.xsd"; - schema["http://www.smpte-ra.org/schemas/429-8/2006/PKL"] = "SMPTE-429-8-2006-PKL.xsd"; - schema["http://www.smpte-ra.org/schemas/429-9/2007/AM"] = "SMPTE-429-9-2007-AM.xsd"; - schema["http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd"] = "Main-Stereo-Picture-CPL.xsd"; - schema["http://www.digicine.com/PROTO-ASDCP-CPL-20040511#"] = "PROTO-ASDCP-CPL-20040511.xsd"; - schema["http://www.digicine.com/PROTO-ASDCP-PKL-20040311#"] = "PROTO-ASDCP-PKL-20040311.xsd"; - schema["http://www.digicine.com/PROTO-ASDCP-AM-20040311#"] = "PROTO-ASDCP-AM-20040311.xsd"; - + vector schema; + schema.push_back("xml.xsd"); + schema.push_back("xmldsig-core-schema.xsd"); + schema.push_back("SMPTE-429-7-2006-CPL.xsd"); + schema.push_back("SMPTE-429-8-2006-PKL.xsd"); + schema.push_back("SMPTE-429-9-2007-AM.xsd"); + schema.push_back("Main-Stereo-Picture-CPL.xsd"); + schema.push_back("PROTO-ASDCP-CPL-20040511.xsd"); + schema.push_back("PROTO-ASDCP-PKL-20040311.xsd"); + schema.push_back("PROTO-ASDCP-AM-20040311.xsd"); + schema.push_back("DCSubtitle.v1.mattsson.xsd"); + schema.push_back("DCDMSubtitle-2010.xsd"); + schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd"); + schema.push_back("SMPTE-429-16.xsd"); + schema.push_back("Dolby-2012-AD.xsd"); + schema.push_back("SMPTE-429-10-2008.xsd"); + schema.push_back("xlink.xsd"); + schema.push_back("SMPTE-335-2012.xsd"); + schema.push_back("SMPTE-395-2014-13-1-aaf.xsd"); + schema.push_back("isdcf-mca.xsd"); + schema.push_back("SMPTE-429-12-2008.xsd"); + + /* XXX: I'm not especially clear what this is for, but it seems to be necessary. + * Schemas that are not mentioned in this list are not read, and the things + * they describe are not checked. + */ string locations; - for (map::const_iterator i = schema.begin(); i != schema.end(); ++i) { - locations += i->first; - locations += " "; - boost::filesystem::path p = xsd_dtd_directory / i->second; - locations += p.string() + " "; + for (auto i: schema) { + locations += String::compose("%1 %1 ", i, i); } parser.setExternalSchemaLocation(locations.c_str()); @@ -271,7 +344,7 @@ validate_xml (boost::filesystem::path xml_file, boost::filesystem::path xsd_dtd_ try { parser.resetDocumentPool(); - parser.parse(xml_file.string().c_str()); + parse(parser, xml); } catch (XMLException& e) { throw MiscError(xml_ch_to_string(e.getMessage())); } catch (DOMException& e) { @@ -283,33 +356,39 @@ validate_xml (boost::filesystem::path xml_file, boost::filesystem::path xsd_dtd_ XMLPlatformUtils::Terminate (); - BOOST_FOREACH (XMLValidationError i, error_handler.errors()) { - notes.push_back ( - VerificationNote( - VerificationNote::VERIFY_ERROR, - VerificationNote::XML_VALIDATION_ERROR, - i.message(), - xml_file, - i.line() - ) - ); + for (auto i: error_handler.errors()) { + notes.push_back ({ + VerificationNote::Type::ERROR, + VerificationNote::Code::INVALID_XML, + i.message(), + boost::trim_copy(i.public_id() + " " + i.system_id()), + i.line() + }); } } -static Result -verify_asset (shared_ptr dcp, shared_ptr reel_mxf, function progress) + +enum class VerifyAssetResult { + GOOD, + CPL_PKL_DIFFER, + BAD +}; + + +static VerifyAssetResult +verify_asset (shared_ptr dcp, shared_ptr reel_file_asset, function progress) { - string const actual_hash = reel_mxf->asset_ref()->hash(progress); + auto const actual_hash = reel_file_asset->asset_ref()->hash(progress); - list > pkls = dcp->pkls(); + auto pkls = dcp->pkls(); /* We've read this DCP in so it must have at least one PKL */ DCP_ASSERT (!pkls.empty()); - shared_ptr asset = reel_mxf->asset_ref().asset(); + auto asset = reel_file_asset->asset_ref().asset(); optional pkl_hash; - BOOST_FOREACH (shared_ptr i, pkls) { - pkl_hash = i->hash (reel_mxf->asset_ref()->id()); + for (auto i: pkls) { + pkl_hash = i->hash (reel_file_asset->asset_ref()->id()); if (pkl_hash) { break; } @@ -317,20 +396,710 @@ verify_asset (shared_ptr dcp, shared_ptr reel_mxf, function cpl_hash = reel_mxf->hash(); + auto cpl_hash = reel_file_asset->hash(); if (cpl_hash && *cpl_hash != *pkl_hash) { - return RESULT_CPL_PKL_DIFFER; + return VerifyAssetResult::CPL_PKL_DIFFER; } if (actual_hash != *pkl_hash) { - return RESULT_BAD; + return VerifyAssetResult::BAD; + } + + return VerifyAssetResult::GOOD; +} + + +void +verify_language_tag (string tag, vector& notes) +{ + try { + LanguageTag test (tag); + } catch (LanguageTagError &) { + notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_LANGUAGE, tag}); + } +} + + +enum class VerifyPictureAssetResult +{ + GOOD, + FRAME_NEARLY_TOO_LARGE, + BAD, +}; + + +int +biggest_frame_size (shared_ptr frame) +{ + return frame->size (); +} + +int +biggest_frame_size (shared_ptr frame) +{ + return max(frame->left()->size(), frame->right()->size()); +} + + +template +optional +verify_picture_asset_type (shared_ptr reel_file_asset, function progress) +{ + auto asset = dynamic_pointer_cast(reel_file_asset->asset_ref().asset()); + if (!asset) { + return optional(); + } + + int biggest_frame = 0; + auto reader = asset->start_read (); + auto const duration = asset->intrinsic_duration (); + for (int64_t i = 0; i < duration; ++i) { + shared_ptr frame = reader->get_frame (i); + biggest_frame = max(biggest_frame, biggest_frame_size(frame)); + progress (float(i) / duration); + } + + static const int max_frame = rint(250 * 1000000 / (8 * asset->edit_rate().as_float())); + static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float())); + if (biggest_frame > max_frame) { + return VerifyPictureAssetResult::BAD; + } else if (biggest_frame > risky_frame) { + return VerifyPictureAssetResult::FRAME_NEARLY_TOO_LARGE; + } + + return VerifyPictureAssetResult::GOOD; +} + + +static VerifyPictureAssetResult +verify_picture_asset (shared_ptr reel_file_asset, function progress) +{ + auto r = verify_picture_asset_type(reel_file_asset, progress); + if (!r) { + r = verify_picture_asset_type(reel_file_asset, progress); + } + + DCP_ASSERT (r); + return *r; +} + + +static void +verify_main_picture_asset ( + shared_ptr dcp, + shared_ptr reel_asset, + function)> stage, + function progress, + vector& notes + ) +{ + auto asset = reel_asset->asset(); + auto const file = *asset->file(); + stage ("Checking picture asset hash", file); + auto const r = verify_asset (dcp, reel_asset, progress); + switch (r) { + case VerifyAssetResult::BAD: + notes.push_back ({ + VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_PICTURE_HASH, file + }); + break; + case VerifyAssetResult::CPL_PKL_DIFFER: + notes.push_back ({ + VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_PICTURE_HASHES, file + }); + break; + default: + break; + } + stage ("Checking picture frame sizes", asset->file()); + auto const pr = verify_picture_asset (reel_asset, progress); + switch (pr) { + case VerifyPictureAssetResult::BAD: + notes.push_back ({ + VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file + }); + break; + case VerifyPictureAssetResult::FRAME_NEARLY_TOO_LARGE: + notes.push_back ({ + VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file + }); + break; + default: + break; + } + + /* Only flat/scope allowed by Bv2.1 */ + if ( + asset->size() != Size(2048, 858) && + asset->size() != Size(1998, 1080) && + asset->size() != Size(4096, 1716) && + asset->size() != Size(3996, 2160)) { + notes.push_back({ + VerificationNote::Type::BV21_ERROR, + VerificationNote::Code::INVALID_PICTURE_SIZE_IN_PIXELS, + String::compose("%1x%2", asset->size().width, asset->size().height), + file + }); + } + + /* Only 24, 25, 48fps allowed for 2K */ + if ( + (asset->size() == Size(2048, 858) || asset->size() == Size(1998, 1080)) && + (asset->edit_rate() != Fraction(24, 1) && asset->edit_rate() != Fraction(25, 1) && asset->edit_rate() != Fraction(48, 1)) + ) { + notes.push_back({ + VerificationNote::Type::BV21_ERROR, + VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_2K, + String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator), + file + }); + } + + if (asset->size() == Size(4096, 1716) || asset->size() == Size(3996, 2160)) { + /* Only 24fps allowed for 4K */ + if (asset->edit_rate() != Fraction(24, 1)) { + notes.push_back({ + VerificationNote::Type::BV21_ERROR, + VerificationNote::Code::INVALID_PICTURE_FRAME_RATE_FOR_4K, + String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator), + file + }); + } + + /* Only 2D allowed for 4K */ + if (dynamic_pointer_cast(asset)) { + notes.push_back({ + VerificationNote::Type::BV21_ERROR, + VerificationNote::Code::INVALID_PICTURE_ASSET_RESOLUTION_FOR_3D, + String::compose("%1/%2", asset->edit_rate().numerator, asset->edit_rate().denominator), + file + }); + + } + } + +} + + +static void +verify_main_sound_asset ( + shared_ptr dcp, + shared_ptr reel_asset, + function)> stage, + function progress, + vector& notes + ) +{ + auto asset = reel_asset->asset(); + stage ("Checking sound asset hash", asset->file()); + auto const r = verify_asset (dcp, reel_asset, progress); + switch (r) { + case VerifyAssetResult::BAD: + notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_SOUND_HASH, *asset->file()}); + break; + case VerifyAssetResult::CPL_PKL_DIFFER: + notes.push_back ({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_SOUND_HASHES, *asset->file()}); + break; + default: + break; + } + + stage ("Checking sound asset metadata", asset->file()); + + verify_language_tag (asset->language(), notes); + if (asset->sampling_rate() != 48000) { + notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SOUND_FRAME_RATE, raw_convert(asset->sampling_rate()), *asset->file()}); + } +} + + +static void +verify_main_subtitle_reel (shared_ptr reel_asset, vector& notes) +{ + /* XXX: is Language compulsory? */ + if (reel_asset->language()) { + verify_language_tag (*reel_asset->language(), notes); + } + + if (!reel_asset->entry_point()) { + notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_ENTRY_POINT, reel_asset->id() }); + } else if (reel_asset->entry_point().get()) { + notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_SUBTITLE_ENTRY_POINT, reel_asset->id() }); + } +} + + +static void +verify_closed_caption_reel (shared_ptr reel_asset, vector& notes) +{ + /* XXX: is Language compulsory? */ + if (reel_asset->language()) { + verify_language_tag (*reel_asset->language(), notes); + } + + if (!reel_asset->entry_point()) { + notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() }); + } else if (reel_asset->entry_point().get()) { + notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INCORRECT_CLOSED_CAPTION_ENTRY_POINT, reel_asset->id() }); + } +} + + +struct State +{ + boost::optional subtitle_language; +}; + + +/** Verify stuff that is common to both subtitles and closed captions */ +void +verify_smpte_timed_text_asset ( + shared_ptr asset, + vector& notes + ) +{ + if (asset->language()) { + verify_language_tag (*asset->language(), notes); + } else { + notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE, *asset->file() }); + } + + auto const size = boost::filesystem::file_size(asset->file().get()); + if (size > 115 * 1024 * 1024) { + notes.push_back ( + { VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES, raw_convert(size), *asset->file() } + ); + } + + /* XXX: I'm not sure what Bv2.1_7.2.1 means when it says "the font resource shall not be larger than 10MB" + * but I'm hoping that checking for the total size of all fonts being <= 10MB will do. + */ + auto fonts = asset->font_data (); + int total_size = 0; + for (auto i: fonts) { + total_size += i.second.size(); + } + if (total_size > 10 * 1024 * 1024) { + notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_FONT_SIZE_IN_BYTES, raw_convert(total_size), asset->file().get() }); + } + + if (!asset->start_time()) { + notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_START_TIME, asset->file().get() }); + } else if (asset->start_time() != Time()) { + notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_SUBTITLE_START_TIME, asset->file().get() }); + } +} + + +/** Verify SMPTE subtitle-only stuff */ +void +verify_smpte_subtitle_asset ( + shared_ptr asset, + vector& notes, + State& state + ) +{ + if (asset->language()) { + if (!state.subtitle_language) { + state.subtitle_language = *asset->language(); + } else if (state.subtitle_language != *asset->language()) { + notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISMATCHED_SUBTITLE_LANGUAGES }); + } + } +} + + +/** Verify all subtitle stuff */ +static void +verify_subtitle_asset ( + shared_ptr asset, + function)> stage, + boost::filesystem::path xsd_dtd_directory, + vector& notes, + State& state + ) +{ + stage ("Checking subtitle XML", asset->file()); + /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk + * gets passed through libdcp which may clean up and therefore hide errors. + */ + validate_xml (asset->raw_xml(), xsd_dtd_directory, notes); + + auto smpte = dynamic_pointer_cast(asset); + if (smpte) { + verify_smpte_timed_text_asset (smpte, notes); + verify_smpte_subtitle_asset (smpte, notes, state); + } +} + + +/** Verify all closed caption stuff */ +static void +verify_closed_caption_asset ( + shared_ptr asset, + function)> stage, + boost::filesystem::path xsd_dtd_directory, + vector& notes + ) +{ + stage ("Checking closed caption XML", asset->file()); + /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk + * gets passed through libdcp which may clean up and therefore hide errors. + */ + validate_xml (asset->raw_xml(), xsd_dtd_directory, notes); + + auto smpte = dynamic_pointer_cast(asset); + if (smpte) { + verify_smpte_timed_text_asset (smpte, notes); + } + + if (asset->raw_xml().size() > 256 * 1024) { + notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_CLOSED_CAPTION_XML_SIZE_IN_BYTES, raw_convert(asset->raw_xml().size()), *asset->file()}); + } +} + + +static +void +verify_text_timing ( + vector> reels, + int edit_rate, + vector& notes, + std::function)> check, + std::function)> xml, + std::function)> duration + ) +{ + /* end of last subtitle (in editable units) */ + optional last_out; + auto too_short = false; + auto too_close = false; + auto too_early = false; + auto reel_overlap = false; + /* current reel start time (in editable units) */ + int64_t reel_offset = 0; + + std::function, optional