Report every frame (with index) that has a JPEG2000 codestream error (DoM #2698).
[libdcp.git] / src / verify.cc
index f570a41deda4474434199001da289f2b3df4b018..5901ffde638b510b877e20f75676cc903c4c5719 100644 (file)
@@ -41,6 +41,7 @@
 #include "cpl.h"
 #include "dcp.h"
 #include "exceptions.h"
+#include "filesystem.h"
 #include "interop_subtitle_asset.h"
 #include "mono_picture_asset.h"
 #include "mono_picture_frame.h"
@@ -386,7 +387,15 @@ enum class VerifyAssetResult {
 static VerifyAssetResult
 verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
 {
-       auto const actual_hash = reel_file_asset->asset_ref()->hash(progress);
+       /* When reading the DCP the hash will have been set to the one from the PKL/CPL.
+        * We want to calculate the hash of the actual file contents here, so that we
+        * can check it.  unset_hash() means that this calculation will happen on the
+        * call to hash().
+        */
+       reel_file_asset->asset_ref()->unset_hash();
+       auto const actual_hash = reel_file_asset->asset_ref()->hash([progress](int64_t done, int64_t total) {
+               progress(float(done) / total);
+       });
 
        auto pkls = dcp->pkls();
        /* We've read this DCP in so it must have at least one PKL */
@@ -429,7 +438,7 @@ verify_language_tag (string tag, vector<VerificationNote>& notes)
 
 
 static void
-verify_picture_asset (shared_ptr<const ReelFileAsset> reel_file_asset, boost::filesystem::path file, vector<VerificationNote>& notes, function<void (float)> progress)
+verify_picture_asset(shared_ptr<const ReelFileAsset> reel_file_asset, boost::filesystem::path file, int64_t start_frame, vector<VerificationNote>& notes, function<void (float)> progress)
 {
        int biggest_frame = 0;
        auto asset = dynamic_pointer_cast<PictureAsset>(reel_file_asset->asset_ref().asset());
@@ -450,7 +459,7 @@ verify_picture_asset (shared_ptr<const ReelFileAsset> reel_file_asset, boost::fi
                        biggest_frame = max(biggest_frame, frame->size());
                        if (!mono_asset->encrypted() || mono_asset->key()) {
                                vector<VerificationNote> j2k_notes;
-                               verify_j2k(frame, i, mono_asset->frame_rate().numerator, j2k_notes);
+                               verify_j2k(frame, start_frame, i, mono_asset->frame_rate().numerator, j2k_notes);
                                check_and_add (j2k_notes);
                        }
                        progress (float(i) / duration);
@@ -462,8 +471,8 @@ verify_picture_asset (shared_ptr<const ReelFileAsset> reel_file_asset, boost::fi
                        biggest_frame = max(biggest_frame, max(frame->left()->size(), frame->right()->size()));
                        if (!stereo_asset->encrypted() || stereo_asset->key()) {
                                vector<VerificationNote> j2k_notes;
-                               verify_j2k(frame->left(), i, stereo_asset->frame_rate().numerator, j2k_notes);
-                               verify_j2k(frame->right(), i, stereo_asset->frame_rate().numerator, j2k_notes);
+                               verify_j2k(frame->left(), start_frame, i, stereo_asset->frame_rate().numerator, j2k_notes);
+                               verify_j2k(frame->right(), start_frame, i, stereo_asset->frame_rate().numerator, j2k_notes);
                                check_and_add (j2k_notes);
                        }
                        progress (float(i) / duration);
@@ -489,6 +498,7 @@ static void
 verify_main_picture_asset (
        shared_ptr<const DCP> dcp,
        shared_ptr<const ReelPictureAsset> reel_asset,
+       int64_t start_frame,
        function<void (string, optional<boost::filesystem::path>)> stage,
        function<void (float)> progress,
        VerificationOptions options,
@@ -498,7 +508,7 @@ verify_main_picture_asset (
        auto asset = reel_asset->asset();
        auto const file = *asset->file();
 
-       if (options.check_asset_hashes && (!options.maximum_asset_size_for_hash_check || boost::filesystem::file_size(file) < *options.maximum_asset_size_for_hash_check)) {
+       if (options.check_asset_hashes && (!options.maximum_asset_size_for_hash_check || filesystem::file_size(file) < *options.maximum_asset_size_for_hash_check)) {
                stage ("Checking picture asset hash", file);
                auto const r = verify_asset (dcp, reel_asset, progress);
                switch (r) {
@@ -518,7 +528,7 @@ verify_main_picture_asset (
        }
 
        stage ("Checking picture frame sizes", asset->file());
-       verify_picture_asset (reel_asset, file, notes, progress);
+       verify_picture_asset(reel_asset, file, start_frame, notes, progress);
 
        /* Only flat/scope allowed by Bv2.1 */
        if (
@@ -594,7 +604,7 @@ verify_main_sound_asset (
        auto asset = reel_asset->asset();
        auto const file = *asset->file();
 
-       if (options.check_asset_hashes && (!options.maximum_asset_size_for_hash_check || boost::filesystem::file_size(file) < *options.maximum_asset_size_for_hash_check)) {
+       if (options.check_asset_hashes && (!options.maximum_asset_size_for_hash_check || filesystem::file_size(file) < *options.maximum_asset_size_for_hash_check)) {
                stage("Checking sound asset hash", file);
                auto const r = verify_asset (dcp, reel_asset, progress);
                switch (r) {
@@ -672,7 +682,7 @@ verify_smpte_timed_text_asset (
                notes.push_back ({ VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_SUBTITLE_LANGUAGE, *asset->file() });
        }
 
-       auto const size = boost::filesystem::file_size(asset->file().get());
+       auto const size = filesystem::file_size(asset->file().get());
        if (size > 115 * 1024 * 1024) {
                notes.push_back (
                        { VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_TIMED_TEXT_SIZE_IN_BYTES, raw_convert<string>(size), *asset->file() }
@@ -709,9 +719,9 @@ verify_smpte_timed_text_asset (
 }
 
 
-/** Verify Interop subtitle-only stuff */
+/** Verify Interop subtitle / CCAP stuff */
 void
-verify_interop_subtitle_asset(shared_ptr<const InteropSubtitleAsset> asset, vector<VerificationNote>& notes)
+verify_interop_text_asset(shared_ptr<const InteropSubtitleAsset> asset, vector<VerificationNote>& notes)
 {
        if (asset->subtitles().empty()) {
                notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_SUBTITLE, asset->id(), asset->file().get() });
@@ -800,7 +810,7 @@ verify_subtitle_asset (
 
        auto interop = dynamic_pointer_cast<const InteropSubtitleAsset>(asset);
        if (interop) {
-               verify_interop_subtitle_asset(interop, notes);
+               verify_interop_text_asset(interop, notes);
                if (namespace_count(asset, "DCSubtitle") > 1) {
                        notes.push_back({ VerificationNote::Type::WARNING, VerificationNote::Code::INCORRECT_SUBTITLE_NAMESPACE_COUNT, asset->id() });
                }
@@ -842,6 +852,11 @@ verify_closed_caption_asset (
                notes.push_back ({VerificationNote::Type::WARNING, VerificationNote::Code::MISSED_CHECK_OF_ENCRYPTED});
        }
 
+       auto interop = dynamic_pointer_cast<const InteropSubtitleAsset>(asset);
+       if (interop) {
+               verify_interop_text_asset(interop, notes);
+       }
+
        auto smpte = dynamic_pointer_cast<const SMPTESubtitleAsset>(asset);
        if (smpte) {
                verify_smpte_timed_text_asset (smpte, reel_asset_duration, notes);
@@ -853,12 +868,14 @@ verify_closed_caption_asset (
 static
 void
 verify_text_details (
+       dcp::Standard standard,
        vector<shared_ptr<Reel>> reels,
        int edit_rate,
        vector<VerificationNote>& notes,
        std::function<bool (shared_ptr<Reel>)> check,
        std::function<optional<string> (shared_ptr<Reel>)> xml,
-       std::function<int64_t (shared_ptr<Reel>)> duration
+       std::function<int64_t (shared_ptr<Reel>)> duration,
+       std::function<std::string (shared_ptr<Reel>)> id
        )
 {
        /* end of last subtitle (in editable units) */
@@ -870,11 +887,19 @@ verify_text_details (
        auto empty_text = false;
        /* current reel start time (in editable units) */
        int64_t reel_offset = 0;
-       vector<string> font_ids;
        optional<string> missing_load_font_id;
 
-       std::function<void (cxml::ConstNodePtr, optional<int>, optional<Time>, int, bool)> parse;
-       parse = [&parse, &last_out, &too_short, &too_close, &too_early, &empty_text, &reel_offset, &font_ids, &missing_load_font_id](cxml::ConstNodePtr node, optional<int> tcr, optional<Time> start_time, int er, bool first_reel) {
+       std::function<void (cxml::ConstNodePtr, optional<int>, optional<Time>, int, bool, bool&, vector<string>&)> parse;
+
+       parse = [&parse, &last_out, &too_short, &too_close, &too_early, &empty_text, &reel_offset, &missing_load_font_id](
+               cxml::ConstNodePtr node,
+               optional<int> tcr,
+               optional<Time> start_time,
+               int er,
+               bool first_reel,
+               bool& has_text,
+               vector<string>& font_ids
+               ) {
                if (node->name() == "Subtitle") {
                        Time in (node->string_attribute("TimeIn"), tcr);
                        if (start_time) {
@@ -914,9 +939,12 @@ verify_text_details (
                        if (!node_has_content(node)) {
                                empty_text = true;
                        }
+                       has_text = true;
                } else if (node->name() == "LoadFont") {
                        if (auto const id = node->optional_string_attribute("Id")) {
                                font_ids.push_back(*id);
+                       } else if (auto const id = node->optional_string_attribute("ID")) {
+                               font_ids.push_back(*id);
                        }
                } else if (node->name() == "Font") {
                        if (auto const font_id = node->optional_string_attribute("Id")) {
@@ -926,7 +954,7 @@ verify_text_details (
                        }
                }
                for (auto i: node->node_children()) {
-                       parse(i, tcr, start_time, er, first_reel);
+                       parse(i, tcr, start_time, er, first_reel, has_text, font_ids);
                }
        };
 
@@ -948,24 +976,32 @@ verify_text_details (
                shared_ptr<cxml::Document> doc;
                optional<int> tcr;
                optional<Time> start_time;
-               try {
+               switch (standard) {
+               case dcp::Standard::INTEROP:
+                       doc = make_shared<cxml::Document>("DCSubtitle");
+                       doc->read_string (*reel_xml);
+                       break;
+               case dcp::Standard::SMPTE:
                        doc = make_shared<cxml::Document>("SubtitleReel");
                        doc->read_string (*reel_xml);
                        tcr = doc->number_child<int>("TimeCodeRate");
-                       auto start_time_string = doc->optional_string_child("StartTime");
-                       if (start_time_string) {
+                       if (auto start_time_string = doc->optional_string_child("StartTime")) {
                                start_time = Time(*start_time_string, tcr);
                        }
-               } catch (...) {
-                       doc = make_shared<cxml::Document>("DCSubtitle");
-                       doc->read_string (*reel_xml);
+                       break;
                }
-               parse (doc, tcr, start_time, edit_rate, i == 0);
+               bool has_text = false;
+               vector<string> font_ids;
+               parse(doc, tcr, start_time, edit_rate, i == 0, has_text, font_ids);
                auto end = reel_offset + duration(reels[i]);
                if (last_out && *last_out > end) {
                        reel_overlap = true;
                }
                reel_offset = end;
+
+               if (standard == dcp::Standard::SMPTE && has_text && font_ids.empty()) {
+                       notes.push_back(dcp::VerificationNote(dcp::VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_LOAD_FONT).set_id(id(reels[i])));
+               }
        }
 
        if (last_out && *last_out > reel_offset) {
@@ -1003,7 +1039,7 @@ verify_text_details (
        }
 
        if (missing_load_font_id) {
-               notes.push_back(dcp::VerificationNote(VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_LOAD_FONT).set_id(*missing_load_font_id));
+               notes.push_back(dcp::VerificationNote(VerificationNote::Type::ERROR, VerificationNote::Code::MISSING_LOAD_FONT_FOR_FONT).set_id(*missing_load_font_id));
        }
 }
 
@@ -1216,34 +1252,31 @@ verify_text_lines_and_characters (
 
 static
 void
-verify_text_details (vector<shared_ptr<Reel>> reels, vector<VerificationNote>& notes)
+verify_text_details(dcp::Standard standard, vector<shared_ptr<Reel>> reels, vector<VerificationNote>& notes)
 {
        if (reels.empty()) {
                return;
        }
 
        if (reels[0]->main_subtitle()) {
-               verify_text_details (reels, reels[0]->main_subtitle()->edit_rate().numerator, notes,
+               verify_text_details(standard, reels, reels[0]->main_subtitle()->edit_rate().numerator, notes,
                        [](shared_ptr<Reel> reel) {
                                return static_cast<bool>(reel->main_subtitle());
                        },
                        [](shared_ptr<Reel> reel) {
-                               auto interop = dynamic_pointer_cast<ReelInteropSubtitleAsset>(reel->main_subtitle());
-                               if (interop) {
-                                       return interop->asset()->raw_xml();
-                               }
-                               auto smpte = dynamic_pointer_cast<ReelSMPTESubtitleAsset>(reel->main_subtitle());
-                               DCP_ASSERT (smpte);
-                               return smpte->asset()->raw_xml();
+                               return reel->main_subtitle()->asset()->raw_xml();
                        },
                        [](shared_ptr<Reel> reel) {
                                return reel->main_subtitle()->actual_duration();
+                       },
+                       [](shared_ptr<Reel> reel) {
+                               return reel->main_subtitle()->id();
                        }
                );
        }
 
        for (auto i = 0U; i < reels[0]->closed_captions().size(); ++i) {
-               verify_text_details (reels, reels[0]->closed_captions()[i]->edit_rate().numerator, notes,
+               verify_text_details(standard, reels, reels[0]->closed_captions()[i]->edit_rate().numerator, notes,
                        [i](shared_ptr<Reel> reel) {
                                return i < reel->closed_captions().size();
                        },
@@ -1252,6 +1285,9 @@ verify_text_details (vector<shared_ptr<Reel>> reels, vector<VerificationNote>& n
                        },
                        [i](shared_ptr<Reel> reel) {
                                return reel->closed_captions()[i]->actual_duration();
+                       },
+                       [i](shared_ptr<Reel> reel) {
+                               return reel->closed_captions()[i]->id();
                        }
                );
        }
@@ -1265,7 +1301,7 @@ verify_extension_metadata(shared_ptr<const CPL> cpl, vector<VerificationNote>& n
 {
        DCP_ASSERT (cpl->file());
        cxml::Document doc ("CompositionPlaylist");
-       doc.read_file (cpl->file().get());
+       doc.read_file(dcp::filesystem::fix_long_path(cpl->file().get()));
 
        auto missing = false;
        string malformed;
@@ -1349,6 +1385,7 @@ verify_reel(
        shared_ptr<const DCP> dcp,
        shared_ptr<const CPL> cpl,
        shared_ptr<const Reel> reel,
+       int64_t start_frame,
        optional<dcp::Size> main_picture_active_area,
        function<void (string, optional<boost::filesystem::path>)> stage,
        boost::filesystem::path xsd_dtd_directory,
@@ -1407,7 +1444,7 @@ verify_reel(
                }
                /* Check asset */
                if (reel->main_picture()->asset_ref().resolved()) {
-                       verify_main_picture_asset(dcp, reel->main_picture(), stage, progress, options, notes);
+                       verify_main_picture_asset(dcp, reel->main_picture(), start_frame, stage, progress, options, notes);
                        auto const asset_size = reel->main_picture()->asset()->size();
                        if (main_picture_active_area) {
                                if (main_picture_active_area->width > asset_size.width) {
@@ -1428,6 +1465,7 @@ verify_reel(
                                }
                        }
                }
+
        }
 
        if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
@@ -1520,6 +1558,15 @@ verify_cpl(
                }
        }
 
+       for (auto version: cpl->content_versions()) {
+               if (version.label_text.empty()) {
+                       notes.push_back(
+                               dcp::VerificationNote(VerificationNote::Type::WARNING, VerificationNote::Code::EMPTY_CONTENT_VERSION_LABEL_TEXT, cpl->file().get()).set_id(cpl->id())
+                               );
+                       break;
+               }
+       }
+
        if (dcp->standard() == Standard::SMPTE) {
                if (!cpl->annotation_text()) {
                        notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::MISSING_CPL_ANNOTATION_TEXT, cpl->id(), cpl->file().get()});
@@ -1585,12 +1632,14 @@ verify_cpl(
                        });
        }
 
+       int64_t frame = 0;
        for (auto reel: cpl->reels()) {
                stage("Checking reel", optional<boost::filesystem::path>());
                verify_reel(
                        dcp,
                        cpl,
                        reel,
+                       frame,
                        main_picture_active_area,
                        stage,
                        xsd_dtd_directory,
@@ -1604,9 +1653,10 @@ verify_cpl(
                        &fewest_closed_captions,
                        &markers_seen
                        );
+               frame += reel->duration();
        }
 
-       verify_text_details(cpl->reels(), notes);
+       verify_text_details(dcp->standard().get_value_or(dcp::Standard::SMPTE), cpl->reels(), notes);
 
        if (dcp->standard() == Standard::SMPTE) {
                if (auto msc = cpl->main_sound_configuration()) {
@@ -1697,7 +1747,7 @@ verify_cpl(
                if (cpl->any_encrypted()) {
                        cxml::Document doc("CompositionPlaylist");
                        DCP_ASSERT(cpl->file());
-                       doc.read_file(cpl->file().get());
+                       doc.read_file(dcp::filesystem::fix_long_path(cpl->file().get()));
                        if (!doc.optional_node_child("Signature")) {
                                notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_CPL_WITH_ENCRYPTED_CONTENT, cpl->id(), cpl->file().get()});
                        }
@@ -1719,7 +1769,7 @@ verify_pkl(
 
        if (pkl_has_encrypted_assets(dcp, pkl)) {
                cxml::Document doc("PackingList");
-               doc.read_file(pkl->file().get());
+               doc.read_file(dcp::filesystem::fix_long_path(pkl->file().get()));
                if (!doc.optional_node_child("Signature")) {
                        notes.push_back({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::UNSIGNED_PKL_WITH_ENCRYPTED_CONTENT, pkl->id(), pkl->file().get()});
                }
@@ -1762,6 +1812,7 @@ verify_assetmap(
 vector<VerificationNote>
 dcp::verify (
        vector<boost::filesystem::path> directories,
+       vector<dcp::DecryptedKDM> kdms,
        function<void (string, optional<boost::filesystem::path>)> stage,
        function<void (float)> progress,
        VerificationOptions options,
@@ -1771,7 +1822,7 @@ dcp::verify (
        if (!xsd_dtd_directory) {
                xsd_dtd_directory = resources_directory() / "xsd";
        }
-       *xsd_dtd_directory = boost::filesystem::canonical (*xsd_dtd_directory);
+       *xsd_dtd_directory = filesystem::canonical(*xsd_dtd_directory);
 
        vector<VerificationNote> notes;
        State state{};
@@ -1809,6 +1860,10 @@ dcp::verify (
                        notes.push_back ({VerificationNote::Type::BV21_ERROR, VerificationNote::Code::INVALID_STANDARD});
                }
 
+               for (auto kdm: kdms) {
+                       dcp->add(kdm);
+               }
+
                for (auto cpl: dcp->cpls()) {
                        verify_cpl(
                                dcp,
@@ -1984,7 +2039,12 @@ dcp::note_to_string (VerificationNote note)
        case VerificationNote::Code::PARTIALLY_ENCRYPTED:
                return "Some assets are encrypted but some are not.";
        case VerificationNote::Code::INVALID_JPEG2000_CODESTREAM:
-               return String::compose("The JPEG2000 codestream for at least one frame is invalid (%1).", note.note().get());
+               return String::compose(
+                       "Frame %1 (timecode %2) has an invalid JPEG2000 codestream (%2).",
+                       note.frame().get(),
+                       dcp::Time(note.frame().get(), note.frame_rate().get(), note.frame_rate().get()).as_string(dcp::Standard::SMPTE),
+                       note.note().get()
+                       );
        case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_2K:
                return String::compose("The JPEG2000 codestream uses %1 guard bits in a 2K image instead of 1.", note.note().get());
        case VerificationNote::Code::INVALID_JPEG2000_GUARD_BITS_FOR_4K:
@@ -2057,8 +2117,14 @@ dcp::note_to_string (VerificationNote note)
                        );
        case VerificationNote::Code::INCORRECT_SUBTITLE_NAMESPACE_COUNT:
                return String::compose("The XML in the subtitle asset %1 has more than one namespace declaration.", note.note().get());
-       case VerificationNote::Code::MISSING_LOAD_FONT:
+       case VerificationNote::Code::MISSING_LOAD_FONT_FOR_FONT:
                return String::compose("A subtitle or closed caption refers to a font with ID %1 that does not have a corresponding <LoadFont> node", note.id().get());
+       case VerificationNote::Code::MISSING_LOAD_FONT:
+               return String::compose("The SMPTE subtitle asset %1 has <Text> nodes but no <LoadFont> node", note.id().get());
+       case VerificationNote::Code::MISMATCHED_ASSET_MAP_ID:
+               return String::compose("The asset with ID %1 in the asset map actually has an id of %2", note.id().get(), note.other_id().get());
+       case VerificationNote::Code::EMPTY_CONTENT_VERSION_LABEL_TEXT:
+               return String::compose("The <LabelText> in a <ContentVersion> in CPL %1 is empty", note.id().get());
        }
 
        return "";
@@ -2068,7 +2134,17 @@ dcp::note_to_string (VerificationNote note)
 bool
 dcp::operator== (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
 {
-       return a.type() == b.type() && a.code() == b.code() && a.note() == b.note() && a.file() == b.file() && a.line() == b.line();
+       return a.type() == b.type() &&
+               a.code() == b.code() &&
+               a.note() == b.note() &&
+               a.file() == b.file() &&
+               a.line() == b.line() &&
+               a.frame() == b.frame() &&
+               a.component() == b.component() &&
+               a.size() == b.size() &&
+               a.id() == b.id() &&
+               a.other_id() == b.other_id() &&
+               a.frame_rate() == b.frame_rate();
 }
 
 
@@ -2091,7 +2167,27 @@ dcp::operator< (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
                return a.file().get_value_or("") < b.file().get_value_or("");
        }
 
-       return a.line().get_value_or(0) < b.line().get_value_or(0);
+       if (a.line() != b.line()) {
+               return a.line().get_value_or(0) < b.line().get_value_or(0);
+       }
+
+       if (a.frame() != b.frame()) {
+               return a.frame().get_value_or(0) < b.frame().get_value_or(0);
+       }
+
+       if (a.component() != b.component()) {
+               return a.component().get_value_or(0) < b.component().get_value_or(0);
+       }
+
+       if (a.size() != b.size()) {
+               return a.size().get_value_or(0) < b.size().get_value_or(0);
+       }
+
+       if (a.id() != b.id()) {
+               return a.id().get_value_or("") < b.id().get_value_or("");
+       }
+
+       return a.other_id().get_value_or("") < b.other_id().get_value_or("");
 }