Speculative fix for assertion failure (DoM #2839).
[libdcp.git] / src / verify.cc
index 5901ffde638b510b877e20f75676cc903c4c5719..ec8925f2ffba6a5a6c2713d0f7d6a4dd8cb39a16 100644 (file)
@@ -81,6 +81,7 @@
 #include <boost/algorithm/string.hpp>
 #include <iostream>
 #include <map>
+#include <numeric>
 #include <regex>
 #include <set>
 #include <vector>
@@ -385,15 +386,24 @@ enum class VerifyAssetResult {
 
 
 static VerifyAssetResult
-verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelFileAsset> reel_file_asset, function<void (float)> progress)
+verify_asset(
+       shared_ptr<const DCP> dcp,
+       shared_ptr<const ReelFileAsset> reel_file_asset,
+       function<void (float)> progress,
+       string* reference_hash,
+       string* calculated_hash
+       )
 {
+       DCP_ASSERT(reference_hash);
+       DCP_ASSERT(calculated_hash);
+
        /* When reading the DCP the hash will have been set to the one from the PKL/CPL.
         * We want to calculate the hash of the actual file contents here, so that we
         * can check it.  unset_hash() means that this calculation will happen on the
         * call to hash().
         */
        reel_file_asset->asset_ref()->unset_hash();
-       auto const actual_hash = reel_file_asset->asset_ref()->hash([progress](int64_t done, int64_t total) {
+       *calculated_hash = reel_file_asset->asset_ref()->hash([progress](int64_t done, int64_t total) {
                progress(float(done) / total);
        });
 
@@ -403,22 +413,23 @@ verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelFileAsset> reel_fi
 
        auto asset = reel_file_asset->asset_ref().asset();
 
-       optional<string> pkl_hash;
+       optional<string> maybe_pkl_hash;
        for (auto i: pkls) {
-               pkl_hash = i->hash (reel_file_asset->asset_ref()->id());
-               if (pkl_hash) {
+               maybe_pkl_hash = i->hash (reel_file_asset->asset_ref()->id());
+               if (maybe_pkl_hash) {
                        break;
                }
        }
 
-       DCP_ASSERT (pkl_hash);
+       DCP_ASSERT(maybe_pkl_hash);
+       *reference_hash = *maybe_pkl_hash;
 
        auto cpl_hash = reel_file_asset->hash();
-       if (cpl_hash && *cpl_hash != *pkl_hash) {
+       if (cpl_hash && *cpl_hash != *reference_hash) {
                return VerifyAssetResult::CPL_PKL_DIFFER;
        }
 
-       if (actual_hash != *pkl_hash) {
+       if (*calculated_hash != *reference_hash) {
                return VerifyAssetResult::BAD;
        }
 
@@ -440,7 +451,6 @@ verify_language_tag (string tag, vector<VerificationNote>& notes)
 static void
 verify_picture_asset(shared_ptr<const ReelFileAsset> reel_file_asset, boost::filesystem::path file, int64_t start_frame, vector<VerificationNote>& notes, function<void (float)> progress)
 {
-       int biggest_frame = 0;
        auto asset = dynamic_pointer_cast<PictureAsset>(reel_file_asset->asset_ref().asset());
        auto const duration = asset->intrinsic_duration ();
 
@@ -452,11 +462,30 @@ verify_picture_asset(shared_ptr<const ReelFileAsset> reel_file_asset, boost::fil
                }
        };
 
+       int const max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
+       int const risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
+
+       auto check_frame_size = [max_frame, risky_frame, file, start_frame](int index, int size, int frame_rate, vector<VerificationNote>& notes) {
+               if (size > max_frame) {
+                       notes.push_back(
+                               VerificationNote(
+                                       VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
+                                       ).set_frame(start_frame + index).set_frame_rate(frame_rate)
+                       );
+               } else if (size > risky_frame) {
+                       notes.push_back(
+                               VerificationNote(
+                                       VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
+                                       ).set_frame(start_frame + index).set_frame_rate(frame_rate)
+                       );
+               }
+       };
+
        if (auto mono_asset = dynamic_pointer_cast<MonoPictureAsset>(reel_file_asset->asset_ref().asset())) {
                auto reader = mono_asset->start_read ();
                for (int64_t i = 0; i < duration; ++i) {
                        auto frame = reader->get_frame (i);
-                       biggest_frame = max(biggest_frame, frame->size());
+                       check_frame_size(i, frame->size(), mono_asset->frame_rate().numerator, notes);
                        if (!mono_asset->encrypted() || mono_asset->key()) {
                                vector<VerificationNote> j2k_notes;
                                verify_j2k(frame, start_frame, i, mono_asset->frame_rate().numerator, j2k_notes);
@@ -468,7 +497,8 @@ verify_picture_asset(shared_ptr<const ReelFileAsset> reel_file_asset, boost::fil
                auto reader = stereo_asset->start_read ();
                for (int64_t i = 0; i < duration; ++i) {
                        auto frame = reader->get_frame (i);
-                       biggest_frame = max(biggest_frame, max(frame->left()->size(), frame->right()->size()));
+                       check_frame_size(i, frame->left()->size(), stereo_asset->frame_rate().numerator, notes);
+                       check_frame_size(i, frame->right()->size(), stereo_asset->frame_rate().numerator, notes);
                        if (!stereo_asset->encrypted() || stereo_asset->key()) {
                                vector<VerificationNote> j2k_notes;
                                verify_j2k(frame->left(), start_frame, i, stereo_asset->frame_rate().numerator, j2k_notes);
@@ -479,18 +509,6 @@ verify_picture_asset(shared_ptr<const ReelFileAsset> reel_file_asset, boost::fil
                }
 
        }
-
-       static const int max_frame =   rint(250 * 1000000 / (8 * asset->edit_rate().as_float()));
-       static const int risky_frame = rint(230 * 1000000 / (8 * asset->edit_rate().as_float()));
-       if (biggest_frame > max_frame) {
-               notes.push_back ({
-                       VerificationNote::Type::ERROR, VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
-               });
-       } else if (biggest_frame > risky_frame) {
-               notes.push_back ({
-                       VerificationNote::Type::WARNING, VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES, file
-               });
-       }
 }
 
 
@@ -510,12 +528,18 @@ verify_main_picture_asset (
 
        if (options.check_asset_hashes && (!options.maximum_asset_size_for_hash_check || filesystem::file_size(file) < *options.maximum_asset_size_for_hash_check)) {
                stage ("Checking picture asset hash", file);
-               auto const r = verify_asset (dcp, reel_asset, progress);
+               string reference_hash;
+               string calculated_hash;
+               auto const r = verify_asset(dcp, reel_asset, progress, &reference_hash, &calculated_hash);
                switch (r) {
                        case VerifyAssetResult::BAD:
-                               notes.push_back ({
-                                       VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_PICTURE_HASH, file
-                               });
+                               notes.push_back(
+                                       dcp::VerificationNote(
+                                               VerificationNote::Type::ERROR,
+                                               VerificationNote::Code::INCORRECT_PICTURE_HASH,
+                                               file
+                                               ).set_reference_hash(reference_hash).set_calculated_hash(calculated_hash)
+                                       );
                                break;
                        case VerifyAssetResult::CPL_PKL_DIFFER:
                                notes.push_back ({
@@ -606,10 +630,18 @@ verify_main_sound_asset (
 
        if (options.check_asset_hashes && (!options.maximum_asset_size_for_hash_check || filesystem::file_size(file) < *options.maximum_asset_size_for_hash_check)) {
                stage("Checking sound asset hash", file);
-               auto const r = verify_asset (dcp, reel_asset, progress);
+               string reference_hash;
+               string calculated_hash;
+               auto const r = verify_asset(dcp, reel_asset, progress, &reference_hash, &calculated_hash);
                switch (r) {
                        case VerifyAssetResult::BAD:
-                               notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::INCORRECT_SOUND_HASH, file});
+                               notes.push_back(
+                                       dcp::VerificationNote(
+                                               VerificationNote::Type::ERROR,
+                                               VerificationNote::Code::INCORRECT_SOUND_HASH,
+                                               file
+                                               ).set_reference_hash(reference_hash).set_calculated_hash(calculated_hash)
+                                       );
                                break;
                        case VerifyAssetResult::CPL_PKL_DIFFER:
                                notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_SOUND_HASHES, file});
@@ -1151,18 +1183,9 @@ verify_closed_caption_details (
 }
 
 
-struct LinesCharactersResult
-{
-       bool warning_length_exceeded = false;
-       bool error_length_exceeded = false;
-       bool line_count_exceeded = false;
-};
-
-
-static
 void
-verify_text_lines_and_characters (
-       shared_ptr<SubtitleAsset> asset,
+dcp::verify_text_lines_and_characters(
+       shared_ptr<const SubtitleAsset> asset,
        int warning_length,
        int error_length,
        LinesCharactersResult* result
@@ -1183,8 +1206,8 @@ verify_text_lines_and_characters (
                {}
 
                Time time;
-               int position; //< position from 0 at top of screen to 100 at bottom
-               int characters;
+               int position = 0;   ///< vertical position from 0 at top of screen to 100 at bottom
+               int characters = 0; ///< number of characters in the text of this event
                shared_ptr<Event> start;
        };
 
@@ -1203,6 +1226,7 @@ verify_text_lines_and_characters (
                return 0L;
        };
 
+       /* Make a list of "subtitle starts" and "subtitle ends" events */
        for (auto j: asset->subtitles()) {
                auto text = dynamic_pointer_cast<const SubtitleString>(j);
                if (text) {
@@ -1212,20 +1236,25 @@ verify_text_lines_and_characters (
                }
        }
 
-       std::sort(events.begin(), events.end(), [](shared_ptr<Event> const& a, shared_ptr<Event>const& b) {
+       std::sort(events.begin(), events.end(), [](shared_ptr<Event> const& a, shared_ptr<Event> const& b) {
                return a->time < b->time;
        });
 
-       map<int, int> current;
+       /* The number of characters currently displayed at different vertical positions, i.e. on
+        * what we consider different lines.  Key is the vertical position (0 to 100) and the value
+        * is a list of the active subtitles in that position.
+        */
+       map<int, vector<shared_ptr<Event>>> current;
        for (auto i: events) {
                if (current.size() > 3) {
                        result->line_count_exceeded = true;
                }
                for (auto j: current) {
-                       if (j.second > warning_length) {
+                       int length = std::accumulate(j.second.begin(), j.second.end(), 0, [](int total, shared_ptr<const Event> event) { return total + event->characters; });
+                       if (length > warning_length) {
                                result->warning_length_exceeded = true;
                        }
-                       if (j.second > error_length) {
+                       if (length > error_length) {
                                result->error_length_exceeded = true;
                        }
                }
@@ -1233,17 +1262,20 @@ verify_text_lines_and_characters (
                if (i->start) {
                        /* end of a subtitle */
                        DCP_ASSERT (current.find(i->start->position) != current.end());
-                       if (current[i->start->position] == i->start->characters) {
+                       auto& current_position = current[i->start->position]; /* reference: the erase below must change the list held in the map, not a copy */
+                       auto iter = std::find(current_position.begin(), current_position.end(), i->start);
+                       if (iter != current_position.end()) {
+                               current_position.erase(iter);
+                       }
+                       if (current_position.empty()) {
                                current.erase(i->start->position);
-                       } else {
-                               current[i->start->position] -= i->start->characters;
                        }
                } else {
                        /* start of a subtitle */
                        if (current.find(i->position) == current.end()) {
-                               current[i->position] = i->characters;
+                               current[i->position] = vector<shared_ptr<Event>>{i};
                        } else {
-                               current[i->position] += i->characters;
+                               current[i->position].push_back(i);
                        }
                }
        }
@@ -1258,7 +1290,7 @@ verify_text_details(dcp::Standard standard, vector<shared_ptr<Reel>> reels, vect
                return;
        }
 
-       if (reels[0]->main_subtitle()) {
+       if (reels[0]->main_subtitle() && reels[0]->main_subtitle()->asset_ref().resolved()) {
                verify_text_details(standard, reels, reels[0]->main_subtitle()->edit_rate().numerator, notes,
                        [](shared_ptr<Reel> reel) {
                                return static_cast<bool>(reel->main_subtitle());
@@ -1578,8 +1610,16 @@ verify_cpl(
        for (auto i: dcp->pkls()) {
                /* Check that the CPL's hash corresponds to the PKL */
                optional<string> h = i->hash(cpl->id());
-               if (h && make_digest(ArrayData(*cpl->file())) != *h) {
-                       notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::MISMATCHED_CPL_HASHES, cpl->id(), cpl->file().get()});
+               auto const calculated_cpl_hash = h ? make_digest(ArrayData(*cpl->file())) : string(); /* only hash the CPL when this PKL has a hash for it */
+               if (h && calculated_cpl_hash != *h) {
+                       notes.push_back(
+                               dcp::VerificationNote(
+                                       VerificationNote::Type::ERROR,
+                                       VerificationNote::Code::MISMATCHED_CPL_HASHES,
+                                       cpl->id(),
+                                       cpl->file().get()
+                                       ).set_calculated_hash(calculated_cpl_hash).set_reference_hash(*h)
+                               );
                }
 
                /* Check that any PKL with a single CPL has its AnnotationText the same as the CPL's ContentTitleText */
@@ -1706,7 +1746,7 @@ verify_cpl(
 
                LinesCharactersResult result;
                for (auto reel: cpl->reels()) {
-                       if (reel->main_subtitle() && reel->main_subtitle()->asset()) {
+                       if (reel->main_subtitle() && reel->main_subtitle()->asset_ref().resolved()) {
                                verify_text_lines_and_characters(reel->main_subtitle()->asset(), 52, 79, &result);
                        }
                }
@@ -1865,16 +1905,20 @@ dcp::verify (
                }
 
                for (auto cpl: dcp->cpls()) {
-                       verify_cpl(
-                               dcp,
-                               cpl,
-                               stage,
-                               *xsd_dtd_directory,
-                               progress,
-                               options,
-                               notes,
-                               state
-                               );
+                       try {
+                               verify_cpl(
+                                       dcp,
+                                       cpl,
+                                       stage,
+                                       *xsd_dtd_directory,
+                                       progress,
+                                       options,
+                                       notes,
+                                       state
+                                       );
+                       } catch (ReadError& e) {
+                               notes.push_back({VerificationNote::Type::ERROR, VerificationNote::Code::FAILED_READ, string(e.what())});
+                       }
                }
 
                for (auto pkl: dcp->pkls()) {
@@ -1911,15 +1955,15 @@ dcp::note_to_string (VerificationNote note)
        case VerificationNote::Code::FAILED_READ:
                return *note.note();
        case VerificationNote::Code::MISMATCHED_CPL_HASHES:
-               return String::compose("The hash of the CPL %1 in the PKL does not agree with the CPL file.", note.note().get());
+               return String::compose("The hash (%1) of the CPL (%2) in the PKL does not agree with the CPL file (%3).", note.reference_hash().get(), note.note().get(), note.calculated_hash().get());
        case VerificationNote::Code::INVALID_PICTURE_FRAME_RATE:
                return String::compose("The picture in a reel has an invalid frame rate %1.", note.note().get());
        case VerificationNote::Code::INCORRECT_PICTURE_HASH:
-               return String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
+               return String::compose("The hash (%1) of the picture asset %2 does not agree with the PKL file (%3).", note.calculated_hash().get(), note.file()->filename(), note.reference_hash().get());
        case VerificationNote::Code::MISMATCHED_PICTURE_HASHES:
                return String::compose("The PKL and CPL hashes differ for the picture asset %1.", note.file()->filename());
        case VerificationNote::Code::INCORRECT_SOUND_HASH:
-               return String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
+               return String::compose("The hash (%1) of the sound asset %2 does not agree with the PKL file (%3).", note.calculated_hash().get(), note.file()->filename(), note.reference_hash().get());
        case VerificationNote::Code::MISMATCHED_SOUND_HASHES:
                return String::compose("The PKL and CPL hashes differ for the sound asset %1.", note.file()->filename());
        case VerificationNote::Code::EMPTY_ASSET_PATH:
@@ -1937,9 +1981,19 @@ dcp::note_to_string (VerificationNote note)
        case VerificationNote::Code::INVALID_DURATION:
                return String::compose("The duration of the asset %1 is less than 1 second.", note.note().get());
        case VerificationNote::Code::INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
-               return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
+               return String::compose(
+                       "Frame %1 (timecode %2) in asset %3 has an instantaneous bit rate that is larger than the limit of 250Mbit/s.",
+                       note.frame().get(),
+                       dcp::Time(note.frame().get(), note.frame_rate().get(), note.frame_rate().get()).as_string(dcp::Standard::SMPTE),
+                       note.file()->filename()
+                       );
        case VerificationNote::Code::NEARLY_INVALID_PICTURE_FRAME_SIZE_IN_BYTES:
-               return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
+               return String::compose(
+                       "Frame %1 (timecode %2) in asset %3 has an instantaneous bit rate that is close to the limit of 250Mbit/s.",
+                       note.frame().get(),
+                       dcp::Time(note.frame().get(), note.frame_rate().get(), note.frame_rate().get()).as_string(dcp::Standard::SMPTE),
+                       note.file()->filename()
+                       );
        case VerificationNote::Code::EXTERNAL_ASSET:
                return String::compose("The asset %1 that this DCP refers to is not included in the DCP.  It may be a VF.", note.note().get());
        case VerificationNote::Code::THREED_ASSET_MARKED_AS_TWOD:
@@ -2040,7 +2094,7 @@ dcp::note_to_string (VerificationNote note)
                return "Some assets are encrypted but some are not.";
        case VerificationNote::Code::INVALID_JPEG2000_CODESTREAM:
                return String::compose(
-                       "Frame %1 (timecode %2) has an invalid JPEG2000 codestream (%2).",
+                       "Frame %1 (timecode %2) has an invalid JPEG2000 codestream (%3).",
                        note.frame().get(),
                        dcp::Time(note.frame().get(), note.frame_rate().get(), note.frame_rate().get()).as_string(dcp::Standard::SMPTE),
                        note.note().get()
@@ -2125,6 +2179,10 @@ dcp::note_to_string (VerificationNote note)
                return String::compose("The asset with ID %1 in the asset map actually has an id of %2", note.id().get(), note.other_id().get());
        case VerificationNote::Code::EMPTY_CONTENT_VERSION_LABEL_TEXT:
                return String::compose("The <LabelText> in a <ContentVersion> in CPL %1 is empty", note.id().get());
+       case VerificationNote::Code::INVALID_CPL_NAMESPACE:
+               return String::compose("The namespace %1 in CPL %2 is invalid", note.note().get(), note.file()->filename());
+       case VerificationNote::Code::MISSING_CPL_CONTENT_VERSION:
+               return String::compose("The CPL %1 has no <ContentVersion> tag", note.note().get());
        }
 
        return "";
@@ -2144,7 +2202,9 @@ dcp::operator== (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
                a.size() == b.size() &&
                a.id() == b.id() &&
                a.other_id() == b.other_id() &&
-               a.frame_rate() == b.frame_rate();
+               a.frame_rate() == b.frame_rate() &&
+               a.reference_hash() == b.reference_hash() &&
+               a.calculated_hash() == b.calculated_hash();
 }
 
 
@@ -2187,7 +2247,11 @@ dcp::operator< (dcp::VerificationNote const& a, dcp::VerificationNote const& b)
                return a.id().get_value_or("") < b.id().get_value_or("");
        }
 
-       return a.other_id().get_value_or("") < b.other_id().get_value_or("");
+       if (a.other_id() != b.other_id()) {
+               return a.other_id().get_value_or("") < b.other_id().get_value_or("");
+       }
+
+       return a.frame_rate().get_value_or(0) < b.frame_rate().get_value_or(0); /* must be "<": the last tie-break still has to be a strict ordering */
 }