Bv2.1 6.2.1: Check that subtitle XML <Language> conforms to RFC 5646.
[libdcp.git] / src / verify.cc
index 9a0895f6f8e79872c802aa3f552d8b0973baf0b3..89d84eef97c46fe82d503ce0bf74f75f730fa272 100644 (file)
@@ -46,6 +46,7 @@
 #include "exceptions.h"
 #include "compose.hpp"
 #include "raw_convert.h"
+#include "smpte_subtitle_asset.h"
 #include <xercesc/util/PlatformUtils.hpp>
 #include <xercesc/parsers/XercesDOMParser.hpp>
 #include <xercesc/parsers/AbstractDOMParser.hpp>
@@ -78,10 +79,10 @@ using std::string;
 using std::cout;
 using std::map;
 using std::max;
-using boost::shared_ptr;
+using std::shared_ptr;
 using boost::optional;
 using boost::function;
-using boost::dynamic_pointer_cast;
+using std::dynamic_pointer_cast;
 
 using namespace dcp;
 using namespace xercesc;
@@ -103,6 +104,8 @@ public:
                : _message (xml_ch_to_string(e.getMessage()))
                , _line (e.getLineNumber())
                , _column (e.getColumnNumber())
+               , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
+               , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
        {
 
        }
@@ -119,10 +122,20 @@ public:
                return _column;
        }
 
+       string public_id () const { /* public ID copied from the error object given to the constructor; "" if it supplied none */
+               return _public_id;
+       }
+
+       string system_id () const { /* system ID copied from the error object given to the constructor; "" if it supplied none */
+               return _system_id;
+       }
+
 private:
        string _message;
        uint64_t _line;
        uint64_t _column;
+       string _public_id;
+       string _system_id;
 };
 
 
@@ -196,6 +209,9 @@ public:
        LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
                : _xsd_dtd_directory (xsd_dtd_directory)
        {
+               /* XXX: I'm not clear on which things need to be in this list; some XSDs are apparently
+                * found, as if by magic, without being listed here.
+                */
                add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
                add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
                add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
@@ -206,10 +222,16 @@ public:
                add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
                add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
                add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
+               add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
+               add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
+               add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
        }
 
        InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
        {
+               if (!system_id) {
+                       return 0;
+               }
                string system_id_str = xml_ch_to_string (system_id);
                boost::filesystem::path p = _xsd_dtd_directory;
                if (_files.find(system_id_str) == _files.end()) {
@@ -267,6 +289,7 @@ validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, list<Verificatio
                parser.setDoSchema(true);
 
                vector<string> schema;
+               schema.push_back("xml.xsd");
                schema.push_back("xmldsig-core-schema.xsd");
                schema.push_back("SMPTE-429-7-2006-CPL.xsd");
                schema.push_back("SMPTE-429-8-2006-PKL.xsd");
@@ -278,8 +301,19 @@ validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, list<Verificatio
                schema.push_back("DCSubtitle.v1.mattsson.xsd");
                schema.push_back("DCDMSubtitle-2010.xsd");
                schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
-
-               /* XXX: I'm not especially clear what this is for, but it seems to be necessary */
+               schema.push_back("SMPTE-429-16.xsd");
+               schema.push_back("Dolby-2012-AD.xsd");
+               schema.push_back("SMPTE-429-10-2008.xsd");
+               schema.push_back("xlink.xsd");
+               schema.push_back("SMPTE-335-2012.xsd");
+               schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
+               schema.push_back("isdcf-mca.xsd");
+               schema.push_back("SMPTE-429-12-2008.xsd");
+
+               /* XXX: I'm not especially clear what this is for, but it seems to be necessary.
+                * Schemas that are not mentioned in the schema list above are not read, and the
+                * things they describe are not checked.
+                */
                string locations;
                BOOST_FOREACH (string i, schema) {
                        locations += String::compose("%1 %1 ", i, i);
@@ -312,7 +346,7 @@ validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, list<Verificatio
                                VerificationNote::VERIFY_ERROR,
                                VerificationNote::XML_VALIDATION_ERROR,
                                i.message(),
-                               xml,
+                               boost::trim_copy(i.public_id() + " " + i.system_id()),
                                i.line()
                                )
                        );
@@ -361,10 +395,21 @@ verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelMXF> reel_mxf, fun
 }
 
 
+void
+verify_language_tag (string tag, list<VerificationNote>& notes)
+{ /* Appends a VERIFY_BV21_ERROR / BAD_LANGUAGE note carrying tag to notes if building a dcp::LanguageTag from tag throws. */
+       try {
+               dcp::LanguageTag test (tag); /* built only to see whether construction throws; the object itself is never used */
+       } catch (dcp::LanguageTagError &) {
+               notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::BAD_LANGUAGE, tag));
+       }
+}
+
+
 enum VerifyPictureAssetResult
 {
        VERIFY_PICTURE_ASSET_RESULT_GOOD,
-       VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_BIG,
+       VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE, /* biggest frame is above risky_frame but not above max_frame */
        VERIFY_PICTURE_ASSET_RESULT_BAD,
 };
 
@@ -372,13 +417,13 @@ enum VerifyPictureAssetResult
 int
 biggest_frame_size (shared_ptr<const MonoPictureFrame> frame)
 {
-       return frame->j2k_size ();
+       return frame->size (); /* a mono frame reports a single size, so that is the biggest */
 }
 
 int
 biggest_frame_size (shared_ptr<const StereoPictureFrame> frame)
 {
-       return max(frame->left_j2k_size(), frame->right_j2k_size());
+       return max(frame->left()->size(), frame->right()->size()); /* the larger of the left and right sizes */
 }
 
 
@@ -405,7 +450,7 @@ verify_picture_asset_type (shared_ptr<ReelMXF> reel_mxf, function<void (float)>
        if (biggest_frame > max_frame) {
                return VERIFY_PICTURE_ASSET_RESULT_BAD;
        } else if (biggest_frame > risky_frame) {
-               return VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_BIG;
+               return VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE;
        }
 
        return VERIFY_PICTURE_ASSET_RESULT_GOOD;
@@ -465,7 +510,7 @@ verify_main_picture_asset (
                                        )
                                );
                        break;
-               case VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_BIG:
+               case VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE:
                        notes.push_back (
                                VerificationNote(
                                        VerificationNote::VERIFY_WARNING, VerificationNote::PICTURE_FRAME_NEARLY_TOO_LARGE, file
@@ -510,6 +555,16 @@ verify_main_sound_asset (
 }
 
 
+static void
+verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, list<VerificationNote>& notes)
+{
+       /* XXX: is Language compulsory?  As written, a reel asset with no language is accepted without any note. */
+       if (reel_asset->language()) {
+               verify_language_tag (*reel_asset->language(), notes); /* may append a BAD_LANGUAGE note to notes */
+       }
+}
+
+
 static void
 verify_main_subtitle_asset (
        shared_ptr<const Reel> reel,
@@ -518,12 +573,19 @@ verify_main_subtitle_asset (
        list<VerificationNote>& notes
        )
 {
-       shared_ptr<ReelSubtitleAsset> reel_asset = reel->main_subtitle ();
-       stage ("Checking subtitle XML", reel->main_subtitle()->asset()->file());
+       shared_ptr<SubtitleAsset> asset = reel->main_subtitle()->asset();
+       stage ("Checking subtitle XML", asset->file());
        /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
         * gets passed through libdcp which may clean up and therefore hide errors.
         */
-       validate_xml (reel->main_subtitle()->asset()->raw_xml(), xsd_dtd_directory, notes);
+       validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
+
+       shared_ptr<SMPTESubtitleAsset> smpte = dynamic_pointer_cast<SMPTESubtitleAsset>(asset);
+       if (smpte) {
+               if (smpte->language()) {
+                       verify_language_tag (*smpte->language(), notes);
+               }
+       }
 }
 
 
@@ -552,10 +614,16 @@ dcp::verify (
                        notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
                } catch (XMLError& e) {
                        notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
+               } catch (MXFFileError& e) {
+                       notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
                } catch (cxml::Error& e) {
                        notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
                }
 
+               if (dcp->standard() != dcp::SMPTE) {
+                       notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::NOT_SMPTE));
+               }
+
                BOOST_FOREACH (shared_ptr<CPL> cpl, dcp->cpls()) {
                        stage ("Checking CPL", cpl->file());
                        validate_xml (cpl->file().get(), xsd_dtd_directory, notes);
@@ -563,7 +631,7 @@ dcp::verify (
                        /* Check that the CPL's hash corresponds to the PKL */
                        BOOST_FOREACH (shared_ptr<PKL> i, dcp->pkls()) {
                                optional<string> h = i->hash(cpl->id());
-                               if (h && make_digest(Data(*cpl->file())) != *h) {
+                               if (h && make_digest(ArrayData(*cpl->file())) != *h) {
                                        notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::CPL_HASH_INCORRECT));
                                }
                        }
@@ -603,8 +671,11 @@ dcp::verify (
                                        verify_main_sound_asset (dcp, reel, stage, progress, notes);
                                }
 
-                               if (reel->main_subtitle() && reel->main_subtitle()->asset_ref().resolved()) {
-                                       verify_main_subtitle_asset (reel, stage, xsd_dtd_directory, notes);
+                               if (reel->main_subtitle()) {
+                                       verify_main_subtitle_reel (reel->main_subtitle(), notes);
+                                       if (reel->main_subtitle()->asset_ref().resolved()) {
+                                               verify_main_subtitle_asset (reel, stage, xsd_dtd_directory, notes);
+                                       }
                                }
                        }
                }
@@ -663,6 +734,10 @@ dcp::note_to_string (dcp::VerificationNote note)
                return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
        case dcp::VerificationNote::EXTERNAL_ASSET:
                return String::compose("An asset that this DCP refers to is not included in the DCP.  It may be a VF.  Missing asset is %1.", note.note().get());
+       case dcp::VerificationNote::NOT_SMPTE:
+               return "This DCP does not use the SMPTE standard, which is required for Bv2.1 compliance.";
+       case dcp::VerificationNote::BAD_LANGUAGE:
+               return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
        }
 
        return "";