Bv2.1 6.2.1: Check that subtitle XML <Language> conforms to RFC 5646.
[libdcp.git] / src / verify.cc
index 11eb75d2188ec1b89522c12e26ea05d18d21305f..89d84eef97c46fe82d503ce0bf74f75f730fa272 100644 (file)
@@ -46,6 +46,7 @@
 #include "exceptions.h"
 #include "compose.hpp"
 #include "raw_convert.h"
+#include "smpte_subtitle_asset.h"
 #include <xercesc/util/PlatformUtils.hpp>
 #include <xercesc/parsers/XercesDOMParser.hpp>
 #include <xercesc/parsers/AbstractDOMParser.hpp>
@@ -78,10 +79,10 @@ using std::string;
 using std::cout;
 using std::map;
 using std::max;
-using boost::shared_ptr;
+using std::shared_ptr;
 using boost::optional;
 using boost::function;
-using boost::dynamic_pointer_cast;
+using std::dynamic_pointer_cast;
 
 using namespace dcp;
 using namespace xercesc;
@@ -103,6 +104,8 @@ public:
                : _message (xml_ch_to_string(e.getMessage()))
                , _line (e.getLineNumber())
                , _column (e.getColumnNumber())
+               , _public_id (e.getPublicId() ? xml_ch_to_string(e.getPublicId()) : "")
+               , _system_id (e.getSystemId() ? xml_ch_to_string(e.getSystemId()) : "")
        {
 
        }
@@ -119,10 +122,20 @@ public:
                return _column;
        }
 
+       string public_id () const {
+               return _public_id;
+       }
+
+       string system_id () const {
+               return _system_id;
+       }
+
 private:
        string _message;
        uint64_t _line;
        uint64_t _column;
+       string _public_id;
+       string _system_id;
 };
 
 
@@ -196,6 +209,9 @@ public:
        LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
                : _xsd_dtd_directory (xsd_dtd_directory)
        {
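+               /* XXX: I'm not clear on which things need to be in this list; some XSDs are apparently
+                * found without being listed here.  NOTE(review): resolveEntity() falls back to looking
+                * for an unlisted system ID as a file of that name inside _xsd_dtd_directory, which
+                * presumably accounts for this -- confirm.
+                */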
                add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
                add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
                add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
@@ -203,18 +219,26 @@ public:
                add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
                add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
                add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
+               add("http://www.digicine.com/PROTO-ASDCP-CC-CPL-20070926#", "PROTO-ASDCP-CC-CPL-20070926.xsd");
                add("interop-subs", "DCSubtitle.v1.mattsson.xsd");
                add("http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd", "SMPTE-428-7-2010-DCST.xsd");
+               add("http://www.smpte-ra.org/schemas/429-16/2014/CPL-Metadata", "SMPTE-429-16.xsd");
+               add("http://www.dolby.com/schemas/2012/AD", "Dolby-2012-AD.xsd");
+               add("http://www.smpte-ra.org/schemas/429-10/2008/Main-Stereo-Picture-CPL", "SMPTE-429-10-2008.xsd");
        }
 
        InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
        {
+               if (!system_id) {
+                       return 0;
+               }
                string system_id_str = xml_ch_to_string (system_id);
+               boost::filesystem::path p = _xsd_dtd_directory;
                if (_files.find(system_id_str) == _files.end()) {
-                       return 0;
+                       p /= system_id_str;
+               } else {
+                       p /= _files[system_id_str];
                }
-
-               boost::filesystem::path p = _xsd_dtd_directory / _files[system_id_str];
                StringToXMLCh ch (p.string());
                return new LocalFileInputSource(ch.get());
        }
@@ -264,25 +288,35 @@ validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, list<Verificatio
                parser.setDoNamespaces(true);
                parser.setDoSchema(true);
 
-               map<string, string> schema;
-               schema["http://www.w3.org/2000/09/xmldsig#"] = "xmldsig-core-schema.xsd";
-               schema["http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd"] = "xmldsig-core-schema.xsd";
-               schema["http://www.smpte-ra.org/schemas/429-7/2006/CPL"] = "SMPTE-429-7-2006-CPL.xsd";
-               schema["http://www.smpte-ra.org/schemas/429-8/2006/PKL"] = "SMPTE-429-8-2006-PKL.xsd";
-               schema["http://www.smpte-ra.org/schemas/429-9/2007/AM"] = "SMPTE-429-9-2007-AM.xsd";
-               schema["http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd"] = "Main-Stereo-Picture-CPL.xsd";
-               schema["http://www.digicine.com/PROTO-ASDCP-CPL-20040511#"] = "PROTO-ASDCP-CPL-20040511.xsd";
-               schema["http://www.digicine.com/PROTO-ASDCP-PKL-20040311#"] = "PROTO-ASDCP-PKL-20040311.xsd";
-               schema["http://www.digicine.com/PROTO-ASDCP-AM-20040311#"] = "PROTO-ASDCP-AM-20040311.xsd";
-               schema["interop-subs"] = "DCSubtitle.v1.mattsson.xsd";
-               schema["http://www.smpte-ra.org/schemas/428-7/2010/DCST.xsd"] = "DCDMSubtitle-2010.xsd";
-
+               vector<string> schema;
+               schema.push_back("xml.xsd");
+               schema.push_back("xmldsig-core-schema.xsd");
+               schema.push_back("SMPTE-429-7-2006-CPL.xsd");
+               schema.push_back("SMPTE-429-8-2006-PKL.xsd");
+               schema.push_back("SMPTE-429-9-2007-AM.xsd");
+               schema.push_back("Main-Stereo-Picture-CPL.xsd");
+               schema.push_back("PROTO-ASDCP-CPL-20040511.xsd");
+               schema.push_back("PROTO-ASDCP-PKL-20040311.xsd");
+               schema.push_back("PROTO-ASDCP-AM-20040311.xsd");
+               schema.push_back("DCSubtitle.v1.mattsson.xsd");
+               schema.push_back("DCDMSubtitle-2010.xsd");
+               schema.push_back("PROTO-ASDCP-CC-CPL-20070926.xsd");
+               schema.push_back("SMPTE-429-16.xsd");
+               schema.push_back("Dolby-2012-AD.xsd");
+               schema.push_back("SMPTE-429-10-2008.xsd");
+               schema.push_back("xlink.xsd");
+               schema.push_back("SMPTE-335-2012.xsd");
+               schema.push_back("SMPTE-395-2014-13-1-aaf.xsd");
+               schema.push_back("isdcf-mca.xsd");
+               schema.push_back("SMPTE-429-12-2008.xsd");
+
+               /* XXX: I'm not especially clear what the schema location string built below is for,
+                * but it seems to be necessary.  Schemas that are not mentioned in the list above
+                * are not read, and the things they describe are not checked.
+                */
                string locations;
-               for (map<string, string>::const_iterator i = schema.begin(); i != schema.end(); ++i) {
-                       locations += i->first;
-                       locations += " ";
-                       boost::filesystem::path p = xsd_dtd_directory / i->second;
-                       locations += p.string() + " ";
+               BOOST_FOREACH (string i, schema) {
+                       locations += String::compose("%1 %1 ", i, i);
                }
 
                parser.setExternalSchemaLocation(locations.c_str());
@@ -312,7 +346,7 @@ validate_xml (T xml, boost::filesystem::path xsd_dtd_directory, list<Verificatio
                                VerificationNote::VERIFY_ERROR,
                                VerificationNote::XML_VALIDATION_ERROR,
                                i.message(),
-                               xml,
+                               boost::trim_copy(i.public_id() + " " + i.system_id()),
                                i.line()
                                )
                        );
@@ -361,10 +395,21 @@ verify_asset (shared_ptr<const DCP> dcp, shared_ptr<const ReelMXF> reel_mxf, fun
 }
 
 
+void
+verify_language_tag (string tag, list<VerificationNote>& notes)
+{ /* Check a language tag by trying to build a dcp::LanguageTag from it.
+   * @param tag   the language string to check.
+   * @param notes list that receives a note if the tag is rejected; otherwise left untouched.
+   */
+       try {
+               dcp::LanguageTag test (tag); /* built only to see whether construction throws; the object itself is unused */
+       } catch (dcp::LanguageTagError &) { /* tag was rejected: report it as a Bv2.1 BAD_LANGUAGE error carrying the offending tag */
+               notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::BAD_LANGUAGE, tag));
+       }
+}
+
+
 enum VerifyPictureAssetResult
 {
        VERIFY_PICTURE_ASSET_RESULT_GOOD,
-       VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_BIG,
+       VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE,
        VERIFY_PICTURE_ASSET_RESULT_BAD,
 };
 
@@ -372,13 +417,13 @@ enum VerifyPictureAssetResult
 int
 biggest_frame_size (shared_ptr<const MonoPictureFrame> frame)
 {
-       return frame->j2k_size ();
+       return frame->size ();
 }
 
 int
 biggest_frame_size (shared_ptr<const StereoPictureFrame> frame)
 {
-       return max(frame->left_j2k_size(), frame->right_j2k_size());
+       return max(frame->left()->size(), frame->right()->size());
 }
 
 
@@ -405,7 +450,7 @@ verify_picture_asset_type (shared_ptr<ReelMXF> reel_mxf, function<void (float)>
        if (biggest_frame > max_frame) {
                return VERIFY_PICTURE_ASSET_RESULT_BAD;
        } else if (biggest_frame > risky_frame) {
-               return VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_BIG;
+               return VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE;
        }
 
        return VERIFY_PICTURE_ASSET_RESULT_GOOD;
@@ -465,7 +510,7 @@ verify_main_picture_asset (
                                        )
                                );
                        break;
-               case VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_BIG:
+               case VERIFY_PICTURE_ASSET_RESULT_FRAME_NEARLY_TOO_LARGE:
                        notes.push_back (
                                VerificationNote(
                                        VerificationNote::VERIFY_WARNING, VerificationNote::PICTURE_FRAME_NEARLY_TOO_LARGE, file
@@ -510,6 +555,16 @@ verify_main_sound_asset (
 }
 
 
+static void
+verify_main_subtitle_reel (shared_ptr<const ReelSubtitleAsset> reel_asset, list<VerificationNote>& notes)
+{
+       /* Check the language declared on the reel's subtitle asset, if there is one, by
+        * passing it to verify_language_tag(), which may append to notes.  A reel asset
+        * with no language is accepted here without any note being added.
+        * XXX: is Language compulsory?
+        */
+       if (reel_asset->language()) {
+               verify_language_tag (*reel_asset->language(), notes);
+       }
+}
+
+
 static void
 verify_main_subtitle_asset (
        shared_ptr<const Reel> reel,
@@ -518,12 +573,19 @@ verify_main_subtitle_asset (
        list<VerificationNote>& notes
        )
 {
-       shared_ptr<ReelSubtitleAsset> reel_asset = reel->main_subtitle ();
-       stage ("Checking subtitle XML", reel->main_subtitle()->asset()->file());
+       shared_ptr<SubtitleAsset> asset = reel->main_subtitle()->asset();
+       stage ("Checking subtitle XML", asset->file());
        /* Note: we must not use SubtitleAsset::xml_as_string() here as that will mean the data on disk
         * gets passed through libdcp which may clean up and therefore hide errors.
         */
-       validate_xml (reel->main_subtitle()->asset()->raw_xml(), xsd_dtd_directory, notes);
+       validate_xml (asset->raw_xml(), xsd_dtd_directory, notes);
+
+       shared_ptr<SMPTESubtitleAsset> smpte = dynamic_pointer_cast<SMPTESubtitleAsset>(asset);
+       if (smpte) {
+               if (smpte->language()) {
+                       verify_language_tag (*smpte->language(), notes);
+               }
+       }
 }
 
 
@@ -552,6 +614,14 @@ dcp::verify (
                        notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
                } catch (XMLError& e) {
                        notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
+               } catch (MXFFileError& e) {
+                       notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
+               } catch (cxml::Error& e) {
+                       notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
+               }
+
+               if (dcp->standard() != dcp::SMPTE) {
+                       notes.push_back (VerificationNote(VerificationNote::VERIFY_BV21_ERROR, VerificationNote::NOT_SMPTE));
                }
 
                BOOST_FOREACH (shared_ptr<CPL> cpl, dcp->cpls()) {
@@ -561,7 +631,7 @@ dcp::verify (
                        /* Check that the CPL's hash corresponds to the PKL */
                        BOOST_FOREACH (shared_ptr<PKL> i, dcp->pkls()) {
                                optional<string> h = i->hash(cpl->id());
-                               if (h && make_digest(Data(*cpl->file())) != *h) {
+                               if (h && make_digest(ArrayData(*cpl->file())) != *h) {
                                        notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::CPL_HASH_INCORRECT));
                                }
                        }
@@ -601,8 +671,11 @@ dcp::verify (
                                        verify_main_sound_asset (dcp, reel, stage, progress, notes);
                                }
 
-                               if (reel->main_subtitle() && reel->main_subtitle()->asset_ref().resolved()) {
-                                       verify_main_subtitle_asset (reel, stage, xsd_dtd_directory, notes);
+                               if (reel->main_subtitle()) {
+                                       verify_main_subtitle_reel (reel->main_subtitle(), notes);
+                                       if (reel->main_subtitle()->asset_ref().resolved()) {
+                                               verify_main_subtitle_asset (reel, stage, xsd_dtd_directory, notes);
+                                       }
                                }
                        }
                }
@@ -630,17 +703,17 @@ dcp::note_to_string (dcp::VerificationNote note)
        case dcp::VerificationNote::GENERAL_READ:
                return *note.note();
        case dcp::VerificationNote::CPL_HASH_INCORRECT:
-               return "The hash of the CPL in the PKL does not agree with the CPL file";
+               return "The hash of the CPL in the PKL does not agree with the CPL file.";
        case dcp::VerificationNote::INVALID_PICTURE_FRAME_RATE:
-               return "The picture in a reel has an invalid frame rate";
+               return "The picture in a reel has an invalid frame rate.";
        case dcp::VerificationNote::PICTURE_HASH_INCORRECT:
-               return dcp::String::compose("The hash of the picture asset %1 does not agree with the PKL file", note.file()->filename());
+               return dcp::String::compose("The hash of the picture asset %1 does not agree with the PKL file.", note.file()->filename());
        case dcp::VerificationNote::PKL_CPL_PICTURE_HASHES_DISAGREE:
-               return dcp::String::compose("The PKL and CPL hashes disagree for the picture asset %1", note.file()->filename());
+               return dcp::String::compose("The PKL and CPL hashes disagree for the picture asset %1.", note.file()->filename());
        case dcp::VerificationNote::SOUND_HASH_INCORRECT:
-               return dcp::String::compose("The hash of the sound asset %1 does not agree with the PKL file", note.file()->filename());
+               return dcp::String::compose("The hash of the sound asset %1 does not agree with the PKL file.", note.file()->filename());
        case dcp::VerificationNote::PKL_CPL_SOUND_HASHES_DISAGREE:
-               return dcp::String::compose("The PKL and CPL hashes disagree for the sound asset %1", note.file()->filename());
+               return dcp::String::compose("The PKL and CPL hashes disagree for the sound asset %1.", note.file()->filename());
        case dcp::VerificationNote::EMPTY_ASSET_PATH:
                return "The asset map contains an empty asset path.";
        case dcp::VerificationNote::MISSING_ASSET:
@@ -650,15 +723,21 @@ dcp::note_to_string (dcp::VerificationNote note)
        case dcp::VerificationNote::XML_VALIDATION_ERROR:
                return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
        case dcp::VerificationNote::MISSING_ASSETMAP:
-               return "No ASSETMAP or ASSETMAP.xml was found";
+               return "No ASSETMAP or ASSETMAP.xml was found.";
        case dcp::VerificationNote::INTRINSIC_DURATION_TOO_SMALL:
                return String::compose("The intrinsic duration of an asset is less than 1 second long: %1", note.note().get());
        case dcp::VerificationNote::DURATION_TOO_SMALL:
                return String::compose("The duration of an asset is less than 1 second long: %1", note.note().get());
        case dcp::VerificationNote::PICTURE_FRAME_TOO_LARGE:
-               return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place", note.file()->filename());
+               return String::compose("The instantaneous bit rate of the picture asset %1 is larger than the limit of 250Mbit/s in at least one place.", note.file()->filename());
        case dcp::VerificationNote::PICTURE_FRAME_NEARLY_TOO_LARGE:
-               return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place", note.file()->filename());
+               return String::compose("The instantaneous bit rate of the picture asset %1 is close to the limit of 250Mbit/s in at least one place.", note.file()->filename());
+       case dcp::VerificationNote::EXTERNAL_ASSET:
+               return String::compose("An asset that this DCP refers to is not included in the DCP.  It may be a VF.  Missing asset is %1.", note.note().get());
+       case dcp::VerificationNote::NOT_SMPTE:
+               return "This DCP does not use the SMPTE standard, which is required for Bv2.1 compliance.";
+       case dcp::VerificationNote::BAD_LANGUAGE:
+               return String::compose("The DCP specifies a language '%1' which does not conform to the RFC 5646 standard.", note.note().get());
        }
 
        return "";