Check ASSETMAP; add a PKL test; tidy up calls to validate_xml().
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2019 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34 #include "verify.h"
35 #include "dcp.h"
36 #include "cpl.h"
37 #include "reel.h"
38 #include "reel_picture_asset.h"
39 #include "reel_sound_asset.h"
40 #include "exceptions.h"
41 #include "compose.hpp"
42 #include "raw_convert.h"
43 #include <xercesc/util/PlatformUtils.hpp>
44 #include <xercesc/parsers/XercesDOMParser.hpp>
45 #include <xercesc/parsers/AbstractDOMParser.hpp>
46 #include <xercesc/sax/HandlerBase.hpp>
47 #include <xercesc/dom/DOMImplementation.hpp>
48 #include <xercesc/dom/DOMImplementationLS.hpp>
49 #include <xercesc/dom/DOMImplementationRegistry.hpp>
50 #include <xercesc/dom/DOMLSParser.hpp>
51 #include <xercesc/dom/DOMException.hpp>
52 #include <xercesc/dom/DOMDocument.hpp>
53 #include <xercesc/dom/DOMNodeList.hpp>
54 #include <xercesc/dom/DOMError.hpp>
55 #include <xercesc/dom/DOMLocator.hpp>
56 #include <xercesc/dom/DOMNamedNodeMap.hpp>
57 #include <xercesc/dom/DOMAttr.hpp>
58 #include <xercesc/dom/DOMErrorHandler.hpp>
59 #include <xercesc/framework/LocalFileInputSource.hpp>
60 #include <boost/noncopyable.hpp>
61 #include <boost/foreach.hpp>
62 #include <boost/algorithm/string.hpp>
63 #include <boost/regex.hpp>
64 #include <map>
65 #include <list>
66 #include <vector>
67 #include <iostream>
68
69 using std::list;
70 using std::vector;
71 using std::string;
72 using std::cout;
73 using std::map;
74 using boost::shared_ptr;
75 using boost::optional;
76 using boost::function;
77
78 using namespace dcp;
79 using namespace xercesc;
80
/** Outcome of the hash comparison carried out by verify_asset() */
enum Result {
        /** none of the comparisons that were made found a difference */
        RESULT_GOOD = 0,
        /** the hash given for the asset in the CPL differs from the one in the PKL */
        RESULT_CPL_PKL_DIFFER = 1,
        /** the hash calculated from the asset differs from the one in the PKL */
        RESULT_BAD = 2
};
86
87 static
88 string
89 xml_ch_to_string (XMLCh const * a)
90 {
91         char* x = XMLString::transcode(a);
92         string const o(x);
93         XMLString::release(&x);
94         return o;
95 }
96
97 class XMLValidationError
98 {
99 public:
100         XMLValidationError (SAXParseException const & e)
101                 : _message (xml_ch_to_string(e.getMessage()))
102                 , _line (e.getLineNumber())
103                 , _column (e.getColumnNumber())
104         {
105
106         }
107
108         string message () const {
109                 return _message;
110         }
111
112         uint64_t line () const {
113                 return _line;
114         }
115
116         uint64_t column () const {
117                 return _column;
118         }
119
120 private:
121         string _message;
122         uint64_t _line;
123         uint64_t _column;
124 };
125
126
127 class DCPErrorHandler : public ErrorHandler
128 {
129 public:
130         void warning(const SAXParseException& e)
131         {
132                 maybe_add (XMLValidationError(e));
133         }
134
135         void error(const SAXParseException& e)
136         {
137                 maybe_add (XMLValidationError(e));
138         }
139
140         void fatalError(const SAXParseException& e)
141         {
142                 maybe_add (XMLValidationError(e));
143         }
144
145         void resetErrors() {
146                 _errors.clear ();
147         }
148
149         list<XMLValidationError> errors () const {
150                 return _errors;
151         }
152
153 private:
154         void maybe_add (XMLValidationError e)
155         {
156                 /* XXX: nasty hack */
157                 if (
158                         e.message() ==
159                         "schema document '/home/carl/src/libdcp/xsd/xml.xsd' has different target namespace "
160                         "from the one specified in instance document 'http://www.w3.org/2001/03/xml.xsd'" ||
161                         e.message() ==
162                         "schema document '/home/carl/src/libdcp/xsd/xmldsig-core-schema.xsd' has different target namespace "
163                         "from the one specified in instance document 'http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd'" ||
164                         e.message() ==
165                         "schema document '/home/carl/src/libdcp/xsd/SMPTE-429-8-2006-PKL.xsd' has different target namespace "
166                         "from the one specified in instance document 'http://www.smpte-ra.org/schemas/429-8/2006/PKL'"
167                         ) {
168                         return;
169                 }
170
171                 _errors.push_back (e);
172         }
173
174         list<XMLValidationError> _errors;
175 };
176
177 class StringToXMLCh : public boost::noncopyable
178 {
179 public:
180         StringToXMLCh (string a)
181         {
182                 _buffer = XMLString::transcode(a.c_str());
183         }
184
185         ~StringToXMLCh ()
186         {
187                 XMLString::release (&_buffer);
188         }
189
190         XMLCh const * get () const {
191                 return _buffer;
192         }
193
194 private:
195         XMLCh* _buffer;
196 };
197
198 class LocalFileResolver : public EntityResolver
199 {
200 public:
201         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
202                 : _xsd_dtd_directory (xsd_dtd_directory)
203         {
204                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
205                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
206                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
207         }
208
209         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
210         {
211                 string system_id_str = xml_ch_to_string (system_id);
212                 if (_files.find(system_id_str) == _files.end()) {
213                         return 0;
214                 }
215
216                 boost::filesystem::path p = _xsd_dtd_directory / _files[system_id_str];
217                 StringToXMLCh ch (p.string());
218                 return new LocalFileInputSource(ch.get());
219         }
220
221 private:
222         void add (string uri, string file)
223         {
224                 _files[uri] = file;
225         }
226
227         std::map<string, string> _files;
228         boost::filesystem::path _xsd_dtd_directory;
229 };
230
231 static
232 void
233 validate_xml (boost::filesystem::path xml_file, boost::filesystem::path xsd_dtd_directory, list<VerificationNote>& notes)
234 {
235         try {
236                 XMLPlatformUtils::Initialize ();
237         } catch (XMLException& e) {
238                 throw MiscError ("Failed to initialise xerces library");
239         }
240
241         DCPErrorHandler error_handler;
242
243         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
244         {
245                 XercesDOMParser parser;
246                 parser.setValidationScheme(XercesDOMParser::Val_Always);
247                 parser.setDoNamespaces(true);
248                 parser.setDoSchema(true);
249
250                 map<string, string> schema;
251                 schema["http://www.w3.org/2000/09/xmldsig#"] = "xmldsig-core-schema.xsd";
252                 schema["http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd"] = "xmldsig-core-schema.xsd";
253                 schema["http://www.smpte-ra.org/schemas/429-7/2006/CPL"] = "SMPTE-429-7-2006-CPL.xsd";
254                 schema["http://www.smpte-ra.org/schemas/429-8/2006/PKL"] = "SMPTE-429-8-2006-PKL.xsd";
255                 schema["http://www.smpte-ra.org/schemas/429-9/2007/AM"] = "SMPTE-429-9-2007-AM.xsd";
256                 schema["http://www.w3.org/2001/03/xml.xsd"] = "xml.xsd";
257
258                 string locations;
259                 for (map<string, string>::const_iterator i = schema.begin(); i != schema.end(); ++i) {
260                         locations += i->first;
261                         locations += " ";
262                         boost::filesystem::path p = xsd_dtd_directory / i->second;
263                         locations += p.string() + " ";
264                 }
265
266                 parser.setExternalSchemaLocation(locations.c_str());
267                 parser.setValidationSchemaFullChecking(true);
268                 parser.setErrorHandler(&error_handler);
269
270                 LocalFileResolver resolver (xsd_dtd_directory);
271                 parser.setEntityResolver(&resolver);
272
273                 try {
274                         parser.resetDocumentPool();
275                         parser.parse(xml_file.string().c_str());
276                 } catch (XMLException& e) {
277                         throw MiscError(xml_ch_to_string(e.getMessage()));
278                 } catch (DOMException& e) {
279                         throw MiscError(xml_ch_to_string(e.getMessage()));
280                 } catch (...) {
281                         throw MiscError("Unknown exception from xerces");
282                 }
283         }
284
285         XMLPlatformUtils::Terminate ();
286
287         BOOST_FOREACH (XMLValidationError i, error_handler.errors()) {
288                 notes.push_back (
289                         VerificationNote(
290                                 VerificationNote::VERIFY_ERROR,
291                                 VerificationNote::Code::XML_VALIDATION_ERROR,
292                                 i.message(),
293                                 xml_file,
294                                 i.line()
295                                 )
296                         );
297         }
298 }
299
300 static Result
301 verify_asset (shared_ptr<DCP> dcp, shared_ptr<ReelMXF> reel_mxf, function<void (float)> progress)
302 {
303         string const actual_hash = reel_mxf->asset_ref()->hash(progress);
304
305         list<shared_ptr<PKL> > pkls = dcp->pkls();
306         /* We've read this DCP in so it must have at least one PKL */
307         DCP_ASSERT (!pkls.empty());
308
309         shared_ptr<Asset> asset = reel_mxf->asset_ref().asset();
310
311         optional<string> pkl_hash;
312         BOOST_FOREACH (shared_ptr<PKL> i, pkls) {
313                 pkl_hash = i->hash (reel_mxf->asset_ref()->id());
314                 if (pkl_hash) {
315                         break;
316                 }
317         }
318
319         DCP_ASSERT (pkl_hash);
320
321         optional<string> cpl_hash = reel_mxf->hash();
322         if (cpl_hash && *cpl_hash != *pkl_hash) {
323                 return RESULT_CPL_PKL_DIFFER;
324         }
325
326         if (actual_hash != *pkl_hash) {
327                 return RESULT_BAD;
328         }
329
330         return RESULT_GOOD;
331 }
332
333
334 list<VerificationNote>
335 dcp::verify (
336         vector<boost::filesystem::path> directories,
337         function<void (string, optional<boost::filesystem::path>)> stage,
338         function<void (float)> progress,
339         boost::filesystem::path xsd_dtd_directory
340         )
341 {
342         xsd_dtd_directory = boost::filesystem::canonical (xsd_dtd_directory);
343
344         list<VerificationNote> notes;
345
346         list<shared_ptr<DCP> > dcps;
347         BOOST_FOREACH (boost::filesystem::path i, directories) {
348                 dcps.push_back (shared_ptr<DCP> (new DCP (i)));
349         }
350
351         BOOST_FOREACH (shared_ptr<DCP> dcp, dcps) {
352                 stage ("Checking DCP", dcp->directory());
353                 try {
354                         dcp->read (&notes);
355                 } catch (DCPReadError& e) {
356                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::Code::GENERAL_READ, string(e.what())));
357                 } catch (XMLError& e) {
358                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::Code::GENERAL_READ, string(e.what())));
359                 }
360
361                 BOOST_FOREACH (shared_ptr<CPL> cpl, dcp->cpls()) {
362                         stage ("Checking CPL", cpl->file());
363                         validate_xml (cpl->file().get(), xsd_dtd_directory, notes);
364
365                         /* Check that the CPL's hash corresponds to the PKL */
366                         BOOST_FOREACH (shared_ptr<PKL> i, dcp->pkls()) {
367                                 optional<string> h = i->hash(cpl->id());
368                                 if (h && make_digest(Data(*cpl->file())) != *h) {
369                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::CPL_HASH_INCORRECT));
370                                 }
371                         }
372
373                         BOOST_FOREACH (shared_ptr<Reel> reel, cpl->reels()) {
374                                 stage ("Checking reel", optional<boost::filesystem::path>());
375                                 if (reel->main_picture()) {
376                                         /* Check reel stuff */
377                                         Fraction const frame_rate = reel->main_picture()->frame_rate();
378                                         if (frame_rate.denominator != 1 ||
379                                             (frame_rate.numerator != 24 &&
380                                              frame_rate.numerator != 25 &&
381                                              frame_rate.numerator != 30 &&
382                                              frame_rate.numerator != 48 &&
383                                              frame_rate.numerator != 50 &&
384                                              frame_rate.numerator != 60 &&
385                                              frame_rate.numerator != 96)) {
386                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INVALID_PICTURE_FRAME_RATE));
387                                         }
388                                         /* Check asset */
389                                         if (reel->main_picture()->asset_ref().resolved()) {
390                                                 stage ("Checking picture asset hash", reel->main_picture()->asset()->file());
391                                                 Result const r = verify_asset (dcp, reel->main_picture(), progress);
392                                                 switch (r) {
393                                                 case RESULT_BAD:
394                                                         notes.push_back (
395                                                                 VerificationNote(
396                                                                         VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_HASH_INCORRECT, *reel->main_picture()->asset()->file()
397                                                                         )
398                                                                 );
399                                                         break;
400                                                 case RESULT_CPL_PKL_DIFFER:
401                                                         notes.push_back (
402                                                                 VerificationNote(
403                                                                         VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_PICTURE_HASHES_DISAGREE, *reel->main_picture()->asset()->file()
404                                                                         )
405                                                                 );
406                                                         break;
407                                                 default:
408                                                         break;
409                                                 }
410                                         }
411                                 }
412                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
413                                         stage ("Checking sound asset hash", reel->main_sound()->asset()->file());
414                                         Result const r = verify_asset (dcp, reel->main_sound(), progress);
415                                         switch (r) {
416                                         case RESULT_BAD:
417                                                 notes.push_back (
418                                                         VerificationNote(
419                                                                 VerificationNote::VERIFY_ERROR, VerificationNote::SOUND_HASH_INCORRECT, *reel->main_sound()->asset()->file()
420                                                                 )
421                                                         );
422                                                 break;
423                                         case RESULT_CPL_PKL_DIFFER:
424                                                 notes.push_back (
425                                                         VerificationNote(
426                                                                 VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_SOUND_HASHES_DISAGREE, *reel->main_sound()->asset()->file()
427                                                                 )
428                                                         );
429                                                 break;
430                                         default:
431                                                 break;
432                                         }
433                                 }
434                         }
435                 }
436
437                 BOOST_FOREACH (shared_ptr<PKL> pkl, dcp->pkls()) {
438                         stage ("Checking PKL", pkl->file());
439                         validate_xml (pkl->file().get(), xsd_dtd_directory, notes);
440                 }
441
442                 stage ("Checking ASSETMAP", dcp->asset_map_path().get());
443                 validate_xml (dcp->asset_map_path().get(), xsd_dtd_directory, notes);
444
445         }
446
447         return notes;
448 }
449
450 string
451 dcp::note_to_string (dcp::VerificationNote note)
452 {
453         switch (note.code()) {
454         case dcp::VerificationNote::GENERAL_READ:
455                 return *note.note();
456         case dcp::VerificationNote::CPL_HASH_INCORRECT:
457                 return "The hash of the CPL in the PKL does not agree with the CPL file";
458         case dcp::VerificationNote::INVALID_PICTURE_FRAME_RATE:
459                 return "The picture in a reel has an invalid frame rate";
460         case dcp::VerificationNote::PICTURE_HASH_INCORRECT:
461                 return dcp::String::compose("The hash of the picture asset %1 does not agree with the PKL file", note.file()->filename());
462         case dcp::VerificationNote::PKL_CPL_PICTURE_HASHES_DISAGREE:
463                 return dcp::String::compose("The PKL and CPL hashes disagree for the picture asset %1", note.file()->filename());
464         case dcp::VerificationNote::SOUND_HASH_INCORRECT:
465                 return dcp::String::compose("The hash of the sound asset %1 does not agree with the PKL file", note.file()->filename());
466         case dcp::VerificationNote::PKL_CPL_SOUND_HASHES_DISAGREE:
467                 return dcp::String::compose("The PKL and CPL hashes disagree for the sound asset %1", note.file()->filename());
468         case dcp::VerificationNote::EMPTY_ASSET_PATH:
469                 return "The asset map contains an empty asset path.";
470         case dcp::VerificationNote::MISSING_ASSET:
471                 return String::compose("The file for an asset in the asset map cannot be found; missing file is %1.", note.file()->filename());
472         case dcp::VerificationNote::MISMATCHED_STANDARD:
473                 return "The DCP contains both SMPTE and Interop parts.";
474         case dcp::VerificationNote::XML_VALIDATION_ERROR:
475                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
476         }
477
478         return "";
479 }