Also check PKL XML.
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2019 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34 #include "verify.h"
35 #include "dcp.h"
36 #include "cpl.h"
37 #include "reel.h"
38 #include "reel_picture_asset.h"
39 #include "reel_sound_asset.h"
40 #include "exceptions.h"
41 #include "compose.hpp"
42 #include "raw_convert.h"
43 #include <xercesc/util/PlatformUtils.hpp>
44 #include <xercesc/parsers/XercesDOMParser.hpp>
45 #include <xercesc/parsers/AbstractDOMParser.hpp>
46 #include <xercesc/sax/HandlerBase.hpp>
47 #include <xercesc/dom/DOMImplementation.hpp>
48 #include <xercesc/dom/DOMImplementationLS.hpp>
49 #include <xercesc/dom/DOMImplementationRegistry.hpp>
50 #include <xercesc/dom/DOMLSParser.hpp>
51 #include <xercesc/dom/DOMException.hpp>
52 #include <xercesc/dom/DOMDocument.hpp>
53 #include <xercesc/dom/DOMNodeList.hpp>
54 #include <xercesc/dom/DOMError.hpp>
55 #include <xercesc/dom/DOMLocator.hpp>
56 #include <xercesc/dom/DOMNamedNodeMap.hpp>
57 #include <xercesc/dom/DOMAttr.hpp>
58 #include <xercesc/dom/DOMErrorHandler.hpp>
59 #include <xercesc/framework/LocalFileInputSource.hpp>
60 #include <boost/noncopyable.hpp>
61 #include <boost/foreach.hpp>
62 #include <boost/algorithm/string.hpp>
63 #include <boost/regex.hpp>
64 #include <map>
65 #include <list>
66 #include <vector>
67 #include <iostream>
68
69 using std::list;
70 using std::vector;
71 using std::string;
72 using std::cout;
73 using std::map;
74 using boost::shared_ptr;
75 using boost::optional;
76 using boost::function;
77
78 using namespace dcp;
79 using namespace xercesc;
80
/** Outcome of checking one asset's hash against the DCP's metadata */
enum Result {
        /** the hash of the asset's file agrees with the PKL (and with the CPL, if the CPL gives a hash) */
        RESULT_GOOD,
        /** the CPL gives a hash for the asset which is different from the one in the PKL */
        RESULT_CPL_PKL_DIFFER,
        /** the hash of the asset's file is different from the one in the PKL */
        RESULT_BAD
};
86
87 static
88 string
89 xml_ch_to_string (XMLCh const * a)
90 {
91         char* x = XMLString::transcode(a);
92         string const o(x);
93         XMLString::release(&x);
94         return o;
95 }
96
97 class XMLValidationError
98 {
99 public:
100         XMLValidationError (SAXParseException const & e)
101                 : _message (xml_ch_to_string(e.getMessage()))
102                 , _line (e.getLineNumber())
103                 , _column (e.getColumnNumber())
104         {
105
106         }
107
108         string message () const {
109                 return _message;
110         }
111
112         uint64_t line () const {
113                 return _line;
114         }
115
116         uint64_t column () const {
117                 return _column;
118         }
119
120 private:
121         string _message;
122         uint64_t _line;
123         uint64_t _column;
124 };
125
126
127 class DCPErrorHandler : public ErrorHandler
128 {
129 public:
130         void warning(const SAXParseException& e)
131         {
132                 maybe_add (XMLValidationError(e));
133         }
134
135         void error(const SAXParseException& e)
136         {
137                 maybe_add (XMLValidationError(e));
138         }
139
140         void fatalError(const SAXParseException& e)
141         {
142                 maybe_add (XMLValidationError(e));
143         }
144
145         void resetErrors() {
146                 _errors.clear ();
147         }
148
149         list<XMLValidationError> errors () const {
150                 return _errors;
151         }
152
153 private:
154         void maybe_add (XMLValidationError e)
155         {
156                 /* XXX: nasty hack */
157                 if (
158                         e.message() ==
159                         "schema document '/home/carl/src/libdcp/xsd/xml.xsd' has different target namespace "
160                         "from the one specified in instance document 'http://www.w3.org/2001/03/xml.xsd'" ||
161                         e.message() ==
162                         "schema document '/home/carl/src/libdcp/xsd/xmldsig-core-schema.xsd' has different target namespace "
163                         "from the one specified in instance document 'http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd'" ||
164                         e.message() ==
165                         "schema document '/home/carl/src/libdcp/xsd/SMPTE-429-8-2006-PKL.xsd' has different target namespace "
166                         "from the one specified in instance document 'http://www.smpte-ra.org/schemas/429-8/2006/PKL'"
167                         ) {
168                         return;
169                 }
170
171                 _errors.push_back (e);
172         }
173
174         list<XMLValidationError> _errors;
175 };
176
177 class StringToXMLCh : public boost::noncopyable
178 {
179 public:
180         StringToXMLCh (string a)
181         {
182                 _buffer = XMLString::transcode(a.c_str());
183         }
184
185         ~StringToXMLCh ()
186         {
187                 XMLString::release (&_buffer);
188         }
189
190         XMLCh const * get () const {
191                 return _buffer;
192         }
193
194 private:
195         XMLCh* _buffer;
196 };
197
198 class LocalFileResolver : public EntityResolver
199 {
200 public:
201         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
202                 : _xsd_dtd_directory (xsd_dtd_directory)
203         {
204                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
205                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
206                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
207         }
208
209         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
210         {
211                 string system_id_str = xml_ch_to_string (system_id);
212                 if (_files.find(system_id_str) == _files.end()) {
213                         return 0;
214                 }
215
216                 boost::filesystem::path p = _xsd_dtd_directory / _files[system_id_str];
217                 StringToXMLCh ch (p.string());
218                 return new LocalFileInputSource(ch.get());
219         }
220
221 private:
222         void add (string uri, string file)
223         {
224                 _files[uri] = file;
225         }
226
227         std::map<string, string> _files;
228         boost::filesystem::path _xsd_dtd_directory;
229 };
230
231 static
232 list<XMLValidationError>
233 validate_xml (boost::filesystem::path xml_file, boost::filesystem::path xsd_dtd_directory)
234 {
235         try {
236                 XMLPlatformUtils::Initialize ();
237         } catch (XMLException& e) {
238                 throw MiscError ("Failed to initialise xerces library");
239         }
240
241         DCPErrorHandler error_handler;
242
243         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
244         {
245                 XercesDOMParser parser;
246                 parser.setValidationScheme(XercesDOMParser::Val_Always);
247                 parser.setDoNamespaces(true);
248                 parser.setDoSchema(true);
249
250                 map<string, string> schema;
251                 schema["http://www.w3.org/2000/09/xmldsig#"] = "xmldsig-core-schema.xsd";
252                 schema["http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd"] = "xmldsig-core-schema.xsd";
253                 schema["http://www.smpte-ra.org/schemas/429-7/2006/CPL"] = "SMPTE-429-7-2006-CPL.xsd";
254                 schema["http://www.smpte-ra.org/schemas/429-8/2006/PKL"] = "SMPTE-429-8-2006-PKL.xsd";
255                 schema["http://www.w3.org/2001/03/xml.xsd"] = "xml.xsd";
256
257                 string locations;
258                 for (map<string, string>::const_iterator i = schema.begin(); i != schema.end(); ++i) {
259                         locations += i->first;
260                         locations += " ";
261                         boost::filesystem::path p = xsd_dtd_directory / i->second;
262                         locations += p.string() + " ";
263                 }
264
265                 parser.setExternalSchemaLocation(locations.c_str());
266                 parser.setValidationSchemaFullChecking(true);
267                 parser.setErrorHandler(&error_handler);
268
269                 LocalFileResolver resolver (xsd_dtd_directory);
270                 parser.setEntityResolver(&resolver);
271
272                 try {
273                         parser.resetDocumentPool();
274                         parser.parse(xml_file.string().c_str());
275                 } catch (XMLException& e) {
276                         throw MiscError(xml_ch_to_string(e.getMessage()));
277                 } catch (DOMException& e) {
278                         throw MiscError(xml_ch_to_string(e.getMessage()));
279                 } catch (...) {
280                         throw MiscError("Unknown exception from xerces");
281                 }
282         }
283
284         XMLPlatformUtils::Terminate ();
285
286         return error_handler.errors ();
287 }
288
289 static Result
290 verify_asset (shared_ptr<DCP> dcp, shared_ptr<ReelMXF> reel_mxf, function<void (float)> progress)
291 {
292         string const actual_hash = reel_mxf->asset_ref()->hash(progress);
293
294         list<shared_ptr<PKL> > pkls = dcp->pkls();
295         /* We've read this DCP in so it must have at least one PKL */
296         DCP_ASSERT (!pkls.empty());
297
298         shared_ptr<Asset> asset = reel_mxf->asset_ref().asset();
299
300         optional<string> pkl_hash;
301         BOOST_FOREACH (shared_ptr<PKL> i, pkls) {
302                 pkl_hash = i->hash (reel_mxf->asset_ref()->id());
303                 if (pkl_hash) {
304                         break;
305                 }
306         }
307
308         DCP_ASSERT (pkl_hash);
309
310         optional<string> cpl_hash = reel_mxf->hash();
311         if (cpl_hash && *cpl_hash != *pkl_hash) {
312                 return RESULT_CPL_PKL_DIFFER;
313         }
314
315         if (actual_hash != *pkl_hash) {
316                 return RESULT_BAD;
317         }
318
319         return RESULT_GOOD;
320 }
321
322
323 list<VerificationNote>
324 dcp::verify (
325         vector<boost::filesystem::path> directories,
326         function<void (string, optional<boost::filesystem::path>)> stage,
327         function<void (float)> progress,
328         boost::filesystem::path xsd_dtd_directory
329         )
330 {
331         xsd_dtd_directory = boost::filesystem::canonical (xsd_dtd_directory);
332
333         list<VerificationNote> notes;
334
335         list<shared_ptr<DCP> > dcps;
336         BOOST_FOREACH (boost::filesystem::path i, directories) {
337                 dcps.push_back (shared_ptr<DCP> (new DCP (i)));
338         }
339
340         BOOST_FOREACH (shared_ptr<DCP> dcp, dcps) {
341                 stage ("Checking DCP", dcp->directory());
342                 try {
343                         dcp->read (&notes);
344                 } catch (DCPReadError& e) {
345                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::Code::GENERAL_READ, string(e.what())));
346                 } catch (XMLError& e) {
347                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::Code::GENERAL_READ, string(e.what())));
348                 }
349
350                 BOOST_FOREACH (shared_ptr<CPL> cpl, dcp->cpls()) {
351                         stage ("Checking CPL", cpl->file());
352
353                         list<XMLValidationError> errors = validate_xml (cpl->file().get(), xsd_dtd_directory);
354                         BOOST_FOREACH (XMLValidationError i, errors) {
355                                 notes.push_back (VerificationNote(
356                                                          VerificationNote::VERIFY_ERROR, VerificationNote::Code::XML_VALIDATION_ERROR,
357                                                          String::compose("%1 (file %2, line %3)", i.message(), cpl->file()->string(), i.line())
358                                                          ));
359                         }
360
361                         /* Check that the CPL's hash corresponds to the PKL */
362                         BOOST_FOREACH (shared_ptr<PKL> i, dcp->pkls()) {
363                                 optional<string> h = i->hash(cpl->id());
364                                 if (h && make_digest(Data(*cpl->file())) != *h) {
365                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::CPL_HASH_INCORRECT));
366                                 }
367                         }
368
369                         BOOST_FOREACH (shared_ptr<Reel> reel, cpl->reels()) {
370                                 stage ("Checking reel", optional<boost::filesystem::path>());
371                                 if (reel->main_picture()) {
372                                         /* Check reel stuff */
373                                         Fraction const frame_rate = reel->main_picture()->frame_rate();
374                                         if (frame_rate.denominator != 1 ||
375                                             (frame_rate.numerator != 24 &&
376                                              frame_rate.numerator != 25 &&
377                                              frame_rate.numerator != 30 &&
378                                              frame_rate.numerator != 48 &&
379                                              frame_rate.numerator != 50 &&
380                                              frame_rate.numerator != 60 &&
381                                              frame_rate.numerator != 96)) {
382                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INVALID_PICTURE_FRAME_RATE));
383                                         }
384                                         /* Check asset */
385                                         if (reel->main_picture()->asset_ref().resolved()) {
386                                                 stage ("Checking picture asset hash", reel->main_picture()->asset()->file());
387                                                 Result const r = verify_asset (dcp, reel->main_picture(), progress);
388                                                 switch (r) {
389                                                 case RESULT_BAD:
390                                                         notes.push_back (
391                                                                         VerificationNote(
392                                                                                 VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_HASH_INCORRECT, *reel->main_picture()->asset()->file()
393                                                                                 )
394                                                                         );
395                                                         break;
396                                                 case RESULT_CPL_PKL_DIFFER:
397                                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_PICTURE_HASHES_DISAGREE));
398                                                         break;
399                                                 default:
400                                                         break;
401                                                 }
402                                         }
403                                 }
404                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
405                                         stage ("Checking sound asset hash", reel->main_sound()->asset()->file());
406                                         Result const r = verify_asset (dcp, reel->main_sound(), progress);
407                                         switch (r) {
408                                         case RESULT_BAD:
409                                                 notes.push_back (
410                                                                 VerificationNote(
411                                                                         VerificationNote::VERIFY_ERROR, VerificationNote::SOUND_HASH_INCORRECT, *reel->main_sound()->asset()->file()
412                                                                         )
413                                                                 );
414                                                 break;
415                                         case RESULT_CPL_PKL_DIFFER:
416                                                 notes.push_back (VerificationNote (VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_SOUND_HASHES_DISAGREE));
417                                                 break;
418                                         default:
419                                                 break;
420                                         }
421                                 }
422                         }
423                 }
424
425                 BOOST_FOREACH (shared_ptr<PKL> pkl, dcp->pkls()) {
426                         stage ("Checking PKL", pkl->file());
427
428                         list<XMLValidationError> errors = validate_xml (pkl->file().get(), xsd_dtd_directory);
429                         BOOST_FOREACH (XMLValidationError i, errors) {
430                                 notes.push_back (VerificationNote(
431                                                          VerificationNote::VERIFY_ERROR, VerificationNote::Code::XML_VALIDATION_ERROR,
432                                                          String::compose("%1 (file %2, line %3)", i.message(), pkl->file()->string(), i.line())
433                                                          ));
434                         }
435                 }
436         }
437
438         return notes;
439 }
440
441 string
442 dcp::note_to_string (dcp::VerificationNote note)
443 {
444         switch (note.code()) {
445         case dcp::VerificationNote::GENERAL_READ:
446                 return *note.note();
447         case dcp::VerificationNote::CPL_HASH_INCORRECT:
448                 return "The hash of the CPL in the PKL does not agree with the CPL file";
449         case dcp::VerificationNote::INVALID_PICTURE_FRAME_RATE:
450                 return "The picture in a reel has an invalid frame rate";
451         case dcp::VerificationNote::PICTURE_HASH_INCORRECT:
452                 return dcp::String::compose("The hash of the picture asset %1 does not agree with the PKL file", note.file()->filename());
453         case dcp::VerificationNote::PKL_CPL_PICTURE_HASHES_DISAGREE:
454                 return "The PKL and CPL hashes disagree for a picture asset.";
455         case dcp::VerificationNote::SOUND_HASH_INCORRECT:
456                 return dcp::String::compose("The hash of the sound asset %1 does not agree with the PKL file", note.file()->filename());
457         case dcp::VerificationNote::PKL_CPL_SOUND_HASHES_DISAGREE:
458                 return "The PKL and CPL hashes disagree for a sound asset.";
459         case dcp::VerificationNote::EMPTY_ASSET_PATH:
460                 return "The asset map contains an empty asset path.";
461         case dcp::VerificationNote::MISSING_ASSET:
462                 return "The file for an asset in the asset map cannot be found.";
463         case dcp::VerificationNote::MISMATCHED_STANDARD:
464                 return "The DCP contains both SMPTE and Interop parts.";
465         case dcp::VerificationNote::XML_VALIDATION_ERROR:
466                 return "An XML file is badly formed.";
467         }
468
469         return "";
470 }