Remove now-unused boost regex.
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2019 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34 #include "verify.h"
35 #include "dcp.h"
36 #include "cpl.h"
37 #include "reel.h"
38 #include "reel_picture_asset.h"
39 #include "reel_sound_asset.h"
40 #include "exceptions.h"
41 #include "compose.hpp"
42 #include "raw_convert.h"
43 #include <xercesc/util/PlatformUtils.hpp>
44 #include <xercesc/parsers/XercesDOMParser.hpp>
45 #include <xercesc/parsers/AbstractDOMParser.hpp>
46 #include <xercesc/sax/HandlerBase.hpp>
47 #include <xercesc/dom/DOMImplementation.hpp>
48 #include <xercesc/dom/DOMImplementationLS.hpp>
49 #include <xercesc/dom/DOMImplementationRegistry.hpp>
50 #include <xercesc/dom/DOMLSParser.hpp>
51 #include <xercesc/dom/DOMException.hpp>
52 #include <xercesc/dom/DOMDocument.hpp>
53 #include <xercesc/dom/DOMNodeList.hpp>
54 #include <xercesc/dom/DOMError.hpp>
55 #include <xercesc/dom/DOMLocator.hpp>
56 #include <xercesc/dom/DOMNamedNodeMap.hpp>
57 #include <xercesc/dom/DOMAttr.hpp>
58 #include <xercesc/dom/DOMErrorHandler.hpp>
59 #include <xercesc/framework/LocalFileInputSource.hpp>
60 #include <boost/noncopyable.hpp>
61 #include <boost/foreach.hpp>
62 #include <boost/algorithm/string.hpp>
63 #include <map>
64 #include <list>
65 #include <vector>
66 #include <iostream>
67
68 using std::list;
69 using std::vector;
70 using std::string;
71 using std::cout;
72 using std::map;
73 using boost::shared_ptr;
74 using boost::optional;
75 using boost::function;
76
77 using namespace dcp;
78 using namespace xercesc;
79
/** Outcome of checking one asset's hashes; produced by verify_asset() */
enum Result {
	/** the calculated hash agrees with the PKL, and the CPL (if it carries a hash) agrees with the PKL */
	RESULT_GOOD = 0,
	/** the CPL carries a hash for the asset which differs from the one in the PKL */
	RESULT_CPL_PKL_DIFFER = 1,
	/** the calculated hash of the asset differs from the one in the PKL */
	RESULT_BAD = 2
};
85
86 static
87 string
88 xml_ch_to_string (XMLCh const * a)
89 {
90         char* x = XMLString::transcode(a);
91         string const o(x);
92         XMLString::release(&x);
93         return o;
94 }
95
96 class XMLValidationError
97 {
98 public:
99         XMLValidationError (SAXParseException const & e)
100                 : _message (xml_ch_to_string(e.getMessage()))
101                 , _line (e.getLineNumber())
102                 , _column (e.getColumnNumber())
103         {
104
105         }
106
107         string message () const {
108                 return _message;
109         }
110
111         uint64_t line () const {
112                 return _line;
113         }
114
115         uint64_t column () const {
116                 return _column;
117         }
118
119 private:
120         string _message;
121         uint64_t _line;
122         uint64_t _column;
123 };
124
125
126 class DCPErrorHandler : public ErrorHandler
127 {
128 public:
129         void warning(const SAXParseException& e)
130         {
131                 maybe_add (XMLValidationError(e));
132         }
133
134         void error(const SAXParseException& e)
135         {
136                 maybe_add (XMLValidationError(e));
137         }
138
139         void fatalError(const SAXParseException& e)
140         {
141                 maybe_add (XMLValidationError(e));
142         }
143
144         void resetErrors() {
145                 _errors.clear ();
146         }
147
148         list<XMLValidationError> errors () const {
149                 return _errors;
150         }
151
152 private:
153         void maybe_add (XMLValidationError e)
154         {
155                 /* XXX: nasty hack */
156                 if (
157                         e.message() ==
158                         "schema document '/home/carl/src/libdcp/xsd/xml.xsd' has different target namespace "
159                         "from the one specified in instance document 'http://www.w3.org/2001/03/xml.xsd'" ||
160                         e.message() ==
161                         "schema document '/home/carl/src/libdcp/xsd/xmldsig-core-schema.xsd' has different target namespace "
162                         "from the one specified in instance document 'http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd'" ||
163                         e.message() ==
164                         "schema document '/home/carl/src/libdcp/xsd/SMPTE-429-8-2006-PKL.xsd' has different target namespace "
165                         "from the one specified in instance document 'http://www.smpte-ra.org/schemas/429-8/2006/PKL'"
166                         ) {
167                         return;
168                 }
169
170                 _errors.push_back (e);
171         }
172
173         list<XMLValidationError> _errors;
174 };
175
176 class StringToXMLCh : public boost::noncopyable
177 {
178 public:
179         StringToXMLCh (string a)
180         {
181                 _buffer = XMLString::transcode(a.c_str());
182         }
183
184         ~StringToXMLCh ()
185         {
186                 XMLString::release (&_buffer);
187         }
188
189         XMLCh const * get () const {
190                 return _buffer;
191         }
192
193 private:
194         XMLCh* _buffer;
195 };
196
197 class LocalFileResolver : public EntityResolver
198 {
199 public:
200         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
201                 : _xsd_dtd_directory (xsd_dtd_directory)
202         {
203                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
204                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
205                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
206         }
207
208         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
209         {
210                 string system_id_str = xml_ch_to_string (system_id);
211                 if (_files.find(system_id_str) == _files.end()) {
212                         return 0;
213                 }
214
215                 boost::filesystem::path p = _xsd_dtd_directory / _files[system_id_str];
216                 StringToXMLCh ch (p.string());
217                 return new LocalFileInputSource(ch.get());
218         }
219
220 private:
221         void add (string uri, string file)
222         {
223                 _files[uri] = file;
224         }
225
226         std::map<string, string> _files;
227         boost::filesystem::path _xsd_dtd_directory;
228 };
229
230 static
231 void
232 validate_xml (boost::filesystem::path xml_file, boost::filesystem::path xsd_dtd_directory, list<VerificationNote>& notes)
233 {
234         try {
235                 XMLPlatformUtils::Initialize ();
236         } catch (XMLException& e) {
237                 throw MiscError ("Failed to initialise xerces library");
238         }
239
240         DCPErrorHandler error_handler;
241
242         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
243         {
244                 XercesDOMParser parser;
245                 parser.setValidationScheme(XercesDOMParser::Val_Always);
246                 parser.setDoNamespaces(true);
247                 parser.setDoSchema(true);
248
249                 map<string, string> schema;
250                 schema["http://www.w3.org/2000/09/xmldsig#"] = "xmldsig-core-schema.xsd";
251                 schema["http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd"] = "xmldsig-core-schema.xsd";
252                 schema["http://www.smpte-ra.org/schemas/429-7/2006/CPL"] = "SMPTE-429-7-2006-CPL.xsd";
253                 schema["http://www.smpte-ra.org/schemas/429-8/2006/PKL"] = "SMPTE-429-8-2006-PKL.xsd";
254                 schema["http://www.smpte-ra.org/schemas/429-9/2007/AM"] = "SMPTE-429-9-2007-AM.xsd";
255                 schema["http://www.w3.org/2001/03/xml.xsd"] = "xml.xsd";
256
257                 string locations;
258                 for (map<string, string>::const_iterator i = schema.begin(); i != schema.end(); ++i) {
259                         locations += i->first;
260                         locations += " ";
261                         boost::filesystem::path p = xsd_dtd_directory / i->second;
262                         locations += p.string() + " ";
263                 }
264
265                 parser.setExternalSchemaLocation(locations.c_str());
266                 parser.setValidationSchemaFullChecking(true);
267                 parser.setErrorHandler(&error_handler);
268
269                 LocalFileResolver resolver (xsd_dtd_directory);
270                 parser.setEntityResolver(&resolver);
271
272                 try {
273                         parser.resetDocumentPool();
274                         parser.parse(xml_file.string().c_str());
275                 } catch (XMLException& e) {
276                         throw MiscError(xml_ch_to_string(e.getMessage()));
277                 } catch (DOMException& e) {
278                         throw MiscError(xml_ch_to_string(e.getMessage()));
279                 } catch (...) {
280                         throw MiscError("Unknown exception from xerces");
281                 }
282         }
283
284         XMLPlatformUtils::Terminate ();
285
286         BOOST_FOREACH (XMLValidationError i, error_handler.errors()) {
287                 notes.push_back (
288                         VerificationNote(
289                                 VerificationNote::VERIFY_ERROR,
290                                 VerificationNote::Code::XML_VALIDATION_ERROR,
291                                 i.message(),
292                                 xml_file,
293                                 i.line()
294                                 )
295                         );
296         }
297 }
298
299 static Result
300 verify_asset (shared_ptr<DCP> dcp, shared_ptr<ReelMXF> reel_mxf, function<void (float)> progress)
301 {
302         string const actual_hash = reel_mxf->asset_ref()->hash(progress);
303
304         list<shared_ptr<PKL> > pkls = dcp->pkls();
305         /* We've read this DCP in so it must have at least one PKL */
306         DCP_ASSERT (!pkls.empty());
307
308         shared_ptr<Asset> asset = reel_mxf->asset_ref().asset();
309
310         optional<string> pkl_hash;
311         BOOST_FOREACH (shared_ptr<PKL> i, pkls) {
312                 pkl_hash = i->hash (reel_mxf->asset_ref()->id());
313                 if (pkl_hash) {
314                         break;
315                 }
316         }
317
318         DCP_ASSERT (pkl_hash);
319
320         optional<string> cpl_hash = reel_mxf->hash();
321         if (cpl_hash && *cpl_hash != *pkl_hash) {
322                 return RESULT_CPL_PKL_DIFFER;
323         }
324
325         if (actual_hash != *pkl_hash) {
326                 return RESULT_BAD;
327         }
328
329         return RESULT_GOOD;
330 }
331
332
333 list<VerificationNote>
334 dcp::verify (
335         vector<boost::filesystem::path> directories,
336         function<void (string, optional<boost::filesystem::path>)> stage,
337         function<void (float)> progress,
338         boost::filesystem::path xsd_dtd_directory
339         )
340 {
341         xsd_dtd_directory = boost::filesystem::canonical (xsd_dtd_directory);
342
343         list<VerificationNote> notes;
344
345         list<shared_ptr<DCP> > dcps;
346         BOOST_FOREACH (boost::filesystem::path i, directories) {
347                 dcps.push_back (shared_ptr<DCP> (new DCP (i)));
348         }
349
350         BOOST_FOREACH (shared_ptr<DCP> dcp, dcps) {
351                 stage ("Checking DCP", dcp->directory());
352                 try {
353                         dcp->read (&notes);
354                 } catch (DCPReadError& e) {
355                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::Code::GENERAL_READ, string(e.what())));
356                 } catch (XMLError& e) {
357                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::Code::GENERAL_READ, string(e.what())));
358                 }
359
360                 BOOST_FOREACH (shared_ptr<CPL> cpl, dcp->cpls()) {
361                         stage ("Checking CPL", cpl->file());
362                         validate_xml (cpl->file().get(), xsd_dtd_directory, notes);
363
364                         /* Check that the CPL's hash corresponds to the PKL */
365                         BOOST_FOREACH (shared_ptr<PKL> i, dcp->pkls()) {
366                                 optional<string> h = i->hash(cpl->id());
367                                 if (h && make_digest(Data(*cpl->file())) != *h) {
368                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::CPL_HASH_INCORRECT));
369                                 }
370                         }
371
372                         BOOST_FOREACH (shared_ptr<Reel> reel, cpl->reels()) {
373                                 stage ("Checking reel", optional<boost::filesystem::path>());
374                                 if (reel->main_picture()) {
375                                         /* Check reel stuff */
376                                         Fraction const frame_rate = reel->main_picture()->frame_rate();
377                                         if (frame_rate.denominator != 1 ||
378                                             (frame_rate.numerator != 24 &&
379                                              frame_rate.numerator != 25 &&
380                                              frame_rate.numerator != 30 &&
381                                              frame_rate.numerator != 48 &&
382                                              frame_rate.numerator != 50 &&
383                                              frame_rate.numerator != 60 &&
384                                              frame_rate.numerator != 96)) {
385                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INVALID_PICTURE_FRAME_RATE));
386                                         }
387                                         /* Check asset */
388                                         if (reel->main_picture()->asset_ref().resolved()) {
389                                                 stage ("Checking picture asset hash", reel->main_picture()->asset()->file());
390                                                 Result const r = verify_asset (dcp, reel->main_picture(), progress);
391                                                 switch (r) {
392                                                 case RESULT_BAD:
393                                                         notes.push_back (
394                                                                 VerificationNote(
395                                                                         VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_HASH_INCORRECT, *reel->main_picture()->asset()->file()
396                                                                         )
397                                                                 );
398                                                         break;
399                                                 case RESULT_CPL_PKL_DIFFER:
400                                                         notes.push_back (
401                                                                 VerificationNote(
402                                                                         VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_PICTURE_HASHES_DISAGREE, *reel->main_picture()->asset()->file()
403                                                                         )
404                                                                 );
405                                                         break;
406                                                 default:
407                                                         break;
408                                                 }
409                                         }
410                                 }
411                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
412                                         stage ("Checking sound asset hash", reel->main_sound()->asset()->file());
413                                         Result const r = verify_asset (dcp, reel->main_sound(), progress);
414                                         switch (r) {
415                                         case RESULT_BAD:
416                                                 notes.push_back (
417                                                         VerificationNote(
418                                                                 VerificationNote::VERIFY_ERROR, VerificationNote::SOUND_HASH_INCORRECT, *reel->main_sound()->asset()->file()
419                                                                 )
420                                                         );
421                                                 break;
422                                         case RESULT_CPL_PKL_DIFFER:
423                                                 notes.push_back (
424                                                         VerificationNote(
425                                                                 VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_SOUND_HASHES_DISAGREE, *reel->main_sound()->asset()->file()
426                                                                 )
427                                                         );
428                                                 break;
429                                         default:
430                                                 break;
431                                         }
432                                 }
433                         }
434                 }
435
436                 BOOST_FOREACH (shared_ptr<PKL> pkl, dcp->pkls()) {
437                         stage ("Checking PKL", pkl->file());
438                         validate_xml (pkl->file().get(), xsd_dtd_directory, notes);
439                 }
440
441                 stage ("Checking ASSETMAP", dcp->asset_map_path().get());
442                 validate_xml (dcp->asset_map_path().get(), xsd_dtd_directory, notes);
443
444         }
445
446         return notes;
447 }
448
449 string
450 dcp::note_to_string (dcp::VerificationNote note)
451 {
452         switch (note.code()) {
453         case dcp::VerificationNote::GENERAL_READ:
454                 return *note.note();
455         case dcp::VerificationNote::CPL_HASH_INCORRECT:
456                 return "The hash of the CPL in the PKL does not agree with the CPL file";
457         case dcp::VerificationNote::INVALID_PICTURE_FRAME_RATE:
458                 return "The picture in a reel has an invalid frame rate";
459         case dcp::VerificationNote::PICTURE_HASH_INCORRECT:
460                 return dcp::String::compose("The hash of the picture asset %1 does not agree with the PKL file", note.file()->filename());
461         case dcp::VerificationNote::PKL_CPL_PICTURE_HASHES_DISAGREE:
462                 return dcp::String::compose("The PKL and CPL hashes disagree for the picture asset %1", note.file()->filename());
463         case dcp::VerificationNote::SOUND_HASH_INCORRECT:
464                 return dcp::String::compose("The hash of the sound asset %1 does not agree with the PKL file", note.file()->filename());
465         case dcp::VerificationNote::PKL_CPL_SOUND_HASHES_DISAGREE:
466                 return dcp::String::compose("The PKL and CPL hashes disagree for the sound asset %1", note.file()->filename());
467         case dcp::VerificationNote::EMPTY_ASSET_PATH:
468                 return "The asset map contains an empty asset path.";
469         case dcp::VerificationNote::MISSING_ASSET:
470                 return String::compose("The file for an asset in the asset map cannot be found; missing file is %1.", note.file()->filename());
471         case dcp::VerificationNote::MISMATCHED_STANDARD:
472                 return "The DCP contains both SMPTE and Interop parts.";
473         case dcp::VerificationNote::XML_VALIDATION_ERROR:
474                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
475         }
476
477         return "";
478 }