s/DCPReadError/ReadError/g
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2020 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34 #include "verify.h"
35 #include "dcp.h"
36 #include "cpl.h"
37 #include "reel.h"
38 #include "reel_picture_asset.h"
39 #include "reel_sound_asset.h"
40 #include "exceptions.h"
41 #include "compose.hpp"
42 #include "raw_convert.h"
43 #include <xercesc/util/PlatformUtils.hpp>
44 #include <xercesc/parsers/XercesDOMParser.hpp>
45 #include <xercesc/parsers/AbstractDOMParser.hpp>
46 #include <xercesc/sax/HandlerBase.hpp>
47 #include <xercesc/dom/DOMImplementation.hpp>
48 #include <xercesc/dom/DOMImplementationLS.hpp>
49 #include <xercesc/dom/DOMImplementationRegistry.hpp>
50 #include <xercesc/dom/DOMLSParser.hpp>
51 #include <xercesc/dom/DOMException.hpp>
52 #include <xercesc/dom/DOMDocument.hpp>
53 #include <xercesc/dom/DOMNodeList.hpp>
54 #include <xercesc/dom/DOMError.hpp>
55 #include <xercesc/dom/DOMLocator.hpp>
56 #include <xercesc/dom/DOMNamedNodeMap.hpp>
57 #include <xercesc/dom/DOMAttr.hpp>
58 #include <xercesc/dom/DOMErrorHandler.hpp>
59 #include <xercesc/framework/LocalFileInputSource.hpp>
60 #include <boost/noncopyable.hpp>
61 #include <boost/foreach.hpp>
62 #include <boost/algorithm/string.hpp>
63 #include <map>
64 #include <list>
65 #include <vector>
66 #include <iostream>
67
68 using std::list;
69 using std::vector;
70 using std::string;
71 using std::cout;
72 using std::map;
73 using boost::shared_ptr;
74 using boost::optional;
75 using boost::function;
76
77 using namespace dcp;
78 using namespace xercesc;
79
/** Outcome of checking one asset's hash in verify_asset() */
enum Result {
        /** the hash computed from the asset matches the PKL (and any CPL hash agrees with the PKL) */
        RESULT_GOOD = 0,
        /** the CPL carries a hash for the asset and it is not the same as the one in the PKL */
        RESULT_CPL_PKL_DIFFER = 1,
        /** the hash computed from the asset does not match the one in the PKL */
        RESULT_BAD = 2
};
85
86 static
87 string
88 xml_ch_to_string (XMLCh const * a)
89 {
90         char* x = XMLString::transcode(a);
91         string const o(x);
92         XMLString::release(&x);
93         return o;
94 }
95
96 class XMLValidationError
97 {
98 public:
99         XMLValidationError (SAXParseException const & e)
100                 : _message (xml_ch_to_string(e.getMessage()))
101                 , _line (e.getLineNumber())
102                 , _column (e.getColumnNumber())
103         {
104
105         }
106
107         string message () const {
108                 return _message;
109         }
110
111         uint64_t line () const {
112                 return _line;
113         }
114
115         uint64_t column () const {
116                 return _column;
117         }
118
119 private:
120         string _message;
121         uint64_t _line;
122         uint64_t _column;
123 };
124
125
126 class DCPErrorHandler : public ErrorHandler
127 {
128 public:
129         void warning(const SAXParseException& e)
130         {
131                 maybe_add (XMLValidationError(e));
132         }
133
134         void error(const SAXParseException& e)
135         {
136                 maybe_add (XMLValidationError(e));
137         }
138
139         void fatalError(const SAXParseException& e)
140         {
141                 maybe_add (XMLValidationError(e));
142         }
143
144         void resetErrors() {
145                 _errors.clear ();
146         }
147
148         list<XMLValidationError> errors () const {
149                 return _errors;
150         }
151
152 private:
153         void maybe_add (XMLValidationError e)
154         {
155                 /* XXX: nasty hack */
156                 if (
157                         e.message().find("schema document") != string::npos &&
158                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
159                         ) {
160                         return;
161                 }
162
163                 _errors.push_back (e);
164         }
165
166         list<XMLValidationError> _errors;
167 };
168
169 class StringToXMLCh : public boost::noncopyable
170 {
171 public:
172         StringToXMLCh (string a)
173         {
174                 _buffer = XMLString::transcode(a.c_str());
175         }
176
177         ~StringToXMLCh ()
178         {
179                 XMLString::release (&_buffer);
180         }
181
182         XMLCh const * get () const {
183                 return _buffer;
184         }
185
186 private:
187         XMLCh* _buffer;
188 };
189
190 class LocalFileResolver : public EntityResolver
191 {
192 public:
193         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
194                 : _xsd_dtd_directory (xsd_dtd_directory)
195         {
196                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
197                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
198                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
199                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
200                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
201                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
202                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
203         }
204
205         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
206         {
207                 string system_id_str = xml_ch_to_string (system_id);
208                 if (_files.find(system_id_str) == _files.end()) {
209                         return 0;
210                 }
211
212                 boost::filesystem::path p = _xsd_dtd_directory / _files[system_id_str];
213                 StringToXMLCh ch (p.string());
214                 return new LocalFileInputSource(ch.get());
215         }
216
217 private:
218         void add (string uri, string file)
219         {
220                 _files[uri] = file;
221         }
222
223         std::map<string, string> _files;
224         boost::filesystem::path _xsd_dtd_directory;
225 };
226
227 static
228 void
229 validate_xml (boost::filesystem::path xml_file, boost::filesystem::path xsd_dtd_directory, list<VerificationNote>& notes)
230 {
231         try {
232                 XMLPlatformUtils::Initialize ();
233         } catch (XMLException& e) {
234                 throw MiscError ("Failed to initialise xerces library");
235         }
236
237         DCPErrorHandler error_handler;
238
239         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
240         {
241                 XercesDOMParser parser;
242                 parser.setValidationScheme(XercesDOMParser::Val_Always);
243                 parser.setDoNamespaces(true);
244                 parser.setDoSchema(true);
245
246                 map<string, string> schema;
247                 schema["http://www.w3.org/2000/09/xmldsig#"] = "xmldsig-core-schema.xsd";
248                 schema["http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd"] = "xmldsig-core-schema.xsd";
249                 schema["http://www.smpte-ra.org/schemas/429-7/2006/CPL"] = "SMPTE-429-7-2006-CPL.xsd";
250                 schema["http://www.smpte-ra.org/schemas/429-8/2006/PKL"] = "SMPTE-429-8-2006-PKL.xsd";
251                 schema["http://www.smpte-ra.org/schemas/429-9/2007/AM"] = "SMPTE-429-9-2007-AM.xsd";
252                 schema["http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd"] = "Main-Stereo-Picture-CPL.xsd";
253                 schema["http://www.digicine.com/PROTO-ASDCP-CPL-20040511#"] = "PROTO-ASDCP-CPL-20040511.xsd";
254                 schema["http://www.digicine.com/PROTO-ASDCP-PKL-20040311#"] = "PROTO-ASDCP-PKL-20040311.xsd";
255                 schema["http://www.digicine.com/PROTO-ASDCP-AM-20040311#"] = "PROTO-ASDCP-AM-20040311.xsd";
256
257                 string locations;
258                 for (map<string, string>::const_iterator i = schema.begin(); i != schema.end(); ++i) {
259                         locations += i->first;
260                         locations += " ";
261                         boost::filesystem::path p = xsd_dtd_directory / i->second;
262                         locations += p.string() + " ";
263                 }
264
265                 parser.setExternalSchemaLocation(locations.c_str());
266                 parser.setValidationSchemaFullChecking(true);
267                 parser.setErrorHandler(&error_handler);
268
269                 LocalFileResolver resolver (xsd_dtd_directory);
270                 parser.setEntityResolver(&resolver);
271
272                 try {
273                         parser.resetDocumentPool();
274                         parser.parse(xml_file.string().c_str());
275                 } catch (XMLException& e) {
276                         throw MiscError(xml_ch_to_string(e.getMessage()));
277                 } catch (DOMException& e) {
278                         throw MiscError(xml_ch_to_string(e.getMessage()));
279                 } catch (...) {
280                         throw MiscError("Unknown exception from xerces");
281                 }
282         }
283
284         XMLPlatformUtils::Terminate ();
285
286         BOOST_FOREACH (XMLValidationError i, error_handler.errors()) {
287                 notes.push_back (
288                         VerificationNote(
289                                 VerificationNote::VERIFY_ERROR,
290                                 VerificationNote::XML_VALIDATION_ERROR,
291                                 i.message(),
292                                 xml_file,
293                                 i.line()
294                                 )
295                         );
296         }
297 }
298
299 static Result
300 verify_asset (shared_ptr<DCP> dcp, shared_ptr<ReelMXF> reel_mxf, function<void (float)> progress)
301 {
302         string const actual_hash = reel_mxf->asset_ref()->hash(progress);
303
304         list<shared_ptr<PKL> > pkls = dcp->pkls();
305         /* We've read this DCP in so it must have at least one PKL */
306         DCP_ASSERT (!pkls.empty());
307
308         shared_ptr<Asset> asset = reel_mxf->asset_ref().asset();
309
310         optional<string> pkl_hash;
311         BOOST_FOREACH (shared_ptr<PKL> i, pkls) {
312                 pkl_hash = i->hash (reel_mxf->asset_ref()->id());
313                 if (pkl_hash) {
314                         break;
315                 }
316         }
317
318         DCP_ASSERT (pkl_hash);
319
320         optional<string> cpl_hash = reel_mxf->hash();
321         if (cpl_hash && *cpl_hash != *pkl_hash) {
322                 return RESULT_CPL_PKL_DIFFER;
323         }
324
325         if (actual_hash != *pkl_hash) {
326                 return RESULT_BAD;
327         }
328
329         return RESULT_GOOD;
330 }
331
332
333 list<VerificationNote>
334 dcp::verify (
335         vector<boost::filesystem::path> directories,
336         function<void (string, optional<boost::filesystem::path>)> stage,
337         function<void (float)> progress,
338         boost::filesystem::path xsd_dtd_directory
339         )
340 {
341         xsd_dtd_directory = boost::filesystem::canonical (xsd_dtd_directory);
342
343         list<VerificationNote> notes;
344
345         list<shared_ptr<DCP> > dcps;
346         BOOST_FOREACH (boost::filesystem::path i, directories) {
347                 dcps.push_back (shared_ptr<DCP> (new DCP (i)));
348         }
349
350         BOOST_FOREACH (shared_ptr<DCP> dcp, dcps) {
351                 stage ("Checking DCP", dcp->directory());
352                 try {
353                         dcp->read (&notes);
354                 } catch (ReadError& e) {
355                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
356                 } catch (XMLError& e) {
357                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
358                 }
359
360                 BOOST_FOREACH (shared_ptr<CPL> cpl, dcp->cpls()) {
361                         stage ("Checking CPL", cpl->file());
362                         validate_xml (cpl->file().get(), xsd_dtd_directory, notes);
363
364                         /* Check that the CPL's hash corresponds to the PKL */
365                         BOOST_FOREACH (shared_ptr<PKL> i, dcp->pkls()) {
366                                 optional<string> h = i->hash(cpl->id());
367                                 if (h && make_digest(Data(*cpl->file())) != *h) {
368                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::CPL_HASH_INCORRECT));
369                                 }
370                         }
371
372                         BOOST_FOREACH (shared_ptr<Reel> reel, cpl->reels()) {
373                                 stage ("Checking reel", optional<boost::filesystem::path>());
374
375                                 BOOST_FOREACH (shared_ptr<ReelAsset> i, reel->assets()) {
376                                         if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
377                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::DURATION_TOO_SMALL, i->id()));
378                                         }
379                                         if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
380                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INTRINSIC_DURATION_TOO_SMALL, i->id()));
381                                         }
382                                 }
383
384                                 if (reel->main_picture()) {
385                                         /* Check reel stuff */
386                                         Fraction const frame_rate = reel->main_picture()->frame_rate();
387                                         if (frame_rate.denominator != 1 ||
388                                             (frame_rate.numerator != 24 &&
389                                              frame_rate.numerator != 25 &&
390                                              frame_rate.numerator != 30 &&
391                                              frame_rate.numerator != 48 &&
392                                              frame_rate.numerator != 50 &&
393                                              frame_rate.numerator != 60 &&
394                                              frame_rate.numerator != 96)) {
395                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INVALID_PICTURE_FRAME_RATE));
396                                         }
397                                         /* Check asset */
398                                         if (reel->main_picture()->asset_ref().resolved()) {
399                                                 stage ("Checking picture asset hash", reel->main_picture()->asset()->file());
400                                                 Result const r = verify_asset (dcp, reel->main_picture(), progress);
401                                                 switch (r) {
402                                                 case RESULT_BAD:
403                                                         notes.push_back (
404                                                                 VerificationNote(
405                                                                         VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_HASH_INCORRECT, *reel->main_picture()->asset()->file()
406                                                                         )
407                                                                 );
408                                                         break;
409                                                 case RESULT_CPL_PKL_DIFFER:
410                                                         notes.push_back (
411                                                                 VerificationNote(
412                                                                         VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_PICTURE_HASHES_DISAGREE, *reel->main_picture()->asset()->file()
413                                                                         )
414                                                                 );
415                                                         break;
416                                                 default:
417                                                         break;
418                                                 }
419                                         }
420                                 }
421                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
422                                         stage ("Checking sound asset hash", reel->main_sound()->asset()->file());
423                                         Result const r = verify_asset (dcp, reel->main_sound(), progress);
424                                         switch (r) {
425                                         case RESULT_BAD:
426                                                 notes.push_back (
427                                                         VerificationNote(
428                                                                 VerificationNote::VERIFY_ERROR, VerificationNote::SOUND_HASH_INCORRECT, *reel->main_sound()->asset()->file()
429                                                                 )
430                                                         );
431                                                 break;
432                                         case RESULT_CPL_PKL_DIFFER:
433                                                 notes.push_back (
434                                                         VerificationNote(
435                                                                 VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_SOUND_HASHES_DISAGREE, *reel->main_sound()->asset()->file()
436                                                                 )
437                                                         );
438                                                 break;
439                                         default:
440                                                 break;
441                                         }
442                                 }
443                         }
444                 }
445
446                 BOOST_FOREACH (shared_ptr<PKL> pkl, dcp->pkls()) {
447                         stage ("Checking PKL", pkl->file());
448                         validate_xml (pkl->file().get(), xsd_dtd_directory, notes);
449                 }
450
451                 if (dcp->asset_map_path()) {
452                         stage ("Checking ASSETMAP", dcp->asset_map_path().get());
453                         validate_xml (dcp->asset_map_path().get(), xsd_dtd_directory, notes);
454                 } else {
455                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::MISSING_ASSETMAP));
456                 }
457         }
458
459         return notes;
460 }
461
462 string
463 dcp::note_to_string (dcp::VerificationNote note)
464 {
465         switch (note.code()) {
466         case dcp::VerificationNote::GENERAL_READ:
467                 return *note.note();
468         case dcp::VerificationNote::CPL_HASH_INCORRECT:
469                 return "The hash of the CPL in the PKL does not agree with the CPL file";
470         case dcp::VerificationNote::INVALID_PICTURE_FRAME_RATE:
471                 return "The picture in a reel has an invalid frame rate";
472         case dcp::VerificationNote::PICTURE_HASH_INCORRECT:
473                 return dcp::String::compose("The hash of the picture asset %1 does not agree with the PKL file", note.file()->filename());
474         case dcp::VerificationNote::PKL_CPL_PICTURE_HASHES_DISAGREE:
475                 return dcp::String::compose("The PKL and CPL hashes disagree for the picture asset %1", note.file()->filename());
476         case dcp::VerificationNote::SOUND_HASH_INCORRECT:
477                 return dcp::String::compose("The hash of the sound asset %1 does not agree with the PKL file", note.file()->filename());
478         case dcp::VerificationNote::PKL_CPL_SOUND_HASHES_DISAGREE:
479                 return dcp::String::compose("The PKL and CPL hashes disagree for the sound asset %1", note.file()->filename());
480         case dcp::VerificationNote::EMPTY_ASSET_PATH:
481                 return "The asset map contains an empty asset path.";
482         case dcp::VerificationNote::MISSING_ASSET:
483                 return String::compose("The file for an asset in the asset map cannot be found; missing file is %1.", note.file()->filename());
484         case dcp::VerificationNote::MISMATCHED_STANDARD:
485                 return "The DCP contains both SMPTE and Interop parts.";
486         case dcp::VerificationNote::XML_VALIDATION_ERROR:
487                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
488         case dcp::VerificationNote::MISSING_ASSETMAP:
489                 return "No ASSETMAP or ASSETMAP.xml was found";
490         case dcp::VerificationNote::INTRINSIC_DURATION_TOO_SMALL:
491                 return String::compose("The intrinsic duration of an asset is less than 1 second long: %1", note.note().get());
492         case dcp::VerificationNote::DURATION_TOO_SMALL:
493                 return String::compose("The duration of an asset is less than 1 second long: %1", note.note().get());
494         }
495
496         return "";
497 }