Result is specific to verify_asset().
[libdcp.git] / src / verify.cc
1 /*
2     Copyright (C) 2018-2020 Carl Hetherington <cth@carlh.net>
3
4     This file is part of libdcp.
5
6     libdcp is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10
11     libdcp is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15
16     You should have received a copy of the GNU General Public License
17     along with libdcp.  If not, see <http://www.gnu.org/licenses/>.
18
19     In addition, as a special exception, the copyright holders give
20     permission to link the code of portions of this program with the
21     OpenSSL library under certain conditions as described in each
22     individual source file, and distribute linked combinations
23     including the two.
24
25     You must obey the GNU General Public License in all respects
26     for all of the code used other than OpenSSL.  If you modify
27     file(s) with this exception, you may extend this exception to your
28     version of the file(s), but you are not obligated to do so.  If you
29     do not wish to do so, delete this exception statement from your
30     version.  If you delete this exception statement from all source
31     files in the program, then also delete it here.
32 */
33
34 #include "verify.h"
35 #include "dcp.h"
36 #include "cpl.h"
37 #include "reel.h"
38 #include "reel_picture_asset.h"
39 #include "reel_sound_asset.h"
40 #include "exceptions.h"
41 #include "compose.hpp"
42 #include "raw_convert.h"
43 #include <xercesc/util/PlatformUtils.hpp>
44 #include <xercesc/parsers/XercesDOMParser.hpp>
45 #include <xercesc/parsers/AbstractDOMParser.hpp>
46 #include <xercesc/sax/HandlerBase.hpp>
47 #include <xercesc/dom/DOMImplementation.hpp>
48 #include <xercesc/dom/DOMImplementationLS.hpp>
49 #include <xercesc/dom/DOMImplementationRegistry.hpp>
50 #include <xercesc/dom/DOMLSParser.hpp>
51 #include <xercesc/dom/DOMException.hpp>
52 #include <xercesc/dom/DOMDocument.hpp>
53 #include <xercesc/dom/DOMNodeList.hpp>
54 #include <xercesc/dom/DOMError.hpp>
55 #include <xercesc/dom/DOMLocator.hpp>
56 #include <xercesc/dom/DOMNamedNodeMap.hpp>
57 #include <xercesc/dom/DOMAttr.hpp>
58 #include <xercesc/dom/DOMErrorHandler.hpp>
59 #include <xercesc/framework/LocalFileInputSource.hpp>
60 #include <boost/noncopyable.hpp>
61 #include <boost/foreach.hpp>
62 #include <boost/algorithm/string.hpp>
63 #include <map>
64 #include <list>
65 #include <vector>
66 #include <iostream>
67
68 using std::list;
69 using std::vector;
70 using std::string;
71 using std::cout;
72 using std::map;
73 using boost::shared_ptr;
74 using boost::optional;
75 using boost::function;
76
77 using namespace dcp;
78 using namespace xercesc;
79
80 static
81 string
82 xml_ch_to_string (XMLCh const * a)
83 {
84         char* x = XMLString::transcode(a);
85         string const o(x);
86         XMLString::release(&x);
87         return o;
88 }
89
90 class XMLValidationError
91 {
92 public:
93         XMLValidationError (SAXParseException const & e)
94                 : _message (xml_ch_to_string(e.getMessage()))
95                 , _line (e.getLineNumber())
96                 , _column (e.getColumnNumber())
97         {
98
99         }
100
101         string message () const {
102                 return _message;
103         }
104
105         uint64_t line () const {
106                 return _line;
107         }
108
109         uint64_t column () const {
110                 return _column;
111         }
112
113 private:
114         string _message;
115         uint64_t _line;
116         uint64_t _column;
117 };
118
119
120 class DCPErrorHandler : public ErrorHandler
121 {
122 public:
123         void warning(const SAXParseException& e)
124         {
125                 maybe_add (XMLValidationError(e));
126         }
127
128         void error(const SAXParseException& e)
129         {
130                 maybe_add (XMLValidationError(e));
131         }
132
133         void fatalError(const SAXParseException& e)
134         {
135                 maybe_add (XMLValidationError(e));
136         }
137
138         void resetErrors() {
139                 _errors.clear ();
140         }
141
142         list<XMLValidationError> errors () const {
143                 return _errors;
144         }
145
146 private:
147         void maybe_add (XMLValidationError e)
148         {
149                 /* XXX: nasty hack */
150                 if (
151                         e.message().find("schema document") != string::npos &&
152                         e.message().find("has different target namespace from the one specified in instance document") != string::npos
153                         ) {
154                         return;
155                 }
156
157                 _errors.push_back (e);
158         }
159
160         list<XMLValidationError> _errors;
161 };
162
163 class StringToXMLCh : public boost::noncopyable
164 {
165 public:
166         StringToXMLCh (string a)
167         {
168                 _buffer = XMLString::transcode(a.c_str());
169         }
170
171         ~StringToXMLCh ()
172         {
173                 XMLString::release (&_buffer);
174         }
175
176         XMLCh const * get () const {
177                 return _buffer;
178         }
179
180 private:
181         XMLCh* _buffer;
182 };
183
184 class LocalFileResolver : public EntityResolver
185 {
186 public:
187         LocalFileResolver (boost::filesystem::path xsd_dtd_directory)
188                 : _xsd_dtd_directory (xsd_dtd_directory)
189         {
190                 add("http://www.w3.org/2001/XMLSchema.dtd", "XMLSchema.dtd");
191                 add("http://www.w3.org/2001/03/xml.xsd", "xml.xsd");
192                 add("http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd", "xmldsig-core-schema.xsd");
193                 add("http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd", "Main-Stereo-Picture-CPL.xsd");
194                 add("http://www.digicine.com/PROTO-ASDCP-CPL-20040511.xsd", "PROTO-ASDCP-CPL-20040511.xsd");
195                 add("http://www.digicine.com/PROTO-ASDCP-PKL-20040311.xsd", "PROTO-ASDCP-PKL-20040311.xsd");
196                 add("http://www.digicine.com/PROTO-ASDCP-AM-20040311.xsd", "PROTO-ASDCP-AM-20040311.xsd");
197         }
198
199         InputSource* resolveEntity(XMLCh const *, XMLCh const * system_id)
200         {
201                 string system_id_str = xml_ch_to_string (system_id);
202                 if (_files.find(system_id_str) == _files.end()) {
203                         return 0;
204                 }
205
206                 boost::filesystem::path p = _xsd_dtd_directory / _files[system_id_str];
207                 StringToXMLCh ch (p.string());
208                 return new LocalFileInputSource(ch.get());
209         }
210
211 private:
212         void add (string uri, string file)
213         {
214                 _files[uri] = file;
215         }
216
217         std::map<string, string> _files;
218         boost::filesystem::path _xsd_dtd_directory;
219 };
220
221 static
222 void
223 validate_xml (boost::filesystem::path xml_file, boost::filesystem::path xsd_dtd_directory, list<VerificationNote>& notes)
224 {
225         try {
226                 XMLPlatformUtils::Initialize ();
227         } catch (XMLException& e) {
228                 throw MiscError ("Failed to initialise xerces library");
229         }
230
231         DCPErrorHandler error_handler;
232
233         /* All the xerces objects in this scope must be destroyed before XMLPlatformUtils::Terminate() is called */
234         {
235                 XercesDOMParser parser;
236                 parser.setValidationScheme(XercesDOMParser::Val_Always);
237                 parser.setDoNamespaces(true);
238                 parser.setDoSchema(true);
239
240                 map<string, string> schema;
241                 schema["http://www.w3.org/2000/09/xmldsig#"] = "xmldsig-core-schema.xsd";
242                 schema["http://www.w3.org/TR/2002/REC-xmldsig-core-20020212/xmldsig-core-schema.xsd"] = "xmldsig-core-schema.xsd";
243                 schema["http://www.smpte-ra.org/schemas/429-7/2006/CPL"] = "SMPTE-429-7-2006-CPL.xsd";
244                 schema["http://www.smpte-ra.org/schemas/429-8/2006/PKL"] = "SMPTE-429-8-2006-PKL.xsd";
245                 schema["http://www.smpte-ra.org/schemas/429-9/2007/AM"] = "SMPTE-429-9-2007-AM.xsd";
246                 schema["http://www.digicine.com/schemas/437-Y/2007/Main-Stereo-Picture-CPL.xsd"] = "Main-Stereo-Picture-CPL.xsd";
247                 schema["http://www.digicine.com/PROTO-ASDCP-CPL-20040511#"] = "PROTO-ASDCP-CPL-20040511.xsd";
248                 schema["http://www.digicine.com/PROTO-ASDCP-PKL-20040311#"] = "PROTO-ASDCP-PKL-20040311.xsd";
249                 schema["http://www.digicine.com/PROTO-ASDCP-AM-20040311#"] = "PROTO-ASDCP-AM-20040311.xsd";
250
251                 string locations;
252                 for (map<string, string>::const_iterator i = schema.begin(); i != schema.end(); ++i) {
253                         locations += i->first;
254                         locations += " ";
255                         boost::filesystem::path p = xsd_dtd_directory / i->second;
256                         locations += p.string() + " ";
257                 }
258
259                 parser.setExternalSchemaLocation(locations.c_str());
260                 parser.setValidationSchemaFullChecking(true);
261                 parser.setErrorHandler(&error_handler);
262
263                 LocalFileResolver resolver (xsd_dtd_directory);
264                 parser.setEntityResolver(&resolver);
265
266                 try {
267                         parser.resetDocumentPool();
268                         parser.parse(xml_file.string().c_str());
269                 } catch (XMLException& e) {
270                         throw MiscError(xml_ch_to_string(e.getMessage()));
271                 } catch (DOMException& e) {
272                         throw MiscError(xml_ch_to_string(e.getMessage()));
273                 } catch (...) {
274                         throw MiscError("Unknown exception from xerces");
275                 }
276         }
277
278         XMLPlatformUtils::Terminate ();
279
280         BOOST_FOREACH (XMLValidationError i, error_handler.errors()) {
281                 notes.push_back (
282                         VerificationNote(
283                                 VerificationNote::VERIFY_ERROR,
284                                 VerificationNote::XML_VALIDATION_ERROR,
285                                 i.message(),
286                                 xml_file,
287                                 i.line()
288                                 )
289                         );
290         }
291 }
292
293
/** Outcome of verify_asset() */
enum VerifyAssetResult {
        /** The asset's hash agrees with the PKL, and with the CPL if the CPL gives one */
        VERIFY_ASSET_RESULT_GOOD,
        /** The CPL gives a hash for the asset which is different to the one in the PKL */
        VERIFY_ASSET_RESULT_CPL_PKL_DIFFER,
        /** The hash computed from the asset is different to the one in the PKL */
        VERIFY_ASSET_RESULT_BAD
};
299
300
301 static VerifyAssetResult
302 verify_asset (shared_ptr<DCP> dcp, shared_ptr<ReelMXF> reel_mxf, function<void (float)> progress)
303 {
304         string const actual_hash = reel_mxf->asset_ref()->hash(progress);
305
306         list<shared_ptr<PKL> > pkls = dcp->pkls();
307         /* We've read this DCP in so it must have at least one PKL */
308         DCP_ASSERT (!pkls.empty());
309
310         shared_ptr<Asset> asset = reel_mxf->asset_ref().asset();
311
312         optional<string> pkl_hash;
313         BOOST_FOREACH (shared_ptr<PKL> i, pkls) {
314                 pkl_hash = i->hash (reel_mxf->asset_ref()->id());
315                 if (pkl_hash) {
316                         break;
317                 }
318         }
319
320         DCP_ASSERT (pkl_hash);
321
322         optional<string> cpl_hash = reel_mxf->hash();
323         if (cpl_hash && *cpl_hash != *pkl_hash) {
324                 return VERIFY_ASSET_RESULT_CPL_PKL_DIFFER;
325         }
326
327         if (actual_hash != *pkl_hash) {
328                 return VERIFY_ASSET_RESULT_BAD;
329         }
330
331         return VERIFY_ASSET_RESULT_GOOD;
332 }
333
334
335 list<VerificationNote>
336 dcp::verify (
337         vector<boost::filesystem::path> directories,
338         function<void (string, optional<boost::filesystem::path>)> stage,
339         function<void (float)> progress,
340         boost::filesystem::path xsd_dtd_directory
341         )
342 {
343         xsd_dtd_directory = boost::filesystem::canonical (xsd_dtd_directory);
344
345         list<VerificationNote> notes;
346
347         list<shared_ptr<DCP> > dcps;
348         BOOST_FOREACH (boost::filesystem::path i, directories) {
349                 dcps.push_back (shared_ptr<DCP> (new DCP (i)));
350         }
351
352         BOOST_FOREACH (shared_ptr<DCP> dcp, dcps) {
353                 stage ("Checking DCP", dcp->directory());
354                 try {
355                         dcp->read (&notes);
356                 } catch (ReadError& e) {
357                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
358                 } catch (XMLError& e) {
359                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::GENERAL_READ, string(e.what())));
360                 }
361
362                 BOOST_FOREACH (shared_ptr<CPL> cpl, dcp->cpls()) {
363                         stage ("Checking CPL", cpl->file());
364                         validate_xml (cpl->file().get(), xsd_dtd_directory, notes);
365
366                         /* Check that the CPL's hash corresponds to the PKL */
367                         BOOST_FOREACH (shared_ptr<PKL> i, dcp->pkls()) {
368                                 optional<string> h = i->hash(cpl->id());
369                                 if (h && make_digest(Data(*cpl->file())) != *h) {
370                                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::CPL_HASH_INCORRECT));
371                                 }
372                         }
373
374                         BOOST_FOREACH (shared_ptr<Reel> reel, cpl->reels()) {
375                                 stage ("Checking reel", optional<boost::filesystem::path>());
376
377                                 BOOST_FOREACH (shared_ptr<ReelAsset> i, reel->assets()) {
378                                         if (i->duration() && (i->duration().get() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
379                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::DURATION_TOO_SMALL, i->id()));
380                                         }
381                                         if ((i->intrinsic_duration() * i->edit_rate().denominator / i->edit_rate().numerator) < 1) {
382                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INTRINSIC_DURATION_TOO_SMALL, i->id()));
383                                         }
384                                 }
385
386                                 if (reel->main_picture()) {
387                                         /* Check reel stuff */
388                                         Fraction const frame_rate = reel->main_picture()->frame_rate();
389                                         if (frame_rate.denominator != 1 ||
390                                             (frame_rate.numerator != 24 &&
391                                              frame_rate.numerator != 25 &&
392                                              frame_rate.numerator != 30 &&
393                                              frame_rate.numerator != 48 &&
394                                              frame_rate.numerator != 50 &&
395                                              frame_rate.numerator != 60 &&
396                                              frame_rate.numerator != 96)) {
397                                                 notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::INVALID_PICTURE_FRAME_RATE));
398                                         }
399                                         /* Check asset */
400                                         if (reel->main_picture()->asset_ref().resolved()) {
401                                                 stage ("Checking picture asset hash", reel->main_picture()->asset()->file());
402                                                 VerifyAssetResult const r = verify_asset (dcp, reel->main_picture(), progress);
403                                                 switch (r) {
404                                                 case VERIFY_ASSET_RESULT_BAD:
405                                                         notes.push_back (
406                                                                 VerificationNote(
407                                                                         VerificationNote::VERIFY_ERROR, VerificationNote::PICTURE_HASH_INCORRECT, *reel->main_picture()->asset()->file()
408                                                                         )
409                                                                 );
410                                                         break;
411                                                 case VERIFY_ASSET_RESULT_CPL_PKL_DIFFER:
412                                                         notes.push_back (
413                                                                 VerificationNote(
414                                                                         VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_PICTURE_HASHES_DISAGREE, *reel->main_picture()->asset()->file()
415                                                                         )
416                                                                 );
417                                                         break;
418                                                 default:
419                                                         break;
420                                                 }
421                                         }
422                                 }
423                                 if (reel->main_sound() && reel->main_sound()->asset_ref().resolved()) {
424                                         stage ("Checking sound asset hash", reel->main_sound()->asset()->file());
425                                         VerifyAssetResult const r = verify_asset (dcp, reel->main_sound(), progress);
426                                         switch (r) {
427                                         case VERIFY_ASSET_RESULT_BAD:
428                                                 notes.push_back (
429                                                         VerificationNote(
430                                                                 VerificationNote::VERIFY_ERROR, VerificationNote::SOUND_HASH_INCORRECT, *reel->main_sound()->asset()->file()
431                                                                 )
432                                                         );
433                                                 break;
434                                         case VERIFY_ASSET_RESULT_CPL_PKL_DIFFER:
435                                                 notes.push_back (
436                                                         VerificationNote(
437                                                                 VerificationNote::VERIFY_ERROR, VerificationNote::PKL_CPL_SOUND_HASHES_DISAGREE, *reel->main_sound()->asset()->file()
438                                                                 )
439                                                         );
440                                                 break;
441                                         default:
442                                                 break;
443                                         }
444                                 }
445                         }
446                 }
447
448                 BOOST_FOREACH (shared_ptr<PKL> pkl, dcp->pkls()) {
449                         stage ("Checking PKL", pkl->file());
450                         validate_xml (pkl->file().get(), xsd_dtd_directory, notes);
451                 }
452
453                 if (dcp->asset_map_path()) {
454                         stage ("Checking ASSETMAP", dcp->asset_map_path().get());
455                         validate_xml (dcp->asset_map_path().get(), xsd_dtd_directory, notes);
456                 } else {
457                         notes.push_back (VerificationNote(VerificationNote::VERIFY_ERROR, VerificationNote::MISSING_ASSETMAP));
458                 }
459         }
460
461         return notes;
462 }
463
464 string
465 dcp::note_to_string (dcp::VerificationNote note)
466 {
467         switch (note.code()) {
468         case dcp::VerificationNote::GENERAL_READ:
469                 return *note.note();
470         case dcp::VerificationNote::CPL_HASH_INCORRECT:
471                 return "The hash of the CPL in the PKL does not agree with the CPL file";
472         case dcp::VerificationNote::INVALID_PICTURE_FRAME_RATE:
473                 return "The picture in a reel has an invalid frame rate";
474         case dcp::VerificationNote::PICTURE_HASH_INCORRECT:
475                 return dcp::String::compose("The hash of the picture asset %1 does not agree with the PKL file", note.file()->filename());
476         case dcp::VerificationNote::PKL_CPL_PICTURE_HASHES_DISAGREE:
477                 return dcp::String::compose("The PKL and CPL hashes disagree for the picture asset %1", note.file()->filename());
478         case dcp::VerificationNote::SOUND_HASH_INCORRECT:
479                 return dcp::String::compose("The hash of the sound asset %1 does not agree with the PKL file", note.file()->filename());
480         case dcp::VerificationNote::PKL_CPL_SOUND_HASHES_DISAGREE:
481                 return dcp::String::compose("The PKL and CPL hashes disagree for the sound asset %1", note.file()->filename());
482         case dcp::VerificationNote::EMPTY_ASSET_PATH:
483                 return "The asset map contains an empty asset path.";
484         case dcp::VerificationNote::MISSING_ASSET:
485                 return String::compose("The file for an asset in the asset map cannot be found; missing file is %1.", note.file()->filename());
486         case dcp::VerificationNote::MISMATCHED_STANDARD:
487                 return "The DCP contains both SMPTE and Interop parts.";
488         case dcp::VerificationNote::XML_VALIDATION_ERROR:
489                 return String::compose("An XML file is badly formed: %1 (%2:%3)", note.note().get(), note.file()->filename(), note.line().get());
490         case dcp::VerificationNote::MISSING_ASSETMAP:
491                 return "No ASSETMAP or ASSETMAP.xml was found";
492         case dcp::VerificationNote::INTRINSIC_DURATION_TOO_SMALL:
493                 return String::compose("The intrinsic duration of an asset is less than 1 second long: %1", note.note().get());
494         case dcp::VerificationNote::DURATION_TOO_SMALL:
495                 return String::compose("The duration of an asset is less than 1 second long: %1", note.note().get());
496         }
497
498         return "";
499 }