src/stl_binary_reader.cc

   1 /*
   2     Copyright (C) 2014-2020 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 #include "stl_binary_reader.h"
  21 #include "exceptions.h"
  22 #include "iso6937.h"
  23 #include "stl_util.h"
  24 #include "compose.hpp"
  25 #include <boost/lexical_cast.hpp>
  26 #include <boost/algorithm/string.hpp>
  27 #include <boost/locale.hpp>
  28 #include <iostream>
  29
  30 using std::map;
  31 using std::vector;
  32 using std::cout;
  33 using std::string;
  34 using std::istream;
  35 using boost::lexical_cast;
  36 using boost::algorithm::replace_all;
  37 using boost::is_any_of;
  38 using boost::locale::conv::utf_to_utf;
  39 using boost::shared_ptr;
  40 using namespace sub;
  41
  42 namespace sub {
  43
  44 class InputReader : public boost::noncopyable
  45 {
  46 public:
  47         InputReader ()
  48                 : _buffer (new unsigned char[1024])
  49         {
  50
  51         }
  52
  53         virtual ~InputReader ()
  54         {
  55                 delete[] _buffer;
  56         }
  57
  58         virtual void read (int size, string what) = 0;
  59
  60         string get_string (int offset, int length) const
  61         {
  62                 string s;
  63                 for (int i = 0; i < length; ++i) {
  64                         s += _buffer[offset + i];
  65                 }
  66
  67                 return s;
  68         }
  69
  70         int get_int (int offset, int length) const
  71         {
  72                 int v = 0;
  73                 for (int i = 0; i < length; ++i) {
  74                         v |= _buffer[offset + i] << (8 * i);
  75                 }
  76
  77                 return v;
  78         }
  79
  80         Time get_timecode (int offset, int frame_rate) const
  81         {
  82                 return Time::from_hmsf (_buffer[offset], _buffer[offset + 1], _buffer[offset + 2], _buffer[offset + 3], Rational (frame_rate, 1));
  83         }
  84
  85 protected:
  86         unsigned char* _buffer;
  87 };
  88
  89
  90 class StreamInputReader : public InputReader
  91 {
  92 public:
  93         StreamInputReader (istream& in)
  94                 : _in (in)
  95         {
  96
  97         }
  98
  99         void read (int size, string what)
 100         {
 101                 _in.read (reinterpret_cast<char *>(_buffer), size);
 102                 if (_in.gcount() != size) {
 103                         throw STLError (String::compose("Could not read %1 block from binary STL file", what));
 104                 }
 105         }
 106
 107 private:
 108         std::istream& _in;
 109 };
 110
 111 class FILEInputReader : public InputReader
 112 {
 113 public:
 114         FILEInputReader (FILE* in)
 115                 : _in (in)
 116         {
 117
 118         }
 119
 120         void read (int size, string what)
 121         {
 122                 size_t const N = fread (_buffer, 1, size, _in);
 123                 if (static_cast<int>(N) != size) {
 124                         throw STLError (String::compose("Could not read %1 block from binary STL file", what));
 125                 }
 126         }
 127
 128 private:
 129         FILE* _in;
 130 };
 131
 132 }
 133
 134 STLBinaryReader::STLBinaryReader (istream& in)
 135 {
 136         read (shared_ptr<InputReader>(new StreamInputReader(in)));
 137 }
 138
 139 STLBinaryReader::STLBinaryReader (FILE* in)
 140 {
 141         read (shared_ptr<InputReader>(new FILEInputReader(in)));
 142 }
 143
 144 void STLBinaryReader::read (shared_ptr<InputReader> reader)
 145 {
 146         reader->read (1024, "GSI");
 147
 148         code_page_number = atoi (reader->get_string(0, 3).c_str());
 149         frame_rate = stl_dfc_to_frame_rate (reader->get_string(3, 8));
 150         display_standard = _tables.display_standard_file_to_enum (reader->get_string(11, 1));
 151         language_group = _tables.language_group_file_to_enum (reader->get_string(12, 2));
 152         language = _tables.language_file_to_enum (reader->get_string(14, 2));
 153         original_programme_title = reader->get_string(16, 32);
 154         original_episode_title = reader->get_string(48, 32);
 155         translated_programme_title = reader->get_string(80, 32);
 156         translated_episode_title = reader->get_string(112, 32);
 157         translator_name = reader->get_string(144, 32);
 158         translator_contact_details = reader->get_string(176, 32);
 159         subtitle_list_reference_code = reader->get_string(208, 16);
 160         creation_date = reader->get_string(224, 6);
 161         revision_date = reader->get_string(230, 6);
 162         revision_number = reader->get_string(236, 2);
 163
 164         tti_blocks = atoi (reader->get_string(238, 5).c_str());
 165         number_of_subtitles = atoi (reader->get_string(243, 5).c_str());
 166         subtitle_groups = atoi (reader->get_string(248, 3).c_str());
 167         maximum_characters = atoi (reader->get_string(251, 2).c_str());
 168         maximum_rows = atoi (reader->get_string(253, 2).c_str());
 169
 170         if (maximum_rows == 99) {
 171                 /* https://tech.ebu.ch/docs/tech/tech3360.pdf says
 172                    "It is recommended that for files with a large MNR value (e.g. '99') the
 173                    font size (height) should be defined as ~ 1/15 of the 'Subtitle Safe Area'
 174                    and a lineHeight of 120% is used to achieve a row height of ~ 1/12 of the height
 175                    of the 'Subtitle Safe Area'.
 176                 */
 177                 maximum_rows = 12;
 178         }
 179
 180         timecode_status = _tables.timecode_status_file_to_enum (reader->get_string(255, 1));
 181         start_of_programme = reader->get_string(256, 8);
 182         first_in_cue = reader->get_string(264, 8);
 183         disks = atoi (reader->get_string(272, 1).c_str());
 184         disk_sequence_number = atoi (reader->get_string(273, 1).c_str());
 185         country_of_origin = reader->get_string(274, 3);
 186         publisher = reader->get_string(277, 32);
 187         editor_name = reader->get_string(309, 32);
 188         editor_contact_details = reader->get_string(341, 32);
 189
 190         for (int i = 0; i < tti_blocks; ++i) {
 191
 192                 reader->read (128, "TTI");
 193
 194                 if (_tables.comment_file_to_enum (reader->get_int(15, 1)) == COMMENT_YES) {
 195                         continue;
 196                 }
 197
 198                 string const whole = reader->get_string(16, 112);
 199
 200                 /* Split the text up into lines (8Ah is a new line) */
 201                 vector<string> lines;
 202                 split (lines, whole, is_any_of ("\x8a"));
 203
 204                 /* Italic / underline specifications can span lines, so we need to track them
 205                    outside the lines loop.
 206                 */
 207                 bool italic = false;
 208                 bool underline = false;
 209
 210                 for (size_t i = 0; i < lines.size(); ++i) {
 211                         RawSubtitle sub;
 212                         sub.from = reader->get_timecode(5, frame_rate);
 213                         sub.to = reader->get_timecode(9, frame_rate);
 214                         /* XXX: only the verticial position of the first TTI block should be used (says the spec),
 215                            so using reader->get_int(13, 1) here is wrong if i > 0
 216                          */
 217                         sub.vertical_position.line = reader->get_int(13, 1) + i;
 218                         sub.vertical_position.lines = maximum_rows;
 219                         sub.vertical_position.reference = TOP_OF_SCREEN;
 220                         sub.italic = italic;
 221                         sub.underline = underline;
 222
 223                         /* XXX: not sure what to do with JC = 0, "unchanged presentation" */
 224                         int const h = reader->get_int(14, 1);
 225                         switch (h) {
 226                         case 0:
 227                         case 2:
 228                                 sub.horizontal_position.reference = HORIZONTAL_CENTRE_OF_SCREEN;
 229                                 break;
 230                         case 1:
 231                                 sub.horizontal_position.reference = LEFT_OF_SCREEN;
 232                                 break;
 233                         case 3:
 234                                 sub.horizontal_position.reference = RIGHT_OF_SCREEN;
 235                                 break;
 236                         }
 237
 238                         /* Loop over characters */
 239                         string text;
 240                         for (size_t j = 0; j < lines[i].size(); ++j) {
 241
 242                                 unsigned char const c = static_cast<unsigned char> (lines[i][j]);
 243
 244                                 if (c == 0x8f) {
 245                                         /* Unused space i.e. end of line */
 246                                         break;
 247                                 }
 248
 249                                 if (c >= 0x80 && c <= 0x83) {
 250                                         /* Italic or underline control code */
 251                                         sub.text = utf_to_utf<char> (iso6937_to_utf16 (text.c_str()));
 252                                         _subs.push_back (sub);
 253                                         text.clear ();
 254                                 }
 255
 256                                 switch (c) {
 257                                 case 0x80:
 258                                         italic = true;
 259                                         break;
 260                                 case 0x81:
 261                                         italic = false;
 262                                         break;
 263                                 case 0x82:
 264                                         underline = true;
 265                                         break;
 266                                 case 0x83:
 267                                         underline = false;
 268                                         break;
 269                                 default:
 270                                         text += lines[i][j];
 271                                         break;
 272                                 }
 273
 274                                 sub.italic = italic;
 275                                 sub.underline = underline;
 276                         }
 277
 278                         if (!text.empty ()) {
 279                                 sub.text = utf_to_utf<char> (iso6937_to_utf16 (text.c_str()));
 280                                 _subs.push_back (sub);
 281                         }
 282
 283                         /* XXX: justification */
 284                 }
 285         }
 286 }
 287
 288 map<string, string>
 289 STLBinaryReader::metadata () const
 290 {
 291         map<string, string> m;
 292
 293         m["Code page number"] = lexical_cast<string> (code_page_number);
 294         m["Frame rate"] = lexical_cast<string> (frame_rate);
 295         m["Display standard"] = _tables.display_standard_enum_to_description (display_standard);
 296         m["Language group"] = _tables.language_group_enum_to_description (language_group);
 297         m["Language"] = _tables.language_enum_to_description (language);
 298         m["Original programme title"] = original_programme_title;
 299         m["Original episode title"] = original_episode_title;
 300         m["Translated programme title"] = translated_programme_title;
 301         m["Translated episode title"] = translated_episode_title;
 302         m["Translator name"] = translator_name;
 303         m["Translator contact details"] = translator_contact_details;
 304         m["Subtitle list reference code"] = subtitle_list_reference_code;
 305         m["Creation date"] = creation_date;
 306         m["Revision date"] = revision_date;
 307         m["Revision number"] = revision_number;
 308         m["TTI blocks"] = lexical_cast<string> (tti_blocks);
 309         m["Number of subtitles"] = lexical_cast<string> (number_of_subtitles);
 310         m["Subtitle groups"] = lexical_cast<string> (subtitle_groups);
 311         m["Maximum characters"] = lexical_cast<string> (maximum_characters);
 312         m["Maximum rows"] = lexical_cast<string> (maximum_rows);
 313         m["Timecode status"] = _tables.timecode_status_enum_to_description (timecode_status);
 314         m["Start of programme"] = start_of_programme;
 315         m["First in cue"] = first_in_cue;
 316         m["Disks"] = lexical_cast<string> (disks);
 317         m["Disk sequence number"] = lexical_cast<string> (disk_sequence_number);
 318         m["Country of origin"] = country_of_origin;
 319         m["Publisher"] = publisher;
 320         m["Editor name"] = editor_name;
 321         m["Editor contact details"] = editor_contact_details;
 322
 323         return m;
 324 }