src/stl_binary_reader.cc

   1 /*
   2     Copyright (C) 2014 Carl Hetherington <cth@carlh.net>
   3
   4     This program is free software; you can redistribute it and/or modify
   5     it under the terms of the GNU General Public License as published by
   6     the Free Software Foundation; either version 2 of the License, or
   7     (at your option) any later version.
   8
   9     This program is distributed in the hope that it will be useful,
  10     but WITHOUT ANY WARRANTY; without even the implied warranty of
  11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12     GNU General Public License for more details.
  13
  14     You should have received a copy of the GNU General Public License
  15     along with this program; if not, write to the Free Software
  16     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  17
  18 */
  19
  20 #include <boost/lexical_cast.hpp>
  21 #include <boost/algorithm/string.hpp>
  22 #include <boost/locale.hpp>
  23 #include "stl_binary_reader.h"
  24 #include "exceptions.h"
  25 #include "iso6937.h"
  26 #include "compose.hpp"
  27
  28 using std::map;
  29 using std::vector;
  30 using std::cout;
  31 using std::string;
  32 using std::istream;
  33 using boost::lexical_cast;
  34 using boost::algorithm::replace_all;
  35 using boost::is_any_of;
  36 using boost::locale::conv::utf_to_utf;
  37 using namespace sub;
  38
  39 STLBinaryReader::STLBinaryReader (istream& in)
  40         : _buffer (new unsigned char[1024])
  41 {
  42         in.read ((char *) _buffer, 1024);
  43         if (in.gcount() != 1024) {
  44                 throw STLError ("Could not read GSI block from binary STL file");
  45         }
  46
  47         code_page_number = atoi (get_string (0, 3).c_str ());
  48
  49         string const dfc = get_string (3, 8);
  50         if (dfc == "STL24.01") {
  51                 frame_rate = 24;
  52         } else if (dfc == "STL25.01") {
  53                 frame_rate = 25;
  54         } else if (dfc == "STL30.01") {
  55                 frame_rate = 30;
  56         } else {
  57                 throw STLError (String::compose ("Unknown disk format code %1 in binary STL file", dfc));
  58         }
  59
  60         display_standard = _tables.display_standard_file_to_enum (get_string (11, 1));
  61         language_group = _tables.language_group_file_to_enum (get_string (12, 2));
  62         language = _tables.language_file_to_enum (get_string (14, 2));
  63         original_programme_title = get_string (16, 32);
  64         original_episode_title = get_string (48, 32);
  65         translated_programme_title = get_string (80, 32);
  66         translated_episode_title = get_string (112, 32);
  67         translator_name = get_string (144, 32);
  68         translator_contact_details = get_string (176, 32);
  69         subtitle_list_reference_code = get_string (208, 32);
  70         creation_date = get_string (224, 6);
  71         revision_date = get_string (230, 6);
  72         revision_number = get_string (236, 2);
  73
  74         tti_blocks = atoi (get_string (238, 5).c_str ());
  75         number_of_subtitles = atoi (get_string (243, 5).c_str ());
  76         subtitle_groups = atoi (get_string (248, 3).c_str ());
  77         maximum_characters = atoi (get_string (251, 2).c_str ());
  78         maximum_rows = atoi (get_string (253, 2).c_str ());
  79         timecode_status = _tables.timecode_status_file_to_enum (get_string (255, 1));
  80         start_of_programme = get_string (256, 8);
  81         first_in_cue = get_string (256, 8);
  82         disks = atoi (get_string (272, 1).c_str ());
  83         disk_sequence_number = atoi (get_string (273, 1).c_str ());
  84         country_of_origin = get_string (274, 3);
  85         publisher = get_string (277, 32);
  86         editor_name = get_string (309, 32);
  87         editor_contact_details = get_string (341, 32);
  88
  89         for (int i = 0; i < tti_blocks; ++i) {
  90
  91                 in.read ((char *) _buffer, 128);
  92                 if (in.gcount() != 128) {
  93                         throw STLError ("Could not read TTI block from binary STL file");
  94                 }
  95
  96                 if (_tables.comment_file_to_enum (get_int (15, 1)) == COMMENT_YES) {
  97                         continue;
  98                 }
  99
 100                 string const whole = get_string (16, 112);
 101
 102                 /* Split the text up into lines (8Ah is a new line) */
 103                 vector<string> lines;
 104                 split (lines, whole, is_any_of ("\x8a"));
 105
 106                 for (size_t i = 0; i < lines.size(); ++i) {
 107                         RawSubtitle sub;
 108                         sub.from.set_frame (get_timecode (5));
 109                         sub.to.set_frame (get_timecode (9));
 110                         sub.vertical_position.line = get_int (13, 1) + i;
 111
 112                         /* XXX: justification, effects */
 113
 114                         /* 8Fh is unused space, so trim the string to the first instance of that */
 115                         size_t unused = lines[i].find_first_of ('\x8f');
 116                         if (unused != string::npos) {
 117                                 lines[i] = lines[i].substr (0, unused);
 118                         }
 119
 120                         sub.text = utf_to_utf<char> (iso6937_to_utf16 (lines[i].c_str()));
 121                         _subs.push_back (sub);
 122                 }
 123         }
 124 }
 125
  /** Free the block buffer that the constructor allocated with new[]. */
  STLBinaryReader::~STLBinaryReader ()
  {
          delete[] _buffer;
  }
 130
 131 string
 132 STLBinaryReader::get_string (int offset, int length) const
 133 {
 134         string s;
 135         for (int i = 0; i < length; ++i) {
 136                 s += _buffer[offset + i];
 137         }
 138
 139         return s;
 140 }
 141
 142 int
 143 STLBinaryReader::get_int (int offset, int length) const
 144 {
 145         int v = 0;
 146         for (int i = 0; i < length; ++i) {
 147                 v |= _buffer[offset + i] << (8 * i);
 148         }
 149
 150         return v;
 151 }
 152
 153 FrameTime
 154 STLBinaryReader::get_timecode (int offset) const
 155 {
 156         return FrameTime (_buffer[offset], _buffer[offset + 1], _buffer[offset + 2], _buffer[offset + 3]);
 157 }
 158
 159 map<string, string>
 160 STLBinaryReader::metadata () const
 161 {
 162         map<string, string> m;
 163
 164         m["Code page number"] = lexical_cast<string> (code_page_number);
 165         m["Frame rate"] = lexical_cast<string> (frame_rate);
 166         m["Display standard"] = _tables.display_standard_enum_to_description (display_standard);
 167         m["Language group"] = _tables.language_group_enum_to_description (language_group);
 168         m["Language"] = _tables.language_enum_to_description (language);
 169         m["Original programme title"] = original_programme_title;
 170         m["Original episode title"] = original_episode_title;
 171         m["Translated programme title"] = translated_programme_title;
 172         m["Translated episode title"] = translated_episode_title;
 173         m["Translator name"] = translator_name;
 174         m["Translator contact details"] = translator_contact_details;
 175         m["Subtitle list reference code"] = subtitle_list_reference_code;
 176         m["Creation date"] = creation_date;
 177         m["Revision date"] = revision_date;
 178         m["Revision number"] = revision_number;
 179         m["TTI blocks"] = lexical_cast<string> (tti_blocks);
 180         m["Number of subtitles"] = lexical_cast<string> (number_of_subtitles);
 181         m["Subtitle groups"] = lexical_cast<string> (subtitle_groups);
 182         m["Maximum characters"] = lexical_cast<string> (maximum_characters);
 183         m["Maximum rows"] = lexical_cast<string> (maximum_rows);
 184         m["Timecode status"] = _tables.timecode_status_enum_to_description (timecode_status);
 185         m["Start of programme"] = start_of_programme;
 186         m["First in cue"] = first_in_cue;
 187         m["Disks"] = lexical_cast<string> (disks);
 188         m["Disk sequence number"] = lexical_cast<string> (disk_sequence_number);
 189         m["Country of origin"] = country_of_origin;
 190         m["Publisher"] = publisher;
 191         m["Editor name"] = editor_name;
 192         m["Editor contact details"] = editor_contact_details;
 193
 194         return m;
 195 }