Rudimentary SSA parser.
author Carl Hetherington <cth@carlh.net>
Thu, 11 Feb 2016 14:57:02 +0000 (14:57 +0000)
committer Carl Hetherington <cth@carlh.net>
Thu, 11 Feb 2016 14:57:02 +0000 (14:57 +0000)
src/font_size.cc
src/font_size.h
src/ssa_reader.cc [new file with mode: 0644]
src/ssa_reader.h [new file with mode: 0644]
src/wscript
test/ssa_reader_test.cc [new file with mode: 0644]
test/wscript

index d11ee89e4f979acefcd2f325db06963dbd7e556b..f3e6c8982f51e83aa82d8d0341f3f4348e9c6065 100644 (file)
@@ -40,3 +40,11 @@ FontSize::points (int screen_height_in_points) const
 
        return _proportional.get() * screen_height_in_points;
 }
+
+/** Make a FontSize from a size given in points.
+ *  @param p Size in points.
+ *  @return FontSize on which set_points (p) has been called.
+ */
+FontSize
+FontSize::from_points (int p)
+{
+       FontSize size;
+       size.set_points (p);
+       return size;
+}
index 6439c8f7d114a44297491062ff672e56ecb00ceb..62bbd09854051c1dc91a192ed1449560fc209d80 100644 (file)
@@ -52,6 +52,8 @@ public:
        float proportional (int screen_height_in_points) const;
        int points (int screen_height_in_points) const;
 
+       static FontSize from_points (int p);   /**< make a FontSize from a size p given in points */
+
 private:
        /** as a proportion of screen height */
        boost::optional<float> _proportional;
diff --git a/src/ssa_reader.cc b/src/ssa_reader.cc
new file mode 100644 (file)
index 0000000..f3180a7
--- /dev/null
@@ -0,0 +1,236 @@
+/*
+    Copyright (C) 2016 Carl Hetherington <cth@carlh.net>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+#include "ssa_reader.h"
+#include "util.h"
+#include "sub_assert.h"
+#include "raw_convert.h"
+#include "subtitle.h"
+#include <boost/algorithm/string.hpp>
+#include <boost/bind.hpp>
+#include <boost/foreach.hpp>
+#include <sstream>
+#include <vector>
+
+using std::string;
+using std::stringstream;
+using std::vector;
+using std::map;
+using std::cout;
+using boost::optional;
+using boost::function;
+using namespace boost::algorithm;
+using namespace sub;
+
+/** Construct an SSAReader, parsing the subtitles held in a string straight away.
+ *  @param s Subtitle string encoded in UTF-8.
+ */
+SSAReader::SSAReader (string const & s)
+{
+       stringstream stream (s);
+       read (boost::bind (&get_line_stringstream, &stream));
+}
+
+/** Construct an SSAReader, parsing the subtitles from a file straight away.
+ *  @param f Subtitle file encoded in UTF-8.
+ */
+SSAReader::SSAReader (FILE* f)
+{
+       read (boost::bind (&get_line_file, f));
+}
+
+class Style
+{
+public:
+       Style ()
+               : font_size (24)
+               , primary_colour (255, 255, 255)
+               , bold (false)
+               , italic (false)
+       {}
+
+       Style (string format_line, string style_line)
+               : font_size (24)
+               , primary_colour (255, 255, 255)
+               , bold (false)
+               , italic (false)
+       {
+               vector<string> keys;
+               split (keys, format_line, is_any_of (","));
+               vector<string> style;
+               split (style, style_line, is_any_of (","));
+
+               SUB_ASSERT (!keys.empty());
+               SUB_ASSERT (!style.empty());
+               SUB_ASSERT (keys.size() == style.size());
+
+               for (size_t i = 0; i < style.size(); ++i) {
+                       trim (keys[i]);
+                       trim (style[i]);
+                       if (keys[i] == "Name") {
+                               name = style[i];
+                       } else if (keys[i] == "Fontname") {
+                               font_name = style[i];
+                       } else if (keys[i] == "Fontsize") {
+                               font_size = raw_convert<int> (style[i]);
+                       } else if (keys[i] == "PrimaryColour") {
+                               primary_colour = colour (raw_convert<int> (style[i]));
+                       } else if (keys[i] == "BackColour") {
+                               back_colour = colour (raw_convert<int> (style[i]));
+                       } else if (keys[i] == "Bold") {
+                               bold = style[i] == "-1";
+                       } else if (keys[i] == "Italic") {
+                               italic = style[i] == "-1";
+                       } else if (keys[i] == "BorderStyle") {
+                               if (style[i] == "1") {
+                                       effect = SHADOW;
+                               }
+                       }
+               }
+       }
+
+       string name;
+       optional<string> font_name;
+       int font_size;
+       Colour primary_colour;
+       /** outline colour */
+       optional<Colour> back_colour;
+       bool bold;
+       bool italic;
+       optional<Effect> effect;
+
+private:
+       Colour colour (int c) const
+       {
+               return Colour (
+                       ((c & 0x0000ff) >>  0) / 255.0,
+                       ((c & 0x00ff00) >>  8) / 255.0,
+                       ((c & 0xff0000) >> 16) / 255.0
+                       );
+       }
+};
+
+Time
+SSAReader::parse_time (string t) const
+{
+       vector<string> bits;
+       split (bits, t, is_any_of (":."));
+       SUB_ASSERT (bits.size() == 4);
+       return Time::from_hms (
+               raw_convert<int> (bits[0]),
+               raw_convert<int> (bits[1]),
+               raw_convert<int> (bits[2]),
+               raw_convert<int> (bits[3]) * 10
+               );
+}
+
+void
+SSAReader::read (function<optional<string> ()> get_line)
+{
+       enum {
+               INFO,
+               STYLES,
+               EVENTS
+       } part = INFO;
+
+       map<string, Style> styles;
+       string style_format_line;
+       vector<string> event_format;
+
+       while (true) {
+               optional<string> line = get_line ();
+               if (!line) {
+                       break;
+               }
+
+               trim (*line);
+
+               if (starts_with (*line, ";") || line->empty ()) {
+                       continue;
+               }
+
+               if (starts_with (*line, "[")) {
+                       /* Section heading */
+                       if (line.get() == "[Script Info]") {
+                               part = INFO;
+                       } else if (line.get() == "[V4 Styles]") {
+                               part = STYLES;
+                       } else if (line.get() == "[Events]") {
+                               part = EVENTS;
+                       }
+                       continue;
+               }
+
+               size_t const colon = line->find (":");
+               SUB_ASSERT (colon != string::npos);
+               SUB_ASSERT (line->length() > colon + 1);
+               string const type = line->substr (0, colon);
+               string const body = line->substr (colon + 2);
+
+               switch (part) {
+               case INFO:
+                       break;
+               case STYLES:
+                       if (type == "Format") {
+                               style_format_line = body;
+                       } else if (type == "Style") {
+                               SUB_ASSERT (!style_format_line.empty ());
+                               Style s (style_format_line, body);
+                               styles[s.name] = s;
+                       }
+                       break;
+               case EVENTS:
+                       if (type == "Format") {
+                               split (event_format, body, is_any_of (","));
+                               BOOST_FOREACH (string& i, event_format) {
+                                       trim (i);
+                               }
+                       } else if (type == "Dialogue") {
+                               SUB_ASSERT (!event_format.empty ());
+                               vector<string> event;
+                               split (event, body, is_any_of (","));
+
+                               SUB_ASSERT (!event.empty());
+                               SUB_ASSERT (event_format.size() == event.size());
+
+                               RawSubtitle sub;
+
+                               for (size_t i = 0; i < event.size(); ++i) {
+                                       trim (event[i]);
+                                       if (event_format[i] == "Start") {
+                                               sub.from = parse_time (event[i]);
+                                       } else if (event_format[i] == "End") {
+                                               sub.to = parse_time (event[i]);
+                                       } else if (event_format[i] == "Style") {
+                                               SUB_ASSERT (styles.find(event[i]) != styles.end());
+                                               Style style = styles[event[i]];
+                                               sub.font = style.font_name;
+                                               sub.font_size = FontSize::from_points (style.font_size);
+                                               sub.colour = style.primary_colour;
+                                               sub.effect_colour = style.back_colour;
+                                               sub.bold = style.bold;
+                                               sub.italic = style.italic;
+                                               sub.effect = style.effect;
+                                       } else if (event_format[i] == "Text") {
+                                               sub.text = event[i];
+                                       }
+                               }
+
+                               _subs.push_back (sub);
+                       }
+               }
+
+       }
+}
diff --git a/src/ssa_reader.h b/src/ssa_reader.h
new file mode 100644 (file)
index 0000000..0f575ff
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+    Copyright (C) 2016 Carl Hetherington <cth@carlh.net>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+/** @file  src/ssa_reader.h
+ *  @brief SSAReader class.
+ */
+
+#ifndef LIBSUB_SSA_READER_H
+#define LIBSUB_SSA_READER_H
+
+#include "reader.h"
+#include <boost/function.hpp>
+
+namespace sub {
+
+class SSAReader : public Reader   /* Reader for subtitles in SSA format */
+{
+public:
+       SSAReader (FILE* f);   /**< @param f Subtitle file encoded in UTF-8 */
+       SSAReader (std::string const & subs);   /**< @param subs Subtitle string encoded in UTF-8 */
+
+private:
+       void read (boost::function<boost::optional<std::string> ()> get_line);   /**< parse the lines returned by get_line, stopping when it returns an empty optional */
+       Time parse_time (std::string t) const;   /**< convert a time string (four numbers separated by `:' or `.') to a Time */
+};
+
+}
+
+#endif
index e881a7889ecd12084d4aab6074e019b3f1318975..9136ea1324149c46f886d577ddecb63718ed2463 100644 (file)
@@ -25,6 +25,7 @@ def build(bld):
                  reader.cc
                  reader_factory.cc
                  smpte_dcp_reader.cc
+                 ssa_reader.cc
                  stl_binary_reader.cc
                  stl_binary_tables.cc
                  stl_binary_writer.cc
@@ -54,6 +55,7 @@ def build(bld):
               raw_subtitle.h
               reader.h
               smpte_dcp_reader.h
+              ssa_reader.h
               stl_binary_tables.h
               stl_binary_reader.h
               stl_binary_writer.h
diff --git a/test/ssa_reader_test.cc b/test/ssa_reader_test.cc
new file mode 100644 (file)
index 0000000..c7594d7
--- /dev/null
@@ -0,0 +1,71 @@
+/*
+    Copyright (C) 2016 Carl Hetherington <cth@carlh.net>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+#include "test.h"
+#include "ssa_reader.h"
+#include "collect.h"
+#include "subtitle.h"
+#include <boost/test/unit_test.hpp>
+#include <boost/filesystem.hpp>
+#include <cstdio>
+
+using std::list;
+
+BOOST_AUTO_TEST_CASE (ssa_reader_test)
+{
+       boost::filesystem::path p = private_test / "example.ssa";
+       FILE* f = fopen (p.string().c_str(), "r");
+       sub::SSAReader reader (f);
+       fclose (f);
+       list<sub::Subtitle> subs = sub::collect<std::list<sub::Subtitle> > (reader.subtitles ());
+
+       list<sub::Subtitle>::iterator i = subs.begin ();
+
+       BOOST_REQUIRE (i != subs.end ());
+       BOOST_CHECK_EQUAL (i->from, sub::Time::from_hms (0, 2, 40, 650));
+       BOOST_CHECK_EQUAL (i->to, sub::Time::from_hms (0, 2, 41, 790));
+       list<sub::Line>::iterator j = i->lines.begin();
+       BOOST_REQUIRE (j != i->lines.end ());
+       BOOST_REQUIRE_EQUAL (j->blocks.size(), 1);
+       sub::Block b = j->blocks.front ();
+       BOOST_CHECK_EQUAL (b.text, "Et les enregistrements de ses ondes delta ?");
+       BOOST_CHECK_EQUAL (b.font.get(), "Wolf_Rain");
+       BOOST_CHECK_EQUAL (b.font_size.points().get(), 56);
+       BOOST_CHECK_EQUAL (b.bold, false);
+       BOOST_CHECK_EQUAL (b.italic, false);
+       BOOST_CHECK_EQUAL (b.underline, false);
+       ++i;
+
+       BOOST_REQUIRE (i != subs.end ());
+       BOOST_CHECK_EQUAL (i->from, sub::Time::from_hms (0, 2, 42, 420));
+       BOOST_CHECK_EQUAL (i->to, sub::Time::from_hms (0, 2, 44, 150));
+       j = i->lines.begin();
+       BOOST_REQUIRE (j != i->lines.end ());
+       BOOST_REQUIRE_EQUAL (j->blocks.size(), 1);
+       b = j->blocks.front ();
+       BOOST_CHECK_EQUAL (b.text, "Toujours rien.");
+       BOOST_CHECK_EQUAL (b.font.get(), "Wolf_Rain");
+       BOOST_CHECK_EQUAL (b.font_size.points().get(), 56);
+       BOOST_CHECK_EQUAL (b.bold, false);
+       BOOST_CHECK_EQUAL (b.italic, false);
+       BOOST_CHECK_EQUAL (b.underline, false);
+       ++i;
+
+       BOOST_CHECK (i == subs.end());
+}
index 94965ec2a099146ac3156dd2df0293bdcff2a103..b7d91cb0dd7a61aacbffa45350d0bd8bbcdf7823 100644 (file)
@@ -23,6 +23,7 @@ def build(bld):
                  dcp_reader_test.cc
                  dcp_to_stl_binary_test.cc
                  iso6937_test.cc
+                 ssa_reader_test.cc
                  stl_binary_reader_test.cc
                  stl_binary_writer_test.cc
                  stl_text_reader_test.cc