Strip Unicode U+202B (right-to-left-embedding) code; it looks like DoM does RTL ...
[libsub.git] / src / ssa_reader.cc
index c1614a8ce5f15d07b1d0b5716eff58776b31df2a..471931fe2056141e75a23c1bd0c86112c05c62d9 100644 (file)
 #include "sub_assert.h"
 #include "raw_convert.h"
 #include "subtitle.h"
+#include <locked_sstream.h>
 #include <boost/algorithm/string.hpp>
 #include <boost/bind.hpp>
 #include <boost/foreach.hpp>
-#include <sstream>
 #include <iostream>
 #include <vector>
 
 using std::string;
-using std::stringstream;
 using std::vector;
 using std::map;
 using std::cout;
@@ -43,7 +42,7 @@ using namespace sub;
 /** Construct a reader from a script held in memory.  The string is wrapped in a
  *  string stream, and read() is then called with a get_line_stringstream
  *  callback bound to that stream.
  *  @param s Subtitle string encoded in UTF-8
  */
 SSAReader::SSAReader (string const & s)
 {
-       stringstream str (s);
+       locked_stringstream str (s);
        this->read (boost::bind (&get_line_stringstream, &str));
 }
 
@@ -61,6 +60,8 @@ public:
                , primary_colour (255, 255, 255)
                , bold (false)
                , italic (false)
+               , underline (false)
+               , horizontal_reference (HORIZONTAL_CENTRE_OF_SCREEN)
                , vertical_reference (BOTTOM_OF_SCREEN)
                , vertical_margin (0)
        {}
@@ -70,13 +71,15 @@ public:
                , primary_colour (255, 255, 255)
                , bold (false)
                , italic (false)
+               , underline (false)
+               , horizontal_reference (HORIZONTAL_CENTRE_OF_SCREEN)
                , vertical_reference (BOTTOM_OF_SCREEN)
                , vertical_margin (0)
        {
                vector<string> keys;
-               split (keys, format_line, is_any_of (","));
+               split (keys, format_line, boost::is_any_of (","));
                vector<string> style;
-               split (style, style_line, is_any_of (","));
+               split (style, style_line, boost::is_any_of (","));
 
                SUB_ASSERT (!keys.empty());
                SUB_ASSERT (!style.empty());
@@ -99,18 +102,31 @@ public:
                                bold = style[i] == "-1";
                        } else if (keys[i] == "Italic") {
                                italic = style[i] == "-1";
+                       } else if (keys[i] == "Underline") {
+                               underline = style[i] == "-1";
                        } else if (keys[i] == "BorderStyle") {
                                if (style[i] == "1") {
                                        effect = SHADOW;
                                }
                        } else if (keys[i] == "Alignment") {
                                /* These values from libass' source code */
+                               switch ((raw_convert<int> (style[i]) - 1) % 3) {
+                               case 0:
+                                       horizontal_reference = LEFT_OF_SCREEN;
+                                       break;
+                               case 1:
+                                       horizontal_reference = HORIZONTAL_CENTRE_OF_SCREEN;
+                                       break;
+                               case 2:
+                                       horizontal_reference = RIGHT_OF_SCREEN;
+                                       break;
+                               }
                                switch (raw_convert<int> (style[i]) & 12) {
                                case 4:
                                        vertical_reference = TOP_OF_SCREEN;
                                        break;
                                case 8:
-                                       vertical_reference = CENTRE_OF_SCREEN;
+                                       vertical_reference = VERTICAL_CENTRE_OF_SCREEN;
                                        break;
                                case 0:
                                        vertical_reference = BOTTOM_OF_SCREEN;
@@ -130,7 +146,9 @@ public:
        optional<Colour> back_colour;
        bool bold;
        bool italic;
+       bool underline;
        optional<Effect> effect;
+       HorizontalReference horizontal_reference;
        VerticalReference vertical_reference;
        int vertical_margin;
 
@@ -164,7 +182,7 @@ SSAReader::parse_time (string t) const
  *  @return List of RawSubtitles to represent line with vertical reference TOP_OF_SUBTITLE.
  */
 list<RawSubtitle>
-SSAReader::parse_line (RawSubtitle base, string line)
+SSAReader::parse_line (RawSubtitle base, string line, int play_res_x, int play_res_y)
 {
        enum {
                TEXT,
@@ -209,7 +227,7 @@ SSAReader::parse_line (RawSubtitle base, string line)
        case TOP_OF_SUBTITLE:
                /* Nothing to do */
                break;
-       case CENTRE_OF_SCREEN:
+       case VERTICAL_CENTRE_OF_SCREEN:
                current.vertical_position.proportional = current.vertical_position.proportional.get() - ((line_breaks + 1) * line_size) / 2;
                break;
        case BOTTOM_OF_SCREEN:
@@ -230,7 +248,7 @@ SSAReader::parse_line (RawSubtitle base, string line)
                        }
                        break;
                case STYLE:
-                       if (c == '}') {
+                       if (c == '}' || c == '\\') {
                                if (!current.text.empty ()) {
                                        subs.push_back (current);
                                        current.text = "";
@@ -239,23 +257,47 @@ SSAReader::parse_line (RawSubtitle base, string line)
                                        current.italic = true;
                                } else if (style == "\\i0" || style == "\\i") {
                                        current.italic = false;
+                               } else if (style == "\\b1") {
+                                       current.bold = true;
+                               } else if (style == "\\b0") {
+                                       current.bold = false;
+                               } else if (style == "\\u1") {
+                                       current.underline = true;
+                               } else if (style == "\\u0") {
+                                       current.underline = false;
                                } else if (style == "\\an1" || style == "\\an2" || style == "\\an3") {
                                        current.vertical_position.reference = sub::BOTTOM_OF_SCREEN;
                                } else if (style == "\\an4" || style == "\\an5" || style == "\\an6") {
-                                       current.vertical_position.reference = sub::CENTRE_OF_SCREEN;
+                                       current.vertical_position.reference = sub::VERTICAL_CENTRE_OF_SCREEN;
                                } else if (style == "\\an7" || style == "\\an8" || style == "\\an9") {
                                        current.vertical_position.reference = sub::TOP_OF_SCREEN;
+                               } else if (boost::starts_with(style, "\\pos")) {
+                                       vector<string> bits;
+                                       boost::algorithm::split (bits, style, boost::is_any_of("(,"));
+                                       SUB_ASSERT (bits.size() == 3);
+                                       current.horizontal_position.reference = sub::LEFT_OF_SCREEN;
+                                       current.horizontal_position.proportional = raw_convert<float>(bits[1]) / play_res_x;
+                                       current.vertical_position.reference = sub::TOP_OF_SCREEN;
+                                       current.vertical_position.proportional = raw_convert<float>(bits[2]) / play_res_y;
+                               } else if (boost::starts_with(style, "\\fs")) {
+                                       SUB_ASSERT (style.length() > 3);
+                                       current.font_size.set_points (raw_convert<int>(style.substr(3)));
                                }
                                style = "";
+                       }
+
+                       if (c == '}') {
                                state = TEXT;
                        } else {
                                style += c;
                        }
                        break;
                case BACKSLASH:
-                       if ((c == 'n' || c == 'N') && !current.text.empty ()) {
-                               subs.push_back (current);
-                               current.text = "";
+                       if (c == 'n' || c == 'N') {
+                               if (!current.text.empty ()) {
+                                       subs.push_back (current);
+                                       current.text = "";
+                               }
                                /* Move down one line (1.2 times the font size) */
                                if (current.vertical_position.reference.get() == BOTTOM_OF_SCREEN) {
                                        current.vertical_position.proportional = current.vertical_position.proportional.get() - line_size;
@@ -284,6 +326,7 @@ SSAReader::read (function<optional<string> ()> get_line)
                EVENTS
        } part = INFO;
 
+       int play_res_x = 288;
        int play_res_y = 288;
        map<string, Style> styles;
        string style_format_line;
@@ -306,7 +349,7 @@ SSAReader::read (function<optional<string> ()> get_line)
                        /* Section heading */
                        if (line.get() == "[Script Info]") {
                                part = INFO;
-                       } else if (line.get() == "[V4 Styles]") {
+                       } else if (line.get() == "[V4 Styles]" || line.get() == "[V4+ Styles]") {
                                part = STYLES;
                        } else if (line.get() == "[Events]") {
                                part = EVENTS;
@@ -316,13 +359,15 @@ SSAReader::read (function<optional<string> ()> get_line)
 
                size_t const colon = line->find (":");
                SUB_ASSERT (colon != string::npos);
-               SUB_ASSERT (line->length() > colon + 1);
                string const type = line->substr (0, colon);
-               string const body = line->substr (colon + 2);
+               string body = line->substr (colon + 1);
+               trim (body);
 
                switch (part) {
                case INFO:
-                       if (type == "PlayResY") {
+                       if (type == "PlayResX") {
+                               play_res_x = raw_convert<int> (body);
+                       } else if (type == "PlayResY") {
                                play_res_y = raw_convert<int> (body);
                        }
                        break;
@@ -367,6 +412,10 @@ SSAReader::read (function<optional<string> ()> get_line)
                                        } else if (event_format[i] == "End") {
                                                sub.to = parse_time (event[i]);
                                        } else if (event_format[i] == "Style") {
+                                               /* libass trims leading '*'s from style names, commenting that
+                                                  "they seem to mean literally nothing".  Go figure...
+                                               */
+                                               trim_left_if (event[i], boost::is_any_of ("*"));
                                                SUB_ASSERT (styles.find(event[i]) != styles.end());
                                                Style style = styles[event[i]];
                                                sub.font = style.font_name;
@@ -375,13 +424,15 @@ SSAReader::read (function<optional<string> ()> get_line)
                                                sub.effect_colour = style.back_colour;
                                                sub.bold = style.bold;
                                                sub.italic = style.italic;
+                                               sub.underline = style.underline;
                                                sub.effect = style.effect;
+                                               sub.horizontal_position.reference = style.horizontal_reference;
                                                sub.vertical_position.reference = style.vertical_reference;
                                                sub.vertical_position.proportional = float(style.vertical_margin) / play_res_y;
                                        } else if (event_format[i] == "MarginV") {
                                                sub.vertical_position.proportional = raw_convert<float>(event[i]) / play_res_y;
                                        } else if (event_format[i] == "Text") {
-                                               BOOST_FOREACH (sub::RawSubtitle j, parse_line (sub, event[i])) {
+                                               BOOST_FOREACH (sub::RawSubtitle j, parse_line (sub, event[i], play_res_x, play_res_y)) {
                                                        _subs.push_back (j);
                                                }
                                        }