Cleanup: handle Filter objects by value rather than by reference.
[dcpomatic.git] / src / lib / text_decoder.cc
index 58f631e59124855aae836432d4eac5f7ff67e03c..75fa33605b0f2f3188f1fc47141a0d609cebd632 100644 (file)
@@ -87,19 +87,55 @@ set_forced_appearance(shared_ptr<const TextContent> content, StringText& subtitl
 }
 
 
+string
+TextDecoder::remove_invalid_characters_for_xml(string text)
+{
+       string output;
+
+       /* https://www.w3.org/TR/REC-xml/#charsets says that XML may only contain 0x9, 0xa, 0xd below 0x32.
+        * Not sure if we should be doing direct UTF-8 manipulation here.
+        */
+       for (size_t i = 0; i < text.length(); ++i) {
+               auto const c = text[i];
+               if ((c & 0xe0) == 0xc0) {
+                       // start of 2-byte code point
+                       output += c;
+                       output += text[i + 1];
+                       ++i;
+               } else if ((c & 0xf0) == 0xe0) {
+                       // start of 3-byte code point
+                       output += c;
+                       output += text[i + 1];
+                       output += text[i + 2];
+                       i += 2;
+               } else if ((c & 0xf8) == 0xf0) {
+                       // start of 4-byte code point
+                       output += c;
+                       output += text[i + 1];
+                       output += text[i + 2];
+                       output += text[i + 3];
+                       i += 3;
+               } else {
+                       if (c >= 0x20 || c == 0x9 || c == 0xa || c == 0xd) {
+                               output += c;
+                       }
+               }
+       }
+
+       return output;
+}
+
+
 void
 TextDecoder::emit_plain_start(ContentTime from, vector<dcp::SubtitleString> subtitles, dcp::SubtitleStandard valign_standard)
 {
        vector<StringText> string_texts;
 
        for (auto& subtitle: subtitles) {
-               auto string_text = StringText(
-                       subtitle,
-                       content()->outline_width(),
-                       content()->get_font(subtitle.font().get_value_or("")),
-                       valign_standard
-                       );
-               string_text.set_text(string_text.text());
+               auto font = content()->get_font(subtitle.font().get_value_or(""));
+               DCPOMATIC_ASSERT(font);
+               auto string_text = StringText(subtitle, content()->outline_width(), font, valign_standard);
+               string_text.set_text(remove_invalid_characters_for_xml(string_text.text()));
                set_forced_appearance(content(), string_text);
                string_texts.push_back(string_text);
        }
@@ -254,7 +290,7 @@ TextDecoder::emit_plain_start (ContentTime from, sub::Subtitle const & sub_subti
                                v_align,
                                0,
                                dcp::Direction::LTR,
-                               block.text,
+                               remove_invalid_characters_for_xml(block.text),
                                dcp::Effect::NONE,
                                dcp_colour(block.effect_colour.get_value_or(sub::Colour(0, 0, 0))),
                                /* Hack: we should use subtitle.fade_up and subtitle.fade_down here
@@ -265,13 +301,17 @@ TextDecoder::emit_plain_start (ContentTime from, sub::Subtitle const & sub_subti
                                */
                                dcp::Time (),
                                dcp::Time (),
-                               0
+                               0,
+                               std::vector<dcp::Ruby>()
                                );
 
+                       auto font = content()->get_font(block.font.get_value_or(""));
+                       DCPOMATIC_ASSERT(font);
+
                        auto string_text = StringText(
                                dcp_subtitle,
                                content()->outline_width(),
-                               content()->get_font(block.font.get_value_or("")),
+                               font,
                                dcp::SubtitleStandard::SMPTE_2014
                                );
                        set_forced_appearance(content(), string_text);