--- /dev/null
+/*
+ Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+
+#include "exceptions.h"
+#include "subrip_reader.h"
+#include "util.h"
+#include "web_vtt_reader.h"
+#include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string_regex.hpp>
+#include <boost/bind.hpp>
+#include <boost/regex.hpp>
+#include <iostream>
+#include <vector>
+
+
+using std::function;
+using std::string;
+using std::vector;
+using boost::optional;
+using namespace sub;
+
+
+/** Read WebVTT subtitles from a stdio stream.
+ *  @param file Stream that lines are fetched from, via get_line_file(); nothing
+ *  in this constructor closes it.
+ */
+WebVTTReader::WebVTTReader(FILE* file)
+{
+	read([file]() {
+		return get_line_file(file);
+	});
+}
+
+
+/** Read WebVTT subtitles from a string.
+ *  @param subs The WebVTT text.  The line source below holds a pointer to this
+ *  by-value parameter, which is fine because read() finishes before the
+ *  constructor returns.
+ */
+WebVTTReader::WebVTTReader(string subs)
+{
+	read([&subs]() {
+		return get_line_string(&subs);
+	});
+}
+
+
+/** Parse WebVTT text one line at a time, appending a RawSubtitle to _subs for
+ *  each line of cue text.
+ *
+ *  @param get_line Callable which returns the next line of input, or an empty
+ *  optional once the input is exhausted.
+ *
+ *  Throws WebVTTError if the first line does not start with WEBVTT, if a timing
+ *  line is malformed, or if a line between cues is neither a note, a timing
+ *  line nor blank.
+ */
+void
+WebVTTReader::read(std::function<optional<string> ()> get_line)
+{
+	enum class State {
+		/* expecting WEBVTT */
+		HEADER,
+		/* awaiting a NOTE or a subtitle timing line */
+		DATA,
+		/* reading the text of a subtitle */
+		SUBTITLE,
+		/* reading a note */
+		NOTE
+	} state = State::HEADER;
+
+	RawSubtitle rs;
+
+	rs.vertical_position.line = 0;
+	rs.vertical_position.reference = TOP_OF_SUBTITLE;
+
+	/* UTF-8 encoding of unicode "LINE SEPARATOR" (U+2028).  Built once here rather
+	 * than once for every line of subtitle text.
+	 */
+	boost::regex const line_separator("\xe2\x80\xa8");
+
+	while (true) {
+		auto line = get_line();
+		if (!line) {
+			break;
+		}
+
+		trim_right_if(*line, boost::is_any_of("\n\r"));
+		remove_unicode_bom(line);
+
+		/* Keep some history in case there is an error to report */
+		_context.push_back(*line);
+		if (_context.size() > 5) {
+			_context.pop_front();
+		}
+
+		switch (state) {
+		case State::HEADER:
+			if (!boost::starts_with(*line, "WEBVTT")) {
+				throw WebVTTError("No WEBVTT header found");
+			}
+			state = State::DATA;
+			break;
+		case State::DATA:
+			if (boost::starts_with(*line, "NOTE")) {
+				state = State::NOTE;
+			} else if (line->find("-->") != string::npos) {
+				/* Further trim this line, removing spaces from the end */
+				trim_right_if(*line, boost::is_any_of(" "));
+
+				vector<string> parts;
+				boost::algorithm::split(parts, *line, boost::algorithm::is_any_of(" "), boost::token_compress_on);
+
+				/* We need "<from> --> <to>".  WebVTT allows any number of cue settings
+				 * (e.g. "align:start position:10%") after the end time; they are
+				 * ignored here rather than causing the cue to be rejected.
+				 */
+				if (parts.size() < 3) {
+					/* Pull in a couple more lines to give the error report some context */
+					for (int i = 0; i < 2; ++i) {
+						auto ex = get_line();
+						if (ex) {
+							_context.push_back(*ex);
+						}
+					}
+					throw WebVTTError(*line, "a time line", _context);
+				}
+
+				string expected;
+				auto from = SubripReader::convert_time(parts[0], &expected);
+				if (!from) {
+					throw WebVTTError(parts[0], expected, _context);
+				}
+				rs.from = *from;
+
+				auto to = SubripReader::convert_time(parts[2], &expected);
+				if (!to) {
+					throw WebVTTError(parts[2], expected, _context);
+				}
+				rs.to = *to;
+
+				/* Each cue starts again from its first line */
+				rs.vertical_position.line = 0;
+				state = State::SUBTITLE;
+			} else if (!line->empty()) {
+				throw WebVTTError(*line, "a note or time", _context);
+			}
+			break;
+		case State::SUBTITLE:
+			if (line->empty()) {
+				/* A blank line ends the cue */
+				state = State::DATA;
+			} else {
+				/* Split up this line on unicode "LINE SEPARATOR".  This feels hacky but also
+				 * the least unpleasant place to do it.
+				 */
+				vector<string> sub_lines;
+				boost::algorithm::split_regex(sub_lines, *line, line_separator);
+				for (auto const& sub_line: sub_lines) {
+					rs.text = sub_line;
+					_subs.push_back(rs);
+					rs.vertical_position.line = rs.vertical_position.line.get() + 1;
+				}
+			}
+			break;
+		case State::NOTE:
+			/* Everything up to the next blank line belongs to the note and is discarded */
+			if (line->empty()) {
+				state = State::DATA;
+			}
+			break;
+		}
+
+	}
+}
+
--- /dev/null
+/*
+ Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+
+#ifndef LIBSUB_WEB_VTT_READER_H
+#define LIBSUB_WEB_VTT_READER_H
+
+
+#include "reader.h"
+#include <cstdio>
+#include <string>
+
+
+namespace sub {
+
+
+/** A Reader which parses WebVTT subtitles, either from a stdio stream or from
+ *  a string.  Parsing happens during construction.
+ */
+class WebVTTReader : public Reader
+{
+public:
+ /** Read subtitles from a stdio stream */
+ WebVTTReader(FILE* file);
+ /** Read subtitles from a string containing the WebVTT text */
+ WebVTTReader(std::string subs);
+
+private:
+ /** Parse the input obtained by calling get_line repeatedly until it returns an empty optional */
+ void read(std::function<boost::optional<std::string> ()> get_line);
+
+ /** The most recently read input lines, kept so that they can be passed to error reports */
+ std::list<std::string> _context;
+};
+
+
+}
+
+#endif
+
util.cc
vertical_reference.cc
vertical_position.cc
+ web_vtt_reader.cc
"""
headers = """
subtitle.h
vertical_position.h
vertical_reference.h
+ web_vtt_reader.h
"""
bld.install_files('${PREFIX}/include/libsub%s/sub' % bld.env.API_VERSION, headers)
--- /dev/null
+WEBVTT - you can put something here
+
+NOTE You can have notes
+That span multiple lines
+
+00:00:41,090 --> 00:00:42,210
+This is a subtitle
+and that's a line break
+
+00:01:01,010 --> 00:01:02,100
+This is some stuff.
--- /dev/null
+/*
+ Copyright (C) 2022 Carl Hetherington <cth@carlh.net>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+*/
+
+#include "web_vtt_reader.h"
+#include "subtitle.h"
+#include "test.h"
+#include "exceptions.h"
+#include "collect.h"
+#include <boost/test/unit_test.hpp>
+#include <boost/filesystem.hpp>
+#include <cmath>
+#include <iostream>
+#include <cstdio>
+
+
+using std::cerr;
+using std::vector;
+using std::fabs;
+
+
+/* Test reading of a VTT file */
+BOOST_AUTO_TEST_CASE(vtt_reader_test)
+{
+	auto f = fopen("test/data/test.vtt", "r");
+	/* Stop here if the data file cannot be opened, rather than handing a null FILE* to the reader */
+	BOOST_REQUIRE(f);
+	sub::WebVTTReader reader(f);
+	fclose(f);
+	auto subs = sub::collect<std::vector<sub::Subtitle>>(reader.subtitles());
+
+	auto i = subs.begin();
+
+
+	/* First subtitle */
+
+	BOOST_REQUIRE(i != subs.end());
+	BOOST_CHECK_EQUAL(i->from, sub::Time::from_hms(0, 0, 41, 90));
+	BOOST_CHECK_EQUAL(i->to, sub::Time::from_hms(0, 0, 42, 210));
+
+	auto j = i->lines.begin();
+	/* REQUIRE, not CHECK: j is dereferenced below */
+	BOOST_REQUIRE(j != i->lines.end());
+	BOOST_REQUIRE_EQUAL(j->blocks.size(), 1U);
+	auto b = j->blocks[0];
+	BOOST_CHECK_EQUAL(b.text, "This is a subtitle");
+	/* No font is specified by WebVTT, so none should be seen here */
+	BOOST_CHECK(!b.font);
+	BOOST_CHECK(!b.font_size.specified());
+	BOOST_CHECK_EQUAL(b.bold, false);
+	BOOST_CHECK_EQUAL(b.italic, false);
+	BOOST_CHECK_EQUAL(b.underline, false);
+	BOOST_REQUIRE(j->vertical_position.line);
+	BOOST_CHECK_EQUAL(j->vertical_position.line.get(), 0);
+	BOOST_CHECK_EQUAL(j->vertical_position.reference.get(), sub::TOP_OF_SUBTITLE);
+	++j;
+
+	/* REQUIRE, not CHECK: j is dereferenced below */
+	BOOST_REQUIRE(j != i->lines.end());
+	BOOST_REQUIRE_EQUAL(j->blocks.size(), 1U);
+	b = j->blocks[0];
+	BOOST_CHECK_EQUAL(b.text, "and that's a line break");
+	/* No font is specified by WebVTT, so none should be seen here */
+	BOOST_CHECK(!b.font);
+	BOOST_CHECK(!b.font_size.specified());
+	BOOST_CHECK_EQUAL(b.bold, false);
+	BOOST_CHECK_EQUAL(b.italic, false);
+	BOOST_CHECK_EQUAL(b.underline, false);
+	BOOST_REQUIRE(j->vertical_position.line);
+	BOOST_CHECK_EQUAL(j->vertical_position.line.get(), 1);
+	BOOST_CHECK_EQUAL(j->vertical_position.reference.get(), sub::TOP_OF_SUBTITLE);
+	++i;
+
+
+	/* Second subtitle */
+
+	BOOST_REQUIRE(i != subs.end());
+	BOOST_CHECK_EQUAL(i->from, sub::Time::from_hms(0, 1, 1, 10));
+	BOOST_CHECK_EQUAL(i->to, sub::Time::from_hms(0, 1, 2, 100));
+
+	/* REQUIRE, not CHECK: lines[0] is read below */
+	BOOST_REQUIRE_EQUAL(i->lines.size(), 1U);
+	sub::Line l = i->lines[0];
+	BOOST_CHECK_EQUAL(l.vertical_position.line.get(), 0);
+	BOOST_CHECK_EQUAL(l.vertical_position.reference.get(), sub::TOP_OF_SUBTITLE);
+
+	BOOST_REQUIRE_EQUAL(l.blocks.size(), 1U);
+	b = l.blocks[0];
+	BOOST_CHECK_EQUAL(b.text, "This is some stuff.");
+	/* No font is specified by WebVTT, so none should be seen here */
+	BOOST_CHECK(!b.font);
+	BOOST_CHECK(!b.font_size.specified());
+	BOOST_CHECK_EQUAL(b.bold, false);
+	BOOST_CHECK_EQUAL(b.italic, false);
+	BOOST_CHECK_EQUAL(b.underline, false);
+}
+
time_test.cc
test.cc
vertical_position_test.cc
+ webvtt_reader_test.cc
"""
obj.target = 'tests'
obj.install_path = ''