From 2b0cad926f8206390db09a659f6d63341a912871 Mon Sep 17 00:00:00 2001 From: Carl Hetherington Date: Wed, 9 Nov 2022 00:38:33 +0100 Subject: [PATCH] WIP: VP9 encoder. --- src/lib/constants.h | 4 + src/lib/dcp_encoder.cc | 30 +++++- src/lib/dcp_encoder.h | 8 +- src/lib/vp9_encoder.cc | 198 +++++++++++++++++++++++++++++++++++++ src/lib/vp9_encoder.h | 56 +++++++++++ src/lib/vp9_pcm_block.cc | 71 +++++++++++++ src/lib/vp9_pcm_block.h | 41 ++++++++ src/lib/wscript | 2 + test/data | 2 +- test/sign_language_test.cc | 40 ++++++++ test/wscript | 1 + 11 files changed, 450 insertions(+), 3 deletions(-) create mode 100644 src/lib/vp9_encoder.cc create mode 100644 src/lib/vp9_encoder.h create mode 100644 src/lib/vp9_pcm_block.cc create mode 100644 src/lib/vp9_pcm_block.h create mode 100644 test/sign_language_test.cc diff --git a/src/lib/constants.h b/src/lib/constants.h index b6476cc99..cef8f4cff 100644 --- a/src/lib/constants.h +++ b/src/lib/constants.h @@ -49,6 +49,10 @@ #define CERTIFICATE_VALIDITY_PERIOD (10 * 365) #define SIGN_LANGUAGE_WIDTH 480 #define SIGN_LANGUAGE_HEIGHT 640 +#define SIGN_LANGUAGE_VIDEO_FRAME_RATE 24 +#define SIGN_LANGUAGE_AUDIO_FRAME_RATE 48000 +#define SIGN_LANGUAGE_CHUNK_LENGTH_IN_SECONDS 2 +#define AUDIO_BYTES_PER_SAMPLE 3 #endif diff --git a/src/lib/dcp_encoder.cc b/src/lib/dcp_encoder.cc index 186ec59c2..fc5945235 100644 --- a/src/lib/dcp_encoder.cc +++ b/src/lib/dcp_encoder.cc @@ -83,6 +83,8 @@ DCPEncoder::DCPEncoder (shared_ptr film, weak_ptr job) } } } + + _sign_language = contains_sign_language(film->content()); } DCPEncoder::~DCPEncoder () @@ -126,13 +128,39 @@ DCPEncoder::video (shared_ptr data, DCPTime time) { if (data->type() == VideoType::MAIN) { _j2k_encoder.encode(data, time); + } else { + _vp9_encoder.encode(data->image(boost::bind(&PlayerVideo::force, AV_PIX_FMT_YUV420P), VideoRange::VIDEO, false), time); + } +} + + +void +DCPEncoder::write_pending_audio() +{ + if (_pending_audio.empty()) { + return; + } + + auto const 
last_pending_audio = _pending_audio.back(); + auto const pending_audio_end = last_pending_audio.second + DCPTime::from_frames(last_pending_audio.first->frames(), _film->audio_frame_rate()); /* frames() on pending audio is an audio frame count, so convert it at the audio (not video) frame rate */ + auto const write_until = std::min(pending_audio_end, _vp9_encoder.end()); + if (write_until == _last_audio_write_to) { + return; } + + /* XXX: need to mix _pending_audio with the output from vp9_encoder */ } + void DCPEncoder::audio (shared_ptr data, DCPTime time) { - _writer.write(data, time); + if (!_sign_language) { + _writer.write(data, time); + } else { + _pending_audio.push_back({data, time}); + write_pending_audio(); + } auto job = _job.lock (); DCPOMATIC_ASSERT (job); diff --git a/src/lib/dcp_encoder.h b/src/lib/dcp_encoder.h index ad77f6951..69ae5b7b2 100644 --- a/src/lib/dcp_encoder.h +++ b/src/lib/dcp_encoder.h @@ -25,6 +25,7 @@ #include "encoder.h" #include "player_text.h" #include "j2k_encoder.h" +#include "vp9_encoder.h" #include "writer.h" #include @@ -36,7 +37,6 @@ class Player; class PlayerVideo; -/** @class DCPEncoder */ class DCPEncoder : public Encoder { public: @@ -60,10 +60,16 @@ private: void text (PlayerText, TextType, boost::optional, dcpomatic::DCPTimePeriod); void atmos (std::shared_ptr, dcpomatic::DCPTime, AtmosMetadata metadata); + void write_pending_audio(); + Writer _writer; J2KEncoder _j2k_encoder; + VP9Encoder _vp9_encoder; bool _finishing; bool _non_burnt_subtitles; + bool _sign_language; + dcpomatic::DCPTime _last_audio_write_to; + std::list, dcpomatic::DCPTime>> _pending_audio; boost::signals2::scoped_connection _player_video_connection; boost::signals2::scoped_connection _player_audio_connection; diff --git a/src/lib/vp9_encoder.cc b/src/lib/vp9_encoder.cc new file mode 100644 index 000000000..bb4756804 --- /dev/null +++ b/src/lib/vp9_encoder.cc @@ -0,0 +1,198 @@ +/* + Copyright (C) 2022 Carl Hetherington + + This file is part of DCP-o-matic.
+ + DCP-o-matic is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + DCP-o-matic is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with DCP-o-matic. If not, see . + +*/ + + +#include "constants.h" +#include "exceptions.h" +#include "image.h" +#include "ffmpeg_wrapper.h" +#include "vp9_encoder.h" +#include "util.h" +#include +extern "C" { +#include +#include +} +#include + + +#include "i18n.h" + + +using std::shared_ptr; +using std::string; +using std::vector; +using dcp::raw_convert; +using namespace dcpomatic; + + +int +write_packet(void* opaque, uint8_t* buffer, int size) +{ + /* Here comes the webm header from the format context */ + auto& header = reinterpret_cast(opaque)->_header; + header.resize(size); + memcpy(header.data(), buffer, size); + return 0; +} + + +VP9Encoder::VP9Encoder() +{ + av_log_set_callback(ffmpeg_log_callback); + + auto codec = avcodec_find_encoder_by_name("libvpx-vp9"); + if (!codec) { + throw EncodeError("avcodec_find_encoder_by_name failed for libvpx-vp9"); + } + + _codec_context = avcodec_alloc_context3(codec); + if (!_codec_context) { + throw std::bad_alloc(); + } + + _codec_context->width = SIGN_LANGUAGE_WIDTH; + _codec_context->height = SIGN_LANGUAGE_HEIGHT; + _codec_context->time_base = AVRational{1, SIGN_LANGUAGE_VIDEO_FRAME_RATE}; + _codec_context->pix_fmt = AV_PIX_FMT_YUV420P; + // We need to take up the same bits as the equivalent length of audio + // For some reason the ISDCF example script divides that bitrate by 2; + // maybe to give some overhead? 
+ _codec_context->bit_rate = SIGN_LANGUAGE_AUDIO_FRAME_RATE * AUDIO_BYTES_PER_SAMPLE * 8 / 2; + + /* All these options are taken from + * https://github.com/ISDCF/Sign-Language-Video-Encoding/blob/master/encode-vp9-wav + */ + + AVDictionary* options = nullptr; + + auto constexpr chunk_length_in_video_frames = SIGN_LANGUAGE_CHUNK_LENGTH_IN_SECONDS * SIGN_LANGUAGE_VIDEO_FRAME_RATE; + + av_dict_set_int(&options, "keyint_min", chunk_length_in_video_frames, 0); + // GOP size + av_dict_set_int(&options, "g", chunk_length_in_video_frames, 0); + av_dict_set_int(&options, "speed", 6, 0); + av_dict_set_int(&options, "tile-columns", 4, 0); + av_dict_set_int(&options, "frame-parallel", 1, 0); + av_dict_set_int(&options, "static-thresh", 0, 0); + av_dict_set_int(&options, "max-intra-rate", 300, 0); + av_dict_set(&options, "deadline", "realtime", 0); + av_dict_set_int(&options, "lag-in-frames", 0, 0); + av_dict_set_int(&options, "error-resilient", 1, 0); + av_dict_set_int(&options, "minrate", _codec_context->bit_rate, 0); + av_dict_set_int(&options, "maxrate", _codec_context->bit_rate, 0); + + int r = avcodec_open2(_codec_context, codec, &options); + if (r < 0) { + throw EncodeError(N_("avcodec_open2"), N_("VP9Encoder::VP9Encoder")); + } + + vector header_buffer(32768); + auto avio_context = avio_alloc_context(header_buffer.data(), header_buffer.size(), 1, this, nullptr, write_packet, nullptr); + if (!avio_context) { + throw std::bad_alloc(); + } + AVFormatContext* format_context; + r = avformat_alloc_output_context2(&format_context, nullptr, "webm", nullptr); + if (r < 0) { + throw EncodeError(N_("avformat_alloc_output_context2"), N_("VP9Encoder::VP9Encoder")); + } + format_context->pb = avio_context; + + av_dict_set(&format_context->metadata, "MAJOR_BRAND", "isom", 0); + av_dict_set(&format_context->metadata, "MINOR_VERSION", "512", 0); + av_dict_set(&format_context->metadata, "COMPATIBLE_BRANDS", "isomiso2avc1mp41", 0); + av_dict_set(&format_context->metadata, "ENCODER", 
"Lavf59.27.100", 0); + + auto stream = avformat_new_stream(format_context, codec); + if (!stream) { + throw EncodeError(N_("avformat_new_stream"), N_("VP9Encoder::VP9Encoder")); + } + + stream->disposition |= AV_DISPOSITION_DEFAULT; + r = avcodec_parameters_from_context(stream->codecpar, _codec_context); + if (r < 0) { + throw EncodeError(N_("avcodec_parameters_from_context"), N_("VP9Encoder::VP9Encoder"), r); + } + stream->avg_frame_rate = { 24, 1 }; + stream->codecpar->color_range = AVCOL_RANGE_MPEG; + stream->codecpar->chroma_location = AVCHROMA_LOC_LEFT; + stream->codecpar->field_order = AV_FIELD_PROGRESSIVE; + + av_dict_set(&stream->metadata, "ENCODER", "Lavc59.37.100 libvpx-vp9", 0); + av_dict_set(&stream->metadata, "HANDLER_NAME", "VideoHandler", 0); + av_dict_set(&stream->metadata, "VENDOR_ID", "[0][0][0][0]", 0); + + AVDictionary* format_options = nullptr; + r = avformat_write_header(format_context, &format_options); + if (r < 0) { + throw EncodeError(N_("avformat_write_header"), N_("VP9Encoder::VP9Encoder")); + } + + avformat_free_context(format_context); +} + +/** Release the codec context that the constructor allocated with avcodec_alloc_context3() */ +VP9Encoder::~VP9Encoder() +{ + avcodec_free_context(&_codec_context); /* avcodec_close() would leave the context struct itself allocated; this frees it and nulls the pointer */ +} + + +void +VP9Encoder::encode(shared_ptr image, DCPTime time) +{ + DCPOMATIC_ASSERT(image->pixel_format() == AV_PIX_FMT_YUV420P); + DCPOMATIC_ASSERT(image->size() == dcp::Size(SIGN_LANGUAGE_WIDTH, SIGN_LANGUAGE_HEIGHT)); + + auto frame = av_frame_alloc(); + if (!frame) { + throw std::bad_alloc(); + } + + for (int i = 0; i < 3; ++i) { + auto buffer = _pending_images.create_buffer(image, i); + frame->buf[i] = av_buffer_ref(buffer); + frame->data[i] = buffer->data; + frame->linesize[i] = image->stride()[i]; + av_buffer_unref(&buffer); + } + + frame->width = image->size().width; + frame->height = image->size().height; + frame->format = image->pixel_format(); + frame->pts = time.get() * SIGN_LANGUAGE_VIDEO_FRAME_RATE / DCPTime::HZ; + + int r = avcodec_send_frame(_codec_context, frame); + av_frame_free(&frame); + if (r < 0) { + throw
EncodeError(N_("avcodec_send_frame"), N_("VP9Encoder::video"), r); + } + + ffmpeg::Packet packet; + r = avcodec_receive_packet(_codec_context, packet.get()); + if (r < 0 && r != AVERROR(EAGAIN)) { + throw EncodeError(N_("avcodec_receive_packet"), N_("VP9Encoder::video"), r); + } else if (r >= 0) { + _pcm_blocks.emplace_back(std::make_pair(time, VP9PCMBlock(packet.get()->data, packet.get()->size, _header))); + _end = time + DCPTime::from_seconds(SIGN_LANGUAGE_CHUNK_LENGTH_IN_SECONDS); + } +} + diff --git a/src/lib/vp9_encoder.h b/src/lib/vp9_encoder.h new file mode 100644 index 000000000..ca5babace --- /dev/null +++ b/src/lib/vp9_encoder.h @@ -0,0 +1,56 @@ +/* + Copyright (C) 2022 Carl Hetherington + + This file is part of DCP-o-matic. + + DCP-o-matic is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + DCP-o-matic is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with DCP-o-matic. If not, see . 
+ +*/ +#ifndef DCPOMATIC_VP9_ENCODER_H +#define DCPOMATIC_VP9_ENCODER_H +#include "dcpomatic_time.h" +#include "image_store.h" +#include "vp9_pcm_block.h" + + +struct AVCodecContext; + +/** @class VP9Encoder Feeds sign-language video frames to a libavcodec VP9 encoder and stores each resulting packet as a VP9PCMBlock. */ +class VP9Encoder +{ +public: + VP9Encoder(); + + VP9Encoder(VP9Encoder const&) = delete; + VP9Encoder& operator=(VP9Encoder const&) = delete; + + ~VP9Encoder(); + + void encode(std::shared_ptr image, dcpomatic::DCPTime time); + + dcpomatic::DCPTime end() const { + return _end; + } + +private: + friend int write_packet(void*, uint8_t*, int); + + AVCodecContext* _codec_context = nullptr; + + ImageStore _pending_images; + std::vector _header; + std::vector> _pcm_blocks; + dcpomatic::DCPTime _end; +}; +#endif diff --git a/src/lib/vp9_pcm_block.cc b/src/lib/vp9_pcm_block.cc new file mode 100644 index 000000000..a606b4f93 --- /dev/null +++ b/src/lib/vp9_pcm_block.cc @@ -0,0 +1,71 @@ +/* + Copyright (C) 2022 Carl Hetherington + + This file is part of DCP-o-matic. + + DCP-o-matic is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + DCP-o-matic is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with DCP-o-matic. If not, see .
+ +*/ + + +#include "constants.h" +#include "dcpomatic_assert.h" +#include "util.h" +#include "vp9_pcm_block.h" + + +using std::vector; + + +auto constexpr pcm_block_size = SIGN_LANGUAGE_AUDIO_FRAME_RATE * AUDIO_BYTES_PER_SAMPLE * SIGN_LANGUAGE_CHUNK_LENGTH_IN_SECONDS; + +/** Build one PCM block of pcm_block_size bytes: a 20-byte header (0xff x4, three big-endian lengths, 0xff x4), then the EBML header, then the VP9 segment; the rest is left as the vector initialised it. */ +VP9PCMBlock::VP9PCMBlock(uint8_t const* vp9_segment, int vp9_segment_size, vector const& ebml_header) + : _data(pcm_block_size) +{ + uint32_t const ebml_header_size = ebml_header.size(); + /* Check the EBML header fits before subtracting its size, so the unsigned arithmetic cannot wrap; also reject a negative segment size */ + DCPOMATIC_ASSERT(vp9_segment_size >= 0 && ebml_header_size <= pcm_block_size - 20U && static_cast<uint32_t>(vp9_segment_size) < pcm_block_size - 20U - ebml_header_size); + + // H_1 + _data[0] = 0xff; + _data[1] = 0xff; + _data[2] = 0xff; + _data[3] = 0xff; + // Length of VP9 segment, big-endian, unsigned + _data[4] = (vp9_segment_size >> 24) & 0xff; + _data[5] = (vp9_segment_size >> 16) & 0xff; + _data[6] = (vp9_segment_size >> 8) & 0xff; + _data[7] = (vp9_segment_size >> 0) & 0xff; + // Length of PCM block, big-endian, unsigned + _data[8] = (pcm_block_size >> 24) & 0xff; + _data[9] = (pcm_block_size >> 16) & 0xff; + _data[10] = (pcm_block_size >> 8) & 0xff; + _data[11] = (pcm_block_size >> 0) & 0xff; + // Length of VP9 EBML header, big-endian, unsigned + _data[12] = (ebml_header_size >> 24) & 0xff; + _data[13] = (ebml_header_size >> 16) & 0xff; + _data[14] = (ebml_header_size >> 8) & 0xff; + _data[15] = (ebml_header_size >> 0) & 0xff; + // H_2 + _data[16] = 0xff; + _data[17] = 0xff; + _data[18] = 0xff; + _data[19] = 0xff; + // VP9 EBML header + std::copy(ebml_header.begin(), ebml_header.end(), _data.begin() + 20); + // VP9 payload + memcpy(_data.data() + 20 + ebml_header.size(), vp9_segment, vp9_segment_size); +} + diff --git a/src/lib/vp9_pcm_block.h b/src/lib/vp9_pcm_block.h new file mode 100644 index 000000000..10b51e56e --- /dev/null +++ b/src/lib/vp9_pcm_block.h @@ -0,0 +1,41 @@ +/* + Copyright (C) 2022 Carl Hetherington + + This file is part of DCP-o-matic.
+ + DCP-o-matic is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + DCP-o-matic is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with DCP-o-matic. If not, see . + +*/ + + +#ifndef DCPOMATIC_VP9_PCM_BLOCK_H +#define DCPOMATIC_VP9_PCM_BLOCK_H + + +#include +#include + + +class VP9PCMBlock +{ +public: + VP9PCMBlock(uint8_t const* vp9_segment, int vp9_segment_size, std::vector const& ebml_header); + +private: + std::vector _data; +}; + + +#endif + diff --git a/src/lib/wscript b/src/lib/wscript index 9d7e5abae..42aeeb29a 100644 --- a/src/lib/wscript +++ b/src/lib/wscript @@ -208,6 +208,8 @@ sources = """ video_range.cc video_ring_buffers.cc video_type.cc + vp9_encoder.cc + vp9_pcm_block.cc writer.cc zipper.cc """ diff --git a/test/data b/test/data index bedc98152..17ee84d24 160000 --- a/test/data +++ b/test/data @@ -1 +1 @@ -Subproject commit bedc98152b3ea06a2e73ea14bda4178af652ed64 +Subproject commit 17ee84d24db14fe45023b02ae42d595562f6e4c3 diff --git a/test/sign_language_test.cc b/test/sign_language_test.cc new file mode 100644 index 000000000..e15356d92 --- /dev/null +++ b/test/sign_language_test.cc @@ -0,0 +1,40 @@ +/* + Copyright (C) 2022 Carl Hetherington + + This file is part of DCP-o-matic. + + DCP-o-matic is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + DCP-o-matic is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with DCP-o-matic. If not, see . + +*/ + + +#include "lib/content.h" +#include "lib/content_factory.h" +#include "lib/video_content.h" +#include "lib/video_type.h" +#include "test.h" +#include + + +BOOST_AUTO_TEST_CASE(simple_sign_language_encode) +{ + auto content = content_factory("test/data/10s_sign_language.mp4"); + BOOST_REQUIRE(!content.empty()); + auto film = new_test_film2("simple_sign_language_encode", content); + content[0]->video->set_type(VideoType::SIGN_LANGUAGE); + + make_and_verify_dcp(film); +} + + diff --git a/test/wscript b/test/wscript index c31a37a76..b95fdbcfb 100644 --- a/test/wscript +++ b/test/wscript @@ -130,6 +130,7 @@ def build(bld): scaling_test.cc scope_guard_test.cc scoped_temporary_test.cc + sign_language_test.cc silence_padding_test.cc shuffler_test.cc skip_frame_test.cc -- 2.30.2