Work around deadlock when destroying J2KEncoder with a full writer queue (#2784).
[dcpomatic.git] / src / lib / j2k_encoder.cc
index 9898ede787bb9b578892451fb00f28cb1f6205bb..32d2fefc2830c86fec1f057179c70d5dd74ef7e6 100644 (file)
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2012-2019 Carl Hetherington <cth@carlh.net>
+    Copyright (C) 2012-2021 Carl Hetherington <cth@carlh.net>
 
     This file is part of DCP-o-matic.
 
 
 */
 
+
 /** @file src/j2k_encoder.cc
  *  @brief J2K encoder class.
  */
 
-#include "j2k_encoder.h"
-#include "util.h"
-#include "film.h"
-#include "log.h"
-#include "dcpomatic_log.h"
+
+#include "compose.hpp"
 #include "config.h"
-#include "dcp_video.h"
 #include "cross.h"
-#include "writer.h"
+#include "dcp_video.h"
+#include "dcpomatic_log.h"
+#include "encode_server_description.h"
 #include "encode_server_finder.h"
-#include "player.h"
+#include "film.h"
+#include "j2k_encoder.h"
+#include "log.h"
 #include "player_video.h"
-#include "encode_server_description.h"
-#include "compose.hpp"
+#include "util.h"
+#include "writer.h"
 #include <libcxml/cxml.h>
-#include <boost/foreach.hpp>
 #include <iostream>
 
 #include "i18n.h"
 
-using std::list;
+
 using std::cout;
 using std::exception;
-using boost::shared_ptr;
-using boost::weak_ptr;
+using std::list;
+using std::make_shared;
+using std::shared_ptr;
+using std::weak_ptr;
 using boost::optional;
 using dcp::Data;
 using namespace dcpomatic;
 
+
 /** @param film Film that we are encoding.
  *  @param writer Writer that we are using.
  */
-J2KEncoder::J2KEncoder (shared_ptr<const Film> film, shared_ptr<Writer> writer)
+J2KEncoder::J2KEncoder(shared_ptr<const Film> film, Writer& writer)
        : _film (film)
        , _history (200)
        , _writer (writer)
 {
-       servers_list_changed ();
 }
 
+
 J2KEncoder::~J2KEncoder ()
 {
+       _server_found_connection.disconnect();
+
+       /* One of our encoder threads may be waiting for Writer::write() to return, if that method
+        * is blocked waiting for _full_condition because the writer queue is full.  In that case
+        * the attempt to terminate the encoder threads below (in terminate_threads()) will deadlock,
+        * because the encoder thread waiting in write() will have interruption disabled.
+        *
+        * To work around that, make the writer into a zombie, which unblocks any pending write()s
+        * and stops any future ones from blocking.
+        */
+       _writer.zombify();
+
        boost::mutex::scoped_lock lm (_threads_mutex);
        terminate_threads ();
 }
 
+
 void
 J2KEncoder::begin ()
 {
-       weak_ptr<J2KEncoder> wp = shared_from_this ();
-       _server_found_connection = EncodeServerFinder::instance()->ServersListChanged.connect (
-               boost::bind (&J2KEncoder::call_servers_list_changed, wp)
+       _server_found_connection = EncodeServerFinder::instance()->ServersListChanged.connect(
+               boost::bind(&J2KEncoder::servers_list_changed, this)
                );
+       servers_list_changed ();
 }
 
-/* We don't want the servers-list-changed callback trying to do things
-   during destruction of J2KEncoder, and I think this is the neatest way
-   to achieve that.
-*/
-void
-J2KEncoder::call_servers_list_changed (weak_ptr<J2KEncoder> encoder)
-{
-       shared_ptr<J2KEncoder> e = encoder.lock ();
-       if (e) {
-               e->servers_list_changed ();
-       }
-}
 
 void
 J2KEncoder::end ()
@@ -127,13 +131,13 @@ J2KEncoder::end ()
             So just mop up anything left in the queue here.
        */
 
-       for (list<shared_ptr<DCPVideo> >::iterator i = _queue.begin(); i != _queue.end(); ++i) {
-               LOG_GENERAL (N_("Encode left-over frame %1"), (*i)->index ());
+       for (auto const& i: _queue) {
+               LOG_GENERAL(N_("Encode left-over frame %1"), i.index());
                try {
-                       _writer->write (
-                               shared_ptr<dcp::Data>(new dcp::ArrayData((*i)->encode_locally())),
-                               (*i)->index(),
-                               (*i)->eyes()
+                       _writer.write(
+                               make_shared<dcp::ArrayData>(i.encode_locally()),
+                               i.index(),
+                               i.eyes()
                                );
                        frame_done ();
                } catch (std::exception& e) {
@@ -142,6 +146,7 @@ J2KEncoder::end ()
        }
 }
 
+
 /** @return an estimate of the current number of frames we are encoding per second,
  *  if known.
  */
@@ -151,6 +156,7 @@ J2KEncoder::current_encoding_rate () const
        return _history.rate ();
 }
 
+
 /** @return Number of video frames that have been queued for encoding */
 int
 J2KEncoder::video_frames_enqueued () const
@@ -162,6 +168,7 @@ J2KEncoder::video_frames_enqueued () const
        return _last_player_video_time->frames_floor (_film->video_frame_rate ());
 }
 
+
 /** Should be called when a frame has been encoded successfully */
 void
 J2KEncoder::frame_done ()
@@ -169,6 +176,7 @@ J2KEncoder::frame_done ()
        _history.event ();
 }
 
+
 /** Called to request encoding of the next video frame in the DCP.  This is called in order,
  *  so each time the supplied frame is the one after the previous one.
  *  pv represents one video frame, and could be empty if there is nothing to encode
@@ -199,40 +207,39 @@ J2KEncoder::encode (shared_ptr<PlayerVideo> pv, DCPTime time)
                LOG_TIMING ("decoder-wake queue=%1 threads=%2", _queue.size(), threads);
        }
 
-       _writer->rethrow ();
+       _writer.rethrow();
        /* Re-throw any exception raised by one of our threads.  If more
           than one has thrown an exception, only one will be rethrown, I think;
           but then, if that happens something has gone badly wrong.
        */
        rethrow ();
 
-       Frame const position = time.frames_floor(_film->video_frame_rate());
+       auto const position = time.frames_floor(_film->video_frame_rate());
 
-       if (_writer->can_fake_write (position)) {
+       if (_writer.can_fake_write(position)) {
                /* We can fake-write this frame */
                LOG_DEBUG_ENCODE("Frame @ %1 FAKE", to_string(time));
-               _writer->fake_write (position, pv->eyes ());
+               _writer.fake_write(position, pv->eyes ());
                frame_done ();
        } else if (pv->has_j2k() && !_film->reencode_j2k()) {
                LOG_DEBUG_ENCODE("Frame @ %1 J2K", to_string(time));
                /* This frame already has J2K data, so just write it */
-               _writer->write (pv->j2k(), position, pv->eyes ());
-       } else if (_last_player_video[pv->eyes()] && _writer->can_repeat(position) && pv->same (_last_player_video[pv->eyes()])) {
+               _writer.write(pv->j2k(), position, pv->eyes ());
+               frame_done ();
+       } else if (_last_player_video[pv->eyes()] && _writer.can_repeat(position) && pv->same(_last_player_video[pv->eyes()])) {
                LOG_DEBUG_ENCODE("Frame @ %1 REPEAT", to_string(time));
-               _writer->repeat (position, pv->eyes ());
+               _writer.repeat(position, pv->eyes());
        } else {
                LOG_DEBUG_ENCODE("Frame @ %1 ENCODE", to_string(time));
                /* Queue this new frame for encoding */
                LOG_TIMING ("add-frame-to-queue queue=%1", _queue.size ());
-               _queue.push_back (shared_ptr<DCPVideo> (
-                                         new DCPVideo (
-                                                 pv,
-                                                 position,
-                                                 _film->video_frame_rate(),
-                                                 _film->j2k_bandwidth(),
-                                                 _film->resolution()
-                                                 )
-                                         ));
+               _queue.push_back (DCPVideo(
+                               pv,
+                               position,
+                               _film->video_frame_rate(),
+                               _film->j2k_bandwidth(),
+                               _film->resolution()
+                               ));
 
                /* The queue might not be empty any more, so notify anything which is
                   waiting on that.
@@ -267,10 +274,13 @@ J2KEncoder::terminate_threads ()
        _threads.reset ();
 }
 
+
 void
 J2KEncoder::encoder_thread (optional<EncodeServerDescription> server)
 try
 {
+       start_of_thread ("J2KEncoder");
+
        if (server) {
                LOG_TIMING ("start-encoder-thread thread=%1 server=%2", thread_id (), server->host_name ());
        } else {
@@ -292,7 +302,7 @@ try
                }
 
                LOG_TIMING ("encoder-wake thread=%1 queue=%2", thread_id(), _queue.size());
-               shared_ptr<DCPVideo> vf = _queue.front ();
+               auto vf = _queue.front ();
 
                /* We're about to commit to either encoding this frame or putting it back onto the queue,
                   so we must not be interrupted until one or other of these things have happened.  This
@@ -301,7 +311,7 @@ try
                {
                        boost::this_thread::disable_interruption dis;
 
-                       LOG_TIMING ("encoder-pop thread=%1 frame=%2 eyes=%3", thread_id(), vf->index(), (int) vf->eyes ());
+                       LOG_TIMING ("encoder-pop thread=%1 frame=%2 eyes=%3", thread_id(), vf.index(), static_cast<int>(vf.eyes()));
                        _queue.pop_front ();
 
                        lock.unlock ();
@@ -311,7 +321,7 @@ try
                        /* We need to encode this input */
                        if (server) {
                                try {
-                                       encoded.reset(new dcp::ArrayData(vf->encode_remotely(server.get())));
+                                       encoded = make_shared<dcp::ArrayData>(vf.encode_remotely(server.get()));
 
                                        if (remote_backoff > 0) {
                                                LOG_GENERAL ("%1 was lost, but now she is found; removing backoff", server->host_name ());
@@ -327,15 +337,15 @@ try
                                        }
                                        LOG_ERROR (
                                                N_("Remote encode of %1 on %2 failed (%3); thread sleeping for %4s"),
-                                               vf->index(), server->host_name(), e.what(), remote_backoff
+                                               vf.index(), server->host_name(), e.what(), remote_backoff
                                                );
                                }
 
                        } else {
                                try {
-                                       LOG_TIMING ("start-local-encode thread=%1 frame=%2", thread_id(), vf->index());
-                                       encoded.reset(new dcp::ArrayData(vf->encode_locally()));
-                                       LOG_TIMING ("finish-local-encode thread=%1 frame=%2", thread_id(), vf->index());
+                                       LOG_TIMING ("start-local-encode thread=%1 frame=%2", thread_id(), vf.index());
+                                       encoded = make_shared<dcp::ArrayData>(vf.encode_locally());
+                                       LOG_TIMING ("finish-local-encode thread=%1 frame=%2", thread_id(), vf.index());
                                } catch (std::exception& e) {
                                        /* This is very bad, so don't cope with it, just pass it on */
                                        LOG_ERROR (N_("Local encode failed (%1)"), e.what ());
@@ -344,11 +354,11 @@ try
                        }
 
                        if (encoded) {
-                               _writer->write (encoded, vf->index(), vf->eyes());
+                               _writer.write(encoded, vf.index(), vf.eyes());
                                frame_done ();
                        } else {
                                lock.lock ();
-                               LOG_GENERAL (N_("[%1] J2KEncoder thread pushes frame %2 back onto queue after failure"), thread_id(), vf->index());
+                               LOG_GENERAL (N_("[%1] J2KEncoder thread pushes frame %2 back onto queue after failure"), thread_id(), vf.index());
                                _queue.push_front (vf);
                                lock.unlock ();
                        }
@@ -374,20 +384,21 @@ catch (...)
        _full_condition.notify_all ();
 }
 
+
 void
 J2KEncoder::servers_list_changed ()
 {
        boost::mutex::scoped_lock lm (_threads_mutex);
 
        terminate_threads ();
-       _threads.reset (new boost::thread_group());
+       _threads = make_shared<boost::thread_group>();
 
        /* XXX: could re-use threads */
 
        if (!Config::instance()->only_servers_encode ()) {
                for (int i = 0; i < Config::instance()->master_encoding_threads (); ++i) {
 #ifdef DCPOMATIC_LINUX
-                       boost::thread* t = _threads->create_thread(boost::bind(&J2KEncoder::encoder_thread, this, optional<EncodeServerDescription>()));
+                       auto t = _threads->create_thread(boost::bind(&J2KEncoder::encoder_thread, this, optional<EncodeServerDescription>()));
                        pthread_setname_np (t->native_handle(), "encode-worker");
 #else
                        _threads->create_thread(boost::bind(&J2KEncoder::encoder_thread, this, optional<EncodeServerDescription>()));
@@ -395,7 +406,7 @@ J2KEncoder::servers_list_changed ()
                }
        }
 
-       BOOST_FOREACH (EncodeServerDescription i, EncodeServerFinder::instance()->servers()) {
+       for (auto i: EncodeServerFinder::instance()->servers()) {
                if (!i.current_link_version()) {
                        continue;
                }
@@ -406,5 +417,5 @@ J2KEncoder::servers_list_changed ()
                }
        }
 
-       _writer->set_encoder_threads (_threads->size());
+       _writer.set_encoder_threads(_threads->size());
 }