Add memory buffer allocated by cuda and XYZ->RGB conversion;
author Carl Hetherington <cth@carlh.net>
Wed, 12 Aug 2020 19:42:37 +0000 (21:42 +0200)
committer Carl Hetherington <cth@carlh.net>
Sun, 13 Sep 2020 18:22:44 +0000 (20:22 +0200)
slower than the previous implementation.

src/lib/fastvideo_player_video_preparer.cc
src/lib/fastvideo_player_video_preparer.h

index 4e0f267ab72dc9c41d3ceda877fd24429302d4e0..6e092c0608610335142185c3ec98e9358d44368b 100644 (file)
@@ -6,11 +6,13 @@
 #include "j2k_image_proxy.h"
 #include "player_video.h"
 #include "timer.h"
+#include <cuda_runtime.h>
 #include <fastvideo_decoder_j2k.h>
 #include <fastvideo_sdk.h>
 #include <boost/bind.hpp>
 
 
+using std::bad_alloc;
 using boost::bind;
 using boost::const_pointer_cast;
 using boost::dynamic_pointer_cast;
@@ -22,6 +24,7 @@ FastvideoPlayerVideoPreparer::FastvideoPlayerVideoPreparer (boost::function<AVPi
        : _stop_thread (false)
        , _decoder (0)
        , _setup_done (false)
+       , _decoded (0)
        , _cpu (pixel_format, aligned, fast)
 {
        fastSdkParametersHandle_t sdk_parameters;
@@ -49,6 +52,7 @@ FastvideoPlayerVideoPreparer::~FastvideoPlayerVideoPreparer ()
        if (_setup_done) {
                fastDecoderJ2kDestroy(_decoder);
                fastExportToHostDestroy(_adapter);
+               cudaFreeHost (_decoded);
        }
 }
 
@@ -142,15 +146,29 @@ FastvideoPlayerVideoPreparer::transform_and_extract ()
                        /* XXX: this should be memlocked or whatever fastMalloc does */
                        shared_ptr<J2KImageProxy> proxy = const_pointer_cast<J2KImageProxy>(dynamic_pointer_cast<const J2KImageProxy>(pv->image_proxy()));
                        DCPOMATIC_ASSERT (proxy);
-                       dcp::Size const size = proxy->size();
-                       shared_ptr<dcpomatic::Image> image(new dcpomatic::Image(AV_PIX_FMT_RGB24, size, true));
                        fastExportParameters_t export_parameters;
                        export_parameters.convert = FAST_CONVERT_NONE;
-                       fastStatus_t r = fastExportToHostCopy(_adapter, image->data()[0], size.width, image->stride()[0], size.height, &export_parameters);
+                       dcp::Size const size = proxy->size();
+                       fastStatus_t r = fastExportToHostCopy(_adapter, _decoded, size.width, _decoded_stride, size.height, &export_parameters);
                        if (r != FAST_OK) {
                                throw FastvideoError ("ExportToHostCopy", r);
                        }
 
+                       shared_ptr<dcpomatic::Image> image(new dcpomatic::Image(AV_PIX_FMT_XYZ12LE, size, true));
+                       uint8_t* from = reinterpret_cast<uint8_t*>(_decoded);
+                       uint16_t* to = reinterpret_cast<uint16_t*>(image->data()[0]);
+                       for (int y = 0; y < size.height; ++y) {
+                               uint8_t* from_p = from;
+                               uint16_t* to_p = to;
+                               for (int x = 0; x < size.width; ++x) {
+                                       *to_p++ = uint16_t(*from_p++) << 8;
+                                       *to_p++ = uint16_t(*from_p++) << 8;
+                                       *to_p++ = uint16_t(*from_p++) << 8;
+                               }
+                               from += _decoded_stride;
+                               to += image->stride()[0] / 2;
+                       }
+
                        timestamped_printf("fv sets image for %d\n", pv->time.frames_round(24));
                        proxy->set_image (image);
 
@@ -222,4 +240,11 @@ FastvideoPlayerVideoPreparer::setup (dcp::Data sample)
        if (r != FAST_OK) {
                throw FastvideoError ("ExportToHostCreate");
        }
+
+       _decoded_stride = info.width * 3;
+       _decoded_stride += 32 - (_decoded_stride % 32);
+       cudaError e = cudaMallocHost (&_decoded, _decoded_stride * info.height);
+       if (e != cudaSuccess) {
+               throw bad_alloc ();
+       }
 }
index 384d8edebb2d4cf63c3c553c8de784ca8c6e0742..753af6cbd4cd8c91b6affa8458b46366920b1562 100644 (file)
@@ -40,6 +40,8 @@ private:
        bool _setup_done;
 
        std::vector<boost::weak_ptr<PlayerVideo> > _batch;
+       void* _decoded;
+       int _decoded_stride;
 
        CPUPlayerVideoPreparer _cpu;