Add pixel format 66 (AV_PIX_FMT_YUV422P10LE) to make_part_black().
[dcpomatic.git] / src / lib / image.cc
index 46c085ff2cf17943b6bd3b2b11d90779fefdfefc..63ae34ce96e5d58e411749baab3001f93b7264ec 100644 (file)
@@ -1,5 +1,5 @@
 /*
-    Copyright (C) 2012-2016 Carl Hetherington <cth@carlh.net>
+    Copyright (C) 2012-2021 Carl Hetherington <cth@carlh.net>
 
     This file is part of DCP-o-matic.
 
 
 */
 
+
 /** @file src/image.cc
  *  @brief A class to describe a video image.
  */
 
-#include "image.h"
+
+#include "compose.hpp"
+#include "dcpomatic_socket.h"
 #include "exceptions.h"
-#include "timer.h"
+#include "image.h"
 #include "rect.h"
+#include "timer.h"
 #include "util.h"
-#include "compose.hpp"
-#include "dcpomatic_socket.h"
 #include <dcp/rgb_xyz.h>
 #include <dcp/transfer_function.h>
 extern "C" {
-#include <libswscale/swscale.h>
-#include <libavutil/pixfmt.h>
-#include <libavutil/pixdesc.h>
 #include <libavutil/frame.h>
+#include <libavutil/pixdesc.h>
+#include <libavutil/pixfmt.h>
+#include <libswscale/swscale.h>
 }
 #include <png.h>
 #if HAVE_VALGRIND_MEMCHECK_H
@@ -43,18 +45,35 @@ extern "C" {
 #endif
 #include <iostream>
 
+
 #include "i18n.h"
 
-using std::string;
-using std::min;
-using std::max;
-using std::cout;
+
 using std::cerr;
+using std::cout;
 using std::list;
+using std::make_shared;
+using std::max;
+using std::min;
 using std::runtime_error;
-using boost::shared_ptr;
+using std::shared_ptr;
+using std::string;
 using dcp::Size;
 
+
+/** The memory alignment, in bytes, used for each row of an image if alignment is requested */
+#define ALIGNMENT 64
+
+/* U/V black value for 8-bit colour */
+static uint8_t const eight_bit_uv =    (1 << 7) - 1;
+/* U/V black value for 9-bit colour */
+static uint16_t const nine_bit_uv =    (1 << 8) - 1;
+/* U/V black value for 10-bit colour */
+static uint16_t const ten_bit_uv =     (1 << 9) - 1;
+/* U/V black value for 16-bit colour */
+static uint16_t const sixteen_bit_uv = (1 << 15) - 1;
+
+
 int
 Image::vertical_factor (int n) const
 {
@@ -62,12 +81,12 @@ Image::vertical_factor (int n) const
                return 1;
        }
 
-       AVPixFmtDescriptor const * d = av_pix_fmt_desc_get(_pixel_format);
+       auto d = av_pix_fmt_desc_get(_pixel_format);
        if (!d) {
                throw PixelFormatError ("line_factor()", _pixel_format);
        }
 
-       return pow (2.0f, d->log2_chroma_h);
+       return lrintf(powf(2.0f, d->log2_chroma_h));
 }
 
 int
@@ -77,12 +96,12 @@ Image::horizontal_factor (int n) const
                return 1;
        }
 
-       AVPixFmtDescriptor const * d = av_pix_fmt_desc_get(_pixel_format);
+       auto d = av_pix_fmt_desc_get(_pixel_format);
        if (!d) {
                throw PixelFormatError ("sample_size()", _pixel_format);
        }
 
-       return pow (2.0f, d->log2_chroma_w);
+       return lrintf(powf(2.0f, d->log2_chroma_w));
 }
 
 /** @param n Component index.
@@ -92,8 +111,8 @@ dcp::Size
 Image::sample_size (int n) const
 {
        return dcp::Size (
-               lrint (ceil (static_cast<double>(size().width) / horizontal_factor (n))),
-               lrint (ceil (static_cast<double>(size().height) / vertical_factor (n)))
+               lrint (ceil(static_cast<double>(size().width) / horizontal_factor(n))),
+               lrint (ceil(static_cast<double>(size().height) / vertical_factor(n)))
                );
 }
 
@@ -101,7 +120,7 @@ Image::sample_size (int n) const
 int
 Image::planes () const
 {
-       AVPixFmtDescriptor const * d = av_pix_fmt_desc_get(_pixel_format);
+       auto d = av_pix_fmt_desc_get(_pixel_format);
        if (!d) {
                throw PixelFormatError ("planes()", _pixel_format);
        }
@@ -117,19 +136,46 @@ Image::planes () const
        return d->nb_components;
 }
 
+
+static
+int
+round_width_for_subsampling (int p, AVPixFmtDescriptor const * desc)
+{
+       return p & ~ ((1 << desc->log2_chroma_w) - 1);
+}
+
+
+static
+int
+round_height_for_subsampling (int p, AVPixFmtDescriptor const * desc)
+{
+       return p & ~ ((1 << desc->log2_chroma_h) - 1);
+}
+
+
 /** Crop this image, scale it to `inter_size' and then place it in a black frame of `out_size'.
  *  @param crop Amount to crop by.
  *  @param inter_size Size to scale the cropped image to.
  *  @param out_size Size of output frame; if this is larger than inter_size there will be black padding.
  *  @param yuv_to_rgb YUV to RGB transformation to use, if required.
+ *  @param video_range Video range of the image.
  *  @param out_format Output pixel format.
  *  @param out_aligned true to make the output image aligned.
+ *  @param out_video_range Video range to use for the output image.
  *  @param fast Try to be fast at the possible expense of quality; at present this means using
  *  fast bilinear rather than bicubic scaling.
  */
 shared_ptr<Image>
 Image::crop_scale_window (
-       Crop crop, dcp::Size inter_size, dcp::Size out_size, dcp::YUVToRGB yuv_to_rgb, AVPixelFormat out_format, bool out_aligned, bool fast
+       Crop crop,
+       dcp::Size inter_size,
+       dcp::Size out_size,
+       dcp::YUVToRGB yuv_to_rgb,
+       VideoRange video_range,
+       AVPixelFormat out_format,
+       VideoRange out_video_range,
+       bool out_aligned,
+       bool fast
        ) const
 {
        /* Empirical testing suggests that sws_scale() will crash if
@@ -140,36 +186,40 @@ Image::crop_scale_window (
        DCPOMATIC_ASSERT (out_size.width >= inter_size.width);
        DCPOMATIC_ASSERT (out_size.height >= inter_size.height);
 
-       /* Here's an image of out_size.  Below we may write to it starting at an offset so we get some padding.
-          Hence we want to write in the following pattern:
-
-          block start   write start                                  line end
-          |..(padding)..|<------line-size------------->|..(padding)..|
-          |..(padding)..|<------line-size------------->|..(padding)..|
-          |..(padding)..|<------line-size------------->|..(padding)..|
+       auto out = make_shared<Image>(out_format, out_size, out_aligned);
+       out->make_black ();
 
-          where line-size is of the smaller (inter_size) image and the full padded line length is that of
-          out_size.  To get things to work we have to tell FFmpeg that the stride is that of out_size.
-          However some parts of FFmpeg (notably rgb48Toxyz12 in swscale.c) process data for the full
-          specified *stride*.  This does not matter until we get to the last line:
+       auto in_desc = av_pix_fmt_desc_get (_pixel_format);
+       if (!in_desc) {
+               throw PixelFormatError ("crop_scale_window()", _pixel_format);
+       }
 
-          block start   write start                                  line end
-          |..(padding)..|<------line-size------------->|XXXwrittenXXX|
-          |XXXwrittenXXX|<------line-size------------->|XXXwrittenXXX|
-          |XXXwrittenXXX|<------line-size------------->|XXXwrittenXXXXXXwrittenXXX
-                                                                      ^^^^ out of bounds
+       /* Round down so that we crop only the number of pixels that is straightforward
+        * considering any subsampling.
+        */
+       Crop corrected_crop(
+               round_width_for_subsampling(crop.left, in_desc),
+               round_width_for_subsampling(crop.right, in_desc),
+               round_height_for_subsampling(crop.top, in_desc),
+               round_height_for_subsampling(crop.bottom, in_desc)
+               );
 
-          To get around this, we ask Image to overallocate its buffers by the overrun.
-       */
+       /* Also check that we aren't cropping more image than there actually is */
+       if ((corrected_crop.left + corrected_crop.right) >= (size().width - 4)) {
+               corrected_crop.left = 0;
+               corrected_crop.right = size().width - 4;
+       }
 
-       shared_ptr<Image> out (new Image (out_format, out_size, out_aligned, (out_size.width - inter_size.width) / 2));
-       out->make_black ();
+       if ((corrected_crop.top + corrected_crop.bottom) >= (size().height - 4)) {
+               corrected_crop.top = 0;
+               corrected_crop.bottom = size().height - 4;
+       }
 
        /* Size of the image after any crop */
-       dcp::Size const cropped_size = crop.apply (size ());
+       auto const cropped_size = corrected_crop.apply (size());
 
        /* Scale context for a scale from cropped_size to inter_size */
-       struct SwsContext* scale_context = sws_getContext (
+       auto scale_context = sws_getContext (
                        cropped_size.width, cropped_size.height, pixel_format(),
                        inter_size.width, inter_size.height, out_format,
                        fast ? SWS_FAST_BILINEAR : SWS_BICUBIC, 0, 0, 0
@@ -179,8 +229,8 @@ Image::crop_scale_window (
                throw runtime_error (N_("Could not allocate SwsContext"));
        }
 
-       DCPOMATIC_ASSERT (yuv_to_rgb < dcp::YUV_TO_RGB_COUNT);
-       int const lut[dcp::YUV_TO_RGB_COUNT] = {
+       DCPOMATIC_ASSERT (yuv_to_rgb < dcp::YUVToRGB::COUNT);
+       int const lut[static_cast<int>(dcp::YUVToRGB::COUNT)] = {
                SWS_CS_ITU601,
                SWS_CS_ITU709
        };
@@ -192,47 +242,38 @@ Image::crop_scale_window (
           0 -> destination range MPEG (i.e. "video", 16-235)
           1 -> destination range JPEG (i.e. "full", 0-255)
 
-          But remember: sws_setColorspaceDetails ignores
-          these parameters unless the image isYUV or isGray
-          (if it's neither, it uses video range for source
-          and destination).
+          But remember: sws_setColorspaceDetails ignores these
+          parameters unless both the source and destination images
+          are isYUV or isGray.  (If either is not, it uses video range).
        */
        sws_setColorspaceDetails (
                scale_context,
-               sws_getCoefficients (lut[yuv_to_rgb]), 0,
-               sws_getCoefficients (lut[yuv_to_rgb]), 0,
+               sws_getCoefficients (lut[static_cast<int>(yuv_to_rgb)]), video_range == VideoRange::VIDEO ? 0 : 1,
+               sws_getCoefficients (lut[static_cast<int>(yuv_to_rgb)]), out_video_range == VideoRange::VIDEO ? 0 : 1,
                0, 1 << 16, 1 << 16
                );
 
-       AVPixFmtDescriptor const * in_desc = av_pix_fmt_desc_get (_pixel_format);
-       if (!in_desc) {
-               throw PixelFormatError ("crop_scale_window()", _pixel_format);
-       }
-
        /* Prepare input data pointers with crop */
        uint8_t* scale_in_data[planes()];
        for (int c = 0; c < planes(); ++c) {
-               /* To work out the crop in bytes, start by multiplying
-                  the crop by the (average) bytes per pixel.  Then
-                  round down so that we don't crop a subsampled pixel until
-                  we've cropped all of its Y-channel pixels.
-               */
-               int const x = lrintf (bytes_per_pixel(c) * crop.left) & ~ ((int) in_desc->log2_chroma_w);
-               scale_in_data[c] = data()[c] + x + stride()[c] * (crop.top / vertical_factor(c));
+               int const x = lrintf(bytes_per_pixel(c) * corrected_crop.left);
+               scale_in_data[c] = data()[c] + x + stride()[c] * (corrected_crop.top / vertical_factor(c));
        }
 
-       /* Corner of the image within out_size */
-       Position<int> const corner ((out_size.width - inter_size.width) / 2, (out_size.height - inter_size.height) / 2);
-
-       AVPixFmtDescriptor const * out_desc = av_pix_fmt_desc_get (out_format);
+       auto out_desc = av_pix_fmt_desc_get (out_format);
        if (!out_desc) {
                throw PixelFormatError ("crop_scale_window()", out_format);
        }
 
+       /* Corner of the image within out_size */
+       Position<int> const corner (
+               round_width_for_subsampling((out_size.width - inter_size.width) / 2, out_desc),
+               round_height_for_subsampling((out_size.height - inter_size.height) / 2, out_desc)
+               );
+
        uint8_t* scale_out_data[out->planes()];
        for (int c = 0; c < out->planes(); ++c) {
-               /* See the note in the crop loop above */
-               int const x = lrintf (out->bytes_per_pixel(c) * corner.x) & ~ ((int) out_desc->log2_chroma_w);
+               int const x = lrintf(out->bytes_per_pixel(c) * corner.x);
                scale_out_data[c] = out->data()[c] + x + out->stride()[c] * (corner.y / out->vertical_factor(c));
        }
 
@@ -245,6 +286,14 @@ Image::crop_scale_window (
 
        sws_freeContext (scale_context);
 
+       if (corrected_crop != Crop() && cropped_size == inter_size) {
+               /* We are cropping without any scaling or pixel format conversion, so FFmpeg may have left some
+                  data behind in our image.  Clear it out.  It may get to the point where we should just stop
+                  trying to be clever with cropping.
+               */
+               out->make_part_black (corner.x + cropped_size.width, out_size.width - cropped_size.width);
+       }
+
        return out;
 }
 
@@ -269,16 +318,15 @@ Image::scale (dcp::Size out_size, dcp::YUVToRGB yuv_to_rgb, AVPixelFormat out_fo
        */
        DCPOMATIC_ASSERT (aligned ());
 
-       shared_ptr<Image> scaled (new Image (out_format, out_size, out_aligned));
-
-       struct SwsContext* scale_context = sws_getContext (
+       auto scaled = make_shared<Image>(out_format, out_size, out_aligned);
+       auto scale_context = sws_getContext (
                size().width, size().height, pixel_format(),
                out_size.width, out_size.height, out_format,
                (fast ? SWS_FAST_BILINEAR : SWS_BICUBIC) | SWS_ACCURATE_RND, 0, 0, 0
                );
 
-       DCPOMATIC_ASSERT (yuv_to_rgb < dcp::YUV_TO_RGB_COUNT);
-       int const lut[dcp::YUV_TO_RGB_COUNT] = {
+       DCPOMATIC_ASSERT (yuv_to_rgb < dcp::YUVToRGB::COUNT);
+       int const lut[static_cast<int>(dcp::YUVToRGB::COUNT)] = {
                SWS_CS_ITU601,
                SWS_CS_ITU709
        };
@@ -290,15 +338,14 @@ Image::scale (dcp::Size out_size, dcp::YUVToRGB yuv_to_rgb, AVPixelFormat out_fo
           0 -> destination range MPEG (i.e. "video", 16-235)
           1 -> destination range JPEG (i.e. "full", 0-255)
 
-          But remember: sws_setColorspaceDetails ignores
-          these parameters unless the image isYUV or isGray
-          (if it's neither, it uses video range for source
-          and destination).
+          But remember: sws_setColorspaceDetails ignores these
+          parameters unless the corresponding image isYUV or isGray.
+          (If it's neither, it uses video range).
        */
        sws_setColorspaceDetails (
                scale_context,
-               sws_getCoefficients (lut[yuv_to_rgb]), 0,
-               sws_getCoefficients (lut[yuv_to_rgb]), 0,
+               sws_getCoefficients (lut[static_cast<int>(yuv_to_rgb)]), 0,
+               sws_getCoefficients (lut[static_cast<int>(yuv_to_rgb)]), 0,
                0, 1 << 16, 1 << 16
                );
 
@@ -320,7 +367,7 @@ Image::yuv_16_black (uint16_t v, bool alpha)
 {
        memset (data()[0], 0, sample_size(0).height * stride()[0]);
        for (int i = 1; i < 3; ++i) {
-               int16_t* p = reinterpret_cast<int16_t*> (data()[i]);
+               auto p = reinterpret_cast<int16_t*> (data()[i]);
                int const lines = sample_size(i).height;
                for (int y = 0; y < lines; ++y) {
                        /* We divide by 2 here because we are writing 2 bytes at a time */
@@ -343,17 +390,59 @@ Image::swap_16 (uint16_t v)
 }
 
 void
-Image::make_black ()
+Image::make_part_black (int const start, int const width)
 {
-       /* U/V black value for 8-bit colour */
-       static uint8_t const eight_bit_uv =     (1 << 7) - 1;
-       /* U/V black value for 9-bit colour */
-       static uint16_t const nine_bit_uv =     (1 << 8) - 1;
-       /* U/V black value for 10-bit colour */
-       static uint16_t const ten_bit_uv =      (1 << 9) - 1;
-       /* U/V black value for 16-bit colour */
-       static uint16_t const sixteen_bit_uv =  (1 << 15) - 1;
+       switch (_pixel_format) {
+       case AV_PIX_FMT_RGB24:
+       case AV_PIX_FMT_ARGB:
+       case AV_PIX_FMT_RGBA:
+       case AV_PIX_FMT_ABGR:
+       case AV_PIX_FMT_BGRA:
+       case AV_PIX_FMT_RGB555LE:
+       case AV_PIX_FMT_RGB48LE:
+       case AV_PIX_FMT_RGB48BE:
+       case AV_PIX_FMT_XYZ12LE:
+       {
+               int const h = sample_size(0).height;
+               int const bpp = bytes_per_pixel(0);
+               int const s = stride()[0];
+               uint8_t* p = data()[0];
+               for (int y = 0; y < h; y++) {
+                       memset (p + start * bpp, 0, width * bpp);
+                       p += s;
+               }
+               break;
+       }
+       case AV_PIX_FMT_YUV422P10LE:
+       {
+               int const bpp_0 = bytes_per_pixel(0);
+               int const h_0 = sample_size(0).height;
+               int const stride_0 = stride()[0];
+               auto p = data()[0];
+               for (int y = 0; y < h_0; ++y) {
+                       memset (p + start * bpp_0, 0xff, width * bpp_0);
+                       p += stride_0;
+               }
+               for (int i = 1; i < 3; ++i) {
+                       auto p = reinterpret_cast<int16_t*>(data()[i]);
+                       int const lines = sample_size(i).height;
+                       for (int y = 0; y < lines; ++y) {
+                               for (int x = start / 2; x < (start + width) / 2; ++x) {
+                                       p[x] = ten_bit_uv;
+                               }
+                               p += stride()[i] / 2;
+                       }
+               }
+               break;
+       }
+       default:
+               throw PixelFormatError ("make_part_black()", _pixel_format);
+       }
+}
 
+void
+Image::make_black ()
+{
        switch (_pixel_format) {
        case AV_PIX_FMT_YUV420P:
        case AV_PIX_FMT_YUV422P:
@@ -469,7 +558,7 @@ Image::make_black ()
 void
 Image::make_transparent ()
 {
-       if (_pixel_format != AV_PIX_FMT_BGRA) {
+       if (_pixel_format != AV_PIX_FMT_BGRA && _pixel_format != AV_PIX_FMT_RGBA) {
                throw PixelFormatError ("make_transparent()", _pixel_format);
        }
 
@@ -479,8 +568,11 @@ Image::make_transparent ()
 void
 Image::alpha_blend (shared_ptr<const Image> other, Position<int> position)
 {
-       /* We're blending BGRA images; first byte is blue, second byte is green, third byte red, fourth byte alpha */
-       DCPOMATIC_ASSERT (other->pixel_format() == AV_PIX_FMT_BGRA);
+       /* We're blending RGBA or BGRA images */
+       DCPOMATIC_ASSERT (other->pixel_format() == AV_PIX_FMT_BGRA || other->pixel_format() == AV_PIX_FMT_RGBA);
+       int const blue = other->pixel_format() == AV_PIX_FMT_BGRA ? 0 : 2;
+       int const red = other->pixel_format() == AV_PIX_FMT_BGRA ? 2 : 0;
+
        int const other_bpp = 4;
 
        int start_tx = position.x;
@@ -509,9 +601,9 @@ Image::alpha_blend (shared_ptr<const Image> other, Position<int> position)
                        uint8_t* op = other->data()[0] + oy * other->stride()[0];
                        for (int tx = start_tx, ox = start_ox; tx < size().width && ox < other->size().width; ++tx, ++ox) {
                                float const alpha = float (op[3]) / 255;
-                               tp[0] = op[2] * alpha + tp[0] * (1 - alpha);
+                               tp[0] = op[red] * alpha + tp[0] * (1 - alpha);
                                tp[1] = op[1] * alpha + tp[1] * (1 - alpha);
-                               tp[2] = op[0] * alpha + tp[2] * (1 - alpha);
+                               tp[2] = op[blue] * alpha + tp[2] * (1 - alpha);
 
                                tp += this_bpp;
                                op += other_bpp;
@@ -527,9 +619,9 @@ Image::alpha_blend (shared_ptr<const Image> other, Position<int> position)
                        uint8_t* op = other->data()[0] + oy * other->stride()[0];
                        for (int tx = start_tx, ox = start_ox; tx < size().width && ox < other->size().width; ++tx, ++ox) {
                                float const alpha = float (op[3]) / 255;
-                               tp[0] = op[0] * alpha + tp[0] * (1 - alpha);
+                               tp[0] = op[blue] * alpha + tp[0] * (1 - alpha);
                                tp[1] = op[1] * alpha + tp[1] * (1 - alpha);
-                               tp[2] = op[2] * alpha + tp[2] * (1 - alpha);
+                               tp[2] = op[red] * alpha + tp[2] * (1 - alpha);
                                tp[3] = op[3] * alpha + tp[3] * (1 - alpha);
 
                                tp += this_bpp;
@@ -546,9 +638,9 @@ Image::alpha_blend (shared_ptr<const Image> other, Position<int> position)
                        uint8_t* op = other->data()[0] + oy * other->stride()[0];
                        for (int tx = start_tx, ox = start_ox; tx < size().width && ox < other->size().width; ++tx, ++ox) {
                                float const alpha = float (op[3]) / 255;
-                               tp[0] = op[2] * alpha + tp[0] * (1 - alpha);
+                               tp[0] = op[red] * alpha + tp[0] * (1 - alpha);
                                tp[1] = op[1] * alpha + tp[1] * (1 - alpha);
-                               tp[2] = op[0] * alpha + tp[2] * (1 - alpha);
+                               tp[2] = op[blue] * alpha + tp[2] * (1 - alpha);
                                tp[3] = op[3] * alpha + tp[3] * (1 - alpha);
 
                                tp += this_bpp;
@@ -566,9 +658,9 @@ Image::alpha_blend (shared_ptr<const Image> other, Position<int> position)
                        for (int tx = start_tx, ox = start_ox; tx < size().width && ox < other->size().width; ++tx, ++ox) {
                                float const alpha = float (op[3]) / 255;
                                /* Blend high bytes */
-                               tp[1] = op[2] * alpha + tp[1] * (1 - alpha);
+                               tp[1] = op[red] * alpha + tp[1] * (1 - alpha);
                                tp[3] = op[1] * alpha + tp[3] * (1 - alpha);
-                               tp[5] = op[0] * alpha + tp[5] * (1 - alpha);
+                               tp[5] = op[blue] * alpha + tp[5] * (1 - alpha);
 
                                tp += this_bpp;
                                op += other_bpp;
@@ -578,7 +670,7 @@ Image::alpha_blend (shared_ptr<const Image> other, Position<int> position)
        }
        case AV_PIX_FMT_XYZ12LE:
        {
-               dcp::ColourConversion conv = dcp::ColourConversion::srgb_to_xyz();
+               auto conv = dcp::ColourConversion::srgb_to_xyz();
                double fast_matrix[9];
                dcp::combined_rgb_to_xyz (conv, fast_matrix);
                double const * lut_in = conv.in()->lut (8, false);
@@ -591,9 +683,9 @@ Image::alpha_blend (shared_ptr<const Image> other, Position<int> position)
                                float const alpha = float (op[3]) / 255;
 
                                /* Convert sRGB to XYZ; op is BGRA.  First, input gamma LUT */
-                               double const r = lut_in[op[2]];
+                               double const r = lut_in[op[red]];
                                double const g = lut_in[op[1]];
-                               double const b = lut_in[op[0]];
+                               double const b = lut_in[op[blue]];
 
                                /* RGB to XYZ, including Bradford transform and DCI companding */
                                double const x = max (0.0, min (65535.0, r * fast_matrix[0] + g * fast_matrix[1] + b * fast_matrix[2]));
@@ -613,7 +705,7 @@ Image::alpha_blend (shared_ptr<const Image> other, Position<int> position)
        }
        case AV_PIX_FMT_YUV420P:
        {
-               shared_ptr<Image> yuv = other->convert_pixel_format (dcp::YUV_TO_RGB_REC709, _pixel_format, false, false);
+               auto yuv = other->convert_pixel_format (dcp::YUVToRGB::REC709, _pixel_format, false, false);
                dcp::Size const ts = size();
                dcp::Size const os = yuv->size();
                for (int ty = start_ty, oy = start_oy; ty < ts.height && oy < os.height; ++ty, ++oy) {
@@ -648,7 +740,7 @@ Image::alpha_blend (shared_ptr<const Image> other, Position<int> position)
        }
        case AV_PIX_FMT_YUV420P10:
        {
-               shared_ptr<Image> yuv = other->convert_pixel_format (dcp::YUV_TO_RGB_REC709, _pixel_format, false, false);
+               auto yuv = other->convert_pixel_format (dcp::YUVToRGB::REC709, _pixel_format, false, false);
                dcp::Size const ts = size();
                dcp::Size const os = yuv->size();
                for (int ty = start_ty, oy = start_oy; ty < ts.height && oy < os.height; ++ty, ++oy) {
@@ -683,7 +775,7 @@ Image::alpha_blend (shared_ptr<const Image> other, Position<int> position)
        }
        case AV_PIX_FMT_YUV422P10LE:
        {
-               shared_ptr<Image> yuv = other->convert_pixel_format (dcp::YUV_TO_RGB_REC709, _pixel_format, false, false);
+               auto yuv = other->convert_pixel_format (dcp::YUVToRGB::REC709, _pixel_format, false, false);
                dcp::Size const ts = size();
                dcp::Size const os = yuv->size();
                for (int ty = start_ty, oy = start_oy; ty < ts.height && oy < os.height; ++ty, ++oy) {
@@ -763,7 +855,7 @@ Image::write_to_socket (shared_ptr<Socket> socket) const
 float
 Image::bytes_per_pixel (int c) const
 {
-       AVPixFmtDescriptor const * d = av_pix_fmt_desc_get(_pixel_format);
+       auto d = av_pix_fmt_desc_get(_pixel_format);
        if (!d) {
                throw PixelFormatError ("bytes_per_pixel()", _pixel_format);
        }
@@ -811,14 +903,12 @@ Image::bytes_per_pixel (int c) const
  *
  *  @param p Pixel format.
  *  @param s Size in pixels.
- *  @param aligned true to make each row of this image aligned to a 32-byte boundary.
- *  @param extra_pixels Amount of extra "run-off" memory to allocate at the end of each plane in pixels.
+ *  @param aligned true to make each row of this image aligned to an ALIGNMENT-byte boundary.
  */
-Image::Image (AVPixelFormat p, dcp::Size s, bool aligned, int extra_pixels)
+Image::Image (AVPixelFormat p, dcp::Size s, bool aligned)
        : _size (s)
        , _pixel_format (p)
        , _aligned (aligned)
-       , _extra_pixels (extra_pixels)
 {
        allocate ();
 }
@@ -837,7 +927,7 @@ Image::allocate ()
 
        for (int i = 0; i < planes(); ++i) {
                _line_size[i] = ceil (_size.width * bytes_per_pixel(i));
-               _stride[i] = stride_round_up (i, _line_size, _aligned ? 32 : 1);
+               _stride[i] = stride_round_up (i, _line_size, _aligned ? ALIGNMENT : 1);
 
                /* The assembler function ff_rgb24ToY_avx (in libswscale/x86/input.asm)
                   uses a 16-byte fetch to read three bytes (R/G/B) of image data.
@@ -850,25 +940,47 @@ Image::allocate ()
 
                   Further to the above, valgrind is now telling me that ff_rgb24ToY_ssse3
                   over-reads by more then _avx.  I can't follow the code to work out how much,
-                  so I'll just over-allocate by 32 bytes and have done with it.  Empirical
+                  so I'll just over-allocate by ALIGNMENT bytes and have done with it.  Empirical
                   testing suggests that it works.
+
+                  In addition to these concerns, we may read/write as much as a whole extra line
+                  at the end of each plane in cases where we are messing with offsets in order to
+                  do pad or crop.  To solve this we over-allocate by an extra _stride[i] bytes.
+
+                  As an example: we may write to images starting at an offset so we get some padding.
+                  Hence we want to write in the following pattern:
+
+                  block start   write start                                  line end
+                  |..(padding)..|<------line-size------------->|..(padding)..|
+                  |..(padding)..|<------line-size------------->|..(padding)..|
+                  |..(padding)..|<------line-size------------->|..(padding)..|
+
+                  where line-size is of the smaller (inter_size) image and the full padded line length is that of
+                  out_size.  To get things to work we have to tell FFmpeg that the stride is that of out_size.
+                  However some parts of FFmpeg (notably rgb48Toxyz12 in swscale.c) process data for the full
+                  specified *stride*.  This does not matter until we get to the last line:
+
+                  block start   write start                                  line end
+                  |..(padding)..|<------line-size------------->|XXXwrittenXXX|
+                  |XXXwrittenXXX|<------line-size------------->|XXXwrittenXXX|
+                  |XXXwrittenXXX|<------line-size------------->|XXXwrittenXXXXXXwrittenXXX
+                                                                              ^^^^ out of bounds
                */
-               _data[i] = (uint8_t *) wrapped_av_malloc (_stride[i] * sample_size(i).height + _extra_pixels * bytes_per_pixel(i) + 32);
+               _data[i] = (uint8_t *) wrapped_av_malloc (_stride[i] * (sample_size(i).height + 1) + ALIGNMENT);
 #if HAVE_VALGRIND_MEMCHECK_H
                /* The data between the end of the line size and the stride is undefined but processed by
                   libswscale, causing lots of valgrind errors.  Mark it all defined to quell these errors.
                */
-               VALGRIND_MAKE_MEM_DEFINED (_data[i], _stride[i] * sample_size(i).height + _extra_pixels * bytes_per_pixel(i) + 32);
+               VALGRIND_MAKE_MEM_DEFINED (_data[i], _stride[i] * (sample_size(i).height + 1) + ALIGNMENT);
 #endif
        }
 }
 
 Image::Image (Image const & other)
-       : boost::enable_shared_from_this<Image>(other)
+       : std::enable_shared_from_this<Image>(other)
        , _size (other._size)
        , _pixel_format (other._pixel_format)
        , _aligned (other._aligned)
-       , _extra_pixels (other._extra_pixels)
 {
        allocate ();
 
@@ -886,10 +998,11 @@ Image::Image (Image const & other)
 
 Image::Image (AVFrame* frame)
        : _size (frame->width, frame->height)
-       , _pixel_format (static_cast<AVPixelFormat> (frame->format))
+       , _pixel_format (static_cast<AVPixelFormat>(frame->format))
        , _aligned (true)
-       , _extra_pixels (0)
 {
+       DCPOMATIC_ASSERT (_pixel_format != AV_PIX_FMT_NONE);
+
        allocate ();
 
        for (int i = 0; i < planes(); ++i) {
@@ -909,7 +1022,6 @@ Image::Image (shared_ptr<const Image> other, bool aligned)
        : _size (other->_size)
        , _pixel_format (other->_pixel_format)
        , _aligned (aligned)
-       , _extra_pixels (other->_extra_pixels)
 {
        allocate ();
 
@@ -951,10 +1063,8 @@ Image::swap (Image & other)
        }
 
        std::swap (_aligned, other._aligned);
-       std::swap (_extra_pixels, other._extra_pixels);
 }
 
-/** Destroy a Image */
 Image::~Image ()
 {
        for (int i = 0; i < planes(); ++i) {
@@ -996,11 +1106,12 @@ Image::aligned () const
        return _aligned;
 }
 
+
 PositionImage
 merge (list<PositionImage> images)
 {
        if (images.empty ()) {
-               return PositionImage ();
+               return {};
        }
 
        if (images.size() == 1) {
@@ -1008,19 +1119,20 @@ merge (list<PositionImage> images)
        }
 
        dcpomatic::Rect<int> all (images.front().position, images.front().image->size().width, images.front().image->size().height);
-       for (list<PositionImage>::const_iterator i = images.begin(); i != images.end(); ++i) {
-               all.extend (dcpomatic::Rect<int> (i->position, i->image->size().width, i->image->size().height));
+       for (auto const& i: images) {
+               all.extend (dcpomatic::Rect<int>(i.position, i.image->size().width, i.image->size().height));
        }
 
-       shared_ptr<Image> merged (new Image (images.front().image->pixel_format (), dcp::Size (all.width, all.height), true));
+       auto merged = make_shared<Image>(images.front().image->pixel_format(), dcp::Size(all.width, all.height), true);
        merged->make_transparent ();
-       for (list<PositionImage>::const_iterator i = images.begin(); i != images.end(); ++i) {
-               merged->alpha_blend (i->image, i->position - all.position());
+       for (auto const& i: images) {
+               merged->alpha_blend (i.image, i.position - all.position());
        }
 
        return PositionImage (merged, all.position ());
 }
 
+
 bool
 operator== (Image const & a, Image const & b)
 {
@@ -1055,49 +1167,61 @@ operator== (Image const & a, Image const & b)
 void
 Image::fade (float f)
 {
+       /* U/V black value for 8-bit colour */
+       static int const eight_bit_uv =    (1 << 7) - 1;
+       /* U/V black value for 10-bit colour */
+       static uint16_t const ten_bit_uv = (1 << 9) - 1;
+
        switch (_pixel_format) {
        case AV_PIX_FMT_YUV420P:
-       case AV_PIX_FMT_YUV422P:
-       case AV_PIX_FMT_YUV444P:
-       case AV_PIX_FMT_YUV411P:
-       case AV_PIX_FMT_YUVJ420P:
-       case AV_PIX_FMT_YUVJ422P:
-       case AV_PIX_FMT_YUVJ444P:
-       case AV_PIX_FMT_RGB24:
-       case AV_PIX_FMT_ARGB:
-       case AV_PIX_FMT_RGBA:
-       case AV_PIX_FMT_ABGR:
-       case AV_PIX_FMT_BGRA:
-       case AV_PIX_FMT_RGB555LE:
-               /* 8-bit */
-               for (int c = 0; c < 3; ++c) {
+       {
+               /* Y */
+               uint8_t* p = data()[0];
+               int const lines = sample_size(0).height;
+               for (int y = 0; y < lines; ++y) {
+                       uint8_t* q = p;
+                       for (int x = 0; x < line_size()[0]; ++x) {
+                               *q = int(float(*q) * f);
+                               ++q;
+                       }
+                       p += stride()[0];
+               }
+
+               /* U, V */
+               for (int c = 1; c < 3; ++c) {
                        uint8_t* p = data()[c];
                        int const lines = sample_size(c).height;
                        for (int y = 0; y < lines; ++y) {
                                uint8_t* q = p;
                                for (int x = 0; x < line_size()[c]; ++x) {
-                                       *q = int (float (*q) * f);
+                                       *q = eight_bit_uv + int((int(*q) - eight_bit_uv) * f);
                                        ++q;
                                }
                                p += stride()[c];
                        }
                }
+
                break;
+       }
+
+       case AV_PIX_FMT_RGB24:
+       {
+               /* 8-bit */
+               uint8_t* p = data()[0];
+               int const lines = sample_size(0).height;
+               for (int y = 0; y < lines; ++y) {
+                       uint8_t* q = p;
+                       for (int x = 0; x < line_size()[0]; ++x) {
+                               *q = int (float (*q) * f);
+                               ++q;
+                       }
+                       p += stride()[0];
+               }
+               break;
+       }
 
-       case AV_PIX_FMT_YUV422P9LE:
-       case AV_PIX_FMT_YUV444P9LE:
-       case AV_PIX_FMT_YUV422P10LE:
-       case AV_PIX_FMT_YUV444P10LE:
-       case AV_PIX_FMT_YUV422P16LE:
-       case AV_PIX_FMT_YUV444P16LE:
-       case AV_PIX_FMT_YUVA420P9LE:
-       case AV_PIX_FMT_YUVA422P9LE:
-       case AV_PIX_FMT_YUVA444P9LE:
-       case AV_PIX_FMT_YUVA420P10LE:
-       case AV_PIX_FMT_YUVA422P10LE:
-       case AV_PIX_FMT_YUVA444P10LE:
-       case AV_PIX_FMT_RGB48LE:
        case AV_PIX_FMT_XYZ12LE:
+       case AV_PIX_FMT_RGB48LE:
                /* 16-bit little-endian */
                for (int c = 0; c < 3; ++c) {
                        int const stride_pixels = stride()[c] / 2;
@@ -1115,22 +1239,26 @@ Image::fade (float f)
                }
                break;
 
-       case AV_PIX_FMT_YUV422P9BE:
-       case AV_PIX_FMT_YUV444P9BE:
-       case AV_PIX_FMT_YUV444P10BE:
-       case AV_PIX_FMT_YUV422P10BE:
-       case AV_PIX_FMT_YUVA420P9BE:
-       case AV_PIX_FMT_YUVA422P9BE:
-       case AV_PIX_FMT_YUVA444P9BE:
-       case AV_PIX_FMT_YUVA420P10BE:
-       case AV_PIX_FMT_YUVA422P10BE:
-       case AV_PIX_FMT_YUVA444P10BE:
-       case AV_PIX_FMT_YUVA420P16BE:
-       case AV_PIX_FMT_YUVA422P16BE:
-       case AV_PIX_FMT_YUVA444P16BE:
-       case AV_PIX_FMT_RGB48BE:
-               /* 16-bit big-endian */
-               for (int c = 0; c < 3; ++c) {
+       case AV_PIX_FMT_YUV422P10LE:
+       {
+               /* Y */
+               {
+                       int const stride_pixels = stride()[0] / 2;
+                       int const line_size_pixels = line_size()[0] / 2;
+                       uint16_t* p = reinterpret_cast<uint16_t*> (data()[0]);
+                       int const lines = sample_size(0).height;
+                       for (int y = 0; y < lines; ++y) {
+                               uint16_t* q = p;
+                               for (int x = 0; x < line_size_pixels; ++x) {
+                                       *q = int(float(*q) * f);
+                                       ++q;
+                               }
+                               p += stride_pixels;
+                       }
+               }
+
+               /* U, V */
+               for (int c = 1; c < 3; ++c) {
                        int const stride_pixels = stride()[c] / 2;
                        int const line_size_pixels = line_size()[c] / 2;
                        uint16_t* p = reinterpret_cast<uint16_t*> (data()[c]);
@@ -1138,7 +1266,7 @@ Image::fade (float f)
                        for (int y = 0; y < lines; ++y) {
                                uint16_t* q = p;
                                for (int x = 0; x < line_size_pixels; ++x) {
-                                       *q = swap_16 (int (float (swap_16 (*q)) * f));
+                                       *q = ten_bit_uv + int((int(*q) - ten_bit_uv) * f);
                                        ++q;
                                }
                                p += stride_pixels;
@@ -1146,18 +1274,6 @@ Image::fade (float f)
                }
                break;
 
-       case AV_PIX_FMT_UYVY422:
-       {
-               int const Y = sample_size(0).height;
-               int const X = line_size()[0];
-               uint8_t* p = data()[0];
-               for (int y = 0; y < Y; ++y) {
-                       for (int x = 0; x < X; ++x) {
-                               *p = int (float (*p) * f);
-                               ++p;
-                       }
-               }
-               break;
        }
 
        default:
@@ -1172,7 +1288,7 @@ Image::ensure_aligned (shared_ptr<const Image> image)
                return image;
        }
 
-       return shared_ptr<Image> (new Image (image, true));
+       return make_shared<Image>(image, true);
 }
 
 size_t
@@ -1205,7 +1321,7 @@ public:
 static void
 png_write_data (png_structp png_ptr, png_bytep data, png_size_t length)
 {
-       Memory* mem = reinterpret_cast<Memory*>(png_get_io_ptr(png_ptr));
+       auto mem = reinterpret_cast<Memory*>(png_get_io_ptr(png_ptr));
        size_t size = mem->size + length;
 
        if (mem->data) {
@@ -1240,12 +1356,14 @@ Image::png_error (char const * message)
        throw EncodeError (String::compose ("Error during PNG write: %1", message));
 }
 
-dcp::Data
+dcp::ArrayData
 Image::as_png () const
 {
        DCPOMATIC_ASSERT (bytes_per_pixel(0) == 4);
        DCPOMATIC_ASSERT (planes() == 1);
-       DCPOMATIC_ASSERT (pixel_format() == AV_PIX_FMT_BGRA);
+       if (pixel_format() != AV_PIX_FMT_RGBA) {
+               return convert_pixel_format(dcp::YUVToRGB::REC709, AV_PIX_FMT_RGBA, true, false)->as_png();
+       }
 
        /* error handling? */
        png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, reinterpret_cast<void*>(const_cast<Image*>(this)), png_error_fn, 0);
@@ -1277,5 +1395,48 @@ Image::as_png () const
        png_destroy_write_struct (&png_ptr, &info_ptr);
        png_free (png_ptr, row_pointers);
 
-       return dcp::Data (state.data, state.size);
+       return dcp::ArrayData (state.data, state.size);
 }
+
+
+void
+Image::video_range_to_full_range ()
+{
+       switch (_pixel_format) {
+       case AV_PIX_FMT_RGB24:
+       {
+               float const factor = 256.0 / 219.0;
+               uint8_t* p = data()[0];
+               int const lines = sample_size(0).height;
+               for (int y = 0; y < lines; ++y) {
+                       uint8_t* q = p;
+                       for (int x = 0; x < line_size()[0]; ++x) {
+                               *q = int((*q - 16) * factor);
+                               ++q;
+                       }
+                       p += stride()[0];
+               }
+               break;
+       }
+       case AV_PIX_FMT_GBRP12LE:
+       {
+               float const factor = 4096.0 / 3504.0;
+               for (int c = 0; c < 3; ++c) {
+                       uint16_t* p = reinterpret_cast<uint16_t*>(data()[c]);
+                       int const lines = sample_size(c).height;
+                       for (int y = 0; y < lines; ++y) {
+                               uint16_t* q = p;
+                               int const line_size_pixels = line_size()[c] / 2;
+                               for (int x = 0; x < line_size_pixels; ++x) {
+                                       *q = int((*q - 256) * factor);
+                                       ++q;
+                               }
+                       }
+               }
+               break;
+       }
+       default:
+               throw PixelFormatError ("video_range_to_full_range()", _pixel_format);
+       }
+}
+