Port make_black fix for YUVA from master.

[dcpomatic.git] / src / lib / image.cc
diff --git a/src/lib/image.cc b/src/lib/image.cc

index bdf7fd173c9d860db6cb8f749700d9fbea641075..9a3aa8d45b036557fcb6f0297cc92afbf519aeb1 100644 (file)
--- a/src/lib/image.cc
+++ b/src/lib/image.cc
@@ -79,7 +79,7 @@ Image::components () const
  }
  
  shared_ptr<Image>
-Image::scale (libdcp::Size out_size, Scaler const * scaler, bool result_aligned) const
+Image::scale (libdcp::Size out_size, Scaler const * scaler, AVPixelFormat result_format, bool result_aligned) const
  {
         assert (scaler);
         /* Empirical testing suggests that sws_scale() will crash if
@@ -87,11 +87,11 @@ Image::scale (libdcp::Size out_size, Scaler const * scaler, bool result_aligned)
         */
         assert (aligned ());
  
-       shared_ptr<Image> scaled (new Image (pixel_format(), out_size, result_aligned));
+       shared_ptr<Image> scaled (new Image (result_format, out_size, result_aligned));
  
         struct SwsContext* scale_context = sws_getContext (
                 size().width, size().height, pixel_format(),
-               out_size.width, out_size.height, pixel_format(),
+               out_size.width, out_size.height, result_format,
                 scaler->ffmpeg_id (), 0, 0, 0
                 );
  
@@ -107,40 +107,6 @@ Image::scale (libdcp::Size out_size, Scaler const * scaler, bool result_aligned)
         return scaled;
  }
  
-/** Scale this image to a given size and convert it to RGB.
- *  @param out_size Output image size in pixels.
- *  @param scaler Scaler to use.
- */
-shared_ptr<Image>
-Image::scale_and_convert_to_rgb (libdcp::Size out_size, Scaler const * scaler, bool result_aligned) const
-{
-       assert (scaler);
-       /* Empirical testing suggests that sws_scale() will crash if
-          the input image is not aligned.
-       */
-       assert (aligned ());
-
-       shared_ptr<Image> rgb (new Image (PIX_FMT_RGB24, out_size, result_aligned));
-
-       struct SwsContext* scale_context = sws_getContext (
-               size().width, size().height, pixel_format(),
-               out_size.width, out_size.height, PIX_FMT_RGB24,
-               scaler->ffmpeg_id (), 0, 0, 0
-               );
-
-       /* Scale and convert to RGB from whatever its currently in (which may be RGB) */
-       sws_scale (
-               scale_context,
-               data(), stride(),
-               0, size().height,
-               rgb->data(), rgb->stride()
-               );
-
-       sws_freeContext (scale_context);
-
-       return rgb;
-}
-
  /** Run a FFmpeg post-process on this image and return the processed version.
   *  @param pp Flags for the required set of post processes.
   *  @return Post-processed image.
@@ -219,18 +185,23 @@ Image::crop (Crop crop, bool aligned) const
  
  /** Blacken a YUV image whose bits per pixel is rounded up to 16 */
  void
-Image::yuv_16_black (uint16_t v)
+Image::yuv_16_black (uint16_t v, bool alpha)
  {
         memset (data()[0], 0, lines(0) * stride()[0]);
         for (int i = 1; i < 3; ++i) {
                 int16_t* p = reinterpret_cast<int16_t*> (data()[i]);
-               for (int y = 0; y < size().height; ++y) {
+               for (int y = 0; y < lines(i); ++y) {
+                       /* We divide by 2 here because we are writing 2 bytes at a time */
                         for (int x = 0; x < line_size()[i] / 2; ++x) {
                                 p[x] = v;
                         }
                         p += stride()[i] / 2;
                 }
         }
+
+       if (alpha) {
+               memset (data()[3], 0, lines(3) * stride()[3]);
+       }
  }
  
  uint16_t
@@ -270,27 +241,63 @@ Image::make_black ()
  
         case PIX_FMT_YUV422P9LE:
         case PIX_FMT_YUV444P9LE:
-               yuv_16_black (nine_bit_uv);
+               yuv_16_black (nine_bit_uv, false);
                 break;
  
         case PIX_FMT_YUV422P9BE:
         case PIX_FMT_YUV444P9BE:
-               yuv_16_black (swap_16 (nine_bit_uv));
+               yuv_16_black (swap_16 (nine_bit_uv), false);
                 break;
                 
         case PIX_FMT_YUV422P10LE:
         case PIX_FMT_YUV444P10LE:
-               yuv_16_black (ten_bit_uv);
+               yuv_16_black (ten_bit_uv, false);
                 break;
  
         case PIX_FMT_YUV422P16LE:
         case PIX_FMT_YUV444P16LE:
-               yuv_16_black (sixteen_bit_uv);
+               yuv_16_black (sixteen_bit_uv, false);
                 break;
                 
         case PIX_FMT_YUV444P10BE:
         case PIX_FMT_YUV422P10BE:
-               yuv_16_black (swap_16 (ten_bit_uv));
+               yuv_16_black (swap_16 (ten_bit_uv), false);
+               break;
+
+       case AV_PIX_FMT_YUVA420P9BE:
+       case AV_PIX_FMT_YUVA422P9BE:
+       case AV_PIX_FMT_YUVA444P9BE:
+               yuv_16_black (swap_16 (nine_bit_uv), true);
+               break;
+               
+       case AV_PIX_FMT_YUVA420P9LE:
+       case AV_PIX_FMT_YUVA422P9LE:
+       case AV_PIX_FMT_YUVA444P9LE:
+               yuv_16_black (nine_bit_uv, true);
+               break;
+               
+       case AV_PIX_FMT_YUVA420P10BE:
+       case AV_PIX_FMT_YUVA422P10BE:
+       case AV_PIX_FMT_YUVA444P10BE:
+               yuv_16_black (swap_16 (ten_bit_uv), true);
+               break;
+               
+       case AV_PIX_FMT_YUVA420P10LE:
+       case AV_PIX_FMT_YUVA422P10LE:
+       case AV_PIX_FMT_YUVA444P10LE:
+               yuv_16_black (ten_bit_uv, true);
+               break;
+               
+       case AV_PIX_FMT_YUVA420P16BE:
+       case AV_PIX_FMT_YUVA422P16BE:
+       case AV_PIX_FMT_YUVA444P16BE:
+               yuv_16_black (swap_16 (sixteen_bit_uv), true);
+               break;
+               
+       case AV_PIX_FMT_YUVA420P16LE:
+       case AV_PIX_FMT_YUVA422P16LE:
+       case AV_PIX_FMT_YUVA444P16LE:
+               yuv_16_black (sixteen_bit_uv, true);
                 break;
  
         case PIX_FMT_RGB24:             
@@ -454,9 +461,19 @@ Image::allocate ()
         _stride[0] = _stride[1] = _stride[2] = _stride[3] = 0;
  
         for (int i = 0; i < components(); ++i) {
-               _line_size[i] = _size.width * bytes_per_pixel(i);
+               _line_size[i] = ceil (_size.width * bytes_per_pixel(i));
                 _stride[i] = stride_round_up (i, _line_size, _aligned ? 32 : 1);
-               _data[i] = (uint8_t *) av_malloc (_stride[i] * lines (i));
+
+               /* The assembler function ff_rgb24ToY_avx (in libswscale/x86/input.asm)
+                  uses a 16-byte fetch to read three bytes (R/G/B) of image data, so
+                  on the last pixel of the last line it reads 1 byte past the end of
+                  the actual data.  If the width of an image is a multiple of the
+                  stride alignment there is no padding at the end of image lines to
+                  absorb that read.  OS X crashes on this illegal read, though other
+                  operating systems don't seem to mind.  The extra + 1 in the av_malloc
+                  below ensures there is always a byte for that instruction to read safely.
+               */
+               _data[i] = (uint8_t *) av_malloc (_stride[i] * lines (i) + 1);
         }
  }