src/lib/image.cc

   1 /*
   2     Copyright (C) 2012-2021 Carl Hetherington <cth@carlh.net>
   3
   4     This file is part of DCP-o-matic.
   5
   6     DCP-o-matic is free software; you can redistribute it and/or modify
   7     it under the terms of the GNU General Public License as published by
   8     the Free Software Foundation; either version 2 of the License, or
   9     (at your option) any later version.
  10
  11     DCP-o-matic is distributed in the hope that it will be useful,
  12     but WITHOUT ANY WARRANTY; without even the implied warranty of
  13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14     GNU General Public License for more details.
  15
  16     You should have received a copy of the GNU General Public License
  17     along with DCP-o-matic.  If not, see <http://www.gnu.org/licenses/>.
  18
  19 */
  20
  21
  22 /** @file src/image.cc
  23  *  @brief A class to describe a video image.
  24  */
  25
  26
  27 #include "compose.hpp"
  28 #include "dcpomatic_assert.h"
  29 #include "dcpomatic_socket.h"
  30 #include "enum_indexed_vector.h"
  31 #include "exceptions.h"
  32 #include "image.h"
  33 #include "maths_util.h"
  34 #include "memory_util.h"
  35 #include "rect.h"
  36 #include "timer.h"
  37 #include <dcp/rgb_xyz.h>
  38 #include <dcp/transfer_function.h>
  39 #include <dcp/warnings.h>
  40 LIBDCP_DISABLE_WARNINGS
  41 extern "C" {
  42 #include <libavutil/frame.h>
  43 #include <libavutil/pixdesc.h>
  44 #include <libavutil/pixfmt.h>
  45 #include <libswscale/swscale.h>
  46 }
  47 LIBDCP_ENABLE_WARNINGS
  48 #if HAVE_VALGRIND_MEMCHECK_H
  49 #include <valgrind/memcheck.h>
  50 #endif
  51 #include <iostream>
  52
  53
  54 #include "i18n.h"
  55
  56
  57 using std::cerr;
  58 using std::cout;
  59 using std::list;
  60 using std::make_shared;
  61 using std::max;
  62 using std::min;
  63 using std::runtime_error;
  64 using std::shared_ptr;
  65 using std::string;
  66 using dcp::Size;
  67
  68
  69 /** The memory alignment, in bytes, used for each row of an image if Alignment::PADDED is requested */
  70 int constexpr ALIGNMENT = 64;
  71
  72 /* U/V black value for 8-bit colour */
  73 static uint8_t const eight_bit_uv =     (1 << 7) - 1;
  74 /* U/V black value for 9-bit colour */
  75 static uint16_t const nine_bit_uv =     (1 << 8) - 1;
  76 /* U/V black value for 10-bit colour */
  77 static uint16_t const ten_bit_uv =      (1 << 9) - 1;
  78 /* U/V black value for 16-bit colour */
  79 static uint16_t const sixteen_bit_uv =  (1 << 15) - 1;
  80
  81
  82 int
  83 Image::vertical_factor (int n) const
  84 {
  85         if (n == 0) {
  86                 return 1;
  87         }
  88
  89         auto d = av_pix_fmt_desc_get(_pixel_format);
  90         if (!d) {
  91                 throw PixelFormatError ("line_factor()", _pixel_format);
  92         }
  93
  94         return lrintf(powf(2.0f, d->log2_chroma_h));
  95 }
  96
  97 int
  98 Image::horizontal_factor (int n) const
  99 {
 100         if (n == 0) {
 101                 return 1;
 102         }
 103
 104         auto d = av_pix_fmt_desc_get(_pixel_format);
 105         if (!d) {
 106                 throw PixelFormatError ("sample_size()", _pixel_format);
 107         }
 108
 109         return lrintf(powf(2.0f, d->log2_chroma_w));
 110 }
 111
 112
 113 /** @param n Component index.
 114  *  @return Number of samples (i.e. pixels, unless sub-sampled) in each direction for this component.
 115  */
 116 dcp::Size
 117 Image::sample_size (int n) const
 118 {
 119         return dcp::Size (
 120                 lrint (ceil(static_cast<double>(size().width) / horizontal_factor(n))),
 121                 lrint (ceil(static_cast<double>(size().height) / vertical_factor(n)))
 122                 );
 123 }
 124
 125
 126 /** @return Number of planes */
 127 int
 128 Image::planes () const
 129 {
 130         if (_pixel_format == AV_PIX_FMT_PAL8) {
 131                 return 2;
 132         }
 133
 134         auto d = av_pix_fmt_desc_get(_pixel_format);
 135         if (!d) {
 136                 throw PixelFormatError ("planes()", _pixel_format);
 137         }
 138
 139         if ((d->flags & AV_PIX_FMT_FLAG_PLANAR) == 0) {
 140                 return 1;
 141         }
 142
 143         return d->nb_components;
 144 }
 145
 146
 147 static
 148 int
 149 round_width_for_subsampling (int p, AVPixFmtDescriptor const * desc)
 150 {
 151         return p & ~ ((1 << desc->log2_chroma_w) - 1);
 152 }
 153
 154
 155 static
 156 int
 157 round_height_for_subsampling (int p, AVPixFmtDescriptor const * desc)
 158 {
 159         return p & ~ ((1 << desc->log2_chroma_h) - 1);
 160 }
 161
 162
 163 /** Crop this image, scale it to `inter_size' and then place it in a black frame of `out_size'.
 164  *  @param crop Amount to crop by.
 165  *  @param inter_size Size to scale the cropped image to.
 166  *  @param out_size Size of output frame; if this is larger than inter_size there will be black padding.
 167  *  @param yuv_to_rgb YUV to RGB transformation to use, if required.
 168  *  @param video_range Video range of the image.
 169  *  @param out_format Output pixel format.
 170  *  @param out_aligned true to make the output image aligned.
 171  *  @param out_video_range Video range to use for the output image.
 172  *  @param fast Try to be fast at the possible expense of quality; at present this means using
 173  *  fast bilinear rather than bicubic scaling.
 174  */
 175 shared_ptr<Image>
 176 Image::crop_scale_window (
 177         Crop crop,
 178         dcp::Size inter_size,
 179         dcp::Size out_size,
 180         dcp::YUVToRGB yuv_to_rgb,
 181         VideoRange video_range,
 182         AVPixelFormat out_format,
 183         VideoRange out_video_range,
 184         Alignment out_alignment,
 185         bool fast
 186         ) const
 187 {
 188         /* Empirical testing suggests that sws_scale() will crash if
 189            the input image is not padded.
 190         */
 191         DCPOMATIC_ASSERT (alignment() == Alignment::PADDED);
 192
 193         DCPOMATIC_ASSERT (out_size.width >= inter_size.width);
 194         DCPOMATIC_ASSERT (out_size.height >= inter_size.height);
 195
 196         auto out = make_shared<Image>(out_format, out_size, out_alignment);
 197         out->make_black ();
 198
 199         auto in_desc = av_pix_fmt_desc_get (_pixel_format);
 200         if (!in_desc) {
 201                 throw PixelFormatError ("crop_scale_window()", _pixel_format);
 202         }
 203
 204         /* Round down so that we crop only the number of pixels that is straightforward
 205          * considering any subsampling.
 206          */
 207         Crop corrected_crop(
 208                 round_width_for_subsampling(crop.left, in_desc),
 209                 round_width_for_subsampling(crop.right, in_desc),
 210                 round_height_for_subsampling(crop.top, in_desc),
 211                 round_height_for_subsampling(crop.bottom, in_desc)
 212                 );
 213
 214         /* Also check that we aren't cropping more image than there actually is */
 215         if ((corrected_crop.left + corrected_crop.right) >= (size().width - 4)) {
 216                 corrected_crop.left = 0;
 217                 corrected_crop.right = size().width - 4;
 218         }
 219
 220         if ((corrected_crop.top + corrected_crop.bottom) >= (size().height - 4)) {
 221                 corrected_crop.top = 0;
 222                 corrected_crop.bottom = size().height - 4;
 223         }
 224
 225         /* Size of the image after any crop */
 226         auto const cropped_size = corrected_crop.apply (size());
 227
 228         /* Scale context for a scale from cropped_size to inter_size */
 229         auto scale_context = sws_getContext (
 230                         cropped_size.width, cropped_size.height, pixel_format(),
 231                         inter_size.width, inter_size.height, out_format,
 232                         fast ? SWS_FAST_BILINEAR : SWS_BICUBIC, 0, 0, 0
 233                 );
 234
 235         if (!scale_context) {
 236                 throw runtime_error (N_("Could not allocate SwsContext"));
 237         }
 238
 239         DCPOMATIC_ASSERT (yuv_to_rgb < dcp::YUVToRGB::COUNT);
 240         EnumIndexedVector<int, dcp::YUVToRGB> lut;
 241         lut[dcp::YUVToRGB::REC601] = SWS_CS_ITU601;
 242         lut[dcp::YUVToRGB::REC709] = SWS_CS_ITU709;
 243         lut[dcp::YUVToRGB::REC2020] = SWS_CS_BT2020;
 244
 245         /* The 3rd parameter here is:
 246            0 -> source range MPEG (i.e. "video", 16-235)
 247            1 -> source range JPEG (i.e. "full", 0-255)
 248            And the 5th:
 249            0 -> destination range MPEG (i.e. "video", 16-235)
 250            1 -> destination range JPEG (i.e. "full", 0-255)
 251
 252            But remember: sws_setColorspaceDetails ignores these
 253            parameters unless the both source and destination images
 254            are isYUV or isGray.  (If either is not, it uses video range).
 255         */
 256         sws_setColorspaceDetails (
 257                 scale_context,
 258                 sws_getCoefficients(lut[yuv_to_rgb]), video_range == VideoRange::VIDEO ? 0 : 1,
 259                 sws_getCoefficients(lut[yuv_to_rgb]), out_video_range == VideoRange::VIDEO ? 0 : 1,
 260                 0, 1 << 16, 1 << 16
 261                 );
 262
 263         /* Prepare input data pointers with crop */
 264         uint8_t* scale_in_data[planes()];
 265         for (int c = 0; c < planes(); ++c) {
 266                 int const x = lrintf(bytes_per_pixel(c) * corrected_crop.left);
 267                 scale_in_data[c] = data()[c] + x + stride()[c] * (corrected_crop.top / vertical_factor(c));
 268         }
 269
 270         auto out_desc = av_pix_fmt_desc_get (out_format);
 271         if (!out_desc) {
 272                 throw PixelFormatError ("crop_scale_window()", out_format);
 273         }
 274
 275         /* Corner of the image within out_size */
 276         Position<int> const corner (
 277                 round_width_for_subsampling((out_size.width - inter_size.width) / 2, out_desc),
 278                 round_height_for_subsampling((out_size.height - inter_size.height) / 2, out_desc)
 279                 );
 280
 281         uint8_t* scale_out_data[out->planes()];
 282         for (int c = 0; c < out->planes(); ++c) {
 283                 int const x = lrintf(out->bytes_per_pixel(c) * corner.x);
 284                 scale_out_data[c] = out->data()[c] + x + out->stride()[c] * (corner.y / out->vertical_factor(c));
 285         }
 286
 287         sws_scale (
 288                 scale_context,
 289                 scale_in_data, stride(),
 290                 0, cropped_size.height,
 291                 scale_out_data, out->stride()
 292                 );
 293
 294         sws_freeContext (scale_context);
 295
 296         /* There are some cases where there will be unwanted image data left in the image at this point:
 297          *
 298          * 1. When we are cropping without any scaling or pixel format conversion.
 299          * 2. When we are scaling to certain sizes and placing the result into a larger
 300          *    black frame.
 301          *
 302          * Clear out the sides of the image to take care of those cases.
 303          */
 304         auto const pad = (out_size.width - inter_size.width) / 2;
 305         out->make_part_black(0, pad);
 306         out->make_part_black(corner.x + inter_size.width, pad);
 307
 308         if (
 309                 video_range == VideoRange::VIDEO &&
 310                 out_video_range == VideoRange::FULL &&
 311                 av_pix_fmt_desc_get(_pixel_format)->flags & AV_PIX_FMT_FLAG_RGB
 312            ) {
 313                 /* libswscale will not convert video range for RGB sources, so we have to do it ourselves */
 314                 out->video_range_to_full_range ();
 315         }
 316
 317         return out;
 318 }
 319
 320
 321 shared_ptr<Image>
 322 Image::convert_pixel_format (dcp::YUVToRGB yuv_to_rgb, AVPixelFormat out_format, Alignment out_alignment, bool fast) const
 323 {
 324         return scale(size(), yuv_to_rgb, out_format, out_alignment, fast);
 325 }
 326
 327
 328 /** @param out_size Size to scale to.
 329  *  @param yuv_to_rgb YUVToRGB transform transform to use, if required.
 330  *  @param out_format Output pixel format.
 331  *  @param out_alignment Output alignment.
 332  *  @param fast Try to be fast at the possible expense of quality; at present this means using
 333  *  fast bilinear rather than bicubic scaling.
 334  */
 335 shared_ptr<Image>
 336 Image::scale (dcp::Size out_size, dcp::YUVToRGB yuv_to_rgb, AVPixelFormat out_format, Alignment out_alignment, bool fast) const
 337 {
 338         /* Empirical testing suggests that sws_scale() will crash if
 339            the input image alignment is not PADDED.
 340         */
 341         DCPOMATIC_ASSERT (alignment() == Alignment::PADDED);
 342         DCPOMATIC_ASSERT(size().width > 0);
 343         DCPOMATIC_ASSERT(size().height > 0);
 344         DCPOMATIC_ASSERT(out_size.width > 0);
 345         DCPOMATIC_ASSERT(out_size.height > 0);
 346
 347         auto scaled = make_shared<Image>(out_format, out_size, out_alignment);
 348         auto scale_context = sws_getContext (
 349                 size().width, size().height, pixel_format(),
 350                 out_size.width, out_size.height, out_format,
 351                 (fast ? SWS_FAST_BILINEAR : SWS_BICUBIC) | SWS_ACCURATE_RND, 0, 0, 0
 352                 );
 353
 354         DCPOMATIC_ASSERT(scale_context);
 355
 356         DCPOMATIC_ASSERT (yuv_to_rgb < dcp::YUVToRGB::COUNT);
 357         EnumIndexedVector<int, dcp::YUVToRGB> lut;
 358         lut[dcp::YUVToRGB::REC601] = SWS_CS_ITU601;
 359         lut[dcp::YUVToRGB::REC709] = SWS_CS_ITU709;
 360         lut[dcp::YUVToRGB::REC2020] = SWS_CS_BT2020;
 361
 362         /* The 3rd parameter here is:
 363            0 -> source range MPEG (i.e. "video", 16-235)
 364            1 -> source range JPEG (i.e. "full", 0-255)
 365            And the 5th:
 366            0 -> destination range MPEG (i.e. "video", 16-235)
 367            1 -> destination range JPEG (i.e. "full", 0-255)
 368
 369            But remember: sws_setColorspaceDetails ignores these
 370            parameters unless the corresponding image isYUV or isGray.
 371            (If it's neither, it uses video range).
 372         */
 373         sws_setColorspaceDetails (
 374                 scale_context,
 375                 sws_getCoefficients(lut[yuv_to_rgb]), 0,
 376                 sws_getCoefficients(lut[yuv_to_rgb]), 0,
 377                 0, 1 << 16, 1 << 16
 378                 );
 379
 380         sws_scale (
 381                 scale_context,
 382                 data(), stride(),
 383                 0, size().height,
 384                 scaled->data(), scaled->stride()
 385                 );
 386
 387         sws_freeContext (scale_context);
 388
 389         return scaled;
 390 }
 391
 392
 393 /** Blacken a YUV image whose bits per pixel is rounded up to 16 */
 394 void
 395 Image::yuv_16_black (uint16_t v, bool alpha)
 396 {
 397         memset (data()[0], 0, sample_size(0).height * stride()[0]);
 398         for (int i = 1; i < 3; ++i) {
 399                 auto p = reinterpret_cast<int16_t*> (data()[i]);
 400                 int const lines = sample_size(i).height;
 401                 for (int y = 0; y < lines; ++y) {
 402                         /* We divide by 2 here because we are writing 2 bytes at a time */
 403                         for (int x = 0; x < line_size()[i] / 2; ++x) {
 404                                 p[x] = v;
 405                         }
 406                         p += stride()[i] / 2;
 407                 }
 408         }
 409
 410         if (alpha) {
 411                 memset (data()[3], 0, sample_size(3).height * stride()[3]);
 412         }
 413 }
 414
 415
 416 uint16_t
 417 Image::swap_16 (uint16_t v)
 418 {
 419         return ((v >> 8) & 0xff) | ((v & 0xff) << 8);
 420 }
 421
 422
 423 void
 424 Image::make_part_black (int const start, int const width)
 425 {
 426         auto y_part = [&]() {
 427                 int const bpp = bytes_per_pixel(0);
 428                 int const h = sample_size(0).height;
 429                 int const s = stride()[0];
 430                 auto p = data()[0];
 431                 for (int y = 0; y < h; ++y) {
 432                         memset (p + start * bpp, 0, width * bpp);
 433                         p += s;
 434                 }
 435         };
 436
 437         switch (_pixel_format) {
 438         case AV_PIX_FMT_RGB24:
 439         case AV_PIX_FMT_ARGB:
 440         case AV_PIX_FMT_RGBA:
 441         case AV_PIX_FMT_ABGR:
 442         case AV_PIX_FMT_BGRA:
 443         case AV_PIX_FMT_RGB555LE:
 444         case AV_PIX_FMT_RGB48LE:
 445         case AV_PIX_FMT_RGB48BE:
 446         case AV_PIX_FMT_XYZ12LE:
 447         {
 448                 int const h = sample_size(0).height;
 449                 int const bpp = bytes_per_pixel(0);
 450                 int const s = stride()[0];
 451                 uint8_t* p = data()[0];
 452                 for (int y = 0; y < h; y++) {
 453                         memset (p + start * bpp, 0, width * bpp);
 454                         p += s;
 455                 }
 456                 break;
 457         }
 458         case AV_PIX_FMT_YUV420P:
 459         {
 460                 y_part ();
 461                 for (int i = 1; i < 3; ++i) {
 462                         auto p = data()[i];
 463                         int const h = sample_size(i).height;
 464                         for (int y = 0; y < h; ++y) {
 465                                 for (int x = start / 2; x < (start + width) / 2; ++x) {
 466                                         p[x] = eight_bit_uv;
 467                                 }
 468                                 p += stride()[i];
 469                         }
 470                 }
 471                 break;
 472         }
 473         case AV_PIX_FMT_YUV422P10LE:
 474         {
 475                 y_part ();
 476                 for (int i = 1; i < 3; ++i) {
 477                         auto p = reinterpret_cast<int16_t*>(data()[i]);
 478                         int const h = sample_size(i).height;
 479                         for (int y = 0; y < h; ++y) {
 480                                 for (int x = start / 2; x < (start + width) / 2; ++x) {
 481                                         p[x] = ten_bit_uv;
 482                                 }
 483                                 p += stride()[i] / 2;
 484                         }
 485                 }
 486                 break;
 487         }
 488         case AV_PIX_FMT_YUV444P10LE:
 489         {
 490                 y_part();
 491                 for (int i = 1; i < 3; ++i) {
 492                         auto p = reinterpret_cast<int16_t*>(data()[i]);
 493                         int const h = sample_size(i).height;
 494                         for (int y = 0; y < h; ++y) {
 495                                 for (int x = start; x < (start + width); ++x) {
 496                                         p[x] = ten_bit_uv;
 497                                 }
 498                                 p += stride()[i] / 2;
 499                         }
 500                 }
 501                 break;
 502         }
 503         default:
 504                 throw PixelFormatError ("make_part_black()", _pixel_format);
 505         }
 506 }
 507
 508
 509 void
 510 Image::make_black ()
 511 {
 512         switch (_pixel_format) {
 513         case AV_PIX_FMT_YUV420P:
 514         case AV_PIX_FMT_YUV422P:
 515         case AV_PIX_FMT_YUV444P:
 516         case AV_PIX_FMT_YUV411P:
 517                 memset (data()[0], 0, sample_size(0).height * stride()[0]);
 518                 memset (data()[1], eight_bit_uv, sample_size(1).height * stride()[1]);
 519                 memset (data()[2], eight_bit_uv, sample_size(2).height * stride()[2]);
 520                 break;
 521
 522         case AV_PIX_FMT_YUVJ420P:
 523         case AV_PIX_FMT_YUVJ422P:
 524         case AV_PIX_FMT_YUVJ444P:
 525                 memset (data()[0], 0, sample_size(0).height * stride()[0]);
 526                 memset (data()[1], eight_bit_uv + 1, sample_size(1).height * stride()[1]);
 527                 memset (data()[2], eight_bit_uv + 1, sample_size(2).height * stride()[2]);
 528                 break;
 529
 530         case AV_PIX_FMT_YUV422P9LE:
 531         case AV_PIX_FMT_YUV444P9LE:
 532                 yuv_16_black (nine_bit_uv, false);
 533                 break;
 534
 535         case AV_PIX_FMT_YUV422P9BE:
 536         case AV_PIX_FMT_YUV444P9BE:
 537                 yuv_16_black (swap_16 (nine_bit_uv), false);
 538                 break;
 539
 540         case AV_PIX_FMT_YUV422P10LE:
 541         case AV_PIX_FMT_YUV444P10LE:
 542                 yuv_16_black (ten_bit_uv, false);
 543                 break;
 544
 545         case AV_PIX_FMT_YUV422P16LE:
 546         case AV_PIX_FMT_YUV444P16LE:
 547                 yuv_16_black (sixteen_bit_uv, false);
 548                 break;
 549
 550         case AV_PIX_FMT_YUV444P10BE:
 551         case AV_PIX_FMT_YUV422P10BE:
 552                 yuv_16_black (swap_16 (ten_bit_uv), false);
 553                 break;
 554
 555         case AV_PIX_FMT_YUVA420P9BE:
 556         case AV_PIX_FMT_YUVA422P9BE:
 557         case AV_PIX_FMT_YUVA444P9BE:
 558                 yuv_16_black (swap_16 (nine_bit_uv), true);
 559                 break;
 560
 561         case AV_PIX_FMT_YUVA420P9LE:
 562         case AV_PIX_FMT_YUVA422P9LE:
 563         case AV_PIX_FMT_YUVA444P9LE:
 564                 yuv_16_black (nine_bit_uv, true);
 565                 break;
 566
 567         case AV_PIX_FMT_YUVA420P10BE:
 568         case AV_PIX_FMT_YUVA422P10BE:
 569         case AV_PIX_FMT_YUVA444P10BE:
 570                 yuv_16_black (swap_16 (ten_bit_uv), true);
 571                 break;
 572
 573         case AV_PIX_FMT_YUVA420P10LE:
 574         case AV_PIX_FMT_YUVA422P10LE:
 575         case AV_PIX_FMT_YUVA444P10LE:
 576                 yuv_16_black (ten_bit_uv, true);
 577                 break;
 578
 579         case AV_PIX_FMT_YUVA420P16BE:
 580         case AV_PIX_FMT_YUVA422P16BE:
 581         case AV_PIX_FMT_YUVA444P16BE:
 582                 yuv_16_black (swap_16 (sixteen_bit_uv), true);
 583                 break;
 584
 585         case AV_PIX_FMT_YUVA420P16LE:
 586         case AV_PIX_FMT_YUVA422P16LE:
 587         case AV_PIX_FMT_YUVA444P16LE:
 588                 yuv_16_black (sixteen_bit_uv, true);
 589                 break;
 590
 591         case AV_PIX_FMT_RGB24:
 592         case AV_PIX_FMT_ARGB:
 593         case AV_PIX_FMT_RGBA:
 594         case AV_PIX_FMT_ABGR:
 595         case AV_PIX_FMT_BGRA:
 596         case AV_PIX_FMT_RGB555LE:
 597         case AV_PIX_FMT_RGB48LE:
 598         case AV_PIX_FMT_RGB48BE:
 599         case AV_PIX_FMT_XYZ12LE:
 600                 memset (data()[0], 0, sample_size(0).height * stride()[0]);
 601                 break;
 602
 603         case AV_PIX_FMT_UYVY422:
 604         {
 605                 int const Y = sample_size(0).height;
 606                 int const X = line_size()[0];
 607                 uint8_t* p = data()[0];
 608                 for (int y = 0; y < Y; ++y) {
 609                         for (int x = 0; x < X / 4; ++x) {
 610                                 *p++ = eight_bit_uv; // Cb
 611                                 *p++ = 0;            // Y0
 612                                 *p++ = eight_bit_uv; // Cr
 613                                 *p++ = 0;            // Y1
 614                         }
 615                 }
 616                 break;
 617         }
 618
 619         default:
 620                 throw PixelFormatError ("make_black()", _pixel_format);
 621         }
 622 }
 623
 624
 625 void
 626 Image::make_transparent ()
 627 {
 628         if (_pixel_format != AV_PIX_FMT_BGRA && _pixel_format != AV_PIX_FMT_RGBA && _pixel_format != AV_PIX_FMT_RGBA64BE) {
 629                 throw PixelFormatError ("make_transparent()", _pixel_format);
 630         }
 631
 632         memset (data()[0], 0, sample_size(0).height * stride()[0]);
 633 }
 634
 635
 636 struct TargetParams
 637 {
 638         int start_x;
 639         int start_y;
 640         dcp::Size size;
 641         uint8_t* const* data;
 642         int const* stride;
 643         int bpp;
 644
 645         uint8_t* line_pointer(int y) const {
 646                 return data[0] + y * stride[0] + start_x * bpp;
 647         }
 648 };
 649
 650
 651 /** Parameters of the other image (the one being blended onto the target) when target and other are RGB */
 652 struct OtherRGBParams
 653 {
 654         int start_x;
 655         int start_y;
 656         dcp::Size size;
 657         uint8_t* const* data;
 658         int const* stride;
 659         int bpp;
 660
 661         uint8_t* line_pointer(int y) const {
 662                 return data[0] + y * stride[0];
 663         }
 664
 665         float alpha_divisor() const {
 666                 return pow(2, bpp * 2) - 1;
 667         }
 668 };
 669
 670
/** Parameters of the other image (the one being blended onto the target) when target and other are YUV */
struct OtherYUVParams
{
        /* Position in the other image at which blending starts.
         * NOTE(review): presumably the same meaning as in OtherRGBParams; confirm against the users of this struct.
         */
        int start_x;
        int start_y;
        /* Size of the other image */
        dcp::Size size;
        /* Per-plane data pointers and strides for the YUV image */
        uint8_t* const* data;
        int const* stride;

        /* Data pointers, strides and bytes-per-pixel of the image that supplies alpha.
         * NOTE(review): presumably a separate image carrying an alpha channel; confirm against the caller.
         */
        uint8_t* const* alpha_data;
        int const* alpha_stride;
        int alpha_bpp;
};
 684
 685
 686 template <class OtherType>
 687 void
 688 alpha_blend_onto_rgb24(TargetParams const& target, OtherRGBParams const& other, int red, int blue, std::function<float (OtherType*)> get, int value_divisor)
 689 {
 690         /* Going onto RGB24.  First byte is red, second green, third blue */
 691         auto const alpha_divisor = other.alpha_divisor();
 692         for (int ty = target.start_y, oy = other.start_y; ty < target.size.height && oy < other.size.height; ++ty, ++oy) {
 693                 auto tp = target.line_pointer(ty);
 694                 auto op = reinterpret_cast<OtherType*>(other.line_pointer(oy));
 695                 for (int tx = target.start_x, ox = other.start_x; tx < target.size.width && ox < other.size.width; ++tx, ++ox) {
 696                         float const alpha = get(op + 3) / alpha_divisor;
 697                         tp[0] = (get(op + red) / value_divisor) * alpha + tp[0] * (1 - alpha);
 698                         tp[1] = (get(op + 1) / value_divisor) * alpha + tp[1] * (1 - alpha);
 699                         tp[2] = (get(op + blue) / value_divisor) * alpha + tp[2] * (1 - alpha);
 700
 701                         tp += target.bpp;
 702                         op += other.bpp / sizeof(OtherType);
 703                 }
 704         }
 705 }
 706
 707
 708 template <class OtherType>
 709 void
 710 alpha_blend_onto_bgra(TargetParams const& target, OtherRGBParams const& other, int red, int blue, std::function<float (OtherType*)> get, int value_divisor)
 711 {
 712         auto const alpha_divisor = other.alpha_divisor();
 713         for (int ty = target.start_y, oy = other.start_y; ty < target.size.height && oy < other.size.height; ++ty, ++oy) {
 714                 auto tp = target.line_pointer(ty);
 715                 auto op = reinterpret_cast<OtherType*>(other.line_pointer(oy));
 716                 for (int tx = target.start_x, ox = other.start_x; tx < target.size.width && ox < other.size.width; ++tx, ++ox) {
 717                         float const alpha = get(op + 3) / alpha_divisor;
 718                         tp[0] = (get(op + blue) / value_divisor) * alpha + tp[0] * (1 - alpha);
 719                         tp[1] = (get(op + 1) / value_divisor) * alpha + tp[1] * (1 - alpha);
 720                         tp[2] = (get(op + red) / value_divisor) * alpha + tp[2] * (1 - alpha);
 721                         tp[3] = (get(op + 3) / value_divisor) * alpha + tp[3] * (1 - alpha);
 722
 723                         tp += target.bpp;
 724                         op += other.bpp / sizeof(OtherType);
 725                 }
 726         }
 727 }
 728
 729
 730 template <class OtherType>
 731 void
 732 alpha_blend_onto_rgba(TargetParams const& target, OtherRGBParams const& other, int red, int blue, std::function<float (OtherType*)> get, int value_divisor)
 733 {
 734         auto const alpha_divisor = other.alpha_divisor();
 735         for (int ty = target.start_y, oy = other.start_y; ty < target.size.height && oy < other.size.height; ++ty, ++oy) {
 736                 auto tp = target.line_pointer(ty);
 737                 auto op = reinterpret_cast<OtherType*>(other.line_pointer(oy));
 738                 for (int tx = target.start_x, ox = other.start_x; tx < target.size.width && ox < other.size.width; ++tx, ++ox) {
 739                         float const alpha = get(op + 3) / alpha_divisor;
 740                         tp[0] = (get(op + red) / value_divisor) * alpha + tp[0] * (1 - alpha);
 741                         tp[1] = (get(op + 1) / value_divisor) * alpha + tp[1] * (1 - alpha);
 742                         tp[2] = (get(op + blue) / value_divisor) * alpha + tp[2] * (1 - alpha);
 743                         tp[3] = (get(op + 3) / value_divisor) * alpha + tp[3] * (1 - alpha);
 744
 745                         tp += target.bpp;
 746                         op += other.bpp / sizeof(OtherType);
 747                 }
 748         }
 749 }
 750
 751
 752 template <class OtherType>
 753 void
 754 alpha_blend_onto_rgb48le(TargetParams const& target, OtherRGBParams const& other, int red, int blue, std::function<float (OtherType*)> get, int value_scale)
 755 {
 756         auto const alpha_divisor = other.alpha_divisor();
 757         for (int ty = target.start_y, oy = other.start_y; ty < target.size.height && oy < other.size.height; ++ty, ++oy) {
 758                 auto tp = reinterpret_cast<uint16_t*>(target.line_pointer(ty));
 759                 auto op = reinterpret_cast<OtherType*>(other.line_pointer(oy));
 760                 for (int tx = target.start_x, ox = other.start_x; tx < target.size.width && ox < other.size.width; ++tx, ++ox) {
 761                         float const alpha = get(op + 3) / alpha_divisor;
 762                         tp[0] = get(op + red) * value_scale * alpha + tp[0] * (1 - alpha);
 763                         tp[1] = get(op + 1) * value_scale * alpha + tp[1] * (1 - alpha);
 764                         tp[2] = get(op + blue) * value_scale * alpha + tp[2] * (1 - alpha);
 765
 766                         tp += target.bpp / 2;
 767                         op += other.bpp / sizeof(OtherType);
 768                 }
 769         }
 770 }
 771
 772
 773 template <class OtherType>
 774 void
 775 alpha_blend_onto_xyz12le(TargetParams const& target, OtherRGBParams const& other, int red, int blue, std::function<float (OtherType*)> get, int value_divisor)
 776 {
 777         auto const alpha_divisor = other.alpha_divisor();
 778         auto conv = dcp::ColourConversion::srgb_to_xyz();
 779         double fast_matrix[9];
 780         dcp::combined_rgb_to_xyz(conv, fast_matrix);
 781         auto lut_in = conv.in()->double_lut(0, 1, 8, false);
 782         auto lut_out = conv.out()->int_lut(0, 1, 16, true, 65535);
 783         for (int ty = target.start_y, oy = other.start_y; ty < target.size.height && oy < other.size.height; ++ty, ++oy) {
 784                 auto tp = reinterpret_cast<uint16_t*>(target.data[0] + ty * target.stride[0] + target.start_x * target.bpp);
 785                 auto op = reinterpret_cast<OtherType*>(other.data[0] + oy * other.stride[0]);
 786                 for (int tx = target.start_x, ox = other.start_x; tx < target.size.width && ox < other.size.width; ++tx, ++ox) {
 787                         float const alpha = get(op + 3) / alpha_divisor;
 788
 789                         /* Convert sRGB to XYZ; op is BGRA.  First, input gamma LUT */
 790                         double const r = lut_in[get(op + red) / value_divisor];
 791                         double const g = lut_in[get(op + 1) / value_divisor];
 792                         double const b = lut_in[get(op + blue) / value_divisor];
 793
 794                         /* RGB to XYZ, including Bradford transform and DCI companding */
 795                         double const x = max(0.0, min(1.0, r * fast_matrix[0] + g * fast_matrix[1] + b * fast_matrix[2]));
 796                         double const y = max(0.0, min(1.0, r * fast_matrix[3] + g * fast_matrix[4] + b * fast_matrix[5]));
 797                         double const z = max(0.0, min(1.0, r * fast_matrix[6] + g * fast_matrix[7] + b * fast_matrix[8]));
 798
 799                         /* Out gamma LUT and blend */
 800                         tp[0] = lut_out[lrint(x * 65535)] * alpha + tp[0] * (1 - alpha);
 801                         tp[1] = lut_out[lrint(y * 65535)] * alpha + tp[1] * (1 - alpha);
 802                         tp[2] = lut_out[lrint(z * 65535)] * alpha + tp[2] * (1 - alpha);
 803
 804                         tp += target.bpp / 2;
 805                         op += other.bpp / sizeof(OtherType);
 806                 }
 807         }
 808 }
 809
 810
 811 static
 812 void
 813 alpha_blend_onto_yuv420p(TargetParams const& target, OtherYUVParams const& other, std::function<float (uint8_t* data)> get_alpha)
 814 {
 815         auto const ts = target.size;
 816         auto const os = other.size;
 817         for (int ty = target.start_y, oy = other.start_y; ty < ts.height && oy < os.height; ++ty, ++oy) {
 818                 int const hty = ty / 2;
 819                 int const hoy = oy / 2;
 820                 uint8_t* tY = target.data[0] + (ty * target.stride[0]) + target.start_x;
 821                 uint8_t* tU = target.data[1] + (hty * target.stride[1]) + target.start_x / 2;
 822                 uint8_t* tV = target.data[2] + (hty * target.stride[2]) + target.start_x / 2;
 823                 uint8_t* oY = other.data[0] + (oy * other.stride[0]) + other.start_x;
 824                 uint8_t* oU = other.data[1] + (hoy * other.stride[1]) + other.start_x / 2;
 825                 uint8_t* oV = other.data[2] + (hoy * other.stride[2]) + other.start_x / 2;
 826                 uint8_t* alpha = other.alpha_data[0] + (oy * other.alpha_stride[0]) + other.start_x * other.alpha_bpp;
 827                 for (int tx = target.start_x, ox = other.start_x; tx < ts.width && ox < os.width; ++tx, ++ox) {
 828                         float const a = get_alpha(alpha);
 829                         *tY = *oY * a + *tY * (1 - a);
 830                         *tU = *oU * a + *tU * (1 - a);
 831                         *tV = *oV * a + *tV * (1 - a);
 832                         ++tY;
 833                         ++oY;
 834                         if (tx % 2) {
 835                                 ++tU;
 836                                 ++tV;
 837                         }
 838                         if (ox % 2) {
 839                                 ++oU;
 840                                 ++oV;
 841                         }
 842                         alpha += other.alpha_bpp;
 843                 }
 844         }
 845 }
 846
 847
 848 static
 849 void
 850 alpha_blend_onto_yuv420p10(TargetParams const& target, OtherYUVParams const& other, std::function<float (uint8_t* data)> get_alpha)
 851 {
 852         auto const ts = target.size;
 853         auto const os = other.size;
 854         for (int ty = target.start_y, oy = other.start_y; ty < ts.height && oy < os.height; ++ty, ++oy) {
 855                 int const hty = ty / 2;
 856                 int const hoy = oy / 2;
 857                 uint16_t* tY = reinterpret_cast<uint16_t*>(target.data[0] + (ty * target.stride[0])) + target.start_x;
 858                 uint16_t* tU = reinterpret_cast<uint16_t*>(target.data[1] + (hty * target.stride[1])) + target.start_x / 2;
 859                 uint16_t* tV = reinterpret_cast<uint16_t*>(target.data[2] + (hty * target.stride[2])) + target.start_x / 2;
 860                 uint16_t* oY = reinterpret_cast<uint16_t*>(other.data[0] + (oy * other.stride[0])) + other.start_x;
 861                 uint16_t* oU = reinterpret_cast<uint16_t*>(other.data[1] + (hoy * other.stride[1])) + other.start_x / 2;
 862                 uint16_t* oV = reinterpret_cast<uint16_t*>(other.data[2] + (hoy * other.stride[2])) + other.start_x / 2;
 863                 uint8_t* alpha = other.alpha_data[0] + (oy * other.alpha_stride[0]) + other.start_x * other.alpha_bpp;
 864                 for (int tx = target.start_x, ox = other.start_x; tx < ts.width && ox < os.width; ++tx, ++ox) {
 865                         float const a = get_alpha(alpha);
 866                         *tY = *oY * a + *tY * (1 - a);
 867                         *tU = *oU * a + *tU * (1 - a);
 868                         *tV = *oV * a + *tV * (1 - a);
 869                         ++tY;
 870                         ++oY;
 871                         if (tx % 2) {
 872                                 ++tU;
 873                                 ++tV;
 874                         }
 875                         if (ox % 2) {
 876                                 ++oU;
 877                                 ++oV;
 878                         }
 879                         alpha += other.alpha_bpp;
 880                 }
 881         }
 882 }
 883
 884
 885 static
 886 void
 887 alpha_blend_onto_yuv422p9or10le(TargetParams const& target, OtherYUVParams const& other, std::function<float (uint8_t* data)> get_alpha)
 888 {
 889         auto const ts = target.size;
 890         auto const os = other.size;
 891         for (int ty = target.start_y, oy = other.start_y; ty < ts.height && oy < os.height; ++ty, ++oy) {
 892                 uint16_t* tY = reinterpret_cast<uint16_t*>(target.data[0] + (ty * target.stride[0])) + target.start_x;
 893                 uint16_t* tU = reinterpret_cast<uint16_t*>(target.data[1] + (ty * target.stride[1])) + target.start_x / 2;
 894                 uint16_t* tV = reinterpret_cast<uint16_t*>(target.data[2] + (ty * target.stride[2])) + target.start_x / 2;
 895                 uint16_t* oY = reinterpret_cast<uint16_t*>(other.data[0] + (oy * other.stride[0])) + other.start_x;
 896                 uint16_t* oU = reinterpret_cast<uint16_t*>(other.data[1] + (oy * other.stride[1])) + other.start_x / 2;
 897                 uint16_t* oV = reinterpret_cast<uint16_t*>(other.data[2] + (oy * other.stride[2])) + other.start_x / 2;
 898                 uint8_t* alpha = other.alpha_data[0] + (oy * other.alpha_stride[0]) + other.start_x * other.alpha_bpp;
 899                 for (int tx = target.start_x, ox = other.start_x; tx < ts.width && ox < os.width; ++tx, ++ox) {
 900                         float const a = get_alpha(alpha);
 901                         *tY = *oY * a + *tY * (1 - a);
 902                         *tU = *oU * a + *tU * (1 - a);
 903                         *tV = *oV * a + *tV * (1 - a);
 904                         ++tY;
 905                         ++oY;
 906                         if (tx % 2) {
 907                                 ++tU;
 908                                 ++tV;
 909                         }
 910                         if (ox % 2) {
 911                                 ++oU;
 912                                 ++oV;
 913                         }
 914                         alpha += other.alpha_bpp;
 915                 }
 916         }
 917 }
 918
 919
 920 static
 921 void
 922 alpha_blend_onto_yuv444p9or10le(TargetParams const& target, OtherYUVParams const& other, std::function<float (uint8_t* data)> get_alpha)
 923 {
 924         auto const ts = target.size;
 925         auto const os = other.size;
 926         for (int ty = target.start_y, oy = other.start_y; ty < ts.height && oy < os.height; ++ty, ++oy) {
 927                 uint16_t* tY = reinterpret_cast<uint16_t*>(target.data[0] + (ty * target.stride[0])) + target.start_x;
 928                 uint16_t* tU = reinterpret_cast<uint16_t*>(target.data[1] + (ty * target.stride[1])) + target.start_x;
 929                 uint16_t* tV = reinterpret_cast<uint16_t*>(target.data[2] + (ty * target.stride[2])) + target.start_x;
 930                 uint16_t* oY = reinterpret_cast<uint16_t*>(other.data[0] + (oy * other.stride[0])) + other.start_x;
 931                 uint16_t* oU = reinterpret_cast<uint16_t*>(other.data[1] + (oy * other.stride[1])) + other.start_x;
 932                 uint16_t* oV = reinterpret_cast<uint16_t*>(other.data[2] + (oy * other.stride[2])) + other.start_x;
 933                 uint8_t* alpha = other.alpha_data[0] + (oy * other.alpha_stride[0]) + other.start_x * other.alpha_bpp;
 934                 for (int tx = target.start_x, ox = other.start_x; tx < ts.width && ox < os.width; ++tx, ++ox) {
 935                         float const a = get_alpha(alpha);
 936                         *tY = *oY * a + *tY * (1 - a);
 937                         *tU = *oU * a + *tU * (1 - a);
 938                         *tV = *oV * a + *tV * (1 - a);
 939                         ++tY;
 940                         ++oY;
 941                         ++tU;
 942                         ++tV;
 943                         ++oU;
 944                         ++oV;
 945                         alpha += other.alpha_bpp;
 946                 }
 947         }
 948 }
 949
 950
 951 void
 952 Image::alpha_blend (shared_ptr<const Image> other, Position<int> position)
 953 {
 954         DCPOMATIC_ASSERT(
 955                 other->pixel_format() == AV_PIX_FMT_BGRA ||
 956                 other->pixel_format() == AV_PIX_FMT_RGBA ||
 957                 other->pixel_format() == AV_PIX_FMT_RGBA64BE
 958                 );
 959
 960         int const blue = other->pixel_format() == AV_PIX_FMT_BGRA ? 0 : 2;
 961         int const red = other->pixel_format() == AV_PIX_FMT_BGRA ? 2 : 0;
 962
 963         int start_tx = position.x;
 964         int start_ox = 0;
 965
 966         if (start_tx < 0) {
 967                 start_ox = -start_tx;
 968                 start_tx = 0;
 969         }
 970
 971         int start_ty = position.y;
 972         int start_oy = 0;
 973
 974         if (start_ty < 0) {
 975                 start_oy = -start_ty;
 976                 start_ty = 0;
 977         }
 978
 979         TargetParams target_params = {
 980                 start_tx,
 981                 start_ty,
 982                 size(),
 983                 data(),
 984                 stride(),
 985                 0
 986         };
 987
 988         OtherRGBParams other_rgb_params = {
 989                 start_ox,
 990                 start_oy,
 991                 other->size(),
 992                 other->data(),
 993                 other->stride(),
 994                 other->pixel_format() == AV_PIX_FMT_RGBA64BE ? 8 : 4
 995         };
 996
 997         OtherYUVParams other_yuv_params = {
 998                 start_ox,
 999                 start_oy,
1000                 other->size(),
1001                 other->data(),
1002                 other->stride(),
1003                 nullptr,
1004                 nullptr,
1005                 other->pixel_format() == AV_PIX_FMT_RGBA64BE ? 8 : 4
1006         };
1007
1008         auto byteswap = [](uint16_t* p) {
1009                 return (*p >> 8) | ((*p & 0xff) << 8);
1010         };
1011
1012         auto pass = [](uint8_t* p) {
1013                 return *p;
1014         };
1015
1016         auto get_alpha_64be = [](uint8_t* p) {
1017                 return ((static_cast<int16_t>(p[6]) << 8) | p[7]) / 65535.0f;
1018         };
1019
1020         auto get_alpha_byte = [](uint8_t* p) {
1021                 return p[3] / 255.0f;
1022         };
1023
1024         switch (_pixel_format) {
1025         case AV_PIX_FMT_RGB24:
1026                 target_params.bpp = 3;
1027                 if (other->pixel_format() == AV_PIX_FMT_RGBA64BE) {
1028                         alpha_blend_onto_rgb24<uint16_t>(target_params, other_rgb_params, red, blue, byteswap, 256);
1029                 } else {
1030                         alpha_blend_onto_rgb24<uint8_t>(target_params, other_rgb_params, red, blue, pass, 1);
1031                 }
1032                 break;
1033         case AV_PIX_FMT_BGRA:
1034                 target_params.bpp = 4;
1035                 if (other->pixel_format() == AV_PIX_FMT_RGBA64BE) {
1036                         alpha_blend_onto_bgra<uint16_t>(target_params, other_rgb_params, red, blue, byteswap, 256);
1037                 } else {
1038                         alpha_blend_onto_bgra<uint8_t>(target_params, other_rgb_params, red, blue, pass, 1);
1039                 }
1040                 break;
1041         case AV_PIX_FMT_RGBA:
1042                 target_params.bpp = 4;
1043                 if (other->pixel_format() == AV_PIX_FMT_RGBA64BE) {
1044                         alpha_blend_onto_rgba<uint16_t>(target_params, other_rgb_params, red, blue, byteswap, 256);
1045                 } else {
1046                         alpha_blend_onto_rgba<uint8_t>(target_params, other_rgb_params, red, blue, pass, 1);
1047                 }
1048                 break;
1049         case AV_PIX_FMT_RGB48LE:
1050                 target_params.bpp = 6;
1051                 if (other->pixel_format() == AV_PIX_FMT_RGBA64BE) {
1052                         alpha_blend_onto_rgb48le<uint16_t>(target_params, other_rgb_params, red, blue, byteswap, 1);
1053                 } else {
1054                         alpha_blend_onto_rgb48le<uint8_t>(target_params, other_rgb_params, red, blue, pass, 256);
1055                 }
1056                 break;
1057         case AV_PIX_FMT_XYZ12LE:
1058                 target_params.bpp = 6;
1059                 if (other->pixel_format() == AV_PIX_FMT_RGBA64BE) {
1060                         alpha_blend_onto_xyz12le<uint16_t>(target_params, other_rgb_params, red, blue, byteswap, 256);
1061                 } else {
1062                         alpha_blend_onto_xyz12le<uint8_t>(target_params, other_rgb_params, red, blue, pass, 1);
1063                 }
1064                 break;
1065         case AV_PIX_FMT_YUV420P:
1066         {
1067                 auto yuv = other->convert_pixel_format (dcp::YUVToRGB::REC709, _pixel_format, Alignment::COMPACT, false);
1068                 other_yuv_params.data = yuv->data();
1069                 other_yuv_params.stride = yuv->stride();
1070                 other_yuv_params.alpha_data = other->data();
1071                 other_yuv_params.alpha_stride = other->stride();
1072                 if (other->pixel_format() == AV_PIX_FMT_RGBA64BE) {
1073                         alpha_blend_onto_yuv420p(target_params, other_yuv_params, get_alpha_64be);
1074                 } else {
1075                         alpha_blend_onto_yuv420p(target_params, other_yuv_params, get_alpha_byte);
1076                 }
1077                 break;
1078         }
1079         case AV_PIX_FMT_YUV420P10:
1080         {
1081                 auto yuv = other->convert_pixel_format (dcp::YUVToRGB::REC709, _pixel_format, Alignment::COMPACT, false);
1082                 other_yuv_params.data = yuv->data();
1083                 other_yuv_params.stride = yuv->stride();
1084                 other_yuv_params.alpha_data = other->data();
1085                 other_yuv_params.alpha_stride = other->stride();
1086                 if (other->pixel_format() == AV_PIX_FMT_RGBA64BE) {
1087                         alpha_blend_onto_yuv420p10(target_params, other_yuv_params, get_alpha_64be);
1088                 } else {
1089                         alpha_blend_onto_yuv420p10(target_params, other_yuv_params, get_alpha_byte);
1090                 }
1091                 break;
1092         }
1093         case AV_PIX_FMT_YUV422P9LE:
1094         case AV_PIX_FMT_YUV422P10LE:
1095         {
1096                 auto yuv = other->convert_pixel_format (dcp::YUVToRGB::REC709, _pixel_format, Alignment::COMPACT, false);
1097                 other_yuv_params.data = yuv->data();
1098                 other_yuv_params.stride = yuv->stride();
1099                 other_yuv_params.alpha_data = other->data();
1100                 other_yuv_params.alpha_stride = other->stride();
1101                 if (other->pixel_format() == AV_PIX_FMT_RGBA64BE) {
1102                         alpha_blend_onto_yuv422p9or10le(target_params, other_yuv_params, get_alpha_64be);
1103                 } else {
1104                         alpha_blend_onto_yuv422p9or10le(target_params, other_yuv_params, get_alpha_byte);
1105                 }
1106                 break;
1107         }
1108         case AV_PIX_FMT_YUV444P9LE:
1109         case AV_PIX_FMT_YUV444P10LE:
1110         {
1111                 auto yuv = other->convert_pixel_format (dcp::YUVToRGB::REC709, _pixel_format, Alignment::COMPACT, false);
1112                 other_yuv_params.data = yuv->data();
1113                 other_yuv_params.stride = yuv->stride();
1114                 other_yuv_params.alpha_data = other->data();
1115                 other_yuv_params.alpha_stride = other->stride();
1116                 if (other->pixel_format() == AV_PIX_FMT_RGBA64BE) {
1117                         alpha_blend_onto_yuv444p9or10le(target_params, other_yuv_params, get_alpha_64be);
1118                 } else {
1119                         alpha_blend_onto_yuv444p9or10le(target_params, other_yuv_params, get_alpha_byte);
1120                 }
1121                 break;
1122         }
1123         default:
1124                 throw PixelFormatError ("alpha_blend()", _pixel_format);
1125         }
1126 }
1127
1128
1129 void
1130 Image::copy (shared_ptr<const Image> other, Position<int> position)
1131 {
1132         /* Only implemented for RGB24 onto RGB24 so far */
1133         DCPOMATIC_ASSERT (_pixel_format == AV_PIX_FMT_RGB24 && other->pixel_format() == AV_PIX_FMT_RGB24);
1134         DCPOMATIC_ASSERT (position.x >= 0 && position.y >= 0);
1135
1136         int const N = min (position.x + other->size().width, size().width) - position.x;
1137         for (int ty = position.y, oy = 0; ty < size().height && oy < other->size().height; ++ty, ++oy) {
1138                 uint8_t * const tp = data()[0] + ty * stride()[0] + position.x * 3;
1139                 uint8_t * const op = other->data()[0] + oy * other->stride()[0];
1140                 memcpy (tp, op, N * 3);
1141         }
1142 }
1143
1144
1145 void
1146 Image::read_from_socket (shared_ptr<Socket> socket)
1147 {
1148         for (int i = 0; i < planes(); ++i) {
1149                 uint8_t* p = data()[i];
1150                 int const lines = sample_size(i).height;
1151                 for (int y = 0; y < lines; ++y) {
1152                         socket->read (p, line_size()[i]);
1153                         p += stride()[i];
1154                 }
1155         }
1156 }
1157
1158
1159 void
1160 Image::write_to_socket (shared_ptr<Socket> socket) const
1161 {
1162         for (int i = 0; i < planes(); ++i) {
1163                 uint8_t* p = data()[i];
1164                 int const lines = sample_size(i).height;
1165                 for (int y = 0; y < lines; ++y) {
1166                         socket->write (p, line_size()[i]);
1167                         p += stride()[i];
1168                 }
1169         }
1170 }
1171
1172
1173 float
1174 Image::bytes_per_pixel (int c) const
1175 {
1176         auto d = av_pix_fmt_desc_get(_pixel_format);
1177         if (!d) {
1178                 throw PixelFormatError ("bytes_per_pixel()", _pixel_format);
1179         }
1180
1181         if (c >= planes()) {
1182                 return 0;
1183         }
1184
1185         float bpp[4] = { 0, 0, 0, 0 };
1186
1187 #ifdef DCPOMATIC_HAVE_AVCOMPONENTDESCRIPTOR_DEPTH_MINUS1
1188         bpp[0] = floor ((d->comp[0].depth_minus1 + 8) / 8);
1189         if (d->nb_components > 1) {
1190                 bpp[1] = floor ((d->comp[1].depth_minus1 + 8) / 8) / pow (2.0f, d->log2_chroma_w);
1191         }
1192         if (d->nb_components > 2) {
1193                 bpp[2] = floor ((d->comp[2].depth_minus1 + 8) / 8) / pow (2.0f, d->log2_chroma_w);
1194         }
1195         if (d->nb_components > 3) {
1196                 bpp[3] = floor ((d->comp[3].depth_minus1 + 8) / 8) / pow (2.0f, d->log2_chroma_w);
1197         }
1198 #else
1199         bpp[0] = floor ((d->comp[0].depth + 7) / 8);
1200         if (d->nb_components > 1) {
1201                 bpp[1] = floor ((d->comp[1].depth + 7) / 8) / pow (2.0f, d->log2_chroma_w);
1202         }
1203         if (d->nb_components > 2) {
1204                 bpp[2] = floor ((d->comp[2].depth + 7) / 8) / pow (2.0f, d->log2_chroma_w);
1205         }
1206         if (d->nb_components > 3) {
1207                 bpp[3] = floor ((d->comp[3].depth + 7) / 8) / pow (2.0f, d->log2_chroma_w);
1208         }
1209 #endif
1210
1211         if ((d->flags & AV_PIX_FMT_FLAG_PLANAR) == 0) {
1212                 /* Not planar; sum them up */
1213                 return bpp[0] + bpp[1] + bpp[2] + bpp[3];
1214         }
1215
1216         return bpp[c];
1217 }
1218
1219
/** Construct an Image of a given size and format, allocating memory
 *  as required.  The contents of the new image are not initialised here.
 *
 *  @param p Pixel format.
 *  @param s Size in pixels.
 *  @param alignment PADDED to make the stride of each row of this image a multiple of ALIGNMENT bytes, otherwise COMPACT.
 */
Image::Image (AVPixelFormat p, dcp::Size s, Alignment alignment)
        : _size (s)
        , _pixel_format (p)
        , _alignment (alignment)
{
        /* Sets up _data, _line_size and _stride for our size, format and alignment */
        allocate ();
}
1234
1235
1236 void
1237 Image::allocate ()
1238 {
1239         _data = (uint8_t **) wrapped_av_malloc (4 * sizeof (uint8_t *));
1240         _data[0] = _data[1] = _data[2] = _data[3] = 0;
1241
1242         _line_size = (int *) wrapped_av_malloc (4 * sizeof (int));
1243         _line_size[0] = _line_size[1] = _line_size[2] = _line_size[3] = 0;
1244
1245         _stride = (int *) wrapped_av_malloc (4 * sizeof (int));
1246         _stride[0] = _stride[1] = _stride[2] = _stride[3] = 0;
1247
1248         auto stride_round_up = [](int stride, int t) {
1249                 int const a = stride + (t - 1);
1250                 return a - (a % t);
1251         };
1252
1253         for (int i = 0; i < planes(); ++i) {
1254                 _line_size[i] = ceil (_size.width * bytes_per_pixel(i));
1255                 _stride[i] = stride_round_up (_line_size[i], _alignment == Alignment::PADDED ? ALIGNMENT : 1);
1256
1257                 /* The assembler function ff_rgb24ToY_avx (in libswscale/x86/input.asm)
1258                    uses a 16-byte fetch to read three bytes (R/G/B) of image data.
1259                    Hence on the last pixel of the last line it reads over the end of
1260                    the actual data by 1 byte.  If the width of an image is a multiple
1261                    of the stride alignment there will be no padding at the end of image lines.
1262                    OS X crashes on this illegal read, though other operating systems don't
1263                    seem to mind.  The nasty + 1 in this malloc makes sure there is always a byte
1264                    for that instruction to read safely.
1265
1266                    Further to the above, valgrind is now telling me that ff_rgb24ToY_ssse3
1267                    over-reads by more then _avx.  I can't follow the code to work out how much,
1268                    so I'll just over-allocate by ALIGNMENT bytes and have done with it.  Empirical
1269                    testing suggests that it works.
1270
1271                    In addition to these concerns, we may read/write as much as a whole extra line
1272                    at the end of each plane in cases where we are messing with offsets in order to
1273                    do pad or crop.  To solve this we over-allocate by an extra _stride[i] bytes.
1274
1275                    As an example: we may write to images starting at an offset so we get some padding.
1276                    Hence we want to write in the following pattern:
1277
1278                    block start   write start                                  line end
1279                    |..(padding)..|<------line-size------------->|..(padding)..|
1280                    |..(padding)..|<------line-size------------->|..(padding)..|
1281                    |..(padding)..|<------line-size------------->|..(padding)..|
1282
1283                    where line-size is of the smaller (inter_size) image and the full padded line length is that of
1284                    out_size.  To get things to work we have to tell FFmpeg that the stride is that of out_size.
1285                    However some parts of FFmpeg (notably rgb48Toxyz12 in swscale.c) process data for the full
1286                    specified *stride*.  This does not matter until we get to the last line:
1287
1288                    block start   write start                                  line end
1289                    |..(padding)..|<------line-size------------->|XXXwrittenXXX|
1290                    |XXXwrittenXXX|<------line-size------------->|XXXwrittenXXX|
1291                    |XXXwrittenXXX|<------line-size------------->|XXXwrittenXXXXXXwrittenXXX
1292                                                                                ^^^^ out of bounds
1293                 */
1294                 _data[i] = (uint8_t *) wrapped_av_malloc (_stride[i] * (sample_size(i).height + 1) + ALIGNMENT);
1295 #if HAVE_VALGRIND_MEMCHECK_H
1296                 /* The data between the end of the line size and the stride is undefined but processed by
1297                    libswscale, causing lots of valgrind errors.  Mark it all defined to quell these errors.
1298                 */
1299                 VALGRIND_MAKE_MEM_DEFINED (_data[i], _stride[i] * (sample_size(i).height + 1) + ALIGNMENT);
1300 #endif
1301         }
1302 }
1303
1304
1305 Image::Image (Image const & other)
1306         : std::enable_shared_from_this<Image>(other)
1307         , _size (other._size)
1308         , _pixel_format (other._pixel_format)
1309         , _alignment (other._alignment)
1310 {
1311         allocate ();
1312
1313         for (int i = 0; i < planes(); ++i) {
1314                 uint8_t* p = _data[i];
1315                 uint8_t* q = other._data[i];
1316                 int const lines = sample_size(i).height;
1317                 for (int j = 0; j < lines; ++j) {
1318                         memcpy (p, q, _line_size[i]);
1319                         p += stride()[i];
1320                         q += other.stride()[i];
1321                 }
1322         }
1323 }
1324
1325
1326 Image::Image (AVFrame const * frame, Alignment alignment)
1327         : _size (frame->width, frame->height)
1328         , _pixel_format (static_cast<AVPixelFormat>(frame->format))
1329         , _alignment (alignment)
1330 {
1331         DCPOMATIC_ASSERT (_pixel_format != AV_PIX_FMT_NONE);
1332
1333         allocate ();
1334
1335         for (int i = 0; i < planes(); ++i) {
1336                 uint8_t* p = _data[i];
1337                 uint8_t* q = frame->data[i];
1338                 int const lines = sample_size(i).height;
1339                 for (int j = 0; j < lines; ++j) {
1340                         memcpy (p, q, _line_size[i]);
1341                         p += stride()[i];
1342                         /* AVFrame's linesize is what we call `stride' */
1343                         q += frame->linesize[i];
1344                 }
1345         }
1346 }
1347
1348
1349 Image::Image (shared_ptr<const Image> other, Alignment alignment)
1350         : _size (other->_size)
1351         , _pixel_format (other->_pixel_format)
1352         , _alignment (alignment)
1353 {
1354         allocate ();
1355
1356         for (int i = 0; i < planes(); ++i) {
1357                 DCPOMATIC_ASSERT (line_size()[i] == other->line_size()[i]);
1358                 uint8_t* p = _data[i];
1359                 uint8_t* q = other->data()[i];
1360                 int const lines = sample_size(i).height;
1361                 for (int j = 0; j < lines; ++j) {
1362                         memcpy (p, q, line_size()[i]);
1363                         p += stride()[i];
1364                         q += other->stride()[i];
1365                 }
1366         }
1367 }
1368
1369
1370 Image&
1371 Image::operator= (Image const & other)
1372 {
1373         if (this == &other) {
1374                 return *this;
1375         }
1376
1377         Image tmp (other);
1378         swap (tmp);
1379         return *this;
1380 }
1381
1382
1383 void
1384 Image::swap (Image & other)
1385 {
1386         std::swap (_size, other._size);
1387         std::swap (_pixel_format, other._pixel_format);
1388
1389         for (int i = 0; i < 4; ++i) {
1390                 std::swap (_data[i], other._data[i]);
1391                 std::swap (_line_size[i], other._line_size[i]);
1392                 std::swap (_stride[i], other._stride[i]);
1393         }
1394
1395         std::swap (_alignment, other._alignment);
1396 }
1397
1398
1399 Image::~Image ()
1400 {
1401         for (int i = 0; i < planes(); ++i) {
1402                 av_free (_data[i]);
1403         }
1404
1405         av_free (_data);
1406         av_free (_line_size);
1407         av_free (_stride);
1408 }
1409
1410
/** @return Array of pointers to the start of the data for each plane.  The array
 *  has 4 entries; those for planes that are not allocated are null.
 */
uint8_t * const *
Image::data () const
{
        return _data;
}
1416
1417
/** @return Array giving, for each plane, the number of bytes of image data in
 *  one row (not including any padding at the end of the row).
 */
int const *
Image::line_size () const
{
        return _line_size;
}
1423
1424
/** @return Array giving, for each plane, the number of bytes from the start of
 *  one row to the start of the next (i.e. the line size plus any padding).
 */
int const *
Image::stride () const
{
        return _stride;
}
1430
1431
/** @return Size of this image, as given when it was constructed */
dcp::Size
Image::size () const
{
        return _size;
}
1437
1438
/** @return Alignment that was used when allocating the rows of this image */
Image::Alignment
Image::alignment () const
{
        return _alignment;
}
1444
1445
1446 PositionImage
1447 merge (list<PositionImage> images, Image::Alignment alignment)
1448 {
1449         if (images.empty ()) {
1450                 return {};
1451         }
1452
1453         if (images.size() == 1) {
1454                 images.front().image = Image::ensure_alignment(images.front().image, alignment);
1455                 return images.front();
1456         }
1457
1458         dcpomatic::Rect<int> all (images.front().position, images.front().image->size().width, images.front().image->size().height);
1459         for (auto const& i: images) {
1460                 all.extend (dcpomatic::Rect<int>(i.position, i.image->size().width, i.image->size().height));
1461         }
1462
1463         auto merged = make_shared<Image>(images.front().image->pixel_format(), dcp::Size(all.width, all.height), alignment);
1464         merged->make_transparent ();
1465         for (auto const& i: images) {
1466                 merged->alpha_blend (i.image, i.position - all.position());
1467         }
1468
1469         return PositionImage (merged, all.position ());
1470 }
1471
1472
1473 bool
1474 operator== (Image const & a, Image const & b)
1475 {
1476         if (a.planes() != b.planes() || a.pixel_format() != b.pixel_format() || a.alignment() != b.alignment()) {
1477                 return false;
1478         }
1479
1480         for (int c = 0; c < a.planes(); ++c) {
1481                 if (a.sample_size(c).height != b.sample_size(c).height || a.line_size()[c] != b.line_size()[c] || a.stride()[c] != b.stride()[c]) {
1482                         return false;
1483                 }
1484
1485                 uint8_t* p = a.data()[c];
1486                 uint8_t* q = b.data()[c];
1487                 int const lines = a.sample_size(c).height;
1488                 for (int y = 0; y < lines; ++y) {
1489                         if (memcmp (p, q, a.line_size()[c]) != 0) {
1490                                 return false;
1491                         }
1492
1493                         p += a.stride()[c];
1494                         q += b.stride()[c];
1495                 }
1496         }
1497
1498         return true;
1499 }
1500
1501
1502 /** Fade the image.
1503  *  @param f Amount to fade by; 0 is black, 1 is no fade.
1504  */
1505 void
1506 Image::fade (float f)
1507 {
1508         /* U/V black value for 8-bit colour */
1509         static int const eight_bit_uv =    (1 << 7) - 1;
1510         /* U/V black value for 10-bit colour */
1511         static uint16_t const ten_bit_uv = (1 << 9) - 1;
1512
1513         switch (_pixel_format) {
1514         case AV_PIX_FMT_YUV420P:
1515         {
1516                 /* Y */
1517                 uint8_t* p = data()[0];
1518                 int const lines = sample_size(0).height;
1519                 for (int y = 0; y < lines; ++y) {
1520                         uint8_t* q = p;
1521                         for (int x = 0; x < line_size()[0]; ++x) {
1522                                 *q = int(float(*q) * f);
1523                                 ++q;
1524                         }
1525                         p += stride()[0];
1526                 }
1527
1528                 /* U, V */
1529                 for (int c = 1; c < 3; ++c) {
1530                         uint8_t* p = data()[c];
1531                         int const lines = sample_size(c).height;
1532                         for (int y = 0; y < lines; ++y) {
1533                                 uint8_t* q = p;
1534                                 for (int x = 0; x < line_size()[c]; ++x) {
1535                                         *q = eight_bit_uv + int((int(*q) - eight_bit_uv) * f);
1536                                         ++q;
1537                                 }
1538                                 p += stride()[c];
1539                         }
1540                 }
1541
1542                 break;
1543         }
1544
1545         case AV_PIX_FMT_RGB24:
1546         {
1547                 /* 8-bit */
1548                 uint8_t* p = data()[0];
1549                 int const lines = sample_size(0).height;
1550                 for (int y = 0; y < lines; ++y) {
1551                         uint8_t* q = p;
1552                         for (int x = 0; x < line_size()[0]; ++x) {
1553                                 *q = int (float (*q) * f);
1554                                 ++q;
1555                         }
1556                         p += stride()[0];
1557                 }
1558                 break;
1559         }
1560
1561         case AV_PIX_FMT_XYZ12LE:
1562         case AV_PIX_FMT_RGB48LE:
1563                 /* 16-bit little-endian */
1564                 for (int c = 0; c < 3; ++c) {
1565                         int const stride_pixels = stride()[c] / 2;
1566                         int const line_size_pixels = line_size()[c] / 2;
1567                         uint16_t* p = reinterpret_cast<uint16_t*> (data()[c]);
1568                         int const lines = sample_size(c).height;
1569                         for (int y = 0; y < lines; ++y) {
1570                                 uint16_t* q = p;
1571                                 for (int x = 0; x < line_size_pixels; ++x) {
1572                                         *q = int (float (*q) * f);
1573                                         ++q;
1574                                 }
1575                                 p += stride_pixels;
1576                         }
1577                 }
1578                 break;
1579
1580         case AV_PIX_FMT_YUV422P10LE:
1581         {
1582                 /* Y */
1583                 {
1584                         int const stride_pixels = stride()[0] / 2;
1585                         int const line_size_pixels = line_size()[0] / 2;
1586                         uint16_t* p = reinterpret_cast<uint16_t*> (data()[0]);
1587                         int const lines = sample_size(0).height;
1588                         for (int y = 0; y < lines; ++y) {
1589                                 uint16_t* q = p;
1590                                 for (int x = 0; x < line_size_pixels; ++x) {
1591                                         *q = int(float(*q) * f);
1592                                         ++q;
1593                                 }
1594                                 p += stride_pixels;
1595                         }
1596                 }
1597
1598                 /* U, V */
1599                 for (int c = 1; c < 3; ++c) {
1600                         int const stride_pixels = stride()[c] / 2;
1601                         int const line_size_pixels = line_size()[c] / 2;
1602                         uint16_t* p = reinterpret_cast<uint16_t*> (data()[c]);
1603                         int const lines = sample_size(c).height;
1604                         for (int y = 0; y < lines; ++y) {
1605                                 uint16_t* q = p;
1606                                 for (int x = 0; x < line_size_pixels; ++x) {
1607                                         *q = ten_bit_uv + int((int(*q) - ten_bit_uv) * f);
1608                                         ++q;
1609                                 }
1610                                 p += stride_pixels;
1611                         }
1612                 }
1613                 break;
1614
1615         }
1616
1617         default:
1618                 throw PixelFormatError ("fade()", _pixel_format);
1619         }
1620 }
1621
1622
1623 shared_ptr<const Image>
1624 Image::ensure_alignment (shared_ptr<const Image> image, Image::Alignment alignment)
1625 {
1626         if (image->alignment() == alignment) {
1627                 return image;
1628         }
1629
1630         return make_shared<Image>(image, alignment);
1631 }
1632
1633
1634 size_t
1635 Image::memory_used () const
1636 {
1637         size_t m = 0;
1638         for (int i = 0; i < planes(); ++i) {
1639                 m += _stride[i] * sample_size(i).height;
1640         }
1641         return m;
1642 }
1643
1644
1645 void
1646 Image::video_range_to_full_range ()
1647 {
1648         switch (_pixel_format) {
1649         case AV_PIX_FMT_RGB24:
1650         {
1651                 float const factor = 256.0 / 219.0;
1652                 uint8_t* p = data()[0];
1653                 int const lines = sample_size(0).height;
1654                 for (int y = 0; y < lines; ++y) {
1655                         uint8_t* q = p;
1656                         for (int x = 0; x < line_size()[0]; ++x) {
1657                                 *q = clamp(lrintf((*q - 16) * factor), 0L, 255L);
1658                                 ++q;
1659                         }
1660                         p += stride()[0];
1661                 }
1662                 break;
1663         }
1664         case AV_PIX_FMT_RGB48LE:
1665         {
1666                 float const factor = 65536.0 / 56064.0;
1667                 uint16_t* p = reinterpret_cast<uint16_t*>(data()[0]);
1668                 int const lines = sample_size(0).height;
1669                 for (int y = 0; y < lines; ++y) {
1670                         uint16_t* q = p;
1671                         int const line_size_pixels = line_size()[0] / 2;
1672                         for (int x = 0; x < line_size_pixels; ++x) {
1673                                 *q = clamp(lrintf((*q - 4096) * factor), 0L, 65535L);
1674                                 ++q;
1675                         }
1676                         p += stride()[0] / 2;
1677                 }
1678                 break;
1679         }
1680         case AV_PIX_FMT_GBRP12LE:
1681         {
1682                 float const factor = 4096.0 / 3504.0;
1683                 for (int c = 0; c < 3; ++c) {
1684                         uint16_t* p = reinterpret_cast<uint16_t*>(data()[c]);
1685                         int const lines = sample_size(c).height;
1686                         for (int y = 0; y < lines; ++y) {
1687                                 uint16_t* q = p;
1688                                 int const line_size_pixels = line_size()[c] / 2;
1689                                 for (int x = 0; x < line_size_pixels; ++x) {
1690                                         *q = clamp(lrintf((*q - 256) * factor), 0L, 4095L);
1691                                         ++q;
1692                                 }
1693                         }
1694                 }
1695                 break;
1696         }
1697         default:
1698                 throw PixelFormatError ("video_range_to_full_range()", _pixel_format);
1699         }
1700 }
1701