+
+/** Description of the image that is being blended onto (the "target"),
+ *  together with the position in it at which blending should start.
+ */
+struct TargetParams
+{
+	/** x position, in pixels, of the first target pixel to be written */
+	int start_x;
+	/** y position, in pixels (lines), of the first target line to be written */
+	int start_y;
+	/** size of the target image; used as the upper bound when walking over pixels */
+	dcp::Size size;
+	/** per-plane pointers to the target's pixel data */
+	uint8_t* const* data;
+	/** per-plane line strides, in bytes */
+	int const* stride;
+	/** bytes per pixel in plane 0 */
+	int bpp;
+
+	/** @param y line index within plane 0.
+	 *  @return pointer to the pixel at (start_x, y) in plane 0.
+	 */
+	uint8_t* line_pointer(int y) const {
+		auto const line_start = data[0] + y * stride[0];
+		return line_start + start_x * bpp;
+	}
+};
+
+
+/** Description of the image that is being blended from (the "other" image,
+ *  i.e. the overlay), together with the position in it at which reading
+ *  should start.
+ */
+struct OtherParams
+{
+	/** x position, in pixels, of the first overlay pixel to be read */
+	int start_x;
+	/** y position, in pixels (lines), of the first overlay line to be read */
+	int start_y;
+	/** size of the overlay image; used as the upper bound when walking over pixels */
+	dcp::Size size;
+	/** per-plane pointers to the overlay's pixel data */
+	uint8_t* const* data;
+	/** per-plane line strides, in bytes */
+	int const* stride;
+	/** bytes per pixel in plane 0 */
+	int bpp;
+
+	/** @param y line index within plane 0.
+	 *  @return pointer to the pixel at (start_x, y) in plane 0.
+	 *
+	 *  The start_x offset must be included here: the blending loops begin
+	 *  their overlay column counter at start_x, so the pointer they walk has
+	 *  to begin at that same column.  Without it a non-zero start_x would make
+	 *  them read from column 0 of the overlay instead.
+	 */
+	uint8_t* line_pointer(int y) const {
+		return data[0] + y * stride[0] + start_x * bpp;
+	}
+
+	/** @return the largest value that one channel can hold, for dividing a raw
+	 *  alpha value into the range 0 to 1.  This assumes four channels of equal
+	 *  width per pixel, so that each channel has bpp * 2 bits (e.g. bpp == 4
+	 *  gives 255 and bpp == 8 gives 65535).
+	 */
+	float alpha_divisor() const {
+		return pow(2, bpp * 2) - 1;
+	}
+};
+
+
+/** Alpha-blend a four-channel image onto a target with 8-bit red, green and blue.
+ *  @param target image to write to; target.bpp bytes are stepped per pixel.
+ *  @param other image to read from; its alpha is channel 3 and its green is channel 1.
+ *  @param red index of the red channel within a pixel of other.
+ *  @param blue index of the blue channel within a pixel of other.
+ *  @param get function to read one channel value from other.
+ *  @param value_divisor amount to divide other's colour values by before blending.
+ */
+template <class OtherType>
+void
+alpha_blend_onto_rgb24(TargetParams const& target, OtherParams const& other, int red, int blue, std::function<float (OtherType*)> get, int value_divisor)
+{
+	/* The target is RGB24: red in byte 0, green in byte 1, blue in byte 2 */
+	auto const full_alpha = other.alpha_divisor();
+	/* Number of OtherType elements in one pixel of other */
+	auto const other_step = other.bpp / sizeof(OtherType);
+
+	int ty = target.start_y;
+	int oy = other.start_y;
+	while (ty < target.size.height && oy < other.size.height) {
+		auto dst = target.line_pointer(ty);
+		auto src = reinterpret_cast<OtherType*>(other.line_pointer(oy));
+
+		int tx = target.start_x;
+		int ox = other.start_x;
+		while (tx < target.size.width && ox < other.size.width) {
+			float const alpha = get(src + 3) / full_alpha;
+			float const keep = 1 - alpha;
+
+			dst[0] = (get(src + red) / value_divisor) * alpha + dst[0] * keep;
+			dst[1] = (get(src + 1) / value_divisor) * alpha + dst[1] * keep;
+			dst[2] = (get(src + blue) / value_divisor) * alpha + dst[2] * keep;
+
+			dst += target.bpp;
+			src += other_step;
+			++tx;
+			++ox;
+		}
+
+		++ty;
+		++oy;
+	}
+}
+
+
+/** Alpha-blend a four-channel image onto a target with 8-bit channels in the
+ *  order blue, green, red, alpha.
+ *  @param target image to write to; target.bpp bytes are stepped per pixel.
+ *  @param other image to read from; its alpha is channel 3 and its green is channel 1.
+ *  @param red index of the red channel within a pixel of other.
+ *  @param blue index of the blue channel within a pixel of other.
+ *  @param get function to read one channel value from other.
+ *  @param value_divisor amount to divide other's channel values by before blending
+ *  (this is applied to the alpha channel written to byte 3 as well as to the colours).
+ */
+template <class OtherType>
+void
+alpha_blend_onto_bgra(TargetParams const& target, OtherParams const& other, int red, int blue, std::function<float (OtherType*)> get, int value_divisor)
+{
+	auto const full_alpha = other.alpha_divisor();
+	/* Number of OtherType elements in one pixel of other */
+	auto const other_step = other.bpp / sizeof(OtherType);
+
+	int ty = target.start_y;
+	int oy = other.start_y;
+	while (ty < target.size.height && oy < other.size.height) {
+		auto dst = target.line_pointer(ty);
+		auto src = reinterpret_cast<OtherType*>(other.line_pointer(oy));
+
+		int tx = target.start_x;
+		int ox = other.start_x;
+		while (tx < target.size.width && ox < other.size.width) {
+			float const alpha = get(src + 3) / full_alpha;
+			float const keep = 1 - alpha;
+
+			/* Target byte order is B, G, R, A */
+			dst[0] = (get(src + blue) / value_divisor) * alpha + dst[0] * keep;
+			dst[1] = (get(src + 1) / value_divisor) * alpha + dst[1] * keep;
+			dst[2] = (get(src + red) / value_divisor) * alpha + dst[2] * keep;
+			dst[3] = (get(src + 3) / value_divisor) * alpha + dst[3] * keep;
+
+			dst += target.bpp;
+			src += other_step;
+			++tx;
+			++ox;
+		}
+
+		++ty;
+		++oy;
+	}
+}
+
+
+/** Alpha-blend a four-channel image onto a target with 8-bit channels in the
+ *  order red, green, blue, alpha.
+ *  @param target image to write to; target.bpp bytes are stepped per pixel.
+ *  @param other image to read from; its alpha is channel 3 and its green is channel 1.
+ *  @param red index of the red channel within a pixel of other.
+ *  @param blue index of the blue channel within a pixel of other.
+ *  @param get function to read one channel value from other.
+ *  @param value_divisor amount to divide other's channel values by before blending
+ *  (this is applied to the alpha channel written to byte 3 as well as to the colours).
+ */
+template <class OtherType>
+void
+alpha_blend_onto_rgba(TargetParams const& target, OtherParams const& other, int red, int blue, std::function<float (OtherType*)> get, int value_divisor)
+{
+	auto const full_alpha = other.alpha_divisor();
+	/* Number of OtherType elements in one pixel of other */
+	auto const other_step = other.bpp / sizeof(OtherType);
+
+	int ty = target.start_y;
+	int oy = other.start_y;
+	while (ty < target.size.height && oy < other.size.height) {
+		auto dst = target.line_pointer(ty);
+		auto src = reinterpret_cast<OtherType*>(other.line_pointer(oy));
+
+		int tx = target.start_x;
+		int ox = other.start_x;
+		while (tx < target.size.width && ox < other.size.width) {
+			float const alpha = get(src + 3) / full_alpha;
+			float const keep = 1 - alpha;
+
+			/* Target byte order is R, G, B, A */
+			dst[0] = (get(src + red) / value_divisor) * alpha + dst[0] * keep;
+			dst[1] = (get(src + 1) / value_divisor) * alpha + dst[1] * keep;
+			dst[2] = (get(src + blue) / value_divisor) * alpha + dst[2] * keep;
+			dst[3] = (get(src + 3) / value_divisor) * alpha + dst[3] * keep;
+
+			dst += target.bpp;
+			src += other_step;
+			++tx;
+			++ox;
+		}
+
+		++ty;
+		++oy;
+	}
+}
+
+
+/** Alpha-blend a four-channel image onto a target with 16-bit red, green and blue.
+ *  @param target image to write to; it is accessed as 16-bit words, with
+ *  target.bpp / 2 words stepped per pixel.
+ *  @param other image to read from; its alpha is channel 3 and its green is channel 1.
+ *  @param red index of the red channel within a pixel of other.
+ *  @param blue index of the blue channel within a pixel of other.
+ *  @param get function to read one channel value from other.
+ *  @param value_scale amount to multiply other's colour values by before blending.
+ */
+template <class OtherType>
+void
+alpha_blend_onto_rgb48le(TargetParams const& target, OtherParams const& other, int red, int blue, std::function<float (OtherType*)> get, int value_scale)
+{
+	auto const full_alpha = other.alpha_divisor();
+	/* Pixel steps, in units of the pointers that we walk along each line */
+	auto const target_step = target.bpp / 2;
+	auto const other_step = other.bpp / sizeof(OtherType);
+
+	int ty = target.start_y;
+	int oy = other.start_y;
+	while (ty < target.size.height && oy < other.size.height) {
+		auto dst = reinterpret_cast<uint16_t*>(target.line_pointer(ty));
+		auto src = reinterpret_cast<OtherType*>(other.line_pointer(oy));
+
+		int tx = target.start_x;
+		int ox = other.start_x;
+		while (tx < target.size.width && ox < other.size.width) {
+			float const alpha = get(src + 3) / full_alpha;
+			float const keep = 1 - alpha;
+
+			dst[0] = get(src + red) * value_scale * alpha + dst[0] * keep;
+			dst[1] = get(src + 1) * value_scale * alpha + dst[1] * keep;
+			dst[2] = get(src + blue) * value_scale * alpha + dst[2] * keep;
+
+			dst += target_step;
+			src += other_step;
+			++tx;
+			++ox;
+		}
+
+		++ty;
+		++oy;
+	}
+}
+
+
+/** Alpha-blend a four-channel sRGB image onto a target holding XYZ in 16-bit
+ *  words, converting each overlay pixel from sRGB to XYZ on the way.
+ *  @param target image to write to; it is accessed as 16-bit words, with
+ *  target.bpp / 2 words stepped per pixel.
+ *  @param other image to read from; its alpha is channel 3 and its green is channel 1.
+ *  @param red index of the red channel within a pixel of other.
+ *  @param blue index of the blue channel within a pixel of other.
+ *  @param get function to read one channel value from other.
+ *  @param value_divisor amount to divide other's colour values by before they
+ *  are used to index the input gamma LUT.
+ */
+template <class OtherType>
+void
+alpha_blend_onto_xyz12le(TargetParams const& target, OtherParams const& other, int red, int blue, std::function<float (OtherType*)> get, int value_divisor)
+{
+	auto const alpha_divisor = other.alpha_divisor();
+	auto conv = dcp::ColourConversion::srgb_to_xyz();
+	double fast_matrix[9];
+	dcp::combined_rgb_to_xyz(conv, fast_matrix);
+	auto lut_in = conv.in()->double_lut(0, 1, 8, false);
+	auto lut_out = conv.out()->int_lut(0, 1, 16, true, 65535);
+	for (int ty = target.start_y, oy = other.start_y; ty < target.size.height && oy < other.size.height; ++ty, ++oy) {
+		auto tp = reinterpret_cast<uint16_t*>(target.line_pointer(ty));
+		/* Start reading at column other.start_x, to agree with the starting value of ox
+		 * below; without this offset a non-zero other.start_x would read from column 0.
+		 */
+		auto op = reinterpret_cast<OtherType*>(other.data[0] + oy * other.stride[0] + other.start_x * other.bpp);
+		for (int tx = target.start_x, ox = other.start_x; tx < target.size.width && ox < other.size.width; ++tx, ++ox) {
+			float const alpha = get(op + 3) / alpha_divisor;
+
+			/* Convert sRGB to XYZ; the channel order of op is given by red and blue
+			 * (green is always channel 1).  First, input gamma LUT.
+			 */
+			double const r = lut_in[get(op + red) / value_divisor];
+			double const g = lut_in[get(op + 1) / value_divisor];
+			double const b = lut_in[get(op + blue) / value_divisor];
+
+			/* RGB to XYZ, including Bradford transform and DCI companding; clamp to [0, 1]
+			 * so that the result is a valid index into the output LUT.
+			 */
+			double const x = max(0.0, min(1.0, r * fast_matrix[0] + g * fast_matrix[1] + b * fast_matrix[2]));
+			double const y = max(0.0, min(1.0, r * fast_matrix[3] + g * fast_matrix[4] + b * fast_matrix[5]));
+			double const z = max(0.0, min(1.0, r * fast_matrix[6] + g * fast_matrix[7] + b * fast_matrix[8]));
+
+			/* Out gamma LUT and blend */
+			tp[0] = lut_out[lrint(x * 65535)] * alpha + tp[0] * (1 - alpha);
+			tp[1] = lut_out[lrint(y * 65535)] * alpha + tp[1] * (1 - alpha);
+			tp[2] = lut_out[lrint(z * 65535)] * alpha + tp[2] * (1 - alpha);
+
+			tp += target.bpp / 2;
+			op += other.bpp / sizeof(OtherType);
+		}
+	}
+}
+
+
+/** Alpha-blend one planar YUV image onto another, both having 8-bit samples
+ *  and chroma planes at half resolution in both directions.
+ *  @param target image to write to (planes 0, 1, 2 are Y, U, V).
+ *  @param other image to read from (planes 0, 1, 2 are Y, U, V).
+ *  @param alpha_data image giving the alpha for each pixel of other; plane 0 is
+ *  read as 4 bytes per pixel with the alpha in byte 3, full scale being 255.
+ *  @param alpha_stride line strides, in bytes, for alpha_data.
+ *
+ *  Note that the chroma samples are blended once for every luma pixel that is
+ *  visited, not once per chroma sample.
+ */
+static
+void
+alpha_blend_onto_yuv420p(TargetParams const& target, OtherParams const& other, uint8_t* const* alpha_data, int const* alpha_stride)
+{
+	/* Chroma is subsampled horizontally, so chroma columns are at half the luma x */
+	int const target_chroma_x = target.start_x / 2;
+	int const other_chroma_x = other.start_x / 2;
+
+	int ty = target.start_y;
+	int oy = other.start_y;
+	while (ty < target.size.height && oy < other.size.height) {
+		/* Chroma is subsampled vertically too, so chroma lines are at half the luma y */
+		int const target_chroma_y = ty / 2;
+		int const other_chroma_y = oy / 2;
+
+		uint8_t* tY = target.data[0] + (ty * target.stride[0]) + target.start_x;
+		uint8_t* tU = target.data[1] + (target_chroma_y * target.stride[1]) + target_chroma_x;
+		uint8_t* tV = target.data[2] + (target_chroma_y * target.stride[2]) + target_chroma_x;
+
+		uint8_t* oY = other.data[0] + (oy * other.stride[0]) + other.start_x;
+		uint8_t* oU = other.data[1] + (other_chroma_y * other.stride[1]) + other_chroma_x;
+		uint8_t* oV = other.data[2] + (other_chroma_y * other.stride[2]) + other_chroma_x;
+
+		uint8_t* alpha_pixel = alpha_data[0] + (oy * alpha_stride[0]) + other.start_x * 4;
+
+		int tx = target.start_x;
+		int ox = other.start_x;
+		while (tx < target.size.width && ox < other.size.width) {
+			float const a = float(alpha_pixel[3]) / 255;
+			float const keep = 1 - a;
+
+			*tY = *oY * a + *tY * keep;
+			*tU = *oU * a + *tU * keep;
+			*tV = *oV * a + *tV * keep;
+
+			/* Luma moves on every pixel; chroma only after an odd-numbered one */
+			++tY;
+			++oY;
+			if (tx % 2 != 0) {
+				++tU;
+				++tV;
+			}
+			if (ox % 2 != 0) {
+				++oU;
+				++oV;
+			}
+			alpha_pixel += 4;
+
+			++tx;
+			++ox;
+		}
+
+		++ty;
+		++oy;
+	}
+}
+
+
+/** Alpha-blend one planar YUV image onto another, both having samples stored
+ *  in 16-bit words and chroma planes at half resolution in both directions.
+ *  @param target image to write to (planes 0, 1, 2 are Y, U, V).
+ *  @param other image to read from (planes 0, 1, 2 are Y, U, V).
+ *  @param alpha_data image giving the alpha for each pixel of other; plane 0 is
+ *  read as 4 bytes per pixel with the alpha in byte 3, full scale being 255.
+ *  @param alpha_stride line strides, in bytes, for alpha_data.
+ *
+ *  Note that the chroma samples are blended once for every luma pixel that is
+ *  visited, not once per chroma sample.
+ */
+static
+void
+alpha_blend_onto_yuv420p10(TargetParams const& target, OtherParams const& other, uint8_t* const* alpha_data, int const* alpha_stride)
+{
+	/* Chroma is subsampled horizontally, so chroma columns are at half the luma x */
+	int const target_chroma_x = target.start_x / 2;
+	int const other_chroma_x = other.start_x / 2;
+
+	int ty = target.start_y;
+	int oy = other.start_y;
+	while (ty < target.size.height && oy < other.size.height) {
+		/* Chroma is subsampled vertically too, so chroma lines are at half the luma y */
+		int const target_chroma_y = ty / 2;
+		int const other_chroma_y = oy / 2;
+
+		/* Strides are in bytes, so find the line start before casting to 16-bit words */
+		uint16_t* tY = reinterpret_cast<uint16_t*>(target.data[0] + (ty * target.stride[0])) + target.start_x;
+		uint16_t* tU = reinterpret_cast<uint16_t*>(target.data[1] + (target_chroma_y * target.stride[1])) + target_chroma_x;
+		uint16_t* tV = reinterpret_cast<uint16_t*>(target.data[2] + (target_chroma_y * target.stride[2])) + target_chroma_x;
+
+		uint16_t* oY = reinterpret_cast<uint16_t*>(other.data[0] + (oy * other.stride[0])) + other.start_x;
+		uint16_t* oU = reinterpret_cast<uint16_t*>(other.data[1] + (other_chroma_y * other.stride[1])) + other_chroma_x;
+		uint16_t* oV = reinterpret_cast<uint16_t*>(other.data[2] + (other_chroma_y * other.stride[2])) + other_chroma_x;
+
+		uint8_t* alpha_pixel = alpha_data[0] + (oy * alpha_stride[0]) + other.start_x * 4;
+
+		int tx = target.start_x;
+		int ox = other.start_x;
+		while (tx < target.size.width && ox < other.size.width) {
+			float const a = float(alpha_pixel[3]) / 255;
+			float const keep = 1 - a;
+
+			*tY = *oY * a + *tY * keep;
+			*tU = *oU * a + *tU * keep;
+			*tV = *oV * a + *tV * keep;
+
+			/* Luma moves on every pixel; chroma only after an odd-numbered one */
+			++tY;
+			++oY;
+			if (tx % 2 != 0) {
+				++tU;
+				++tV;
+			}
+			if (ox % 2 != 0) {
+				++oU;
+				++oV;
+			}
+			alpha_pixel += 4;
+
+			++tx;
+			++ox;
+		}
+
+		++ty;
+		++oy;
+	}
+}
+
+
+/** Alpha-blend one planar YUV image onto another, both having samples stored
+ *  in 16-bit words and chroma planes at half resolution horizontally only.
+ *  @param target image to write to (planes 0, 1, 2 are Y, U, V).
+ *  @param other image to read from (planes 0, 1, 2 are Y, U, V).
+ *  @param alpha_data image giving the alpha for each pixel of other; plane 0 is
+ *  read as 4 bytes per pixel with the alpha in byte 3, full scale being 255.
+ *  @param alpha_stride line strides, in bytes, for alpha_data.
+ *
+ *  Note that the chroma samples are blended once for every luma pixel that is
+ *  visited, not once per chroma sample.
+ */
+static
+void
+alpha_blend_onto_yuv422p9or10le(TargetParams const& target, OtherParams const& other, uint8_t* const* alpha_data, int const* alpha_stride)
+{
+	/* Chroma is subsampled horizontally, so chroma columns are at half the luma x */
+	int const target_chroma_x = target.start_x / 2;
+	int const other_chroma_x = other.start_x / 2;
+
+	int ty = target.start_y;
+	int oy = other.start_y;
+	while (ty < target.size.height && oy < other.size.height) {
+		/* Chroma has full vertical resolution here, so every plane uses the same line index.
+		 * Strides are in bytes, so find the line start before casting to 16-bit words.
+		 */
+		uint16_t* tY = reinterpret_cast<uint16_t*>(target.data[0] + (ty * target.stride[0])) + target.start_x;
+		uint16_t* tU = reinterpret_cast<uint16_t*>(target.data[1] + (ty * target.stride[1])) + target_chroma_x;
+		uint16_t* tV = reinterpret_cast<uint16_t*>(target.data[2] + (ty * target.stride[2])) + target_chroma_x;
+
+		uint16_t* oY = reinterpret_cast<uint16_t*>(other.data[0] + (oy * other.stride[0])) + other.start_x;
+		uint16_t* oU = reinterpret_cast<uint16_t*>(other.data[1] + (oy * other.stride[1])) + other_chroma_x;
+		uint16_t* oV = reinterpret_cast<uint16_t*>(other.data[2] + (oy * other.stride[2])) + other_chroma_x;
+
+		uint8_t* alpha_pixel = alpha_data[0] + (oy * alpha_stride[0]) + other.start_x * 4;
+
+		int tx = target.start_x;
+		int ox = other.start_x;
+		while (tx < target.size.width && ox < other.size.width) {
+			float const a = float(alpha_pixel[3]) / 255;
+			float const keep = 1 - a;
+
+			*tY = *oY * a + *tY * keep;
+			*tU = *oU * a + *tU * keep;
+			*tV = *oV * a + *tV * keep;
+
+			/* Luma moves on every pixel; chroma only after an odd-numbered one */
+			++tY;
+			++oY;
+			if (tx % 2 != 0) {
+				++tU;
+				++tV;
+			}
+			if (ox % 2 != 0) {
+				++oU;
+				++oV;
+			}
+			alpha_pixel += 4;
+
+			++tx;
+			++ox;
+		}
+
+		++ty;
+		++oy;
+	}
+}
+
+