opj_t1_encode_cblks: fix UBSAN signed integer overflow
[openjpeg.git] / src / lib / openjp2 / t1.c
index 54fb814a2ad650e48707ee16bc05496ad57327df..f6f7671190cd5bc5a40a8ccac9b349abc0489e43 100644 (file)
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#define OPJ_SKIP_POISON
 #include "opj_includes.h"
+
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+#if defined(__GNUC__)
+#pragma GCC poison malloc calloc realloc free
+#endif
+
 #include "t1_luts.h"
 
 /** @defgroup T1 T1 - Implementation of the tier-1 coding */
@@ -335,7 +348,7 @@ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1,
     if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
             (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
         OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
-        v = opj_int_abs(*datap) & one ? 1 : 0;
+        v = (opj_int_abs(*datap) & one) ? 1 : 0;
 #ifdef DEBUG_ENC_SIG
         fprintf(stderr, "   ctxt1=%d\n", ctxt1);
 #endif
@@ -722,7 +735,7 @@ static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1,
         OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags);
         *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap),
                                           (OPJ_UINT32)bpno);
-        v = opj_int_abs(*datap) & one ? 1 : 0;
+        v = (opj_int_abs(*datap) & one) ? 1 : 0;
 #ifdef DEBUG_ENC_REF
         fprintf(stderr, "  ctxt=%d\n", ctxt);
 #endif
@@ -1052,6 +1065,7 @@ static void opj_t1_enc_clnpass_step(
     for (ci = runlen; ci < lim; ++ci) {
         OPJ_UINT32 vsc;
         opj_flag_t flags;
+        OPJ_UINT32 ctxt1;
 
         flags = *flagsp;
 
@@ -1060,12 +1074,12 @@ static void opj_t1_enc_clnpass_step(
         }
 
         if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {
-            OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
+            ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
 #ifdef DEBUG_ENC_CLN
             printf("   ctxt1=%d\n", ctxt1);
 #endif
             opj_mqc_setcurctx(mqc, ctxt1);
-            v = opj_int_abs(*datap) & one ? 1 : 0;
+            v = (opj_int_abs(*datap) & one) ? 1 : 0;
             opj_mqc_encode(mqc, v);
             if (v) {
                 OPJ_UINT32 ctxt2, spb;
@@ -1439,7 +1453,7 @@ static OPJ_BOOL opj_t1_allocate_buffers(
     if (!t1->encoder) {
         OPJ_UINT32 datasize = w * h;
 
-        if (datasize > (size_t)t1->datasize) {
+        if (datasize > t1->datasize) {
             opj_aligned_free(t1->data);
             t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
             if (!t1->data) {
@@ -1604,7 +1618,8 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
         cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
         cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
 
-        cblk->decoded_data = opj_aligned_malloc(cblk_w * cblk_h * sizeof(OPJ_INT32));
+        cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) *
+                             cblk_w * cblk_h);
         if (cblk->decoded_data == NULL) {
             if (job->p_manager_mutex) {
                 opj_mutex_lock(job->p_manager_mutex);
@@ -1619,7 +1634,7 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
             return;
         }
         /* Zero-init required */
-        memset(cblk->decoded_data, 0, cblk_w * cblk_h * sizeof(OPJ_INT32));
+        memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h);
     } else if (cblk->decoded_data) {
         /* Not sure if that code path can happen, but better be */
         /* safe than sorry */
@@ -1710,15 +1725,36 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
                 datap[i] /= 2;
             }
         } else {        /* if (tccp->qmfbid == 0) */
-            for (i = 0; i < cblk_size; ++i) {
+            i = 0;
+#ifdef __SSE2__
+            {
+                const __m128 xmm_stepsize = _mm_set1_ps(band->stepsize);
+                for (; i < (cblk_size & ~15U); i += 16) {
+                    __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                           datap + 0)));
+                    __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                           datap + 4)));
+                    __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                           datap + 8)));
+                    __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                           datap + 12)));
+                    _mm_store_ps((float*)(datap +  0), _mm_mul_ps(xmm0_data, xmm_stepsize));
+                    _mm_store_ps((float*)(datap +  4), _mm_mul_ps(xmm1_data, xmm_stepsize));
+                    _mm_store_ps((float*)(datap +  8), _mm_mul_ps(xmm2_data, xmm_stepsize));
+                    _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
+                    datap += 16;
+                }
+            }
+#endif
+            for (; i < cblk_size; ++i) {
                 OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize;
                 memcpy(datap, &tmp, sizeof(tmp));
                 datap++;
             }
         }
     } else if (tccp->qmfbid == 1) {
-        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(size_t)y * tile_w +
-                                                   (size_t)x];
+        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
+                                                       (OPJ_SIZE_T)x];
         for (j = 0; j < cblk_h; ++j) {
             i = 0;
             for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
@@ -1726,19 +1762,19 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
                 OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
                 OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
                 OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
-                ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 0U] = tmp0 / 2;
-                ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 1U] = tmp1 / 2;
-                ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 2U] = tmp2 / 2;
-                ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 3U] = tmp3 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
             }
             for (; i < cblk_w; ++i) {
                 OPJ_INT32 tmp = datap[(j * cblk_w) + i];
-                ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i] = tmp / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
             }
         }
     } else {        /* if (tccp->qmfbid == 0) */
-        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(size_t)y *
-                                                     tile_w + (size_t)x];
+        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
+                                                         tile_w + (OPJ_SIZE_T)x];
         for (j = 0; j < cblk_h; ++j) {
             OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
             for (i = 0; i < cblk_w; ++i) {
@@ -1963,6 +1999,10 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
         }
     } else if (cblk->numchunks == 1) {
         cblkdata = cblk->chunks[0].data;
+    } else {
+        /* Not sure if that can happen in practice, but avoid Coverity to */
+        /* think we will dereference a null cblkdta pointer */
+        return OPJ_TRUE;
     }
 
     /* For subtile decoding, directly decode in the decoded_data buffer of */
@@ -2100,7 +2140,7 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
                         OPJ_UINT32 cblk_w;
                         OPJ_UINT32 cblk_h;
                         OPJ_UINT32 i, j, tileLineAdvance;
-                        size_t tileIndex = 0;
+                        OPJ_SIZE_T tileIndex = 0;
 
                         OPJ_INT32 x = cblk->x0 - band->x0;
                         OPJ_INT32 y = cblk->y0 - band->y0;
@@ -2124,13 +2164,22 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
                         cblk_h = t1->h;
                         tileLineAdvance = tile_w - cblk_w;
 
-                        tiledp = &tilec->data[(size_t)y * tile_w + (size_t)x];
+                        tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
                         t1->data = tiledp;
                         t1->data_stride = tile_w;
                         if (tccp->qmfbid == 1) {
+                            /* Do multiplication on unsigned type, even if the
+                             * underlying type is signed, to avoid potential
+                             * int overflow on large value (the output will be
+                             * incorrect in such situation, but whatever...)
+                             * This assumes complement-to-2 signed integer
+                             * representation
+                             * Fixes https://github.com/uclouvain/openjpeg/issues/1053
+                             */
+                            OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
                             for (j = 0; j < cblk_h; ++j) {
                                 for (i = 0; i < cblk_w; ++i) {
-                                    tiledp[tileIndex] *= (1 << T1_NMSEDEC_FRACBITS);
+                                    tiledp_u[tileIndex] <<= T1_NMSEDEC_FRACBITS;
                                     tileIndex++;
                                 }
                                 tileIndex += tileLineAdvance;