X-Git-Url: https://main.carlh.net/gitweb/?a=blobdiff_plain;f=src%2Flib%2Fopenjp2%2Ft1.c;h=f6f7671190cd5bc5a40a8ccac9b349abc0489e43;hb=a1d32a596a94280178c44a55d7e7f1acd992ed5d;hp=54fb814a2ad650e48707ee16bc05496ad57327df;hpb=559d16e8f43a0cd090d217d7d111820989299b85;p=openjpeg.git diff --git a/src/lib/openjp2/t1.c b/src/lib/openjp2/t1.c index 54fb814a..f6f76711 100644 --- a/src/lib/openjp2/t1.c +++ b/src/lib/openjp2/t1.c @@ -38,7 +38,20 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#define OPJ_SKIP_POISON #include "opj_includes.h" + +#ifdef __SSE__ +#include <xmmintrin.h> +#endif +#ifdef __SSE2__ +#include <emmintrin.h> +#endif + +#if defined(__GNUC__) +#pragma GCC poison malloc calloc realloc free +#endif + #include "t1_luts.h" /** @defgroup T1 T1 - Implementation of the tier-1 coding */ @@ -335,7 +348,7 @@ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1, if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); - v = opj_int_abs(*datap) & one ? 1 : 0; + v = (opj_int_abs(*datap) & one) ? 1 : 0; #ifdef DEBUG_ENC_SIG fprintf(stderr, " ctxt1=%d\n", ctxt1); #endif @@ -722,7 +735,7 @@ static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1, OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags); *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap), (OPJ_UINT32)bpno); - v = opj_int_abs(*datap) & one ? 1 : 0; + v = (opj_int_abs(*datap) & one) ? 
1 : 0; #ifdef DEBUG_ENC_REF fprintf(stderr, " ctxt=%d\n", ctxt); #endif @@ -1052,6 +1065,7 @@ static void opj_t1_enc_clnpass_step( for (ci = runlen; ci < lim; ++ci) { OPJ_UINT32 vsc; opj_flag_t flags; + OPJ_UINT32 ctxt1; flags = *flagsp; @@ -1060,12 +1074,12 @@ static void opj_t1_enc_clnpass_step( } if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) { - OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); + ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); #ifdef DEBUG_ENC_CLN printf(" ctxt1=%d\n", ctxt1); #endif opj_mqc_setcurctx(mqc, ctxt1); - v = opj_int_abs(*datap) & one ? 1 : 0; + v = (opj_int_abs(*datap) & one) ? 1 : 0; opj_mqc_encode(mqc, v); if (v) { OPJ_UINT32 ctxt2, spb; @@ -1439,7 +1453,7 @@ static OPJ_BOOL opj_t1_allocate_buffers( if (!t1->encoder) { OPJ_UINT32 datasize = w * h; - if (datasize > (size_t)t1->datasize) { + if (datasize > t1->datasize) { opj_aligned_free(t1->data); t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32)); if (!t1->data) { @@ -1604,7 +1618,8 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0); cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0); - cblk->decoded_data = opj_aligned_malloc(cblk_w * cblk_h * sizeof(OPJ_INT32)); + cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) * + cblk_w * cblk_h); if (cblk->decoded_data == NULL) { if (job->p_manager_mutex) { opj_mutex_lock(job->p_manager_mutex); @@ -1619,7 +1634,7 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) return; } /* Zero-init required */ - memset(cblk->decoded_data, 0, cblk_w * cblk_h * sizeof(OPJ_INT32)); + memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h); } else if (cblk->decoded_data) { /* Not sure if that code path can happen, but better be */ /* safe than sorry */ @@ -1710,15 +1725,36 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) datap[i] /= 2; } } else { /* if 
(tccp->qmfbid == 0) */ - for (i = 0; i < cblk_size; ++i) { + i = 0; +#ifdef __SSE2__ + { + const __m128 xmm_stepsize = _mm_set1_ps(band->stepsize); + for (; i < (cblk_size & ~15U); i += 16) { + __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)( + datap + 0))); + __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)( + datap + 4))); + __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)( + datap + 8))); + __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)( + datap + 12))); + _mm_store_ps((float*)(datap + 0), _mm_mul_ps(xmm0_data, xmm_stepsize)); + _mm_store_ps((float*)(datap + 4), _mm_mul_ps(xmm1_data, xmm_stepsize)); + _mm_store_ps((float*)(datap + 8), _mm_mul_ps(xmm2_data, xmm_stepsize)); + _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize)); + datap += 16; + } + } +#endif + for (; i < cblk_size; ++i) { OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize; memcpy(datap, &tmp, sizeof(tmp)); datap++; } } } else if (tccp->qmfbid == 1) { - OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(size_t)y * tile_w + - (size_t)x]; + OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + + (OPJ_SIZE_T)x]; for (j = 0; j < cblk_h; ++j) { i = 0; for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) { @@ -1726,19 +1762,19 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U]; OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U]; OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U]; - ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 0U] = tmp0 / 2; - ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 1U] = tmp1 / 2; - ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 2U] = tmp2 / 2; - ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 3U] = tmp3 / 2; + ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2; + ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2; + ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 
2U] = tmp2 / 2; + ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2; } for (; i < cblk_w; ++i) { OPJ_INT32 tmp = datap[(j * cblk_w) + i]; - ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i] = tmp / 2; + ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2; } } } else { /* if (tccp->qmfbid == 0) */ - OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(size_t)y * - tile_w + (size_t)x]; + OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y * + tile_w + (OPJ_SIZE_T)x]; for (j = 0; j < cblk_h; ++j) { OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp; for (i = 0; i < cblk_w; ++i) { @@ -1963,6 +1999,10 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1, } } else if (cblk->numchunks == 1) { cblkdata = cblk->chunks[0].data; + } else { + /* Not sure if that can happen in practice, but keep Coverity from */ + /* thinking we would dereference a null cblkdata pointer */ + return OPJ_TRUE; } /* For subtile decoding, directly decode in the decoded_data buffer of */ @@ -2100,7 +2140,7 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1, OPJ_UINT32 cblk_w; OPJ_UINT32 cblk_h; OPJ_UINT32 i, j, tileLineAdvance; - size_t tileIndex = 0; + OPJ_SIZE_T tileIndex = 0; OPJ_INT32 x = cblk->x0 - band->x0; OPJ_INT32 y = cblk->y0 - band->y0; @@ -2124,13 +2164,22 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1, cblk_h = t1->h; tileLineAdvance = tile_w - cblk_w; - tiledp = &tilec->data[(size_t)y * tile_w + (size_t)x]; + tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x]; t1->data = tiledp; t1->data_stride = tile_w; if (tccp->qmfbid == 1) { + /* Do multiplication on unsigned type, even if the + * underlying type is signed, to avoid potential + * int overflow on large value (the output will be + * incorrect in such situation, but whatever...) 
+ * This assumes two's-complement signed integer + * representation + * Fixes https://github.com/uclouvain/openjpeg/issues/1053 + */ + OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp; for (j = 0; j < cblk_h; ++j) { for (i = 0; i < cblk_w; ++i) { - tiledp[tileIndex] *= (1 << T1_NMSEDEC_FRACBITS); + tiledp_u[tileIndex] <<= T1_NMSEDEC_FRACBITS; tileIndex++; } tileIndex += tileLineAdvance;