* POSSIBILITY OF SUCH DAMAGE.
*/
+#define OPJ_SKIP_POISON
#include "opj_includes.h"
+
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+#if defined(__GNUC__)
+#pragma GCC poison malloc calloc realloc free
+#endif
+
#include "t1_luts.h"
/** @defgroup T1 T1 - Implementation of the tier-1 coding */
if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
(flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
- v = opj_int_abs(*datap) & one ? 1 : 0;
+ v = (opj_int_abs(*datap) & one) ? 1 : 0;
#ifdef DEBUG_ENC_SIG
fprintf(stderr, " ctxt1=%d\n", ctxt1);
#endif
OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags);
*nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap),
(OPJ_UINT32)bpno);
- v = opj_int_abs(*datap) & one ? 1 : 0;
+ v = (opj_int_abs(*datap) & one) ? 1 : 0;
#ifdef DEBUG_ENC_REF
fprintf(stderr, " ctxt=%d\n", ctxt);
#endif
for (ci = runlen; ci < lim; ++ci) {
OPJ_UINT32 vsc;
opj_flag_t flags;
+ OPJ_UINT32 ctxt1;
flags = *flagsp;
}
if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {
- OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
+ ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
#ifdef DEBUG_ENC_CLN
printf(" ctxt1=%d\n", ctxt1);
#endif
opj_mqc_setcurctx(mqc, ctxt1);
- v = opj_int_abs(*datap) & one ? 1 : 0;
+ v = (opj_int_abs(*datap) & one) ? 1 : 0;
opj_mqc_encode(mqc, v);
if (v) {
OPJ_UINT32 ctxt2, spb;
if (!t1->encoder) {
OPJ_UINT32 datasize = w * h;
- if (datasize > (size_t)t1->datasize) {
+ if (datasize > t1->datasize) {
opj_aligned_free(t1->data);
t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
if (!t1->data) {
cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
- cblk->decoded_data = opj_aligned_malloc(cblk_w * cblk_h * sizeof(OPJ_INT32));
+ cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) *
+ cblk_w * cblk_h);
if (cblk->decoded_data == NULL) {
if (job->p_manager_mutex) {
opj_mutex_lock(job->p_manager_mutex);
return;
}
/* Zero-init required */
- memset(cblk->decoded_data, 0, cblk_w * cblk_h * sizeof(OPJ_INT32));
+ memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h);
} else if (cblk->decoded_data) {
/* Not sure if that code path can happen, but better be */
/* safe than sorry */
datap[i] /= 2;
}
} else { /* if (tccp->qmfbid == 0) */
- for (i = 0; i < cblk_size; ++i) {
+ i = 0;
+#ifdef __SSE2__
+ {
+ const __m128 xmm_stepsize = _mm_set1_ps(band->stepsize);
+ for (; i < (cblk_size & ~15U); i += 16) {
+ __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+ datap + 0)));
+ __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+ datap + 4)));
+ __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+ datap + 8)));
+ __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+ datap + 12)));
+ _mm_store_ps((float*)(datap + 0), _mm_mul_ps(xmm0_data, xmm_stepsize));
+ _mm_store_ps((float*)(datap + 4), _mm_mul_ps(xmm1_data, xmm_stepsize));
+ _mm_store_ps((float*)(datap + 8), _mm_mul_ps(xmm2_data, xmm_stepsize));
+ _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
+ datap += 16;
+ }
+ }
+#endif
+ for (; i < cblk_size; ++i) {
OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize;
memcpy(datap, &tmp, sizeof(tmp));
datap++;
}
}
} else if (tccp->qmfbid == 1) {
- OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(size_t)y * tile_w +
- (size_t)x];
+ OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
+ (OPJ_SIZE_T)x];
for (j = 0; j < cblk_h; ++j) {
i = 0;
for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
- ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 0U] = tmp0 / 2;
- ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 1U] = tmp1 / 2;
- ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 2U] = tmp2 / 2;
- ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i + 3U] = tmp3 / 2;
+ ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
+ ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
+ ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
+ ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
}
for (; i < cblk_w; ++i) {
OPJ_INT32 tmp = datap[(j * cblk_w) + i];
- ((OPJ_INT32*)tiledp)[(j * (size_t)tile_w) + i] = tmp / 2;
+ ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
}
}
} else { /* if (tccp->qmfbid == 0) */
- OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(size_t)y *
- tile_w + (size_t)x];
+ OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
+ tile_w + (OPJ_SIZE_T)x];
for (j = 0; j < cblk_h; ++j) {
OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
for (i = 0; i < cblk_w; ++i) {
}
} else if (cblk->numchunks == 1) {
cblkdata = cblk->chunks[0].data;
+ } else {
+ /* Not sure if that can happen in practice, but avoid Coverity to */
+ /* think we will dereference a null cblkdta pointer */
+ return OPJ_TRUE;
}
/* For subtile decoding, directly decode in the decoded_data buffer of */
OPJ_UINT32 cblk_w;
OPJ_UINT32 cblk_h;
OPJ_UINT32 i, j, tileLineAdvance;
- size_t tileIndex = 0;
+ OPJ_SIZE_T tileIndex = 0;
OPJ_INT32 x = cblk->x0 - band->x0;
OPJ_INT32 y = cblk->y0 - band->y0;
cblk_h = t1->h;
tileLineAdvance = tile_w - cblk_w;
- tiledp = &tilec->data[(size_t)y * tile_w + (size_t)x];
+ tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
t1->data = tiledp;
t1->data_stride = tile_w;
if (tccp->qmfbid == 1) {
+ /* Do multiplication on unsigned type, even if the
+ * underlying type is signed, to avoid potential
+ * int overflow on large value (the output will be
+ * incorrect in such situation, but whatever...)
+ * This assumes complement-to-2 signed integer
+ * representation
+ * Fixes https://github.com/uclouvain/openjpeg/issues/1053
+ */
+ OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
for (j = 0; j < cblk_h; ++j) {
for (i = 0; i < cblk_w; ++i) {
- tiledp[tileIndex] *= (1 << T1_NMSEDEC_FRACBITS);
+ tiledp_u[tileIndex] <<= T1_NMSEDEC_FRACBITS;
tileIndex++;
}
tileIndex += tileLineAdvance;