struct opj_j2k: remove unused fields, and add some documentation
[openjpeg.git] / src / lib / openjp2 / t1.c
index da26227d1f0615cb7236a355019faa544c3b701f..f6f7671190cd5bc5a40a8ccac9b349abc0489e43 100644 (file)
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#define OPJ_SKIP_POISON
 #include "opj_includes.h"
+
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+#if defined(__GNUC__)
+#pragma GCC poison malloc calloc realloc free
+#endif
+
 #include "t1_luts.h"
 
 /** @defgroup T1 T1 - Implementation of the tier-1 coding */
@@ -184,12 +197,18 @@ Decode 1 code-block
 @param orient
 @param roishift Region of interest shifting value
 @param cblksty Code-block style
+@param p_manager the event manager
+@param p_manager_mutex mutex for the event manager
+@param check_pterm whether PTERM correct termination should be checked
 */
 static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
                                    opj_tcd_cblk_dec_t* cblk,
                                    OPJ_UINT32 orient,
                                    OPJ_UINT32 roishift,
-                                   OPJ_UINT32 cblksty);
+                                   OPJ_UINT32 cblksty,
+                                   opj_event_mgr_t *p_manager,
+                                   opj_mutex_t* p_manager_mutex,
+                                   OPJ_BOOL check_pterm);
 
 static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
                                         OPJ_UINT32 w,
@@ -329,7 +348,7 @@ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1,
     if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
             (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
         OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
-        v = opj_int_abs(*datap) & one ? 1 : 0;
+        v = (opj_int_abs(*datap) & one) ? 1 : 0;
 #ifdef DEBUG_ENC_SIG
         fprintf(stderr, "   ctxt1=%d\n", ctxt1);
 #endif
@@ -345,7 +364,7 @@ static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1,
                                 flagsp[-1], flagsp[1],
                                 ci);
             OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu);
-            v = *datap < 0 ? 1 : 0;
+            v = *datap < 0 ? 1U : 0U;
             *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
                                               (OPJ_UINT32)bpno);
 #ifdef DEBUG_ENC_SIG
@@ -376,14 +395,14 @@ static INLINE void opj_t1_dec_sigpass_step_raw(
     OPJ_UINT32 ci)
 {
     OPJ_UINT32 v;
-    opj_raw_t *raw = &(t1->raw);       /* RAW component */
+    opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
 
     OPJ_UINT32 const flags = *flagsp;
 
     if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
             (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
-        if (opj_raw_decode(raw)) {
-            v = opj_raw_decode(raw);
+        if (opj_mqc_raw_decode(mqc)) {
+            v = opj_mqc_raw_decode(mqc);
             *datap = v ? -oneplushalf : oneplushalf;
             opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
         }
@@ -542,66 +561,59 @@ static void opj_t1_dec_sigpass_raw(
 {
     OPJ_INT32 one, half, oneplushalf;
     OPJ_UINT32 i, j, k;
-    OPJ_INT32 *data1 = t1->data;
+    OPJ_INT32 *data = t1->data;
     opj_flag_t *flagsp = &T1_FLAGS(0, 0);
+    const OPJ_UINT32 l_w = t1->w;
     one = 1 << bpno;
     half = one >> 1;
     oneplushalf = one | half;
 
-    for (k = 0; k < (t1->h & ~3U); k += 4) {
-        for (i = 0; i < t1->w; ++i) {
-            OPJ_INT32* data2 = data1 + i;
-            opj_t1_dec_sigpass_step_raw(
-                t1,
-                flagsp,
-                data2,
-                oneplushalf,
-                cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
-                0U);
-            data2 += t1->w;
-            opj_t1_dec_sigpass_step_raw(
-                t1,
-                flagsp,
-                data2,
-                oneplushalf,
-                OPJ_FALSE, /* vsc */
-                1U);
-            data2 += t1->w;
-            opj_t1_dec_sigpass_step_raw(
-                t1,
-                flagsp,
-                data2,
-                oneplushalf,
-                OPJ_FALSE, /* vsc */
-                2U);
-            data2 += t1->w;
-            opj_t1_dec_sigpass_step_raw(
-                t1,
-                flagsp,
-                data2,
-                oneplushalf,
-                OPJ_FALSE, /* vsc */
-                3U);
-            data2 += t1->w;
-            flagsp ++;
+    for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
+        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
+            opj_flag_t flags = *flagsp;
+            if (flags != 0) {
+                opj_t1_dec_sigpass_step_raw(
+                    t1,
+                    flagsp,
+                    data,
+                    oneplushalf,
+                    cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
+                    0U);
+                opj_t1_dec_sigpass_step_raw(
+                    t1,
+                    flagsp,
+                    data + l_w,
+                    oneplushalf,
+                    OPJ_FALSE, /* vsc */
+                    1U);
+                opj_t1_dec_sigpass_step_raw(
+                    t1,
+                    flagsp,
+                    data + 2 * l_w,
+                    oneplushalf,
+                    OPJ_FALSE, /* vsc */
+                    2U);
+                opj_t1_dec_sigpass_step_raw(
+                    t1,
+                    flagsp,
+                    data + 3 * l_w,
+                    oneplushalf,
+                    OPJ_FALSE, /* vsc */
+                    3U);
+            }
         }
-        data1 += t1->w << 2;
-        flagsp += 2;
     }
     if (k < t1->h) {
-        for (i = 0; i < t1->w; ++i) {
-            OPJ_INT32* data2 = data1 + i;
+        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
             for (j = 0; j < t1->h - k; ++j) {
                 opj_t1_dec_sigpass_step_raw(
                     t1,
                     flagsp,
-                    data2,
+                    data + j * l_w,
                     oneplushalf,
                     cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
                     j);
-                data2 += t1->w;
             }
-            flagsp ++;
         }
     }
 }
@@ -723,7 +735,7 @@ static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1,
         OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags);
         *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap),
                                           (OPJ_UINT32)bpno);
-        v = opj_int_abs(*datap) & one ? 1 : 0;
+        v = (opj_int_abs(*datap) & one) ? 1 : 0;
 #ifdef DEBUG_ENC_REF
         fprintf(stderr, "  ctxt=%d\n", ctxt);
 #endif
@@ -747,11 +759,11 @@ static INLINE void opj_t1_dec_refpass_step_raw(
 {
     OPJ_UINT32 v;
 
-    opj_raw_t *raw = &(t1->raw);       /* RAW component */
+    opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
 
     if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
             (T1_SIGMA_THIS << (ci * 3U))) {
-        v = opj_raw_decode(raw);
+        v = opj_mqc_raw_decode(mqc);
         *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
         *flagsp |= T1_MU_THIS << (ci * 3U);
     }
@@ -898,59 +910,52 @@ static void opj_t1_dec_refpass_raw(
 {
     OPJ_INT32 one, poshalf;
     OPJ_UINT32 i, j, k;
-    OPJ_INT32 *data1 = t1->data;
+    OPJ_INT32 *data = t1->data;
     opj_flag_t *flagsp = &T1_FLAGS(0, 0);
+    const OPJ_UINT32 l_w = t1->w;
     one = 1 << bpno;
     poshalf = one >> 1;
-    for (k = 0; k < (t1->h & ~3U); k += 4) {
-        for (i = 0; i < t1->w; ++i) {
-            OPJ_INT32 *data2 = data1 + i;
-            opj_t1_dec_refpass_step_raw(
-                t1,
-                flagsp,
-                data2,
-                poshalf,
-                0U);
-            data2 += t1->w;
-            opj_t1_dec_refpass_step_raw(
-                t1,
-                flagsp,
-                data2,
-                poshalf,
-                1U);
-            data2 += t1->w;
-            opj_t1_dec_refpass_step_raw(
-                t1,
-                flagsp,
-                data2,
-                poshalf,
-                2U);
-            data2 += t1->w;
-            opj_t1_dec_refpass_step_raw(
-                t1,
-                flagsp,
-                data2,
-                poshalf,
-                3U);
-            data2 += t1->w;
-            flagsp ++;
+    for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
+        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
+            opj_flag_t flags = *flagsp;
+            if (flags != 0) {
+                opj_t1_dec_refpass_step_raw(
+                    t1,
+                    flagsp,
+                    data,
+                    poshalf,
+                    0U);
+                opj_t1_dec_refpass_step_raw(
+                    t1,
+                    flagsp,
+                    data + l_w,
+                    poshalf,
+                    1U);
+                opj_t1_dec_refpass_step_raw(
+                    t1,
+                    flagsp,
+                    data + 2 * l_w,
+                    poshalf,
+                    2U);
+                opj_t1_dec_refpass_step_raw(
+                    t1,
+                    flagsp,
+                    data + 3 * l_w,
+                    poshalf,
+                    3U);
+            }
         }
-        data1 += t1->w << 2;
-        flagsp += 2;
     }
     if (k < t1->h) {
-        for (i = 0; i < t1->w; ++i) {
-            OPJ_INT32 *data2 = data1 + i;
-            for (j = k; j < t1->h; ++j) {
+        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
+            for (j = 0; j < t1->h - k; ++j) {
                 opj_t1_dec_refpass_step_raw(
                     t1,
                     flagsp,
-                    data2,
+                    data + j * l_w,
                     poshalf,
-                    j - k);
-                data2 += t1->w;
+                    j);
             }
-            flagsp ++;
         }
     }
 }
@@ -1060,6 +1065,7 @@ static void opj_t1_enc_clnpass_step(
     for (ci = runlen; ci < lim; ++ci) {
         OPJ_UINT32 vsc;
         opj_flag_t flags;
+        OPJ_UINT32 ctxt1;
 
         flags = *flagsp;
 
@@ -1068,12 +1074,12 @@ static void opj_t1_enc_clnpass_step(
         }
 
         if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {
-            OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
+            ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
 #ifdef DEBUG_ENC_CLN
             printf("   ctxt1=%d\n", ctxt1);
 #endif
             opj_mqc_setcurctx(mqc, ctxt1);
-            v = opj_int_abs(*datap) & one ? 1 : 0;
+            v = (opj_int_abs(*datap) & one) ? 1 : 0;
             opj_mqc_encode(mqc, v);
             if (v) {
                 OPJ_UINT32 ctxt2, spb;
@@ -1091,7 +1097,7 @@ LABEL_PARTIAL:
 #endif
                 opj_mqc_setcurctx(mqc, ctxt2);
 
-                v = *datap < 0 ? 1 : 0;
+                v = *datap < 0 ? 1U : 0U;
                 spb = opj_t1_getspb(lu);
 #ifdef DEBUG_ENC_CLN
                 printf("   spb=%d\n", spb);
@@ -1270,21 +1276,21 @@ static void opj_t1_enc_clnpass(
                                             l_w, 0, mqc, curctx, \
                                             v, a, c, ct, oneplushalf, vsc); \
                         partial = OPJ_FALSE; \
-                        /* falltrough */ \
+                        /* FALLTHRU */ \
                     case 1: \
                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                             flags, flagsp, flags_stride, data, \
                                             l_w, 1, mqc, curctx, \
                                             v, a, c, ct, oneplushalf, OPJ_FALSE); \
                         partial = OPJ_FALSE; \
-                        /* falltrough */ \
+                        /* FALLTHRU */ \
                     case 2: \
                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                             flags, flagsp, flags_stride, data, \
                                             l_w, 2, mqc, curctx, \
                                             v, a, c, ct, oneplushalf, OPJ_FALSE); \
                         partial = OPJ_FALSE; \
-                        /* falltrough */ \
+                        /* FALLTHRU */ \
                     case 3: \
                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                             flags, flagsp, flags_stride, data, \
@@ -1434,44 +1440,27 @@ static OPJ_BOOL opj_t1_allocate_buffers(
     OPJ_UINT32 w,
     OPJ_UINT32 h)
 {
-    size_t flagssize;
+    OPJ_UINT32 flagssize;
     OPJ_UINT32 flags_stride;
 
+    /* No risk of overflow. Prior checks ensure these asserts are met. */
+    /* They are per the specification */
+    assert(w <= 1024);
+    assert(h <= 1024);
+    assert(w * h <= 4096);
+
     /* encoder uses tile buffer, so no need to allocate */
     if (!t1->encoder) {
-        size_t datasize;
-
-#if (SIZE_MAX / 0xFFFFFFFFU) < 0xFFFFFFFFU /* UINT32_MAX */
-        /* Overflow check */
-        if ((w > 0U) && ((size_t)h > (SIZE_MAX / (size_t)w))) {
-            /* FIXME event manager error callback */
-            return OPJ_FALSE;
-        }
-#endif
-        datasize = (size_t)w * h;
+        OPJ_UINT32 datasize = w * h;
 
-        /* Overflow check */
-        if (datasize > (SIZE_MAX / sizeof(OPJ_INT32))) {
-            /* FIXME event manager error callback */
-            return OPJ_FALSE;
-        }
-
-        if (datasize > (size_t)t1->datasize) {
+        if (datasize > t1->datasize) {
             opj_aligned_free(t1->data);
             t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
             if (!t1->data) {
                 /* FIXME event manager error callback */
                 return OPJ_FALSE;
             }
-#if SIZE_MAX > 0xFFFFFFFFU /* UINT32_MAX */
-            /* TODO remove this if t1->datasize type changes to size_t */
-            /* Overflow check */
-            if (datasize > (size_t)0xFFFFFFFFU /* UINT32_MAX */) {
-                /* FIXME event manager error callback */
-                return OPJ_FALSE;
-            }
-#endif
-            t1->datasize = (OPJ_UINT32)datasize;
+            t1->datasize = datasize;
         }
         /* memset first arg is declared to never be null by gcc */
         if (t1->data != NULL) {
@@ -1479,40 +1468,18 @@ static OPJ_BOOL opj_t1_allocate_buffers(
         }
     }
 
-    /* Overflow check */
-    if (w > (0xFFFFFFFFU /* UINT32_MAX */ - 2U)) {
-        /* FIXME event manager error callback */
-        return OPJ_FALSE;
-    }
     flags_stride = w + 2U; /* can't be 0U */
 
-#if (SIZE_MAX - 3U) < 0xFFFFFFFFU /* UINT32_MAX */
-    /* Overflow check */
-    if (h > (0xFFFFFFFFU /* UINT32_MAX */ - 3U)) {
-        /* FIXME event manager error callback */
-        return OPJ_FALSE;
-    }
-#endif
     flagssize = (h + 3U) / 4U + 2U;
 
-    /* Overflow check */
-    if (flagssize > (SIZE_MAX / (size_t)flags_stride)) {
-        /* FIXME event manager error callback */
-        return OPJ_FALSE;
-    }
-    flagssize *= (size_t)flags_stride;
+    flagssize *= flags_stride;
     {
-        /* BIG FAT XXX */
         opj_flag_t* p;
         OPJ_UINT32 x;
         OPJ_UINT32 flags_height = (h + 3U) / 4U;
 
-        if (flagssize > (size_t)t1->flagssize) {
-            /* Overflow check */
-            if (flagssize > (SIZE_MAX / sizeof(opj_flag_t))) {
-                /* FIXME event manager error callback */
-                return OPJ_FALSE;
-            }
+        if (flagssize > t1->flagssize) {
+
             opj_aligned_free(t1->flags);
             t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(
                             opj_flag_t));
@@ -1520,16 +1487,8 @@ static OPJ_BOOL opj_t1_allocate_buffers(
                 /* FIXME event manager error callback */
                 return OPJ_FALSE;
             }
-#if SIZE_MAX > 0xFFFFFFFFU /* UINT32_MAX */
-            /* TODO remove this if t1->flagssize type changes to size_t */
-            /* Overflow check */
-            if (flagssize > (size_t)0xFFFFFFFFU /* UINT32_MAX */) {
-                /* FIXME event manager error callback */
-                return OPJ_FALSE;
-            }
-#endif
         }
-        t1->flagssize = (OPJ_UINT32)flagssize;
+        t1->flagssize = flagssize;
 
         memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
 
@@ -1612,16 +1571,23 @@ void opj_t1_destroy(opj_t1_t *p_t1)
         p_t1->flags = 00;
     }
 
+    opj_free(p_t1->cblkdatabuffer);
+
     opj_free(p_t1);
 }
 
 typedef struct {
+    OPJ_BOOL whole_tile_decoding; /* copy of tcd->whole_tile_decoding; when false the job decodes into cblk->decoded_data */
     OPJ_UINT32 resno;
     opj_tcd_cblk_dec_t* cblk;
     opj_tcd_band_t* band;
     opj_tcd_tilecomp_t* tilec;
     opj_tccp_t* tccp;
+    OPJ_BOOL mustuse_cblkdatabuffer; /* copied into t1->mustuse_cblkdatabuffer; set when the thread pool has more than one thread */
     volatile OPJ_BOOL* pret;
+    opj_event_mgr_t *p_manager; /* event manager used to report messages from the job */
+    opj_mutex_t* p_manager_mutex; /* if non-NULL, locked around uses of p_manager */
+    OPJ_BOOL check_pterm; /* forwarded to opj_t1_decode_cblk(): whether PTERM correct termination should be checked */
 } opj_t1_cblk_decode_processing_job_t;
 
 static void opj_t1_destroy_wrapper(void* t1)
@@ -1645,12 +1611,44 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
     OPJ_UINT32 tile_w;
 
     job = (opj_t1_cblk_decode_processing_job_t*) user_data;
-    resno = job->resno;
+
     cblk = job->cblk;
+
+    if (!job->whole_tile_decoding) {
+        cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
+        cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
+
+        cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) *
+                             cblk_w * cblk_h);
+        if (cblk->decoded_data == NULL) {
+            if (job->p_manager_mutex) {
+                opj_mutex_lock(job->p_manager_mutex);
+            }
+            opj_event_msg(job->p_manager, EVT_ERROR,
+                          "Cannot allocate cblk->decoded_data\n");
+            if (job->p_manager_mutex) {
+                opj_mutex_unlock(job->p_manager_mutex);
+            }
+            *(job->pret) = OPJ_FALSE;
+            opj_free(job);
+            return;
+        }
+        /* Zero-init required */
+        memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h);
+    } else if (cblk->decoded_data) {
+        /* Not sure if that code path can happen, but better be */
+        /* safe than sorry */
+        opj_aligned_free(cblk->decoded_data);
+        cblk->decoded_data = NULL;
+    }
+
+    resno = job->resno;
     band = job->band;
     tilec = job->tilec;
     tccp = job->tccp;
-    tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
+    tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1
+                          -
+                          tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
 
     if (!*(job->pret)) {
         opj_free(job);
@@ -1662,13 +1660,17 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
         t1 = opj_t1_create(OPJ_FALSE);
         opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
     }
+    t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
 
     if (OPJ_FALSE == opj_t1_decode_cblk(
                 t1,
                 cblk,
                 band->bandno,
                 (OPJ_UINT32)tccp->roishift,
-                tccp->cblksty)) {
+                tccp->cblksty,
+                job->p_manager,
+                job->p_manager_mutex,
+                job->check_pterm)) {
         *(job->pret) = OPJ_FALSE;
         opj_free(job);
         return;
@@ -1685,26 +1687,74 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
         y += pres->y1 - pres->y0;
     }
 
-    datap = t1->data;
+    datap = cblk->decoded_data ? cblk->decoded_data : t1->data;
     cblk_w = t1->w;
     cblk_h = t1->h;
 
     if (tccp->roishift) {
-        OPJ_INT32 thresh = 1 << tccp->roishift;
-        for (j = 0; j < cblk_h; ++j) {
-            for (i = 0; i < cblk_w; ++i) {
-                OPJ_INT32 val = datap[(j * cblk_w) + i];
-                OPJ_INT32 mag = abs(val);
-                if (mag >= thresh) {
-                    mag >>= tccp->roishift;
-                    datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
+        if (tccp->roishift >= 31) {
+            for (j = 0; j < cblk_h; ++j) {
+                for (i = 0; i < cblk_w; ++i) {
+                    datap[(j * cblk_w) + i] = 0;
+                }
+            }
+        } else {
+            OPJ_INT32 thresh = 1 << tccp->roishift;
+            for (j = 0; j < cblk_h; ++j) {
+                for (i = 0; i < cblk_w; ++i) {
+                    OPJ_INT32 val = datap[(j * cblk_w) + i];
+                    OPJ_INT32 mag = abs(val);
+                    if (mag >= thresh) {
+                        mag >>= tccp->roishift;
+                        datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
+                    }
                 }
             }
         }
     }
-    if (tccp->qmfbid == 1) {
-        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_UINT32)y * tile_w +
-                                                       (OPJ_UINT32)x];
+
+    /* Both can be non-NULL, for example when a full tile is decoded and */
+    /* then a tile is partially decoded. In that case partial decoding */
+    /* takes priority. */
+    assert((cblk->decoded_data != NULL) || (tilec->data != NULL));
+
+    if (cblk->decoded_data) {
+        OPJ_UINT32 cblk_size = cblk_w * cblk_h;
+        if (tccp->qmfbid == 1) {
+            for (i = 0; i < cblk_size; ++i) {
+                datap[i] /= 2;
+            }
+        } else {        /* if (tccp->qmfbid == 0) */
+            i = 0;
+#ifdef __SSE2__
+            {
+                const __m128 xmm_stepsize = _mm_set1_ps(band->stepsize);
+                for (; i < (cblk_size & ~15U); i += 16) {
+                    __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                           datap + 0)));
+                    __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                           datap + 4)));
+                    __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                           datap + 8)));
+                    __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                           datap + 12)));
+                    _mm_store_ps((float*)(datap +  0), _mm_mul_ps(xmm0_data, xmm_stepsize));
+                    _mm_store_ps((float*)(datap +  4), _mm_mul_ps(xmm1_data, xmm_stepsize));
+                    _mm_store_ps((float*)(datap +  8), _mm_mul_ps(xmm2_data, xmm_stepsize));
+                    _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
+                    datap += 16;
+                }
+            }
+#endif
+            for (; i < cblk_size; ++i) {
+                OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize;
+                memcpy(datap, &tmp, sizeof(tmp));
+                datap++;
+            }
+        }
+    } else if (tccp->qmfbid == 1) {
+        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
+                                                       (OPJ_SIZE_T)x];
         for (j = 0; j < cblk_h; ++j) {
             i = 0;
             for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
@@ -1712,19 +1762,19 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
                 OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
                 OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
                 OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 0U] = tmp0 / 2;
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 1U] = tmp1 / 2;
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 2U] = tmp2 / 2;
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 3U] = tmp3 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
             }
             for (; i < cblk_w; ++i) {
                 OPJ_INT32 tmp = datap[(j * cblk_w) + i];
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
             }
         }
     } else {        /* if (tccp->qmfbid == 0) */
-        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y *
-                                                         tile_w + (OPJ_UINT32)x];
+        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
+                                                         tile_w + (OPJ_SIZE_T)x];
         for (j = 0; j < cblk_h; ++j) {
             OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
             for (i = 0; i < cblk_w; ++i) {
@@ -1741,14 +1791,23 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
 }
 
 
-void opj_t1_decode_cblks(opj_thread_pool_t* tp,
+void opj_t1_decode_cblks(opj_tcd_t* tcd,
                          volatile OPJ_BOOL* pret,
                          opj_tcd_tilecomp_t* tilec,
-                         opj_tccp_t* tccp
+                         opj_tccp_t* tccp,
+                         opj_event_mgr_t *p_manager,
+                         opj_mutex_t* p_manager_mutex,
+                         OPJ_BOOL check_pterm
                         )
 {
+    opj_thread_pool_t* tp = tcd->thread_pool;
     OPJ_UINT32 resno, bandno, precno, cblkno;
 
+#ifdef DEBUG_VERBOSE
+    OPJ_UINT32 codeblocks_decoded = 0;
+    printf("Enter opj_t1_decode_cblks()\n");
+#endif
+
     for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
         opj_tcd_resolution_t* res = &tilec->resolutions[resno];
 
@@ -1758,23 +1817,91 @@ void opj_t1_decode_cblks(opj_thread_pool_t* tp,
             for (precno = 0; precno < res->pw * res->ph; ++precno) {
                 opj_tcd_precinct_t* precinct = &band->precincts[precno];
 
+                if (!opj_tcd_is_subband_area_of_interest(tcd,
+                        tilec->compno,
+                        resno,
+                        band->bandno,
+                        (OPJ_UINT32)precinct->x0,
+                        (OPJ_UINT32)precinct->y0,
+                        (OPJ_UINT32)precinct->x1,
+                        (OPJ_UINT32)precinct->y1)) {
+                    for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
+                        opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
+                        if (cblk->decoded_data) {
+#ifdef DEBUG_VERBOSE
+                            printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
+                                   cblk->x0, cblk->y0, resno, bandno);
+#endif
+                            opj_aligned_free(cblk->decoded_data);
+                            cblk->decoded_data = NULL;
+                        }
+                    }
+                    continue;
+                }
+
                 for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
                     opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
                     opj_t1_cblk_decode_processing_job_t* job;
 
+                    if (!opj_tcd_is_subband_area_of_interest(tcd,
+                            tilec->compno,
+                            resno,
+                            band->bandno,
+                            (OPJ_UINT32)cblk->x0,
+                            (OPJ_UINT32)cblk->y0,
+                            (OPJ_UINT32)cblk->x1,
+                            (OPJ_UINT32)cblk->y1)) {
+                        if (cblk->decoded_data) {
+#ifdef DEBUG_VERBOSE
+                            printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
+                                   cblk->x0, cblk->y0, resno, bandno);
+#endif
+                            opj_aligned_free(cblk->decoded_data);
+                            cblk->decoded_data = NULL;
+                        }
+                        continue;
+                    }
+
+                    if (!tcd->whole_tile_decoding) {
+                        OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
+                        OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
+                        if (cblk->decoded_data != NULL) {
+#ifdef DEBUG_VERBOSE
+                            printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n",
+                                   cblk->x0, cblk->y0, resno, bandno);
+#endif
+                            continue;
+                        }
+                        if (cblk_w == 0 || cblk_h == 0) {
+                            continue;
+                        }
+#ifdef DEBUG_VERBOSE
+                        printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n",
+                               cblk->x0, cblk->y0, resno, bandno);
+#endif
+                    }
+
                     job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1,
                             sizeof(opj_t1_cblk_decode_processing_job_t));
                     if (!job) {
                         *pret = OPJ_FALSE;
                         return;
                     }
+                    job->whole_tile_decoding = tcd->whole_tile_decoding;
                     job->resno = resno;
                     job->cblk = cblk;
                     job->band = band;
                     job->tilec = tilec;
                     job->tccp = tccp;
                     job->pret = pret;
+                    job->p_manager_mutex = p_manager_mutex;
+                    job->p_manager = p_manager;
+                    job->check_pterm = check_pterm;
+                    job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
                     opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
+#ifdef DEBUG_VERBOSE
+                    codeblocks_decoded ++;
+#endif
                     if (!(*pret)) {
                         return;
                     }
@@ -1783,6 +1910,9 @@ void opj_t1_decode_cblks(opj_thread_pool_t* tp,
         } /* bandno */
     } /* resno */
 
+#ifdef DEBUG_VERBOSE
+    printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded);
+#endif
     return;
 }
 
@@ -1791,15 +1921,20 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
                                    opj_tcd_cblk_dec_t* cblk,
                                    OPJ_UINT32 orient,
                                    OPJ_UINT32 roishift,
-                                   OPJ_UINT32 cblksty)
+                                   OPJ_UINT32 cblksty,
+                                   opj_event_mgr_t *p_manager,
+                                   opj_mutex_t* p_manager_mutex,
+                                   OPJ_BOOL check_pterm)
 {
-    opj_raw_t *raw = &(t1->raw);   /* RAW component */
     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
 
     OPJ_INT32 bpno_plus_one;
     OPJ_UINT32 passtype;
     OPJ_UINT32 segno, passno;
+    OPJ_BYTE* cblkdata = NULL;
+    OPJ_UINT32 cblkdataindex = 0;
     OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
+    OPJ_INT32* original_t1_data = NULL;
 
     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
 
@@ -1811,6 +1946,18 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
     }
 
     bpno_plus_one = (OPJ_INT32)(roishift + cblk->numbps);
+    if (bpno_plus_one >= 31) {
+        if (p_manager_mutex) {
+            opj_mutex_lock(p_manager_mutex);
+        }
+        opj_event_msg(p_manager, EVT_WARNING,
+                      "opj_t1_decode_cblk(): unsupported bpno_plus_one = %d >= 31\n",
+                      bpno_plus_one);
+        if (p_manager_mutex) {
+            opj_mutex_unlock(p_manager_mutex);
+        }
+        return OPJ_FALSE;
+    }
     passtype = 2;
 
     opj_mqc_resetstates(mqc);
@@ -1818,24 +1965,68 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
     opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
     opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
 
+    /* Even if we have a single chunk, in multi-threaded decoding */
+    /* the insertion of our synthetic marker might potentially overwrite */
+    /* valid codestream of other codeblocks decoded in parallel. */
+    if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
+        OPJ_UINT32 i;
+        OPJ_UINT32 cblk_len;
+
+        /* Compute whole codeblock length from chunk lengths */
+        cblk_len = 0;
+        for (i = 0; i < cblk->numchunks; i++) {
+            cblk_len += cblk->chunks[i].len;
+        }
+
+        /* Allocate temporary memory if needed */
+        if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
+            cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
+                                              cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
+            if (cblkdata == NULL) {
+                return OPJ_FALSE;
+            }
+            t1->cblkdatabuffer = cblkdata;
+            memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
+            t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
+        }
+
+        /* Concatenate all chunks */
+        cblkdata = t1->cblkdatabuffer;
+        cblk_len = 0;
+        for (i = 0; i < cblk->numchunks; i++) {
+            memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
+            cblk_len += cblk->chunks[i].len;
+        }
+    } else if (cblk->numchunks == 1) {
+        cblkdata = cblk->chunks[0].data;
+    } else {
+        /* Not sure if that can happen in practice, but keep Coverity from */
+        /* thinking we will dereference a null cblkdata pointer */
+        return OPJ_TRUE;
+    }
+
+    /* For subtile decoding, directly decode in the decoded_data buffer of */
+    /* the code-block. Hack t1->data to point to it, and restore it later */
+    if (cblk->decoded_data) {
+        original_t1_data = t1->data;
+        t1->data = cblk->decoded_data;
+    }
+
     for (segno = 0; segno < cblk->real_num_segs; ++segno) {
         opj_tcd_seg_t *seg = &cblk->segs[segno];
 
         /* BYPASS mode */
         type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
-        /* FIXME: slviewer gets here with a null pointer. Why? Partially downloaded and/or corrupt textures? */
-        if (seg->data == 00) {
-            continue;
-        }
+
         if (type == T1_TYPE_RAW) {
-            opj_raw_init_dec(raw, (*seg->data) + seg->dataindex, seg->len);
+            opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
+                                 OPJ_COMMON_CBLK_DATA_EXTRA);
         } else {
-            if (OPJ_FALSE == opj_mqc_init_dec(mqc, (*seg->data) + seg->dataindex,
-                                              seg->len)) {
-                return OPJ_FALSE;
-            }
+            opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
+                             OPJ_COMMON_CBLK_DATA_EXTRA);
         }
+        cblkdataindex += seg->len;
 
         for (passno = 0; (passno < seg->real_num_passes) &&
                 (bpno_plus_one >= 1); ++passno) {
@@ -1870,7 +2061,41 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
                 bpno_plus_one--;
             }
         }
+
+        opq_mqc_finish_dec(mqc);
+    }
+
+    if (check_pterm) {
+        if (mqc->bp + 2 < mqc->end) {
+            if (p_manager_mutex) {
+                opj_mutex_lock(p_manager_mutex);
+            }
+            opj_event_msg(p_manager, EVT_WARNING,
+                          "PTERM check failure: %d remaining bytes in code block (%d used / %d)\n",
+                          (int)(mqc->end - mqc->bp) - 2,
+                          (int)(mqc->bp - mqc->start),
+                          (int)(mqc->end - mqc->start));
+            if (p_manager_mutex) {
+                opj_mutex_unlock(p_manager_mutex);
+            }
+        } else if (mqc->end_of_byte_stream_counter > 2) {
+            if (p_manager_mutex) {
+                opj_mutex_lock(p_manager_mutex);
+            }
+            opj_event_msg(p_manager, EVT_WARNING,
+                          "PTERM check failure: %d synthetized 0xFF markers read\n",
+                          mqc->end_of_byte_stream_counter);
+            if (p_manager_mutex) {
+                opj_mutex_unlock(p_manager_mutex);
+            }
+        }
+    }
+
+    /* Restore original t1->data if needed */
+    if (cblk->decoded_data) {
+        t1->data = original_t1_data;
     }
+
     return OPJ_TRUE;
 }
 
@@ -1898,8 +2123,14 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
 
             for (bandno = 0; bandno < res->numbands; ++bandno) {
                 opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
-                OPJ_INT32 bandconst = 8192 * 8192 / ((OPJ_INT32) floor(band->stepsize * 8192));
+                OPJ_INT32 bandconst;
 
+                /* Skip empty bands */
+                if (opj_tcd_is_band_empty(band)) {
+                    continue;
+                }
+
+                bandconst = 8192 * 8192 / ((OPJ_INT32) floor(band->stepsize * 8192));
                 for (precno = 0; precno < res->pw * res->ph; ++precno) {
                     opj_tcd_precinct_t *prc = &band->precincts[precno];
 
@@ -1908,7 +2139,8 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
                         OPJ_INT32* OPJ_RESTRICT tiledp;
                         OPJ_UINT32 cblk_w;
                         OPJ_UINT32 cblk_h;
-                        OPJ_UINT32 i, j, tileIndex = 0, tileLineAdvance;
+                        OPJ_UINT32 i, j, tileLineAdvance;
+                        OPJ_SIZE_T tileIndex = 0;
 
                         OPJ_INT32 x = cblk->x0 - band->x0;
                         OPJ_INT32 y = cblk->y0 - band->y0;
@@ -1932,13 +2164,22 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
                         cblk_h = t1->h;
                         tileLineAdvance = tile_w - cblk_w;
 
-                        tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
+                        tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
                         t1->data = tiledp;
                         t1->data_stride = tile_w;
                         if (tccp->qmfbid == 1) {
+                            /* Do multiplication on unsigned type, even if the
+                             * underlying type is signed, to avoid potential
+                             * int overflow on large value (the output will be
+                             * incorrect in such situation, but whatever...)
+                             * This assumes two's complement signed integer
+                             * representation
+                             * Fixes https://github.com/uclouvain/openjpeg/issues/1053
+                             */
+                            OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
                             for (j = 0; j < cblk_h; ++j) {
                                 for (i = 0; i < cblk_w; ++i) {
-                                    tiledp[tileIndex] *= (1 << T1_NMSEDEC_FRACBITS);
+                                    tiledp_u[tileIndex] <<= T1_NMSEDEC_FRACBITS;
                                     tileIndex++;
                                 }
                                 tileIndex += tileLineAdvance;
@@ -1979,6 +2220,37 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
     return OPJ_TRUE;
 }
 
+/* Returns OPJ_TRUE when the pass (bpno, passtype) is a terminated pass, OPJ_FALSE otherwise */
+static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk,
+                                   OPJ_UINT32 cblksty,
+                                   OPJ_INT32 bpno,
+                                   OPJ_UINT32 passtype)
+{
+    /* The last cleanup pass (passtype 2 on bit-plane 0) is always terminated */
+    if (passtype == 2 && bpno == 0) {
+        return OPJ_TRUE;
+    }
+
+    if (cblksty & J2K_CCP_CBLKSTY_TERMALL) {
+        return OPJ_TRUE;
+    }
+
+    if ((cblksty & J2K_CCP_CBLKSTY_LAZY)) {
+        /* For arithmetic coding bypass (LAZY style), terminate the 4th cleanup pass */
+        if ((bpno == ((OPJ_INT32)cblk->numbps - 4)) && (passtype == 2)) {
+            return OPJ_TRUE;
+        }
+        /* and beyond terminate all the magnitude refinement passes (in raw) */
+        /* and cleanup passes (in MQC) */
+        if ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype > 0)) {
+            return OPJ_TRUE;
+        }
+    }
+
+    return OPJ_FALSE;
+}
+
+
 /** mod fixed_quality */
 static void opj_t1_encode_cblk(opj_t1_t *t1,
                                opj_tcd_cblk_enc_t* cblk,
@@ -2006,6 +2278,11 @@ static void opj_t1_encode_cblk(opj_t1_t *t1,
     OPJ_BYTE type = T1_TYPE_MQ;
     OPJ_FLOAT64 tempwmsedec;
 
+#ifdef EXTRA_DEBUG
+    printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
+           cblk->x0, cblk->y0, cblk->x1, cblk->y1, orient, compno, level);
+#endif
+
     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
 
     max = 0;
@@ -2018,6 +2295,10 @@ static void opj_t1_encode_cblk(opj_t1_t *t1,
 
     cblk->numbps = max ? (OPJ_UINT32)((opj_int_floorlog2(max) + 1) -
                                       T1_NMSEDEC_FRACBITS) : 0;
+    if (cblk->numbps == 0) {
+        cblk->totalpasses = 0;
+        return;
+    }
 
     bpno = (OPJ_INT32)(cblk->numbps - 1);
     passtype = 2;
@@ -2030,10 +2311,18 @@ static void opj_t1_encode_cblk(opj_t1_t *t1,
 
     for (passno = 0; bpno >= 0; ++passno) {
         opj_tcd_pass_t *pass = &cblk->passes[passno];
-        OPJ_UINT32 correction = 3;
         type = ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype < 2) &&
                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
 
+        /* If the previous pass was terminating, we need to reset the encoder */
+        if (passno > 0 && cblk->passes[passno - 1].term) {
+            if (type == T1_TYPE_RAW) {
+                opj_mqc_bypass_init_enc(mqc);
+            } else {
+                opj_mqc_restart_init_enc(mqc);
+            }
+        }
+
         switch (passtype) {
         case 0:
             opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty);
@@ -2055,35 +2344,32 @@ static void opj_t1_encode_cblk(opj_t1_t *t1,
                                         stepsize, numcomps, mct_norms, mct_numcomps) ;
         cumwmsedec += tempwmsedec;
         tile->distotile += tempwmsedec;
+        pass->distortiondec = cumwmsedec;
 
-        /* Code switch "RESTART" (i.e. TERMALL) */
-        if ((cblksty & J2K_CCP_CBLKSTY_TERMALL) && !((passtype == 2) &&
-                (bpno - 1 < 0))) {
+        if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) {
+            /* If it is a terminated pass, terminate it */
             if (type == T1_TYPE_RAW) {
-                opj_mqc_flush(mqc);
-                correction = 1;
-                /* correction = mqc_bypass_flush_enc(); */
-            } else {            /* correction = mqc_restart_enc(); */
-                opj_mqc_flush(mqc);
-                correction = 1;
+                opj_mqc_bypass_flush_enc(mqc, cblksty & J2K_CCP_CBLKSTY_PTERM);
+            } else {
+                if (cblksty & J2K_CCP_CBLKSTY_PTERM) {
+                    opj_mqc_erterm_enc(mqc);
+                } else {
+                    opj_mqc_flush(mqc);
+                }
             }
             pass->term = 1;
+            pass->rate = opj_mqc_numbytes(mqc);
         } else {
-            if (((bpno < ((OPJ_INT32)(cblk->numbps) - 4) && (passtype > 0))
-                    || ((bpno == ((OPJ_INT32)cblk->numbps - 4)) && (passtype == 2))) &&
-                    (cblksty & J2K_CCP_CBLKSTY_LAZY)) {
-                if (type == T1_TYPE_RAW) {
-                    opj_mqc_flush(mqc);
-                    correction = 1;
-                    /* correction = mqc_bypass_flush_enc(); */
-                } else {        /* correction = mqc_restart_enc(); */
-                    opj_mqc_flush(mqc);
-                    correction = 1;
-                }
-                pass->term = 1;
+            /* Non terminated pass */
+            OPJ_UINT32 rate_extra_bytes;
+            if (type == T1_TYPE_RAW) {
+                rate_extra_bytes = opj_mqc_bypass_get_extra_bytes(
+                                       mqc, (cblksty & J2K_CCP_CBLKSTY_PTERM));
             } else {
-                pass->term = 0;
+                rate_extra_bytes = 3;
             }
+            pass->term = 0;
+            pass->rate = opj_mqc_numbytes(mqc) + rate_extra_bytes;
         }
 
         if (++passtype == 3) {
@@ -2091,43 +2377,52 @@ static void opj_t1_encode_cblk(opj_t1_t *t1,
             bpno--;
         }
 
-        if (pass->term && bpno > 0) {
-            type = ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype < 2) &&
-                    (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
-            if (type == T1_TYPE_RAW) {
-                opj_mqc_bypass_init_enc(mqc);
-            } else {
-                opj_mqc_restart_init_enc(mqc);
-            }
-        }
-
-        pass->distortiondec = cumwmsedec;
-        pass->rate = opj_mqc_numbytes(mqc) + correction;    /* FIXME */
-
         /* Code-switch "RESET" */
         if (cblksty & J2K_CCP_CBLKSTY_RESET) {
             opj_mqc_reset_enc(mqc);
         }
     }
 
-    /* Code switch "ERTERM" (i.e. PTERM) */
-    if (cblksty & J2K_CCP_CBLKSTY_PTERM) {
-        opj_mqc_erterm_enc(mqc);
-    } else /* Default coding */ if (!(cblksty & J2K_CCP_CBLKSTY_LAZY)) {
-        opj_mqc_flush(mqc);
-    }
-
     cblk->totalpasses = passno;
 
+    if (cblk->totalpasses) {
+        /* Make sure that pass rates are increasing */
+        OPJ_UINT32 last_pass_rate = opj_mqc_numbytes(mqc);
+        for (passno = cblk->totalpasses; passno > 0;) {
+            opj_tcd_pass_t *pass = &cblk->passes[--passno];
+            if (pass->rate > last_pass_rate) {
+                pass->rate = last_pass_rate;
+            } else {
+                last_pass_rate = pass->rate;
+            }
+        }
+    }
+
     for (passno = 0; passno < cblk->totalpasses; passno++) {
         opj_tcd_pass_t *pass = &cblk->passes[passno];
-        if (pass->rate > opj_mqc_numbytes(mqc)) {
-            pass->rate = opj_mqc_numbytes(mqc);
-        }
-        /*Preventing generation of FF as last data byte of a pass*/
-        if ((pass->rate > 1) && (cblk->data[pass->rate - 1] == 0xFF)) {
+
+        /* Prevent generation of FF as last data byte of a pass*/
+        /* For terminating passes, the flushing procedure ensured this already */
+        assert(pass->rate > 0);
+        if (cblk->data[pass->rate - 1] == 0xFF) {
             pass->rate--;
         }
         pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate);
     }
+
+#ifdef EXTRA_DEBUG
+    printf(" len=%d\n", (cblk->totalpasses) ? opj_mqc_numbytes(mqc) : 0);
+
+    /* Check that there are no 0xff >=0x90 sequences */
+    if (cblk->totalpasses) {
+        OPJ_UINT32 i;
+        OPJ_UINT32 len = opj_mqc_numbytes(mqc);
+        for (i = 1; i < len; ++i) {
+            if (cblk->data[i - 1] == 0xff && cblk->data[i] >= 0x90) {
+                printf("0xff %02x at offset %d\n", cblk->data[i], i - 1);
+                abort();
+            }
+        }
+    }
+#endif
 }