opj_t1_encode_cblks: fix UBSAN signed integer overflow
[openjpeg.git] / src / lib / openjp2 / t1.c
index 15ac040701674faba280e826b2722db29ddbe58d..f6f7671190cd5bc5a40a8ccac9b349abc0489e43 100644 (file)
@@ -12,6 +12,8 @@
  * Copyright (c) 2003-2014, Antonin Descampe
  * Copyright (c) 2005, Herve Drolon, FreeImage Team
  * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
+ * Copyright (c) 2012, Carl Hetherington
+ * Copyright (c) 2017, IntoPIX SA <support@intopix.com>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
+#define OPJ_SKIP_POISON
 #include "opj_includes.h"
-#include "t1_luts.h"
 
-/* #define CONSISTENCY_CHECK */
+#ifdef __SSE__
+#include <xmmintrin.h>
+#endif
+#ifdef __SSE2__
+#include <emmintrin.h>
+#endif
+
+#if defined(__GNUC__)
+#pragma GCC poison malloc calloc realloc free
+#endif
+
+#include "t1_luts.h"
 
 /** @defgroup T1 T1 - Implementation of the tier-1 coding */
 /*@{*/
 
+#define T1_FLAGS(x, y) (t1->flags[x + 1 + ((y / 4) + 1) * (t1->w+2)])
+
+#define opj_t1_setcurctx(curctx, ctxno)  curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)]
+
 /** @name Local static functions */
 /*@{*/
 
 static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f);
-static OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 f);
 static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f);
-static OPJ_BYTE opj_t1_getspb(OPJ_UINT32 f);
 static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos);
 static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos);
-static INLINE void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s,
-                                      OPJ_UINT32 stride);
-/**
-Encode significant pass
-*/
-static void opj_t1_enc_sigpass_step(opj_t1_t *t1,
-                                    opj_flag_t *flagsp,
-                                    OPJ_INT32 *datap,
-                                    OPJ_INT32 bpno,
-                                    OPJ_INT32 one,
-                                    OPJ_INT32 *nmsedec,
-                                    OPJ_BYTE type,
-                                    OPJ_UINT32 vsc);
+static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
+                                       OPJ_UINT32 s, OPJ_UINT32 stride,
+                                       OPJ_UINT32 vsc);
+
 
 /**
 Decode significant pass
 */
-#if 0
-static void opj_t1_dec_sigpass_step(opj_t1_t *t1,
-                                    opj_flag_t *flagsp,
-                                    OPJ_INT32 *datap,
-                                    OPJ_UINT32 orient,
-                                    OPJ_INT32 oneplushalf,
-                                    OPJ_BYTE type,
-                                    OPJ_UINT32 vsc);
-#endif
 
 static INLINE void opj_t1_dec_sigpass_step_raw(
     opj_t1_t *t1,
     opj_flag_t *flagsp,
-    opj_colflag_t* colflagsp,
     OPJ_INT32 *datap,
     OPJ_INT32 oneplushalf,
-    OPJ_INT32 vsc,
+    OPJ_UINT32 vsc,
     OPJ_UINT32 row);
 static INLINE void opj_t1_dec_sigpass_step_mqc(
     opj_t1_t *t1,
     opj_flag_t *flagsp,
-    opj_colflag_t* colflagsp,
     OPJ_INT32 *datap,
     OPJ_INT32 oneplushalf,
     OPJ_UINT32 row,
-    OPJ_UINT32 flags_stride);
-static INLINE void opj_t1_dec_sigpass_step_mqc_vsc(
-    opj_t1_t *t1,
-    opj_flag_t *flagsp,
-    opj_colflag_t* colflagsp,
-    OPJ_INT32 *datap,
-    OPJ_INT32 oneplushalf,
-    OPJ_INT32 vsc,
-    OPJ_UINT32 row);
-
+    OPJ_UINT32 flags_stride,
+    OPJ_UINT32 vsc);
 
 /**
 Encode significant pass
@@ -122,24 +109,6 @@ static void opj_t1_dec_sigpass_raw(
     opj_t1_t *t1,
     OPJ_INT32 bpno,
     OPJ_INT32 cblksty);
-static void opj_t1_dec_sigpass_mqc_vsc(
-    opj_t1_t *t1,
-    OPJ_INT32 bpno);
-
-
-
-/**
-Encode refinement pass
-*/
-static void opj_t1_enc_refpass_step(opj_t1_t *t1,
-                                    opj_flag_t *flagsp,
-                                    OPJ_INT32 *datap,
-                                    OPJ_INT32 bpno,
-                                    OPJ_INT32 one,
-                                    OPJ_INT32 *nmsedec,
-                                    OPJ_BYTE type,
-                                    OPJ_UINT32 vsc);
-
 
 /**
 Encode refinement pass
@@ -147,8 +116,7 @@ Encode refinement pass
 static void opj_t1_enc_refpass(opj_t1_t *t1,
                                OPJ_INT32 bpno,
                                OPJ_INT32 *nmsedec,
-                               OPJ_BYTE type,
-                               OPJ_UINT32 cblksty);
+                               OPJ_BYTE type);
 
 /**
 Decode refinement pass
@@ -156,89 +124,38 @@ Decode refinement pass
 static void opj_t1_dec_refpass_raw(
     opj_t1_t *t1,
     OPJ_INT32 bpno);
-static void opj_t1_dec_refpass_mqc_vsc(
-    opj_t1_t *t1,
-    OPJ_INT32 bpno);
 
 
 /**
 Decode refinement pass
 */
-#if 0
-static void opj_t1_dec_refpass_step(opj_t1_t *t1,
-                                    opj_flag_t *flagsp,
-                                    OPJ_INT32 *datap,
-                                    OPJ_INT32 poshalf,
-                                    OPJ_INT32 neghalf,
-                                    OPJ_BYTE type,
-                                    OPJ_UINT32 vsc);
-#endif
 
 static INLINE void  opj_t1_dec_refpass_step_raw(
-    opj_t1_t *t1,
-    opj_colflag_t *colflagsp,
-    OPJ_INT32 *datap,
-    OPJ_INT32 poshalf,
-    OPJ_INT32 neghalf,
-    OPJ_UINT32 row);
-static INLINE void opj_t1_dec_refpass_step_mqc(
     opj_t1_t *t1,
     opj_flag_t *flagsp,
-    opj_colflag_t *colflagsp,
     OPJ_INT32 *datap,
     OPJ_INT32 poshalf,
-    OPJ_INT32 neghalf,
     OPJ_UINT32 row);
-static INLINE void opj_t1_dec_refpass_step_mqc_vsc(
+static INLINE void opj_t1_dec_refpass_step_mqc(
     opj_t1_t *t1,
     opj_flag_t *flagsp,
-    opj_colflag_t *colflagsp,
     OPJ_INT32 *datap,
     OPJ_INT32 poshalf,
-    OPJ_INT32 neghalf,
-    OPJ_INT32 vsc,
     OPJ_UINT32 row);
 
 
-
-/**
-Encode clean-up pass
-*/
-static void opj_t1_enc_clnpass_step(
-    opj_t1_t *t1,
-    opj_flag_t *flagsp,
-    OPJ_INT32 *datap,
-    OPJ_INT32 bpno,
-    OPJ_INT32 one,
-    OPJ_INT32 *nmsedec,
-    OPJ_UINT32 partial,
-    OPJ_UINT32 vsc);
 /**
 Decode clean-up pass
 */
-static void opj_t1_dec_clnpass_step_partial(
-    opj_t1_t *t1,
-    opj_flag_t *flagsp,
-    opj_colflag_t *colflagsp,
-    OPJ_INT32 *datap,
-    OPJ_INT32 oneplushalf,
-    OPJ_UINT32 row);
+
 static void opj_t1_dec_clnpass_step(
     opj_t1_t *t1,
     opj_flag_t *flagsp,
-    opj_colflag_t *colflagsp,
-    OPJ_INT32 *datap,
-    OPJ_INT32 oneplushalf,
-    OPJ_UINT32 row);
-static void opj_t1_dec_clnpass_step_vsc(
-    opj_t1_t *t1,
-    opj_flag_t *flagsp,
-    opj_colflag_t *colflagsp,
     OPJ_INT32 *datap,
     OPJ_INT32 oneplushalf,
-    OPJ_INT32 partial,
-    OPJ_INT32 vsc,
-    OPJ_UINT32 row);
+    OPJ_UINT32 row,
+    OPJ_UINT32 vsc);
+
 /**
 Encode clean-up pass
 */
@@ -280,12 +197,18 @@ Decode 1 code-block
 @param orient
 @param roishift Region of interest shifting value
 @param cblksty Code-block style
+@param p_manager the event manager
+@param p_manager_mutex mutex for the event manager
+@param check_pterm whether PTERM correct termination should be checked
 */
 static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
                                    opj_tcd_cblk_dec_t* cblk,
                                    OPJ_UINT32 orient,
                                    OPJ_UINT32 roishift,
-                                   OPJ_UINT32 cblksty);
+                                   OPJ_UINT32 cblksty,
+                                   opj_event_mgr_t *p_manager,
+                                   opj_mutex_t* p_manager_mutex,
+                                   OPJ_BOOL check_pterm);
 
 static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
                                         OPJ_UINT32 w,
@@ -297,26 +220,56 @@ static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
 
 /* ----------------------------------------------------------------------- */
 
-static OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f)
+static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f)
 {
-    return mqc->lut_ctxno_zc_orient[(f & T1_SIG_OTH)];
+    return mqc->lut_ctxno_zc_orient[(f & T1_SIGMA_NEIGHBOURS)];
 }
 
-static OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 f)
+static INLINE OPJ_UINT32 opj_t1_getctxtno_sc_or_spb_index(OPJ_UINT32 fX,
+        OPJ_UINT32 pfX,
+        OPJ_UINT32 nfX,
+        OPJ_UINT32 ci)
 {
-    return lut_ctxno_sc[(f & (T1_SIG_PRIM | T1_SGN)) >> 4];
+    /*
+      0 pfX T1_CHI_THIS           T1_LUT_SGN_W
+      1 tfX T1_SIGMA_1            T1_LUT_SIG_N
+      2 nfX T1_CHI_THIS           T1_LUT_SGN_E
+      3 tfX T1_SIGMA_3            T1_LUT_SIG_W
+      4  fX T1_CHI_(THIS - 1)     T1_LUT_SGN_N
+      5 tfX T1_SIGMA_5            T1_LUT_SIG_E
+      6  fX T1_CHI_(THIS + 1)     T1_LUT_SGN_S
+      7 tfX T1_SIGMA_7            T1_LUT_SIG_S
+    */
+
+    OPJ_UINT32 lu = (fX >> (ci * 3U)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 |
+                                         T1_SIGMA_7);
+
+    lu |= (pfX >> (T1_CHI_THIS_I      + (ci * 3U))) & (1U << 0);
+    lu |= (nfX >> (T1_CHI_THIS_I - 2U + (ci * 3U))) & (1U << 2);
+    if (ci == 0U) {
+        lu |= (fX >> (T1_CHI_0_I - 4U)) & (1U << 4);
+    } else {
+        lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4);
+    }
+    lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6);
+    return lu;
 }
 
-static OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f)
+static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 lu)
 {
-    OPJ_UINT32 tmp1 = (f & T1_SIG_OTH) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
-    OPJ_UINT32 tmp2 = (f & T1_REFINE) ? T1_CTXNO_MAG + 2 : tmp1;
-    return (tmp2);
+    return lut_ctxno_sc[lu];
 }
 
-static OPJ_BYTE opj_t1_getspb(OPJ_UINT32 f)
+static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f)
 {
-    return lut_spb[(f & (T1_SIG_PRIM | T1_SGN)) >> 4];
+    OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
+    OPJ_UINT32 tmp2 = (f & T1_MU_0) ? T1_CTXNO_MAG + 2 : tmp;
+    return tmp2;
+}
+
+static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 lu)
+{
+    return lut_spb[lu];
 }
 
 static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos)
@@ -337,217 +290,168 @@ static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos)
     return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
 }
 
-static INLINE void opj_t1_updateflags(opj_flag_t *flagsp, OPJ_UINT32 s,
-                                      OPJ_UINT32 stride)
-{
-    opj_flag_t *np = flagsp - stride;
-    opj_flag_t *sp = flagsp + stride;
-
-    /* We strongly rely on (T1_SGN_N == 0x0100) == (T1_SIG_N == 0x0010) << 4 */
-    /* and T1_SIG_E == T1_SIG_N << 1, T1_SIG_W == T1_SIG_N << 2 and T1_SIG_S == T1_SIG_N << 2 */
-    /* and T1_SGN_E == T1_SGN_N << 1, T1_SGN_W == T1_SGN_N << 2 and T1_SGN_S == T1_SGN_N << 2 */
-
-    unsigned int flag_N = T1_SIG_N | ((unsigned int)T1_SIG_N << (4U * s));
-
-    np[-1] |= T1_SIG_SE;
-    np[0]  |= (opj_flag_t)(flag_N << 2U);
-    np[1]  |= T1_SIG_SW;
-
-    flagsp[-1] |= (opj_flag_t)(flag_N << 1U);
-    flagsp[0]  |= T1_SIG;
-    flagsp[1]  |= (opj_flag_t)(flag_N << 3U);
-
-    sp[-1] |= T1_SIG_NE;
-    sp[0]  = (opj_flag_t)(sp[0] | flag_N);
-    sp[1]  |= T1_SIG_NW;
+#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride, vsc) \
+{ \
+    /* east */ \
+    flagsp[-1] |= T1_SIGMA_5 << (3U * ci); \
+ \
+    /* mark target as significant */ \
+    flags |= ((s << T1_CHI_1_I) | T1_SIGMA_4) << (3U * ci); \
+ \
+    /* west */ \
+    flagsp[1] |= T1_SIGMA_3 << (3U * ci); \
+ \
+    /* north-west, north, north-east */ \
+    if (ci == 0U && !(vsc)) { \
+        opj_flag_t* north = flagsp - (stride); \
+        *north |= (s << T1_CHI_5_I) | T1_SIGMA_16; \
+        north[-1] |= T1_SIGMA_17; \
+        north[1] |= T1_SIGMA_15; \
+    } \
+ \
+    /* south-west, south, south-east */ \
+    if (ci == 3U) { \
+        opj_flag_t* south = flagsp + (stride); \
+        *south |= (s << T1_CHI_0_I) | T1_SIGMA_1; \
+        south[-1] |= T1_SIGMA_2; \
+        south[1] |= T1_SIGMA_0; \
+    } \
 }
 
-static INLINE void opj_t1_updateflagscolflags(opj_flag_t *flagsp,
-        opj_colflag_t *colflagsp, OPJ_UINT32 s, OPJ_UINT32 stride, OPJ_UINT32 row)
+
+static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
+                                       OPJ_UINT32 s, OPJ_UINT32 stride,
+                                       OPJ_UINT32 vsc)
 {
-    opj_t1_updateflags(flagsp, s, stride);
-    if (row == 0U) {
-        *colflagsp |= (opj_colflag_t)((T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row)) |
-                                      (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row + 1U))));
-        *(colflagsp - 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 <<
-                                             (T1_COLFLAG_RBS * row)) |
-                                            (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row + 1U))));
-        *(colflagsp + 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 <<
-                                             (T1_COLFLAG_RBS * row)) |
-                                            (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row + 1U))));
-        *(colflagsp - stride - 1) |= (T1_COLFLAG_SIG_OTHER_ROW_3);
-        *(colflagsp - stride) |= (T1_COLFLAG_SIG_OTHER_ROW_3);
-        *(colflagsp - stride + 1) |= (T1_COLFLAG_SIG_OTHER_ROW_3);
-    } else if (row == 3U) {
-        *colflagsp |= (opj_colflag_t)((T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row)) |
-                                      (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row - 1U))));
-        *(colflagsp - 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 <<
-                                             (T1_COLFLAG_RBS * row)) |
-                                            (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row - 1U))));
-        *(colflagsp + 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 <<
-                                             (T1_COLFLAG_RBS * row)) |
-                                            (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row - 1))));
-        *(colflagsp + stride - 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0);
-        *(colflagsp + stride) |= (T1_COLFLAG_SIG_OTHER_ROW_0);
-        *(colflagsp + stride + 1) |= (T1_COLFLAG_SIG_OTHER_ROW_0);
-    } else {
-        *(colflagsp - 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 <<
-                                             (T1_COLFLAG_RBS * row)) |
-                                            (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row - 1U))) |
-                                            (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row + 1U))));
-        *colflagsp |= (opj_colflag_t)((T1_COLFLAG_SIG_ROW_0 << (T1_COLFLAG_RBS * row)) |
-                                      (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row - 1U))) |
-                                      (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row + 1U))));
-        *(colflagsp + 1) |= (opj_colflag_t)((T1_COLFLAG_SIG_OTHER_ROW_0 <<
-                                             (T1_COLFLAG_RBS * row)) |
-                                            (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row - 1U))) |
-                                            (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * (row + 1U))));
-    }
+    opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride, vsc);
 }
 
-static void opj_t1_enc_sigpass_step(opj_t1_t *t1,
-                                    opj_flag_t *flagsp,
-                                    OPJ_INT32 *datap,
-                                    OPJ_INT32 bpno,
-                                    OPJ_INT32 one,
-                                    OPJ_INT32 *nmsedec,
-                                    OPJ_BYTE type,
-                                    OPJ_UINT32 vsc
-                                   )
+/**
+Encode significant pass
+*/
+static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1,
+        opj_flag_t *flagsp,
+        OPJ_INT32 *datap,
+        OPJ_INT32 bpno,
+        OPJ_INT32 one,
+        OPJ_INT32 *nmsedec,
+        OPJ_BYTE type,
+        OPJ_UINT32 ci,
+        OPJ_UINT32 vsc)
 {
-    OPJ_INT32 v;
-    OPJ_UINT32 flag;
+    OPJ_UINT32 v;
+
+    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
 
-    opj_mqc_t *mqc = t1->mqc;   /* MQC component */
+    OPJ_UINT32 const flags = *flagsp;
 
-    flag = vsc ? (OPJ_UINT32)((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW |
-                                           T1_SGN_S))) : (OPJ_UINT32)(*flagsp);
-    if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) {
+    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
+            (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
+        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
         v = (opj_int_abs(*datap) & one) ? 1 : 0;
-        opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, flag));  /* ESSAI */
+#ifdef DEBUG_ENC_SIG
+        fprintf(stderr, "   ctxt1=%d\n", ctxt1);
+#endif
+        opj_mqc_setcurctx(mqc, ctxt1);
         if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
-            opj_mqc_bypass_enc(mqc, (OPJ_UINT32)v);
+            opj_mqc_bypass_enc(mqc, v);
         } else {
-            opj_mqc_encode(mqc, (OPJ_UINT32)v);
+            opj_mqc_encode(mqc, v);
         }
         if (v) {
-            v = *datap < 0 ? 1 : 0;
+            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index(
+                                *flagsp,
+                                flagsp[-1], flagsp[1],
+                                ci);
+            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu);
+            v = *datap < 0 ? 1U : 0U;
             *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
-                                              (OPJ_UINT32)(bpno));
-            opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(flag));   /* ESSAI */
+                                              (OPJ_UINT32)bpno);
+#ifdef DEBUG_ENC_SIG
+            fprintf(stderr, "   ctxt2=%d\n", ctxt2);
+#endif
+            opj_mqc_setcurctx(mqc, ctxt2);
             if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
-                opj_mqc_bypass_enc(mqc, (OPJ_UINT32)v);
+                opj_mqc_bypass_enc(mqc, v);
             } else {
-                opj_mqc_encode(mqc, (OPJ_UINT32)(v ^ opj_t1_getspb((OPJ_UINT32)flag)));
+                OPJ_UINT32 spb = opj_t1_getspb(lu);
+#ifdef DEBUG_ENC_SIG
+                fprintf(stderr, "   spb=%d\n", spb);
+#endif
+                opj_mqc_encode(mqc, v ^ spb);
             }
-            opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride);
+            opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
         }
-        *flagsp |= T1_VISIT;
+        *flagsp |= T1_PI_THIS << (ci * 3U);
     }
 }
 
-
 static INLINE void opj_t1_dec_sigpass_step_raw(
     opj_t1_t *t1,
     opj_flag_t *flagsp,
-    opj_colflag_t* colflagsp,
     OPJ_INT32 *datap,
     OPJ_INT32 oneplushalf,
-    OPJ_INT32 vsc,
-    OPJ_UINT32 row)
+    OPJ_UINT32 vsc,
+    OPJ_UINT32 ci)
 {
-    OPJ_UINT32 v, flag;
-    opj_raw_t *raw = t1->raw;       /* RAW component */
-
-    flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) :
-           (*flagsp);
-    if ((flag & T1_SIG_OTH) &&
-            !(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) <<
-                            (T1_COLFLAG_RBS * row)))) {
-        if (opj_raw_decode(raw)) {
-            v = opj_raw_decode(raw);    /* ESSAI */
+    OPJ_UINT32 v;
+    opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
+
+    OPJ_UINT32 const flags = *flagsp;
+
+    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
+            (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
+        if (opj_mqc_raw_decode(mqc)) {
+            v = opj_mqc_raw_decode(mqc);
             *datap = v ? -oneplushalf : oneplushalf;
-            opj_t1_updateflagscolflags(flagsp, colflagsp, v, t1->flags_stride, row);
+            opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
         }
-#ifdef CONSISTENCY_CHECK
-        *flagsp |= T1_VISIT;
-#endif
-        *colflagsp = (opj_colflag_t)(*colflagsp | (T1_COLFLAG_VISIT_ROW_0 <<
-                                     (T1_COLFLAG_RBS * row)));
+        *flagsp |= T1_PI_THIS << (ci * 3U);
     }
 }
 
-static INLINE void opj_t1_dec_sigpass_step_mqc(
-    opj_t1_t *t1,
-    opj_flag_t *flagsp,
-    opj_colflag_t* colflagsp,
-    OPJ_INT32 *datap,
-    OPJ_INT32 oneplushalf,
-    OPJ_UINT32 row,
-    OPJ_UINT32 flags_stride)
-{
-    OPJ_INT32 v, flag;
-
-    opj_mqc_t *mqc = t1->mqc;       /* MQC component */
-#ifdef CONSISTENCY_CHECK
-    assert(((*flagsp & T1_SIG_OTH) && !(*flagsp & (T1_SIG | T1_VISIT))) ==
-           ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0 |
-                            T1_COLFLAG_SIG_OTHER_ROW_0) << (T1_COLFLAG_RBS * row))) ==
-            (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row))));
-#endif
-    if ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0 |
-                        T1_COLFLAG_SIG_OTHER_ROW_0) << (T1_COLFLAG_RBS * row))) ==
-            (T1_COLFLAG_SIG_OTHER_ROW_0 << (T1_COLFLAG_RBS * row))) {
-        flag = *flagsp;
-        opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag));
-        if (opj_mqc_decode(mqc)) {
-            opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag));
-            v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag);
-            *datap = v ? -oneplushalf : oneplushalf;
-            opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, flags_stride, row);
-        }
-#ifdef CONSISTENCY_CHECK
-        *flagsp |= T1_VISIT;
-#endif
-        *colflagsp = (opj_colflag_t)(*colflagsp | (T1_COLFLAG_VISIT_ROW_0 <<
-                                     (T1_COLFLAG_RBS * row)));
-    }
-}                               /* VSC and  BYPASS by Antonin */
+#define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \
+                                          data_stride, ci, mqc, curctx, \
+                                          v, a, c, ct, oneplushalf, vsc) \
+{ \
+    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
+        (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
+        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
+        opj_t1_setcurctx(curctx, ctxt1); \
+        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
+        if (v) { \
+            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
+                                flags, \
+                                flagsp[-1], flagsp[1], \
+                                ci); \
+            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
+            OPJ_UINT32 spb = opj_t1_getspb(lu); \
+            opj_t1_setcurctx(curctx, ctxt2); \
+            opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
+            v = v ^ spb; \
+            data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
+            opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
+        } \
+        flags |= T1_PI_THIS << (ci * 3U); \
+    } \
+}
 
-static INLINE void opj_t1_dec_sigpass_step_mqc_vsc(
+static INLINE void opj_t1_dec_sigpass_step_mqc(
     opj_t1_t *t1,
     opj_flag_t *flagsp,
-    opj_colflag_t* colflagsp,
     OPJ_INT32 *datap,
     OPJ_INT32 oneplushalf,
-    OPJ_INT32 vsc,
-    OPJ_UINT32 row)
+    OPJ_UINT32 ci,
+    OPJ_UINT32 flags_stride,
+    OPJ_UINT32 vsc)
 {
-    OPJ_UINT32 v, flag;
-
-    opj_mqc_t *mqc = t1->mqc;       /* MQC component */
-
-    flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) :
-           (*flagsp);
-    if ((flag & T1_SIG_OTH) &&
-            !(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) <<
-                            (T1_COLFLAG_RBS * row)))) {
-        opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, flag));
-        if (opj_mqc_decode(mqc)) {
-            opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(flag));
-            v = (OPJ_UINT32)opj_mqc_decode(mqc) ^ opj_t1_getspb(flag);
-            *datap = v ? -oneplushalf : oneplushalf;
-            opj_t1_updateflagscolflags(flagsp, colflagsp, v, t1->flags_stride, row);
-        }
-#ifdef CONSISTENCY_CHECK
-        *flagsp |= T1_VISIT;
-#endif
-        *colflagsp = (opj_colflag_t)(*colflagsp | (T1_COLFLAG_VISIT_ROW_0 <<
-                                     (T1_COLFLAG_RBS * row)));
-    }
-}                               /* VSC and  BYPASS by Antonin */
-
+    OPJ_UINT32 v;
 
+    opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
+    opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap,
+                                      0, ci, mqc, mqc->curctx,
+                                      v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
+}
 
 static void opj_t1_enc_sigpass(opj_t1_t *t1,
                                OPJ_INT32 bpno,
@@ -556,26 +460,96 @@ static void opj_t1_enc_sigpass(opj_t1_t *t1,
                                OPJ_UINT32 cblksty
                               )
 {
-    OPJ_UINT32 i, j, k, vsc;
-    OPJ_INT32 one;
+    OPJ_UINT32 i, k;
+    OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
+    opj_flag_t* f = &T1_FLAGS(0, 0);
+    OPJ_UINT32 const extra = 2;
 
     *nmsedec = 0;
-    one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
-    for (k = 0; k < t1->h; k += 4) {
+#ifdef DEBUG_ENC_SIG
+    fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
+#endif
+    for (k = 0; k < (t1->h & ~3U); k += 4) {
+#ifdef DEBUG_ENC_SIG
+        fprintf(stderr, " k=%d\n", k);
+#endif
+        for (i = 0; i < t1->w; ++i) {
+#ifdef DEBUG_ENC_SIG
+            fprintf(stderr, " i=%d\n", i);
+#endif
+            if (*f == 0U) {
+                /* Nothing to do for any of the 4 data points */
+                f++;
+                continue;
+            }
+            opj_t1_enc_sigpass_step(
+                t1,
+                f,
+                &t1->data[((k + 0) * t1->data_stride) + i],
+                bpno,
+                one,
+                nmsedec,
+                type,
+                0, cblksty & J2K_CCP_CBLKSTY_VSC);
+            opj_t1_enc_sigpass_step(
+                t1,
+                f,
+                &t1->data[((k + 1) * t1->data_stride) + i],
+                bpno,
+                one,
+                nmsedec,
+                type,
+                1, 0);
+            opj_t1_enc_sigpass_step(
+                t1,
+                f,
+                &t1->data[((k + 2) * t1->data_stride) + i],
+                bpno,
+                one,
+                nmsedec,
+                type,
+                2, 0);
+            opj_t1_enc_sigpass_step(
+                t1,
+                f,
+                &t1->data[((k + 3) * t1->data_stride) + i],
+                bpno,
+                one,
+                nmsedec,
+                type,
+                3, 0);
+            ++f;
+        }
+        f += extra;
+    }
+
+    if (k < t1->h) {
+        OPJ_UINT32 j;
+#ifdef DEBUG_ENC_SIG
+        fprintf(stderr, " k=%d\n", k);
+#endif
         for (i = 0; i < t1->w; ++i) {
-            for (j = k; j < k + 4 && j < t1->h; ++j) {
-                vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 ||
-                        j == t1->h - 1)) ? 1 : 0;
+#ifdef DEBUG_ENC_SIG
+            fprintf(stderr, " i=%d\n", i);
+#endif
+            if (*f == 0U) {
+                /* Nothing to do for any of the 4 data points */
+                f++;
+                continue;
+            }
+            for (j = k; j < t1->h; ++j) {
                 opj_t1_enc_sigpass_step(
                     t1,
-                    &t1->flags[((j + 1) * t1->flags_stride) + i + 1],
+                    f,
                     &t1->data[(j * t1->data_stride) + i],
                     bpno,
                     one,
                     nmsedec,
                     type,
-                    vsc);
+                    j - k,
+                    (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
             }
+            ++f;
         }
     }
 }
@@ -585,373 +559,446 @@ static void opj_t1_dec_sigpass_raw(
     OPJ_INT32 bpno,
     OPJ_INT32 cblksty)
 {
-    OPJ_INT32 one, half, oneplushalf, vsc;
+    OPJ_INT32 one, half, oneplushalf;
     OPJ_UINT32 i, j, k;
-    opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1];
+    OPJ_INT32 *data = t1->data;
+    opj_flag_t *flagsp = &T1_FLAGS(0, 0);
+    const OPJ_UINT32 l_w = t1->w;
     one = 1 << bpno;
     half = one >> 1;
     oneplushalf = one | half;
-    for (k = 0; k < t1->h; k += 4) {
-        for (i = 0; i < t1->w; ++i) {
-            opj_colflag_t *colflags2 = colflags1 + i;
-            for (j = k; j < k + 4 && j < t1->h; ++j) {
-                vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 ||
-                        j == t1->h - 1)) ? 1 : 0;
+
+    for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
+        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
+            opj_flag_t flags = *flagsp;
+            if (flags != 0) {
                 opj_t1_dec_sigpass_step_raw(
                     t1,
-                    &t1->flags[((j + 1) * t1->flags_stride) + i + 1],
-                    colflags2,
-                    &t1->data[(j * t1->w) + i],
+                    flagsp,
+                    data,
                     oneplushalf,
-                    vsc,
-                    j - k);
+                    cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
+                    0U);
+                opj_t1_dec_sigpass_step_raw(
+                    t1,
+                    flagsp,
+                    data + l_w,
+                    oneplushalf,
+                    OPJ_FALSE, /* vsc */
+                    1U);
+                opj_t1_dec_sigpass_step_raw(
+                    t1,
+                    flagsp,
+                    data + 2 * l_w,
+                    oneplushalf,
+                    OPJ_FALSE, /* vsc */
+                    2U);
+                opj_t1_dec_sigpass_step_raw(
+                    t1,
+                    flagsp,
+                    data + 3 * l_w,
+                    oneplushalf,
+                    OPJ_FALSE, /* vsc */
+                    3U);
+            }
+        }
+    }
+    if (k < t1->h) {
+        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
+            for (j = 0; j < t1->h - k; ++j) {
+                opj_t1_dec_sigpass_step_raw(
+                    t1,
+                    flagsp,
+                    data + j * l_w,
+                    oneplushalf,
+                    cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
+                    j);
             }
         }
-        colflags1 += t1->flags_stride;
     }
-}                               /* VSC and  BYPASS by Antonin */
+}
 
-#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, w, h, flags_stride) \
+#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, vsc, w, h, flags_stride) \
 { \
         OPJ_INT32 one, half, oneplushalf; \
         OPJ_UINT32 i, j, k; \
-        OPJ_INT32 *data1 = t1->data; \
-        opj_flag_t *flags1 = &t1->flags[1]; \
-        opj_colflag_t *colflags1 = &t1->colflags[flags_stride + 1]; \
+        register OPJ_INT32 *data = t1->data; \
+        register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \
+        const OPJ_UINT32 l_w = w; \
+        opj_mqc_t* mqc = &(t1->mqc); \
+        DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
+        register OPJ_UINT32 v; \
         one = 1 << bpno; \
         half = one >> 1; \
         oneplushalf = one | half; \
-        for (k = 0; k < (h & ~3u); k += 4) { \
-                for (i = 0; i < w; ++i) { \
-                        OPJ_INT32 *data2 = data1 + i; \
-                        opj_flag_t *flags2 = flags1 + i; \
-                        opj_colflag_t *colflags2 = colflags1 + i; \
-                        if( *colflags2 == 0 ) continue; \
-                        flags2 += flags_stride; \
-                        opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 0U, flags_stride); \
-                        data2 += w; \
-                        flags2 += flags_stride; \
-                        opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 1U, flags_stride); \
-                        data2 += w; \
-                        flags2 += flags_stride; \
-                        opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 2U, flags_stride); \
-                        data2 += w; \
-                        flags2 += flags_stride; \
-                        opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, 3U, flags_stride); \
-                        data2 += w; \
+        for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
+                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
+                        opj_flag_t flags = *flagsp; \
+                        if( flags != 0 ) { \
+                            opj_t1_dec_sigpass_step_mqc_macro( \
+                                flags, flagsp, flags_stride, data, \
+                                l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf, vsc); \
+                            opj_t1_dec_sigpass_step_mqc_macro( \
+                                flags, flagsp, flags_stride, data, \
+                                l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
+                            opj_t1_dec_sigpass_step_mqc_macro( \
+                                flags, flagsp, flags_stride, data, \
+                                l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
+                            opj_t1_dec_sigpass_step_mqc_macro( \
+                                flags, flagsp, flags_stride, data, \
+                                l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
+                            *flagsp = flags; \
+                        } \
                 } \
-                data1 += w << 2; \
-                flags1 += flags_stride << 2; \
-                colflags1 += flags_stride; \
         } \
-        for (i = 0; i < w; ++i) { \
-                OPJ_INT32 *data2 = data1 + i; \
-                opj_flag_t *flags2 = flags1 + i; \
-                opj_colflag_t *colflags2 = colflags1 + i; \
-                for (j = k; j < h; ++j) { \
-                        flags2 += flags_stride; \
-                        opj_t1_dec_sigpass_step_mqc(t1, flags2, colflags2, data2, oneplushalf, j - k, flags_stride); \
-                        data2 += w; \
+        UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
+        if( k < h ) { \
+            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
+                for (j = 0; j < h - k; ++j) { \
+                        opj_t1_dec_sigpass_step_mqc(t1, flagsp, \
+                            data + j * l_w, oneplushalf, j, flags_stride, vsc); \
                 } \
+            } \
         } \
 }
 
-static void opj_t1_dec_sigpass_mqc_64x64(
+static void opj_t1_dec_sigpass_mqc_64x64_novsc(
     opj_t1_t *t1,
     OPJ_INT32 bpno)
 {
-    opj_t1_dec_sigpass_mqc_internal(t1, bpno, 64, 64, 66);
+    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
 }
 
-static void opj_t1_dec_sigpass_mqc_generic(
+static void opj_t1_dec_sigpass_mqc_64x64_vsc(
     opj_t1_t *t1,
     OPJ_INT32 bpno)
 {
-    opj_t1_dec_sigpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->flags_stride);
+    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
 }
 
-/* VSC and  BYPASS by Antonin */
-static void opj_t1_dec_sigpass_mqc_vsc(
+static void opj_t1_dec_sigpass_mqc_generic_novsc(
     opj_t1_t *t1,
     OPJ_INT32 bpno)
 {
-    OPJ_INT32 one, half, oneplushalf, vsc;
-    OPJ_UINT32 i, j, k;
-    OPJ_INT32 *data1 = t1->data;
-    opj_flag_t *flags1 = &t1->flags[1];
-    opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1];
-    one = 1 << bpno;
-    half = one >> 1;
-    oneplushalf = one | half;
-    for (k = 0; k < (t1->h & ~(OPJ_UINT32)3); k += 4U) {
-        for (i = 0; i < t1->w; ++i) {
-            OPJ_INT32 *data2 = data1 + i;
-            opj_flag_t *flags2 = flags1 + i;
-            opj_colflag_t *colflags2 = colflags1 + i;
-            flags2 += t1->flags_stride;
-            opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0,
-                                            0U);
-            data2 += t1->w;
-            flags2 += t1->flags_stride;
-            opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0,
-                                            1U);
-            data2 += t1->w;
-            flags2 += t1->flags_stride;
-            opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 0,
-                                            2U);
-            data2 += t1->w;
-            flags2 += t1->flags_stride;
-            opj_t1_dec_sigpass_step_mqc_vsc(t1, flags2, colflags2, data2, oneplushalf, 1,
-                                            3U);
-            data2 += t1->w;
+    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
+                                    t1->w + 2U);
+}
+
+static void opj_t1_dec_sigpass_mqc_generic_vsc(
+    opj_t1_t *t1,
+    OPJ_INT32 bpno)
+{
+    opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
+                                    t1->w + 2U);
+}
+
+static void opj_t1_dec_sigpass_mqc(
+    opj_t1_t *t1,
+    OPJ_INT32 bpno,
+    OPJ_INT32 cblksty)
+{
+    if (t1->w == 64 && t1->h == 64) {
+        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
+            opj_t1_dec_sigpass_mqc_64x64_vsc(t1, bpno);
+        } else {
+            opj_t1_dec_sigpass_mqc_64x64_novsc(t1, bpno);
         }
-        data1 += t1->w << 2;
-        flags1 += t1->flags_stride << 2;
-        colflags1 += t1->flags_stride;
-    }
-    for (i = 0; i < t1->w; ++i) {
-        opj_colflag_t *colflags2 = colflags1 + i;
-        for (j = k; j < t1->h; ++j) {
-            vsc = (j == t1->h - 1) ? 1 : 0;
-            opj_t1_dec_sigpass_step_mqc_vsc(
-                t1,
-                &t1->flags[((j + 1) * t1->flags_stride) + i + 1],
-                colflags2,
-                &t1->data[(j * t1->w) + i],
-                oneplushalf,
-                vsc,
-                j - k);
+    } else {
+        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
+            opj_t1_dec_sigpass_mqc_generic_vsc(t1, bpno);
+        } else {
+            opj_t1_dec_sigpass_mqc_generic_novsc(t1, bpno);
         }
     }
-}                               /* VSC and  BYPASS by Antonin */
-
-
+}
 
-static void opj_t1_enc_refpass_step(opj_t1_t *t1,
-                                    opj_flag_t *flagsp,
-                                    OPJ_INT32 *datap,
-                                    OPJ_INT32 bpno,
-                                    OPJ_INT32 one,
-                                    OPJ_INT32 *nmsedec,
-                                    OPJ_BYTE type,
-                                    OPJ_UINT32 vsc)
+/**
+Encode refinement pass step
+*/
+static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1,
+        opj_flag_t *flagsp,
+        OPJ_INT32 *datap,
+        OPJ_INT32 bpno,
+        OPJ_INT32 one,
+        OPJ_INT32 *nmsedec,
+        OPJ_BYTE type,
+        OPJ_UINT32 ci)
 {
-    OPJ_INT32 v;
-    OPJ_UINT32 flag;
+    OPJ_UINT32 v;
+
+    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
 
-    opj_mqc_t *mqc = t1->mqc;   /* MQC component */
+    OPJ_UINT32 const shift_flags =
+        (*flagsp >> (ci * 3U));
 
-    flag = vsc ? (OPJ_UINT32)((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW |
-                                           T1_SGN_S))) : (OPJ_UINT32)(*flagsp);
-    if ((flag & (T1_SIG | T1_VISIT)) == T1_SIG) {
+    if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) {
+        OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags);
         *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap),
-                                          (OPJ_UINT32)(bpno));
+                                          (OPJ_UINT32)bpno);
         v = (opj_int_abs(*datap) & one) ? 1 : 0;
-        opj_mqc_setcurctx(mqc, opj_t1_getctxno_mag(flag));  /* ESSAI */
+#ifdef DEBUG_ENC_REF
+        fprintf(stderr, "  ctxt=%d\n", ctxt);
+#endif
+        opj_mqc_setcurctx(mqc, ctxt);
         if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
-            opj_mqc_bypass_enc(mqc, (OPJ_UINT32)v);
+            opj_mqc_bypass_enc(mqc, v);
         } else {
-            opj_mqc_encode(mqc, (OPJ_UINT32)v);
+            opj_mqc_encode(mqc, v);
         }
-        *flagsp |= T1_REFINE;
+        *flagsp |= T1_MU_THIS << (ci * 3U);
     }
 }
 
-static INLINE void opj_t1_dec_refpass_step_raw(
-    opj_t1_t *t1,
-    opj_colflag_t *colflagsp,
-    OPJ_INT32 *datap,
-    OPJ_INT32 poshalf,
-    OPJ_INT32 neghalf,
-    OPJ_UINT32 row)
-{
-    OPJ_INT32 v, t;
-
-    opj_raw_t *raw = t1->raw;       /* RAW component */
-
-    if ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) <<
-                       (T1_COLFLAG_RBS * row))) ==
-            ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) {
-        v = (OPJ_INT32)opj_raw_decode(raw);
-        t = v ? poshalf : neghalf;
-        *datap += *datap < 0 ? -t : t;
-        *colflagsp = (opj_colflag_t)(*colflagsp | (T1_COLFLAG_REFINE_ROW_0 <<
-                                     (T1_COLFLAG_RBS * row)));
-    }
-}                               /* VSC and  BYPASS by Antonin  */
 
-static INLINE void opj_t1_dec_refpass_step_mqc(
+static INLINE void opj_t1_dec_refpass_step_raw(
     opj_t1_t *t1,
-#ifdef CONSISTENCY_CHECK
     opj_flag_t *flagsp,
-#else
-    opj_flag_t *flagsp_unused,
-#endif
-    opj_colflag_t *colflagsp,
     OPJ_INT32 *datap,
     OPJ_INT32 poshalf,
-    OPJ_INT32 neghalf,
-    OPJ_UINT32 row)
+    OPJ_UINT32 ci)
 {
-    OPJ_INT32 v, t;
-
-    opj_mqc_t *mqc = t1->mqc;       /* MQC component */
-#ifdef CONSISTENCY_CHECK
-    assert(((*flagsp & (T1_SIG | T1_VISIT)) == T1_SIG) ==
-           ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) <<
-                           (T1_COLFLAG_RBS * row))) == ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS *
-                                   row))));
-#else
-    (void)flagsp_unused;
-#endif
-    if ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) <<
-                       (T1_COLFLAG_RBS * row))) ==
-            ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) {
-        OPJ_UINT32 tmp1 = (*colflagsp & (T1_COLFLAG_SIG_OTHER_ROW_0 <<
-                                         (T1_COLFLAG_RBS * row))) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
-        OPJ_UINT32 tmp2 = (*colflagsp & (T1_COLFLAG_REFINE_ROW_0 <<
-                                         (T1_COLFLAG_RBS * row))) ? T1_CTXNO_MAG + 2 : tmp1;
-        opj_mqc_setcurctx(mqc, tmp2);      /* ESSAI */
-        v = opj_mqc_decode(mqc);
-        t = v ? poshalf : neghalf;
-        *datap += *datap < 0 ? -t : t;
-        *colflagsp = (opj_colflag_t)(*colflagsp | (T1_COLFLAG_REFINE_ROW_0 <<
-                                     (T1_COLFLAG_RBS * row)));
+    OPJ_UINT32 v;
+
+    opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
+
+    if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
+            (T1_SIGMA_THIS << (ci * 3U))) {
+        v = opj_mqc_raw_decode(mqc);
+        *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
+        *flagsp |= T1_MU_THIS << (ci * 3U);
     }
-}                               /* VSC and  BYPASS by Antonin  */
+}
+
+#define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \
+                                          mqc, curctx, v, a, c, ct, poshalf) \
+{ \
+    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == \
+            (T1_SIGMA_THIS << (ci * 3U))) { \
+        OPJ_UINT32 ctxt = opj_t1_getctxno_mag(flags >> (ci * 3U)); \
+        opj_t1_setcurctx(curctx, ctxt); \
+        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
+        data[ci*data_stride] += (v ^ (data[ci*data_stride] < 0)) ? poshalf : -poshalf; \
+        flags |= T1_MU_THIS << (ci * 3U); \
+    } \
+}
 
-static INLINE void opj_t1_dec_refpass_step_mqc_vsc(
+static INLINE void opj_t1_dec_refpass_step_mqc(
     opj_t1_t *t1,
     opj_flag_t *flagsp,
-    opj_colflag_t *colflagsp,
     OPJ_INT32 *datap,
     OPJ_INT32 poshalf,
-    OPJ_INT32 neghalf,
-    OPJ_INT32 vsc,
-    OPJ_UINT32 row)
+    OPJ_UINT32 ci)
 {
     OPJ_UINT32 v;
-    OPJ_INT32 t;
-
-    opj_mqc_t *mqc = t1->mqc;       /* MQC component */
-
-    if ((*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) <<
-                       (T1_COLFLAG_RBS * row))) ==
-            ((T1_COLFLAG_SIG_ROW_0) << (T1_COLFLAG_RBS * row))) {
-        OPJ_UINT32 flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW |
-                                                T1_SGN_S))) : (*flagsp);
-        OPJ_UINT32 tmp1 = (flag & T1_SIG_OTH) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
-        OPJ_UINT32 tmp2 = (*colflagsp & (T1_COLFLAG_REFINE_ROW_0 <<
-                                         (T1_COLFLAG_RBS * row))) ? T1_CTXNO_MAG + 2U : tmp1;
-        opj_mqc_setcurctx(mqc, tmp2);      /* ESSAI */
-        v = (OPJ_UINT32)opj_mqc_decode(mqc);
-        t = v ? poshalf : neghalf;
-        *datap += *datap < 0 ? -t : t;
-        *colflagsp = (opj_colflag_t)(*colflagsp | (T1_COLFLAG_REFINE_ROW_0 <<
-                                     (T1_COLFLAG_RBS * row)));
-    }
-}                               /* VSC and  BYPASS by Antonin  */
 
+    opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
+    opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci,
+                                      mqc, mqc->curctx, v, mqc->a, mqc->c,
+                                      mqc->ct, poshalf);
+}
 
 static void opj_t1_enc_refpass(
     opj_t1_t *t1,
     OPJ_INT32 bpno,
     OPJ_INT32 *nmsedec,
-    OPJ_BYTE type,
-    OPJ_UINT32 cblksty)
+    OPJ_BYTE type)
 {
-    OPJ_UINT32 i, j, k, vsc;
-    OPJ_INT32 one;
+    OPJ_UINT32 i, k;
+    const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
+    opj_flag_t* f = &T1_FLAGS(0, 0);
+    const OPJ_UINT32 extra = 2U;
 
     *nmsedec = 0;
-    one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
-    for (k = 0; k < t1->h; k += 4) {
+#ifdef DEBUG_ENC_REF
+    fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
+#endif
+    for (k = 0; k < (t1->h & ~3U); k += 4) {
+#ifdef DEBUG_ENC_REF
+        fprintf(stderr, " k=%d\n", k);
+#endif
+        for (i = 0; i < t1->w; ++i) {
+#ifdef DEBUG_ENC_REF
+            fprintf(stderr, " i=%d\n", i);
+#endif
+            if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
+                /* none significant */
+                f++;
+                continue;
+            }
+            if ((*f & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
+                    (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
+                /* all processed by sigpass */
+                f++;
+                continue;
+            }
+
+            opj_t1_enc_refpass_step(
+                t1,
+                f,
+                &t1->data[((k + 0) * t1->data_stride) + i],
+                bpno,
+                one,
+                nmsedec,
+                type,
+                0);
+            opj_t1_enc_refpass_step(
+                t1,
+                f,
+                &t1->data[((k + 1) * t1->data_stride) + i],
+                bpno,
+                one,
+                nmsedec,
+                type,
+                1);
+            opj_t1_enc_refpass_step(
+                t1,
+                f,
+                &t1->data[((k + 2) * t1->data_stride) + i],
+                bpno,
+                one,
+                nmsedec,
+                type,
+                2);
+            opj_t1_enc_refpass_step(
+                t1,
+                f,
+                &t1->data[((k + 3) * t1->data_stride) + i],
+                bpno,
+                one,
+                nmsedec,
+                type,
+                3);
+            ++f;
+        }
+        f += extra;
+    }
+
+    if (k < t1->h) {
+        OPJ_UINT32 j;
+#ifdef DEBUG_ENC_REF
+        fprintf(stderr, " k=%d\n", k);
+#endif
         for (i = 0; i < t1->w; ++i) {
-            for (j = k; j < k + 4 && j < t1->h; ++j) {
-                vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 ||
-                        j == t1->h - 1)) ? 1 : 0;
+#ifdef DEBUG_ENC_REF
+            fprintf(stderr, " i=%d\n", i);
+#endif
+            if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
+                /* none significant */
+                f++;
+                continue;
+            }
+            for (j = k; j < t1->h; ++j) {
                 opj_t1_enc_refpass_step(
                     t1,
-                    &t1->flags[((j + 1) * t1->flags_stride) + i + 1],
+                    f,
                     &t1->data[(j * t1->data_stride) + i],
                     bpno,
                     one,
                     nmsedec,
                     type,
-                    vsc);
+                    j - k);
             }
+            ++f;
         }
     }
 }
 
+
 static void opj_t1_dec_refpass_raw(
     opj_t1_t *t1,
     OPJ_INT32 bpno)
 {
-    OPJ_INT32 one, poshalf, neghalf;
+    OPJ_INT32 one, poshalf;
     OPJ_UINT32 i, j, k;
-    opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1];
+    OPJ_INT32 *data = t1->data;
+    opj_flag_t *flagsp = &T1_FLAGS(0, 0);
+    const OPJ_UINT32 l_w = t1->w;
     one = 1 << bpno;
     poshalf = one >> 1;
-    neghalf = bpno > 0 ? -poshalf : -1;
-    for (k = 0; k < t1->h; k += 4) {
-        for (i = 0; i < t1->w; ++i) {
-            opj_colflag_t *colflags2 = colflags1 + i;
-            for (j = k; j < k + 4 && j < t1->h; ++j) {
+    for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
+        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
+            opj_flag_t flags = *flagsp;
+            if (flags != 0) {
+                opj_t1_dec_refpass_step_raw(
+                    t1,
+                    flagsp,
+                    data,
+                    poshalf,
+                    0U);
+                opj_t1_dec_refpass_step_raw(
+                    t1,
+                    flagsp,
+                    data + l_w,
+                    poshalf,
+                    1U);
+                opj_t1_dec_refpass_step_raw(
+                    t1,
+                    flagsp,
+                    data + 2 * l_w,
+                    poshalf,
+                    2U);
+                opj_t1_dec_refpass_step_raw(
+                    t1,
+                    flagsp,
+                    data + 3 * l_w,
+                    poshalf,
+                    3U);
+            }
+        }
+    }
+    if (k < t1->h) {
+        for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
+            for (j = 0; j < t1->h - k; ++j) {
                 opj_t1_dec_refpass_step_raw(
                     t1,
-                    colflags2,
-                    &t1->data[(j * t1->w) + i],
+                    flagsp,
+                    data + j * l_w,
                     poshalf,
-                    neghalf, j - k);
+                    j);
             }
         }
-        colflags1 += t1->flags_stride;
     }
-}                               /* VSC and  BYPASS by Antonin */
+}
 
 #define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \
 { \
-        OPJ_INT32 one, poshalf, neghalf; \
+        OPJ_INT32 one, poshalf; \
         OPJ_UINT32 i, j, k; \
-        OPJ_INT32 *data1 = t1->data; \
-        opj_flag_t *flags1 = &t1->flags[1]; \
-        opj_colflag_t *colflags1 = &t1->colflags[flags_stride + 1]; \
+        register OPJ_INT32 *data = t1->data; \
+        register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
+        const OPJ_UINT32 l_w = w; \
+        opj_mqc_t* mqc = &(t1->mqc); \
+        DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
+        register OPJ_UINT32 v; \
         one = 1 << bpno; \
         poshalf = one >> 1; \
-        neghalf = bpno > 0 ? -poshalf : -1; \
-        for (k = 0; k < (h & ~3u); k += 4) { \
-                for (i = 0; i < w; ++i) { \
-                        OPJ_INT32 *data2 = data1 + i; \
-                        opj_flag_t *flags2 = flags1 + i; \
-                        opj_colflag_t *colflags2 = colflags1 + i; \
-                        if( *colflags2 == 0 ) continue; \
-                        flags2 += flags_stride; \
-                        opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 0U); \
-                        data2 += w; \
-                        flags2 += flags_stride; \
-                        opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 1U); \
-                        data2 += w; \
-                        flags2 += flags_stride; \
-                        opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 2U); \
-                        data2 += w; \
-                        flags2 += flags_stride; \
-                        opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, 3U); \
-                        data2 += w; \
+        for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
+                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
+                        opj_flag_t flags = *flagsp; \
+                        if( flags != 0 ) { \
+                            opj_t1_dec_refpass_step_mqc_macro( \
+                                flags, data, l_w, 0, \
+                                mqc, curctx, v, a, c, ct, poshalf); \
+                            opj_t1_dec_refpass_step_mqc_macro( \
+                                flags, data, l_w, 1, \
+                                mqc, curctx, v, a, c, ct, poshalf); \
+                            opj_t1_dec_refpass_step_mqc_macro( \
+                                flags, data, l_w, 2, \
+                                mqc, curctx, v, a, c, ct, poshalf); \
+                            opj_t1_dec_refpass_step_mqc_macro( \
+                                flags, data, l_w, 3, \
+                                mqc, curctx, v, a, c, ct, poshalf); \
+                            *flagsp = flags; \
+                        } \
                 } \
-                data1 += w << 2; \
-                flags1 += flags_stride << 2; \
-                colflags1 += flags_stride; \
         } \
-        for (i = 0; i < w; ++i) { \
-                OPJ_INT32 *data2 = data1 + i; \
-                opj_flag_t *flags2 = flags1 + i; \
-                opj_colflag_t *colflags2 = colflags1 + i; \
-                for (j = k; j < h; ++j) { \
-                        flags2 += flags_stride; \
-                        opj_t1_dec_refpass_step_mqc(t1, flags2, colflags2, data2, poshalf, neghalf, j - k); \
-                        data2 += w; \
+        UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
+        if( k < h ) { \
+            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
+                for (j = 0; j < h - k; ++j) { \
+                        opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \
                 } \
+            } \
         } \
 }
 
@@ -966,66 +1013,23 @@ static void opj_t1_dec_refpass_mqc_generic(
     opj_t1_t *t1,
     OPJ_INT32 bpno)
 {
-    opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->flags_stride);
+    opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U);
 }
 
-/* VSC and  BYPASS by Antonin */
-static void opj_t1_dec_refpass_mqc_vsc(
+static void opj_t1_dec_refpass_mqc(
     opj_t1_t *t1,
     OPJ_INT32 bpno)
 {
-    OPJ_INT32 one, poshalf, neghalf;
-    OPJ_UINT32 i, j, k;
-    OPJ_INT32 vsc;
-    OPJ_INT32 *data1 = t1->data;
-    opj_flag_t *flags1 = &t1->flags[1];
-    opj_colflag_t *colflags1 = &t1->colflags[t1->flags_stride + 1];
-    one = 1 << bpno;
-    poshalf = one >> 1;
-    neghalf = bpno > 0 ? -poshalf : -1;
-    for (k = 0; k < (t1->h & ~(OPJ_UINT32)3U); k += 4U) {
-        for (i = 0; i < t1->w; ++i) {
-            OPJ_INT32 *data2 = data1 + i;
-            opj_flag_t *flags2 = flags1 + i;
-            opj_colflag_t *colflags2 = colflags1 + i;
-            flags2 += t1->flags_stride;
-            opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf,
-                                            0, 0U);
-            data2 += t1->w;
-            flags2 += t1->flags_stride;
-            opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf,
-                                            0, 1U);
-            data2 += t1->w;
-            flags2 += t1->flags_stride;
-            opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf,
-                                            0, 2U);
-            data2 += t1->w;
-            flags2 += t1->flags_stride;
-            opj_t1_dec_refpass_step_mqc_vsc(t1, flags2, colflags2, data2, poshalf, neghalf,
-                                            1, 3U);
-            data2 += t1->w;
-        }
-        data1 += t1->w << 2;
-        flags1 += t1->flags_stride << 2;
-        colflags1 += t1->flags_stride;
-    }
-    for (i = 0; i < t1->w; ++i) {
-        opj_colflag_t *colflags2 = colflags1 + i;
-        for (j = k; j < t1->h; ++j) {
-            vsc = (j == t1->h - 1) ? 1 : 0;
-            opj_t1_dec_refpass_step_mqc_vsc(
-                t1,
-                &t1->flags[((j + 1) * t1->flags_stride) + i + 1],
-                colflags2,
-                &t1->data[(j * t1->w) + i],
-                poshalf, neghalf,
-                vsc,
-                j - k);
-        }
+    if (t1->w == 64 && t1->h == 64) {
+        opj_t1_dec_refpass_mqc_64x64(t1, bpno);
+    } else {
+        opj_t1_dec_refpass_mqc_generic(t1, bpno);
     }
-}                               /* VSC and  BYPASS by Antonin */
-
+}
 
+/**
+Encode clean-up pass step
+*/
 static void opj_t1_enc_clnpass_step(
     opj_t1_t *t1,
     opj_flag_t *flagsp,
@@ -1033,151 +1037,124 @@ static void opj_t1_enc_clnpass_step(
     OPJ_INT32 bpno,
     OPJ_INT32 one,
     OPJ_INT32 *nmsedec,
-    OPJ_UINT32 partial,
-    OPJ_UINT32 vsc)
+    OPJ_UINT32 agg,
+    OPJ_UINT32 runlen,
+    OPJ_UINT32 lim,
+    OPJ_UINT32 cblksty)
 {
-    OPJ_INT32 v;
-    OPJ_UINT32 flag;
-
-    opj_mqc_t *mqc = t1->mqc;   /* MQC component */
-
-    flag = vsc ? (OPJ_UINT32)((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW |
-                                           T1_SGN_S))) : (OPJ_UINT32)(*flagsp);
-    if (partial) {
-        goto LABEL_PARTIAL;
-    }
-    if (!(*flagsp & (T1_SIG | T1_VISIT))) {
-        opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, flag));
-        v = (opj_int_abs(*datap) & one) ? 1 : 0;
-        opj_mqc_encode(mqc, (OPJ_UINT32)v);
-        if (v) {
-LABEL_PARTIAL:
-            *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
-                                              (OPJ_UINT32)(bpno));
-            opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(flag));
-            v = *datap < 0 ? 1 : 0;
-            opj_mqc_encode(mqc, (OPJ_UINT32)(v ^ opj_t1_getspb((OPJ_UINT32)flag)));
-            opj_t1_updateflags(flagsp, (OPJ_UINT32)v, t1->flags_stride);
+    OPJ_UINT32 v;
+    OPJ_UINT32 ci;
+    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
+
+    const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 |
+                              T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
+
+    if ((*flagsp & check) == check) {
+        if (runlen == 0) {
+            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
+        } else if (runlen == 1) {
+            *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3);
+        } else if (runlen == 2) {
+            *flagsp &= ~(T1_PI_2 | T1_PI_3);
+        } else if (runlen == 3) {
+            *flagsp &= ~(T1_PI_3);
         }
+        return;
     }
-    *flagsp &= (opj_flag_t)~T1_VISIT;
-}
 
-static void opj_t1_dec_clnpass_step_partial(
-    opj_t1_t *t1,
-    opj_flag_t *flagsp,
-    opj_colflag_t *colflagsp,
-    OPJ_INT32 *datap,
-    OPJ_INT32 oneplushalf,
-    OPJ_UINT32 row)
-{
-    OPJ_INT32 v, flag;
-    opj_mqc_t *mqc = t1->mqc;   /* MQC component */
-
-    flag = *flagsp;
-    opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag));
-    v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag);
-    *datap = v ? -oneplushalf : oneplushalf;
-    opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride,
-                               row);
-#ifdef CONSISTENCY_CHECK
-    *flagsp &= ~T1_VISIT;
-#endif
-}               /* VSC and  BYPASS by Antonin */
+    for (ci = runlen; ci < lim; ++ci) {
+        OPJ_UINT32 vsc;
+        opj_flag_t flags;
+        OPJ_UINT32 ctxt1;
 
-static void opj_t1_dec_clnpass_step(
-    opj_t1_t *t1,
-    opj_flag_t *flagsp,
-    opj_colflag_t *colflagsp,
-    OPJ_INT32 *datap,
-    OPJ_INT32 oneplushalf,
-    OPJ_UINT32 row)
-{
-    OPJ_INT32 v, flag;
+        flags = *flagsp;
 
-    opj_mqc_t *mqc = t1->mqc;   /* MQC component */
-#ifdef CONSISTENCY_CHECK
-    assert((!(*flagsp & (T1_SIG | T1_VISIT))) == (!(*colflagsp & ((
-                T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) << (4 * row)))));
-#endif
-    if (!(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) <<
-                        (4 * row)))) {
-        flag = *flagsp;
-        opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag));
-        if (opj_mqc_decode(mqc)) {
-            opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag));
-            v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag);
-            *datap = v ? -oneplushalf : oneplushalf;
-            opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, t1->flags_stride,
-                                       row);
+        if ((agg != 0) && (ci == runlen)) {
+            goto LABEL_PARTIAL;
         }
-    }
-#ifdef CONSISTENCY_CHECK
-    *flagsp &= ~T1_VISIT;
-#endif
-}               /* VSC and  BYPASS by Antonin */
 
-static void opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(
-    opj_t1_t *t1,
-    opj_flag_t *flagsp,
-    opj_colflag_t *colflagsp,
-    OPJ_INT32 *datap,
-    OPJ_INT32 oneplushalf,
-    OPJ_UINT32 row,
-    OPJ_UINT32 flags_stride)
-{
-    OPJ_INT32 v;
-    OPJ_INT32 flag;
-
-    opj_mqc_t *mqc = t1->mqc;   /* MQC component */
+        if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {
+            ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
+#ifdef DEBUG_ENC_CLN
+            printf("   ctxt1=%d\n", ctxt1);
+#endif
+            opj_mqc_setcurctx(mqc, ctxt1);
+            v = (opj_int_abs(*datap) & one) ? 1 : 0;
+            opj_mqc_encode(mqc, v);
+            if (v) {
+                OPJ_UINT32 ctxt2, spb;
+                OPJ_UINT32 lu;
+LABEL_PARTIAL:
+                lu = opj_t1_getctxtno_sc_or_spb_index(
+                         *flagsp,
+                         flagsp[-1], flagsp[1],
+                         ci);
+                *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
+                                                  (OPJ_UINT32)bpno);
+                ctxt2 = opj_t1_getctxno_sc(lu);
+#ifdef DEBUG_ENC_CLN
+                printf("   ctxt2=%d\n", ctxt2);
+#endif
+                opj_mqc_setcurctx(mqc, ctxt2);
 
-    flag = *flagsp;
-    /*if (!(flag & (T1_SIG | T1_VISIT)))*/
-    {
-        opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, (OPJ_UINT32)flag));
-        if (opj_mqc_decode(mqc)) {
-            opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc((OPJ_UINT32)flag));
-            v = opj_mqc_decode(mqc) ^ opj_t1_getspb((OPJ_UINT32)flag);
-            *datap = v ? -oneplushalf : oneplushalf;
-            opj_t1_updateflagscolflags(flagsp, colflagsp, (OPJ_UINT32)v, flags_stride, row);
+                v = *datap < 0 ? 1U : 0U;
+                spb = opj_t1_getspb(lu);
+#ifdef DEBUG_ENC_CLN
+                printf("   spb=%d\n", spb);
+#endif
+                opj_mqc_encode(mqc, v ^ spb);
+                vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0;
+                opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc);
+            }
         }
+        *flagsp &= ~(T1_PI_THIS << (3U * ci));
+        datap += t1->data_stride;
     }
-    /*flagsp &= ~T1_VISIT;*/
 }
 
-static void opj_t1_dec_clnpass_step_vsc(
+#define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
+                                      flags, flagsp, flags_stride, data, \
+                                      data_stride, ci, mqc, curctx, \
+                                      v, a, c, ct, oneplushalf, vsc) \
+{ \
+    if ( !check_flags || !(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {\
+        do { \
+            if( !partial ) { \
+                OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
+                opj_t1_setcurctx(curctx, ctxt1); \
+                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
+                if( !v ) \
+                    break; \
+            } \
+            { \
+                OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
+                                    flags, flagsp[-1], flagsp[1], \
+                                    ci); \
+                opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \
+                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
+                v = v ^ opj_t1_getspb(lu); \
+                data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
+                opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
+            } \
+        } while(0); \
+    } \
+}
+
+static void opj_t1_dec_clnpass_step(
     opj_t1_t *t1,
     opj_flag_t *flagsp,
-    opj_colflag_t *colflagsp,
     OPJ_INT32 *datap,
     OPJ_INT32 oneplushalf,
-    OPJ_INT32 partial,
-    OPJ_INT32 vsc,
-    OPJ_UINT32 row)
+    OPJ_UINT32 ci,
+    OPJ_UINT32 vsc)
 {
-    OPJ_UINT32 v, flag;
-
-    opj_mqc_t *mqc = t1->mqc;   /* MQC component */
+    OPJ_UINT32 v;
 
-    flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) :
-           (*flagsp);
-    if (partial) {
-        goto LABEL_PARTIAL;
-    }
-    if (!(*colflagsp & ((T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0) <<
-                        (T1_COLFLAG_RBS * row)))) {
-        opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(mqc, flag));
-        if (opj_mqc_decode(mqc)) {
-LABEL_PARTIAL:
-            opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(flag));
-            v = (OPJ_UINT32)opj_mqc_decode(mqc) ^ opj_t1_getspb(flag);
-            *datap = v ? -oneplushalf : oneplushalf;
-            opj_t1_updateflagscolflags(flagsp, colflagsp, v, t1->flags_stride, row);
-        }
-    }
-#ifdef CONSISTENCY_CHECK
-    *flagsp &= ~T1_VISIT;
-#endif
+    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
+    opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE,
+                                  *flagsp, flagsp, t1->w + 2U, datap,
+                                  0, ci, mqc, mqc->curctx,
+                                  v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
 }
 
 static void opj_t1_enc_clnpass(
@@ -1186,33 +1163,28 @@ static void opj_t1_enc_clnpass(
     OPJ_INT32 *nmsedec,
     OPJ_UINT32 cblksty)
 {
-    OPJ_UINT32 i, j, k;
-    OPJ_INT32 one;
-    OPJ_UINT32 agg, runlen, vsc;
+    OPJ_UINT32 i, k;
+    const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
+    OPJ_UINT32 agg, runlen;
 
-    opj_mqc_t *mqc = t1->mqc;   /* MQC component */
+    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
 
     *nmsedec = 0;
-    one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
-    for (k = 0; k < t1->h; k += 4) {
+#ifdef DEBUG_ENC_CLN
+    printf("enc_clnpass: bpno=%d\n", bpno);
+#endif
+    for (k = 0; k < (t1->h & ~3U); k += 4) {
+#ifdef DEBUG_ENC_CLN
+        printf(" k=%d\n", k);
+#endif
         for (i = 0; i < t1->w; ++i) {
-            if (k + 3 < t1->h) {
-                if (cblksty & J2K_CCP_CBLKSTY_VSC) {
-                    agg = !(MACRO_t1_flags(1 + k, 1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH)
-                            || MACRO_t1_flags(1 + k + 1, 1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH)
-                            || MACRO_t1_flags(1 + k + 2, 1 + i) & (T1_SIG | T1_VISIT | T1_SIG_OTH)
-                            || (MACRO_t1_flags(1 + k + 3, 1 + i)
-                                & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) & (T1_SIG | T1_VISIT |
-                                        T1_SIG_OTH));
-                } else {
-                    agg = !((MACRO_t1_flags(1 + k, 1 + i) |
-                             MACRO_t1_flags(1 + k + 1, 1 + i) |
-                             MACRO_t1_flags(1 + k + 2, 1 + i) |
-                             MACRO_t1_flags(1 + k + 3, 1 + i)) & (T1_SIG | T1_VISIT | T1_SIG_OTH));
-                }
-            } else {
-                agg = 0;
-            }
+#ifdef DEBUG_ENC_CLN
+            printf("  i=%d\n", i);
+#endif
+            agg = !(T1_FLAGS(i, k));
+#ifdef DEBUG_ENC_CLN
+            printf("   agg=%d\n", agg);
+#endif
             if (agg) {
                 for (runlen = 0; runlen < 4; ++runlen) {
                     if (opj_int_abs(t1->data[((k + runlen)*t1->data_stride) + i]) & one) {
@@ -1230,204 +1202,204 @@ static void opj_t1_enc_clnpass(
             } else {
                 runlen = 0;
             }
-            for (j = k + runlen; j < k + 4 && j < t1->h; ++j) {
-                vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (j == k + 3 ||
-                        j == t1->h - 1)) ? 1 : 0;
-                opj_t1_enc_clnpass_step(
-                    t1,
-                    &t1->flags[((j + 1) * t1->flags_stride) + i + 1],
-                    &t1->data[(j * t1->data_stride) + i],
-                    bpno,
-                    one,
-                    nmsedec,
-                    agg && (j == k + runlen),
-                    vsc);
-            }
+            opj_t1_enc_clnpass_step(
+                t1,
+                &T1_FLAGS(i, k),
+                &t1->data[((k + runlen) * t1->data_stride) + i],
+                bpno,
+                one,
+                nmsedec,
+                agg,
+                runlen,
+                4U,
+                cblksty);
+        }
+    }
+    if (k < t1->h) {
+        agg = 0;
+        runlen = 0;
+#ifdef DEBUG_ENC_CLN
+        printf(" k=%d\n", k);
+#endif
+        for (i = 0; i < t1->w; ++i) {
+#ifdef DEBUG_ENC_CLN
+            printf("  i=%d\n", i);
+            printf("   agg=%d\n", agg);
+#endif
+            opj_t1_enc_clnpass_step(
+                t1,
+                &T1_FLAGS(i, k),
+                &t1->data[((k + runlen) * t1->data_stride) + i],
+                bpno,
+                one,
+                nmsedec,
+                agg,
+                runlen,
+                t1->h - k,
+                cblksty);
         }
     }
 }
 
-#define MACRO_t1_flags_internal(x,y,flags_stride) t1->flags[((x)*(flags_stride))+(y)]
-
-#define opj_t1_dec_clnpass_internal(consistency_check, t1, bpno, cblksty, w, h, flags_stride) \
+#define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
 { \
-    OPJ_INT32 one, half, oneplushalf, agg, runlen, vsc; \
+    OPJ_INT32 one, half, oneplushalf; \
+    OPJ_UINT32 runlen; \
     OPJ_UINT32 i, j, k; \
-    OPJ_INT32 segsym = cblksty & J2K_CCP_CBLKSTY_SEGSYM; \
-     \
-    opj_mqc_t *mqc = t1->mqc;   /* MQC component */ \
-     \
+    const OPJ_UINT32 l_w = w; \
+    opj_mqc_t* mqc = &(t1->mqc); \
+    register OPJ_INT32 *data = t1->data; \
+    register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
+    DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
+    register OPJ_UINT32 v; \
     one = 1 << bpno; \
     half = one >> 1; \
     oneplushalf = one | half; \
-    if (cblksty & J2K_CCP_CBLKSTY_VSC) { \
-    opj_colflag_t *colflags1 = &t1->colflags[flags_stride + 1]; \
-    for (k = 0; k < h; k += 4) { \
-        for (i = 0; i < w; ++i) { \
-            opj_colflag_t *colflags2 = colflags1 + i; \
-            if (k + 3 < h) { \
-                    agg = !((*colflags2 & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_SIG_OTHER_ROW_0 | \
-                                           T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_SIG_OTHER_ROW_1 | \
-                                           T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_SIG_OTHER_ROW_2 | \
-                                           T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3)) || \
-                          ((MACRO_t1_flags_internal(1 + k + 3,1 + i,flags_stride) \
-                           & ((~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) & (T1_SIG_OTH)))); \
-                } else { \
-                agg = 0; \
-            } \
-            if (agg) { \
-                opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); \
-                if (!opj_mqc_decode(mqc)) { \
+    for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
+        for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
+            opj_flag_t flags = *flagsp; \
+            if (flags == 0) { \
+                OPJ_UINT32 partial = OPJ_TRUE; \
+                opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \
+                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
+                if (!v) { \
                     continue; \
                 } \
-                opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \
-                runlen = opj_mqc_decode(mqc); \
-                runlen = (runlen << 1) | opj_mqc_decode(mqc); \
+                opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \
+                opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \
+                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
+                runlen = (runlen << 1) | v; \
+                switch(runlen) { \
+                    case 0: \
+                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\
+                                            flags, flagsp, flags_stride, data, \
+                                            l_w, 0, mqc, curctx, \
+                                            v, a, c, ct, oneplushalf, vsc); \
+                        partial = OPJ_FALSE; \
+                        /* FALLTHRU */ \
+                    case 1: \
+                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
+                                            flags, flagsp, flags_stride, data, \
+                                            l_w, 1, mqc, curctx, \
+                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
+                        partial = OPJ_FALSE; \
+                        /* FALLTHRU */ \
+                    case 2: \
+                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
+                                            flags, flagsp, flags_stride, data, \
+                                            l_w, 2, mqc, curctx, \
+                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
+                        partial = OPJ_FALSE; \
+                        /* FALLTHRU */ \
+                    case 3: \
+                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
+                                            flags, flagsp, flags_stride, data, \
+                                            l_w, 3, mqc, curctx, \
+                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
+                        break; \
+                } \
             } else { \
-                runlen = 0; \
-            } \
-            for (j = k + (OPJ_UINT32)runlen; j < k + 4 && j < h; ++j) { \
-                    vsc = (j == k + 3 || j == h - 1) ? 1 : 0; \
-                    opj_t1_dec_clnpass_step_vsc( \
-                        t1, \
-                        &t1->flags[((j+1) * flags_stride) + i + 1], \
-                        colflags2, \
-                        &t1->data[(j * w) + i], \
-                        oneplushalf, \
-                        agg && (j == k + (OPJ_UINT32)runlen), \
-                        vsc, j - k); \
+                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
+                                    flags, flagsp, flags_stride, data, \
+                                    l_w, 0, mqc, curctx, \
+                                    v, a, c, ct, oneplushalf, vsc); \
+                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
+                                    flags, flagsp, flags_stride, data, \
+                                    l_w, 1, mqc, curctx, \
+                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
+                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
+                                    flags, flagsp, flags_stride, data, \
+                                    l_w, 2, mqc, curctx, \
+                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
+                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
+                                    flags, flagsp, flags_stride, data, \
+                                    l_w, 3, mqc, curctx, \
+                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
             } \
-            *colflags2 &= (opj_colflag_t)~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \
+            *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
         } \
-        colflags1 += flags_stride; \
     } \
-    } else { \
-        OPJ_INT32 *data1 = t1->data; \
-        opj_flag_t *flags1 = &t1->flags[1]; \
-        opj_colflag_t *colflags1 = &t1->colflags[flags_stride + 1]; \
-        for (k = 0; k < (h & ~3u); k += 4) { \
-            for (i = 0; i < w; ++i) { \
-                OPJ_INT32 *data2 = data1 + i; \
-                opj_flag_t *flags2 = flags1 + i; \
-                opj_colflag_t *colflags2 = colflags1 + i; \
-                opj_colflag_t colflags = *colflags2; \
-                agg = !(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_SIG_OTHER_ROW_0 | \
-                                     T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_SIG_OTHER_ROW_1 | \
-                                     T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_SIG_OTHER_ROW_2 | \
-                                     T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3 | T1_COLFLAG_SIG_OTHER_ROW_3)); \
-                if( consistency_check ) { \
-                    assert( agg == !((MACRO_t1_flags_internal(1 + k, 1 + i,flags_stride) | \
-                                      MACRO_t1_flags_internal(1 + k + 1, 1 + i,flags_stride) | \
-                                      MACRO_t1_flags_internal(1 + k + 2, 1 + i,flags_stride) | \
-                                      MACRO_t1_flags_internal(1 + k + 3, 1 + i,flags_stride)) & (T1_SIG | T1_VISIT | T1_SIG_OTH)) ); \
-                } \
-                if (agg) { \
-                    opj_mqc_setcurctx(mqc, T1_CTXNO_AGG); \
-                    if (!opj_mqc_decode(mqc)) { \
-                        continue; \
-                    } \
-                    opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \
-                    runlen = opj_mqc_decode(mqc); \
-                    runlen = (runlen << 1) | opj_mqc_decode(mqc); \
-                    flags2 += (OPJ_UINT32)runlen * flags_stride; \
-                    data2 += (OPJ_UINT32)runlen * w; \
-                    for (j = (OPJ_UINT32)runlen; j < 4; ++j) { \
-                        flags2 += flags_stride; \
-                        if (j == (OPJ_UINT32)runlen) { \
-                            opj_t1_dec_clnpass_step_partial(t1, flags2, colflags2, data2, oneplushalf, j); \
-                        } else { \
-                            opj_t1_dec_clnpass_step(t1, flags2, colflags2, data2, oneplushalf, j); \
-                        } \
-                        data2 += w; \
-                    } \
-                } else { \
-                    flags2 += flags_stride; \
-                    if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \
-                    if (!(colflags & (T1_COLFLAG_SIG_ROW_0 | T1_COLFLAG_VISIT_ROW_0))) {\
-                        opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 0U, flags_stride); \
-                    } \
-                    if( consistency_check ) *flags2 &= (opj_flag_t)~T1_VISIT; \
-                    data2 += w; \
-                    flags2 += flags_stride; \
-                    if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \
-                    if (!(colflags & (T1_COLFLAG_SIG_ROW_1 | T1_COLFLAG_VISIT_ROW_1))) {\
-                        opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 1U, flags_stride); \
-                    } \
-                    if( consistency_check ) *flags2 &= (opj_flag_t)~T1_VISIT; \
-                    data2 += w; \
-                    flags2 += flags_stride; \
-                    if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \
-                    if (!(colflags & (T1_COLFLAG_SIG_ROW_2 | T1_COLFLAG_VISIT_ROW_2))) {\
-                        opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 2U, flags_stride); \
-                    } \
-                    if( consistency_check ) *flags2 &= (opj_flag_t)~T1_VISIT; \
-                    data2 += w; \
-                    flags2 += flags_stride; \
-                    if( consistency_check ) { assert( (!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) == (!(*flags2 & (T1_SIG | T1_VISIT))) ); } \
-                    if (!(colflags & (T1_COLFLAG_SIG_ROW_3 | T1_COLFLAG_VISIT_ROW_3))) {\
-                        opj_t1_dec_clnpass_step_only_if_flag_not_sig_visit(t1, flags2, colflags2, data2, oneplushalf, 3U, flags_stride); \
-                    } \
-                    if( consistency_check ) *flags2 &= (opj_flag_t)~T1_VISIT; \
-                    data2 += w; \
-                } \
-                *colflags2 &= (opj_colflag_t)~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \
-            } \
-            data1 += w << 2; \
-            flags1 += flags_stride << 2; \
-            colflags1 += flags_stride; \
-        } \
-        for (i = 0; i < w; ++i) { \
-            OPJ_INT32 *data2 = data1 + i; \
-            opj_flag_t *flags2 = flags1 + i; \
-            opj_colflag_t *colflags2 = colflags1 + i; \
-            for (j = k; j < h; ++j) { \
-                flags2 += flags_stride; \
-                opj_t1_dec_clnpass_step(t1, flags2, colflags2, data2, oneplushalf, j - k); \
-                data2 += w; \
+    UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
+    if( k < h ) { \
+        for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
+            for (j = 0; j < h - k; ++j) { \
+                opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j, vsc); \
             } \
-            *colflags2 &= (opj_colflag_t)~(T1_COLFLAG_VISIT_ROW_0 | T1_COLFLAG_VISIT_ROW_1 | T1_COLFLAG_VISIT_ROW_2 | T1_COLFLAG_VISIT_ROW_3); \
-        } \
-    } \
- \
-    if (segsym) { \
-        OPJ_INT32 v = 0; \
-        opj_mqc_setcurctx(mqc, T1_CTXNO_UNI); \
-        v = opj_mqc_decode(mqc); \
-        v = (v << 1) | opj_mqc_decode(mqc); \
-        v = (v << 1) | opj_mqc_decode(mqc); \
-        v = (v << 1) | opj_mqc_decode(mqc); \
-        /* \
-        if (v!=0xa) { \
-            opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v); \
+            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
         } \
-        */ \
     } \
-}               /* VSC and  BYPASS by Antonin */
+}
+
+static void opj_t1_dec_clnpass_check_segsym(opj_t1_t *t1, OPJ_INT32 cblksty)
+{
+    if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
+        opj_mqc_t* mqc = &(t1->mqc);
+        OPJ_UINT32 v, v2;
+        opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
+        opj_mqc_decode(v, mqc);
+        opj_mqc_decode(v2, mqc);
+        v = (v << 1) | v2;
+        opj_mqc_decode(v2, mqc);
+        v = (v << 1) | v2;
+        opj_mqc_decode(v2, mqc);
+        v = (v << 1) | v2;
+        /*
+        if (v!=0xa) {
+            opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v);
+        }
+        */
+    }
+}
 
-static void opj_t1_dec_clnpass_64x64(
+static void opj_t1_dec_clnpass_64x64_novsc(
     opj_t1_t *t1,
-    OPJ_INT32 bpno,
-    OPJ_INT32 cblksty)
+    OPJ_INT32 bpno)
 {
-#ifdef CONSISTENCY_CHECK
-    opj_t1_dec_clnpass_internal(OPJ_TRUE, t1, bpno, cblksty, 64, 64, 66);
-#else
-    opj_t1_dec_clnpass_internal(OPJ_FALSE, t1, bpno, cblksty, 64, 64, 66);
-#endif
+    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
+}
+
+static void opj_t1_dec_clnpass_64x64_vsc(
+    opj_t1_t *t1,
+    OPJ_INT32 bpno)
+{
+    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
+}
+
+static void opj_t1_dec_clnpass_generic_novsc(
+    opj_t1_t *t1,
+    OPJ_INT32 bpno)
+{
+    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
+                                t1->w + 2U);
 }
 
-static void opj_t1_dec_clnpass_generic(
+static void opj_t1_dec_clnpass_generic_vsc(
+    opj_t1_t *t1,
+    OPJ_INT32 bpno)
+{
+    opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
+                                t1->w + 2U);
+}
+
+static void opj_t1_dec_clnpass(
     opj_t1_t *t1,
     OPJ_INT32 bpno,
     OPJ_INT32 cblksty)
 {
-#ifdef CONSISTENCY_CHECK
-    opj_t1_dec_clnpass_internal(OPJ_TRUE, t1, bpno, cblksty, t1->w, t1->h,
-                                t1->flags_stride);
-#else
-    opj_t1_dec_clnpass_internal(OPJ_FALSE, t1, bpno, cblksty, t1->w, t1->h,
-                                t1->flags_stride);
-#endif
+    if (t1->w == 64 && t1->h == 64) {
+        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
+            opj_t1_dec_clnpass_64x64_vsc(t1, bpno);
+        } else {
+            opj_t1_dec_clnpass_64x64_novsc(t1, bpno);
+        }
+    } else {
+        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
+            opj_t1_dec_clnpass_generic_vsc(t1, bpno);
+        } else {
+            opj_t1_dec_clnpass_generic_novsc(t1, bpno);
+        }
+    }
+    opj_t1_dec_clnpass_check_segsym(t1, cblksty);
 }
 
 
@@ -1468,41 +1440,27 @@ static OPJ_BOOL opj_t1_allocate_buffers(
     OPJ_UINT32 w,
     OPJ_UINT32 h)
 {
-    /* encoder uses tile buffer, so no need to allocate */
-    if (!t1->encoder) {
-        size_t datasize;
+    OPJ_UINT32 flagssize;
+    OPJ_UINT32 flags_stride;
 
-#if (SIZE_MAX / 0xFFFFFFFFU) < 0xFFFFFFFFU /* UINT32_MAX */
-        /* Overflow check */
-        if ((w > 0U) && ((size_t)h > (SIZE_MAX / (size_t)w))) {
-            /* FIXME event manager error callback */
-            return OPJ_FALSE;
-        }
-#endif
-        datasize = (size_t)w * h;
+    /* No risk of overflow. Prior checks ensure those assert are met */
+    /* They are per the specification */
+    assert(w <= 1024);
+    assert(h <= 1024);
+    assert(w * h <= 4096);
 
-        /* Overflow check */
-        if (datasize > (SIZE_MAX / sizeof(OPJ_INT32))) {
-            /* FIXME event manager error callback */
-            return OPJ_FALSE;
-        }
+    /* encoder uses tile buffer, so no need to allocate */
+    if (!t1->encoder) {
+        OPJ_UINT32 datasize = w * h;
 
-        if (datasize > (size_t)t1->datasize) {
+        if (datasize > t1->datasize) {
             opj_aligned_free(t1->data);
             t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
             if (!t1->data) {
                 /* FIXME event manager error callback */
                 return OPJ_FALSE;
             }
-#if SIZE_MAX > 0xFFFFFFFFU /* UINT32_MAX */
-            /* TODO remove this if t1->datasize type changes to size_t */
-            /* Overflow check */
-            if (datasize > (size_t)0xFFFFFFFFU /* UINT32_MAX */) {
-                /* FIXME event manager error callback */
-                return OPJ_FALSE;
-            }
-#endif
-            t1->datasize = (OPJ_UINT32)datasize;
+            t1->datasize = datasize;
         }
         /* memset first arg is declared to never be null by gcc */
         if (t1->data != NULL) {
@@ -1510,91 +1468,56 @@ static OPJ_BOOL opj_t1_allocate_buffers(
         }
     }
 
-    {
-        size_t flagssize;
+    flags_stride = w + 2U; /* can't be 0U */
 
-        /* Overflow check */
-        if (w > (0xFFFFFFFFU /* UINT32_MAX */ - 2U)) {
-            /* FIXME event manager error callback */
-            return OPJ_FALSE;
-        }
-        t1->flags_stride = w + 2U; /* can't be 0U */
+    flagssize = (h + 3U) / 4U + 2U;
 
-#if (SIZE_MAX - 3U) < 0xFFFFFFFFU /* UINT32_MAX */
-        /* Overflow check */
-        if (h > (0xFFFFFFFFU /* UINT32_MAX */ - 3U)) {
-            /* FIXME event manager error callback */
-            return OPJ_FALSE;
-        }
-#endif
-        flagssize = (size_t)h + 3U;
+    flagssize *= flags_stride;
+    {
+        opj_flag_t* p;
+        OPJ_UINT32 x;
+        OPJ_UINT32 flags_height = (h + 3U) / 4U;
 
-        /* Overflow check */
-        if (flagssize > (SIZE_MAX / (size_t)t1->flags_stride)) {
-            /* FIXME event manager error callback */
-            return OPJ_FALSE;
-        }
-        flagssize *= (size_t)t1->flags_stride;
+        if (flagssize > t1->flagssize) {
 
-        if (flagssize > (size_t)t1->flagssize) {
-            /* Overflow check */
-            if (flagssize > (SIZE_MAX / sizeof(opj_flag_t))) {
-                /* FIXME event manager error callback */
-                return OPJ_FALSE;
-            }
             opj_aligned_free(t1->flags);
-            t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(opj_flag_t));
+            t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(
+                            opj_flag_t));
             if (!t1->flags) {
                 /* FIXME event manager error callback */
                 return OPJ_FALSE;
             }
-#if SIZE_MAX > 0xFFFFFFFFU /* UINT32_MAX */
-            /* TODO remove this if t1->flagssize type changes to size_t */
-            /* Overflow check */
-            if (flagssize > (size_t)0xFFFFFFFFU /* UINT32_MAX */) {
-                /* FIXME event manager error callback */
-                return OPJ_FALSE;
-            }
-#endif
-            t1->flagssize = (OPJ_UINT32)flagssize;
         }
+        t1->flagssize = flagssize;
+
         memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
-    }
-    if (!t1->encoder) {
-        size_t colflags_size = ((((size_t)h + 3U) / 4U) +
-                                2U); /* Can't overflow, h checked against UINT32_MAX - 3U */
 
-        /* Overflow check */
-        if (colflags_size > (SIZE_MAX / (size_t)t1->flags_stride)) {
-            /* FIXME event manager error callback */
-            return OPJ_FALSE;
+        p = &t1->flags[0];
+        for (x = 0; x < flags_stride; ++x) {
+            /* magic value to hopefully stop any passes being interested in this entry */
+            *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
         }
-        colflags_size *= (size_t)t1->flags_stride;
 
-        if (colflags_size > (size_t)t1->colflags_size) {
-            /* Overflow check */
-            if ((size_t)colflags_size > (SIZE_MAX / sizeof(opj_colflag_t))) {
-                /* FIXME event manager error callback */
-                return OPJ_FALSE;
-            }
-            opj_aligned_free(t1->colflags);
-            t1->colflags = (opj_colflag_t*) opj_aligned_malloc(colflags_size * sizeof(
-                               opj_colflag_t));
-            if (!t1->colflags) {
-                /* FIXME event manager error callback */
-                return OPJ_FALSE;
+        p = &t1->flags[((flags_height + 1) * flags_stride)];
+        for (x = 0; x < flags_stride; ++x) {
+            /* magic value to hopefully stop any passes being interested in this entry */
+            *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
+        }
+
+        if (h % 4) {
+            OPJ_UINT32 v = 0;
+            p = &t1->flags[((flags_height) * flags_stride)];
+            if (h % 4 == 1) {
+                v |= T1_PI_1 | T1_PI_2 | T1_PI_3;
+            } else if (h % 4 == 2) {
+                v |= T1_PI_2 | T1_PI_3;
+            } else if (h % 4 == 3) {
+                v |= T1_PI_3;
             }
-#if SIZE_MAX > 0xFFFFFFFFU /* UINT32_MAX */
-            /* TODO remove this if t1->colflags_size type changes to size_t */
-            /* Overflow check */
-            if (colflags_size > (size_t)0xFFFFFFFFU /* UINT32_MAX */) {
-                /* FIXME event manager error callback */
-                return OPJ_FALSE;
+            for (x = 0; x < flags_stride; ++x) {
+                *p++ = v;
             }
-#endif
-            t1->colflags_size = (OPJ_UINT32)colflags_size;
         }
-        memset(t1->colflags, 0, colflags_size * sizeof(opj_colflag_t));
     }
 
     t1->w = w;
@@ -1620,18 +1543,6 @@ opj_t1_t* opj_t1_create(OPJ_BOOL isEncoder)
         return 00;
     }
 
-    /* create MQC and RAW handles */
-    l_t1->mqc = opj_mqc_create();
-    if (! l_t1->mqc) {
-        opj_t1_destroy(l_t1);
-        return 00;
-    }
-
-    l_t1->raw = opj_raw_create();
-    if (! l_t1->raw) {
-        opj_t1_destroy(l_t1);
-        return 00;
-    }
     l_t1->encoder = isEncoder;
 
     return l_t1;
@@ -1649,12 +1560,6 @@ void opj_t1_destroy(opj_t1_t *p_t1)
         return;
     }
 
-    /* destroy MQC and RAW handles */
-    opj_mqc_destroy(p_t1->mqc);
-    p_t1->mqc = 00;
-    opj_raw_destroy(p_t1->raw);
-    p_t1->raw = 00;
-
     /* encoder uses tile buffer, so no need to free */
     if (!p_t1->encoder && p_t1->data) {
         opj_aligned_free(p_t1->data);
@@ -1666,20 +1571,23 @@ void opj_t1_destroy(opj_t1_t *p_t1)
         p_t1->flags = 00;
     }
 
-    if (p_t1->colflags) {
-        opj_aligned_free(p_t1->colflags);
-        p_t1->colflags = 00;
-    }
+    opj_free(p_t1->cblkdatabuffer);
+
     opj_free(p_t1);
 }
 
 typedef struct {
+    OPJ_BOOL whole_tile_decoding;
     OPJ_UINT32 resno;
     opj_tcd_cblk_dec_t* cblk;
     opj_tcd_band_t* band;
     opj_tcd_tilecomp_t* tilec;
     opj_tccp_t* tccp;
+    OPJ_BOOL mustuse_cblkdatabuffer;
     volatile OPJ_BOOL* pret;
+    opj_event_mgr_t *p_manager;
+    opj_mutex_t* p_manager_mutex;
+    OPJ_BOOL check_pterm;
 } opj_t1_cblk_decode_processing_job_t;
 
 static void opj_t1_destroy_wrapper(void* t1)
@@ -1703,12 +1611,44 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
     OPJ_UINT32 tile_w;
 
     job = (opj_t1_cblk_decode_processing_job_t*) user_data;
-    resno = job->resno;
+
     cblk = job->cblk;
+
+    if (!job->whole_tile_decoding) {
+        cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
+        cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
+
+        cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) *
+                             cblk_w * cblk_h);
+        if (cblk->decoded_data == NULL) {
+            if (job->p_manager_mutex) {
+                opj_mutex_lock(job->p_manager_mutex);
+            }
+            opj_event_msg(job->p_manager, EVT_ERROR,
+                          "Cannot allocate cblk->decoded_data\n");
+            if (job->p_manager_mutex) {
+                opj_mutex_unlock(job->p_manager_mutex);
+            }
+            *(job->pret) = OPJ_FALSE;
+            opj_free(job);
+            return;
+        }
+        /* Zero-init required */
+        memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h);
+    } else if (cblk->decoded_data) {
+        /* Not sure if that code path can happen, but better be */
+        /* safe than sorry */
+        opj_aligned_free(cblk->decoded_data);
+        cblk->decoded_data = NULL;
+    }
+
+    resno = job->resno;
     band = job->band;
     tilec = job->tilec;
     tccp = job->tccp;
-    tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
+    tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1
+                          -
+                          tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
 
     if (!*(job->pret)) {
         opj_free(job);
@@ -1720,13 +1660,17 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
         t1 = opj_t1_create(OPJ_FALSE);
         opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
     }
+    t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
 
     if (OPJ_FALSE == opj_t1_decode_cblk(
                 t1,
                 cblk,
                 band->bandno,
                 (OPJ_UINT32)tccp->roishift,
-                tccp->cblksty)) {
+                tccp->cblksty,
+                job->p_manager,
+                job->p_manager_mutex,
+                job->check_pterm)) {
         *(job->pret) = OPJ_FALSE;
         opj_free(job);
         return;
@@ -1743,26 +1687,74 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
         y += pres->y1 - pres->y0;
     }
 
-    datap = t1->data;
+    datap = cblk->decoded_data ? cblk->decoded_data : t1->data;
     cblk_w = t1->w;
     cblk_h = t1->h;
 
     if (tccp->roishift) {
-        OPJ_INT32 thresh = 1 << tccp->roishift;
-        for (j = 0; j < cblk_h; ++j) {
-            for (i = 0; i < cblk_w; ++i) {
-                OPJ_INT32 val = datap[(j * cblk_w) + i];
-                OPJ_INT32 mag = abs(val);
-                if (mag >= thresh) {
-                    mag >>= tccp->roishift;
-                    datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
+        if (tccp->roishift >= 31) {
+            for (j = 0; j < cblk_h; ++j) {
+                for (i = 0; i < cblk_w; ++i) {
+                    datap[(j * cblk_w) + i] = 0;
+                }
+            }
+        } else {
+            OPJ_INT32 thresh = 1 << tccp->roishift;
+            for (j = 0; j < cblk_h; ++j) {
+                for (i = 0; i < cblk_w; ++i) {
+                    OPJ_INT32 val = datap[(j * cblk_w) + i];
+                    OPJ_INT32 mag = abs(val);
+                    if (mag >= thresh) {
+                        mag >>= tccp->roishift;
+                        datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
+                    }
                 }
             }
         }
     }
-    if (tccp->qmfbid == 1) {
-        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_UINT32)y * tile_w +
-                                                       (OPJ_UINT32)x];
+
+    /* Both can be non NULL if for example decoding a full tile and then */
+    /* partially a tile. In which case partial decoding should be the */
+    /* priority */
+    assert((cblk->decoded_data != NULL) || (tilec->data != NULL));
+
+    if (cblk->decoded_data) {
+        OPJ_UINT32 cblk_size = cblk_w * cblk_h;
+        if (tccp->qmfbid == 1) {
+            for (i = 0; i < cblk_size; ++i) {
+                datap[i] /= 2;
+            }
+        } else {        /* if (tccp->qmfbid == 0) */
+            i = 0;
+#ifdef __SSE2__
+            {
+                const __m128 xmm_stepsize = _mm_set1_ps(band->stepsize);
+                for (; i < (cblk_size & ~15U); i += 16) {
+                    __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                           datap + 0)));
+                    __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                           datap + 4)));
+                    __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                           datap + 8)));
+                    __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
+                                                           datap + 12)));
+                    _mm_store_ps((float*)(datap +  0), _mm_mul_ps(xmm0_data, xmm_stepsize));
+                    _mm_store_ps((float*)(datap +  4), _mm_mul_ps(xmm1_data, xmm_stepsize));
+                    _mm_store_ps((float*)(datap +  8), _mm_mul_ps(xmm2_data, xmm_stepsize));
+                    _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
+                    datap += 16;
+                }
+            }
+#endif
+            for (; i < cblk_size; ++i) {
+                OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize;
+                memcpy(datap, &tmp, sizeof(tmp));
+                datap++;
+            }
+        }
+    } else if (tccp->qmfbid == 1) {
+        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
+                                                       (OPJ_SIZE_T)x];
         for (j = 0; j < cblk_h; ++j) {
             i = 0;
             for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
@@ -1770,19 +1762,19 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
                 OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
                 OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
                 OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 0U] = tmp0 / 2;
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 1U] = tmp1 / 2;
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 2U] = tmp2 / 2;
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i + 3U] = tmp3 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
             }
             for (; i < cblk_w; ++i) {
                 OPJ_INT32 tmp = datap[(j * cblk_w) + i];
-                ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp / 2;
+                ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
             }
         }
     } else {        /* if (tccp->qmfbid == 0) */
-        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y *
-                                                         tile_w + (OPJ_UINT32)x];
+        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
+                                                         tile_w + (OPJ_SIZE_T)x];
         for (j = 0; j < cblk_h; ++j) {
             OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
             for (i = 0; i < cblk_w; ++i) {
@@ -1799,14 +1791,23 @@ static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
 }
 
 
-void opj_t1_decode_cblks(opj_thread_pool_t* tp,
+void opj_t1_decode_cblks(opj_tcd_t* tcd,
                          volatile OPJ_BOOL* pret,
                          opj_tcd_tilecomp_t* tilec,
-                         opj_tccp_t* tccp
+                         opj_tccp_t* tccp,
+                         opj_event_mgr_t *p_manager,
+                         opj_mutex_t* p_manager_mutex,
+                         OPJ_BOOL check_pterm
                         )
 {
+    opj_thread_pool_t* tp = tcd->thread_pool;
     OPJ_UINT32 resno, bandno, precno, cblkno;
 
+#ifdef DEBUG_VERBOSE
+    OPJ_UINT32 codeblocks_decoded = 0;
+    printf("Enter opj_t1_decode_cblks()\n");
+#endif
+
     for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
         opj_tcd_resolution_t* res = &tilec->resolutions[resno];
 
@@ -1816,23 +1817,91 @@ void opj_t1_decode_cblks(opj_thread_pool_t* tp,
             for (precno = 0; precno < res->pw * res->ph; ++precno) {
                 opj_tcd_precinct_t* precinct = &band->precincts[precno];
 
+                if (!opj_tcd_is_subband_area_of_interest(tcd,
+                        tilec->compno,
+                        resno,
+                        band->bandno,
+                        (OPJ_UINT32)precinct->x0,
+                        (OPJ_UINT32)precinct->y0,
+                        (OPJ_UINT32)precinct->x1,
+                        (OPJ_UINT32)precinct->y1)) {
+                    for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
+                        opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
+                        if (cblk->decoded_data) {
+#ifdef DEBUG_VERBOSE
+                            printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
+                                   cblk->x0, cblk->y0, resno, bandno);
+#endif
+                            opj_aligned_free(cblk->decoded_data);
+                            cblk->decoded_data = NULL;
+                        }
+                    }
+                    continue;
+                }
+
                 for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
                     opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
                     opj_t1_cblk_decode_processing_job_t* job;
 
+                    if (!opj_tcd_is_subband_area_of_interest(tcd,
+                            tilec->compno,
+                            resno,
+                            band->bandno,
+                            (OPJ_UINT32)cblk->x0,
+                            (OPJ_UINT32)cblk->y0,
+                            (OPJ_UINT32)cblk->x1,
+                            (OPJ_UINT32)cblk->y1)) {
+                        if (cblk->decoded_data) {
+#ifdef DEBUG_VERBOSE
+                            printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
+                                   cblk->x0, cblk->y0, resno, bandno);
+#endif
+                            opj_aligned_free(cblk->decoded_data);
+                            cblk->decoded_data = NULL;
+                        }
+                        continue;
+                    }
+
+                    if (!tcd->whole_tile_decoding) {
+                        OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
+                        OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
+                        if (cblk->decoded_data != NULL) {
+#ifdef DEBUG_VERBOSE
+                            printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n",
+                                   cblk->x0, cblk->y0, resno, bandno);
+#endif
+                            continue;
+                        }
+                        if (cblk_w == 0 || cblk_h == 0) {
+                            continue;
+                        }
+#ifdef DEBUG_VERBOSE
+                        printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n",
+                               cblk->x0, cblk->y0, resno, bandno);
+#endif
+                    }
+
                     job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1,
                             sizeof(opj_t1_cblk_decode_processing_job_t));
                     if (!job) {
                         *pret = OPJ_FALSE;
                         return;
                     }
+                    job->whole_tile_decoding = tcd->whole_tile_decoding;
                     job->resno = resno;
                     job->cblk = cblk;
                     job->band = band;
                     job->tilec = tilec;
                     job->tccp = tccp;
                     job->pret = pret;
+                    job->p_manager_mutex = p_manager_mutex;
+                    job->p_manager = p_manager;
+                    job->check_pterm = check_pterm;
+                    job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
                     opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
+#ifdef DEBUG_VERBOSE
+                    codeblocks_decoded ++;
+#endif
                     if (!(*pret)) {
                         return;
                     }
@@ -1841,6 +1910,9 @@ void opj_t1_decode_cblks(opj_thread_pool_t* tp,
         } /* bandno */
     } /* resno */
 
+#ifdef DEBUG_VERBOSE
+    printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded);
+#endif
     return;
 }
 
@@ -1849,17 +1921,22 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
                                    opj_tcd_cblk_dec_t* cblk,
                                    OPJ_UINT32 orient,
                                    OPJ_UINT32 roishift,
-                                   OPJ_UINT32 cblksty)
+                                   OPJ_UINT32 cblksty,
+                                   opj_event_mgr_t *p_manager,
+                                   opj_mutex_t* p_manager_mutex,
+                                   OPJ_BOOL check_pterm)
 {
-    opj_raw_t *raw = t1->raw;   /* RAW component */
-    opj_mqc_t *mqc = t1->mqc;   /* MQC component */
+    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
 
     OPJ_INT32 bpno_plus_one;
     OPJ_UINT32 passtype;
     OPJ_UINT32 segno, passno;
+    OPJ_BYTE* cblkdata = NULL;
+    OPJ_UINT32 cblkdataindex = 0;
     OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
+    OPJ_INT32* original_t1_data = NULL;
 
-    mqc->lut_ctxno_zc_orient = lut_ctxno_zc + orient * 256;
+    mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
 
     if (!opj_t1_allocate_buffers(
                 t1,
@@ -1869,6 +1946,18 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
     }
 
     bpno_plus_one = (OPJ_INT32)(roishift + cblk->numbps);
+    if (bpno_plus_one >= 31) {
+        if (p_manager_mutex) {
+            opj_mutex_lock(p_manager_mutex);
+        }
+        opj_event_msg(p_manager, EVT_WARNING,
+                      "opj_t1_decode_cblk(): unsupported bpno_plus_one = %d >= 31\n",
+                      bpno_plus_one);
+        if (p_manager_mutex) {
+            opj_mutex_unlock(p_manager_mutex);
+        }
+        return OPJ_FALSE;
+    }
     passtype = 2;
 
     opj_mqc_resetstates(mqc);
@@ -1876,111 +1965,137 @@ static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
     opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
     opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
 
+    /* Even if we have a single chunk, in multi-threaded decoding */
+    /* the insertion of our synthetic marker might potentially override */
+    /* valid codestream of other codeblocks decoded in parallel. */
+    if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
+        OPJ_UINT32 i;
+        OPJ_UINT32 cblk_len;
+
+        /* Compute whole codeblock length from chunk lengths */
+        cblk_len = 0;
+        for (i = 0; i < cblk->numchunks; i++) {
+            cblk_len += cblk->chunks[i].len;
+        }
+
+        /* Allocate temporary memory if needed */
+        if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
+            cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
+                                              cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
+            if (cblkdata == NULL) {
+                return OPJ_FALSE;
+            }
+            t1->cblkdatabuffer = cblkdata;
+            memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
+            t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
+        }
+
+        /* Concatenate all chunks */
+        cblkdata = t1->cblkdatabuffer;
+        cblk_len = 0;
+        for (i = 0; i < cblk->numchunks; i++) {
+            memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
+            cblk_len += cblk->chunks[i].len;
+        }
+    } else if (cblk->numchunks == 1) {
+        cblkdata = cblk->chunks[0].data;
+    } else {
+        /* Not sure if that can happen in practice, but avoid Coverity to */
+        /* think we will dereference a null cblkdta pointer */
+        return OPJ_TRUE;
+    }
+
+    /* For subtile decoding, directly decode in the decoded_data buffer of */
+    /* the code-block. Hack t1->data to point to it, and restore it later */
+    if (cblk->decoded_data) {
+        original_t1_data = t1->data;
+        t1->data = cblk->decoded_data;
+    }
+
     for (segno = 0; segno < cblk->real_num_segs; ++segno) {
         opj_tcd_seg_t *seg = &cblk->segs[segno];
 
         /* BYPASS mode */
         type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
-        /* FIXME: slviewer gets here with a null pointer. Why? Partially downloaded and/or corrupt textures? */
-        if (seg->data == 00) {
-            continue;
-        }
+
         if (type == T1_TYPE_RAW) {
-            opj_raw_init_dec(raw, (*seg->data) + seg->dataindex, seg->len);
+            opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
+                                 OPJ_COMMON_CBLK_DATA_EXTRA);
         } else {
-            if (OPJ_FALSE == opj_mqc_init_dec(mqc, (*seg->data) + seg->dataindex,
-                                              seg->len)) {
-                return OPJ_FALSE;
-            }
+            opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
+                             OPJ_COMMON_CBLK_DATA_EXTRA);
         }
-
-        if (t1->w == 64 && t1->h == 64) {
-            for (passno = 0; (passno < seg->real_num_passes) &&
-                    (bpno_plus_one >= 1); ++passno) {
-                switch (passtype) {
-                case 0:
-                    if (type == T1_TYPE_RAW) {
-                        opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty);
-                    } else {
-                        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
-                            opj_t1_dec_sigpass_mqc_vsc(t1, bpno_plus_one);
-                        } else {
-                            opj_t1_dec_sigpass_mqc_64x64(t1, bpno_plus_one);
-                        }
-                    }
-                    break;
-                case 1:
-                    if (type == T1_TYPE_RAW) {
-                        opj_t1_dec_refpass_raw(t1, bpno_plus_one);
-                    } else {
-                        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
-                            opj_t1_dec_refpass_mqc_vsc(t1, bpno_plus_one);
-                        } else {
-                            opj_t1_dec_refpass_mqc_64x64(t1, bpno_plus_one);
-                        }
-                    }
-                    break;
-                case 2:
-                    opj_t1_dec_clnpass_64x64(t1, bpno_plus_one, (OPJ_INT32)cblksty);
-                    break;
-                }
-
-                if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) {
-                    opj_mqc_resetstates(mqc);
-                    opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
-                    opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
-                    opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
+        cblkdataindex += seg->len;
+
+        for (passno = 0; (passno < seg->real_num_passes) &&
+                (bpno_plus_one >= 1); ++passno) {
+            switch (passtype) {
+            case 0:
+                if (type == T1_TYPE_RAW) {
+                    opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty);
+                } else {
+                    opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)cblksty);
                 }
-                if (++passtype == 3) {
-                    passtype = 0;
-                    bpno_plus_one--;
+                break;
+            case 1:
+                if (type == T1_TYPE_RAW) {
+                    opj_t1_dec_refpass_raw(t1, bpno_plus_one);
+                } else {
+                    opj_t1_dec_refpass_mqc(t1, bpno_plus_one);
                 }
+                break;
+            case 2:
+                opj_t1_dec_clnpass(t1, bpno_plus_one, (OPJ_INT32)cblksty);
+                break;
             }
-        } else {
-            for (passno = 0; (passno < seg->real_num_passes) &&
-                    (bpno_plus_one >= 1); ++passno) {
-                switch (passtype) {
-                case 0:
-                    if (type == T1_TYPE_RAW) {
-                        opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty);
-                    } else {
-                        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
-                            opj_t1_dec_sigpass_mqc_vsc(t1, bpno_plus_one);
-                        } else {
-                            opj_t1_dec_sigpass_mqc_generic(t1, bpno_plus_one);
-                        }
-                    }
-                    break;
-                case 1:
-                    if (type == T1_TYPE_RAW) {
-                        opj_t1_dec_refpass_raw(t1, bpno_plus_one);
-                    } else {
-                        if (cblksty & J2K_CCP_CBLKSTY_VSC) {
-                            opj_t1_dec_refpass_mqc_vsc(t1, bpno_plus_one);
-                        } else {
-                            opj_t1_dec_refpass_mqc_generic(t1, bpno_plus_one);
-                        }
-                    }
-                    break;
-                case 2:
-                    opj_t1_dec_clnpass_generic(t1, bpno_plus_one, (OPJ_INT32)cblksty);
-                    break;
-                }
 
-                if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) {
-                    opj_mqc_resetstates(mqc);
-                    opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
-                    opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
-                    opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
-                }
-                if (++passtype == 3) {
-                    passtype = 0;
-                    bpno_plus_one--;
-                }
+            if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) {
+                opj_mqc_resetstates(mqc);
+                opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
+                opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
+                opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
+            }
+            if (++passtype == 3) {
+                passtype = 0;
+                bpno_plus_one--;
+            }
+        }
+
+        opq_mqc_finish_dec(mqc);
+    }
+
+    if (check_pterm) {
+        if (mqc->bp + 2 < mqc->end) {
+            if (p_manager_mutex) {
+                opj_mutex_lock(p_manager_mutex);
+            }
+            opj_event_msg(p_manager, EVT_WARNING,
+                          "PTERM check failure: %d remaining bytes in code block (%d used / %d)\n",
+                          (int)(mqc->end - mqc->bp) - 2,
+                          (int)(mqc->bp - mqc->start),
+                          (int)(mqc->end - mqc->start));
+            if (p_manager_mutex) {
+                opj_mutex_unlock(p_manager_mutex);
+            }
+        } else if (mqc->end_of_byte_stream_counter > 2) {
+            if (p_manager_mutex) {
+                opj_mutex_lock(p_manager_mutex);
+            }
+            opj_event_msg(p_manager, EVT_WARNING,
+                          "PTERM check failure: %d synthetized 0xFF markers read\n",
+                          mqc->end_of_byte_stream_counter);
+            if (p_manager_mutex) {
+                opj_mutex_unlock(p_manager_mutex);
             }
         }
     }
+
+    /* Restore original t1->data is needed */
+    if (cblk->decoded_data) {
+        t1->data = original_t1_data;
+    }
+
     return OPJ_TRUE;
 }
 
@@ -2008,8 +2123,14 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
 
             for (bandno = 0; bandno < res->numbands; ++bandno) {
                 opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
-                OPJ_INT32 bandconst = 8192 * 8192 / ((OPJ_INT32) floor(band->stepsize * 8192));
+                OPJ_INT32 bandconst;
+
+                /* Skip empty bands */
+                if (opj_tcd_is_band_empty(band)) {
+                    continue;
+                }
 
+                bandconst = 8192 * 8192 / ((OPJ_INT32) floor(band->stepsize * 8192));
                 for (precno = 0; precno < res->pw * res->ph; ++precno) {
                     opj_tcd_precinct_t *prc = &band->precincts[precno];
 
@@ -2018,7 +2139,8 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
                         OPJ_INT32* OPJ_RESTRICT tiledp;
                         OPJ_UINT32 cblk_w;
                         OPJ_UINT32 cblk_h;
-                        OPJ_UINT32 i, j, tileIndex = 0, tileLineAdvance;
+                        OPJ_UINT32 i, j, tileLineAdvance;
+                        OPJ_SIZE_T tileIndex = 0;
 
                         OPJ_INT32 x = cblk->x0 - band->x0;
                         OPJ_INT32 y = cblk->y0 - band->y0;
@@ -2042,13 +2164,22 @@ OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
                         cblk_h = t1->h;
                         tileLineAdvance = tile_w - cblk_w;
 
-                        tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
+                        tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
                         t1->data = tiledp;
                         t1->data_stride = tile_w;
                         if (tccp->qmfbid == 1) {
+                            /* Do multiplication on unsigned type, even if the
+                             * underlying type is signed, to avoid potential
+                             * int overflow on large value (the output will be
+                             * incorrect in such situation, but whatever...)
+                             * This assumes complement-to-2 signed integer
+                             * representation
+                             * Fixes https://github.com/uclouvain/openjpeg/issues/1053
+                             */
+                            OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
                             for (j = 0; j < cblk_h; ++j) {
                                 for (i = 0; i < cblk_w; ++i) {
-                                    tiledp[tileIndex] *= (1 << T1_NMSEDEC_FRACBITS);
+                                    tiledp_u[tileIndex] <<= T1_NMSEDEC_FRACBITS;
                                     tileIndex++;
                                 }
                                 tileIndex += tileLineAdvance;
@@ -2136,7 +2267,7 @@ static void opj_t1_encode_cblk(opj_t1_t *t1,
 {
     OPJ_FLOAT64 cumwmsedec = 0.0;
 
-    opj_mqc_t *mqc = t1->mqc;   /* MQC component */
+    opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
 
     OPJ_UINT32 passno;
     OPJ_INT32 bpno;
@@ -2152,7 +2283,7 @@ static void opj_t1_encode_cblk(opj_t1_t *t1,
            cblk->x0, cblk->y0, cblk->x1, cblk->y1, orient, compno, level);
 #endif
 
-    mqc->lut_ctxno_zc_orient = lut_ctxno_zc + orient * 256;
+    mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
 
     max = 0;
     for (i = 0; i < t1->w; ++i) {
@@ -2164,6 +2295,10 @@ static void opj_t1_encode_cblk(opj_t1_t *t1,
 
     cblk->numbps = max ? (OPJ_UINT32)((opj_int_floorlog2(max) + 1) -
                                       T1_NMSEDEC_FRACBITS) : 0;
+    if (cblk->numbps == 0) {
+        cblk->totalpasses = 0;
+        return;
+    }
 
     bpno = (OPJ_INT32)(cblk->numbps - 1);
     passtype = 2;
@@ -2193,7 +2328,7 @@ static void opj_t1_encode_cblk(opj_t1_t *t1,
             opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty);
             break;
         case 1:
-            opj_t1_enc_refpass(t1, bpno, &nmsedec, type, cblksty);
+            opj_t1_enc_refpass(t1, bpno, &nmsedec, type);
             break;
         case 2:
             opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty);
@@ -2269,7 +2404,7 @@ static void opj_t1_encode_cblk(opj_t1_t *t1,
         /* Prevent generation of FF as last data byte of a pass*/
         /* For terminating passes, the flushing procedure ensured this already */
         assert(pass->rate > 0);
-        if ((cblk->data[pass->rate - 1] == 0xFF)) {
+        if (cblk->data[pass->rate - 1] == 0xFF) {
             pass->rate--;
         }
         pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate);
@@ -2291,73 +2426,3 @@ static void opj_t1_encode_cblk(opj_t1_t *t1,
     }
 #endif
 }
-
-#if 0
-static void opj_t1_dec_refpass_step(opj_t1_t *t1,
-                                    opj_flag_t *flagsp,
-                                    OPJ_INT32 *datap,
-                                    OPJ_INT32 poshalf,
-                                    OPJ_INT32 neghalf,
-                                    OPJ_BYTE type,
-                                    OPJ_UINT32 vsc)
-{
-    OPJ_INT32  t;
-    OPJ_UINT32 v, flag;
-
-    opj_mqc_t *mqc = t1->mqc;   /* MQC component */
-    opj_raw_t *raw = t1->raw;   /* RAW component */
-
-    flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) :
-           (*flagsp);
-    if ((flag & (T1_SIG | T1_VISIT)) == T1_SIG) {
-        opj_mqc_setcurctx(mqc, opj_t1_getctxno_mag(flag));  /* ESSAI */
-        if (type == T1_TYPE_RAW) {
-            v = opj_raw_decode(raw);
-        } else {
-            v = opj_mqc_decode(mqc);
-        }
-        t = v ? poshalf : neghalf;
-        *datap += *datap < 0 ? -t : t;
-        *flagsp |= T1_REFINE;
-    }
-}               /* VSC and  BYPASS by Antonin  */
-#endif
-
-
-
-#if 0
-static void opj_t1_dec_sigpass_step(opj_t1_t *t1,
-                                    opj_flag_t *flagsp,
-                                    OPJ_INT32 *datap,
-                                    OPJ_UINT32 orient,
-                                    OPJ_INT32 oneplushalf,
-                                    OPJ_BYTE type,
-                                    OPJ_UINT32 vsc)
-{
-    OPJ_UINT32 v, flag;
-
-    opj_raw_t *raw = t1->raw;   /* RAW component */
-    opj_mqc_t *mqc = t1->mqc;   /* MQC component */
-
-    flag = vsc ? ((*flagsp) & (~(T1_SIG_S | T1_SIG_SE | T1_SIG_SW | T1_SGN_S))) :
-           (*flagsp);
-    if ((flag & T1_SIG_OTH) && !(flag & (T1_SIG | T1_VISIT))) {
-        if (type == T1_TYPE_RAW) {
-            if (opj_raw_decode(raw)) {
-                v = opj_raw_decode(raw);    /* ESSAI */
-                *datap = v ? -oneplushalf : oneplushalf;
-                opj_t1_updateflags(flagsp, v, t1->flags_stride);
-            }
-        } else {
-            opj_mqc_setcurctx(mqc, opj_t1_getctxno_zc(flag, orient));
-            if (opj_mqc_decode(mqc)) {
-                opj_mqc_setcurctx(mqc, opj_t1_getctxno_sc(flag));
-                v = opj_mqc_decode(mqc) ^ opj_t1_getspb(flag);
-                *datap = v ? -oneplushalf : oneplushalf;
-                opj_t1_updateflags(flagsp, v, t1->flags_stride);
-            }
-        }
-        *flagsp |= T1_VISIT;
-    }
-}               /* VSC and  BYPASS by Antonin */
-#endif