opj_t1_encode_cblks: fix UBSAN signed integer overflow
[openjpeg.git] / src / lib / openjp2 / t1.c
1 /*
2  * The copyright in this software is being made available under the 2-clauses
3  * BSD License, included below. This software may be subject to other third
4  * party and contributor rights, including patent rights, and no such rights
5  * are granted under this license.
6  *
7  * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
8  * Copyright (c) 2002-2014, Professor Benoit Macq
9  * Copyright (c) 2001-2003, David Janssens
10  * Copyright (c) 2002-2003, Yannick Verschueren
11  * Copyright (c) 2003-2007, Francois-Olivier Devaux
12  * Copyright (c) 2003-2014, Antonin Descampe
13  * Copyright (c) 2005, Herve Drolon, FreeImage Team
14  * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
15  * Copyright (c) 2012, Carl Hetherington
16  * Copyright (c) 2017, IntoPIX SA <support@intopix.com>
17  * All rights reserved.
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions
21  * are met:
22  * 1. Redistributions of source code must retain the above copyright
23  *    notice, this list of conditions and the following disclaimer.
24  * 2. Redistributions in binary form must reproduce the above copyright
25  *    notice, this list of conditions and the following disclaimer in the
26  *    documentation and/or other materials provided with the distribution.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
29  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38  * POSSIBILITY OF SUCH DAMAGE.
39  */
40
41 #define OPJ_SKIP_POISON
42 #include "opj_includes.h"
43
44 #ifdef __SSE__
45 #include <xmmintrin.h>
46 #endif
47 #ifdef __SSE2__
48 #include <emmintrin.h>
49 #endif
50
51 #if defined(__GNUC__)
52 #pragma GCC poison malloc calloc realloc free
53 #endif
54
55 #include "t1_luts.h"
56
57 /** @defgroup T1 T1 - Implementation of the tier-1 coding */
58 /*@{*/
59
/**
 * Flags word for column x of the 4-row stripe that contains row y.
 * Expands against a variable named `t1` that must be in scope at the use site.
 * The index skips one leading entry per row and one leading row, with a row
 * stride of (t1->w + 2). Arguments are parenthesized so that expressions such
 * as T1_FLAGS(i + 1, k + 4) index the intended entry.
 */
#define T1_FLAGS(x, y) (t1->flags[(x) + 1 + (((y) / 4) + 1) * (t1->w + 2)])
61
/* Make curctx point at entry ctxno of the context table.
   NOTE: the table is reached through a variable named `mqc` that must be in
   scope where the macro is expanded; it is not a macro parameter. */
#define opj_t1_setcurctx(curctx, ctxno)  curctx = ((mqc)->ctxs + (OPJ_UINT32)(ctxno))
63
64 /** @name Local static functions */
65 /*@{*/
66
67 static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f);
68 static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f);
69 static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos);
70 static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos);
71 static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
72                                        OPJ_UINT32 s, OPJ_UINT32 stride,
73                                        OPJ_UINT32 vsc);
74
75
/**
Decode significance pass: single-sample steps (raw and MQ-coded variants)
*/
79
80 static INLINE void opj_t1_dec_sigpass_step_raw(
81     opj_t1_t *t1,
82     opj_flag_t *flagsp,
83     OPJ_INT32 *datap,
84     OPJ_INT32 oneplushalf,
85     OPJ_UINT32 vsc,
86     OPJ_UINT32 row);
87 static INLINE void opj_t1_dec_sigpass_step_mqc(
88     opj_t1_t *t1,
89     opj_flag_t *flagsp,
90     OPJ_INT32 *datap,
91     OPJ_INT32 oneplushalf,
92     OPJ_UINT32 row,
93     OPJ_UINT32 flags_stride,
94     OPJ_UINT32 vsc);
95
96 /**
97 Encode significant pass
98 */
99 static void opj_t1_enc_sigpass(opj_t1_t *t1,
100                                OPJ_INT32 bpno,
101                                OPJ_INT32 *nmsedec,
102                                OPJ_BYTE type,
103                                OPJ_UINT32 cblksty);
104
105 /**
106 Decode significant pass
107 */
108 static void opj_t1_dec_sigpass_raw(
109     opj_t1_t *t1,
110     OPJ_INT32 bpno,
111     OPJ_INT32 cblksty);
112
113 /**
114 Encode refinement pass
115 */
116 static void opj_t1_enc_refpass(opj_t1_t *t1,
117                                OPJ_INT32 bpno,
118                                OPJ_INT32 *nmsedec,
119                                OPJ_BYTE type);
120
121 /**
122 Decode refinement pass
123 */
124 static void opj_t1_dec_refpass_raw(
125     opj_t1_t *t1,
126     OPJ_INT32 bpno);
127
128
/**
Decode refinement pass: single-sample steps (raw and MQ-coded variants)
*/
132
133 static INLINE void  opj_t1_dec_refpass_step_raw(
134     opj_t1_t *t1,
135     opj_flag_t *flagsp,
136     OPJ_INT32 *datap,
137     OPJ_INT32 poshalf,
138     OPJ_UINT32 row);
139 static INLINE void opj_t1_dec_refpass_step_mqc(
140     opj_t1_t *t1,
141     opj_flag_t *flagsp,
142     OPJ_INT32 *datap,
143     OPJ_INT32 poshalf,
144     OPJ_UINT32 row);
145
146
147 /**
148 Decode clean-up pass
149 */
150
151 static void opj_t1_dec_clnpass_step(
152     opj_t1_t *t1,
153     opj_flag_t *flagsp,
154     OPJ_INT32 *datap,
155     OPJ_INT32 oneplushalf,
156     OPJ_UINT32 row,
157     OPJ_UINT32 vsc);
158
159 /**
160 Encode clean-up pass
161 */
162 static void opj_t1_enc_clnpass(
163     opj_t1_t *t1,
164     OPJ_INT32 bpno,
165     OPJ_INT32 *nmsedec,
166     OPJ_UINT32 cblksty);
167
168 static OPJ_FLOAT64 opj_t1_getwmsedec(
169     OPJ_INT32 nmsedec,
170     OPJ_UINT32 compno,
171     OPJ_UINT32 level,
172     OPJ_UINT32 orient,
173     OPJ_INT32 bpno,
174     OPJ_UINT32 qmfbid,
175     OPJ_FLOAT64 stepsize,
176     OPJ_UINT32 numcomps,
177     const OPJ_FLOAT64 * mct_norms,
178     OPJ_UINT32 mct_numcomps);
179
180 static void opj_t1_encode_cblk(opj_t1_t *t1,
181                                opj_tcd_cblk_enc_t* cblk,
182                                OPJ_UINT32 orient,
183                                OPJ_UINT32 compno,
184                                OPJ_UINT32 level,
185                                OPJ_UINT32 qmfbid,
186                                OPJ_FLOAT64 stepsize,
187                                OPJ_UINT32 cblksty,
188                                OPJ_UINT32 numcomps,
189                                opj_tcd_tile_t * tile,
190                                const OPJ_FLOAT64 * mct_norms,
191                                OPJ_UINT32 mct_numcomps);
192
193 /**
194 Decode 1 code-block
195 @param t1 T1 handle
196 @param cblk Code-block coding parameters
197 @param orient
198 @param roishift Region of interest shifting value
199 @param cblksty Code-block style
200 @param p_manager the event manager
201 @param p_manager_mutex mutex for the event manager
202 @param check_pterm whether PTERM correct termination should be checked
203 */
204 static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
205                                    opj_tcd_cblk_dec_t* cblk,
206                                    OPJ_UINT32 orient,
207                                    OPJ_UINT32 roishift,
208                                    OPJ_UINT32 cblksty,
209                                    opj_event_mgr_t *p_manager,
210                                    opj_mutex_t* p_manager_mutex,
211                                    OPJ_BOOL check_pterm);
212
213 static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
214                                         OPJ_UINT32 w,
215                                         OPJ_UINT32 h);
216
217 /*@}*/
218
219 /*@}*/
220
221 /* ----------------------------------------------------------------------- */
222
223 static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f)
224 {
225     return mqc->lut_ctxno_zc_orient[(f & T1_SIGMA_NEIGHBOURS)];
226 }
227
228 static INLINE OPJ_UINT32 opj_t1_getctxtno_sc_or_spb_index(OPJ_UINT32 fX,
229         OPJ_UINT32 pfX,
230         OPJ_UINT32 nfX,
231         OPJ_UINT32 ci)
232 {
233     /*
234       0 pfX T1_CHI_THIS           T1_LUT_SGN_W
235       1 tfX T1_SIGMA_1            T1_LUT_SIG_N
236       2 nfX T1_CHI_THIS           T1_LUT_SGN_E
237       3 tfX T1_SIGMA_3            T1_LUT_SIG_W
238       4  fX T1_CHI_(THIS - 1)     T1_LUT_SGN_N
239       5 tfX T1_SIGMA_5            T1_LUT_SIG_E
240       6  fX T1_CHI_(THIS + 1)     T1_LUT_SGN_S
241       7 tfX T1_SIGMA_7            T1_LUT_SIG_S
242     */
243
244     OPJ_UINT32 lu = (fX >> (ci * 3U)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 |
245                                          T1_SIGMA_7);
246
247     lu |= (pfX >> (T1_CHI_THIS_I      + (ci * 3U))) & (1U << 0);
248     lu |= (nfX >> (T1_CHI_THIS_I - 2U + (ci * 3U))) & (1U << 2);
249     if (ci == 0U) {
250         lu |= (fX >> (T1_CHI_0_I - 4U)) & (1U << 4);
251     } else {
252         lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4);
253     }
254     lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6);
255     return lu;
256 }
257
258 static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 lu)
259 {
260     return lut_ctxno_sc[lu];
261 }
262
263 static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f)
264 {
265     OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
266     OPJ_UINT32 tmp2 = (f & T1_MU_0) ? T1_CTXNO_MAG + 2 : tmp;
267     return tmp2;
268 }
269
270 static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 lu)
271 {
272     return lut_spb[lu];
273 }
274
275 static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos)
276 {
277     if (bitpos > 0) {
278         return lut_nmsedec_sig[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
279     }
280
281     return lut_nmsedec_sig0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
282 }
283
284 static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos)
285 {
286     if (bitpos > 0) {
287         return lut_nmsedec_ref[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
288     }
289
290     return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
291 }
292
/**
 * Record that the sample in row ci of the column at flagsp became significant.
 *
 * flags   lvalue holding the flags word of the current column (may be a local
 *         copy or *flagsp itself)
 * flagsp  pointer to the flags word of the current column; flagsp[-1] and
 *         flagsp[1] are updated as well
 * ci      row of the sample inside the flags word (0..3)
 * s       sign bit of the sample (0 or 1), stored in the CHI bits
 * stride  distance, in flags words, between two consecutive stripes
 * vsc     when non-zero, the stripe above is left untouched
 *
 * All arguments are parenthesized in the expansion so that expression
 * arguments (e.g. ci given as `j - k`) behave like function arguments.
 * ci, s and flagsp are evaluated more than once: pass side-effect-free
 * expressions.
 */
#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride, vsc) \
{ \
    /* previous column: gets the SIGMA_5 (east) bit */ \
    (flagsp)[-1] |= T1_SIGMA_5 << (3U * (ci)); \
 \
    /* mark target as significant and store its sign */ \
    (flags) |= (((s) << T1_CHI_1_I) | T1_SIGMA_4) << (3U * (ci)); \
 \
    /* next column: gets the SIGMA_3 (west) bit */ \
    (flagsp)[1] |= T1_SIGMA_3 << (3U * (ci)); \
 \
    /* first row of the stripe: update the three words of the stripe above, \
       unless vsc is set */ \
    if ((ci) == 0U && !(vsc)) { \
        opj_flag_t* north = (flagsp) - (stride); \
        *north |= ((s) << T1_CHI_5_I) | T1_SIGMA_16; \
        north[-1] |= T1_SIGMA_17; \
        north[1] |= T1_SIGMA_15; \
    } \
 \
    /* last row of the stripe: update the three words of the stripe below */ \
    if ((ci) == 3U) { \
        opj_flag_t* south = (flagsp) + (stride); \
        *south |= ((s) << T1_CHI_0_I) | T1_SIGMA_1; \
        south[-1] |= T1_SIGMA_2; \
        south[1] |= T1_SIGMA_0; \
    } \
}
320
321
322 static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
323                                        OPJ_UINT32 s, OPJ_UINT32 stride,
324                                        OPJ_UINT32 vsc)
325 {
326     opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride, vsc);
327 }
328
329 /**
330 Encode significant pass
331 */
332 static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1,
333         opj_flag_t *flagsp,
334         OPJ_INT32 *datap,
335         OPJ_INT32 bpno,
336         OPJ_INT32 one,
337         OPJ_INT32 *nmsedec,
338         OPJ_BYTE type,
339         OPJ_UINT32 ci,
340         OPJ_UINT32 vsc)
341 {
342     OPJ_UINT32 v;
343
344     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
345
346     OPJ_UINT32 const flags = *flagsp;
347
348     if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
349             (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
350         OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
351         v = (opj_int_abs(*datap) & one) ? 1 : 0;
352 #ifdef DEBUG_ENC_SIG
353         fprintf(stderr, "   ctxt1=%d\n", ctxt1);
354 #endif
355         opj_mqc_setcurctx(mqc, ctxt1);
356         if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
357             opj_mqc_bypass_enc(mqc, v);
358         } else {
359             opj_mqc_encode(mqc, v);
360         }
361         if (v) {
362             OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index(
363                                 *flagsp,
364                                 flagsp[-1], flagsp[1],
365                                 ci);
366             OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu);
367             v = *datap < 0 ? 1U : 0U;
368             *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
369                                               (OPJ_UINT32)bpno);
370 #ifdef DEBUG_ENC_SIG
371             fprintf(stderr, "   ctxt2=%d\n", ctxt2);
372 #endif
373             opj_mqc_setcurctx(mqc, ctxt2);
374             if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
375                 opj_mqc_bypass_enc(mqc, v);
376             } else {
377                 OPJ_UINT32 spb = opj_t1_getspb(lu);
378 #ifdef DEBUG_ENC_SIG
379                 fprintf(stderr, "   spb=%d\n", spb);
380 #endif
381                 opj_mqc_encode(mqc, v ^ spb);
382             }
383             opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
384         }
385         *flagsp |= T1_PI_THIS << (ci * 3U);
386     }
387 }
388
389 static INLINE void opj_t1_dec_sigpass_step_raw(
390     opj_t1_t *t1,
391     opj_flag_t *flagsp,
392     OPJ_INT32 *datap,
393     OPJ_INT32 oneplushalf,
394     OPJ_UINT32 vsc,
395     OPJ_UINT32 ci)
396 {
397     OPJ_UINT32 v;
398     opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
399
400     OPJ_UINT32 const flags = *flagsp;
401
402     if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
403             (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
404         if (opj_mqc_raw_decode(mqc)) {
405             v = opj_mqc_raw_decode(mqc);
406             *datap = v ? -oneplushalf : oneplushalf;
407             opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
408         }
409         *flagsp |= T1_PI_THIS << (ci * 3U);
410     }
411 }
412
/**
 * MQ-coded significance-pass step for one sample, in macro form so that the
 * decoder state (curctx, a, c, ct) can live in local variables of the caller.
 *
 * flags         lvalue with the flags word of the current column
 * flagsp        pointer to that flags word (neighbours read at [-1] and [1])
 * flags_stride  distance, in flags words, between two stripes
 * data          base pointer; the sample is data[ci * data_stride]
 * ci            row of the sample inside the flags word
 * mqc, curctx, a, c, ct  MQ decoder state handed to opj_mqc_decode_macro()
 * v             scratch lvalue receiving the decoded bits
 * oneplushalf   magnitude assigned to a newly significant sample
 * vsc           forwarded to opj_t1_update_flags_macro()
 *
 * Arguments are parenthesized in the expansion so that expression arguments
 * are not re-associated; they are still evaluated several times.
 */
#define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \
                                          data_stride, ci, mqc, curctx, \
                                          v, a, c, ct, oneplushalf, vsc) \
{ \
    if (((flags) & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == 0U && \
        ((flags) & (T1_SIGMA_NEIGHBOURS << ((ci) * 3U))) != 0U) { \
        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, (flags) >> ((ci) * 3U)); \
        opj_t1_setcurctx(curctx, ctxt1); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        if (v) { \
            /* newly significant: decode the sign */ \
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                                (flags), \
                                (flagsp)[-1], (flagsp)[1], \
                                (ci)); \
            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
            OPJ_UINT32 spb = opj_t1_getspb(lu); \
            opj_t1_setcurctx(curctx, ctxt2); \
            opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
            (v) = (v) ^ spb; \
            (data)[(ci) * (data_stride)] = (v) ? -(oneplushalf) : (oneplushalf); \
            opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
        } \
        (flags) |= T1_PI_THIS << ((ci) * 3U); \
    } \
}
438
439 static INLINE void opj_t1_dec_sigpass_step_mqc(
440     opj_t1_t *t1,
441     opj_flag_t *flagsp,
442     OPJ_INT32 *datap,
443     OPJ_INT32 oneplushalf,
444     OPJ_UINT32 ci,
445     OPJ_UINT32 flags_stride,
446     OPJ_UINT32 vsc)
447 {
448     OPJ_UINT32 v;
449
450     opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
451     opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap,
452                                       0, ci, mqc, mqc->curctx,
453                                       v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
454 }
455
456 static void opj_t1_enc_sigpass(opj_t1_t *t1,
457                                OPJ_INT32 bpno,
458                                OPJ_INT32 *nmsedec,
459                                OPJ_BYTE type,
460                                OPJ_UINT32 cblksty
461                               )
462 {
463     OPJ_UINT32 i, k;
464     OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
465     opj_flag_t* f = &T1_FLAGS(0, 0);
466     OPJ_UINT32 const extra = 2;
467
468     *nmsedec = 0;
469 #ifdef DEBUG_ENC_SIG
470     fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
471 #endif
472     for (k = 0; k < (t1->h & ~3U); k += 4) {
473 #ifdef DEBUG_ENC_SIG
474         fprintf(stderr, " k=%d\n", k);
475 #endif
476         for (i = 0; i < t1->w; ++i) {
477 #ifdef DEBUG_ENC_SIG
478             fprintf(stderr, " i=%d\n", i);
479 #endif
480             if (*f == 0U) {
481                 /* Nothing to do for any of the 4 data points */
482                 f++;
483                 continue;
484             }
485             opj_t1_enc_sigpass_step(
486                 t1,
487                 f,
488                 &t1->data[((k + 0) * t1->data_stride) + i],
489                 bpno,
490                 one,
491                 nmsedec,
492                 type,
493                 0, cblksty & J2K_CCP_CBLKSTY_VSC);
494             opj_t1_enc_sigpass_step(
495                 t1,
496                 f,
497                 &t1->data[((k + 1) * t1->data_stride) + i],
498                 bpno,
499                 one,
500                 nmsedec,
501                 type,
502                 1, 0);
503             opj_t1_enc_sigpass_step(
504                 t1,
505                 f,
506                 &t1->data[((k + 2) * t1->data_stride) + i],
507                 bpno,
508                 one,
509                 nmsedec,
510                 type,
511                 2, 0);
512             opj_t1_enc_sigpass_step(
513                 t1,
514                 f,
515                 &t1->data[((k + 3) * t1->data_stride) + i],
516                 bpno,
517                 one,
518                 nmsedec,
519                 type,
520                 3, 0);
521             ++f;
522         }
523         f += extra;
524     }
525
526     if (k < t1->h) {
527         OPJ_UINT32 j;
528 #ifdef DEBUG_ENC_SIG
529         fprintf(stderr, " k=%d\n", k);
530 #endif
531         for (i = 0; i < t1->w; ++i) {
532 #ifdef DEBUG_ENC_SIG
533             fprintf(stderr, " i=%d\n", i);
534 #endif
535             if (*f == 0U) {
536                 /* Nothing to do for any of the 4 data points */
537                 f++;
538                 continue;
539             }
540             for (j = k; j < t1->h; ++j) {
541                 opj_t1_enc_sigpass_step(
542                     t1,
543                     f,
544                     &t1->data[(j * t1->data_stride) + i],
545                     bpno,
546                     one,
547                     nmsedec,
548                     type,
549                     j - k,
550                     (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
551             }
552             ++f;
553         }
554     }
555 }
556
557 static void opj_t1_dec_sigpass_raw(
558     opj_t1_t *t1,
559     OPJ_INT32 bpno,
560     OPJ_INT32 cblksty)
561 {
562     OPJ_INT32 one, half, oneplushalf;
563     OPJ_UINT32 i, j, k;
564     OPJ_INT32 *data = t1->data;
565     opj_flag_t *flagsp = &T1_FLAGS(0, 0);
566     const OPJ_UINT32 l_w = t1->w;
567     one = 1 << bpno;
568     half = one >> 1;
569     oneplushalf = one | half;
570
571     for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
572         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
573             opj_flag_t flags = *flagsp;
574             if (flags != 0) {
575                 opj_t1_dec_sigpass_step_raw(
576                     t1,
577                     flagsp,
578                     data,
579                     oneplushalf,
580                     cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
581                     0U);
582                 opj_t1_dec_sigpass_step_raw(
583                     t1,
584                     flagsp,
585                     data + l_w,
586                     oneplushalf,
587                     OPJ_FALSE, /* vsc */
588                     1U);
589                 opj_t1_dec_sigpass_step_raw(
590                     t1,
591                     flagsp,
592                     data + 2 * l_w,
593                     oneplushalf,
594                     OPJ_FALSE, /* vsc */
595                     2U);
596                 opj_t1_dec_sigpass_step_raw(
597                     t1,
598                     flagsp,
599                     data + 3 * l_w,
600                     oneplushalf,
601                     OPJ_FALSE, /* vsc */
602                     3U);
603             }
604         }
605     }
606     if (k < t1->h) {
607         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
608             for (j = 0; j < t1->h - k; ++j) {
609                 opj_t1_dec_sigpass_step_raw(
610                     t1,
611                     flagsp,
612                     data + j * l_w,
613                     oneplushalf,
614                     cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
615                     j);
616             }
617         }
618     }
619 }
620
/**
 * Body of the MQ-coded significance-pass decoders.
 *
 * t1            T1 handle
 * bpno          bit-plane number
 * vsc           vsc value handed to the first row of every stripe
 * w, h          code-block width and height (constants in the 64x64 variants)
 * flags_stride  distance, in flags words, between two stripes
 *
 * Complete stripes are decoded with the MQ state held in local variables
 * (DOWNLOAD_MQC_VARIABLES / UPLOAD_MQC_VARIABLES); the trailing partial stripe
 * goes through opj_t1_dec_sigpass_step_mqc(), which uses the state stored in
 * t1->mqc, hence the upload before it. Data rows are w apart.
 *
 * Arguments are parenthesized in the expansion; w, h and flags_stride are
 * evaluated several times.
 */
#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, vsc, w, h, flags_stride) \
{ \
        OPJ_INT32 one, half, oneplushalf; \
        OPJ_UINT32 i, j, k; \
        register OPJ_INT32 *data = (t1)->data; \
        register opj_flag_t *flagsp = &(t1)->flags[(flags_stride) + 1]; \
        const OPJ_UINT32 l_w = (w); \
        opj_mqc_t* mqc = &((t1)->mqc); \
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
        register OPJ_UINT32 v; \
        one = 1 << (bpno); \
        half = one >> 1; \
        oneplushalf = one | half; \
        for (k = 0; k < ((h) & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                        /* work on a local copy of the flags word */ \
                        opj_flag_t flags = *flagsp; \
                        if( flags != 0 ) { \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, (flags_stride), data, \
                                l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf, (vsc)); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, (flags_stride), data, \
                                l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, (flags_stride), data, \
                                l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, (flags_stride), data, \
                                l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            *flagsp = flags; \
                        } \
                } \
        } \
        UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
        if( k < (h) ) { \
            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                for (j = 0; j < (h) - k; ++j) { \
                        opj_t1_dec_sigpass_step_mqc((t1), flagsp, \
                            data + j * l_w, oneplushalf, j, (flags_stride), (vsc)); \
                } \
            } \
        } \
}
664
665 static void opj_t1_dec_sigpass_mqc_64x64_novsc(
666     opj_t1_t *t1,
667     OPJ_INT32 bpno)
668 {
669     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
670 }
671
672 static void opj_t1_dec_sigpass_mqc_64x64_vsc(
673     opj_t1_t *t1,
674     OPJ_INT32 bpno)
675 {
676     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
677 }
678
679 static void opj_t1_dec_sigpass_mqc_generic_novsc(
680     opj_t1_t *t1,
681     OPJ_INT32 bpno)
682 {
683     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
684                                     t1->w + 2U);
685 }
686
687 static void opj_t1_dec_sigpass_mqc_generic_vsc(
688     opj_t1_t *t1,
689     OPJ_INT32 bpno)
690 {
691     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
692                                     t1->w + 2U);
693 }
694
695 static void opj_t1_dec_sigpass_mqc(
696     opj_t1_t *t1,
697     OPJ_INT32 bpno,
698     OPJ_INT32 cblksty)
699 {
700     if (t1->w == 64 && t1->h == 64) {
701         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
702             opj_t1_dec_sigpass_mqc_64x64_vsc(t1, bpno);
703         } else {
704             opj_t1_dec_sigpass_mqc_64x64_novsc(t1, bpno);
705         }
706     } else {
707         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
708             opj_t1_dec_sigpass_mqc_generic_vsc(t1, bpno);
709         } else {
710             opj_t1_dec_sigpass_mqc_generic_novsc(t1, bpno);
711         }
712     }
713 }
714
715 /**
716 Encode refinement pass step
717 */
718 static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1,
719         opj_flag_t *flagsp,
720         OPJ_INT32 *datap,
721         OPJ_INT32 bpno,
722         OPJ_INT32 one,
723         OPJ_INT32 *nmsedec,
724         OPJ_BYTE type,
725         OPJ_UINT32 ci)
726 {
727     OPJ_UINT32 v;
728
729     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
730
731     OPJ_UINT32 const shift_flags =
732         (*flagsp >> (ci * 3U));
733
734     if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) {
735         OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags);
736         *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap),
737                                           (OPJ_UINT32)bpno);
738         v = (opj_int_abs(*datap) & one) ? 1 : 0;
739 #ifdef DEBUG_ENC_REF
740         fprintf(stderr, "  ctxt=%d\n", ctxt);
741 #endif
742         opj_mqc_setcurctx(mqc, ctxt);
743         if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
744             opj_mqc_bypass_enc(mqc, v);
745         } else {
746             opj_mqc_encode(mqc, v);
747         }
748         *flagsp |= T1_MU_THIS << (ci * 3U);
749     }
750 }
751
752
753 static INLINE void opj_t1_dec_refpass_step_raw(
754     opj_t1_t *t1,
755     opj_flag_t *flagsp,
756     OPJ_INT32 *datap,
757     OPJ_INT32 poshalf,
758     OPJ_UINT32 ci)
759 {
760     OPJ_UINT32 v;
761
762     opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
763
764     if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
765             (T1_SIGMA_THIS << (ci * 3U))) {
766         v = opj_mqc_raw_decode(mqc);
767         *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
768         *flagsp |= T1_MU_THIS << (ci * 3U);
769     }
770 }
771
/**
 * MQ-coded refinement-pass step for one sample, in macro form so that the
 * decoder state (curctx, a, c, ct) can live in local variables of the caller.
 *
 * flags        lvalue with the flags word of the current column
 * data         base pointer; the sample is data[ci * data_stride]
 * ci           row of the sample inside the flags word
 * mqc, curctx, a, c, ct  MQ decoder state handed to opj_mqc_decode_macro()
 * v            scratch lvalue receiving the decoded bit
 * poshalf      refinement amount added to or subtracted from the sample
 *
 * Arguments are parenthesized in the expansion so that expression arguments
 * are not re-associated; they are still evaluated several times.
 */
#define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \
                                          mqc, curctx, v, a, c, ct, poshalf) \
{ \
    if (((flags) & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == \
            (T1_SIGMA_THIS << ((ci) * 3U))) { \
        OPJ_UINT32 ctxt = opj_t1_getctxno_mag((flags) >> ((ci) * 3U)); \
        opj_t1_setcurctx(curctx, ctxt); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        (data)[(ci) * (data_stride)] += \
            ((v) ^ ((data)[(ci) * (data_stride)] < 0)) ? (poshalf) : -(poshalf); \
        (flags) |= T1_MU_THIS << ((ci) * 3U); \
    } \
}
784
785 static INLINE void opj_t1_dec_refpass_step_mqc(
786     opj_t1_t *t1,
787     opj_flag_t *flagsp,
788     OPJ_INT32 *datap,
789     OPJ_INT32 poshalf,
790     OPJ_UINT32 ci)
791 {
792     OPJ_UINT32 v;
793
794     opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
795     opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci,
796                                       mqc, mqc->curctx, v, mqc->a, mqc->c,
797                                       mqc->ct, poshalf);
798 }
799
800 static void opj_t1_enc_refpass(
801     opj_t1_t *t1,
802     OPJ_INT32 bpno,
803     OPJ_INT32 *nmsedec,
804     OPJ_BYTE type)
805 {
806     OPJ_UINT32 i, k;
807     const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
808     opj_flag_t* f = &T1_FLAGS(0, 0);
809     const OPJ_UINT32 extra = 2U;
810
811     *nmsedec = 0;
812 #ifdef DEBUG_ENC_REF
813     fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
814 #endif
815     for (k = 0; k < (t1->h & ~3U); k += 4) {
816 #ifdef DEBUG_ENC_REF
817         fprintf(stderr, " k=%d\n", k);
818 #endif
819         for (i = 0; i < t1->w; ++i) {
820 #ifdef DEBUG_ENC_REF
821             fprintf(stderr, " i=%d\n", i);
822 #endif
823             if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
824                 /* none significant */
825                 f++;
826                 continue;
827             }
828             if ((*f & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
829                     (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
830                 /* all processed by sigpass */
831                 f++;
832                 continue;
833             }
834
835             opj_t1_enc_refpass_step(
836                 t1,
837                 f,
838                 &t1->data[((k + 0) * t1->data_stride) + i],
839                 bpno,
840                 one,
841                 nmsedec,
842                 type,
843                 0);
844             opj_t1_enc_refpass_step(
845                 t1,
846                 f,
847                 &t1->data[((k + 1) * t1->data_stride) + i],
848                 bpno,
849                 one,
850                 nmsedec,
851                 type,
852                 1);
853             opj_t1_enc_refpass_step(
854                 t1,
855                 f,
856                 &t1->data[((k + 2) * t1->data_stride) + i],
857                 bpno,
858                 one,
859                 nmsedec,
860                 type,
861                 2);
862             opj_t1_enc_refpass_step(
863                 t1,
864                 f,
865                 &t1->data[((k + 3) * t1->data_stride) + i],
866                 bpno,
867                 one,
868                 nmsedec,
869                 type,
870                 3);
871             ++f;
872         }
873         f += extra;
874     }
875
876     if (k < t1->h) {
877         OPJ_UINT32 j;
878 #ifdef DEBUG_ENC_REF
879         fprintf(stderr, " k=%d\n", k);
880 #endif
881         for (i = 0; i < t1->w; ++i) {
882 #ifdef DEBUG_ENC_REF
883             fprintf(stderr, " i=%d\n", i);
884 #endif
885             if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
886                 /* none significant */
887                 f++;
888                 continue;
889             }
890             for (j = k; j < t1->h; ++j) {
891                 opj_t1_enc_refpass_step(
892                     t1,
893                     f,
894                     &t1->data[(j * t1->data_stride) + i],
895                     bpno,
896                     one,
897                     nmsedec,
898                     type,
899                     j - k);
900             }
901             ++f;
902         }
903     }
904 }
905
906
907 static void opj_t1_dec_refpass_raw(
908     opj_t1_t *t1,
909     OPJ_INT32 bpno)
910 {
911     OPJ_INT32 one, poshalf;
912     OPJ_UINT32 i, j, k;
913     OPJ_INT32 *data = t1->data;
914     opj_flag_t *flagsp = &T1_FLAGS(0, 0);
915     const OPJ_UINT32 l_w = t1->w;
916     one = 1 << bpno;
917     poshalf = one >> 1;
918     for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
919         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
920             opj_flag_t flags = *flagsp;
921             if (flags != 0) {
922                 opj_t1_dec_refpass_step_raw(
923                     t1,
924                     flagsp,
925                     data,
926                     poshalf,
927                     0U);
928                 opj_t1_dec_refpass_step_raw(
929                     t1,
930                     flagsp,
931                     data + l_w,
932                     poshalf,
933                     1U);
934                 opj_t1_dec_refpass_step_raw(
935                     t1,
936                     flagsp,
937                     data + 2 * l_w,
938                     poshalf,
939                     2U);
940                 opj_t1_dec_refpass_step_raw(
941                     t1,
942                     flagsp,
943                     data + 3 * l_w,
944                     poshalf,
945                     3U);
946             }
947         }
948     }
949     if (k < t1->h) {
950         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
951             for (j = 0; j < t1->h - k; ++j) {
952                 opj_t1_dec_refpass_step_raw(
953                     t1,
954                     flagsp,
955                     data + j * l_w,
956                     poshalf,
957                     j);
958             }
959         }
960     }
961 }
962
/*
 * Body of the MQ-coded refinement pass decoder, expanded by the 64x64 and
 * generic wrappers.
 *
 *   t1           Tier 1 handle; t1->data, t1->flags and t1->mqc are used
 *   bpno         bit-plane number; poshalf = (1 << bpno) >> 1 is what the
 *                steps receive
 *   w, h         code-block width and height (w is also the data row stride)
 *   flags_stride offset used to locate the first flags entry, which is
 *                t1->flags[flags_stride + 1]
 *
 * The MQC state is copied into locals by DOWNLOAD_MQC_VARIABLES for the
 * loop over complete four-row stripes and written back with
 * UPLOAD_MQC_VARIABLES before the leftover rows (h not a multiple of 4) are
 * decoded through the opj_t1_dec_refpass_step_mqc() function.
 * In the stripe loop a column is only processed when its flags word is
 * non-zero; the working copy 'flags' is stored back once the four rows have
 * been handled. At the end of each stripe 'data' is moved forward by three
 * more rows and 'flagsp' by two more entries.
 */
963 #define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \
964 { \
965         OPJ_INT32 one, poshalf; \
966         OPJ_UINT32 i, j, k; \
967         register OPJ_INT32 *data = t1->data; \
968         register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
969         const OPJ_UINT32 l_w = w; \
970         opj_mqc_t* mqc = &(t1->mqc); \
971         DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
972         register OPJ_UINT32 v; \
973         one = 1 << bpno; \
974         poshalf = one >> 1; \
975         for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
976                 for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
977                         opj_flag_t flags = *flagsp; \
978                         if( flags != 0 ) { \
979                             opj_t1_dec_refpass_step_mqc_macro( \
980                                 flags, data, l_w, 0, \
981                                 mqc, curctx, v, a, c, ct, poshalf); \
982                             opj_t1_dec_refpass_step_mqc_macro( \
983                                 flags, data, l_w, 1, \
984                                 mqc, curctx, v, a, c, ct, poshalf); \
985                             opj_t1_dec_refpass_step_mqc_macro( \
986                                 flags, data, l_w, 2, \
987                                 mqc, curctx, v, a, c, ct, poshalf); \
988                             opj_t1_dec_refpass_step_mqc_macro( \
989                                 flags, data, l_w, 3, \
990                                 mqc, curctx, v, a, c, ct, poshalf); \
991                             *flagsp = flags; \
992                         } \
993                 } \
994         } \
995         UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
996         if( k < h ) { \
997             for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
998                 for (j = 0; j < h - k; ++j) { \
999                         opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \
1000                 } \
1001             } \
1002         } \
1003 }
1004
/*
 * MQ-coded refinement pass for a 64x64 code-block: expands the shared body
 * with constant width 64, height 64 and flags stride 66.
 */
1005 static void opj_t1_dec_refpass_mqc_64x64(
1006     opj_t1_t *t1,
1007     OPJ_INT32 bpno)
1008 {
1009     opj_t1_dec_refpass_mqc_internal(t1, bpno, 64, 64, 66);
1010 }
1011
/*
 * MQ-coded refinement pass for a code-block of any size: expands the shared
 * body with the width and height stored in t1 and a flags stride of
 * t1->w + 2.
 */
1012 static void opj_t1_dec_refpass_mqc_generic(
1013     opj_t1_t *t1,
1014     OPJ_INT32 bpno)
1015 {
1016     opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U);
1017 }
1018
1019 static void opj_t1_dec_refpass_mqc(
1020     opj_t1_t *t1,
1021     OPJ_INT32 bpno)
1022 {
1023     if (t1->w == 64 && t1->h == 64) {
1024         opj_t1_dec_refpass_mqc_64x64(t1, bpno);
1025     } else {
1026         opj_t1_dec_refpass_mqc_generic(t1, bpno);
1027     }
1028 }
1029
1030 /**
1031 Encode clean-up pass step
 *
 * Encodes rows [runlen, lim) of one column of a four-row stripe.
 *
 * @param t1      Tier 1 handle; t1->mqc receives the symbols, t1->w and
 *                t1->data_stride are read
 * @param flagsp  flags word of the column; the entries at flagsp[-1] and
 *                flagsp[1] are read as well
 * @param datap   sample of the first row to process (row 'runlen'); it is
 *                advanced by t1->data_stride after each row
 * @param bpno    bit-plane number, forwarded to opj_t1_getnmsedec_sig()
 * @param one     mask tested against the absolute value of the sample
 * @param nmsedec accumulator increased for every sample whose sign is coded
 * @param agg     when non-zero, the first processed row skips the
 *                zero-coding decision and goes straight to sign coding
 * @param runlen  index of the first row to process
 * @param lim     one past the index of the last row to process
 * @param cblksty code-block style; only J2K_CCP_CBLKSTY_VSC is looked at
1032 */
1033 static void opj_t1_enc_clnpass_step(
1034     opj_t1_t *t1,
1035     opj_flag_t *flagsp,
1036     OPJ_INT32 *datap,
1037     OPJ_INT32 bpno,
1038     OPJ_INT32 one,
1039     OPJ_INT32 *nmsedec,
1040     OPJ_UINT32 agg,
1041     OPJ_UINT32 runlen,
1042     OPJ_UINT32 lim,
1043     OPJ_UINT32 cblksty)
1044 {
1045     OPJ_UINT32 v;
1046     OPJ_UINT32 ci;
1047     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1048
1049     const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 |
1050                               T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1051
    /* Every row of the column has both its SIGMA and its PI bit set: */
    /* nothing is encoded, only the PI bits of rows >= runlen are cleared */
1052     if ((*flagsp & check) == check) {
1053         if (runlen == 0) {
1054             *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1055         } else if (runlen == 1) {
1056             *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3);
1057         } else if (runlen == 2) {
1058             *flagsp &= ~(T1_PI_2 | T1_PI_3);
1059         } else if (runlen == 3) {
1060             *flagsp &= ~(T1_PI_3);
1061         }
1062         return;
1063     }
1064
1065     for (ci = runlen; ci < lim; ++ci) {
1066         OPJ_UINT32 vsc;
1067         opj_flag_t flags;
1068         OPJ_UINT32 ctxt1;
1069
1070         flags = *flagsp;
1071
        /* First row of an aggregated column: jump directly into the */
        /* sign-coding part of the nested block below */
1072         if ((agg != 0) && (ci == runlen)) {
1073             goto LABEL_PARTIAL;
1074         }
1075
        /* Only rows with neither the SIGMA nor the PI bit set are coded */
1076         if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {
1077             ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
1078 #ifdef DEBUG_ENC_CLN
1079             printf("   ctxt1=%d\n", ctxt1);
1080 #endif
1081             opj_mqc_setcurctx(mqc, ctxt1);
1082             v = (opj_int_abs(*datap) & one) ? 1 : 0;
1083             opj_mqc_encode(mqc, v);
1084             if (v) {
1085                 OPJ_UINT32 ctxt2, spb;
1086                 OPJ_UINT32 lu;
1087 LABEL_PARTIAL:
1088                 lu = opj_t1_getctxtno_sc_or_spb_index(
1089                          *flagsp,
1090                          flagsp[-1], flagsp[1],
1091                          ci);
1092                 *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
1093                                                   (OPJ_UINT32)bpno);
1094                 ctxt2 = opj_t1_getctxno_sc(lu);
1095 #ifdef DEBUG_ENC_CLN
1096                 printf("   ctxt2=%d\n", ctxt2);
1097 #endif
1098                 opj_mqc_setcurctx(mqc, ctxt2);
1099
                /* v is the sign (1 when negative); it is encoded XORed */
                /* with the value returned by opj_t1_getspb() */
1100                 v = *datap < 0 ? 1U : 0U;
1101                 spb = opj_t1_getspb(lu);
1102 #ifdef DEBUG_ENC_CLN
1103                 printf("   spb=%d\n", spb);
1104 #endif
1105                 opj_mqc_encode(mqc, v ^ spb);
                /* vsc is only ever set for row 0 of the stripe */
1106                 vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0;
1107                 opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc);
1108             }
1109         }
        /* The PI bit of the row is cleared whether or not it was coded */
1110         *flagsp &= ~(T1_PI_THIS << (3U * ci));
1111         datap += t1->data_stride;
1112     }
1113 }
1114
/*
 * Decode the clean-up pass contribution of row 'ci' of one column.
 *
 *   check_flags  when true, the row is skipped if its SIGMA or PI bit is
 *                already set in 'flags'
 *   partial      when true, the zero-coding decision is not decoded and the
 *                sign is decoded directly
 *   flags        working copy of the column flags word (updated in place)
 *   flagsp       pointer to that flags word; flagsp[-1] and flagsp[1] are
 *                read for the sign context
 *   flags_stride forwarded to opj_t1_update_flags_macro
 *   data,
 *   data_stride  the decoded value is stored at data[ci * data_stride]
 *   mqc, curctx,
 *   a, c, ct     MQ decoder state used by opj_mqc_decode_macro
 *   v            scratch variable receiving the decoded bits
 *   oneplushalf  magnitude stored for a newly decoded sample; it is negated
 *                when the decoded sign XOR opj_t1_getspb() is non-zero
 *   vsc          forwarded to opj_t1_update_flags_macro
 *
 * The do { } while(0) only exists so that 'break' can leave early when the
 * zero-coding decision decodes to 0.
 */
1115 #define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
1116                                       flags, flagsp, flags_stride, data, \
1117                                       data_stride, ci, mqc, curctx, \
1118                                       v, a, c, ct, oneplushalf, vsc) \
1119 { \
1120     if ( !check_flags || !(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {\
1121         do { \
1122             if( !partial ) { \
1123                 OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
1124                 opj_t1_setcurctx(curctx, ctxt1); \
1125                 opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1126                 if( !v ) \
1127                     break; \
1128             } \
1129             { \
1130                 OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
1131                                     flags, flagsp[-1], flagsp[1], \
1132                                     ci); \
1133                 opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \
1134                 opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1135                 v = v ^ opj_t1_getspb(lu); \
1136                 data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
1137                 opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
1138             } \
1139         } while(0); \
1140     } \
1141 }
1142
/*
 * Function form of the clean-up pass decoding step for a single row.
 *
 * Expands opj_t1_dec_clnpass_step_macro with check_flags = OPJ_TRUE and
 * partial = OPJ_FALSE, working directly on *flagsp and on the MQ decoder
 * state stored in t1->mqc. The data stride given to the macro is 0, so the
 * decoded value is written to datap[0]: datap must already point at the
 * sample of row 'ci'.
 *
 * t1          Tier 1 handle
 * flagsp      flags word of the column
 * datap       sample to decode
 * oneplushalf magnitude stored for a newly decoded sample
 * ci          row index inside the stripe
 * vsc         forwarded to the macro
 */
1143 static void opj_t1_dec_clnpass_step(
1144     opj_t1_t *t1,
1145     opj_flag_t *flagsp,
1146     OPJ_INT32 *datap,
1147     OPJ_INT32 oneplushalf,
1148     OPJ_UINT32 ci,
1149     OPJ_UINT32 vsc)
1150 {
1151     OPJ_UINT32 v;
1152
1153     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1154     opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE,
1155                                   *flagsp, flagsp, t1->w + 2U, datap,
1156                                   0, ci, mqc, mqc->curctx,
1157                                   v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
1158 }
1159
1160 static void opj_t1_enc_clnpass(
1161     opj_t1_t *t1,
1162     OPJ_INT32 bpno,
1163     OPJ_INT32 *nmsedec,
1164     OPJ_UINT32 cblksty)
1165 {
1166     OPJ_UINT32 i, k;
1167     const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
1168     OPJ_UINT32 agg, runlen;
1169
1170     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1171
1172     *nmsedec = 0;
1173 #ifdef DEBUG_ENC_CLN
1174     printf("enc_clnpass: bpno=%d\n", bpno);
1175 #endif
1176     for (k = 0; k < (t1->h & ~3U); k += 4) {
1177 #ifdef DEBUG_ENC_CLN
1178         printf(" k=%d\n", k);
1179 #endif
1180         for (i = 0; i < t1->w; ++i) {
1181 #ifdef DEBUG_ENC_CLN
1182             printf("  i=%d\n", i);
1183 #endif
1184             agg = !(T1_FLAGS(i, k));
1185 #ifdef DEBUG_ENC_CLN
1186             printf("   agg=%d\n", agg);
1187 #endif
1188             if (agg) {
1189                 for (runlen = 0; runlen < 4; ++runlen) {
1190                     if (opj_int_abs(t1->data[((k + runlen)*t1->data_stride) + i]) & one) {
1191                         break;
1192                     }
1193                 }
1194                 opj_mqc_setcurctx(mqc, T1_CTXNO_AGG);
1195                 opj_mqc_encode(mqc, runlen != 4);
1196                 if (runlen == 4) {
1197                     continue;
1198                 }
1199                 opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
1200                 opj_mqc_encode(mqc, runlen >> 1);
1201                 opj_mqc_encode(mqc, runlen & 1);
1202             } else {
1203                 runlen = 0;
1204             }
1205             opj_t1_enc_clnpass_step(
1206                 t1,
1207                 &T1_FLAGS(i, k),
1208                 &t1->data[((k + runlen) * t1->data_stride) + i],
1209                 bpno,
1210                 one,
1211                 nmsedec,
1212                 agg,
1213                 runlen,
1214                 4U,
1215                 cblksty);
1216         }
1217     }
1218     if (k < t1->h) {
1219         agg = 0;
1220         runlen = 0;
1221 #ifdef DEBUG_ENC_CLN
1222         printf(" k=%d\n", k);
1223 #endif
1224         for (i = 0; i < t1->w; ++i) {
1225 #ifdef DEBUG_ENC_CLN
1226             printf("  i=%d\n", i);
1227             printf("   agg=%d\n", agg);
1228 #endif
1229             opj_t1_enc_clnpass_step(
1230                 t1,
1231                 &T1_FLAGS(i, k),
1232                 &t1->data[((k + runlen) * t1->data_stride) + i],
1233                 bpno,
1234                 one,
1235                 nmsedec,
1236                 agg,
1237                 runlen,
1238                 t1->h - k,
1239                 cblksty);
1240         }
1241     }
1242 }
1243
/*
 * Body of the clean-up pass decoder, expanded by the 64x64 / generic and
 * vsc / novsc wrappers.
 *
 *   t1           Tier 1 handle; t1->data, t1->flags and t1->mqc are used
 *   bpno         bit-plane number; oneplushalf = one | (one >> 1) with
 *                one = 1 << bpno is the magnitude given to new samples
 *   vsc          forwarded to the step for row 0 of each stripe (the other
 *                rows of a complete stripe always get OPJ_FALSE)
 *   w, h         code-block width and height (w is also the data row stride)
 *   flags_stride offset used to locate the first flags entry, which is
 *                t1->flags[flags_stride + 1]
 *
 * Complete four-row stripes are decoded with the MQC state held in locals
 * (DOWNLOAD_MQC_VARIABLES / UPLOAD_MQC_VARIABLES):
 *  - a column whose flags word is 0 first decodes one symbol in context
 *    T1_CTXNO_AGG; if it is 0 the column is left as is. Otherwise two
 *    symbols in context T1_CTXNO_UNI give 'runlen', the first row to decode.
 *    The switch enters at that row with 'partial' set (no zero-coding
 *    decision for that first row) and falls through the following rows
 *    with 'partial' cleared.
 *  - any other column runs the step on its four rows with check_flags set.
 *  In both cases the four PI bits are cleared when the flags are stored.
 * Leftover rows (h not a multiple of 4) are decoded through the
 * opj_t1_dec_clnpass_step() function, then the PI bits are cleared.
 */
1244 #define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
1245 { \
1246     OPJ_INT32 one, half, oneplushalf; \
1247     OPJ_UINT32 runlen; \
1248     OPJ_UINT32 i, j, k; \
1249     const OPJ_UINT32 l_w = w; \
1250     opj_mqc_t* mqc = &(t1->mqc); \
1251     register OPJ_INT32 *data = t1->data; \
1252     register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
1253     DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
1254     register OPJ_UINT32 v; \
1255     one = 1 << bpno; \
1256     half = one >> 1; \
1257     oneplushalf = one | half; \
1258     for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
1259         for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
1260             opj_flag_t flags = *flagsp; \
1261             if (flags == 0) { \
1262                 OPJ_UINT32 partial = OPJ_TRUE; \
1263                 opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \
1264                 opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1265                 if (!v) { \
1266                     continue; \
1267                 } \
1268                 opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \
1269                 opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \
1270                 opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1271                 runlen = (runlen << 1) | v; \
1272                 switch(runlen) { \
1273                     case 0: \
1274                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\
1275                                             flags, flagsp, flags_stride, data, \
1276                                             l_w, 0, mqc, curctx, \
1277                                             v, a, c, ct, oneplushalf, vsc); \
1278                         partial = OPJ_FALSE; \
1279                         /* FALLTHRU */ \
1280                     case 1: \
1281                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1282                                             flags, flagsp, flags_stride, data, \
1283                                             l_w, 1, mqc, curctx, \
1284                                             v, a, c, ct, oneplushalf, OPJ_FALSE); \
1285                         partial = OPJ_FALSE; \
1286                         /* FALLTHRU */ \
1287                     case 2: \
1288                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1289                                             flags, flagsp, flags_stride, data, \
1290                                             l_w, 2, mqc, curctx, \
1291                                             v, a, c, ct, oneplushalf, OPJ_FALSE); \
1292                         partial = OPJ_FALSE; \
1293                         /* FALLTHRU */ \
1294                     case 3: \
1295                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1296                                             flags, flagsp, flags_stride, data, \
1297                                             l_w, 3, mqc, curctx, \
1298                                             v, a, c, ct, oneplushalf, OPJ_FALSE); \
1299                         break; \
1300                 } \
1301             } else { \
1302                 opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1303                                     flags, flagsp, flags_stride, data, \
1304                                     l_w, 0, mqc, curctx, \
1305                                     v, a, c, ct, oneplushalf, vsc); \
1306                 opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1307                                     flags, flagsp, flags_stride, data, \
1308                                     l_w, 1, mqc, curctx, \
1309                                     v, a, c, ct, oneplushalf, OPJ_FALSE); \
1310                 opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1311                                     flags, flagsp, flags_stride, data, \
1312                                     l_w, 2, mqc, curctx, \
1313                                     v, a, c, ct, oneplushalf, OPJ_FALSE); \
1314                 opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1315                                     flags, flagsp, flags_stride, data, \
1316                                     l_w, 3, mqc, curctx, \
1317                                     v, a, c, ct, oneplushalf, OPJ_FALSE); \
1318             } \
1319             *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1320         } \
1321     } \
1322     UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
1323     if( k < h ) { \
1324         for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
1325             for (j = 0; j < h - k; ++j) { \
1326                 opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j, vsc); \
1327             } \
1328             *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1329         } \
1330     } \
1331 }
1332
1333 static void opj_t1_dec_clnpass_check_segsym(opj_t1_t *t1, OPJ_INT32 cblksty)
1334 {
1335     if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
1336         opj_mqc_t* mqc = &(t1->mqc);
1337         OPJ_UINT32 v, v2;
1338         opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
1339         opj_mqc_decode(v, mqc);
1340         opj_mqc_decode(v2, mqc);
1341         v = (v << 1) | v2;
1342         opj_mqc_decode(v2, mqc);
1343         v = (v << 1) | v2;
1344         opj_mqc_decode(v2, mqc);
1345         v = (v << 1) | v2;
1346         /*
1347         if (v!=0xa) {
1348             opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v);
1349         }
1350         */
1351     }
1352 }
1353
/*
 * Clean-up pass decoder for a 64x64 code-block, vsc argument OPJ_FALSE:
 * expands the shared body with width 64, height 64 and flags stride 66.
 */
1354 static void opj_t1_dec_clnpass_64x64_novsc(
1355     opj_t1_t *t1,
1356     OPJ_INT32 bpno)
1357 {
1358     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
1359 }
1360
/*
 * Clean-up pass decoder for a 64x64 code-block, vsc argument OPJ_TRUE:
 * expands the shared body with width 64, height 64 and flags stride 66.
 */
1361 static void opj_t1_dec_clnpass_64x64_vsc(
1362     opj_t1_t *t1,
1363     OPJ_INT32 bpno)
1364 {
1365     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
1366 }
1367
/*
 * Clean-up pass decoder for a code-block of any size, vsc argument
 * OPJ_FALSE: expands the shared body with the width and height stored in
 * t1 and a flags stride of t1->w + 2.
 */
1368 static void opj_t1_dec_clnpass_generic_novsc(
1369     opj_t1_t *t1,
1370     OPJ_INT32 bpno)
1371 {
1372     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
1373                                 t1->w + 2U);
1374 }
1375
/*
 * Clean-up pass decoder for a code-block of any size, vsc argument
 * OPJ_TRUE: expands the shared body with the width and height stored in
 * t1 and a flags stride of t1->w + 2.
 */
1376 static void opj_t1_dec_clnpass_generic_vsc(
1377     opj_t1_t *t1,
1378     OPJ_INT32 bpno)
1379 {
1380     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
1381                                 t1->w + 2U);
1382 }
1383
1384 static void opj_t1_dec_clnpass(
1385     opj_t1_t *t1,
1386     OPJ_INT32 bpno,
1387     OPJ_INT32 cblksty)
1388 {
1389     if (t1->w == 64 && t1->h == 64) {
1390         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1391             opj_t1_dec_clnpass_64x64_vsc(t1, bpno);
1392         } else {
1393             opj_t1_dec_clnpass_64x64_novsc(t1, bpno);
1394         }
1395     } else {
1396         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1397             opj_t1_dec_clnpass_generic_vsc(t1, bpno);
1398         } else {
1399             opj_t1_dec_clnpass_generic_novsc(t1, bpno);
1400         }
1401     }
1402     opj_t1_dec_clnpass_check_segsym(t1, cblksty);
1403 }
1404
1405
1406 /** mod fixed_quality */
1407 static OPJ_FLOAT64 opj_t1_getwmsedec(
1408     OPJ_INT32 nmsedec,
1409     OPJ_UINT32 compno,
1410     OPJ_UINT32 level,
1411     OPJ_UINT32 orient,
1412     OPJ_INT32 bpno,
1413     OPJ_UINT32 qmfbid,
1414     OPJ_FLOAT64 stepsize,
1415     OPJ_UINT32 numcomps,
1416     const OPJ_FLOAT64 * mct_norms,
1417     OPJ_UINT32 mct_numcomps)
1418 {
1419     OPJ_FLOAT64 w1 = 1, w2, wmsedec;
1420     OPJ_ARG_NOT_USED(numcomps);
1421
1422     if (mct_norms && (compno < mct_numcomps)) {
1423         w1 = mct_norms[compno];
1424     }
1425
1426     if (qmfbid == 1) {
1427         w2 = opj_dwt_getnorm(level, orient);
1428     } else {    /* if (qmfbid == 0) */
1429         w2 = opj_dwt_getnorm_real(level, orient);
1430     }
1431
1432     wmsedec = w1 * w2 * stepsize * (1 << bpno);
1433     wmsedec *= wmsedec * nmsedec / 8192.0;
1434
1435     return wmsedec;
1436 }
1437
1438 static OPJ_BOOL opj_t1_allocate_buffers(
1439     opj_t1_t *t1,
1440     OPJ_UINT32 w,
1441     OPJ_UINT32 h)
1442 {
1443     OPJ_UINT32 flagssize;
1444     OPJ_UINT32 flags_stride;
1445
1446     /* No risk of overflow. Prior checks ensure those assert are met */
1447     /* They are per the specification */
1448     assert(w <= 1024);
1449     assert(h <= 1024);
1450     assert(w * h <= 4096);
1451
1452     /* encoder uses tile buffer, so no need to allocate */
1453     if (!t1->encoder) {
1454         OPJ_UINT32 datasize = w * h;
1455
1456         if (datasize > t1->datasize) {
1457             opj_aligned_free(t1->data);
1458             t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
1459             if (!t1->data) {
1460                 /* FIXME event manager error callback */
1461                 return OPJ_FALSE;
1462             }
1463             t1->datasize = datasize;
1464         }
1465         /* memset first arg is declared to never be null by gcc */
1466         if (t1->data != NULL) {
1467             memset(t1->data, 0, datasize * sizeof(OPJ_INT32));
1468         }
1469     }
1470
1471     flags_stride = w + 2U; /* can't be 0U */
1472
1473     flagssize = (h + 3U) / 4U + 2U;
1474
1475     flagssize *= flags_stride;
1476     {
1477         opj_flag_t* p;
1478         OPJ_UINT32 x;
1479         OPJ_UINT32 flags_height = (h + 3U) / 4U;
1480
1481         if (flagssize > t1->flagssize) {
1482
1483             opj_aligned_free(t1->flags);
1484             t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(
1485                             opj_flag_t));
1486             if (!t1->flags) {
1487                 /* FIXME event manager error callback */
1488                 return OPJ_FALSE;
1489             }
1490         }
1491         t1->flagssize = flagssize;
1492
1493         memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
1494
1495         p = &t1->flags[0];
1496         for (x = 0; x < flags_stride; ++x) {
1497             /* magic value to hopefully stop any passes being interested in this entry */
1498             *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1499         }
1500
1501         p = &t1->flags[((flags_height + 1) * flags_stride)];
1502         for (x = 0; x < flags_stride; ++x) {
1503             /* magic value to hopefully stop any passes being interested in this entry */
1504             *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1505         }
1506
1507         if (h % 4) {
1508             OPJ_UINT32 v = 0;
1509             p = &t1->flags[((flags_height) * flags_stride)];
1510             if (h % 4 == 1) {
1511                 v |= T1_PI_1 | T1_PI_2 | T1_PI_3;
1512             } else if (h % 4 == 2) {
1513                 v |= T1_PI_2 | T1_PI_3;
1514             } else if (h % 4 == 3) {
1515                 v |= T1_PI_3;
1516             }
1517             for (x = 0; x < flags_stride; ++x) {
1518                 *p++ = v;
1519             }
1520         }
1521     }
1522
1523     t1->w = w;
1524     t1->h = h;
1525
1526     return OPJ_TRUE;
1527 }
1528
1529 /* ----------------------------------------------------------------------- */
1530
1531 /* ----------------------------------------------------------------------- */
1532 /**
1533  * Creates a new Tier 1 handle
1534  * and initializes the look-up tables of the Tier-1 coder/decoder
1535  * @return a new T1 handle if successful, returns NULL otherwise
1536 */
1537 opj_t1_t* opj_t1_create(OPJ_BOOL isEncoder)
1538 {
1539     opj_t1_t *l_t1 = 00;
1540
1541     l_t1 = (opj_t1_t*) opj_calloc(1, sizeof(opj_t1_t));
1542     if (!l_t1) {
1543         return 00;
1544     }
1545
1546     l_t1->encoder = isEncoder;
1547
1548     return l_t1;
1549 }
1550
1551
1552 /**
1553  * Destroys a previously created T1 handle
1554  *
1555  * @param p_t1 Tier 1 handle to destroy
1556 */
1557 void opj_t1_destroy(opj_t1_t *p_t1)
1558 {
1559     if (! p_t1) {
1560         return;
1561     }
1562
1563     /* encoder uses tile buffer, so no need to free */
1564     if (!p_t1->encoder && p_t1->data) {
1565         opj_aligned_free(p_t1->data);
1566         p_t1->data = 00;
1567     }
1568
1569     if (p_t1->flags) {
1570         opj_aligned_free(p_t1->flags);
1571         p_t1->flags = 00;
1572     }
1573
1574     opj_free(p_t1->cblkdatabuffer);
1575
1576     opj_free(p_t1);
1577 }
1578
/*
 * Work item consumed by opj_t1_clbl_decode_processor(): everything needed
 * to decode one code-block. The processor frees the job when it is done.
 */
1579 typedef struct {
     /* when false, the processor allocates cblk->decoded_data and keeps the */
     /* result there; when true, a leftover cblk->decoded_data is released */
1580     OPJ_BOOL whole_tile_decoding;
     /* resolution number; tilec->resolutions[resno - 1] gives the band offsets */
1581     OPJ_UINT32 resno;
     /* code-block to decode */
1582     opj_tcd_cblk_dec_t* cblk;
     /* band the code-block belongs to (bandno, x0, y0 and stepsize are read) */
1583     opj_tcd_band_t* band;
     /* tile component; its resolutions are read and its data may be written */
1584     opj_tcd_tilecomp_t* tilec;
     /* coding parameters (roishift, cblksty and qmfbid are read) */
1585     opj_tccp_t* tccp;
     /* copied into t1->mustuse_cblkdatabuffer before decoding */
1586     OPJ_BOOL mustuse_cblkdatabuffer;
     /* shared result flag: set to OPJ_FALSE on failure; a job whose flag is */
     /* already false is dropped without decoding */
1587     volatile OPJ_BOOL* pret;
     /* event manager used to report errors */
1588     opj_event_mgr_t *p_manager;
     /* when not NULL, locked around calls to opj_event_msg() */
1589     opj_mutex_t* p_manager_mutex;
     /* forwarded to opj_t1_decode_cblk() */
1590     OPJ_BOOL check_pterm;
1591 } opj_t1_cblk_decode_processing_job_t;
1592
1593 static void opj_t1_destroy_wrapper(void* t1)
1594 {
1595     opj_t1_destroy((opj_t1_t*) t1);
1596 }
1597
1598 static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
1599 {
1600     opj_tcd_cblk_dec_t* cblk;
1601     opj_tcd_band_t* band;
1602     opj_tcd_tilecomp_t* tilec;
1603     opj_tccp_t* tccp;
1604     OPJ_INT32* OPJ_RESTRICT datap;
1605     OPJ_UINT32 cblk_w, cblk_h;
1606     OPJ_INT32 x, y;
1607     OPJ_UINT32 i, j;
1608     opj_t1_cblk_decode_processing_job_t* job;
1609     opj_t1_t* t1;
1610     OPJ_UINT32 resno;
1611     OPJ_UINT32 tile_w;
1612
1613     job = (opj_t1_cblk_decode_processing_job_t*) user_data;
1614
1615     cblk = job->cblk;
1616
1617     if (!job->whole_tile_decoding) {
1618         cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1619         cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1620
1621         cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) *
1622                              cblk_w * cblk_h);
1623         if (cblk->decoded_data == NULL) {
1624             if (job->p_manager_mutex) {
1625                 opj_mutex_lock(job->p_manager_mutex);
1626             }
1627             opj_event_msg(job->p_manager, EVT_ERROR,
1628                           "Cannot allocate cblk->decoded_data\n");
1629             if (job->p_manager_mutex) {
1630                 opj_mutex_unlock(job->p_manager_mutex);
1631             }
1632             *(job->pret) = OPJ_FALSE;
1633             opj_free(job);
1634             return;
1635         }
1636         /* Zero-init required */
1637         memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h);
1638     } else if (cblk->decoded_data) {
1639         /* Not sure if that code path can happen, but better be */
1640         /* safe than sorry */
1641         opj_aligned_free(cblk->decoded_data);
1642         cblk->decoded_data = NULL;
1643     }
1644
1645     resno = job->resno;
1646     band = job->band;
1647     tilec = job->tilec;
1648     tccp = job->tccp;
1649     tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1
1650                           -
1651                           tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
1652
1653     if (!*(job->pret)) {
1654         opj_free(job);
1655         return;
1656     }
1657
1658     t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
1659     if (t1 == NULL) {
1660         t1 = opj_t1_create(OPJ_FALSE);
1661         opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
1662     }
1663     t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
1664
1665     if (OPJ_FALSE == opj_t1_decode_cblk(
1666                 t1,
1667                 cblk,
1668                 band->bandno,
1669                 (OPJ_UINT32)tccp->roishift,
1670                 tccp->cblksty,
1671                 job->p_manager,
1672                 job->p_manager_mutex,
1673                 job->check_pterm)) {
1674         *(job->pret) = OPJ_FALSE;
1675         opj_free(job);
1676         return;
1677     }
1678
1679     x = cblk->x0 - band->x0;
1680     y = cblk->y0 - band->y0;
1681     if (band->bandno & 1) {
1682         opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
1683         x += pres->x1 - pres->x0;
1684     }
1685     if (band->bandno & 2) {
1686         opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
1687         y += pres->y1 - pres->y0;
1688     }
1689
1690     datap = cblk->decoded_data ? cblk->decoded_data : t1->data;
1691     cblk_w = t1->w;
1692     cblk_h = t1->h;
1693
1694     if (tccp->roishift) {
1695         if (tccp->roishift >= 31) {
1696             for (j = 0; j < cblk_h; ++j) {
1697                 for (i = 0; i < cblk_w; ++i) {
1698                     datap[(j * cblk_w) + i] = 0;
1699                 }
1700             }
1701         } else {
1702             OPJ_INT32 thresh = 1 << tccp->roishift;
1703             for (j = 0; j < cblk_h; ++j) {
1704                 for (i = 0; i < cblk_w; ++i) {
1705                     OPJ_INT32 val = datap[(j * cblk_w) + i];
1706                     OPJ_INT32 mag = abs(val);
1707                     if (mag >= thresh) {
1708                         mag >>= tccp->roishift;
1709                         datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
1710                     }
1711                 }
1712             }
1713         }
1714     }
1715
1716     /* Both can be non NULL if for example decoding a full tile and then */
1717     /* partially a tile. In which case partial decoding should be the */
1718     /* priority */
1719     assert((cblk->decoded_data != NULL) || (tilec->data != NULL));
1720
1721     if (cblk->decoded_data) {
1722         OPJ_UINT32 cblk_size = cblk_w * cblk_h;
1723         if (tccp->qmfbid == 1) {
1724             for (i = 0; i < cblk_size; ++i) {
1725                 datap[i] /= 2;
1726             }
1727         } else {        /* if (tccp->qmfbid == 0) */
1728             i = 0;
1729 #ifdef __SSE2__
1730             {
1731                 const __m128 xmm_stepsize = _mm_set1_ps(band->stepsize);
1732                 for (; i < (cblk_size & ~15U); i += 16) {
1733                     __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1734                                                            datap + 0)));
1735                     __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1736                                                            datap + 4)));
1737                     __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1738                                                            datap + 8)));
1739                     __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1740                                                            datap + 12)));
1741                     _mm_store_ps((float*)(datap +  0), _mm_mul_ps(xmm0_data, xmm_stepsize));
1742                     _mm_store_ps((float*)(datap +  4), _mm_mul_ps(xmm1_data, xmm_stepsize));
1743                     _mm_store_ps((float*)(datap +  8), _mm_mul_ps(xmm2_data, xmm_stepsize));
1744                     _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
1745                     datap += 16;
1746                 }
1747             }
1748 #endif
1749             for (; i < cblk_size; ++i) {
1750                 OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize;
1751                 memcpy(datap, &tmp, sizeof(tmp));
1752                 datap++;
1753             }
1754         }
1755     } else if (tccp->qmfbid == 1) {
1756         OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
1757                                                        (OPJ_SIZE_T)x];
1758         for (j = 0; j < cblk_h; ++j) {
1759             i = 0;
1760             for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
1761                 OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U];
1762                 OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
1763                 OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
1764                 OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
1765                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
1766                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
1767                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
1768                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
1769             }
1770             for (; i < cblk_w; ++i) {
1771                 OPJ_INT32 tmp = datap[(j * cblk_w) + i];
1772                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
1773             }
1774         }
1775     } else {        /* if (tccp->qmfbid == 0) */
1776         OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
1777                                                          tile_w + (OPJ_SIZE_T)x];
1778         for (j = 0; j < cblk_h; ++j) {
1779             OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
1780             for (i = 0; i < cblk_w; ++i) {
1781                 OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * band->stepsize;
1782                 *tiledp2 = tmp;
1783                 datap++;
1784                 tiledp2++;
1785             }
1786             tiledp += tile_w;
1787         }
1788     }
1789
1790     opj_free(job);
1791 }
1792
1793
1794 void opj_t1_decode_cblks(opj_tcd_t* tcd,
1795                          volatile OPJ_BOOL* pret,
1796                          opj_tcd_tilecomp_t* tilec,
1797                          opj_tccp_t* tccp,
1798                          opj_event_mgr_t *p_manager,
1799                          opj_mutex_t* p_manager_mutex,
1800                          OPJ_BOOL check_pterm
1801                         )
1802 {
1803     opj_thread_pool_t* tp = tcd->thread_pool;
1804     OPJ_UINT32 resno, bandno, precno, cblkno;
1805
1806 #ifdef DEBUG_VERBOSE
1807     OPJ_UINT32 codeblocks_decoded = 0;
1808     printf("Enter opj_t1_decode_cblks()\n");
1809 #endif
1810
1811     for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
1812         opj_tcd_resolution_t* res = &tilec->resolutions[resno];
1813
1814         for (bandno = 0; bandno < res->numbands; ++bandno) {
1815             opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
1816
1817             for (precno = 0; precno < res->pw * res->ph; ++precno) {
1818                 opj_tcd_precinct_t* precinct = &band->precincts[precno];
1819
1820                 if (!opj_tcd_is_subband_area_of_interest(tcd,
1821                         tilec->compno,
1822                         resno,
1823                         band->bandno,
1824                         (OPJ_UINT32)precinct->x0,
1825                         (OPJ_UINT32)precinct->y0,
1826                         (OPJ_UINT32)precinct->x1,
1827                         (OPJ_UINT32)precinct->y1)) {
1828                     for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1829                         opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1830                         if (cblk->decoded_data) {
1831 #ifdef DEBUG_VERBOSE
1832                             printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1833                                    cblk->x0, cblk->y0, resno, bandno);
1834 #endif
1835                             opj_aligned_free(cblk->decoded_data);
1836                             cblk->decoded_data = NULL;
1837                         }
1838                     }
1839                     continue;
1840                 }
1841
1842                 for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1843                     opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1844                     opj_t1_cblk_decode_processing_job_t* job;
1845
1846                     if (!opj_tcd_is_subband_area_of_interest(tcd,
1847                             tilec->compno,
1848                             resno,
1849                             band->bandno,
1850                             (OPJ_UINT32)cblk->x0,
1851                             (OPJ_UINT32)cblk->y0,
1852                             (OPJ_UINT32)cblk->x1,
1853                             (OPJ_UINT32)cblk->y1)) {
1854                         if (cblk->decoded_data) {
1855 #ifdef DEBUG_VERBOSE
1856                             printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1857                                    cblk->x0, cblk->y0, resno, bandno);
1858 #endif
1859                             opj_aligned_free(cblk->decoded_data);
1860                             cblk->decoded_data = NULL;
1861                         }
1862                         continue;
1863                     }
1864
1865                     if (!tcd->whole_tile_decoding) {
1866                         OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1867                         OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1868                         if (cblk->decoded_data != NULL) {
1869 #ifdef DEBUG_VERBOSE
1870                             printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n",
1871                                    cblk->x0, cblk->y0, resno, bandno);
1872 #endif
1873                             continue;
1874                         }
1875                         if (cblk_w == 0 || cblk_h == 0) {
1876                             continue;
1877                         }
1878 #ifdef DEBUG_VERBOSE
1879                         printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n",
1880                                cblk->x0, cblk->y0, resno, bandno);
1881 #endif
1882                     }
1883
1884                     job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1,
1885                             sizeof(opj_t1_cblk_decode_processing_job_t));
1886                     if (!job) {
1887                         *pret = OPJ_FALSE;
1888                         return;
1889                     }
1890                     job->whole_tile_decoding = tcd->whole_tile_decoding;
1891                     job->resno = resno;
1892                     job->cblk = cblk;
1893                     job->band = band;
1894                     job->tilec = tilec;
1895                     job->tccp = tccp;
1896                     job->pret = pret;
1897                     job->p_manager_mutex = p_manager_mutex;
1898                     job->p_manager = p_manager;
1899                     job->check_pterm = check_pterm;
1900                     job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
1901                     opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
1902 #ifdef DEBUG_VERBOSE
1903                     codeblocks_decoded ++;
1904 #endif
1905                     if (!(*pret)) {
1906                         return;
1907                     }
1908                 } /* cblkno */
1909             } /* precno */
1910         } /* bandno */
1911     } /* resno */
1912
1913 #ifdef DEBUG_VERBOSE
1914     printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded);
1915 #endif
1916     return;
1917 }
1918
1919
1920 static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
1921                                    opj_tcd_cblk_dec_t* cblk,
1922                                    OPJ_UINT32 orient,
1923                                    OPJ_UINT32 roishift,
1924                                    OPJ_UINT32 cblksty,
1925                                    opj_event_mgr_t *p_manager,
1926                                    opj_mutex_t* p_manager_mutex,
1927                                    OPJ_BOOL check_pterm)
1928 {
1929     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1930
1931     OPJ_INT32 bpno_plus_one;
1932     OPJ_UINT32 passtype;
1933     OPJ_UINT32 segno, passno;
1934     OPJ_BYTE* cblkdata = NULL;
1935     OPJ_UINT32 cblkdataindex = 0;
1936     OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
1937     OPJ_INT32* original_t1_data = NULL;
1938
1939     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
1940
1941     if (!opj_t1_allocate_buffers(
1942                 t1,
1943                 (OPJ_UINT32)(cblk->x1 - cblk->x0),
1944                 (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
1945         return OPJ_FALSE;
1946     }
1947
1948     bpno_plus_one = (OPJ_INT32)(roishift + cblk->numbps);
1949     if (bpno_plus_one >= 31) {
1950         if (p_manager_mutex) {
1951             opj_mutex_lock(p_manager_mutex);
1952         }
1953         opj_event_msg(p_manager, EVT_WARNING,
1954                       "opj_t1_decode_cblk(): unsupported bpno_plus_one = %d >= 31\n",
1955                       bpno_plus_one);
1956         if (p_manager_mutex) {
1957             opj_mutex_unlock(p_manager_mutex);
1958         }
1959         return OPJ_FALSE;
1960     }
1961     passtype = 2;
1962
1963     opj_mqc_resetstates(mqc);
1964     opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
1965     opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
1966     opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
1967
1968     /* Even if we have a single chunk, in multi-threaded decoding */
1969     /* the insertion of our synthetic marker might potentially override */
1970     /* valid codestream of other codeblocks decoded in parallel. */
1971     if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
1972         OPJ_UINT32 i;
1973         OPJ_UINT32 cblk_len;
1974
1975         /* Compute whole codeblock length from chunk lengths */
1976         cblk_len = 0;
1977         for (i = 0; i < cblk->numchunks; i++) {
1978             cblk_len += cblk->chunks[i].len;
1979         }
1980
1981         /* Allocate temporary memory if needed */
1982         if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
1983             cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
1984                                               cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
1985             if (cblkdata == NULL) {
1986                 return OPJ_FALSE;
1987             }
1988             t1->cblkdatabuffer = cblkdata;
1989             memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
1990             t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
1991         }
1992
1993         /* Concatenate all chunks */
1994         cblkdata = t1->cblkdatabuffer;
1995         cblk_len = 0;
1996         for (i = 0; i < cblk->numchunks; i++) {
1997             memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
1998             cblk_len += cblk->chunks[i].len;
1999         }
2000     } else if (cblk->numchunks == 1) {
2001         cblkdata = cblk->chunks[0].data;
2002     } else {
2003         /* Not sure if that can happen in practice, but avoid Coverity to */
2004         /* think we will dereference a null cblkdta pointer */
2005         return OPJ_TRUE;
2006     }
2007
2008     /* For subtile decoding, directly decode in the decoded_data buffer of */
2009     /* the code-block. Hack t1->data to point to it, and restore it later */
2010     if (cblk->decoded_data) {
2011         original_t1_data = t1->data;
2012         t1->data = cblk->decoded_data;
2013     }
2014
2015     for (segno = 0; segno < cblk->real_num_segs; ++segno) {
2016         opj_tcd_seg_t *seg = &cblk->segs[segno];
2017
2018         /* BYPASS mode */
2019         type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
2020                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2021
2022         if (type == T1_TYPE_RAW) {
2023             opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2024                                  OPJ_COMMON_CBLK_DATA_EXTRA);
2025         } else {
2026             opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2027                              OPJ_COMMON_CBLK_DATA_EXTRA);
2028         }
2029         cblkdataindex += seg->len;
2030
2031         for (passno = 0; (passno < seg->real_num_passes) &&
2032                 (bpno_plus_one >= 1); ++passno) {
2033             switch (passtype) {
2034             case 0:
2035                 if (type == T1_TYPE_RAW) {
2036                     opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2037                 } else {
2038                     opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2039                 }
2040                 break;
2041             case 1:
2042                 if (type == T1_TYPE_RAW) {
2043                     opj_t1_dec_refpass_raw(t1, bpno_plus_one);
2044                 } else {
2045                     opj_t1_dec_refpass_mqc(t1, bpno_plus_one);
2046                 }
2047                 break;
2048             case 2:
2049                 opj_t1_dec_clnpass(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2050                 break;
2051             }
2052
2053             if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) {
2054                 opj_mqc_resetstates(mqc);
2055                 opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2056                 opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2057                 opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2058             }
2059             if (++passtype == 3) {
2060                 passtype = 0;
2061                 bpno_plus_one--;
2062             }
2063         }
2064
2065         opq_mqc_finish_dec(mqc);
2066     }
2067
2068     if (check_pterm) {
2069         if (mqc->bp + 2 < mqc->end) {
2070             if (p_manager_mutex) {
2071                 opj_mutex_lock(p_manager_mutex);
2072             }
2073             opj_event_msg(p_manager, EVT_WARNING,
2074                           "PTERM check failure: %d remaining bytes in code block (%d used / %d)\n",
2075                           (int)(mqc->end - mqc->bp) - 2,
2076                           (int)(mqc->bp - mqc->start),
2077                           (int)(mqc->end - mqc->start));
2078             if (p_manager_mutex) {
2079                 opj_mutex_unlock(p_manager_mutex);
2080             }
2081         } else if (mqc->end_of_byte_stream_counter > 2) {
2082             if (p_manager_mutex) {
2083                 opj_mutex_lock(p_manager_mutex);
2084             }
2085             opj_event_msg(p_manager, EVT_WARNING,
2086                           "PTERM check failure: %d synthetized 0xFF markers read\n",
2087                           mqc->end_of_byte_stream_counter);
2088             if (p_manager_mutex) {
2089                 opj_mutex_unlock(p_manager_mutex);
2090             }
2091         }
2092     }
2093
2094     /* Restore original t1->data is needed */
2095     if (cblk->decoded_data) {
2096         t1->data = original_t1_data;
2097     }
2098
2099     return OPJ_TRUE;
2100 }
2101
2102
2103
2104
2105 OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
2106                              opj_tcd_tile_t *tile,
2107                              opj_tcp_t *tcp,
2108                              const OPJ_FLOAT64 * mct_norms,
2109                              OPJ_UINT32 mct_numcomps
2110                             )
2111 {
2112     OPJ_UINT32 compno, resno, bandno, precno, cblkno;
2113
2114     tile->distotile = 0;        /* fixed_quality */
2115
2116     for (compno = 0; compno < tile->numcomps; ++compno) {
2117         opj_tcd_tilecomp_t* tilec = &tile->comps[compno];
2118         opj_tccp_t* tccp = &tcp->tccps[compno];
2119         OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
2120
2121         for (resno = 0; resno < tilec->numresolutions; ++resno) {
2122             opj_tcd_resolution_t *res = &tilec->resolutions[resno];
2123
2124             for (bandno = 0; bandno < res->numbands; ++bandno) {
2125                 opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
2126                 OPJ_INT32 bandconst;
2127
2128                 /* Skip empty bands */
2129                 if (opj_tcd_is_band_empty(band)) {
2130                     continue;
2131                 }
2132
2133                 bandconst = 8192 * 8192 / ((OPJ_INT32) floor(band->stepsize * 8192));
2134                 for (precno = 0; precno < res->pw * res->ph; ++precno) {
2135                     opj_tcd_precinct_t *prc = &band->precincts[precno];
2136
2137                     for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
2138                         opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
2139                         OPJ_INT32* OPJ_RESTRICT tiledp;
2140                         OPJ_UINT32 cblk_w;
2141                         OPJ_UINT32 cblk_h;
2142                         OPJ_UINT32 i, j, tileLineAdvance;
2143                         OPJ_SIZE_T tileIndex = 0;
2144
2145                         OPJ_INT32 x = cblk->x0 - band->x0;
2146                         OPJ_INT32 y = cblk->y0 - band->y0;
2147                         if (band->bandno & 1) {
2148                             opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2149                             x += pres->x1 - pres->x0;
2150                         }
2151                         if (band->bandno & 2) {
2152                             opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2153                             y += pres->y1 - pres->y0;
2154                         }
2155
2156                         if (!opj_t1_allocate_buffers(
2157                                     t1,
2158                                     (OPJ_UINT32)(cblk->x1 - cblk->x0),
2159                                     (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
2160                             return OPJ_FALSE;
2161                         }
2162
2163                         cblk_w = t1->w;
2164                         cblk_h = t1->h;
2165                         tileLineAdvance = tile_w - cblk_w;
2166
2167                         tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
2168                         t1->data = tiledp;
2169                         t1->data_stride = tile_w;
2170                         if (tccp->qmfbid == 1) {
2171                             /* Do multiplication on unsigned type, even if the
2172                              * underlying type is signed, to avoid potential
2173                              * int overflow on large value (the output will be
2174                              * incorrect in such situation, but whatever...)
2175                              * This assumes complement-to-2 signed integer
2176                              * representation
2177                              * Fixes https://github.com/uclouvain/openjpeg/issues/1053
2178                              */
2179                             OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
2180                             for (j = 0; j < cblk_h; ++j) {
2181                                 for (i = 0; i < cblk_w; ++i) {
2182                                     tiledp_u[tileIndex] <<= T1_NMSEDEC_FRACBITS;
2183                                     tileIndex++;
2184                                 }
2185                                 tileIndex += tileLineAdvance;
2186                             }
2187                         } else {        /* if (tccp->qmfbid == 0) */
2188                             for (j = 0; j < cblk_h; ++j) {
2189                                 for (i = 0; i < cblk_w; ++i) {
2190                                     OPJ_INT32 tmp = tiledp[tileIndex];
2191                                     tiledp[tileIndex] =
2192                                         opj_int_fix_mul_t1(
2193                                             tmp,
2194                                             bandconst);
2195                                     tileIndex++;
2196                                 }
2197                                 tileIndex += tileLineAdvance;
2198                             }
2199                         }
2200
2201                         opj_t1_encode_cblk(
2202                             t1,
2203                             cblk,
2204                             band->bandno,
2205                             compno,
2206                             tilec->numresolutions - 1 - resno,
2207                             tccp->qmfbid,
2208                             band->stepsize,
2209                             tccp->cblksty,
2210                             tile->numcomps,
2211                             tile,
2212                             mct_norms,
2213                             mct_numcomps);
2214
2215                     } /* cblkno */
2216                 } /* precno */
2217             } /* bandno */
2218         } /* resno  */
2219     } /* compno  */
2220     return OPJ_TRUE;
2221 }
2222
2223 /* Returns whether the pass (bpno, passtype) is terminated */
2224 static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk,
2225                                    OPJ_UINT32 cblksty,
2226                                    OPJ_INT32 bpno,
2227                                    OPJ_UINT32 passtype)
2228 {
2229     /* Is it the last cleanup pass ? */
2230     if (passtype == 2 && bpno == 0) {
2231         return OPJ_TRUE;
2232     }
2233
2234     if (cblksty & J2K_CCP_CBLKSTY_TERMALL) {
2235         return OPJ_TRUE;
2236     }
2237
2238     if ((cblksty & J2K_CCP_CBLKSTY_LAZY)) {
2239         /* For bypass arithmetic bypass, terminate the 4th cleanup pass */
2240         if ((bpno == ((OPJ_INT32)cblk->numbps - 4)) && (passtype == 2)) {
2241             return OPJ_TRUE;
2242         }
2243         /* and beyond terminate all the magnitude refinement passes (in raw) */
2244         /* and cleanup passes (in MQC) */
2245         if ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype > 0)) {
2246             return OPJ_TRUE;
2247         }
2248     }
2249
2250     return OPJ_FALSE;
2251 }
2252
2253
2254 /** mod fixed_quality */
2255 static void opj_t1_encode_cblk(opj_t1_t *t1,
2256                                opj_tcd_cblk_enc_t* cblk,
2257                                OPJ_UINT32 orient,
2258                                OPJ_UINT32 compno,
2259                                OPJ_UINT32 level,
2260                                OPJ_UINT32 qmfbid,
2261                                OPJ_FLOAT64 stepsize,
2262                                OPJ_UINT32 cblksty,
2263                                OPJ_UINT32 numcomps,
2264                                opj_tcd_tile_t * tile,
2265                                const OPJ_FLOAT64 * mct_norms,
2266                                OPJ_UINT32 mct_numcomps)
2267 {
2268     OPJ_FLOAT64 cumwmsedec = 0.0;
2269
2270     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
2271
2272     OPJ_UINT32 passno;
2273     OPJ_INT32 bpno;
2274     OPJ_UINT32 passtype;
2275     OPJ_INT32 nmsedec = 0;
2276     OPJ_INT32 max;
2277     OPJ_UINT32 i, j;
2278     OPJ_BYTE type = T1_TYPE_MQ;
2279     OPJ_FLOAT64 tempwmsedec;
2280
2281 #ifdef EXTRA_DEBUG
2282     printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
2283            cblk->x0, cblk->y0, cblk->x1, cblk->y1, orient, compno, level);
2284 #endif
2285
2286     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
2287
2288     max = 0;
2289     for (i = 0; i < t1->w; ++i) {
2290         for (j = 0; j < t1->h; ++j) {
2291             OPJ_INT32 tmp = abs(t1->data[i + j * t1->data_stride]);
2292             max = opj_int_max(max, tmp);
2293         }
2294     }
2295
2296     cblk->numbps = max ? (OPJ_UINT32)((opj_int_floorlog2(max) + 1) -
2297                                       T1_NMSEDEC_FRACBITS) : 0;
2298     if (cblk->numbps == 0) {
2299         cblk->totalpasses = 0;
2300         return;
2301     }
2302
2303     bpno = (OPJ_INT32)(cblk->numbps - 1);
2304     passtype = 2;
2305
2306     opj_mqc_resetstates(mqc);
2307     opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2308     opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2309     opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2310     opj_mqc_init_enc(mqc, cblk->data);
2311
2312     for (passno = 0; bpno >= 0; ++passno) {
2313         opj_tcd_pass_t *pass = &cblk->passes[passno];
2314         type = ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype < 2) &&
2315                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2316
2317         /* If the previous pass was terminating, we need to reset the encoder */
2318         if (passno > 0 && cblk->passes[passno - 1].term) {
2319             if (type == T1_TYPE_RAW) {
2320                 opj_mqc_bypass_init_enc(mqc);
2321             } else {
2322                 opj_mqc_restart_init_enc(mqc);
2323             }
2324         }
2325
2326         switch (passtype) {
2327         case 0:
2328             opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty);
2329             break;
2330         case 1:
2331             opj_t1_enc_refpass(t1, bpno, &nmsedec, type);
2332             break;
2333         case 2:
2334             opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty);
2335             /* code switch SEGMARK (i.e. SEGSYM) */
2336             if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
2337                 opj_mqc_segmark_enc(mqc);
2338             }
2339             break;
2340         }
2341
2342         /* fixed_quality */
2343         tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid,
2344                                         stepsize, numcomps, mct_norms, mct_numcomps) ;
2345         cumwmsedec += tempwmsedec;
2346         tile->distotile += tempwmsedec;
2347         pass->distortiondec = cumwmsedec;
2348
2349         if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) {
2350             /* If it is a terminated pass, terminate it */
2351             if (type == T1_TYPE_RAW) {
2352                 opj_mqc_bypass_flush_enc(mqc, cblksty & J2K_CCP_CBLKSTY_PTERM);
2353             } else {
2354                 if (cblksty & J2K_CCP_CBLKSTY_PTERM) {
2355                     opj_mqc_erterm_enc(mqc);
2356                 } else {
2357                     opj_mqc_flush(mqc);
2358                 }
2359             }
2360             pass->term = 1;
2361             pass->rate = opj_mqc_numbytes(mqc);
2362         } else {
2363             /* Non terminated pass */
2364             OPJ_UINT32 rate_extra_bytes;
2365             if (type == T1_TYPE_RAW) {
2366                 rate_extra_bytes = opj_mqc_bypass_get_extra_bytes(
2367                                        mqc, (cblksty & J2K_CCP_CBLKSTY_PTERM));
2368             } else {
2369                 rate_extra_bytes = 3;
2370             }
2371             pass->term = 0;
2372             pass->rate = opj_mqc_numbytes(mqc) + rate_extra_bytes;
2373         }
2374
2375         if (++passtype == 3) {
2376             passtype = 0;
2377             bpno--;
2378         }
2379
2380         /* Code-switch "RESET" */
2381         if (cblksty & J2K_CCP_CBLKSTY_RESET) {
2382             opj_mqc_reset_enc(mqc);
2383         }
2384     }
2385
2386     cblk->totalpasses = passno;
2387
2388     if (cblk->totalpasses) {
2389         /* Make sure that pass rates are increasing */
2390         OPJ_UINT32 last_pass_rate = opj_mqc_numbytes(mqc);
2391         for (passno = cblk->totalpasses; passno > 0;) {
2392             opj_tcd_pass_t *pass = &cblk->passes[--passno];
2393             if (pass->rate > last_pass_rate) {
2394                 pass->rate = last_pass_rate;
2395             } else {
2396                 last_pass_rate = pass->rate;
2397             }
2398         }
2399     }
2400
2401     for (passno = 0; passno < cblk->totalpasses; passno++) {
2402         opj_tcd_pass_t *pass = &cblk->passes[passno];
2403
2404         /* Prevent generation of FF as last data byte of a pass*/
2405         /* For terminating passes, the flushing procedure ensured this already */
2406         assert(pass->rate > 0);
2407         if (cblk->data[pass->rate - 1] == 0xFF) {
2408             pass->rate--;
2409         }
2410         pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate);
2411     }
2412
2413 #ifdef EXTRA_DEBUG
2414     printf(" len=%d\n", (cblk->totalpasses) ? opj_mqc_numbytes(mqc) : 0);
2415
2416     /* Check that there not 0xff >=0x90 sequences */
2417     if (cblk->totalpasses) {
2418         OPJ_UINT32 i;
2419         OPJ_UINT32 len = opj_mqc_numbytes(mqc);
2420         for (i = 1; i < len; ++i) {
2421             if (cblk->data[i - 1] == 0xff && cblk->data[i] >= 0x90) {
2422                 printf("0xff %02x at offset %d\n", cblk->data[i], i - 1);
2423                 abort();
2424             }
2425         }
2426     }
2427 #endif
2428 }