Merge pull request #1010 from rouault/subtile_decoding_stage3
[openjpeg.git] / src / lib / openjp2 / t1.c
1 /*
2  * The copyright in this software is being made available under the 2-clauses
3  * BSD License, included below. This software may be subject to other third
4  * party and contributor rights, including patent rights, and no such rights
5  * are granted under this license.
6  *
7  * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
8  * Copyright (c) 2002-2014, Professor Benoit Macq
9  * Copyright (c) 2001-2003, David Janssens
10  * Copyright (c) 2002-2003, Yannick Verschueren
11  * Copyright (c) 2003-2007, Francois-Olivier Devaux
12  * Copyright (c) 2003-2014, Antonin Descampe
13  * Copyright (c) 2005, Herve Drolon, FreeImage Team
14  * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
15  * Copyright (c) 2012, Carl Hetherington
16  * Copyright (c) 2017, IntoPIX SA <support@intopix.com>
17  * All rights reserved.
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions
21  * are met:
22  * 1. Redistributions of source code must retain the above copyright
23  *    notice, this list of conditions and the following disclaimer.
24  * 2. Redistributions in binary form must reproduce the above copyright
25  *    notice, this list of conditions and the following disclaimer in the
26  *    documentation and/or other materials provided with the distribution.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
29  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38  * POSSIBILITY OF SUCH DAMAGE.
39  */
40
41 #define OPJ_SKIP_POISON
42 #include "opj_includes.h"
43
44 #ifdef __SSE__
45 #include <xmmintrin.h>
46 #endif
47 #ifdef __SSE2__
48 #include <emmintrin.h>
49 #endif
50
51 #if defined(__GNUC__)
52 #pragma GCC poison malloc calloc realloc free
53 #endif
54
55 #include "t1_luts.h"
56
57 /** @defgroup T1 T1 - Implementation of the tier-1 coding */
58 /*@{*/
59
/*
 * Flag word covering sample column x, row y of the code-block referenced by
 * the variable `t1` that must be in scope where the macro is expanded.
 * One flag word is shared by four consecutive rows (hence y / 4), a flag row
 * is t1->w + 2 words wide, and the two "+ 1" terms skip the first word of a
 * flag row and the first flag row.
 * Both arguments are parenthesised so that expressions such as
 * T1_FLAGS(i + 1, j + 4) select the intended element.
 */
#define T1_FLAGS(x, y) (t1->flags[(x) + 1 + (((y) / 4) + 1) * (t1->w + 2)])
61
/*
 * Make `curctx` point at entry `ctxno` of the ctxs array of the variable
 * `mqc`. Note that `mqc` is not a macro parameter: it is taken from the scope
 * in which the macro is expanded.
 */
#define opj_t1_setcurctx(curctx, ctxno)  curctx = ((mqc)->ctxs + (OPJ_UINT32)(ctxno))
63
64 /** @name Local static functions */
65 /*@{*/
66
67 static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f);
68 static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f);
69 static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos);
70 static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos);
71 static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
72                                        OPJ_UINT32 s, OPJ_UINT32 stride,
73                                        OPJ_UINT32 vsc);
74
75
76 /**
77 Decode significant pass
78 */
79
80 static INLINE void opj_t1_dec_sigpass_step_raw(
81     opj_t1_t *t1,
82     opj_flag_t *flagsp,
83     OPJ_INT32 *datap,
84     OPJ_INT32 oneplushalf,
85     OPJ_UINT32 vsc,
86     OPJ_UINT32 row);
87 static INLINE void opj_t1_dec_sigpass_step_mqc(
88     opj_t1_t *t1,
89     opj_flag_t *flagsp,
90     OPJ_INT32 *datap,
91     OPJ_INT32 oneplushalf,
92     OPJ_UINT32 row,
93     OPJ_UINT32 flags_stride,
94     OPJ_UINT32 vsc);
95
96 /**
97 Encode significant pass
98 */
99 static void opj_t1_enc_sigpass(opj_t1_t *t1,
100                                OPJ_INT32 bpno,
101                                OPJ_INT32 *nmsedec,
102                                OPJ_BYTE type,
103                                OPJ_UINT32 cblksty);
104
105 /**
106 Decode significant pass
107 */
108 static void opj_t1_dec_sigpass_raw(
109     opj_t1_t *t1,
110     OPJ_INT32 bpno,
111     OPJ_INT32 cblksty);
112
113 /**
114 Encode refinement pass
115 */
116 static void opj_t1_enc_refpass(opj_t1_t *t1,
117                                OPJ_INT32 bpno,
118                                OPJ_INT32 *nmsedec,
119                                OPJ_BYTE type);
120
121 /**
122 Decode refinement pass
123 */
124 static void opj_t1_dec_refpass_raw(
125     opj_t1_t *t1,
126     OPJ_INT32 bpno);
127
128
129 /**
130 Decode refinement pass
131 */
132
133 static INLINE void  opj_t1_dec_refpass_step_raw(
134     opj_t1_t *t1,
135     opj_flag_t *flagsp,
136     OPJ_INT32 *datap,
137     OPJ_INT32 poshalf,
138     OPJ_UINT32 row);
139 static INLINE void opj_t1_dec_refpass_step_mqc(
140     opj_t1_t *t1,
141     opj_flag_t *flagsp,
142     OPJ_INT32 *datap,
143     OPJ_INT32 poshalf,
144     OPJ_UINT32 row);
145
146
147 /**
148 Decode clean-up pass
149 */
150
151 static void opj_t1_dec_clnpass_step(
152     opj_t1_t *t1,
153     opj_flag_t *flagsp,
154     OPJ_INT32 *datap,
155     OPJ_INT32 oneplushalf,
156     OPJ_UINT32 row,
157     OPJ_UINT32 vsc);
158
159 /**
160 Encode clean-up pass
161 */
162 static void opj_t1_enc_clnpass(
163     opj_t1_t *t1,
164     OPJ_INT32 bpno,
165     OPJ_INT32 *nmsedec,
166     OPJ_UINT32 cblksty);
167
168 static OPJ_FLOAT64 opj_t1_getwmsedec(
169     OPJ_INT32 nmsedec,
170     OPJ_UINT32 compno,
171     OPJ_UINT32 level,
172     OPJ_UINT32 orient,
173     OPJ_INT32 bpno,
174     OPJ_UINT32 qmfbid,
175     OPJ_FLOAT64 stepsize,
176     OPJ_UINT32 numcomps,
177     const OPJ_FLOAT64 * mct_norms,
178     OPJ_UINT32 mct_numcomps);
179
180 static void opj_t1_encode_cblk(opj_t1_t *t1,
181                                opj_tcd_cblk_enc_t* cblk,
182                                OPJ_UINT32 orient,
183                                OPJ_UINT32 compno,
184                                OPJ_UINT32 level,
185                                OPJ_UINT32 qmfbid,
186                                OPJ_FLOAT64 stepsize,
187                                OPJ_UINT32 cblksty,
188                                OPJ_UINT32 numcomps,
189                                opj_tcd_tile_t * tile,
190                                const OPJ_FLOAT64 * mct_norms,
191                                OPJ_UINT32 mct_numcomps);
192
193 /**
194 Decode 1 code-block
195 @param t1 T1 handle
196 @param cblk Code-block coding parameters
197 @param orient
198 @param roishift Region of interest shifting value
199 @param cblksty Code-block style
200 @param p_manager the event manager
201 @param p_manager_mutex mutex for the event manager
202 @param check_pterm whether PTERM correct termination should be checked
203 */
204 static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
205                                    opj_tcd_cblk_dec_t* cblk,
206                                    OPJ_UINT32 orient,
207                                    OPJ_UINT32 roishift,
208                                    OPJ_UINT32 cblksty,
209                                    opj_event_mgr_t *p_manager,
210                                    opj_mutex_t* p_manager_mutex,
211                                    OPJ_BOOL check_pterm);
212
213 static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
214                                         OPJ_UINT32 w,
215                                         OPJ_UINT32 h);
216
217 /*@}*/
218
219 /*@}*/
220
221 /* ----------------------------------------------------------------------- */
222
223 static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f)
224 {
225     return mqc->lut_ctxno_zc_orient[(f & T1_SIGMA_NEIGHBOURS)];
226 }
227
228 static INLINE OPJ_UINT32 opj_t1_getctxtno_sc_or_spb_index(OPJ_UINT32 fX,
229         OPJ_UINT32 pfX,
230         OPJ_UINT32 nfX,
231         OPJ_UINT32 ci)
232 {
233     /*
234       0 pfX T1_CHI_THIS           T1_LUT_SGN_W
235       1 tfX T1_SIGMA_1            T1_LUT_SIG_N
236       2 nfX T1_CHI_THIS           T1_LUT_SGN_E
237       3 tfX T1_SIGMA_3            T1_LUT_SIG_W
238       4  fX T1_CHI_(THIS - 1)     T1_LUT_SGN_N
239       5 tfX T1_SIGMA_5            T1_LUT_SIG_E
240       6  fX T1_CHI_(THIS + 1)     T1_LUT_SGN_S
241       7 tfX T1_SIGMA_7            T1_LUT_SIG_S
242     */
243
244     OPJ_UINT32 lu = (fX >> (ci * 3U)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 |
245                                          T1_SIGMA_7);
246
247     lu |= (pfX >> (T1_CHI_THIS_I      + (ci * 3U))) & (1U << 0);
248     lu |= (nfX >> (T1_CHI_THIS_I - 2U + (ci * 3U))) & (1U << 2);
249     if (ci == 0U) {
250         lu |= (fX >> (T1_CHI_0_I - 4U)) & (1U << 4);
251     } else {
252         lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4);
253     }
254     lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6);
255     return lu;
256 }
257
258 static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 lu)
259 {
260     return lut_ctxno_sc[lu];
261 }
262
263 static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f)
264 {
265     OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
266     OPJ_UINT32 tmp2 = (f & T1_MU_0) ? T1_CTXNO_MAG + 2 : tmp;
267     return tmp2;
268 }
269
270 static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 lu)
271 {
272     return lut_spb[lu];
273 }
274
275 static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos)
276 {
277     if (bitpos > 0) {
278         return lut_nmsedec_sig[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
279     }
280
281     return lut_nmsedec_sig0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
282 }
283
284 static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos)
285 {
286     if (bitpos > 0) {
287         return lut_nmsedec_ref[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
288     }
289
290     return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
291 }
292
/*
 * Record that the coefficient in sub-row ci of the current flag word has
 * become significant with sign bit s, and tell the neighbouring flag words.
 *   flags  : lvalue holding the current flag word (may be a local copy)
 *   flagsp : pointer to the current flag word in the flag array
 *   stride : distance, in flag words, between two consecutive flag rows
 *   vsc    : when non-zero the flag row above is left untouched
 */
#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride, vsc) \
{ \
    /* previous column: the new significance lies to its east */ \
    flagsp[-1] |= T1_SIGMA_5 << (3U * ci); \
 \
    /* current word: significance bit and sign bit of this coefficient */ \
    flags |= ((s << T1_CHI_1_I) | T1_SIGMA_4) << (3U * ci); \
 \
    /* next column: the new significance lies to its west */ \
    flagsp[1] |= T1_SIGMA_3 << (3U * ci); \
 \
    /* first sub-row: update the three words of the flag row above */ \
    if (ci == 0U) { \
        if (!(vsc)) { \
            opj_flag_t* row_above = flagsp - (stride); \
            *row_above |= (s << T1_CHI_5_I) | T1_SIGMA_16; \
            row_above[-1] |= T1_SIGMA_17; \
            row_above[1] |= T1_SIGMA_15; \
        } \
    } \
 \
    /* last sub-row: update the three words of the flag row below */ \
    if (ci == 3U) { \
        opj_flag_t* row_below = flagsp + (stride); \
        *row_below |= (s << T1_CHI_0_I) | T1_SIGMA_1; \
        row_below[-1] |= T1_SIGMA_2; \
        row_below[1] |= T1_SIGMA_0; \
    } \
}
320
321
322 static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
323                                        OPJ_UINT32 s, OPJ_UINT32 stride,
324                                        OPJ_UINT32 vsc)
325 {
326     opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride, vsc);
327 }
328
329 /**
330 Encode significant pass
331 */
332 static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1,
333         opj_flag_t *flagsp,
334         OPJ_INT32 *datap,
335         OPJ_INT32 bpno,
336         OPJ_INT32 one,
337         OPJ_INT32 *nmsedec,
338         OPJ_BYTE type,
339         OPJ_UINT32 ci,
340         OPJ_UINT32 vsc)
341 {
342     OPJ_UINT32 v;
343
344     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
345
346     OPJ_UINT32 const flags = *flagsp;
347
348     if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
349             (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
350         OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
351         v = opj_int_abs(*datap) & one ? 1 : 0;
352 #ifdef DEBUG_ENC_SIG
353         fprintf(stderr, "   ctxt1=%d\n", ctxt1);
354 #endif
355         opj_mqc_setcurctx(mqc, ctxt1);
356         if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
357             opj_mqc_bypass_enc(mqc, v);
358         } else {
359             opj_mqc_encode(mqc, v);
360         }
361         if (v) {
362             OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index(
363                                 *flagsp,
364                                 flagsp[-1], flagsp[1],
365                                 ci);
366             OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu);
367             v = *datap < 0 ? 1U : 0U;
368             *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
369                                               (OPJ_UINT32)bpno);
370 #ifdef DEBUG_ENC_SIG
371             fprintf(stderr, "   ctxt2=%d\n", ctxt2);
372 #endif
373             opj_mqc_setcurctx(mqc, ctxt2);
374             if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
375                 opj_mqc_bypass_enc(mqc, v);
376             } else {
377                 OPJ_UINT32 spb = opj_t1_getspb(lu);
378 #ifdef DEBUG_ENC_SIG
379                 fprintf(stderr, "   spb=%d\n", spb);
380 #endif
381                 opj_mqc_encode(mqc, v ^ spb);
382             }
383             opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
384         }
385         *flagsp |= T1_PI_THIS << (ci * 3U);
386     }
387 }
388
389 static INLINE void opj_t1_dec_sigpass_step_raw(
390     opj_t1_t *t1,
391     opj_flag_t *flagsp,
392     OPJ_INT32 *datap,
393     OPJ_INT32 oneplushalf,
394     OPJ_UINT32 vsc,
395     OPJ_UINT32 ci)
396 {
397     OPJ_UINT32 v;
398     opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
399
400     OPJ_UINT32 const flags = *flagsp;
401
402     if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
403             (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
404         if (opj_mqc_raw_decode(mqc)) {
405             v = opj_mqc_raw_decode(mqc);
406             *datap = v ? -oneplushalf : oneplushalf;
407             opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
408         }
409         *flagsp |= T1_PI_THIS << (ci * 3U);
410     }
411 }
412
/*
 * Decode one coefficient in the significance pass with the MQ coder.
 *   flags        : lvalue holding the current flag word (may be a local copy)
 *   flagsp       : pointer to the current flag word in the flag array
 *   flags_stride : distance, in flag words, between two flag rows
 *   data         : base pointer; the coefficient is data[ci * data_stride]
 *   mqc, curctx, v, a, c, ct : MQ decoder state handed to
 *                              opj_mqc_decode_macro; v receives decoded bits
 *   oneplushalf  : magnitude stored when the coefficient becomes significant
 *   vsc          : forwarded to opj_t1_update_flags_macro
 * A variable named mqc must be in scope: opj_t1_setcurctx refers to it.
 */
#define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \
                                          data_stride, ci, mqc, curctx, \
                                          v, a, c, ct, oneplushalf, vsc) \
{ \
    /* not yet significant, not yet visited, with a significant neighbour */ \
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U && \
        (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) { \
        OPJ_UINT32 zc_ctxno = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
        opj_t1_setcurctx(curctx, zc_ctxno); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        if (v) { \
            /* significant: decode the sign and undo the sign prediction */ \
            OPJ_UINT32 sc_lut_index = opj_t1_getctxtno_sc_or_spb_index( \
                                          flags, \
                                          flagsp[-1], flagsp[1], \
                                          ci); \
            OPJ_UINT32 sc_ctxno = opj_t1_getctxno_sc(sc_lut_index); \
            OPJ_UINT32 sign_pred = opj_t1_getspb(sc_lut_index); \
            opj_t1_setcurctx(curctx, sc_ctxno); \
            opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
            v = v ^ sign_pred; \
            data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
            opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
        } \
        /* remember that this coefficient was visited by the pass */ \
        flags |= T1_PI_THIS << (ci * 3U); \
    } \
}
438
439 static INLINE void opj_t1_dec_sigpass_step_mqc(
440     opj_t1_t *t1,
441     opj_flag_t *flagsp,
442     OPJ_INT32 *datap,
443     OPJ_INT32 oneplushalf,
444     OPJ_UINT32 ci,
445     OPJ_UINT32 flags_stride,
446     OPJ_UINT32 vsc)
447 {
448     OPJ_UINT32 v;
449
450     opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
451     opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap,
452                                       0, ci, mqc, mqc->curctx,
453                                       v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
454 }
455
456 static void opj_t1_enc_sigpass(opj_t1_t *t1,
457                                OPJ_INT32 bpno,
458                                OPJ_INT32 *nmsedec,
459                                OPJ_BYTE type,
460                                OPJ_UINT32 cblksty
461                               )
462 {
463     OPJ_UINT32 i, k;
464     OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
465     opj_flag_t* f = &T1_FLAGS(0, 0);
466     OPJ_UINT32 const extra = 2;
467
468     *nmsedec = 0;
469 #ifdef DEBUG_ENC_SIG
470     fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
471 #endif
472     for (k = 0; k < (t1->h & ~3U); k += 4) {
473 #ifdef DEBUG_ENC_SIG
474         fprintf(stderr, " k=%d\n", k);
475 #endif
476         for (i = 0; i < t1->w; ++i) {
477 #ifdef DEBUG_ENC_SIG
478             fprintf(stderr, " i=%d\n", i);
479 #endif
480             if (*f == 0U) {
481                 /* Nothing to do for any of the 4 data points */
482                 f++;
483                 continue;
484             }
485             opj_t1_enc_sigpass_step(
486                 t1,
487                 f,
488                 &t1->data[((k + 0) * t1->data_stride) + i],
489                 bpno,
490                 one,
491                 nmsedec,
492                 type,
493                 0, cblksty & J2K_CCP_CBLKSTY_VSC);
494             opj_t1_enc_sigpass_step(
495                 t1,
496                 f,
497                 &t1->data[((k + 1) * t1->data_stride) + i],
498                 bpno,
499                 one,
500                 nmsedec,
501                 type,
502                 1, 0);
503             opj_t1_enc_sigpass_step(
504                 t1,
505                 f,
506                 &t1->data[((k + 2) * t1->data_stride) + i],
507                 bpno,
508                 one,
509                 nmsedec,
510                 type,
511                 2, 0);
512             opj_t1_enc_sigpass_step(
513                 t1,
514                 f,
515                 &t1->data[((k + 3) * t1->data_stride) + i],
516                 bpno,
517                 one,
518                 nmsedec,
519                 type,
520                 3, 0);
521             ++f;
522         }
523         f += extra;
524     }
525
526     if (k < t1->h) {
527         OPJ_UINT32 j;
528 #ifdef DEBUG_ENC_SIG
529         fprintf(stderr, " k=%d\n", k);
530 #endif
531         for (i = 0; i < t1->w; ++i) {
532 #ifdef DEBUG_ENC_SIG
533             fprintf(stderr, " i=%d\n", i);
534 #endif
535             if (*f == 0U) {
536                 /* Nothing to do for any of the 4 data points */
537                 f++;
538                 continue;
539             }
540             for (j = k; j < t1->h; ++j) {
541                 opj_t1_enc_sigpass_step(
542                     t1,
543                     f,
544                     &t1->data[(j * t1->data_stride) + i],
545                     bpno,
546                     one,
547                     nmsedec,
548                     type,
549                     j - k,
550                     (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
551             }
552             ++f;
553         }
554     }
555 }
556
557 static void opj_t1_dec_sigpass_raw(
558     opj_t1_t *t1,
559     OPJ_INT32 bpno,
560     OPJ_INT32 cblksty)
561 {
562     OPJ_INT32 one, half, oneplushalf;
563     OPJ_UINT32 i, j, k;
564     OPJ_INT32 *data = t1->data;
565     opj_flag_t *flagsp = &T1_FLAGS(0, 0);
566     const OPJ_UINT32 l_w = t1->w;
567     one = 1 << bpno;
568     half = one >> 1;
569     oneplushalf = one | half;
570
571     for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
572         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
573             opj_flag_t flags = *flagsp;
574             if (flags != 0) {
575                 opj_t1_dec_sigpass_step_raw(
576                     t1,
577                     flagsp,
578                     data,
579                     oneplushalf,
580                     cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
581                     0U);
582                 opj_t1_dec_sigpass_step_raw(
583                     t1,
584                     flagsp,
585                     data + l_w,
586                     oneplushalf,
587                     OPJ_FALSE, /* vsc */
588                     1U);
589                 opj_t1_dec_sigpass_step_raw(
590                     t1,
591                     flagsp,
592                     data + 2 * l_w,
593                     oneplushalf,
594                     OPJ_FALSE, /* vsc */
595                     2U);
596                 opj_t1_dec_sigpass_step_raw(
597                     t1,
598                     flagsp,
599                     data + 3 * l_w,
600                     oneplushalf,
601                     OPJ_FALSE, /* vsc */
602                     3U);
603             }
604         }
605     }
606     if (k < t1->h) {
607         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
608             for (j = 0; j < t1->h - k; ++j) {
609                 opj_t1_dec_sigpass_step_raw(
610                     t1,
611                     flagsp,
612                     data + j * l_w,
613                     oneplushalf,
614                     cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
615                     j);
616             }
617         }
618     }
619 }
620
/*
 * Body shared by the MQ-coded significance-pass decoders.
 *   t1           : T1 handle
 *   bpno         : bit-plane number
 *   vsc          : style flag passed to the first row of every stripe
 *   w, h         : width and height of the code-block
 *   flags_stride : distance, in flag words, between two flag rows
 * Complete stripes of four rows run with the MQ decoder state held in local
 * variables (DOWNLOAD_MQC_VARIABLES / UPLOAD_MQC_VARIABLES); the trailing
 * partial stripe goes through opj_t1_dec_sigpass_step_mqc once that state has
 * been written back.
 */
#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, vsc, w, h, flags_stride) \
{ \
        OPJ_INT32 one, oneplushalf; \
        OPJ_UINT32 i, j, k; \
        register OPJ_INT32 *data = t1->data; \
        register opj_flag_t *flagsp = &t1->flags[(flags_stride) + 1]; \
        const OPJ_UINT32 l_w = w; \
        opj_mqc_t* mqc = &(t1->mqc); \
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
        register OPJ_UINT32 v; \
        one = 1 << bpno; \
        oneplushalf = one | (one >> 1); \
        /* complete stripes of four rows */ \
        for (k = 0; k < (h & ~3u); k += 4) { \
                for (i = 0; i < l_w; ++i) { \
                        /* work on a local copy, stored back once at the end */ \
                        opj_flag_t flags = *flagsp; \
                        if( flags != 0 ) { \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf, vsc); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, flags_stride, data, \
                                l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            *flagsp = flags; \
                        } \
                        ++data; \
                        ++flagsp; \
                } \
                /* next stripe: three more data rows, two padding flag words */ \
                data += 3*l_w; \
                flagsp += 2; \
        } \
        UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
        /* trailing partial stripe, if any */ \
        if( k < h ) { \
            for (i = 0; i < l_w; ++i) { \
                for (j = 0; j < h - k; ++j) { \
                        opj_t1_dec_sigpass_step_mqc(t1, flagsp, \
                            data + j * l_w, oneplushalf, j, flags_stride, vsc); \
                } \
                ++data; \
                ++flagsp; \
            } \
        } \
}
664
/*
 * MQ-coded significance-pass decoder with compile-time constant dimensions:
 * expands opj_t1_dec_sigpass_mqc_internal with vsc = OPJ_FALSE, width 64,
 * height 64 and a flag-row stride of 66 (width + 2).
 */
665 static void opj_t1_dec_sigpass_mqc_64x64_novsc(
666     opj_t1_t *t1,
667     OPJ_INT32 bpno)
668 {
669     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
670 }
671
/*
 * MQ-coded significance-pass decoder with compile-time constant dimensions:
 * expands opj_t1_dec_sigpass_mqc_internal with vsc = OPJ_TRUE, width 64,
 * height 64 and a flag-row stride of 66 (width + 2).
 */
672 static void opj_t1_dec_sigpass_mqc_64x64_vsc(
673     opj_t1_t *t1,
674     OPJ_INT32 bpno)
675 {
676     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
677 }
678
/*
 * MQ-coded significance-pass decoder for arbitrary dimensions: expands
 * opj_t1_dec_sigpass_mqc_internal with vsc = OPJ_FALSE, the width and height
 * read from t1, and a flag-row stride of t1->w + 2.
 */
679 static void opj_t1_dec_sigpass_mqc_generic_novsc(
680     opj_t1_t *t1,
681     OPJ_INT32 bpno)
682 {
683     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
684                                     t1->w + 2U);
685 }
686
/*
 * MQ-coded significance-pass decoder for arbitrary dimensions: expands
 * opj_t1_dec_sigpass_mqc_internal with vsc = OPJ_TRUE, the width and height
 * read from t1, and a flag-row stride of t1->w + 2.
 */
687 static void opj_t1_dec_sigpass_mqc_generic_vsc(
688     opj_t1_t *t1,
689     OPJ_INT32 bpno)
690 {
691     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
692                                     t1->w + 2U);
693 }
694
695 static void opj_t1_dec_sigpass_mqc(
696     opj_t1_t *t1,
697     OPJ_INT32 bpno,
698     OPJ_INT32 cblksty)
699 {
700     if (t1->w == 64 && t1->h == 64) {
701         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
702             opj_t1_dec_sigpass_mqc_64x64_vsc(t1, bpno);
703         } else {
704             opj_t1_dec_sigpass_mqc_64x64_novsc(t1, bpno);
705         }
706     } else {
707         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
708             opj_t1_dec_sigpass_mqc_generic_vsc(t1, bpno);
709         } else {
710             opj_t1_dec_sigpass_mqc_generic_novsc(t1, bpno);
711         }
712     }
713 }
714
715 /**
716 Encode refinement pass step
717 */
718 static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1,
719         opj_flag_t *flagsp,
720         OPJ_INT32 *datap,
721         OPJ_INT32 bpno,
722         OPJ_INT32 one,
723         OPJ_INT32 *nmsedec,
724         OPJ_BYTE type,
725         OPJ_UINT32 ci)
726 {
727     OPJ_UINT32 v;
728
729     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
730
731     OPJ_UINT32 const shift_flags =
732         (*flagsp >> (ci * 3U));
733
734     if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) {
735         OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags);
736         *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap),
737                                           (OPJ_UINT32)bpno);
738         v = opj_int_abs(*datap) & one ? 1 : 0;
739 #ifdef DEBUG_ENC_REF
740         fprintf(stderr, "  ctxt=%d\n", ctxt);
741 #endif
742         opj_mqc_setcurctx(mqc, ctxt);
743         if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
744             opj_mqc_bypass_enc(mqc, v);
745         } else {
746             opj_mqc_encode(mqc, v);
747         }
748         *flagsp |= T1_MU_THIS << (ci * 3U);
749     }
750 }
751
752
753 static INLINE void opj_t1_dec_refpass_step_raw(
754     opj_t1_t *t1,
755     opj_flag_t *flagsp,
756     OPJ_INT32 *datap,
757     OPJ_INT32 poshalf,
758     OPJ_UINT32 ci)
759 {
760     OPJ_UINT32 v;
761
762     opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
763
764     if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
765             (T1_SIGMA_THIS << (ci * 3U))) {
766         v = opj_mqc_raw_decode(mqc);
767         *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
768         *flagsp |= T1_MU_THIS << (ci * 3U);
769     }
770 }
771
/*
 * Decode one coefficient in the refinement pass with the MQ coder.
 *   flags   : lvalue holding the current flag word (may be a local copy)
 *   data    : base pointer; the coefficient is data[ci * data_stride]
 *   mqc, curctx, v, a, c, ct : MQ decoder state handed to
 *                              opj_mqc_decode_macro; v receives the bit
 *   poshalf : refinement step
 * A variable named mqc must be in scope: opj_t1_setcurctx refers to it.
 */
#define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \
                                          mqc, curctx, v, a, c, ct, poshalf) \
{ \
    /* significant, and not visited by the significance pass */ \
    if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == \
            (T1_SIGMA_THIS << (ci * 3U))) { \
        OPJ_UINT32 mag_ctxno = opj_t1_getctxno_mag(flags >> (ci * 3U)); \
        opj_t1_setcurctx(curctx, mag_ctxno); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        if (v ^ (data[ci*data_stride] < 0)) { \
            data[ci*data_stride] += poshalf; \
        } else { \
            data[ci*data_stride] += -poshalf; \
        } \
        /* remember that this coefficient has been refined */ \
        flags |= T1_MU_THIS << (ci * 3U); \
    } \
}
784
785 static INLINE void opj_t1_dec_refpass_step_mqc(
786     opj_t1_t *t1,
787     opj_flag_t *flagsp,
788     OPJ_INT32 *datap,
789     OPJ_INT32 poshalf,
790     OPJ_UINT32 ci)
791 {
792     OPJ_UINT32 v;
793
794     opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
795     opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci,
796                                       mqc, mqc->curctx, v, mqc->a, mqc->c,
797                                       mqc->ct, poshalf);
798 }
799
800 static void opj_t1_enc_refpass(
801     opj_t1_t *t1,
802     OPJ_INT32 bpno,
803     OPJ_INT32 *nmsedec,
804     OPJ_BYTE type)
805 {
806     OPJ_UINT32 i, k;
807     const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
808     opj_flag_t* f = &T1_FLAGS(0, 0);
809     const OPJ_UINT32 extra = 2U;
810
811     *nmsedec = 0;
812 #ifdef DEBUG_ENC_REF
813     fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
814 #endif
815     for (k = 0; k < (t1->h & ~3U); k += 4) {
816 #ifdef DEBUG_ENC_REF
817         fprintf(stderr, " k=%d\n", k);
818 #endif
819         for (i = 0; i < t1->w; ++i) {
820 #ifdef DEBUG_ENC_REF
821             fprintf(stderr, " i=%d\n", i);
822 #endif
823             if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
824                 /* none significant */
825                 f++;
826                 continue;
827             }
828             if ((*f & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
829                     (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
830                 /* all processed by sigpass */
831                 f++;
832                 continue;
833             }
834
835             opj_t1_enc_refpass_step(
836                 t1,
837                 f,
838                 &t1->data[((k + 0) * t1->data_stride) + i],
839                 bpno,
840                 one,
841                 nmsedec,
842                 type,
843                 0);
844             opj_t1_enc_refpass_step(
845                 t1,
846                 f,
847                 &t1->data[((k + 1) * t1->data_stride) + i],
848                 bpno,
849                 one,
850                 nmsedec,
851                 type,
852                 1);
853             opj_t1_enc_refpass_step(
854                 t1,
855                 f,
856                 &t1->data[((k + 2) * t1->data_stride) + i],
857                 bpno,
858                 one,
859                 nmsedec,
860                 type,
861                 2);
862             opj_t1_enc_refpass_step(
863                 t1,
864                 f,
865                 &t1->data[((k + 3) * t1->data_stride) + i],
866                 bpno,
867                 one,
868                 nmsedec,
869                 type,
870                 3);
871             ++f;
872         }
873         f += extra;
874     }
875
876     if (k < t1->h) {
877         OPJ_UINT32 j;
878 #ifdef DEBUG_ENC_REF
879         fprintf(stderr, " k=%d\n", k);
880 #endif
881         for (i = 0; i < t1->w; ++i) {
882 #ifdef DEBUG_ENC_REF
883             fprintf(stderr, " i=%d\n", i);
884 #endif
885             if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
886                 /* none significant */
887                 f++;
888                 continue;
889             }
890             for (j = k; j < t1->h; ++j) {
891                 opj_t1_enc_refpass_step(
892                     t1,
893                     f,
894                     &t1->data[(j * t1->data_stride) + i],
895                     bpno,
896                     one,
897                     nmsedec,
898                     type,
899                     j - k);
900             }
901             ++f;
902         }
903     }
904 }
905
906
907 static void opj_t1_dec_refpass_raw(
908     opj_t1_t *t1,
909     OPJ_INT32 bpno)
910 {
911     OPJ_INT32 one, poshalf;
912     OPJ_UINT32 i, j, k;
913     OPJ_INT32 *data = t1->data;
914     opj_flag_t *flagsp = &T1_FLAGS(0, 0);
915     const OPJ_UINT32 l_w = t1->w;
916     one = 1 << bpno;
917     poshalf = one >> 1;
918     for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
919         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
920             opj_flag_t flags = *flagsp;
921             if (flags != 0) {
922                 opj_t1_dec_refpass_step_raw(
923                     t1,
924                     flagsp,
925                     data,
926                     poshalf,
927                     0U);
928                 opj_t1_dec_refpass_step_raw(
929                     t1,
930                     flagsp,
931                     data + l_w,
932                     poshalf,
933                     1U);
934                 opj_t1_dec_refpass_step_raw(
935                     t1,
936                     flagsp,
937                     data + 2 * l_w,
938                     poshalf,
939                     2U);
940                 opj_t1_dec_refpass_step_raw(
941                     t1,
942                     flagsp,
943                     data + 3 * l_w,
944                     poshalf,
945                     3U);
946             }
947         }
948     }
949     if (k < t1->h) {
950         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
951             for (j = 0; j < t1->h - k; ++j) {
952                 opj_t1_dec_refpass_step_raw(
953                     t1,
954                     flagsp,
955                     data + j * l_w,
956                     poshalf,
957                     j);
958             }
959         }
960     }
961 }
962
/**
Body of the MQ-coded magnitude-refinement decoding pass, written as a macro so
that it can be instantiated with constant or run-time dimensions.

@param t1           Tier-1 handle (data, flags and MQ coder state are read from it)
@param bpno         Bit-plane number; poshalf is (1 << bpno) >> 1
@param w            Code-block width (also used as the data row stride)
@param h            Code-block height
@param flags_stride Number of flag words per flag row; the walk starts at
                    t1->flags[flags_stride + 1]

Full stripes of 4 rows: for each column whose flag word is non-zero, the flag
word is copied into a local, the step macro is expanded for ci = 0..3, and the
local is stored back. The MQ coder variables are brought into locals with
DOWNLOAD_MQC_VARIABLES before the loop and written back with
UPLOAD_MQC_VARIABLES after it.

Remaining rows (when h is not a multiple of 4) are processed after the upload,
through opj_t1_dec_refpass_step_mqc(), for every column.

NOTE(review): w, h and flags_stride are substituted without parentheses, so
callers should keep passing simple expressions as the existing ones do.
*/
963 #define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \
964 { \
965         OPJ_INT32 one, poshalf; \
966         OPJ_UINT32 i, j, k; \
967         register OPJ_INT32 *data = t1->data; \
968         register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
969         const OPJ_UINT32 l_w = w; \
970         opj_mqc_t* mqc = &(t1->mqc); \
971         DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
972         register OPJ_UINT32 v; \
973         one = 1 << bpno; \
974         poshalf = one >> 1; \
975         for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
976                 for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
977                         opj_flag_t flags = *flagsp; \
978                         if( flags != 0 ) { \
979                             opj_t1_dec_refpass_step_mqc_macro( \
980                                 flags, data, l_w, 0, \
981                                 mqc, curctx, v, a, c, ct, poshalf); \
982                             opj_t1_dec_refpass_step_mqc_macro( \
983                                 flags, data, l_w, 1, \
984                                 mqc, curctx, v, a, c, ct, poshalf); \
985                             opj_t1_dec_refpass_step_mqc_macro( \
986                                 flags, data, l_w, 2, \
987                                 mqc, curctx, v, a, c, ct, poshalf); \
988                             opj_t1_dec_refpass_step_mqc_macro( \
989                                 flags, data, l_w, 3, \
990                                 mqc, curctx, v, a, c, ct, poshalf); \
991                             *flagsp = flags; \
992                         } \
993                 } \
994         } \
995         UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
996         if( k < h ) { \
997             for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
998                 for (j = 0; j < h - k; ++j) { \
999                         opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \
1000                 } \
1001             } \
1002         } \
1003 }
1004
/**
MQ-coded refinement pass instantiated with constant dimensions: width 64,
height 64 and a flags stride of 66 (width + 2). Only valid for 64x64
code-blocks; opj_t1_dec_refpass_mqc() selects it in that case.

@param t1   Tier-1 handle
@param bpno Bit-plane number
*/
1005 static void opj_t1_dec_refpass_mqc_64x64(
1006     opj_t1_t *t1,
1007     OPJ_INT32 bpno)
1008 {
1009     opj_t1_dec_refpass_mqc_internal(t1, bpno, 64, 64, 66);
1010 }
1011
/**
MQ-coded refinement pass for arbitrary code-block dimensions: the width,
height and flags stride (t1->w + 2) are read from the handle at run time.

@param t1   Tier-1 handle
@param bpno Bit-plane number
*/
1012 static void opj_t1_dec_refpass_mqc_generic(
1013     opj_t1_t *t1,
1014     OPJ_INT32 bpno)
1015 {
1016     opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U);
1017 }
1018
1019 static void opj_t1_dec_refpass_mqc(
1020     opj_t1_t *t1,
1021     OPJ_INT32 bpno)
1022 {
1023     if (t1->w == 64 && t1->h == 64) {
1024         opj_t1_dec_refpass_mqc_64x64(t1, bpno);
1025     } else {
1026         opj_t1_dec_refpass_mqc_generic(t1, bpno);
1027     }
1028 }
1029
1030 /**
1031 Encode clean-up pass step
1032 */
1033 static void opj_t1_enc_clnpass_step(
1034     opj_t1_t *t1,
1035     opj_flag_t *flagsp,
1036     OPJ_INT32 *datap,
1037     OPJ_INT32 bpno,
1038     OPJ_INT32 one,
1039     OPJ_INT32 *nmsedec,
1040     OPJ_UINT32 agg,
1041     OPJ_UINT32 runlen,
1042     OPJ_UINT32 lim,
1043     OPJ_UINT32 cblksty)
1044 {
1045     OPJ_UINT32 v;
1046     OPJ_UINT32 ci;
1047     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1048
1049     const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 |
1050                               T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1051
1052     if ((*flagsp & check) == check) {
1053         if (runlen == 0) {
1054             *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1055         } else if (runlen == 1) {
1056             *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3);
1057         } else if (runlen == 2) {
1058             *flagsp &= ~(T1_PI_2 | T1_PI_3);
1059         } else if (runlen == 3) {
1060             *flagsp &= ~(T1_PI_3);
1061         }
1062         return;
1063     }
1064
1065     for (ci = runlen; ci < lim; ++ci) {
1066         OPJ_UINT32 vsc;
1067         opj_flag_t flags;
1068
1069         flags = *flagsp;
1070
1071         if ((agg != 0) && (ci == runlen)) {
1072             goto LABEL_PARTIAL;
1073         }
1074
1075         if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {
1076             OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
1077 #ifdef DEBUG_ENC_CLN
1078             printf("   ctxt1=%d\n", ctxt1);
1079 #endif
1080             opj_mqc_setcurctx(mqc, ctxt1);
1081             v = opj_int_abs(*datap) & one ? 1 : 0;
1082             opj_mqc_encode(mqc, v);
1083             if (v) {
1084                 OPJ_UINT32 ctxt2, spb;
1085                 OPJ_UINT32 lu;
1086 LABEL_PARTIAL:
1087                 lu = opj_t1_getctxtno_sc_or_spb_index(
1088                          *flagsp,
1089                          flagsp[-1], flagsp[1],
1090                          ci);
1091                 *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
1092                                                   (OPJ_UINT32)bpno);
1093                 ctxt2 = opj_t1_getctxno_sc(lu);
1094 #ifdef DEBUG_ENC_CLN
1095                 printf("   ctxt2=%d\n", ctxt2);
1096 #endif
1097                 opj_mqc_setcurctx(mqc, ctxt2);
1098
1099                 v = *datap < 0 ? 1U : 0U;
1100                 spb = opj_t1_getspb(lu);
1101 #ifdef DEBUG_ENC_CLN
1102                 printf("   spb=%d\n", spb);
1103 #endif
1104                 opj_mqc_encode(mqc, v ^ spb);
1105                 vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0;
1106                 opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc);
1107             }
1108         }
1109         *flagsp &= ~(T1_PI_THIS << (3U * ci));
1110         datap += t1->data_stride;
1111     }
1112 }
1113
/**
Decode clean-up pass step for sample ci of a column, as a macro.

@param check_flags  When true, the sample is skipped if its T1_SIGMA_THIS or
                    T1_PI_THIS bit (shifted by ci * 3) is set in flags
@param partial      When true, the zero-coding decision is not decoded and the
                    macro goes straight to sign decoding
@param flags        Lvalue holding the column flag word (updated in place)
@param flagsp       Pointer to the column flag word; flagsp[-1] and flagsp[1]
                    are read for the sign context
@param flags_stride Flag row stride, forwarded to opj_t1_update_flags_macro
@param data         Base pointer; the result is stored at data[ci*data_stride]
@param data_stride  Distance between consecutive rows in data
@param ci           Index of the sample inside the column
@param mqc, curctx, a, c, ct  MQ decoder and its working variables
@param v            Scratch lvalue receiving the decoded bits
@param oneplushalf  Magnitude stored for a newly significant sample
                    (negated when the decoded sign is set)
@param vsc          Forwarded to opj_t1_update_flags_macro

The do { } while(0) wrapper lets the zero-coding branch leave early with
break when the decoded decision is 0.
*/
1114 #define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
1115                                       flags, flagsp, flags_stride, data, \
1116                                       data_stride, ci, mqc, curctx, \
1117                                       v, a, c, ct, oneplushalf, vsc) \
1118 { \
1119     if ( !check_flags || !(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {\
1120         do { \
1121             if( !partial ) { \
1122                 OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
1123                 opj_t1_setcurctx(curctx, ctxt1); \
1124                 opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1125                 if( !v ) \
1126                     break; \
1127             } \
1128             { \
1129                 OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
1130                                     flags, flagsp[-1], flagsp[1], \
1131                                     ci); \
1132                 opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \
1133                 opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1134                 v = v ^ opj_t1_getspb(lu); \
1135                 data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
1136                 opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
1137             } \
1138         } while(0); \
1139     } \
1140 }
1141
/**
Function form of the clean-up pass decoding step, operating directly on the
MQ coder fields of t1 and on *flagsp.

The step macro is expanded with check_flags = OPJ_TRUE and
partial = OPJ_FALSE. data_stride is passed as 0, so the result is written to
*datap itself: the caller must pass the address of the sample for row ci.

@param t1          Tier-1 handle
@param flagsp      Flag word of the column
@param datap       Address of the sample to decode
@param oneplushalf Magnitude stored for a newly significant sample
@param ci          Index of the sample inside the column
@param vsc         Forwarded to the flags update
*/
1142 static void opj_t1_dec_clnpass_step(
1143     opj_t1_t *t1,
1144     opj_flag_t *flagsp,
1145     OPJ_INT32 *datap,
1146     OPJ_INT32 oneplushalf,
1147     OPJ_UINT32 ci,
1148     OPJ_UINT32 vsc)
1149 {
1150     OPJ_UINT32 v;
1151
1152     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1153     opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE,
1154                                   *flagsp, flagsp, t1->w + 2U, datap,
1155                                   0, ci, mqc, mqc->curctx,
1156                                   v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
1157 }
1158
1159 static void opj_t1_enc_clnpass(
1160     opj_t1_t *t1,
1161     OPJ_INT32 bpno,
1162     OPJ_INT32 *nmsedec,
1163     OPJ_UINT32 cblksty)
1164 {
1165     OPJ_UINT32 i, k;
1166     const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
1167     OPJ_UINT32 agg, runlen;
1168
1169     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1170
1171     *nmsedec = 0;
1172 #ifdef DEBUG_ENC_CLN
1173     printf("enc_clnpass: bpno=%d\n", bpno);
1174 #endif
1175     for (k = 0; k < (t1->h & ~3U); k += 4) {
1176 #ifdef DEBUG_ENC_CLN
1177         printf(" k=%d\n", k);
1178 #endif
1179         for (i = 0; i < t1->w; ++i) {
1180 #ifdef DEBUG_ENC_CLN
1181             printf("  i=%d\n", i);
1182 #endif
1183             agg = !(T1_FLAGS(i, k));
1184 #ifdef DEBUG_ENC_CLN
1185             printf("   agg=%d\n", agg);
1186 #endif
1187             if (agg) {
1188                 for (runlen = 0; runlen < 4; ++runlen) {
1189                     if (opj_int_abs(t1->data[((k + runlen)*t1->data_stride) + i]) & one) {
1190                         break;
1191                     }
1192                 }
1193                 opj_mqc_setcurctx(mqc, T1_CTXNO_AGG);
1194                 opj_mqc_encode(mqc, runlen != 4);
1195                 if (runlen == 4) {
1196                     continue;
1197                 }
1198                 opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
1199                 opj_mqc_encode(mqc, runlen >> 1);
1200                 opj_mqc_encode(mqc, runlen & 1);
1201             } else {
1202                 runlen = 0;
1203             }
1204             opj_t1_enc_clnpass_step(
1205                 t1,
1206                 &T1_FLAGS(i, k),
1207                 &t1->data[((k + runlen) * t1->data_stride) + i],
1208                 bpno,
1209                 one,
1210                 nmsedec,
1211                 agg,
1212                 runlen,
1213                 4U,
1214                 cblksty);
1215         }
1216     }
1217     if (k < t1->h) {
1218         agg = 0;
1219         runlen = 0;
1220 #ifdef DEBUG_ENC_CLN
1221         printf(" k=%d\n", k);
1222 #endif
1223         for (i = 0; i < t1->w; ++i) {
1224 #ifdef DEBUG_ENC_CLN
1225             printf("  i=%d\n", i);
1226             printf("   agg=%d\n", agg);
1227 #endif
1228             opj_t1_enc_clnpass_step(
1229                 t1,
1230                 &T1_FLAGS(i, k),
1231                 &t1->data[((k + runlen) * t1->data_stride) + i],
1232                 bpno,
1233                 one,
1234                 nmsedec,
1235                 agg,
1236                 runlen,
1237                 t1->h - k,
1238                 cblksty);
1239         }
1240     }
1241 }
1242
/**
Body of the clean-up decoding pass, written as a macro so that it can be
instantiated with constant or run-time dimensions and a constant vsc flag.

@param t1           Tier-1 handle (data, flags and MQ coder state are read from it)
@param bpno         Bit-plane number; oneplushalf is (1 << bpno) | ((1 << bpno) >> 1)
@param vsc          Passed to the step for sample 0 of each column; the steps
                    for samples 1..3 of full stripes always receive OPJ_FALSE
@param w            Code-block width (also used as the data row stride)
@param h            Code-block height
@param flags_stride Number of flag words per flag row; the walk starts at
                    t1->flags[flags_stride + 1]

Full stripes of 4 rows, per column:
 - flag word == 0: one decision is decoded in the T1_CTXNO_AGG context. If it
   is 0 the column is left untouched (continue). Otherwise two bits decoded
   in the T1_CTXNO_UNI context form runlen, and the switch enters at sample
   runlen: that first sample is decoded with partial = OPJ_TRUE (sign only),
   and the cases fall through so that the following samples are decoded with
   partial = OPJ_FALSE.
 - flag word != 0: the four samples are decoded with check_flags = OPJ_TRUE.
 - in both cases the PI bits are cleared when the flag word is stored back.

The MQ coder variables live in locals between DOWNLOAD_MQC_VARIABLES and
UPLOAD_MQC_VARIABLES. Remaining rows (h not a multiple of 4) are processed
after the upload through opj_t1_dec_clnpass_step(), which receives vsc for
every row.

NOTE(review): w, h and flags_stride are substituted without parentheses, so
callers should keep passing simple expressions as the existing ones do.
*/
1243 #define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
1244 { \
1245     OPJ_INT32 one, half, oneplushalf; \
1246     OPJ_UINT32 runlen; \
1247     OPJ_UINT32 i, j, k; \
1248     const OPJ_UINT32 l_w = w; \
1249     opj_mqc_t* mqc = &(t1->mqc); \
1250     register OPJ_INT32 *data = t1->data; \
1251     register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
1252     DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
1253     register OPJ_UINT32 v; \
1254     one = 1 << bpno; \
1255     half = one >> 1; \
1256     oneplushalf = one | half; \
1257     for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
1258         for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
1259             opj_flag_t flags = *flagsp; \
1260             if (flags == 0) { \
1261                 OPJ_UINT32 partial = OPJ_TRUE; \
1262                 opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \
1263                 opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1264                 if (!v) { \
1265                     continue; \
1266                 } \
1267                 opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \
1268                 opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \
1269                 opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
1270                 runlen = (runlen << 1) | v; \
1271                 switch(runlen) { \
1272                     case 0: \
1273                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\
1274                                             flags, flagsp, flags_stride, data, \
1275                                             l_w, 0, mqc, curctx, \
1276                                             v, a, c, ct, oneplushalf, vsc); \
1277                         partial = OPJ_FALSE; \
1278                         /* FALLTHRU */ \
1279                     case 1: \
1280                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1281                                             flags, flagsp, flags_stride, data, \
1282                                             l_w, 1, mqc, curctx, \
1283                                             v, a, c, ct, oneplushalf, OPJ_FALSE); \
1284                         partial = OPJ_FALSE; \
1285                         /* FALLTHRU */ \
1286                     case 2: \
1287                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1288                                             flags, flagsp, flags_stride, data, \
1289                                             l_w, 2, mqc, curctx, \
1290                                             v, a, c, ct, oneplushalf, OPJ_FALSE); \
1291                         partial = OPJ_FALSE; \
1292                         /* FALLTHRU */ \
1293                     case 3: \
1294                         opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
1295                                             flags, flagsp, flags_stride, data, \
1296                                             l_w, 3, mqc, curctx, \
1297                                             v, a, c, ct, oneplushalf, OPJ_FALSE); \
1298                         break; \
1299                 } \
1300             } else { \
1301                 opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1302                                     flags, flagsp, flags_stride, data, \
1303                                     l_w, 0, mqc, curctx, \
1304                                     v, a, c, ct, oneplushalf, vsc); \
1305                 opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1306                                     flags, flagsp, flags_stride, data, \
1307                                     l_w, 1, mqc, curctx, \
1308                                     v, a, c, ct, oneplushalf, OPJ_FALSE); \
1309                 opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1310                                     flags, flagsp, flags_stride, data, \
1311                                     l_w, 2, mqc, curctx, \
1312                                     v, a, c, ct, oneplushalf, OPJ_FALSE); \
1313                 opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
1314                                     flags, flagsp, flags_stride, data, \
1315                                     l_w, 3, mqc, curctx, \
1316                                     v, a, c, ct, oneplushalf, OPJ_FALSE); \
1317             } \
1318             *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1319         } \
1320     } \
1321     UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
1322     if( k < h ) { \
1323         for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
1324             for (j = 0; j < h - k; ++j) { \
1325                 opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j, vsc); \
1326             } \
1327             *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
1328         } \
1329     } \
1330 }
1331
1332 static void opj_t1_dec_clnpass_check_segsym(opj_t1_t *t1, OPJ_INT32 cblksty)
1333 {
1334     if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
1335         opj_mqc_t* mqc = &(t1->mqc);
1336         OPJ_UINT32 v, v2;
1337         opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
1338         opj_mqc_decode(v, mqc);
1339         opj_mqc_decode(v2, mqc);
1340         v = (v << 1) | v2;
1341         opj_mqc_decode(v2, mqc);
1342         v = (v << 1) | v2;
1343         opj_mqc_decode(v2, mqc);
1344         v = (v << 1) | v2;
1345         /*
1346         if (v!=0xa) {
1347             opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v);
1348         }
1349         */
1350     }
1351 }
1352
/**
Clean-up decoding pass instantiated for 64x64 code-blocks (flags stride 66)
with vsc = OPJ_FALSE.

@param t1   Tier-1 handle
@param bpno Bit-plane number
*/
1353 static void opj_t1_dec_clnpass_64x64_novsc(
1354     opj_t1_t *t1,
1355     OPJ_INT32 bpno)
1356 {
1357     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
1358 }
1359
/**
Clean-up decoding pass instantiated for 64x64 code-blocks (flags stride 66)
with vsc = OPJ_TRUE.

@param t1   Tier-1 handle
@param bpno Bit-plane number
*/
1360 static void opj_t1_dec_clnpass_64x64_vsc(
1361     opj_t1_t *t1,
1362     OPJ_INT32 bpno)
1363 {
1364     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
1365 }
1366
/**
Clean-up decoding pass for arbitrary code-block dimensions (read from t1 at
run time, flags stride t1->w + 2) with vsc = OPJ_FALSE.

@param t1   Tier-1 handle
@param bpno Bit-plane number
*/
1367 static void opj_t1_dec_clnpass_generic_novsc(
1368     opj_t1_t *t1,
1369     OPJ_INT32 bpno)
1370 {
1371     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
1372                                 t1->w + 2U);
1373 }
1374
/**
Clean-up decoding pass for arbitrary code-block dimensions (read from t1 at
run time, flags stride t1->w + 2) with vsc = OPJ_TRUE.

@param t1   Tier-1 handle
@param bpno Bit-plane number
*/
1375 static void opj_t1_dec_clnpass_generic_vsc(
1376     opj_t1_t *t1,
1377     OPJ_INT32 bpno)
1378 {
1379     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
1380                                 t1->w + 2U);
1381 }
1382
1383 static void opj_t1_dec_clnpass(
1384     opj_t1_t *t1,
1385     OPJ_INT32 bpno,
1386     OPJ_INT32 cblksty)
1387 {
1388     if (t1->w == 64 && t1->h == 64) {
1389         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1390             opj_t1_dec_clnpass_64x64_vsc(t1, bpno);
1391         } else {
1392             opj_t1_dec_clnpass_64x64_novsc(t1, bpno);
1393         }
1394     } else {
1395         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1396             opj_t1_dec_clnpass_generic_vsc(t1, bpno);
1397         } else {
1398             opj_t1_dec_clnpass_generic_novsc(t1, bpno);
1399         }
1400     }
1401     opj_t1_dec_clnpass_check_segsym(t1, cblksty);
1402 }
1403
1404
1405 /** mod fixed_quality */
1406 static OPJ_FLOAT64 opj_t1_getwmsedec(
1407     OPJ_INT32 nmsedec,
1408     OPJ_UINT32 compno,
1409     OPJ_UINT32 level,
1410     OPJ_UINT32 orient,
1411     OPJ_INT32 bpno,
1412     OPJ_UINT32 qmfbid,
1413     OPJ_FLOAT64 stepsize,
1414     OPJ_UINT32 numcomps,
1415     const OPJ_FLOAT64 * mct_norms,
1416     OPJ_UINT32 mct_numcomps)
1417 {
1418     OPJ_FLOAT64 w1 = 1, w2, wmsedec;
1419     OPJ_ARG_NOT_USED(numcomps);
1420
1421     if (mct_norms && (compno < mct_numcomps)) {
1422         w1 = mct_norms[compno];
1423     }
1424
1425     if (qmfbid == 1) {
1426         w2 = opj_dwt_getnorm(level, orient);
1427     } else {    /* if (qmfbid == 0) */
1428         w2 = opj_dwt_getnorm_real(level, orient);
1429     }
1430
1431     wmsedec = w1 * w2 * stepsize * (1 << bpno);
1432     wmsedec *= wmsedec * nmsedec / 8192.0;
1433
1434     return wmsedec;
1435 }
1436
1437 static OPJ_BOOL opj_t1_allocate_buffers(
1438     opj_t1_t *t1,
1439     OPJ_UINT32 w,
1440     OPJ_UINT32 h)
1441 {
1442     OPJ_UINT32 flagssize;
1443     OPJ_UINT32 flags_stride;
1444
1445     /* No risk of overflow. Prior checks ensure those assert are met */
1446     /* They are per the specification */
1447     assert(w <= 1024);
1448     assert(h <= 1024);
1449     assert(w * h <= 4096);
1450
1451     /* encoder uses tile buffer, so no need to allocate */
1452     if (!t1->encoder) {
1453         OPJ_UINT32 datasize = w * h;
1454
1455         if (datasize > t1->datasize) {
1456             opj_aligned_free(t1->data);
1457             t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
1458             if (!t1->data) {
1459                 /* FIXME event manager error callback */
1460                 return OPJ_FALSE;
1461             }
1462             t1->datasize = datasize;
1463         }
1464         /* memset first arg is declared to never be null by gcc */
1465         if (t1->data != NULL) {
1466             memset(t1->data, 0, datasize * sizeof(OPJ_INT32));
1467         }
1468     }
1469
1470     flags_stride = w + 2U; /* can't be 0U */
1471
1472     flagssize = (h + 3U) / 4U + 2U;
1473
1474     flagssize *= flags_stride;
1475     {
1476         opj_flag_t* p;
1477         OPJ_UINT32 x;
1478         OPJ_UINT32 flags_height = (h + 3U) / 4U;
1479
1480         if (flagssize > t1->flagssize) {
1481
1482             opj_aligned_free(t1->flags);
1483             t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(
1484                             opj_flag_t));
1485             if (!t1->flags) {
1486                 /* FIXME event manager error callback */
1487                 return OPJ_FALSE;
1488             }
1489         }
1490         t1->flagssize = flagssize;
1491
1492         memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
1493
1494         p = &t1->flags[0];
1495         for (x = 0; x < flags_stride; ++x) {
1496             /* magic value to hopefully stop any passes being interested in this entry */
1497             *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1498         }
1499
1500         p = &t1->flags[((flags_height + 1) * flags_stride)];
1501         for (x = 0; x < flags_stride; ++x) {
1502             /* magic value to hopefully stop any passes being interested in this entry */
1503             *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1504         }
1505
1506         if (h % 4) {
1507             OPJ_UINT32 v = 0;
1508             p = &t1->flags[((flags_height) * flags_stride)];
1509             if (h % 4 == 1) {
1510                 v |= T1_PI_1 | T1_PI_2 | T1_PI_3;
1511             } else if (h % 4 == 2) {
1512                 v |= T1_PI_2 | T1_PI_3;
1513             } else if (h % 4 == 3) {
1514                 v |= T1_PI_3;
1515             }
1516             for (x = 0; x < flags_stride; ++x) {
1517                 *p++ = v;
1518             }
1519         }
1520     }
1521
1522     t1->w = w;
1523     t1->h = h;
1524
1525     return OPJ_TRUE;
1526 }
1527
1528 /* ----------------------------------------------------------------------- */
1529
1530 /* ----------------------------------------------------------------------- */
1531 /**
1532  * Creates a new Tier 1 handle
1533  * and initializes the look-up tables of the Tier-1 coder/decoder
1534  * @return a new T1 handle if successful, returns NULL otherwise
1535 */
1536 opj_t1_t* opj_t1_create(OPJ_BOOL isEncoder)
1537 {
1538     opj_t1_t *l_t1 = 00;
1539
1540     l_t1 = (opj_t1_t*) opj_calloc(1, sizeof(opj_t1_t));
1541     if (!l_t1) {
1542         return 00;
1543     }
1544
1545     l_t1->encoder = isEncoder;
1546
1547     return l_t1;
1548 }
1549
1550
1551 /**
1552  * Destroys a previously created T1 handle
1553  *
1554  * @param p_t1 Tier 1 handle to destroy
1555 */
1556 void opj_t1_destroy(opj_t1_t *p_t1)
1557 {
1558     if (! p_t1) {
1559         return;
1560     }
1561
1562     /* encoder uses tile buffer, so no need to free */
1563     if (!p_t1->encoder && p_t1->data) {
1564         opj_aligned_free(p_t1->data);
1565         p_t1->data = 00;
1566     }
1567
1568     if (p_t1->flags) {
1569         opj_aligned_free(p_t1->flags);
1570         p_t1->flags = 00;
1571     }
1572
1573     opj_free(p_t1->cblkdatabuffer);
1574
1575     opj_free(p_t1);
1576 }
1577
/**
 * Description of one code-block decoding job, consumed (and freed) by
 * opj_t1_clbl_decode_processor().
 */
1578 typedef struct {
    /* when false, the processor allocates cblk->decoded_data and decodes into it */
1579     OPJ_BOOL whole_tile_decoding;
    /* resolution index; resolutions[resno - 1] gives the offset of bands 1..3 */
1580     OPJ_UINT32 resno;
    /* code-block to decode */
1581     opj_tcd_cblk_dec_t* cblk;
    /* band containing the code-block (bandno, origin and stepsize are used) */
1582     opj_tcd_band_t* band;
    /* tile-component the code-block belongs to */
1583     opj_tcd_tilecomp_t* tilec;
    /* component coding parameters (roishift, cblksty and qmfbid are used) */
1584     opj_tccp_t* tccp;
    /* copied into the Tier-1 handle before decoding */
1585     OPJ_BOOL mustuse_cblkdatabuffer;
    /* result flag read and written by the jobs: set to OPJ_FALSE on failure, */
    /* and a job does no decoding once it reads false */
1586     volatile OPJ_BOOL* pret;
    /* event manager used for error messages */
1587     opj_event_mgr_t *p_manager;
    /* if non-NULL, locked around calls to the event manager */
1588     opj_mutex_t* p_manager_mutex;
    /* forwarded to opj_t1_decode_cblk() */
1589     OPJ_BOOL check_pterm;
1590 } opj_t1_cblk_decode_processing_job_t;
1591
/**
 * Adapter giving opj_t1_destroy() a void* parameter; it is the callback
 * passed to opj_tls_set() together with the cached Tier-1 handle.
 */
1592 static void opj_t1_destroy_wrapper(void* t1)
1593 {
1594     opj_t1_destroy((opj_t1_t*) t1);
1595 }
1596
1597 static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
1598 {
1599     opj_tcd_cblk_dec_t* cblk;
1600     opj_tcd_band_t* band;
1601     opj_tcd_tilecomp_t* tilec;
1602     opj_tccp_t* tccp;
1603     OPJ_INT32* OPJ_RESTRICT datap;
1604     OPJ_UINT32 cblk_w, cblk_h;
1605     OPJ_INT32 x, y;
1606     OPJ_UINT32 i, j;
1607     opj_t1_cblk_decode_processing_job_t* job;
1608     opj_t1_t* t1;
1609     OPJ_UINT32 resno;
1610     OPJ_UINT32 tile_w;
1611
1612     job = (opj_t1_cblk_decode_processing_job_t*) user_data;
1613
1614     cblk = job->cblk;
1615
1616     if (!job->whole_tile_decoding) {
1617         cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1618         cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1619
1620         cblk->decoded_data = opj_aligned_malloc(cblk_w * cblk_h * sizeof(OPJ_INT32));
1621         if (cblk->decoded_data == NULL) {
1622             if (job->p_manager_mutex) {
1623                 opj_mutex_lock(job->p_manager_mutex);
1624             }
1625             opj_event_msg(job->p_manager, EVT_ERROR,
1626                           "Cannot allocate cblk->decoded_data\n");
1627             if (job->p_manager_mutex) {
1628                 opj_mutex_unlock(job->p_manager_mutex);
1629             }
1630             *(job->pret) = OPJ_FALSE;
1631             opj_free(job);
1632             return;
1633         }
1634         /* Zero-init required */
1635         memset(cblk->decoded_data, 0, cblk_w * cblk_h * sizeof(OPJ_INT32));
1636     } else if (cblk->decoded_data) {
1637         /* Not sure if that code path can happen, but better be */
1638         /* safe than sorry */
1639         opj_aligned_free(cblk->decoded_data);
1640         cblk->decoded_data = NULL;
1641     }
1642
1643     resno = job->resno;
1644     band = job->band;
1645     tilec = job->tilec;
1646     tccp = job->tccp;
1647     tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1
1648                           -
1649                           tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
1650
1651     if (!*(job->pret)) {
1652         opj_free(job);
1653         return;
1654     }
1655
1656     t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
1657     if (t1 == NULL) {
1658         t1 = opj_t1_create(OPJ_FALSE);
1659         opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
1660     }
1661     t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
1662
1663     if (OPJ_FALSE == opj_t1_decode_cblk(
1664                 t1,
1665                 cblk,
1666                 band->bandno,
1667                 (OPJ_UINT32)tccp->roishift,
1668                 tccp->cblksty,
1669                 job->p_manager,
1670                 job->p_manager_mutex,
1671                 job->check_pterm)) {
1672         *(job->pret) = OPJ_FALSE;
1673         opj_free(job);
1674         return;
1675     }
1676
1677     x = cblk->x0 - band->x0;
1678     y = cblk->y0 - band->y0;
1679     if (band->bandno & 1) {
1680         opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
1681         x += pres->x1 - pres->x0;
1682     }
1683     if (band->bandno & 2) {
1684         opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
1685         y += pres->y1 - pres->y0;
1686     }
1687
1688     datap = cblk->decoded_data ? cblk->decoded_data : t1->data;
1689     cblk_w = t1->w;
1690     cblk_h = t1->h;
1691
1692     if (tccp->roishift) {
1693         if (tccp->roishift >= 31) {
1694             for (j = 0; j < cblk_h; ++j) {
1695                 for (i = 0; i < cblk_w; ++i) {
1696                     datap[(j * cblk_w) + i] = 0;
1697                 }
1698             }
1699         } else {
1700             OPJ_INT32 thresh = 1 << tccp->roishift;
1701             for (j = 0; j < cblk_h; ++j) {
1702                 for (i = 0; i < cblk_w; ++i) {
1703                     OPJ_INT32 val = datap[(j * cblk_w) + i];
1704                     OPJ_INT32 mag = abs(val);
1705                     if (mag >= thresh) {
1706                         mag >>= tccp->roishift;
1707                         datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
1708                     }
1709                 }
1710             }
1711         }
1712     }
1713
1714     /* Both can be non NULL if for example decoding a full tile and then */
1715     /* partially a tile. In which case partial decoding should be the */
1716     /* priority */
1717     assert((cblk->decoded_data != NULL) || (tilec->data != NULL));
1718
1719     if (cblk->decoded_data) {
1720         OPJ_UINT32 cblk_size = cblk_w * cblk_h;
1721         if (tccp->qmfbid == 1) {
1722             for (i = 0; i < cblk_size; ++i) {
1723                 datap[i] /= 2;
1724             }
1725         } else {        /* if (tccp->qmfbid == 0) */
1726             i = 0;
1727 #ifdef __SSE2__
1728             {
1729                 const __m128 xmm_stepsize = _mm_set1_ps(band->stepsize);
1730                 for (; i < (cblk_size & ~15U); i += 16) {
1731                     __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1732                                                            datap + 0)));
1733                     __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1734                                                            datap + 4)));
1735                     __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1736                                                            datap + 8)));
1737                     __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1738                                                            datap + 12)));
1739                     _mm_store_ps((float*)(datap +  0), _mm_mul_ps(xmm0_data, xmm_stepsize));
1740                     _mm_store_ps((float*)(datap +  4), _mm_mul_ps(xmm1_data, xmm_stepsize));
1741                     _mm_store_ps((float*)(datap +  8), _mm_mul_ps(xmm2_data, xmm_stepsize));
1742                     _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
1743                     datap += 16;
1744                 }
1745             }
1746 #endif
1747             for (; i < cblk_size; ++i) {
1748                 OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * band->stepsize;
1749                 memcpy(datap, &tmp, sizeof(tmp));
1750                 datap++;
1751             }
1752         }
1753     } else if (tccp->qmfbid == 1) {
1754         OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
1755                                                        (OPJ_SIZE_T)x];
1756         for (j = 0; j < cblk_h; ++j) {
1757             i = 0;
1758             for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
1759                 OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U];
1760                 OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
1761                 OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
1762                 OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
1763                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
1764                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
1765                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
1766                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
1767             }
1768             for (; i < cblk_w; ++i) {
1769                 OPJ_INT32 tmp = datap[(j * cblk_w) + i];
1770                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
1771             }
1772         }
1773     } else {        /* if (tccp->qmfbid == 0) */
1774         OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
1775                                                          tile_w + (OPJ_SIZE_T)x];
1776         for (j = 0; j < cblk_h; ++j) {
1777             OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
1778             for (i = 0; i < cblk_w; ++i) {
1779                 OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * band->stepsize;
1780                 *tiledp2 = tmp;
1781                 datap++;
1782                 tiledp2++;
1783             }
1784             tiledp += tile_w;
1785         }
1786     }
1787
1788     opj_free(job);
1789 }
1790
1791
1792 void opj_t1_decode_cblks(opj_tcd_t* tcd,
1793                          volatile OPJ_BOOL* pret,
1794                          opj_tcd_tilecomp_t* tilec,
1795                          opj_tccp_t* tccp,
1796                          opj_event_mgr_t *p_manager,
1797                          opj_mutex_t* p_manager_mutex,
1798                          OPJ_BOOL check_pterm
1799                         )
1800 {
1801     opj_thread_pool_t* tp = tcd->thread_pool;
1802     OPJ_UINT32 resno, bandno, precno, cblkno;
1803
1804 #ifdef DEBUG_VERBOSE
1805     OPJ_UINT32 codeblocks_decoded = 0;
1806     printf("Enter opj_t1_decode_cblks()\n");
1807 #endif
1808
1809     for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
1810         opj_tcd_resolution_t* res = &tilec->resolutions[resno];
1811
1812         for (bandno = 0; bandno < res->numbands; ++bandno) {
1813             opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
1814
1815             for (precno = 0; precno < res->pw * res->ph; ++precno) {
1816                 opj_tcd_precinct_t* precinct = &band->precincts[precno];
1817
1818                 if (!opj_tcd_is_subband_area_of_interest(tcd,
1819                         tilec->compno,
1820                         resno,
1821                         band->bandno,
1822                         (OPJ_UINT32)precinct->x0,
1823                         (OPJ_UINT32)precinct->y0,
1824                         (OPJ_UINT32)precinct->x1,
1825                         (OPJ_UINT32)precinct->y1)) {
1826                     for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1827                         opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1828                         if (cblk->decoded_data) {
1829 #ifdef DEBUG_VERBOSE
1830                             printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1831                                    cblk->x0, cblk->y0, resno, bandno);
1832 #endif
1833                             opj_aligned_free(cblk->decoded_data);
1834                             cblk->decoded_data = NULL;
1835                         }
1836                     }
1837                     continue;
1838                 }
1839
1840                 for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1841                     opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1842                     opj_t1_cblk_decode_processing_job_t* job;
1843
1844                     if (!opj_tcd_is_subband_area_of_interest(tcd,
1845                             tilec->compno,
1846                             resno,
1847                             band->bandno,
1848                             (OPJ_UINT32)cblk->x0,
1849                             (OPJ_UINT32)cblk->y0,
1850                             (OPJ_UINT32)cblk->x1,
1851                             (OPJ_UINT32)cblk->y1)) {
1852                         if (cblk->decoded_data) {
1853 #ifdef DEBUG_VERBOSE
1854                             printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1855                                    cblk->x0, cblk->y0, resno, bandno);
1856 #endif
1857                             opj_aligned_free(cblk->decoded_data);
1858                             cblk->decoded_data = NULL;
1859                         }
1860                         continue;
1861                     }
1862
1863                     if (!tcd->whole_tile_decoding) {
1864                         OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1865                         OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1866                         if (cblk->decoded_data != NULL) {
1867 #ifdef DEBUG_VERBOSE
1868                             printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n",
1869                                    cblk->x0, cblk->y0, resno, bandno);
1870 #endif
1871                             continue;
1872                         }
1873                         if (cblk_w == 0 || cblk_h == 0) {
1874                             continue;
1875                         }
1876 #ifdef DEBUG_VERBOSE
1877                         printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n",
1878                                cblk->x0, cblk->y0, resno, bandno);
1879 #endif
1880                     }
1881
1882                     job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1,
1883                             sizeof(opj_t1_cblk_decode_processing_job_t));
1884                     if (!job) {
1885                         *pret = OPJ_FALSE;
1886                         return;
1887                     }
1888                     job->whole_tile_decoding = tcd->whole_tile_decoding;
1889                     job->resno = resno;
1890                     job->cblk = cblk;
1891                     job->band = band;
1892                     job->tilec = tilec;
1893                     job->tccp = tccp;
1894                     job->pret = pret;
1895                     job->p_manager_mutex = p_manager_mutex;
1896                     job->p_manager = p_manager;
1897                     job->check_pterm = check_pterm;
1898                     job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
1899                     opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
1900 #ifdef DEBUG_VERBOSE
1901                     codeblocks_decoded ++;
1902 #endif
1903                     if (!(*pret)) {
1904                         return;
1905                     }
1906                 } /* cblkno */
1907             } /* precno */
1908         } /* bandno */
1909     } /* resno */
1910
1911 #ifdef DEBUG_VERBOSE
1912     printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded);
1913 #endif
1914     return;
1915 }
1916
1917
1918 static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
1919                                    opj_tcd_cblk_dec_t* cblk,
1920                                    OPJ_UINT32 orient,
1921                                    OPJ_UINT32 roishift,
1922                                    OPJ_UINT32 cblksty,
1923                                    opj_event_mgr_t *p_manager,
1924                                    opj_mutex_t* p_manager_mutex,
1925                                    OPJ_BOOL check_pterm)
1926 {
1927     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1928
1929     OPJ_INT32 bpno_plus_one;
1930     OPJ_UINT32 passtype;
1931     OPJ_UINT32 segno, passno;
1932     OPJ_BYTE* cblkdata = NULL;
1933     OPJ_UINT32 cblkdataindex = 0;
1934     OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
1935     OPJ_INT32* original_t1_data = NULL;
1936
1937     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
1938
1939     if (!opj_t1_allocate_buffers(
1940                 t1,
1941                 (OPJ_UINT32)(cblk->x1 - cblk->x0),
1942                 (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
1943         return OPJ_FALSE;
1944     }
1945
1946     bpno_plus_one = (OPJ_INT32)(roishift + cblk->numbps);
1947     if (bpno_plus_one >= 31) {
1948         if (p_manager_mutex) {
1949             opj_mutex_lock(p_manager_mutex);
1950         }
1951         opj_event_msg(p_manager, EVT_WARNING,
1952                       "opj_t1_decode_cblk(): unsupported bpno_plus_one = %d >= 31\n",
1953                       bpno_plus_one);
1954         if (p_manager_mutex) {
1955             opj_mutex_unlock(p_manager_mutex);
1956         }
1957         return OPJ_FALSE;
1958     }
1959     passtype = 2;
1960
1961     opj_mqc_resetstates(mqc);
1962     opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
1963     opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
1964     opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
1965
1966     /* Even if we have a single chunk, in multi-threaded decoding */
1967     /* the insertion of our synthetic marker might potentially override */
1968     /* valid codestream of other codeblocks decoded in parallel. */
1969     if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
1970         OPJ_UINT32 i;
1971         OPJ_UINT32 cblk_len;
1972
1973         /* Compute whole codeblock length from chunk lengths */
1974         cblk_len = 0;
1975         for (i = 0; i < cblk->numchunks; i++) {
1976             cblk_len += cblk->chunks[i].len;
1977         }
1978
1979         /* Allocate temporary memory if needed */
1980         if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
1981             cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
1982                                               cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
1983             if (cblkdata == NULL) {
1984                 return OPJ_FALSE;
1985             }
1986             t1->cblkdatabuffer = cblkdata;
1987             memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
1988             t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
1989         }
1990
1991         /* Concatenate all chunks */
1992         cblkdata = t1->cblkdatabuffer;
1993         cblk_len = 0;
1994         for (i = 0; i < cblk->numchunks; i++) {
1995             memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
1996             cblk_len += cblk->chunks[i].len;
1997         }
1998     } else if (cblk->numchunks == 1) {
1999         cblkdata = cblk->chunks[0].data;
2000     }
2001
2002     /* For subtile decoding, directly decode in the decoded_data buffer of */
2003     /* the code-block. Hack t1->data to point to it, and restore it later */
2004     if (cblk->decoded_data) {
2005         original_t1_data = t1->data;
2006         t1->data = cblk->decoded_data;
2007     }
2008
2009     for (segno = 0; segno < cblk->real_num_segs; ++segno) {
2010         opj_tcd_seg_t *seg = &cblk->segs[segno];
2011
2012         /* BYPASS mode */
2013         type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
2014                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2015
2016         if (type == T1_TYPE_RAW) {
2017             opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2018                                  OPJ_COMMON_CBLK_DATA_EXTRA);
2019         } else {
2020             opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2021                              OPJ_COMMON_CBLK_DATA_EXTRA);
2022         }
2023         cblkdataindex += seg->len;
2024
2025         for (passno = 0; (passno < seg->real_num_passes) &&
2026                 (bpno_plus_one >= 1); ++passno) {
2027             switch (passtype) {
2028             case 0:
2029                 if (type == T1_TYPE_RAW) {
2030                     opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2031                 } else {
2032                     opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2033                 }
2034                 break;
2035             case 1:
2036                 if (type == T1_TYPE_RAW) {
2037                     opj_t1_dec_refpass_raw(t1, bpno_plus_one);
2038                 } else {
2039                     opj_t1_dec_refpass_mqc(t1, bpno_plus_one);
2040                 }
2041                 break;
2042             case 2:
2043                 opj_t1_dec_clnpass(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2044                 break;
2045             }
2046
2047             if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) {
2048                 opj_mqc_resetstates(mqc);
2049                 opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2050                 opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2051                 opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2052             }
2053             if (++passtype == 3) {
2054                 passtype = 0;
2055                 bpno_plus_one--;
2056             }
2057         }
2058
2059         opq_mqc_finish_dec(mqc);
2060     }
2061
2062     if (check_pterm) {
2063         if (mqc->bp + 2 < mqc->end) {
2064             if (p_manager_mutex) {
2065                 opj_mutex_lock(p_manager_mutex);
2066             }
2067             opj_event_msg(p_manager, EVT_WARNING,
2068                           "PTERM check failure: %d remaining bytes in code block (%d used / %d)\n",
2069                           (int)(mqc->end - mqc->bp) - 2,
2070                           (int)(mqc->bp - mqc->start),
2071                           (int)(mqc->end - mqc->start));
2072             if (p_manager_mutex) {
2073                 opj_mutex_unlock(p_manager_mutex);
2074             }
2075         } else if (mqc->end_of_byte_stream_counter > 2) {
2076             if (p_manager_mutex) {
2077                 opj_mutex_lock(p_manager_mutex);
2078             }
2079             opj_event_msg(p_manager, EVT_WARNING,
2080                           "PTERM check failure: %d synthetized 0xFF markers read\n",
2081                           mqc->end_of_byte_stream_counter);
2082             if (p_manager_mutex) {
2083                 opj_mutex_unlock(p_manager_mutex);
2084             }
2085         }
2086     }
2087
2088     /* Restore original t1->data is needed */
2089     if (cblk->decoded_data) {
2090         t1->data = original_t1_data;
2091     }
2092
2093     return OPJ_TRUE;
2094 }
2095
2096
2097
2098
2099 OPJ_BOOL opj_t1_encode_cblks(opj_t1_t *t1,
2100                              opj_tcd_tile_t *tile,
2101                              opj_tcp_t *tcp,
2102                              const OPJ_FLOAT64 * mct_norms,
2103                              OPJ_UINT32 mct_numcomps
2104                             )
2105 {
2106     OPJ_UINT32 compno, resno, bandno, precno, cblkno;
2107
2108     tile->distotile = 0;        /* fixed_quality */
2109
2110     for (compno = 0; compno < tile->numcomps; ++compno) {
2111         opj_tcd_tilecomp_t* tilec = &tile->comps[compno];
2112         opj_tccp_t* tccp = &tcp->tccps[compno];
2113         OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
2114
2115         for (resno = 0; resno < tilec->numresolutions; ++resno) {
2116             opj_tcd_resolution_t *res = &tilec->resolutions[resno];
2117
2118             for (bandno = 0; bandno < res->numbands; ++bandno) {
2119                 opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
2120                 OPJ_INT32 bandconst;
2121
2122                 /* Skip empty bands */
2123                 if (opj_tcd_is_band_empty(band)) {
2124                     continue;
2125                 }
2126
2127                 bandconst = 8192 * 8192 / ((OPJ_INT32) floor(band->stepsize * 8192));
2128                 for (precno = 0; precno < res->pw * res->ph; ++precno) {
2129                     opj_tcd_precinct_t *prc = &band->precincts[precno];
2130
2131                     for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
2132                         opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
2133                         OPJ_INT32* OPJ_RESTRICT tiledp;
2134                         OPJ_UINT32 cblk_w;
2135                         OPJ_UINT32 cblk_h;
2136                         OPJ_UINT32 i, j, tileLineAdvance;
2137                         OPJ_SIZE_T tileIndex = 0;
2138
2139                         OPJ_INT32 x = cblk->x0 - band->x0;
2140                         OPJ_INT32 y = cblk->y0 - band->y0;
2141                         if (band->bandno & 1) {
2142                             opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2143                             x += pres->x1 - pres->x0;
2144                         }
2145                         if (band->bandno & 2) {
2146                             opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2147                             y += pres->y1 - pres->y0;
2148                         }
2149
2150                         if (!opj_t1_allocate_buffers(
2151                                     t1,
2152                                     (OPJ_UINT32)(cblk->x1 - cblk->x0),
2153                                     (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
2154                             return OPJ_FALSE;
2155                         }
2156
2157                         cblk_w = t1->w;
2158                         cblk_h = t1->h;
2159                         tileLineAdvance = tile_w - cblk_w;
2160
2161                         tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
2162                         t1->data = tiledp;
2163                         t1->data_stride = tile_w;
2164                         if (tccp->qmfbid == 1) {
2165                             for (j = 0; j < cblk_h; ++j) {
2166                                 for (i = 0; i < cblk_w; ++i) {
2167                                     tiledp[tileIndex] *= (1 << T1_NMSEDEC_FRACBITS);
2168                                     tileIndex++;
2169                                 }
2170                                 tileIndex += tileLineAdvance;
2171                             }
2172                         } else {        /* if (tccp->qmfbid == 0) */
2173                             for (j = 0; j < cblk_h; ++j) {
2174                                 for (i = 0; i < cblk_w; ++i) {
2175                                     OPJ_INT32 tmp = tiledp[tileIndex];
2176                                     tiledp[tileIndex] =
2177                                         opj_int_fix_mul_t1(
2178                                             tmp,
2179                                             bandconst);
2180                                     tileIndex++;
2181                                 }
2182                                 tileIndex += tileLineAdvance;
2183                             }
2184                         }
2185
2186                         opj_t1_encode_cblk(
2187                             t1,
2188                             cblk,
2189                             band->bandno,
2190                             compno,
2191                             tilec->numresolutions - 1 - resno,
2192                             tccp->qmfbid,
2193                             band->stepsize,
2194                             tccp->cblksty,
2195                             tile->numcomps,
2196                             tile,
2197                             mct_norms,
2198                             mct_numcomps);
2199
2200                     } /* cblkno */
2201                 } /* precno */
2202             } /* bandno */
2203         } /* resno  */
2204     } /* compno  */
2205     return OPJ_TRUE;
2206 }
2207
2208 /* Returns whether the pass (bpno, passtype) is terminated */
2209 static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk,
2210                                    OPJ_UINT32 cblksty,
2211                                    OPJ_INT32 bpno,
2212                                    OPJ_UINT32 passtype)
2213 {
2214     /* Is it the last cleanup pass ? */
2215     if (passtype == 2 && bpno == 0) {
2216         return OPJ_TRUE;
2217     }
2218
2219     if (cblksty & J2K_CCP_CBLKSTY_TERMALL) {
2220         return OPJ_TRUE;
2221     }
2222
2223     if ((cblksty & J2K_CCP_CBLKSTY_LAZY)) {
2224         /* For bypass arithmetic bypass, terminate the 4th cleanup pass */
2225         if ((bpno == ((OPJ_INT32)cblk->numbps - 4)) && (passtype == 2)) {
2226             return OPJ_TRUE;
2227         }
2228         /* and beyond terminate all the magnitude refinement passes (in raw) */
2229         /* and cleanup passes (in MQC) */
2230         if ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype > 0)) {
2231             return OPJ_TRUE;
2232         }
2233     }
2234
2235     return OPJ_FALSE;
2236 }
2237
2238
2239 /** mod fixed_quality */
2240 static void opj_t1_encode_cblk(opj_t1_t *t1,
2241                                opj_tcd_cblk_enc_t* cblk,
2242                                OPJ_UINT32 orient,
2243                                OPJ_UINT32 compno,
2244                                OPJ_UINT32 level,
2245                                OPJ_UINT32 qmfbid,
2246                                OPJ_FLOAT64 stepsize,
2247                                OPJ_UINT32 cblksty,
2248                                OPJ_UINT32 numcomps,
2249                                opj_tcd_tile_t * tile,
2250                                const OPJ_FLOAT64 * mct_norms,
2251                                OPJ_UINT32 mct_numcomps)
2252 {
2253     OPJ_FLOAT64 cumwmsedec = 0.0;
2254
2255     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
2256
2257     OPJ_UINT32 passno;
2258     OPJ_INT32 bpno;
2259     OPJ_UINT32 passtype;
2260     OPJ_INT32 nmsedec = 0;
2261     OPJ_INT32 max;
2262     OPJ_UINT32 i, j;
2263     OPJ_BYTE type = T1_TYPE_MQ;
2264     OPJ_FLOAT64 tempwmsedec;
2265
2266 #ifdef EXTRA_DEBUG
2267     printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
2268            cblk->x0, cblk->y0, cblk->x1, cblk->y1, orient, compno, level);
2269 #endif
2270
2271     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
2272
2273     max = 0;
2274     for (i = 0; i < t1->w; ++i) {
2275         for (j = 0; j < t1->h; ++j) {
2276             OPJ_INT32 tmp = abs(t1->data[i + j * t1->data_stride]);
2277             max = opj_int_max(max, tmp);
2278         }
2279     }
2280
2281     cblk->numbps = max ? (OPJ_UINT32)((opj_int_floorlog2(max) + 1) -
2282                                       T1_NMSEDEC_FRACBITS) : 0;
2283     if (cblk->numbps == 0) {
2284         cblk->totalpasses = 0;
2285         return;
2286     }
2287
2288     bpno = (OPJ_INT32)(cblk->numbps - 1);
2289     passtype = 2;
2290
2291     opj_mqc_resetstates(mqc);
2292     opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2293     opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2294     opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2295     opj_mqc_init_enc(mqc, cblk->data);
2296
2297     for (passno = 0; bpno >= 0; ++passno) {
2298         opj_tcd_pass_t *pass = &cblk->passes[passno];
2299         type = ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype < 2) &&
2300                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2301
2302         /* If the previous pass was terminating, we need to reset the encoder */
2303         if (passno > 0 && cblk->passes[passno - 1].term) {
2304             if (type == T1_TYPE_RAW) {
2305                 opj_mqc_bypass_init_enc(mqc);
2306             } else {
2307                 opj_mqc_restart_init_enc(mqc);
2308             }
2309         }
2310
2311         switch (passtype) {
2312         case 0:
2313             opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty);
2314             break;
2315         case 1:
2316             opj_t1_enc_refpass(t1, bpno, &nmsedec, type);
2317             break;
2318         case 2:
2319             opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty);
2320             /* code switch SEGMARK (i.e. SEGSYM) */
2321             if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
2322                 opj_mqc_segmark_enc(mqc);
2323             }
2324             break;
2325         }
2326
2327         /* fixed_quality */
2328         tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid,
2329                                         stepsize, numcomps, mct_norms, mct_numcomps) ;
2330         cumwmsedec += tempwmsedec;
2331         tile->distotile += tempwmsedec;
2332         pass->distortiondec = cumwmsedec;
2333
2334         if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) {
2335             /* If it is a terminated pass, terminate it */
2336             if (type == T1_TYPE_RAW) {
2337                 opj_mqc_bypass_flush_enc(mqc, cblksty & J2K_CCP_CBLKSTY_PTERM);
2338             } else {
2339                 if (cblksty & J2K_CCP_CBLKSTY_PTERM) {
2340                     opj_mqc_erterm_enc(mqc);
2341                 } else {
2342                     opj_mqc_flush(mqc);
2343                 }
2344             }
2345             pass->term = 1;
2346             pass->rate = opj_mqc_numbytes(mqc);
2347         } else {
2348             /* Non terminated pass */
2349             OPJ_UINT32 rate_extra_bytes;
2350             if (type == T1_TYPE_RAW) {
2351                 rate_extra_bytes = opj_mqc_bypass_get_extra_bytes(
2352                                        mqc, (cblksty & J2K_CCP_CBLKSTY_PTERM));
2353             } else {
2354                 rate_extra_bytes = 3;
2355             }
2356             pass->term = 0;
2357             pass->rate = opj_mqc_numbytes(mqc) + rate_extra_bytes;
2358         }
2359
2360         if (++passtype == 3) {
2361             passtype = 0;
2362             bpno--;
2363         }
2364
2365         /* Code-switch "RESET" */
2366         if (cblksty & J2K_CCP_CBLKSTY_RESET) {
2367             opj_mqc_reset_enc(mqc);
2368         }
2369     }
2370
2371     cblk->totalpasses = passno;
2372
2373     if (cblk->totalpasses) {
2374         /* Make sure that pass rates are increasing */
2375         OPJ_UINT32 last_pass_rate = opj_mqc_numbytes(mqc);
2376         for (passno = cblk->totalpasses; passno > 0;) {
2377             opj_tcd_pass_t *pass = &cblk->passes[--passno];
2378             if (pass->rate > last_pass_rate) {
2379                 pass->rate = last_pass_rate;
2380             } else {
2381                 last_pass_rate = pass->rate;
2382             }
2383         }
2384     }
2385
2386     for (passno = 0; passno < cblk->totalpasses; passno++) {
2387         opj_tcd_pass_t *pass = &cblk->passes[passno];
2388
2389         /* Prevent generation of FF as last data byte of a pass*/
2390         /* For terminating passes, the flushing procedure ensured this already */
2391         assert(pass->rate > 0);
2392         if (cblk->data[pass->rate - 1] == 0xFF) {
2393             pass->rate--;
2394         }
2395         pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate);
2396     }
2397
2398 #ifdef EXTRA_DEBUG
2399     printf(" len=%d\n", (cblk->totalpasses) ? opj_mqc_numbytes(mqc) : 0);
2400
2401     /* Check that there not 0xff >=0x90 sequences */
2402     if (cblk->totalpasses) {
2403         OPJ_UINT32 i;
2404         OPJ_UINT32 len = opj_mqc_numbytes(mqc);
2405         for (i = 1; i < len; ++i) {
2406             if (cblk->data[i - 1] == 0xff && cblk->data[i] >= 0x90) {
2407                 printf("0xff %02x at offset %d\n", cblk->data[i], i - 1);
2408                 abort();
2409             }
2410         }
2411     }
2412 #endif
2413 }