937f420ad7e4846860bdf0c0b8e94613eafb5a07
[openjpeg.git] / src / lib / openjp2 / t1.c
1 /*
2  * The copyright in this software is being made available under the 2-clauses
3  * BSD License, included below. This software may be subject to other third
4  * party and contributor rights, including patent rights, and no such rights
5  * are granted under this license.
6  *
7  * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
8  * Copyright (c) 2002-2014, Professor Benoit Macq
9  * Copyright (c) 2001-2003, David Janssens
10  * Copyright (c) 2002-2003, Yannick Verschueren
11  * Copyright (c) 2003-2007, Francois-Olivier Devaux
12  * Copyright (c) 2003-2014, Antonin Descampe
13  * Copyright (c) 2005, Herve Drolon, FreeImage Team
14  * Copyright (c) 2007, Callum Lerwick <seg@haxxed.com>
15  * Copyright (c) 2012, Carl Hetherington
16  * Copyright (c) 2017, IntoPIX SA <support@intopix.com>
17  * All rights reserved.
18  *
19  * Redistribution and use in source and binary forms, with or without
20  * modification, are permitted provided that the following conditions
21  * are met:
22  * 1. Redistributions of source code must retain the above copyright
23  *    notice, this list of conditions and the following disclaimer.
24  * 2. Redistributions in binary form must reproduce the above copyright
25  *    notice, this list of conditions and the following disclaimer in the
26  *    documentation and/or other materials provided with the distribution.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
29  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38  * POSSIBILITY OF SUCH DAMAGE.
39  */
40
41 #define OPJ_SKIP_POISON
42 #include "opj_includes.h"
43
44 #ifdef __SSE__
45 #include <xmmintrin.h>
46 #endif
47 #ifdef __SSE2__
48 #include <emmintrin.h>
49 #endif
50
51 #if defined(__GNUC__)
52 #pragma GCC poison malloc calloc realloc free
53 #endif
54
55 #include "t1_luts.h"
56
57 /** @defgroup T1 T1 - Implementation of the tier-1 coding */
58 /*@{*/
59
/**
 * Flag word that covers sample (x, y) of the code-block referenced by the
 * variable `t1` of the expanding scope (t1 is NOT a macro parameter).
 * The index skips one word on the left (x + 1) and one row on top
 * ((y / 4) + 1); a row of flags is t1->w + 2 words wide and y is divided by 4
 * before the row is selected.
 * Both arguments are parenthesised so that compound expressions such as
 * T1_FLAGS(i + 1, k + 4) select the intended word.
 */
#define T1_FLAGS(x, y) (t1->flags[(x) + 1 + (((y) / 4) + 1) * (t1->w + 2)])

/**
 * Make `curctx` point at entry `ctxno` of the ctxs array of the variable
 * named `mqc` in the expanding scope (mqc is NOT a macro parameter).
 */
#define opj_t1_setcurctx(curctx, ctxno)  curctx = &(mqc)->ctxs[(OPJ_UINT32)(ctxno)]
63
64 /** @name Local static functions */
65 /*@{*/
66
67 static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f);
68 static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f);
69 static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos);
70 static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos);
71 static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
72                                        OPJ_UINT32 s, OPJ_UINT32 stride,
73                                        OPJ_UINT32 vsc);
74
75
76 /**
77 Decode significant pass
78 */
79
80 static INLINE void opj_t1_dec_sigpass_step_raw(
81     opj_t1_t *t1,
82     opj_flag_t *flagsp,
83     OPJ_INT32 *datap,
84     OPJ_INT32 oneplushalf,
85     OPJ_UINT32 vsc,
86     OPJ_UINT32 row);
87 static INLINE void opj_t1_dec_sigpass_step_mqc(
88     opj_t1_t *t1,
89     opj_flag_t *flagsp,
90     OPJ_INT32 *datap,
91     OPJ_INT32 oneplushalf,
92     OPJ_UINT32 row,
93     OPJ_UINT32 flags_stride,
94     OPJ_UINT32 vsc);
95
96 /**
97 Encode significant pass
98 */
99 static void opj_t1_enc_sigpass(opj_t1_t *t1,
100                                OPJ_INT32 bpno,
101                                OPJ_INT32 *nmsedec,
102                                OPJ_BYTE type,
103                                OPJ_UINT32 cblksty);
104
105 /**
106 Decode significant pass
107 */
108 static void opj_t1_dec_sigpass_raw(
109     opj_t1_t *t1,
110     OPJ_INT32 bpno,
111     OPJ_INT32 cblksty);
112
113 /**
114 Encode refinement pass
115 */
116 static void opj_t1_enc_refpass(opj_t1_t *t1,
117                                OPJ_INT32 bpno,
118                                OPJ_INT32 *nmsedec,
119                                OPJ_BYTE type);
120
121 /**
122 Decode refinement pass
123 */
124 static void opj_t1_dec_refpass_raw(
125     opj_t1_t *t1,
126     OPJ_INT32 bpno);
127
128
129 /**
130 Decode refinement pass
131 */
132
133 static INLINE void  opj_t1_dec_refpass_step_raw(
134     opj_t1_t *t1,
135     opj_flag_t *flagsp,
136     OPJ_INT32 *datap,
137     OPJ_INT32 poshalf,
138     OPJ_UINT32 row);
139 static INLINE void opj_t1_dec_refpass_step_mqc(
140     opj_t1_t *t1,
141     opj_flag_t *flagsp,
142     OPJ_INT32 *datap,
143     OPJ_INT32 poshalf,
144     OPJ_UINT32 row);
145
146
147 /**
148 Decode clean-up pass
149 */
150
151 static void opj_t1_dec_clnpass_step(
152     opj_t1_t *t1,
153     opj_flag_t *flagsp,
154     OPJ_INT32 *datap,
155     OPJ_INT32 oneplushalf,
156     OPJ_UINT32 row,
157     OPJ_UINT32 vsc);
158
159 /**
160 Encode clean-up pass
161 */
162 static void opj_t1_enc_clnpass(
163     opj_t1_t *t1,
164     OPJ_INT32 bpno,
165     OPJ_INT32 *nmsedec,
166     OPJ_UINT32 cblksty);
167
168 static OPJ_FLOAT64 opj_t1_getwmsedec(
169     OPJ_INT32 nmsedec,
170     OPJ_UINT32 compno,
171     OPJ_UINT32 level,
172     OPJ_UINT32 orient,
173     OPJ_INT32 bpno,
174     OPJ_UINT32 qmfbid,
175     OPJ_FLOAT64 stepsize,
176     OPJ_UINT32 numcomps,
177     const OPJ_FLOAT64 * mct_norms,
178     OPJ_UINT32 mct_numcomps);
179
180 /** Return "cumwmsedec" that should be used to increase tile->distotile */
181 static double opj_t1_encode_cblk(opj_t1_t *t1,
182                                  opj_tcd_cblk_enc_t* cblk,
183                                  OPJ_UINT32 orient,
184                                  OPJ_UINT32 compno,
185                                  OPJ_UINT32 level,
186                                  OPJ_UINT32 qmfbid,
187                                  OPJ_FLOAT64 stepsize,
188                                  OPJ_UINT32 cblksty,
189                                  OPJ_UINT32 numcomps,
190                                  const OPJ_FLOAT64 * mct_norms,
191                                  OPJ_UINT32 mct_numcomps);
192
193 /**
194 Decode 1 code-block
195 @param t1 T1 handle
196 @param cblk Code-block coding parameters
197 @param orient
198 @param roishift Region of interest shifting value
199 @param cblksty Code-block style
200 @param p_manager the event manager
201 @param p_manager_mutex mutex for the event manager
202 @param check_pterm whether PTERM correct termination should be checked
203 */
204 static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
205                                    opj_tcd_cblk_dec_t* cblk,
206                                    OPJ_UINT32 orient,
207                                    OPJ_UINT32 roishift,
208                                    OPJ_UINT32 cblksty,
209                                    opj_event_mgr_t *p_manager,
210                                    opj_mutex_t* p_manager_mutex,
211                                    OPJ_BOOL check_pterm);
212
213 static OPJ_BOOL opj_t1_allocate_buffers(opj_t1_t *t1,
214                                         OPJ_UINT32 w,
215                                         OPJ_UINT32 h);
216
217 /*@}*/
218
219 /*@}*/
220
221 /* ----------------------------------------------------------------------- */
222
223 static INLINE OPJ_BYTE opj_t1_getctxno_zc(opj_mqc_t *mqc, OPJ_UINT32 f)
224 {
225     return mqc->lut_ctxno_zc_orient[(f & T1_SIGMA_NEIGHBOURS)];
226 }
227
228 static INLINE OPJ_UINT32 opj_t1_getctxtno_sc_or_spb_index(OPJ_UINT32 fX,
229         OPJ_UINT32 pfX,
230         OPJ_UINT32 nfX,
231         OPJ_UINT32 ci)
232 {
233     /*
234       0 pfX T1_CHI_THIS           T1_LUT_SGN_W
235       1 tfX T1_SIGMA_1            T1_LUT_SIG_N
236       2 nfX T1_CHI_THIS           T1_LUT_SGN_E
237       3 tfX T1_SIGMA_3            T1_LUT_SIG_W
238       4  fX T1_CHI_(THIS - 1)     T1_LUT_SGN_N
239       5 tfX T1_SIGMA_5            T1_LUT_SIG_E
240       6  fX T1_CHI_(THIS + 1)     T1_LUT_SGN_S
241       7 tfX T1_SIGMA_7            T1_LUT_SIG_S
242     */
243
244     OPJ_UINT32 lu = (fX >> (ci * 3U)) & (T1_SIGMA_1 | T1_SIGMA_3 | T1_SIGMA_5 |
245                                          T1_SIGMA_7);
246
247     lu |= (pfX >> (T1_CHI_THIS_I      + (ci * 3U))) & (1U << 0);
248     lu |= (nfX >> (T1_CHI_THIS_I - 2U + (ci * 3U))) & (1U << 2);
249     if (ci == 0U) {
250         lu |= (fX >> (T1_CHI_0_I - 4U)) & (1U << 4);
251     } else {
252         lu |= (fX >> (T1_CHI_1_I - 4U + ((ci - 1U) * 3U))) & (1U << 4);
253     }
254     lu |= (fX >> (T1_CHI_2_I - 6U + (ci * 3U))) & (1U << 6);
255     return lu;
256 }
257
258 static INLINE OPJ_BYTE opj_t1_getctxno_sc(OPJ_UINT32 lu)
259 {
260     return lut_ctxno_sc[lu];
261 }
262
263 static INLINE OPJ_UINT32 opj_t1_getctxno_mag(OPJ_UINT32 f)
264 {
265     OPJ_UINT32 tmp = (f & T1_SIGMA_NEIGHBOURS) ? T1_CTXNO_MAG + 1 : T1_CTXNO_MAG;
266     OPJ_UINT32 tmp2 = (f & T1_MU_0) ? T1_CTXNO_MAG + 2 : tmp;
267     return tmp2;
268 }
269
270 static INLINE OPJ_BYTE opj_t1_getspb(OPJ_UINT32 lu)
271 {
272     return lut_spb[lu];
273 }
274
275 static OPJ_INT16 opj_t1_getnmsedec_sig(OPJ_UINT32 x, OPJ_UINT32 bitpos)
276 {
277     if (bitpos > 0) {
278         return lut_nmsedec_sig[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
279     }
280
281     return lut_nmsedec_sig0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
282 }
283
284 static OPJ_INT16 opj_t1_getnmsedec_ref(OPJ_UINT32 x, OPJ_UINT32 bitpos)
285 {
286     if (bitpos > 0) {
287         return lut_nmsedec_ref[(x >> (bitpos)) & ((1 << T1_NMSEDEC_BITS) - 1)];
288     }
289
290     return lut_nmsedec_ref0[x & ((1 << T1_NMSEDEC_BITS) - 1)];
291 }
292
/**
 * Record that sample `ci` of the flag word `flags` (stored at `flagsp`) has
 * become significant with sign bit `s`.
 *
 *  - `flags` gets T1_SIGMA_4 and the sign (at T1_CHI_1_I), shifted by 3 * ci;
 *  - the words at flagsp[-1] and flagsp[1] get T1_SIGMA_5 / T1_SIGMA_3,
 *    shifted by 3 * ci;
 *  - for ci == 0 (unless vsc is non-zero) the three words one `stride`
 *    before flagsp are updated, for ci == 3 the three words one `stride`
 *    after it.
 *
 * `flags` must be an lvalue.  Every parameter is parenthesised in the
 * expansion, but several are evaluated more than once, so arguments must not
 * have side effects.  The expansion is a brace block, as before.
 */
#define opj_t1_update_flags_macro(flags, flagsp, ci, s, stride, vsc) \
{ \
    /* east */ \
    (flagsp)[-1] |= T1_SIGMA_5 << (3U * (ci)); \
 \
    /* mark target as significant */ \
    (flags) |= (((s) << T1_CHI_1_I) | T1_SIGMA_4) << (3U * (ci)); \
 \
    /* west */ \
    (flagsp)[1] |= T1_SIGMA_3 << (3U * (ci)); \
 \
    /* north-west, north, north-east */ \
    if ((ci) == 0U && !(vsc)) { \
        opj_flag_t* north = (flagsp) - (stride); \
        *north |= ((s) << T1_CHI_5_I) | T1_SIGMA_16; \
        north[-1] |= T1_SIGMA_17; \
        north[1] |= T1_SIGMA_15; \
    } \
 \
    /* south-west, south, south-east */ \
    if ((ci) == 3U) { \
        opj_flag_t* south = (flagsp) + (stride); \
        *south |= ((s) << T1_CHI_0_I) | T1_SIGMA_1; \
        south[-1] |= T1_SIGMA_2; \
        south[1] |= T1_SIGMA_0; \
    } \
}
320
321
322 static INLINE void opj_t1_update_flags(opj_flag_t *flagsp, OPJ_UINT32 ci,
323                                        OPJ_UINT32 s, OPJ_UINT32 stride,
324                                        OPJ_UINT32 vsc)
325 {
326     opj_t1_update_flags_macro(*flagsp, flagsp, ci, s, stride, vsc);
327 }
328
329 /**
330 Encode significant pass
331 */
332 static INLINE void opj_t1_enc_sigpass_step(opj_t1_t *t1,
333         opj_flag_t *flagsp,
334         OPJ_INT32 *datap,
335         OPJ_INT32 bpno,
336         OPJ_INT32 one,
337         OPJ_INT32 *nmsedec,
338         OPJ_BYTE type,
339         OPJ_UINT32 ci,
340         OPJ_UINT32 vsc)
341 {
342     OPJ_UINT32 v;
343
344     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
345
346     OPJ_UINT32 const flags = *flagsp;
347
348     if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
349             (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
350         OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
351         v = (opj_int_abs(*datap) & one) ? 1 : 0;
352 #ifdef DEBUG_ENC_SIG
353         fprintf(stderr, "   ctxt1=%d\n", ctxt1);
354 #endif
355         opj_mqc_setcurctx(mqc, ctxt1);
356         if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
357             opj_mqc_bypass_enc(mqc, v);
358         } else {
359             opj_mqc_encode(mqc, v);
360         }
361         if (v) {
362             OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index(
363                                 *flagsp,
364                                 flagsp[-1], flagsp[1],
365                                 ci);
366             OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu);
367             v = *datap < 0 ? 1U : 0U;
368             *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
369                                               (OPJ_UINT32)bpno);
370 #ifdef DEBUG_ENC_SIG
371             fprintf(stderr, "   ctxt2=%d\n", ctxt2);
372 #endif
373             opj_mqc_setcurctx(mqc, ctxt2);
374             if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
375                 opj_mqc_bypass_enc(mqc, v);
376             } else {
377                 OPJ_UINT32 spb = opj_t1_getspb(lu);
378 #ifdef DEBUG_ENC_SIG
379                 fprintf(stderr, "   spb=%d\n", spb);
380 #endif
381                 opj_mqc_encode(mqc, v ^ spb);
382             }
383             opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
384         }
385         *flagsp |= T1_PI_THIS << (ci * 3U);
386     }
387 }
388
389 static INLINE void opj_t1_dec_sigpass_step_raw(
390     opj_t1_t *t1,
391     opj_flag_t *flagsp,
392     OPJ_INT32 *datap,
393     OPJ_INT32 oneplushalf,
394     OPJ_UINT32 vsc,
395     OPJ_UINT32 ci)
396 {
397     OPJ_UINT32 v;
398     opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
399
400     OPJ_UINT32 const flags = *flagsp;
401
402     if ((flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) == 0U &&
403             (flags & (T1_SIGMA_NEIGHBOURS << (ci * 3U))) != 0U) {
404         if (opj_mqc_raw_decode(mqc)) {
405             v = opj_mqc_raw_decode(mqc);
406             *datap = v ? -oneplushalf : oneplushalf;
407             opj_t1_update_flags(flagsp, ci, v, t1->w + 2, vsc);
408         }
409         *flagsp |= T1_PI_THIS << (ci * 3U);
410     }
411 }
412
/**
 * Decode significant pass, MQ-coded variant: one sample, macro form.
 *
 * `flags` is an lvalue holding the flag word of the column, `flagsp` points
 * at that word in memory (its neighbours flagsp[-1] / flagsp[1] are read and
 * updated), `data[ci * data_stride]` is the sample.  `v` is a scratch lvalue;
 * `mqc`, `curctx`, `a`, `c`, `ct` are forwarded to opj_mqc_decode_macro().
 *
 * The sample is processed when its T1_SIGMA_THIS and T1_PI_THIS bits are
 * clear and at least one T1_SIGMA_NEIGHBOURS bit is set.  T1_PI_THIS is then
 * set in `flags` (not written back through flagsp by this macro).
 *
 * NOTE: opj_t1_setcurctx() refers to a variable literally named `mqc` in the
 * expanding scope.  Parameters are parenthesised in the expansion but
 * evaluated several times; arguments must not have side effects.
 */
#define opj_t1_dec_sigpass_step_mqc_macro(flags, flagsp, flags_stride, data, \
                                          data_stride, ci, mqc, curctx, \
                                          v, a, c, ct, oneplushalf, vsc) \
{ \
    if (((flags) & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == 0U && \
        ((flags) & (T1_SIGMA_NEIGHBOURS << ((ci) * 3U))) != 0U) { \
        OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, (flags) >> ((ci) * 3U)); \
        opj_t1_setcurctx(curctx, ctxt1); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        if (v) { \
            OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                                flags, \
                                (flagsp)[-1], (flagsp)[1], \
                                ci); \
            OPJ_UINT32 ctxt2 = opj_t1_getctxno_sc(lu); \
            OPJ_UINT32 spb = opj_t1_getspb(lu); \
            opj_t1_setcurctx(curctx, ctxt2); \
            opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
            /* decoded bit XOR lut_spb value gives the sign */ \
            v = v ^ spb; \
            (data)[(ci) * (data_stride)] = v ? -(oneplushalf) : (oneplushalf); \
            opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
        } \
        (flags) |= T1_PI_THIS << ((ci) * 3U); \
    } \
}
438
439 static INLINE void opj_t1_dec_sigpass_step_mqc(
440     opj_t1_t *t1,
441     opj_flag_t *flagsp,
442     OPJ_INT32 *datap,
443     OPJ_INT32 oneplushalf,
444     OPJ_UINT32 ci,
445     OPJ_UINT32 flags_stride,
446     OPJ_UINT32 vsc)
447 {
448     OPJ_UINT32 v;
449
450     opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
451     opj_t1_dec_sigpass_step_mqc_macro(*flagsp, flagsp, flags_stride, datap,
452                                       0, ci, mqc, mqc->curctx,
453                                       v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
454 }
455
456 static void opj_t1_enc_sigpass(opj_t1_t *t1,
457                                OPJ_INT32 bpno,
458                                OPJ_INT32 *nmsedec,
459                                OPJ_BYTE type,
460                                OPJ_UINT32 cblksty
461                               )
462 {
463     OPJ_UINT32 i, k;
464     OPJ_INT32 const one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
465     opj_flag_t* f = &T1_FLAGS(0, 0);
466     OPJ_UINT32 const extra = 2;
467
468     *nmsedec = 0;
469 #ifdef DEBUG_ENC_SIG
470     fprintf(stderr, "enc_sigpass: bpno=%d\n", bpno);
471 #endif
472     for (k = 0; k < (t1->h & ~3U); k += 4) {
473 #ifdef DEBUG_ENC_SIG
474         fprintf(stderr, " k=%d\n", k);
475 #endif
476         for (i = 0; i < t1->w; ++i) {
477 #ifdef DEBUG_ENC_SIG
478             fprintf(stderr, " i=%d\n", i);
479 #endif
480             if (*f == 0U) {
481                 /* Nothing to do for any of the 4 data points */
482                 f++;
483                 continue;
484             }
485             opj_t1_enc_sigpass_step(
486                 t1,
487                 f,
488                 &t1->data[((k + 0) * t1->data_stride) + i],
489                 bpno,
490                 one,
491                 nmsedec,
492                 type,
493                 0, cblksty & J2K_CCP_CBLKSTY_VSC);
494             opj_t1_enc_sigpass_step(
495                 t1,
496                 f,
497                 &t1->data[((k + 1) * t1->data_stride) + i],
498                 bpno,
499                 one,
500                 nmsedec,
501                 type,
502                 1, 0);
503             opj_t1_enc_sigpass_step(
504                 t1,
505                 f,
506                 &t1->data[((k + 2) * t1->data_stride) + i],
507                 bpno,
508                 one,
509                 nmsedec,
510                 type,
511                 2, 0);
512             opj_t1_enc_sigpass_step(
513                 t1,
514                 f,
515                 &t1->data[((k + 3) * t1->data_stride) + i],
516                 bpno,
517                 one,
518                 nmsedec,
519                 type,
520                 3, 0);
521             ++f;
522         }
523         f += extra;
524     }
525
526     if (k < t1->h) {
527         OPJ_UINT32 j;
528 #ifdef DEBUG_ENC_SIG
529         fprintf(stderr, " k=%d\n", k);
530 #endif
531         for (i = 0; i < t1->w; ++i) {
532 #ifdef DEBUG_ENC_SIG
533             fprintf(stderr, " i=%d\n", i);
534 #endif
535             if (*f == 0U) {
536                 /* Nothing to do for any of the 4 data points */
537                 f++;
538                 continue;
539             }
540             for (j = k; j < t1->h; ++j) {
541                 opj_t1_enc_sigpass_step(
542                     t1,
543                     f,
544                     &t1->data[(j * t1->data_stride) + i],
545                     bpno,
546                     one,
547                     nmsedec,
548                     type,
549                     j - k,
550                     (j == k && (cblksty & J2K_CCP_CBLKSTY_VSC) != 0));
551             }
552             ++f;
553         }
554     }
555 }
556
557 static void opj_t1_dec_sigpass_raw(
558     opj_t1_t *t1,
559     OPJ_INT32 bpno,
560     OPJ_INT32 cblksty)
561 {
562     OPJ_INT32 one, half, oneplushalf;
563     OPJ_UINT32 i, j, k;
564     OPJ_INT32 *data = t1->data;
565     opj_flag_t *flagsp = &T1_FLAGS(0, 0);
566     const OPJ_UINT32 l_w = t1->w;
567     one = 1 << bpno;
568     half = one >> 1;
569     oneplushalf = one | half;
570
571     for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
572         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
573             opj_flag_t flags = *flagsp;
574             if (flags != 0) {
575                 opj_t1_dec_sigpass_step_raw(
576                     t1,
577                     flagsp,
578                     data,
579                     oneplushalf,
580                     cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
581                     0U);
582                 opj_t1_dec_sigpass_step_raw(
583                     t1,
584                     flagsp,
585                     data + l_w,
586                     oneplushalf,
587                     OPJ_FALSE, /* vsc */
588                     1U);
589                 opj_t1_dec_sigpass_step_raw(
590                     t1,
591                     flagsp,
592                     data + 2 * l_w,
593                     oneplushalf,
594                     OPJ_FALSE, /* vsc */
595                     2U);
596                 opj_t1_dec_sigpass_step_raw(
597                     t1,
598                     flagsp,
599                     data + 3 * l_w,
600                     oneplushalf,
601                     OPJ_FALSE, /* vsc */
602                     3U);
603             }
604         }
605     }
606     if (k < t1->h) {
607         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
608             for (j = 0; j < t1->h - k; ++j) {
609                 opj_t1_dec_sigpass_step_raw(
610                     t1,
611                     flagsp,
612                     data + j * l_w,
613                     oneplushalf,
614                     cblksty & J2K_CCP_CBLKSTY_VSC, /* vsc */
615                     j);
616             }
617         }
618     }
619 }
620
/**
 * Body of the MQ-coded significance-pass decoders.
 *
 * t1           code-block handle (pointer expression)
 * bpno         bit-plane number; one = 1 << bpno
 * vsc          value forwarded as `vsc` to the first sample of each group
 * w, h         width / height used for the traversal (w is also used as the
 *              distance between two rows of t1->data)
 * flags_stride number of flag words per flag row
 *
 * The complete groups of four rows run on local copies of the MQ coder
 * state (DOWNLOAD_MQC_VARIABLES / UPLOAD_MQC_VARIABLES); the state is
 * written back before the remaining rows are handled through
 * opj_t1_dec_sigpass_step_mqc(), which works on t1->mqc directly.
 *
 * All parameters are parenthesised in the expansion; they are evaluated
 * several times, so arguments must not have side effects.
 */
#define opj_t1_dec_sigpass_mqc_internal(t1, bpno, vsc, w, h, flags_stride) \
{ \
        OPJ_INT32 one, half, oneplushalf; \
        OPJ_UINT32 i, j, k; \
        register OPJ_INT32 *data = (t1)->data; \
        register opj_flag_t *flagsp = &(t1)->flags[(flags_stride) + 1]; \
        const OPJ_UINT32 l_w = (w); \
        opj_mqc_t* mqc = &((t1)->mqc); \
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
        register OPJ_UINT32 v; \
        one = 1 << (bpno); \
        half = one >> 1; \
        oneplushalf = one | half; \
        /* complete groups of four rows */ \
        for (k = 0; k < ((h) & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                        opj_flag_t flags = *flagsp; \
                        if( flags != 0 ) { \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, (flags_stride), data, \
                                l_w, 0, mqc, curctx, v, a, c, ct, oneplushalf, (vsc)); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, (flags_stride), data, \
                                l_w, 1, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, (flags_stride), data, \
                                l_w, 2, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            opj_t1_dec_sigpass_step_mqc_macro( \
                                flags, flagsp, (flags_stride), data, \
                                l_w, 3, mqc, curctx, v, a, c, ct, oneplushalf, OPJ_FALSE); \
                            /* write the locally updated word back */ \
                            *flagsp = flags; \
                        } \
                } \
        } \
        UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
        /* remaining 1..3 rows */ \
        if( k < (h) ) { \
            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                for (j = 0; j < (h) - k; ++j) { \
                        opj_t1_dec_sigpass_step_mqc((t1), flagsp, \
                            data + j * l_w, oneplushalf, j, (flags_stride), (vsc)); \
                } \
            } \
        } \
}
664
665 static void opj_t1_dec_sigpass_mqc_64x64_novsc(
666     opj_t1_t *t1,
667     OPJ_INT32 bpno)
668 {
669     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
670 }
671
672 static void opj_t1_dec_sigpass_mqc_64x64_vsc(
673     opj_t1_t *t1,
674     OPJ_INT32 bpno)
675 {
676     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
677 }
678
679 static void opj_t1_dec_sigpass_mqc_generic_novsc(
680     opj_t1_t *t1,
681     OPJ_INT32 bpno)
682 {
683     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
684                                     t1->w + 2U);
685 }
686
687 static void opj_t1_dec_sigpass_mqc_generic_vsc(
688     opj_t1_t *t1,
689     OPJ_INT32 bpno)
690 {
691     opj_t1_dec_sigpass_mqc_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
692                                     t1->w + 2U);
693 }
694
695 static void opj_t1_dec_sigpass_mqc(
696     opj_t1_t *t1,
697     OPJ_INT32 bpno,
698     OPJ_INT32 cblksty)
699 {
700     if (t1->w == 64 && t1->h == 64) {
701         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
702             opj_t1_dec_sigpass_mqc_64x64_vsc(t1, bpno);
703         } else {
704             opj_t1_dec_sigpass_mqc_64x64_novsc(t1, bpno);
705         }
706     } else {
707         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
708             opj_t1_dec_sigpass_mqc_generic_vsc(t1, bpno);
709         } else {
710             opj_t1_dec_sigpass_mqc_generic_novsc(t1, bpno);
711         }
712     }
713 }
714
715 /**
716 Encode refinement pass step
717 */
718 static INLINE void opj_t1_enc_refpass_step(opj_t1_t *t1,
719         opj_flag_t *flagsp,
720         OPJ_INT32 *datap,
721         OPJ_INT32 bpno,
722         OPJ_INT32 one,
723         OPJ_INT32 *nmsedec,
724         OPJ_BYTE type,
725         OPJ_UINT32 ci)
726 {
727     OPJ_UINT32 v;
728
729     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
730
731     OPJ_UINT32 const shift_flags =
732         (*flagsp >> (ci * 3U));
733
734     if ((shift_flags & (T1_SIGMA_THIS | T1_PI_THIS)) == T1_SIGMA_THIS) {
735         OPJ_UINT32 ctxt = opj_t1_getctxno_mag(shift_flags);
736         *nmsedec += opj_t1_getnmsedec_ref((OPJ_UINT32)opj_int_abs(*datap),
737                                           (OPJ_UINT32)bpno);
738         v = (opj_int_abs(*datap) & one) ? 1 : 0;
739 #ifdef DEBUG_ENC_REF
740         fprintf(stderr, "  ctxt=%d\n", ctxt);
741 #endif
742         opj_mqc_setcurctx(mqc, ctxt);
743         if (type == T1_TYPE_RAW) {  /* BYPASS/LAZY MODE */
744             opj_mqc_bypass_enc(mqc, v);
745         } else {
746             opj_mqc_encode(mqc, v);
747         }
748         *flagsp |= T1_MU_THIS << (ci * 3U);
749     }
750 }
751
752
753 static INLINE void opj_t1_dec_refpass_step_raw(
754     opj_t1_t *t1,
755     opj_flag_t *flagsp,
756     OPJ_INT32 *datap,
757     OPJ_INT32 poshalf,
758     OPJ_UINT32 ci)
759 {
760     OPJ_UINT32 v;
761
762     opj_mqc_t *mqc = &(t1->mqc);       /* RAW component */
763
764     if ((*flagsp & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U))) ==
765             (T1_SIGMA_THIS << (ci * 3U))) {
766         v = opj_mqc_raw_decode(mqc);
767         *datap += (v ^ (*datap < 0)) ? poshalf : -poshalf;
768         *flagsp |= T1_MU_THIS << (ci * 3U);
769     }
770 }
771
/**
 * Decode refinement pass, MQ-coded variant: one sample, macro form.
 *
 * `flags` is an lvalue holding the flag word of the column and
 * `data[ci * data_stride]` is the sample.  `v` is a scratch lvalue; `mqc`,
 * `curctx`, `a`, `c`, `ct` are forwarded to opj_mqc_decode_macro().
 *
 * Applies when the sample's T1_SIGMA_THIS bit is set and its T1_PI_THIS bit
 * is clear: one bit is decoded in the context given by
 * opj_t1_getctxno_mag(), the sample is moved by +/- poshalf and T1_MU_THIS
 * is set in `flags`.
 *
 * NOTE: opj_t1_setcurctx() refers to a variable literally named `mqc` in the
 * expanding scope.  Parameters are parenthesised in the expansion but
 * evaluated several times; arguments must not have side effects.
 */
#define opj_t1_dec_refpass_step_mqc_macro(flags, data, data_stride, ci, \
                                          mqc, curctx, v, a, c, ct, poshalf) \
{ \
    if (((flags) & ((T1_SIGMA_THIS | T1_PI_THIS) << ((ci) * 3U))) == \
            (T1_SIGMA_THIS << ((ci) * 3U))) { \
        OPJ_UINT32 ctxt = opj_t1_getctxno_mag((flags) >> ((ci) * 3U)); \
        opj_t1_setcurctx(curctx, ctxt); \
        opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
        (data)[(ci) * (data_stride)] += \
            (v ^ ((data)[(ci) * (data_stride)] < 0)) ? (poshalf) : -(poshalf); \
        (flags) |= T1_MU_THIS << ((ci) * 3U); \
    } \
}
784
785 static INLINE void opj_t1_dec_refpass_step_mqc(
786     opj_t1_t *t1,
787     opj_flag_t *flagsp,
788     OPJ_INT32 *datap,
789     OPJ_INT32 poshalf,
790     OPJ_UINT32 ci)
791 {
792     OPJ_UINT32 v;
793
794     opj_mqc_t *mqc = &(t1->mqc);       /* MQC component */
795     opj_t1_dec_refpass_step_mqc_macro(*flagsp, datap, 0, ci,
796                                       mqc, mqc->curctx, v, mqc->a, mqc->c,
797                                       mqc->ct, poshalf);
798 }
799
800 static void opj_t1_enc_refpass(
801     opj_t1_t *t1,
802     OPJ_INT32 bpno,
803     OPJ_INT32 *nmsedec,
804     OPJ_BYTE type)
805 {
806     OPJ_UINT32 i, k;
807     const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
808     opj_flag_t* f = &T1_FLAGS(0, 0);
809     const OPJ_UINT32 extra = 2U;
810
811     *nmsedec = 0;
812 #ifdef DEBUG_ENC_REF
813     fprintf(stderr, "enc_refpass: bpno=%d\n", bpno);
814 #endif
815     for (k = 0; k < (t1->h & ~3U); k += 4) {
816 #ifdef DEBUG_ENC_REF
817         fprintf(stderr, " k=%d\n", k);
818 #endif
819         for (i = 0; i < t1->w; ++i) {
820 #ifdef DEBUG_ENC_REF
821             fprintf(stderr, " i=%d\n", i);
822 #endif
823             if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
824                 /* none significant */
825                 f++;
826                 continue;
827             }
828             if ((*f & (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) ==
829                     (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3)) {
830                 /* all processed by sigpass */
831                 f++;
832                 continue;
833             }
834
835             opj_t1_enc_refpass_step(
836                 t1,
837                 f,
838                 &t1->data[((k + 0) * t1->data_stride) + i],
839                 bpno,
840                 one,
841                 nmsedec,
842                 type,
843                 0);
844             opj_t1_enc_refpass_step(
845                 t1,
846                 f,
847                 &t1->data[((k + 1) * t1->data_stride) + i],
848                 bpno,
849                 one,
850                 nmsedec,
851                 type,
852                 1);
853             opj_t1_enc_refpass_step(
854                 t1,
855                 f,
856                 &t1->data[((k + 2) * t1->data_stride) + i],
857                 bpno,
858                 one,
859                 nmsedec,
860                 type,
861                 2);
862             opj_t1_enc_refpass_step(
863                 t1,
864                 f,
865                 &t1->data[((k + 3) * t1->data_stride) + i],
866                 bpno,
867                 one,
868                 nmsedec,
869                 type,
870                 3);
871             ++f;
872         }
873         f += extra;
874     }
875
876     if (k < t1->h) {
877         OPJ_UINT32 j;
878 #ifdef DEBUG_ENC_REF
879         fprintf(stderr, " k=%d\n", k);
880 #endif
881         for (i = 0; i < t1->w; ++i) {
882 #ifdef DEBUG_ENC_REF
883             fprintf(stderr, " i=%d\n", i);
884 #endif
885             if ((*f & (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13)) == 0) {
886                 /* none significant */
887                 f++;
888                 continue;
889             }
890             for (j = k; j < t1->h; ++j) {
891                 opj_t1_enc_refpass_step(
892                     t1,
893                     f,
894                     &t1->data[(j * t1->data_stride) + i],
895                     bpno,
896                     one,
897                     nmsedec,
898                     type,
899                     j - k);
900             }
901             ++f;
902         }
903     }
904 }
905
906
907 static void opj_t1_dec_refpass_raw(
908     opj_t1_t *t1,
909     OPJ_INT32 bpno)
910 {
911     OPJ_INT32 one, poshalf;
912     OPJ_UINT32 i, j, k;
913     OPJ_INT32 *data = t1->data;
914     opj_flag_t *flagsp = &T1_FLAGS(0, 0);
915     const OPJ_UINT32 l_w = t1->w;
916     one = 1 << bpno;
917     poshalf = one >> 1;
918     for (k = 0; k < (t1->h & ~3U); k += 4, flagsp += 2, data += 3 * l_w) {
919         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
920             opj_flag_t flags = *flagsp;
921             if (flags != 0) {
922                 opj_t1_dec_refpass_step_raw(
923                     t1,
924                     flagsp,
925                     data,
926                     poshalf,
927                     0U);
928                 opj_t1_dec_refpass_step_raw(
929                     t1,
930                     flagsp,
931                     data + l_w,
932                     poshalf,
933                     1U);
934                 opj_t1_dec_refpass_step_raw(
935                     t1,
936                     flagsp,
937                     data + 2 * l_w,
938                     poshalf,
939                     2U);
940                 opj_t1_dec_refpass_step_raw(
941                     t1,
942                     flagsp,
943                     data + 3 * l_w,
944                     poshalf,
945                     3U);
946             }
947         }
948     }
949     if (k < t1->h) {
950         for (i = 0; i < l_w; ++i, ++flagsp, ++data) {
951             for (j = 0; j < t1->h - k; ++j) {
952                 opj_t1_dec_refpass_step_raw(
953                     t1,
954                     flagsp,
955                     data + j * l_w,
956                     poshalf,
957                     j);
958             }
959         }
960     }
961 }
962
/**
Body of the MQ-coded decode refinement pass, written as a macro so that it
can be expanded with either constant or run-time dimensions.

The arguments are substituted textually: `w` is evaluated once (into l_w),
while `h` and `flags_stride` appear several times in the expansion, so they
should be side-effect free expressions.

@param t1           Tier-1 handle (data, flags and mqc members are used)
@param bpno         Bit-plane number; poshalf = (1 << bpno) >> 1
@param w            Code-block width
@param h            Code-block height
@param flags_stride Number of entries in one row of t1->flags

NOTE(review): DOWNLOAD_MQC_VARIABLES / UPLOAD_MQC_VARIABLES presumably copy
the MQ decoder state into the locals curctx, c, a, ct and back - confirm in
their definition.
*/
#define opj_t1_dec_refpass_mqc_internal(t1, bpno, w, h, flags_stride) \
{ \
        OPJ_INT32 one, poshalf; \
        OPJ_UINT32 i, j, k; \
        register OPJ_INT32 *data = t1->data; \
        /* skip the first flags row and the first entry of the second one */ \
        register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
        const OPJ_UINT32 l_w = w; \
        opj_mqc_t* mqc = &(t1->mqc); \
        DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
        register OPJ_UINT32 v; \
        one = 1 << bpno; \
        poshalf = one >> 1; \
        /* complete stripes of 4 rows: one flags word per column */ \
        for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
                for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                        /* work on a local copy, stored back once at the end */ \
                        opj_flag_t flags = *flagsp; \
                        if( flags != 0 ) { \
                            opj_t1_dec_refpass_step_mqc_macro( \
                                flags, data, l_w, 0, \
                                mqc, curctx, v, a, c, ct, poshalf); \
                            opj_t1_dec_refpass_step_mqc_macro( \
                                flags, data, l_w, 1, \
                                mqc, curctx, v, a, c, ct, poshalf); \
                            opj_t1_dec_refpass_step_mqc_macro( \
                                flags, data, l_w, 2, \
                                mqc, curctx, v, a, c, ct, poshalf); \
                            opj_t1_dec_refpass_step_mqc_macro( \
                                flags, data, l_w, 3, \
                                mqc, curctx, v, a, c, ct, poshalf); \
                            *flagsp = flags; \
                        } \
                } \
        } \
        UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
        /* trailing stripe of h - k rows, handled by the function variant */ \
        if( k < h ) { \
            for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
                for (j = 0; j < h - k; ++j) { \
                        opj_t1_dec_refpass_step_mqc(t1, flagsp, data + j * l_w, poshalf, j); \
                } \
            } \
        } \
}
1004
1005 static void opj_t1_dec_refpass_mqc_64x64(
1006     opj_t1_t *t1,
1007     OPJ_INT32 bpno)
1008 {
1009     opj_t1_dec_refpass_mqc_internal(t1, bpno, 64, 64, 66);
1010 }
1011
1012 static void opj_t1_dec_refpass_mqc_generic(
1013     opj_t1_t *t1,
1014     OPJ_INT32 bpno)
1015 {
1016     opj_t1_dec_refpass_mqc_internal(t1, bpno, t1->w, t1->h, t1->w + 2U);
1017 }
1018
1019 static void opj_t1_dec_refpass_mqc(
1020     opj_t1_t *t1,
1021     OPJ_INT32 bpno)
1022 {
1023     if (t1->w == 64 && t1->h == 64) {
1024         opj_t1_dec_refpass_mqc_64x64(t1, bpno);
1025     } else {
1026         opj_t1_dec_refpass_mqc_generic(t1, bpno);
1027     }
1028 }
1029
1030 /**
1031 Encode clean-up pass step
1032 */
1033 static void opj_t1_enc_clnpass_step(
1034     opj_t1_t *t1,
1035     opj_flag_t *flagsp,
1036     OPJ_INT32 *datap,
1037     OPJ_INT32 bpno,
1038     OPJ_INT32 one,
1039     OPJ_INT32 *nmsedec,
1040     OPJ_UINT32 agg,
1041     OPJ_UINT32 runlen,
1042     OPJ_UINT32 lim,
1043     OPJ_UINT32 cblksty)
1044 {
1045     OPJ_UINT32 v;
1046     OPJ_UINT32 ci;
1047     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1048
1049     const OPJ_UINT32 check = (T1_SIGMA_4 | T1_SIGMA_7 | T1_SIGMA_10 | T1_SIGMA_13 |
1050                               T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1051
1052     if ((*flagsp & check) == check) {
1053         if (runlen == 0) {
1054             *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1055         } else if (runlen == 1) {
1056             *flagsp &= ~(T1_PI_1 | T1_PI_2 | T1_PI_3);
1057         } else if (runlen == 2) {
1058             *flagsp &= ~(T1_PI_2 | T1_PI_3);
1059         } else if (runlen == 3) {
1060             *flagsp &= ~(T1_PI_3);
1061         }
1062         return;
1063     }
1064
1065     for (ci = runlen; ci < lim; ++ci) {
1066         OPJ_UINT32 vsc;
1067         opj_flag_t flags;
1068         OPJ_UINT32 ctxt1;
1069
1070         flags = *flagsp;
1071
1072         if ((agg != 0) && (ci == runlen)) {
1073             goto LABEL_PARTIAL;
1074         }
1075
1076         if (!(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {
1077             ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U));
1078 #ifdef DEBUG_ENC_CLN
1079             printf("   ctxt1=%d\n", ctxt1);
1080 #endif
1081             opj_mqc_setcurctx(mqc, ctxt1);
1082             v = (opj_int_abs(*datap) & one) ? 1 : 0;
1083             opj_mqc_encode(mqc, v);
1084             if (v) {
1085                 OPJ_UINT32 ctxt2, spb;
1086                 OPJ_UINT32 lu;
1087 LABEL_PARTIAL:
1088                 lu = opj_t1_getctxtno_sc_or_spb_index(
1089                          *flagsp,
1090                          flagsp[-1], flagsp[1],
1091                          ci);
1092                 *nmsedec += opj_t1_getnmsedec_sig((OPJ_UINT32)opj_int_abs(*datap),
1093                                                   (OPJ_UINT32)bpno);
1094                 ctxt2 = opj_t1_getctxno_sc(lu);
1095 #ifdef DEBUG_ENC_CLN
1096                 printf("   ctxt2=%d\n", ctxt2);
1097 #endif
1098                 opj_mqc_setcurctx(mqc, ctxt2);
1099
1100                 v = *datap < 0 ? 1U : 0U;
1101                 spb = opj_t1_getspb(lu);
1102 #ifdef DEBUG_ENC_CLN
1103                 printf("   spb=%d\n", spb);
1104 #endif
1105                 opj_mqc_encode(mqc, v ^ spb);
1106                 vsc = ((cblksty & J2K_CCP_CBLKSTY_VSC) && (ci == 0)) ? 1 : 0;
1107                 opj_t1_update_flags(flagsp, ci, v, t1->w + 2U, vsc);
1108             }
1109         }
1110         *flagsp &= ~(T1_PI_THIS << (3U * ci));
1111         datap += t1->data_stride;
1112     }
1113 }
1114
/**
Decode clean-up pass step for the sample at index `ci` of a column, as a
macro working on caller-provided copies of the MQ decoder state.

All arguments are substituted textually and several of them are expanded
more than once.

@param check_flags  When true, the sample is skipped if its SIGMA or PI bit
                    is set in `flags`
@param partial      When true, the significance decision is not decoded:
                    the sample is taken as significant and only its sign is
                    decoded
@param flags        Flags word of the column (lvalue, updated through
                    opj_t1_update_flags_macro)
@param flagsp       Address of that flags word; flagsp[-1] and flagsp[1]
                    are read as well
@param flags_stride Number of entries in one row of the flags buffer
@param data         First sample of the column in the current stripe
@param data_stride  Distance between two rows of `data`
@param ci           Index of the sample inside the column
@param mqc, curctx, a, c, ct  MQ decoder and its working variables
@param v            Scratch variable receiving the decoded decisions
@param oneplushalf  Magnitude stored for a newly significant sample
                    (negated when the decoded sign is set)
@param vsc          Forwarded to opj_t1_update_flags_macro
*/
#define opj_t1_dec_clnpass_step_macro(check_flags, partial, \
                                      flags, flagsp, flags_stride, data, \
                                      data_stride, ci, mqc, curctx, \
                                      v, a, c, ct, oneplushalf, vsc) \
{ \
    if ( !check_flags || !(flags & ((T1_SIGMA_THIS | T1_PI_THIS) << (ci * 3U)))) {\
        /* do/while(0) only provides a target for the early `break` */ \
        do { \
            if( !partial ) { \
                OPJ_UINT32 ctxt1 = opj_t1_getctxno_zc(mqc, flags >> (ci * 3U)); \
                opj_t1_setcurctx(curctx, ctxt1); \
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                /* sample stays insignificant: no sign to decode */ \
                if( !v ) \
                    break; \
            } \
            { \
                OPJ_UINT32 lu = opj_t1_getctxtno_sc_or_spb_index( \
                                    flags, flagsp[-1], flagsp[1], \
                                    ci); \
                opj_t1_setcurctx(curctx, opj_t1_getctxno_sc(lu)); \
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                v = v ^ opj_t1_getspb(lu); \
                data[ci*data_stride] = v ? -oneplushalf : oneplushalf; \
                opj_t1_update_flags_macro(flags, flagsp, ci, v, flags_stride, vsc); \
            } \
        } while(0); \
    } \
}
1142
1143 static void opj_t1_dec_clnpass_step(
1144     opj_t1_t *t1,
1145     opj_flag_t *flagsp,
1146     OPJ_INT32 *datap,
1147     OPJ_INT32 oneplushalf,
1148     OPJ_UINT32 ci,
1149     OPJ_UINT32 vsc)
1150 {
1151     OPJ_UINT32 v;
1152
1153     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1154     opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE,
1155                                   *flagsp, flagsp, t1->w + 2U, datap,
1156                                   0, ci, mqc, mqc->curctx,
1157                                   v, mqc->a, mqc->c, mqc->ct, oneplushalf, vsc);
1158 }
1159
1160 static void opj_t1_enc_clnpass(
1161     opj_t1_t *t1,
1162     OPJ_INT32 bpno,
1163     OPJ_INT32 *nmsedec,
1164     OPJ_UINT32 cblksty)
1165 {
1166     OPJ_UINT32 i, k;
1167     const OPJ_INT32 one = 1 << (bpno + T1_NMSEDEC_FRACBITS);
1168     OPJ_UINT32 agg, runlen;
1169
1170     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1171
1172     *nmsedec = 0;
1173 #ifdef DEBUG_ENC_CLN
1174     printf("enc_clnpass: bpno=%d\n", bpno);
1175 #endif
1176     for (k = 0; k < (t1->h & ~3U); k += 4) {
1177 #ifdef DEBUG_ENC_CLN
1178         printf(" k=%d\n", k);
1179 #endif
1180         for (i = 0; i < t1->w; ++i) {
1181 #ifdef DEBUG_ENC_CLN
1182             printf("  i=%d\n", i);
1183 #endif
1184             agg = !(T1_FLAGS(i, k));
1185 #ifdef DEBUG_ENC_CLN
1186             printf("   agg=%d\n", agg);
1187 #endif
1188             if (agg) {
1189                 for (runlen = 0; runlen < 4; ++runlen) {
1190                     if (opj_int_abs(t1->data[((k + runlen)*t1->data_stride) + i]) & one) {
1191                         break;
1192                     }
1193                 }
1194                 opj_mqc_setcurctx(mqc, T1_CTXNO_AGG);
1195                 opj_mqc_encode(mqc, runlen != 4);
1196                 if (runlen == 4) {
1197                     continue;
1198                 }
1199                 opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
1200                 opj_mqc_encode(mqc, runlen >> 1);
1201                 opj_mqc_encode(mqc, runlen & 1);
1202             } else {
1203                 runlen = 0;
1204             }
1205             opj_t1_enc_clnpass_step(
1206                 t1,
1207                 &T1_FLAGS(i, k),
1208                 &t1->data[((k + runlen) * t1->data_stride) + i],
1209                 bpno,
1210                 one,
1211                 nmsedec,
1212                 agg,
1213                 runlen,
1214                 4U,
1215                 cblksty);
1216         }
1217     }
1218     if (k < t1->h) {
1219         agg = 0;
1220         runlen = 0;
1221 #ifdef DEBUG_ENC_CLN
1222         printf(" k=%d\n", k);
1223 #endif
1224         for (i = 0; i < t1->w; ++i) {
1225 #ifdef DEBUG_ENC_CLN
1226             printf("  i=%d\n", i);
1227             printf("   agg=%d\n", agg);
1228 #endif
1229             opj_t1_enc_clnpass_step(
1230                 t1,
1231                 &T1_FLAGS(i, k),
1232                 &t1->data[((k + runlen) * t1->data_stride) + i],
1233                 bpno,
1234                 one,
1235                 nmsedec,
1236                 agg,
1237                 runlen,
1238                 t1->h - k,
1239                 cblksty);
1240         }
1241     }
1242 }
1243
/**
Body of the decode clean-up pass, written as a macro so that it can be
expanded with constant or run-time dimensions and with a constant `vsc`.

The arguments are substituted textually: `w` is evaluated once (into l_w),
`h`, `flags_stride` and `vsc` appear several times in the expansion.

@param t1           Tier-1 handle (data, flags and mqc members are used)
@param bpno         Bit-plane number; newly significant samples are set to
                    +/- ((1 << bpno) | ((1 << bpno) >> 1))
@param vsc          Passed to the step for sample 0 of each column in
                    complete stripes (the other samples get OPJ_FALSE), and
                    for every sample of the trailing stripe
@param w            Code-block width
@param h            Code-block height
@param flags_stride Number of entries in one row of t1->flags

NOTE(review): DOWNLOAD_MQC_VARIABLES / UPLOAD_MQC_VARIABLES presumably copy
the MQ decoder state into the locals curctx, c, a, ct and back - confirm in
their definition.
*/
#define opj_t1_dec_clnpass_internal(t1, bpno, vsc, w, h, flags_stride) \
{ \
    OPJ_INT32 one, half, oneplushalf; \
    OPJ_UINT32 runlen; \
    OPJ_UINT32 i, j, k; \
    const OPJ_UINT32 l_w = w; \
    opj_mqc_t* mqc = &(t1->mqc); \
    register OPJ_INT32 *data = t1->data; \
    /* skip the first flags row and the first entry of the second one */ \
    register opj_flag_t *flagsp = &t1->flags[flags_stride + 1]; \
    DOWNLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
    register OPJ_UINT32 v; \
    one = 1 << bpno; \
    half = one >> 1; \
    oneplushalf = one | half; \
    /* complete stripes of 4 rows: one flags word per column */ \
    for (k = 0; k < (h & ~3u); k += 4, data += 3*l_w, flagsp += 2) { \
        for (i = 0; i < l_w; ++i, ++data, ++flagsp) { \
            opj_flag_t flags = *flagsp; \
            if (flags == 0) { \
                /* run-length mode: one decision tells whether any of the */ \
                /* 4 samples becomes significant */ \
                OPJ_UINT32 partial = OPJ_TRUE; \
                opj_t1_setcurctx(curctx, T1_CTXNO_AGG); \
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                if (!v) { \
                    /* note: skips the PI clearing below; flags is 0 anyway */ \
                    continue; \
                } \
                /* 2-bit index of the first significant sample */ \
                opj_t1_setcurctx(curctx, T1_CTXNO_UNI); \
                opj_mqc_decode_macro(runlen, mqc, curctx, a, c, ct); \
                opj_mqc_decode_macro(v, mqc, curctx, a, c, ct); \
                runlen = (runlen << 1) | v; \
                /* enter at sample `runlen` in partial mode (sign only), */ \
                /* then fall through the remaining samples in normal mode */ \
                switch(runlen) { \
                    case 0: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, OPJ_TRUE,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 0, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, vsc); \
                        partial = OPJ_FALSE; \
                        /* FALLTHRU */ \
                    case 1: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 1, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        partial = OPJ_FALSE; \
                        /* FALLTHRU */ \
                    case 2: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 2, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        partial = OPJ_FALSE; \
                        /* FALLTHRU */ \
                    case 3: \
                        opj_t1_dec_clnpass_step_macro(OPJ_FALSE, partial,\
                                            flags, flagsp, flags_stride, data, \
                                            l_w, 3, mqc, curctx, \
                                            v, a, c, ct, oneplushalf, OPJ_FALSE); \
                        break; \
                } \
            } else { \
                /* normal mode: each sample checks its own SIGMA/PI bits */ \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 0, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, vsc); \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 1, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 2, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
                opj_t1_dec_clnpass_step_macro(OPJ_TRUE, OPJ_FALSE, \
                                    flags, flagsp, flags_stride, data, \
                                    l_w, 3, mqc, curctx, \
                                    v, a, c, ct, oneplushalf, OPJ_FALSE); \
            } \
            /* store the local copy back with all PI bits cleared */ \
            *flagsp = flags & ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
        } \
    } \
    UPLOAD_MQC_VARIABLES(mqc, curctx, c, a, ct); \
    /* trailing stripe of h - k rows, handled by the function variant */ \
    if( k < h ) { \
        for (i = 0; i < l_w; ++i, ++flagsp, ++data) { \
            for (j = 0; j < h - k; ++j) { \
                opj_t1_dec_clnpass_step(t1, flagsp, data + j * l_w, oneplushalf, j, vsc); \
            } \
            *flagsp &= ~(T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3); \
        } \
    } \
}
1332
1333 static void opj_t1_dec_clnpass_check_segsym(opj_t1_t *t1, OPJ_INT32 cblksty)
1334 {
1335     if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
1336         opj_mqc_t* mqc = &(t1->mqc);
1337         OPJ_UINT32 v, v2;
1338         opj_mqc_setcurctx(mqc, T1_CTXNO_UNI);
1339         opj_mqc_decode(v, mqc);
1340         opj_mqc_decode(v2, mqc);
1341         v = (v << 1) | v2;
1342         opj_mqc_decode(v2, mqc);
1343         v = (v << 1) | v2;
1344         opj_mqc_decode(v2, mqc);
1345         v = (v << 1) | v2;
1346         /*
1347         if (v!=0xa) {
1348             opj_event_msg(t1->cinfo, EVT_WARNING, "Bad segmentation symbol %x\n", v);
1349         }
1350         */
1351     }
1352 }
1353
1354 static void opj_t1_dec_clnpass_64x64_novsc(
1355     opj_t1_t *t1,
1356     OPJ_INT32 bpno)
1357 {
1358     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, 64, 64, 66);
1359 }
1360
1361 static void opj_t1_dec_clnpass_64x64_vsc(
1362     opj_t1_t *t1,
1363     OPJ_INT32 bpno)
1364 {
1365     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, 64, 64, 66);
1366 }
1367
1368 static void opj_t1_dec_clnpass_generic_novsc(
1369     opj_t1_t *t1,
1370     OPJ_INT32 bpno)
1371 {
1372     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_FALSE, t1->w, t1->h,
1373                                 t1->w + 2U);
1374 }
1375
1376 static void opj_t1_dec_clnpass_generic_vsc(
1377     opj_t1_t *t1,
1378     OPJ_INT32 bpno)
1379 {
1380     opj_t1_dec_clnpass_internal(t1, bpno, OPJ_TRUE, t1->w, t1->h,
1381                                 t1->w + 2U);
1382 }
1383
1384 static void opj_t1_dec_clnpass(
1385     opj_t1_t *t1,
1386     OPJ_INT32 bpno,
1387     OPJ_INT32 cblksty)
1388 {
1389     if (t1->w == 64 && t1->h == 64) {
1390         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1391             opj_t1_dec_clnpass_64x64_vsc(t1, bpno);
1392         } else {
1393             opj_t1_dec_clnpass_64x64_novsc(t1, bpno);
1394         }
1395     } else {
1396         if (cblksty & J2K_CCP_CBLKSTY_VSC) {
1397             opj_t1_dec_clnpass_generic_vsc(t1, bpno);
1398         } else {
1399             opj_t1_dec_clnpass_generic_novsc(t1, bpno);
1400         }
1401     }
1402     opj_t1_dec_clnpass_check_segsym(t1, cblksty);
1403 }
1404
1405
1406 /** mod fixed_quality */
1407 static OPJ_FLOAT64 opj_t1_getwmsedec(
1408     OPJ_INT32 nmsedec,
1409     OPJ_UINT32 compno,
1410     OPJ_UINT32 level,
1411     OPJ_UINT32 orient,
1412     OPJ_INT32 bpno,
1413     OPJ_UINT32 qmfbid,
1414     OPJ_FLOAT64 stepsize,
1415     OPJ_UINT32 numcomps,
1416     const OPJ_FLOAT64 * mct_norms,
1417     OPJ_UINT32 mct_numcomps)
1418 {
1419     OPJ_FLOAT64 w1 = 1, w2, wmsedec;
1420     OPJ_ARG_NOT_USED(numcomps);
1421
1422     if (mct_norms && (compno < mct_numcomps)) {
1423         w1 = mct_norms[compno];
1424     }
1425
1426     if (qmfbid == 1) {
1427         w2 = opj_dwt_getnorm(level, orient);
1428     } else {    /* if (qmfbid == 0) */
1429         const OPJ_INT32 log2_gain = (orient == 0) ? 0 :
1430                                                 (orient == 3) ? 2 : 1;
1431         w2 = opj_dwt_getnorm_real(level, orient);
1432         /* Not sure this is right. But preserves past behaviour */
1433         stepsize /= (1 << log2_gain);
1434     }
1435
1436     wmsedec = w1 * w2 * stepsize * (1 << bpno);
1437     wmsedec *= wmsedec * nmsedec / 8192.0;
1438
1439     return wmsedec;
1440 }
1441
1442 static OPJ_BOOL opj_t1_allocate_buffers(
1443     opj_t1_t *t1,
1444     OPJ_UINT32 w,
1445     OPJ_UINT32 h)
1446 {
1447     OPJ_UINT32 flagssize;
1448     OPJ_UINT32 flags_stride;
1449
1450     /* No risk of overflow. Prior checks ensure those assert are met */
1451     /* They are per the specification */
1452     assert(w <= 1024);
1453     assert(h <= 1024);
1454     assert(w * h <= 4096);
1455
1456     /* encoder uses tile buffer, so no need to allocate */
1457     if (!t1->encoder) {
1458         OPJ_UINT32 datasize = w * h;
1459
1460         if (datasize > t1->datasize) {
1461             opj_aligned_free(t1->data);
1462             t1->data = (OPJ_INT32*) opj_aligned_malloc(datasize * sizeof(OPJ_INT32));
1463             if (!t1->data) {
1464                 /* FIXME event manager error callback */
1465                 return OPJ_FALSE;
1466             }
1467             t1->datasize = datasize;
1468         }
1469         /* memset first arg is declared to never be null by gcc */
1470         if (t1->data != NULL) {
1471             memset(t1->data, 0, datasize * sizeof(OPJ_INT32));
1472         }
1473     }
1474
1475     flags_stride = w + 2U; /* can't be 0U */
1476
1477     flagssize = (h + 3U) / 4U + 2U;
1478
1479     flagssize *= flags_stride;
1480     {
1481         opj_flag_t* p;
1482         OPJ_UINT32 x;
1483         OPJ_UINT32 flags_height = (h + 3U) / 4U;
1484
1485         if (flagssize > t1->flagssize) {
1486
1487             opj_aligned_free(t1->flags);
1488             t1->flags = (opj_flag_t*) opj_aligned_malloc(flagssize * sizeof(
1489                             opj_flag_t));
1490             if (!t1->flags) {
1491                 /* FIXME event manager error callback */
1492                 return OPJ_FALSE;
1493             }
1494         }
1495         t1->flagssize = flagssize;
1496
1497         memset(t1->flags, 0, flagssize * sizeof(opj_flag_t));
1498
1499         p = &t1->flags[0];
1500         for (x = 0; x < flags_stride; ++x) {
1501             /* magic value to hopefully stop any passes being interested in this entry */
1502             *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1503         }
1504
1505         p = &t1->flags[((flags_height + 1) * flags_stride)];
1506         for (x = 0; x < flags_stride; ++x) {
1507             /* magic value to hopefully stop any passes being interested in this entry */
1508             *p++ = (T1_PI_0 | T1_PI_1 | T1_PI_2 | T1_PI_3);
1509         }
1510
1511         if (h % 4) {
1512             OPJ_UINT32 v = 0;
1513             p = &t1->flags[((flags_height) * flags_stride)];
1514             if (h % 4 == 1) {
1515                 v |= T1_PI_1 | T1_PI_2 | T1_PI_3;
1516             } else if (h % 4 == 2) {
1517                 v |= T1_PI_2 | T1_PI_3;
1518             } else if (h % 4 == 3) {
1519                 v |= T1_PI_3;
1520             }
1521             for (x = 0; x < flags_stride; ++x) {
1522                 *p++ = v;
1523             }
1524         }
1525     }
1526
1527     t1->w = w;
1528     t1->h = h;
1529
1530     return OPJ_TRUE;
1531 }
1532
1533 /* ----------------------------------------------------------------------- */
1534
1535 /* ----------------------------------------------------------------------- */
1536 /**
1537  * Creates a new Tier 1 handle
1538  * and initializes the look-up tables of the Tier-1 coder/decoder
1539  * @return a new T1 handle if successful, returns NULL otherwise
1540 */
1541 opj_t1_t* opj_t1_create(OPJ_BOOL isEncoder)
1542 {
1543     opj_t1_t *l_t1 = 00;
1544
1545     l_t1 = (opj_t1_t*) opj_calloc(1, sizeof(opj_t1_t));
1546     if (!l_t1) {
1547         return 00;
1548     }
1549
1550     l_t1->encoder = isEncoder;
1551
1552     return l_t1;
1553 }
1554
1555
1556 /**
1557  * Destroys a previously created T1 handle
1558  *
1559  * @param p_t1 Tier 1 handle to destroy
1560 */
1561 void opj_t1_destroy(opj_t1_t *p_t1)
1562 {
1563     if (! p_t1) {
1564         return;
1565     }
1566
1567     /* encoder uses tile buffer, so no need to free */
1568     if (!p_t1->encoder && p_t1->data) {
1569         opj_aligned_free(p_t1->data);
1570         p_t1->data = 00;
1571     }
1572
1573     if (p_t1->flags) {
1574         opj_aligned_free(p_t1->flags);
1575         p_t1->flags = 00;
1576     }
1577
1578     opj_free(p_t1->cblkdatabuffer);
1579
1580     opj_free(p_t1);
1581 }
1582
/**
 * Description of one code-block decoding job, handed to
 * opj_t1_clbl_decode_processor(), which releases it with opj_free() once
 * done.
 */
typedef struct {
    /* when false, the result is stored in a buffer allocated for the */
    /* code-block (cblk->decoded_data) instead of the tile buffer */
    OPJ_BOOL whole_tile_decoding;
    /* resolution index; resolutions[resno - 1] gives the band offsets */
    OPJ_UINT32 resno;
    /* code-block to decode */
    opj_tcd_cblk_dec_t* cblk;
    /* band the code-block belongs to (bandno, origin, stepsize) */
    opj_tcd_band_t* band;
    /* tile-component providing the resolutions and the output buffer */
    opj_tcd_tilecomp_t* tilec;
    /* coding parameters: roishift, cblksty, qmfbid */
    opj_tccp_t* tccp;
    /* copied into the Tier-1 handle before decoding */
    OPJ_BOOL mustuse_cblkdatabuffer;
    /* shared status: set to OPJ_FALSE on failure; a job does no decoding */
    /* once it is found false */
    volatile OPJ_BOOL* pret;
    /* event manager used to report errors */
    opj_event_mgr_t *p_manager;
    /* when not NULL, locked around the use of p_manager */
    opj_mutex_t* p_manager_mutex;
    /* forwarded to opj_t1_decode_cblk() */
    OPJ_BOOL check_pterm;
} opj_t1_cblk_decode_processing_job_t;
1596
1597 static void opj_t1_destroy_wrapper(void* t1)
1598 {
1599     opj_t1_destroy((opj_t1_t*) t1);
1600 }
1601
1602 static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
1603 {
1604     opj_tcd_cblk_dec_t* cblk;
1605     opj_tcd_band_t* band;
1606     opj_tcd_tilecomp_t* tilec;
1607     opj_tccp_t* tccp;
1608     OPJ_INT32* OPJ_RESTRICT datap;
1609     OPJ_UINT32 cblk_w, cblk_h;
1610     OPJ_INT32 x, y;
1611     OPJ_UINT32 i, j;
1612     opj_t1_cblk_decode_processing_job_t* job;
1613     opj_t1_t* t1;
1614     OPJ_UINT32 resno;
1615     OPJ_UINT32 tile_w;
1616
1617     job = (opj_t1_cblk_decode_processing_job_t*) user_data;
1618
1619     cblk = job->cblk;
1620
1621     if (!job->whole_tile_decoding) {
1622         cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1623         cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1624
1625         cblk->decoded_data = (OPJ_INT32*)opj_aligned_malloc(sizeof(OPJ_INT32) *
1626                              cblk_w * cblk_h);
1627         if (cblk->decoded_data == NULL) {
1628             if (job->p_manager_mutex) {
1629                 opj_mutex_lock(job->p_manager_mutex);
1630             }
1631             opj_event_msg(job->p_manager, EVT_ERROR,
1632                           "Cannot allocate cblk->decoded_data\n");
1633             if (job->p_manager_mutex) {
1634                 opj_mutex_unlock(job->p_manager_mutex);
1635             }
1636             *(job->pret) = OPJ_FALSE;
1637             opj_free(job);
1638             return;
1639         }
1640         /* Zero-init required */
1641         memset(cblk->decoded_data, 0, sizeof(OPJ_INT32) * cblk_w * cblk_h);
1642     } else if (cblk->decoded_data) {
1643         /* Not sure if that code path can happen, but better be */
1644         /* safe than sorry */
1645         opj_aligned_free(cblk->decoded_data);
1646         cblk->decoded_data = NULL;
1647     }
1648
1649     resno = job->resno;
1650     band = job->band;
1651     tilec = job->tilec;
1652     tccp = job->tccp;
1653     tile_w = (OPJ_UINT32)(tilec->resolutions[tilec->minimum_num_resolutions - 1].x1
1654                           -
1655                           tilec->resolutions[tilec->minimum_num_resolutions - 1].x0);
1656
1657     if (!*(job->pret)) {
1658         opj_free(job);
1659         return;
1660     }
1661
1662     t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
1663     if (t1 == NULL) {
1664         t1 = opj_t1_create(OPJ_FALSE);
1665         opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
1666     }
1667     t1->mustuse_cblkdatabuffer = job->mustuse_cblkdatabuffer;
1668
1669     if (OPJ_FALSE == opj_t1_decode_cblk(
1670                 t1,
1671                 cblk,
1672                 band->bandno,
1673                 (OPJ_UINT32)tccp->roishift,
1674                 tccp->cblksty,
1675                 job->p_manager,
1676                 job->p_manager_mutex,
1677                 job->check_pterm)) {
1678         *(job->pret) = OPJ_FALSE;
1679         opj_free(job);
1680         return;
1681     }
1682
1683     x = cblk->x0 - band->x0;
1684     y = cblk->y0 - band->y0;
1685     if (band->bandno & 1) {
1686         opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
1687         x += pres->x1 - pres->x0;
1688     }
1689     if (band->bandno & 2) {
1690         opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
1691         y += pres->y1 - pres->y0;
1692     }
1693
1694     datap = cblk->decoded_data ? cblk->decoded_data : t1->data;
1695     cblk_w = t1->w;
1696     cblk_h = t1->h;
1697
1698     if (tccp->roishift) {
1699         if (tccp->roishift >= 31) {
1700             for (j = 0; j < cblk_h; ++j) {
1701                 for (i = 0; i < cblk_w; ++i) {
1702                     datap[(j * cblk_w) + i] = 0;
1703                 }
1704             }
1705         } else {
1706             OPJ_INT32 thresh = 1 << tccp->roishift;
1707             for (j = 0; j < cblk_h; ++j) {
1708                 for (i = 0; i < cblk_w; ++i) {
1709                     OPJ_INT32 val = datap[(j * cblk_w) + i];
1710                     OPJ_INT32 mag = abs(val);
1711                     if (mag >= thresh) {
1712                         mag >>= tccp->roishift;
1713                         datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
1714                     }
1715                 }
1716             }
1717         }
1718     }
1719
1720     /* Both can be non NULL if for example decoding a full tile and then */
1721     /* partially a tile. In which case partial decoding should be the */
1722     /* priority */
1723     assert((cblk->decoded_data != NULL) || (tilec->data != NULL));
1724
1725     if (cblk->decoded_data) {
1726         OPJ_UINT32 cblk_size = cblk_w * cblk_h;
1727         if (tccp->qmfbid == 1) {
1728             for (i = 0; i < cblk_size; ++i) {
1729                 datap[i] /= 2;
1730             }
1731         } else {        /* if (tccp->qmfbid == 0) */
1732             const float stepsize = 0.5f * band->stepsize;
1733             i = 0;
1734 #ifdef __SSE2__
1735             {
1736                 const __m128 xmm_stepsize = _mm_set1_ps(stepsize);
1737                 for (; i < (cblk_size & ~15U); i += 16) {
1738                     __m128 xmm0_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1739                                                            datap + 0)));
1740                     __m128 xmm1_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1741                                                            datap + 4)));
1742                     __m128 xmm2_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1743                                                            datap + 8)));
1744                     __m128 xmm3_data = _mm_cvtepi32_ps(_mm_load_si128((__m128i * const)(
1745                                                            datap + 12)));
1746                     _mm_store_ps((float*)(datap +  0), _mm_mul_ps(xmm0_data, xmm_stepsize));
1747                     _mm_store_ps((float*)(datap +  4), _mm_mul_ps(xmm1_data, xmm_stepsize));
1748                     _mm_store_ps((float*)(datap +  8), _mm_mul_ps(xmm2_data, xmm_stepsize));
1749                     _mm_store_ps((float*)(datap + 12), _mm_mul_ps(xmm3_data, xmm_stepsize));
1750                     datap += 16;
1751                 }
1752             }
1753 #endif
1754             for (; i < cblk_size; ++i) {
1755                 OPJ_FLOAT32 tmp = ((OPJ_FLOAT32)(*datap)) * stepsize;
1756                 memcpy(datap, &tmp, sizeof(tmp));
1757                 datap++;
1758             }
1759         }
1760     } else if (tccp->qmfbid == 1) {
1761         OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w +
1762                                                        (OPJ_SIZE_T)x];
1763         for (j = 0; j < cblk_h; ++j) {
1764             i = 0;
1765             for (; i < (cblk_w & ~(OPJ_UINT32)3U); i += 4U) {
1766                 OPJ_INT32 tmp0 = datap[(j * cblk_w) + i + 0U];
1767                 OPJ_INT32 tmp1 = datap[(j * cblk_w) + i + 1U];
1768                 OPJ_INT32 tmp2 = datap[(j * cblk_w) + i + 2U];
1769                 OPJ_INT32 tmp3 = datap[(j * cblk_w) + i + 3U];
1770                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 0U] = tmp0 / 2;
1771                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 1U] = tmp1 / 2;
1772                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 2U] = tmp2 / 2;
1773                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i + 3U] = tmp3 / 2;
1774             }
1775             for (; i < cblk_w; ++i) {
1776                 OPJ_INT32 tmp = datap[(j * cblk_w) + i];
1777                 ((OPJ_INT32*)tiledp)[(j * (OPJ_SIZE_T)tile_w) + i] = tmp / 2;
1778             }
1779         }
1780     } else {        /* if (tccp->qmfbid == 0) */
1781         const float stepsize = 0.5f * band->stepsize;
1782         OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_SIZE_T)y *
1783                                                          tile_w + (OPJ_SIZE_T)x];
1784         for (j = 0; j < cblk_h; ++j) {
1785             OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
1786             for (i = 0; i < cblk_w; ++i) {
1787                 OPJ_FLOAT32 tmp = (OPJ_FLOAT32) * datap * stepsize;
1788                 *tiledp2 = tmp;
1789                 datap++;
1790                 tiledp2++;
1791             }
1792             tiledp += tile_w;
1793         }
1794     }
1795
1796     opj_free(job);
1797 }
1798
1799
1800 void opj_t1_decode_cblks(opj_tcd_t* tcd,
1801                          volatile OPJ_BOOL* pret,
1802                          opj_tcd_tilecomp_t* tilec,
1803                          opj_tccp_t* tccp,
1804                          opj_event_mgr_t *p_manager,
1805                          opj_mutex_t* p_manager_mutex,
1806                          OPJ_BOOL check_pterm
1807                         )
1808 {
1809     opj_thread_pool_t* tp = tcd->thread_pool;
1810     OPJ_UINT32 resno, bandno, precno, cblkno;
1811
1812 #ifdef DEBUG_VERBOSE
1813     OPJ_UINT32 codeblocks_decoded = 0;
1814     printf("Enter opj_t1_decode_cblks()\n");
1815 #endif
1816
1817     for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
1818         opj_tcd_resolution_t* res = &tilec->resolutions[resno];
1819
1820         for (bandno = 0; bandno < res->numbands; ++bandno) {
1821             opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
1822
1823             for (precno = 0; precno < res->pw * res->ph; ++precno) {
1824                 opj_tcd_precinct_t* precinct = &band->precincts[precno];
1825
1826                 if (!opj_tcd_is_subband_area_of_interest(tcd,
1827                         tilec->compno,
1828                         resno,
1829                         band->bandno,
1830                         (OPJ_UINT32)precinct->x0,
1831                         (OPJ_UINT32)precinct->y0,
1832                         (OPJ_UINT32)precinct->x1,
1833                         (OPJ_UINT32)precinct->y1)) {
1834                     for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1835                         opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1836                         if (cblk->decoded_data) {
1837 #ifdef DEBUG_VERBOSE
1838                             printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1839                                    cblk->x0, cblk->y0, resno, bandno);
1840 #endif
1841                             opj_aligned_free(cblk->decoded_data);
1842                             cblk->decoded_data = NULL;
1843                         }
1844                     }
1845                     continue;
1846                 }
1847
1848                 for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
1849                     opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
1850                     opj_t1_cblk_decode_processing_job_t* job;
1851
1852                     if (!opj_tcd_is_subband_area_of_interest(tcd,
1853                             tilec->compno,
1854                             resno,
1855                             band->bandno,
1856                             (OPJ_UINT32)cblk->x0,
1857                             (OPJ_UINT32)cblk->y0,
1858                             (OPJ_UINT32)cblk->x1,
1859                             (OPJ_UINT32)cblk->y1)) {
1860                         if (cblk->decoded_data) {
1861 #ifdef DEBUG_VERBOSE
1862                             printf("Discarding codeblock %d,%d at resno=%d, bandno=%d\n",
1863                                    cblk->x0, cblk->y0, resno, bandno);
1864 #endif
1865                             opj_aligned_free(cblk->decoded_data);
1866                             cblk->decoded_data = NULL;
1867                         }
1868                         continue;
1869                     }
1870
1871                     if (!tcd->whole_tile_decoding) {
1872                         OPJ_UINT32 cblk_w = (OPJ_UINT32)(cblk->x1 - cblk->x0);
1873                         OPJ_UINT32 cblk_h = (OPJ_UINT32)(cblk->y1 - cblk->y0);
1874                         if (cblk->decoded_data != NULL) {
1875 #ifdef DEBUG_VERBOSE
1876                             printf("Reusing codeblock %d,%d at resno=%d, bandno=%d\n",
1877                                    cblk->x0, cblk->y0, resno, bandno);
1878 #endif
1879                             continue;
1880                         }
1881                         if (cblk_w == 0 || cblk_h == 0) {
1882                             continue;
1883                         }
1884 #ifdef DEBUG_VERBOSE
1885                         printf("Decoding codeblock %d,%d at resno=%d, bandno=%d\n",
1886                                cblk->x0, cblk->y0, resno, bandno);
1887 #endif
1888                     }
1889
1890                     job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1,
1891                             sizeof(opj_t1_cblk_decode_processing_job_t));
1892                     if (!job) {
1893                         *pret = OPJ_FALSE;
1894                         return;
1895                     }
1896                     job->whole_tile_decoding = tcd->whole_tile_decoding;
1897                     job->resno = resno;
1898                     job->cblk = cblk;
1899                     job->band = band;
1900                     job->tilec = tilec;
1901                     job->tccp = tccp;
1902                     job->pret = pret;
1903                     job->p_manager_mutex = p_manager_mutex;
1904                     job->p_manager = p_manager;
1905                     job->check_pterm = check_pterm;
1906                     job->mustuse_cblkdatabuffer = opj_thread_pool_get_thread_count(tp) > 1;
1907                     opj_thread_pool_submit_job(tp, opj_t1_clbl_decode_processor, job);
1908 #ifdef DEBUG_VERBOSE
1909                     codeblocks_decoded ++;
1910 #endif
1911                     if (!(*pret)) {
1912                         return;
1913                     }
1914                 } /* cblkno */
1915             } /* precno */
1916         } /* bandno */
1917     } /* resno */
1918
1919 #ifdef DEBUG_VERBOSE
1920     printf("Leave opj_t1_decode_cblks(). Number decoded: %d\n", codeblocks_decoded);
1921 #endif
1922     return;
1923 }
1924
1925
1926 static OPJ_BOOL opj_t1_decode_cblk(opj_t1_t *t1,
1927                                    opj_tcd_cblk_dec_t* cblk,
1928                                    OPJ_UINT32 orient,
1929                                    OPJ_UINT32 roishift,
1930                                    OPJ_UINT32 cblksty,
1931                                    opj_event_mgr_t *p_manager,
1932                                    opj_mutex_t* p_manager_mutex,
1933                                    OPJ_BOOL check_pterm)
1934 {
1935     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
1936
1937     OPJ_INT32 bpno_plus_one;
1938     OPJ_UINT32 passtype;
1939     OPJ_UINT32 segno, passno;
1940     OPJ_BYTE* cblkdata = NULL;
1941     OPJ_UINT32 cblkdataindex = 0;
1942     OPJ_BYTE type = T1_TYPE_MQ; /* BYPASS mode */
1943     OPJ_INT32* original_t1_data = NULL;
1944
1945     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
1946
1947     if (!opj_t1_allocate_buffers(
1948                 t1,
1949                 (OPJ_UINT32)(cblk->x1 - cblk->x0),
1950                 (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
1951         return OPJ_FALSE;
1952     }
1953
1954     bpno_plus_one = (OPJ_INT32)(roishift + cblk->numbps);
1955     if (bpno_plus_one >= 31) {
1956         if (p_manager_mutex) {
1957             opj_mutex_lock(p_manager_mutex);
1958         }
1959         opj_event_msg(p_manager, EVT_WARNING,
1960                       "opj_t1_decode_cblk(): unsupported bpno_plus_one = %d >= 31\n",
1961                       bpno_plus_one);
1962         if (p_manager_mutex) {
1963             opj_mutex_unlock(p_manager_mutex);
1964         }
1965         return OPJ_FALSE;
1966     }
1967     passtype = 2;
1968
1969     opj_mqc_resetstates(mqc);
1970     opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
1971     opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
1972     opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
1973
1974     /* Even if we have a single chunk, in multi-threaded decoding */
1975     /* the insertion of our synthetic marker might potentially override */
1976     /* valid codestream of other codeblocks decoded in parallel. */
1977     if (cblk->numchunks > 1 || t1->mustuse_cblkdatabuffer) {
1978         OPJ_UINT32 i;
1979         OPJ_UINT32 cblk_len;
1980
1981         /* Compute whole codeblock length from chunk lengths */
1982         cblk_len = 0;
1983         for (i = 0; i < cblk->numchunks; i++) {
1984             cblk_len += cblk->chunks[i].len;
1985         }
1986
1987         /* Allocate temporary memory if needed */
1988         if (cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA > t1->cblkdatabuffersize) {
1989             cblkdata = (OPJ_BYTE*)opj_realloc(t1->cblkdatabuffer,
1990                                               cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA);
1991             if (cblkdata == NULL) {
1992                 return OPJ_FALSE;
1993             }
1994             t1->cblkdatabuffer = cblkdata;
1995             memset(t1->cblkdatabuffer + cblk_len, 0, OPJ_COMMON_CBLK_DATA_EXTRA);
1996             t1->cblkdatabuffersize = cblk_len + OPJ_COMMON_CBLK_DATA_EXTRA;
1997         }
1998
1999         /* Concatenate all chunks */
2000         cblkdata = t1->cblkdatabuffer;
2001         cblk_len = 0;
2002         for (i = 0; i < cblk->numchunks; i++) {
2003             memcpy(cblkdata + cblk_len, cblk->chunks[i].data, cblk->chunks[i].len);
2004             cblk_len += cblk->chunks[i].len;
2005         }
2006     } else if (cblk->numchunks == 1) {
2007         cblkdata = cblk->chunks[0].data;
2008     } else {
2009         /* Not sure if that can happen in practice, but avoid Coverity to */
2010         /* think we will dereference a null cblkdta pointer */
2011         return OPJ_TRUE;
2012     }
2013
2014     /* For subtile decoding, directly decode in the decoded_data buffer of */
2015     /* the code-block. Hack t1->data to point to it, and restore it later */
2016     if (cblk->decoded_data) {
2017         original_t1_data = t1->data;
2018         t1->data = cblk->decoded_data;
2019     }
2020
2021     for (segno = 0; segno < cblk->real_num_segs; ++segno) {
2022         opj_tcd_seg_t *seg = &cblk->segs[segno];
2023
2024         /* BYPASS mode */
2025         type = ((bpno_plus_one <= ((OPJ_INT32)(cblk->numbps)) - 4) && (passtype < 2) &&
2026                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2027
2028         if (type == T1_TYPE_RAW) {
2029             opj_mqc_raw_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2030                                  OPJ_COMMON_CBLK_DATA_EXTRA);
2031         } else {
2032             opj_mqc_init_dec(mqc, cblkdata + cblkdataindex, seg->len,
2033                              OPJ_COMMON_CBLK_DATA_EXTRA);
2034         }
2035         cblkdataindex += seg->len;
2036
2037         for (passno = 0; (passno < seg->real_num_passes) &&
2038                 (bpno_plus_one >= 1); ++passno) {
2039             switch (passtype) {
2040             case 0:
2041                 if (type == T1_TYPE_RAW) {
2042                     opj_t1_dec_sigpass_raw(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2043                 } else {
2044                     opj_t1_dec_sigpass_mqc(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2045                 }
2046                 break;
2047             case 1:
2048                 if (type == T1_TYPE_RAW) {
2049                     opj_t1_dec_refpass_raw(t1, bpno_plus_one);
2050                 } else {
2051                     opj_t1_dec_refpass_mqc(t1, bpno_plus_one);
2052                 }
2053                 break;
2054             case 2:
2055                 opj_t1_dec_clnpass(t1, bpno_plus_one, (OPJ_INT32)cblksty);
2056                 break;
2057             }
2058
2059             if ((cblksty & J2K_CCP_CBLKSTY_RESET) && type == T1_TYPE_MQ) {
2060                 opj_mqc_resetstates(mqc);
2061                 opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2062                 opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2063                 opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2064             }
2065             if (++passtype == 3) {
2066                 passtype = 0;
2067                 bpno_plus_one--;
2068             }
2069         }
2070
2071         opq_mqc_finish_dec(mqc);
2072     }
2073
2074     if (check_pterm) {
2075         if (mqc->bp + 2 < mqc->end) {
2076             if (p_manager_mutex) {
2077                 opj_mutex_lock(p_manager_mutex);
2078             }
2079             opj_event_msg(p_manager, EVT_WARNING,
2080                           "PTERM check failure: %d remaining bytes in code block (%d used / %d)\n",
2081                           (int)(mqc->end - mqc->bp) - 2,
2082                           (int)(mqc->bp - mqc->start),
2083                           (int)(mqc->end - mqc->start));
2084             if (p_manager_mutex) {
2085                 opj_mutex_unlock(p_manager_mutex);
2086             }
2087         } else if (mqc->end_of_byte_stream_counter > 2) {
2088             if (p_manager_mutex) {
2089                 opj_mutex_lock(p_manager_mutex);
2090             }
2091             opj_event_msg(p_manager, EVT_WARNING,
2092                           "PTERM check failure: %d synthetized 0xFF markers read\n",
2093                           mqc->end_of_byte_stream_counter);
2094             if (p_manager_mutex) {
2095                 opj_mutex_unlock(p_manager_mutex);
2096             }
2097         }
2098     }
2099
2100     /* Restore original t1->data is needed */
2101     if (cblk->decoded_data) {
2102         t1->data = original_t1_data;
2103     }
2104
2105     return OPJ_TRUE;
2106 }
2107
2108
/** Description of one asynchronous code-block encoding job.
 *
 * Instances are allocated by opj_t1_encode_cblks() and released by
 * opj_t1_clbl_encode_processor() once the job has run.
 */
typedef struct {
    /* Component index, forwarded to opj_t1_encode_cblk() */
    OPJ_UINT32 compno;
    /* Resolution index of the band within tilec */
    OPJ_UINT32 resno;
    /* Code-block to encode */
    opj_tcd_cblk_enc_t* cblk;
    /* Tile being encoded; its distotile field accumulates the distortion */
    opj_tcd_tile_t *tile;
    /* Band the code-block belongs to */
    opj_tcd_band_t* band;
    /* Tile component holding the coefficient buffer */
    opj_tcd_tilecomp_t* tilec;
    /* Coding parameters of the tile component (qmfbid, cblksty) */
    opj_tccp_t* tccp;
    /* MCT norms, forwarded to opj_t1_encode_cblk() */
    const OPJ_FLOAT64 * mct_norms;
    /* Number of components for mct_norms, forwarded to opj_t1_encode_cblk() */
    OPJ_UINT32 mct_numcomps;
    /* Shared success flag: read before starting, set to OPJ_FALSE on failure */
    volatile OPJ_BOOL* pret;
    /* Protects the update of tile->distotile; may be NULL */
    opj_mutex_t* mutex;
} opj_t1_cblk_encode_processing_job_t;
2122
2123 /** Procedure to deal with a asynchronous code-block encoding job.
2124  *
2125  * @param user_data Pointer to a opj_t1_cblk_encode_processing_job_t* structure
2126  * @param tls       TLS handle.
2127  */
2128 static void opj_t1_clbl_encode_processor(void* user_data, opj_tls_t* tls)
2129 {
2130     opj_t1_cblk_encode_processing_job_t* job =
2131         (opj_t1_cblk_encode_processing_job_t*)user_data;
2132     opj_tcd_cblk_enc_t* cblk = job->cblk;
2133     const opj_tcd_band_t* band = job->band;
2134     const opj_tcd_tilecomp_t* tilec = job->tilec;
2135     const opj_tccp_t* tccp = job->tccp;
2136     const OPJ_UINT32 resno = job->resno;
2137     opj_t1_t* t1;
2138     const OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
2139
2140     OPJ_INT32* OPJ_RESTRICT tiledp;
2141     OPJ_UINT32 cblk_w;
2142     OPJ_UINT32 cblk_h;
2143     OPJ_UINT32 i, j, tileLineAdvance;
2144     OPJ_SIZE_T tileIndex = 0;
2145
2146     OPJ_INT32 x = cblk->x0 - band->x0;
2147     OPJ_INT32 y = cblk->y0 - band->y0;
2148
2149     if (!*(job->pret)) {
2150         opj_free(job);
2151         return;
2152     }
2153
2154     t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
2155     if (t1 == NULL) {
2156         t1 = opj_t1_create(OPJ_TRUE); /* OPJ_TRUE == T1 for encoding */
2157         opj_tls_set(tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper);
2158     }
2159
2160     if (band->bandno & 1) {
2161         opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2162         x += pres->x1 - pres->x0;
2163     }
2164     if (band->bandno & 2) {
2165         opj_tcd_resolution_t *pres = &tilec->resolutions[resno - 1];
2166         y += pres->y1 - pres->y0;
2167     }
2168
2169     if (!opj_t1_allocate_buffers(
2170                 t1,
2171                 (OPJ_UINT32)(cblk->x1 - cblk->x0),
2172                 (OPJ_UINT32)(cblk->y1 - cblk->y0))) {
2173         *(job->pret) = OPJ_FALSE;
2174         opj_free(job);
2175         return;
2176     }
2177
2178     cblk_w = t1->w;
2179     cblk_h = t1->h;
2180     tileLineAdvance = tile_w - cblk_w;
2181
2182     tiledp = &tilec->data[(OPJ_SIZE_T)y * tile_w + (OPJ_SIZE_T)x];
2183     t1->data = tiledp;
2184     t1->data_stride = tile_w;
2185     if (tccp->qmfbid == 1) {
2186         /* Do multiplication on unsigned type, even if the
2187             * underlying type is signed, to avoid potential
2188             * int overflow on large value (the output will be
2189             * incorrect in such situation, but whatever...)
2190             * This assumes complement-to-2 signed integer
2191             * representation
2192             * Fixes https://github.com/uclouvain/openjpeg/issues/1053
2193             */
2194         OPJ_UINT32* OPJ_RESTRICT tiledp_u = (OPJ_UINT32*) tiledp;
2195         for (j = 0; j < cblk_h; ++j) {
2196             for (i = 0; i < cblk_w; ++i) {
2197                 tiledp_u[tileIndex] <<= T1_NMSEDEC_FRACBITS;
2198                 tileIndex++;
2199             }
2200             tileIndex += tileLineAdvance;
2201         }
2202     } else {        /* if (tccp->qmfbid == 0) */
2203         for (j = 0; j < cblk_h; ++j) {
2204             for (i = 0; i < cblk_w; ++i) {
2205                 OPJ_FLOAT32 tmp = ((OPJ_FLOAT32*)tiledp)[tileIndex];
2206                 tiledp[tileIndex] = (OPJ_INT32)opj_lrintf((tmp / band->stepsize) *
2207                                     (1 << T1_NMSEDEC_FRACBITS));
2208                 tileIndex++;
2209             }
2210             tileIndex += tileLineAdvance;
2211         }
2212     }
2213
2214     {
2215         OPJ_FLOAT64 cumwmsedec =
2216             opj_t1_encode_cblk(
2217                 t1,
2218                 cblk,
2219                 band->bandno,
2220                 job->compno,
2221                 tilec->numresolutions - 1 - resno,
2222                 tccp->qmfbid,
2223                 band->stepsize,
2224                 tccp->cblksty,
2225                 job->tile->numcomps,
2226                 job->mct_norms,
2227                 job->mct_numcomps);
2228         if (job->mutex) {
2229             opj_mutex_lock(job->mutex);
2230         }
2231         job->tile->distotile += cumwmsedec;
2232         if (job->mutex) {
2233             opj_mutex_unlock(job->mutex);
2234         }
2235     }
2236
2237     opj_free(job);
2238 }
2239
2240
2241 OPJ_BOOL opj_t1_encode_cblks(opj_tcd_t* tcd,
2242                              opj_tcd_tile_t *tile,
2243                              opj_tcp_t *tcp,
2244                              const OPJ_FLOAT64 * mct_norms,
2245                              OPJ_UINT32 mct_numcomps
2246                             )
2247 {
2248     volatile OPJ_BOOL ret = OPJ_TRUE;
2249     opj_thread_pool_t* tp = tcd->thread_pool;
2250     OPJ_UINT32 compno, resno, bandno, precno, cblkno;
2251     opj_mutex_t* mutex = opj_mutex_create();
2252
2253     tile->distotile = 0;        /* fixed_quality */
2254
2255     for (compno = 0; compno < tile->numcomps; ++compno) {
2256         opj_tcd_tilecomp_t* tilec = &tile->comps[compno];
2257         opj_tccp_t* tccp = &tcp->tccps[compno];
2258
2259         for (resno = 0; resno < tilec->numresolutions; ++resno) {
2260             opj_tcd_resolution_t *res = &tilec->resolutions[resno];
2261
2262             for (bandno = 0; bandno < res->numbands; ++bandno) {
2263                 opj_tcd_band_t* OPJ_RESTRICT band = &res->bands[bandno];
2264
2265                 /* Skip empty bands */
2266                 if (opj_tcd_is_band_empty(band)) {
2267                     continue;
2268                 }
2269                 for (precno = 0; precno < res->pw * res->ph; ++precno) {
2270                     opj_tcd_precinct_t *prc = &band->precincts[precno];
2271
2272                     for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
2273                         opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
2274
2275                         opj_t1_cblk_encode_processing_job_t* job =
2276                             (opj_t1_cblk_encode_processing_job_t*) opj_calloc(1,
2277                                     sizeof(opj_t1_cblk_encode_processing_job_t));
2278                         if (!job) {
2279                             ret = OPJ_FALSE;
2280                             goto end;
2281                         }
2282                         job->compno = compno;
2283                         job->tile = tile;
2284                         job->resno = resno;
2285                         job->cblk = cblk;
2286                         job->band = band;
2287                         job->tilec = tilec;
2288                         job->tccp = tccp;
2289                         job->mct_norms = mct_norms;
2290                         job->mct_numcomps = mct_numcomps;
2291                         job->pret = &ret;
2292                         job->mutex = mutex;
2293                         opj_thread_pool_submit_job(tp, opj_t1_clbl_encode_processor, job);
2294
2295                     } /* cblkno */
2296                 } /* precno */
2297             } /* bandno */
2298         } /* resno  */
2299     } /* compno  */
2300
2301 end:
2302     opj_thread_pool_wait_completion(tcd->thread_pool, 0);
2303     if (mutex) {
2304         opj_mutex_destroy(mutex);
2305     }
2306
2307     return ret;
2308 }
2309
2310 /* Returns whether the pass (bpno, passtype) is terminated */
2311 static int opj_t1_enc_is_term_pass(opj_tcd_cblk_enc_t* cblk,
2312                                    OPJ_UINT32 cblksty,
2313                                    OPJ_INT32 bpno,
2314                                    OPJ_UINT32 passtype)
2315 {
2316     /* Is it the last cleanup pass ? */
2317     if (passtype == 2 && bpno == 0) {
2318         return OPJ_TRUE;
2319     }
2320
2321     if (cblksty & J2K_CCP_CBLKSTY_TERMALL) {
2322         return OPJ_TRUE;
2323     }
2324
2325     if ((cblksty & J2K_CCP_CBLKSTY_LAZY)) {
2326         /* For bypass arithmetic bypass, terminate the 4th cleanup pass */
2327         if ((bpno == ((OPJ_INT32)cblk->numbps - 4)) && (passtype == 2)) {
2328             return OPJ_TRUE;
2329         }
2330         /* and beyond terminate all the magnitude refinement passes (in raw) */
2331         /* and cleanup passes (in MQC) */
2332         if ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype > 0)) {
2333             return OPJ_TRUE;
2334         }
2335     }
2336
2337     return OPJ_FALSE;
2338 }
2339
2340
2341 /** mod fixed_quality */
2342 static OPJ_FLOAT64 opj_t1_encode_cblk(opj_t1_t *t1,
2343                                       opj_tcd_cblk_enc_t* cblk,
2344                                       OPJ_UINT32 orient,
2345                                       OPJ_UINT32 compno,
2346                                       OPJ_UINT32 level,
2347                                       OPJ_UINT32 qmfbid,
2348                                       OPJ_FLOAT64 stepsize,
2349                                       OPJ_UINT32 cblksty,
2350                                       OPJ_UINT32 numcomps,
2351                                       const OPJ_FLOAT64 * mct_norms,
2352                                       OPJ_UINT32 mct_numcomps)
2353 {
2354     OPJ_FLOAT64 cumwmsedec = 0.0;
2355
2356     opj_mqc_t *mqc = &(t1->mqc);   /* MQC component */
2357
2358     OPJ_UINT32 passno;
2359     OPJ_INT32 bpno;
2360     OPJ_UINT32 passtype;
2361     OPJ_INT32 nmsedec = 0;
2362     OPJ_INT32 max;
2363     OPJ_UINT32 i, j;
2364     OPJ_BYTE type = T1_TYPE_MQ;
2365     OPJ_FLOAT64 tempwmsedec;
2366
2367 #ifdef EXTRA_DEBUG
2368     printf("encode_cblk(x=%d,y=%d,x1=%d,y1=%d,orient=%d,compno=%d,level=%d\n",
2369            cblk->x0, cblk->y0, cblk->x1, cblk->y1, orient, compno, level);
2370 #endif
2371
2372     mqc->lut_ctxno_zc_orient = lut_ctxno_zc + (orient << 9);
2373
2374     max = 0;
2375     for (i = 0; i < t1->w; ++i) {
2376         for (j = 0; j < t1->h; ++j) {
2377             OPJ_INT32 tmp = abs(t1->data[i + j * t1->data_stride]);
2378             max = opj_int_max(max, tmp);
2379         }
2380     }
2381
2382     cblk->numbps = max ? (OPJ_UINT32)((opj_int_floorlog2(max) + 1) -
2383                                       T1_NMSEDEC_FRACBITS) : 0;
2384     if (cblk->numbps == 0) {
2385         cblk->totalpasses = 0;
2386         return cumwmsedec;
2387     }
2388
2389     bpno = (OPJ_INT32)(cblk->numbps - 1);
2390     passtype = 2;
2391
2392     opj_mqc_resetstates(mqc);
2393     opj_mqc_setstate(mqc, T1_CTXNO_UNI, 0, 46);
2394     opj_mqc_setstate(mqc, T1_CTXNO_AGG, 0, 3);
2395     opj_mqc_setstate(mqc, T1_CTXNO_ZC, 0, 4);
2396     opj_mqc_init_enc(mqc, cblk->data);
2397
2398     for (passno = 0; bpno >= 0; ++passno) {
2399         opj_tcd_pass_t *pass = &cblk->passes[passno];
2400         type = ((bpno < ((OPJ_INT32)(cblk->numbps) - 4)) && (passtype < 2) &&
2401                 (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
2402
2403         /* If the previous pass was terminating, we need to reset the encoder */
2404         if (passno > 0 && cblk->passes[passno - 1].term) {
2405             if (type == T1_TYPE_RAW) {
2406                 opj_mqc_bypass_init_enc(mqc);
2407             } else {
2408                 opj_mqc_restart_init_enc(mqc);
2409             }
2410         }
2411
2412         switch (passtype) {
2413         case 0:
2414             opj_t1_enc_sigpass(t1, bpno, &nmsedec, type, cblksty);
2415             break;
2416         case 1:
2417             opj_t1_enc_refpass(t1, bpno, &nmsedec, type);
2418             break;
2419         case 2:
2420             opj_t1_enc_clnpass(t1, bpno, &nmsedec, cblksty);
2421             /* code switch SEGMARK (i.e. SEGSYM) */
2422             if (cblksty & J2K_CCP_CBLKSTY_SEGSYM) {
2423                 opj_mqc_segmark_enc(mqc);
2424             }
2425             break;
2426         }
2427
2428         /* fixed_quality */
2429         tempwmsedec = opj_t1_getwmsedec(nmsedec, compno, level, orient, bpno, qmfbid,
2430                                         stepsize, numcomps, mct_norms, mct_numcomps) ;
2431         cumwmsedec += tempwmsedec;
2432         pass->distortiondec = cumwmsedec;
2433
2434         if (opj_t1_enc_is_term_pass(cblk, cblksty, bpno, passtype)) {
2435             /* If it is a terminated pass, terminate it */
2436             if (type == T1_TYPE_RAW) {
2437                 opj_mqc_bypass_flush_enc(mqc, cblksty & J2K_CCP_CBLKSTY_PTERM);
2438             } else {
2439                 if (cblksty & J2K_CCP_CBLKSTY_PTERM) {
2440                     opj_mqc_erterm_enc(mqc);
2441                 } else {
2442                     opj_mqc_flush(mqc);
2443                 }
2444             }
2445             pass->term = 1;
2446             pass->rate = opj_mqc_numbytes(mqc);
2447         } else {
2448             /* Non terminated pass */
2449             OPJ_UINT32 rate_extra_bytes;
2450             if (type == T1_TYPE_RAW) {
2451                 rate_extra_bytes = opj_mqc_bypass_get_extra_bytes(
2452                                        mqc, (cblksty & J2K_CCP_CBLKSTY_PTERM));
2453             } else {
2454                 rate_extra_bytes = 3;
2455             }
2456             pass->term = 0;
2457             pass->rate = opj_mqc_numbytes(mqc) + rate_extra_bytes;
2458         }
2459
2460         if (++passtype == 3) {
2461             passtype = 0;
2462             bpno--;
2463         }
2464
2465         /* Code-switch "RESET" */
2466         if (cblksty & J2K_CCP_CBLKSTY_RESET) {
2467             opj_mqc_reset_enc(mqc);
2468         }
2469     }
2470
2471     cblk->totalpasses = passno;
2472
2473     if (cblk->totalpasses) {
2474         /* Make sure that pass rates are increasing */
2475         OPJ_UINT32 last_pass_rate = opj_mqc_numbytes(mqc);
2476         for (passno = cblk->totalpasses; passno > 0;) {
2477             opj_tcd_pass_t *pass = &cblk->passes[--passno];
2478             if (pass->rate > last_pass_rate) {
2479                 pass->rate = last_pass_rate;
2480             } else {
2481                 last_pass_rate = pass->rate;
2482             }
2483         }
2484     }
2485
2486     for (passno = 0; passno < cblk->totalpasses; passno++) {
2487         opj_tcd_pass_t *pass = &cblk->passes[passno];
2488
2489         /* Prevent generation of FF as last data byte of a pass*/
2490         /* For terminating passes, the flushing procedure ensured this already */
2491         assert(pass->rate > 0);
2492         if (cblk->data[pass->rate - 1] == 0xFF) {
2493             pass->rate--;
2494         }
2495         pass->len = pass->rate - (passno == 0 ? 0 : cblk->passes[passno - 1].rate);
2496     }
2497
2498 #ifdef EXTRA_DEBUG
2499     printf(" len=%d\n", (cblk->totalpasses) ? opj_mqc_numbytes(mqc) : 0);
2500
2501     /* Check that there not 0xff >=0x90 sequences */
2502     if (cblk->totalpasses) {
2503         OPJ_UINT32 i;
2504         OPJ_UINT32 len = opj_mqc_numbytes(mqc);
2505         for (i = 1; i < len; ++i) {
2506             if (cblk->data[i - 1] == 0xff && cblk->data[i] >= 0x90) {
2507                 printf("0xff %02x at offset %d\n", cblk->data[i], i - 1);
2508                 abort();
2509             }
2510         }
2511     }
2512 #endif
2513
2514     return cumwmsedec;
2515 }