Significant speed-up rate allocation by rate/distoratio ratio
[openjpeg.git] / src / lib / openjp2 / tcd.c
index 108462ca48895402180343ecd7fb2cbdd3a9d093..998baf9a5529b4ed9f7dc2f7b95cad577bcb3aff 100644 (file)
@@ -42,6 +42,8 @@
 #include "opj_includes.h"
 #include "opj_common.h"
 
+// #define DEBUG_RATE_ALLOC
+
 /* ----------------------------------------------------------------------- */
 
 /* TODO MSD: */
@@ -112,7 +114,7 @@ void tcd_dump(FILE *fd, opj_tcd_t *tcd, opj_tcd_image_t * img)
  * Initializes tile coding/decoding
  */
 static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
-        OPJ_BOOL isEncoder, OPJ_FLOAT32 fraction, OPJ_SIZE_T sizeof_block,
+        OPJ_BOOL isEncoder, OPJ_SIZE_T sizeof_block,
         opj_event_mgr_t* manager);
 
 /**
@@ -143,6 +145,9 @@ static OPJ_BOOL opj_tcd_code_block_enc_allocate_data(opj_tcd_cblk_enc_t *
  */
 static void opj_tcd_code_block_enc_deallocate(opj_tcd_precinct_t * p_precinct);
 
+static
+void opj_tcd_makelayer_fixed(opj_tcd_t *tcd, OPJ_UINT32 layno,
+                             OPJ_UINT32 final);
 
 /**
 Free the memory allocated for encoding
@@ -224,6 +229,7 @@ opj_tcd_t* opj_tcd_create(OPJ_BOOL p_is_decoder)
 
 /* ----------------------------------------------------------------------- */
 
+static
 void opj_tcd_rateallocate_fixed(opj_tcd_t *tcd)
 {
     OPJ_UINT32 layno;
@@ -234,15 +240,21 @@ void opj_tcd_rateallocate_fixed(opj_tcd_t *tcd)
 }
 
 
-void opj_tcd_makelayer(opj_tcd_t *tcd,
-                       OPJ_UINT32 layno,
-                       OPJ_FLOAT64 thresh,
-                       OPJ_UINT32 final)
+/* ----------------------------------------------------------------------- */
+
+/** Returns OPJ_TRUE if the layer allocation is unchanged w.r.t to the previous
+ * invokation with a different threshold */
+static
+OPJ_BOOL opj_tcd_makelayer(opj_tcd_t *tcd,
+                           OPJ_UINT32 layno,
+                           OPJ_FLOAT64 thresh,
+                           OPJ_UINT32 final)
 {
     OPJ_UINT32 compno, resno, bandno, precno, cblkno;
     OPJ_UINT32 passno;
 
     opj_tcd_tile_t *tcd_tile = tcd->tcd_image->tiles;
+    OPJ_BOOL layer_allocation_is_same = OPJ_TRUE;
 
     tcd_tile->distolayer[layno] = 0;        /* fixed_quality */
 
@@ -304,7 +316,10 @@ void opj_tcd_makelayer(opj_tcd_t *tcd,
                             }
                         }
 
-                        layer->numpasses = n - cblk->numpassesinlayers;
+                        if (layer->numpasses != n - cblk->numpassesinlayers) {
+                            layer_allocation_is_same = OPJ_FALSE;
+                            layer->numpasses = n - cblk->numpassesinlayers;
+                        }
 
                         if (!layer->numpasses) {
                             layer->disto = 0;
@@ -333,8 +348,10 @@ void opj_tcd_makelayer(opj_tcd_t *tcd,
             }
         }
     }
+    return layer_allocation_is_same;
 }
 
+static
 void opj_tcd_makelayer_fixed(opj_tcd_t *tcd, OPJ_UINT32 layno,
                              OPJ_UINT32 final)
 {
@@ -440,6 +457,11 @@ void opj_tcd_makelayer_fixed(opj_tcd_t *tcd, OPJ_UINT32 layno,
     }
 }
 
+/** Rate allocation for the following methods:
+ * - allocation by rate/distortio (m_disto_alloc == 1)
+ * - allocation by fixed quality  (m_fixed_quality == 1)
+ */
+static
 OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
                               OPJ_BYTE *dest,
                               OPJ_UINT32 * p_data_written,
@@ -561,6 +583,7 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
                  (tcd_tcp->distoratio[layno] > 0.0))) {
             opj_t2_t*t2 = opj_t2_create(tcd->image, cp);
             OPJ_FLOAT64 thresh = 0;
+            OPJ_BOOL last_layer_allocation_ok = OPJ_FALSE;
 
             if (t2 == 00) {
                 return OPJ_FALSE;
@@ -568,11 +591,27 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
 
             for (i = 0; i < 128; ++i) {
                 OPJ_FLOAT64 distoachieved = 0;  /* fixed_quality */
+                OPJ_BOOL layer_allocation_is_same;
+
+                OPJ_FLOAT64 new_thresh = (lo + hi) / 2;
+                /* Stop iterating when the threshold has stabilized enough */
+                /* 0.5 * 1e-5 is somewhat arbitrary, but has been selected */
+                /* so that this doesn't change the results of the regression */
+                /* test suite. */
+                if (fabs(new_thresh - thresh) <= 0.5 * 1e-5 * thresh) {
+                    break;
+                }
+                thresh = new_thresh;
+#ifdef DEBUG_RATE_ALLOC
+                opj_event_msg(p_manager, EVT_INFO, "layno=%u, iter=%u, thresh=%g",
+                              layno, i, new_thresh);
+#endif
 
-                thresh = (lo + hi) / 2;
-
-                opj_tcd_makelayer(tcd, layno, thresh, 0);
-
+                layer_allocation_is_same = opj_tcd_makelayer(tcd, layno, thresh, 0) && i != 0;
+#ifdef DEBUG_RATE_ALLOC
+                opj_event_msg(p_manager, EVT_INFO, "--> layer_allocation_is_same = %d",
+                              layer_allocation_is_same);
+#endif
                 if (cp->m_specific_param.m_enc.m_fixed_quality) {       /* fixed_quality */
                     if (OPJ_IS_CINEMA(cp->rsiz) || OPJ_IS_IMF(cp->rsiz)) {
                         if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest,
@@ -605,17 +644,41 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
                         }
                         lo = thresh;
                     }
-                } else {
-                    if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest,
-                                                p_data_written, maxlen, cstr_info, NULL, tcd->cur_tp_num, tcd->tp_pos,
-                                                tcd->cur_pino,
-                                                THRESH_CALC, p_manager)) {
-                        /* TODO: what to do with l ??? seek / tell ??? */
-                        /* opj_event_msg(tcd->cinfo, EVT_INFO, "rate alloc: len=%d, max=%d\n", l, maxlen); */
+                } else { /* Disto/rate based optimization */
+                    /* Check if the layer allocation done by opj_tcd_makelayer()
+                     * is compatible of the maximum rate allocation. If not,
+                     * retry with a higher threshold.
+                     * If OK, try with a lower threshold.
+                     * Call opj_t2_encode_packets() only if opj_tcd_makelayer()
+                     * has resulted in different truncation points since its last
+                     * call. */
+                    if ((layer_allocation_is_same && !last_layer_allocation_ok) ||
+                            (!layer_allocation_is_same &&
+                             ! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest,
+                                                     p_data_written, maxlen, cstr_info, NULL, tcd->cur_tp_num, tcd->tp_pos,
+                                                     tcd->cur_pino,
+                                                     THRESH_CALC, p_manager))) {
+
+#ifdef DEBUG_RATE_ALLOC
+                        if (!layer_allocation_is_same) {
+                            opj_event_msg(p_manager, EVT_INFO,
+                                          "--> check rate alloc failed (> maxlen=%u)\n", maxlen);
+                        }
+#endif
+                        last_layer_allocation_ok = OPJ_FALSE;
                         lo = thresh;
                         continue;
                     }
 
+#ifdef DEBUG_RATE_ALLOC
+                    if (!layer_allocation_is_same) {
+                        opj_event_msg(p_manager, EVT_INFO,
+                                      "--> check rate alloc success (len=%u <= maxlen=%u)\n", *p_data_written,
+                                      maxlen);
+                    }
+#endif
+
+                    last_layer_allocation_ok = OPJ_TRUE;
                     hi = thresh;
                     stable_thresh = thresh;
                 }
@@ -721,10 +784,9 @@ OPJ_BOOL opj_alloc_tile_component_data(opj_tcd_tilecomp_t *l_tilec)
 /* ----------------------------------------------------------------------- */
 
 static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
-        OPJ_BOOL isEncoder, OPJ_FLOAT32 fraction, OPJ_SIZE_T sizeof_block,
+        OPJ_BOOL isEncoder, OPJ_SIZE_T sizeof_block,
         opj_event_mgr_t* manager)
 {
-    OPJ_UINT32(*l_gain_ptr)(OPJ_UINT32) = 00;
     OPJ_UINT32 compno, resno, bandno, precno, cblkno;
     opj_tcp_t * l_tcp = 00;
     opj_cp_t * l_cp = 00;
@@ -740,7 +802,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
     OPJ_UINT32 p, q;
     OPJ_UINT32 l_level_no;
     OPJ_UINT32 l_pdx, l_pdy;
-    OPJ_UINT32 l_gain;
     OPJ_INT32 l_x0b, l_y0b;
     OPJ_UINT32 l_tx0, l_ty0;
     /* extent of precincts , top left, bottom right**/
@@ -879,11 +940,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
         l_level_no = l_tilec->numresolutions;
         l_res = l_tilec->resolutions;
         l_step_size = l_tccp->stepsizes;
-        if (l_tccp->qmfbid == 0) {
-            l_gain_ptr = &opj_dwt_getgain_real;
-        } else {
-            l_gain_ptr  = &opj_dwt_getgain;
-        }
         /*fprintf(stderr, "\tlevel_no=%d\n",l_level_no);*/
 
         for (resno = 0; resno < l_tilec->numresolutions; ++resno) {
@@ -970,7 +1026,6 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
             l_band = l_res->bands;
 
             for (bandno = 0; bandno < l_res->numbands; ++bandno, ++l_band, ++l_step_size) {
-                OPJ_INT32 numbps;
                 /*fprintf(stderr, "\t\t\tband_no=%d/%d\n", bandno, l_res->numbands );*/
 
                 if (resno == 0) {
@@ -1006,11 +1061,24 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
                     }
                 }
 
-                /** avoid an if with storing function pointer */
-                l_gain = (*l_gain_ptr)(l_band->bandno);
-                numbps = (OPJ_INT32)(l_image_comp->prec + l_gain);
-                l_band->stepsize = (OPJ_FLOAT32)(((1.0 + l_step_size->mant / 2048.0) * pow(2.0,
-                                                  (OPJ_INT32)(numbps - l_step_size->expn)))) * fraction;
+                {
+                    /* Table E-1 - Sub-band gains */
+                    /* BUG_WEIRD_TWO_INVK (look for this identifier in dwt.c): */
+                    /* the test (!isEncoder && l_tccp->qmfbid == 0) is strongly */
+                    /* linked to the use of two_invK instead of invK */
+                    const OPJ_INT32 log2_gain = (!isEncoder &&
+                                                 l_tccp->qmfbid == 0) ? 0 : (l_band->bandno == 0) ? 0 :
+                                                (l_band->bandno == 3) ? 2 : 1;
+
+                    /* Nominal dynamic range. Equation E-4 */
+                    const OPJ_INT32 Rb = (OPJ_INT32)l_image_comp->prec + log2_gain;
+
+                    /* Delta_b value of Equation E-3 in "E.1 Inverse quantization
+                    * procedure" of the standard */
+                    l_band->stepsize = (OPJ_FLOAT32)(((1.0 + l_step_size->mant / 2048.0) * pow(2.0,
+                                                      (OPJ_INT32)(Rb - l_step_size->expn))));
+                }
+
                 /* Mb value of Equation E-2 in "E.1 Inverse quantization
                  * procedure" of the standard */
                 l_band->numbps = l_step_size->expn + (OPJ_INT32)l_tccp->numgbits -
@@ -1193,14 +1261,14 @@ static INLINE OPJ_BOOL opj_tcd_init_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
 OPJ_BOOL opj_tcd_init_encode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
                                   opj_event_mgr_t* p_manager)
 {
-    return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_TRUE, 1.0F,
+    return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_TRUE,
                              sizeof(opj_tcd_cblk_enc_t), p_manager);
 }
 
 OPJ_BOOL opj_tcd_init_decode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
                                   opj_event_mgr_t* p_manager)
 {
-    return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_FALSE, 0.5F,
+    return opj_tcd_init_tile(p_tcd, p_tile_no, OPJ_FALSE,
                              sizeof(opj_tcd_cblk_dec_t), p_manager);
 }
 
@@ -1238,10 +1306,16 @@ static OPJ_BOOL opj_tcd_code_block_enc_allocate_data(opj_tcd_cblk_enc_t *
 
     /* +1 is needed for https://github.com/uclouvain/openjpeg/issues/835 */
     /* and actually +2 required for https://github.com/uclouvain/openjpeg/issues/982 */
+    /* and +7 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 3) */
+    /* and +26 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 7) */
+    /* and +28 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 44) */
+    /* and +33 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 4) */
+    /* and +63 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 4 -IMF 2K) */
+    /* and +74 for https://github.com/uclouvain/openjpeg/issues/1283 (-M 4 -n 8 -s 7,7 -I) */
     /* TODO: is there a theoretical upper-bound for the compressed code */
     /* block size ? */
-    l_data_size = 2 + (OPJ_UINT32)((p_code_block->x1 - p_code_block->x0) *
-                                   (p_code_block->y1 - p_code_block->y0) * (OPJ_INT32)sizeof(OPJ_UINT32));
+    l_data_size = 74 + (OPJ_UINT32)((p_code_block->x1 - p_code_block->x0) *
+                                    (p_code_block->y1 - p_code_block->y0) * (OPJ_INT32)sizeof(OPJ_UINT32));
 
     if (l_data_size > p_code_block->data_size) {
         if (p_code_block->data) {
@@ -2411,7 +2485,8 @@ static OPJ_BOOL opj_tcd_dc_level_shift_encode(opj_tcd_t *p_tcd)
             }
         } else {
             for (i = 0; i < l_nb_elem; ++i) {
-                *l_current_ptr = (*l_current_ptr - l_tccp->m_dc_level_shift) * (1 << 11);
+                *((OPJ_FLOAT32 *) l_current_ptr) = (OPJ_FLOAT32)(*l_current_ptr -
+                                                   l_tccp->m_dc_level_shift);
                 ++l_current_ptr;
             }
         }
@@ -2469,8 +2544,11 @@ static OPJ_BOOL opj_tcd_mct_encode(opj_tcd_t *p_tcd)
 
         opj_free(l_data);
     } else if (l_tcp->tccps->qmfbid == 0) {
-        opj_mct_encode_real(l_tile->comps[0].data, l_tile->comps[1].data,
-                            l_tile->comps[2].data, samples);
+        opj_mct_encode_real(
+            (OPJ_FLOAT32*)l_tile->comps[0].data,
+            (OPJ_FLOAT32*)l_tile->comps[1].data,
+            (OPJ_FLOAT32*)l_tile->comps[2].data,
+            samples);
     } else {
         opj_mct_encode(l_tile->comps[0].data, l_tile->comps[1].data,
                        l_tile->comps[2].data, samples);
@@ -2488,11 +2566,11 @@ static OPJ_BOOL opj_tcd_dwt_encode(opj_tcd_t *p_tcd)
 
     for (compno = 0; compno < l_tile->numcomps; ++compno) {
         if (l_tccp->qmfbid == 1) {
-            if (! opj_dwt_encode(l_tile_comp)) {
+            if (! opj_dwt_encode(p_tcd, l_tile_comp)) {
                 return OPJ_FALSE;
             }
         } else if (l_tccp->qmfbid == 0) {
-            if (! opj_dwt_encode_real(l_tile_comp)) {
+            if (! opj_dwt_encode_real(p_tcd, l_tile_comp)) {
                 return OPJ_FALSE;
             }
         }
@@ -2837,6 +2915,7 @@ void opj_tcd_marker_info_destroy(opj_tcd_marker_info_t *p_tcd_marker_info)
 {
     if (p_tcd_marker_info) {
         opj_free(p_tcd_marker_info->p_packet_size);
+        opj_free(p_tcd_marker_info);
     }
 }