Significant speed-up of rate allocation by rate/distortion ratio
author Even Rouault <even.rouault@spatialys.com>
Thu, 11 Aug 2022 14:46:55 +0000 (16:46 +0200)
committer Even Rouault <even.rouault@spatialys.com>
Thu, 11 Aug 2022 16:06:50 +0000 (18:06 +0200)
- Avoid doing 128 iterations all the time, and stop when the threshold
  doesn't vary much
- Avoid calling costly opj_t2_encode_packets() repeatedly when bisecting the
  layer ratio if the truncation points haven't changed since the last
  iteration.

When used with the GDAL gdal_translate application to convert an 11977 x
8745 raster with data type UInt16 and 8 channels, the conversion time
to JPEG2000 with 20 quality layers using disto/rate allocation (
-co "IC=C8" -co "JPEG2000_DRIVER=JP2OPENJPEG" -co "PROFILE=NPJE_NUMERICALLY_LOSSLESS"
creation options of the GDAL NITF driver) goes from 5m56 wall clock
(8m20s total, 12 vCPUs) down to 1m16 wall clock (3m45 total).

src/lib/openjp2/tcd.c
src/lib/openjp2/tcd.h

index 6442669d60a8493a8b4ca6ce78fffd048d294569..998baf9a5529b4ed9f7dc2f7b95cad577bcb3aff 100644 (file)
@@ -42,6 +42,8 @@
 #include "opj_includes.h"
 #include "opj_common.h"
 
+// #define DEBUG_RATE_ALLOC
+
 /* ----------------------------------------------------------------------- */
 
 /* TODO MSD: */
@@ -143,6 +145,9 @@ static OPJ_BOOL opj_tcd_code_block_enc_allocate_data(opj_tcd_cblk_enc_t *
  */
 static void opj_tcd_code_block_enc_deallocate(opj_tcd_precinct_t * p_precinct);
 
+static
+void opj_tcd_makelayer_fixed(opj_tcd_t *tcd, OPJ_UINT32 layno,
+                             OPJ_UINT32 final);
 
 /**
 Free the memory allocated for encoding
@@ -224,6 +229,7 @@ opj_tcd_t* opj_tcd_create(OPJ_BOOL p_is_decoder)
 
 /* ----------------------------------------------------------------------- */
 
+static
 void opj_tcd_rateallocate_fixed(opj_tcd_t *tcd)
 {
     OPJ_UINT32 layno;
@@ -234,15 +240,21 @@ void opj_tcd_rateallocate_fixed(opj_tcd_t *tcd)
 }
 
 
-void opj_tcd_makelayer(opj_tcd_t *tcd,
-                       OPJ_UINT32 layno,
-                       OPJ_FLOAT64 thresh,
-                       OPJ_UINT32 final)
+/* ----------------------------------------------------------------------- */
+
+/** Returns OPJ_TRUE if the layer allocation is unchanged w.r.t. the previous
+ * invocation with a different threshold */
+static
+OPJ_BOOL opj_tcd_makelayer(opj_tcd_t *tcd,
+                           OPJ_UINT32 layno,
+                           OPJ_FLOAT64 thresh,
+                           OPJ_UINT32 final)
 {
     OPJ_UINT32 compno, resno, bandno, precno, cblkno;
     OPJ_UINT32 passno;
 
     opj_tcd_tile_t *tcd_tile = tcd->tcd_image->tiles;
+    OPJ_BOOL layer_allocation_is_same = OPJ_TRUE;
 
     tcd_tile->distolayer[layno] = 0;        /* fixed_quality */
 
@@ -304,7 +316,10 @@ void opj_tcd_makelayer(opj_tcd_t *tcd,
                             }
                         }
 
-                        layer->numpasses = n - cblk->numpassesinlayers;
+                        if (layer->numpasses != n - cblk->numpassesinlayers) {
+                            layer_allocation_is_same = OPJ_FALSE;
+                            layer->numpasses = n - cblk->numpassesinlayers;
+                        }
 
                         if (!layer->numpasses) {
                             layer->disto = 0;
@@ -333,8 +348,10 @@ void opj_tcd_makelayer(opj_tcd_t *tcd,
             }
         }
     }
+    return layer_allocation_is_same;
 }
 
+static
 void opj_tcd_makelayer_fixed(opj_tcd_t *tcd, OPJ_UINT32 layno,
                              OPJ_UINT32 final)
 {
@@ -440,6 +457,11 @@ void opj_tcd_makelayer_fixed(opj_tcd_t *tcd, OPJ_UINT32 layno,
     }
 }
 
+/** Rate allocation for the following methods:
+ * - allocation by rate/distortion (m_disto_alloc == 1)
+ * - allocation by fixed quality   (m_fixed_quality == 1)
+ */
+static
 OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
                               OPJ_BYTE *dest,
                               OPJ_UINT32 * p_data_written,
@@ -561,6 +583,7 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
                  (tcd_tcp->distoratio[layno] > 0.0))) {
             opj_t2_t*t2 = opj_t2_create(tcd->image, cp);
             OPJ_FLOAT64 thresh = 0;
+            OPJ_BOOL last_layer_allocation_ok = OPJ_FALSE;
 
             if (t2 == 00) {
                 return OPJ_FALSE;
@@ -568,11 +591,27 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
 
             for (i = 0; i < 128; ++i) {
                 OPJ_FLOAT64 distoachieved = 0;  /* fixed_quality */
+                OPJ_BOOL layer_allocation_is_same;
+
+                OPJ_FLOAT64 new_thresh = (lo + hi) / 2;
+                /* Stop iterating when the threshold has stabilized enough */
+                /* 0.5 * 1e-5 is somewhat arbitrary, but has been selected */
+                /* so that this doesn't change the results of the regression */
+                /* test suite. */
+                if (fabs(new_thresh - thresh) <= 0.5 * 1e-5 * thresh) {
+                    break;
+                }
+                thresh = new_thresh;
+#ifdef DEBUG_RATE_ALLOC
+                opj_event_msg(p_manager, EVT_INFO, "layno=%u, iter=%u, thresh=%g",
+                              layno, i, new_thresh);
+#endif
 
-                thresh = (lo + hi) / 2;
-
-                opj_tcd_makelayer(tcd, layno, thresh, 0);
-
+                layer_allocation_is_same = opj_tcd_makelayer(tcd, layno, thresh, 0) && i != 0;
+#ifdef DEBUG_RATE_ALLOC
+                opj_event_msg(p_manager, EVT_INFO, "--> layer_allocation_is_same = %d",
+                              layer_allocation_is_same);
+#endif
                 if (cp->m_specific_param.m_enc.m_fixed_quality) {       /* fixed_quality */
                     if (OPJ_IS_CINEMA(cp->rsiz) || OPJ_IS_IMF(cp->rsiz)) {
                         if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest,
@@ -605,17 +644,41 @@ OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
                         }
                         lo = thresh;
                     }
-                } else {
-                    if (! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest,
-                                                p_data_written, maxlen, cstr_info, NULL, tcd->cur_tp_num, tcd->tp_pos,
-                                                tcd->cur_pino,
-                                                THRESH_CALC, p_manager)) {
-                        /* TODO: what to do with l ??? seek / tell ??? */
-                        /* opj_event_msg(tcd->cinfo, EVT_INFO, "rate alloc: len=%d, max=%d\n", l, maxlen); */
+                } else { /* Disto/rate based optimization */
+                    /* Check if the layer allocation done by opj_tcd_makelayer()
+                     * is compatible with the maximum rate allocation. If not,
+                     * retry with a higher threshold.
+                     * If OK, try with a lower threshold.
+                     * Call opj_t2_encode_packets() only if opj_tcd_makelayer()
+                     * has resulted in different truncation points since its last
+                     * call. */
+                    if ((layer_allocation_is_same && !last_layer_allocation_ok) ||
+                            (!layer_allocation_is_same &&
+                             ! opj_t2_encode_packets(t2, tcd->tcd_tileno, tcd_tile, layno + 1, dest,
+                                                     p_data_written, maxlen, cstr_info, NULL, tcd->cur_tp_num, tcd->tp_pos,
+                                                     tcd->cur_pino,
+                                                     THRESH_CALC, p_manager))) {
+
+#ifdef DEBUG_RATE_ALLOC
+                        if (!layer_allocation_is_same) {
+                            opj_event_msg(p_manager, EVT_INFO,
+                                          "--> check rate alloc failed (> maxlen=%u)\n", maxlen);
+                        }
+#endif
+                        last_layer_allocation_ok = OPJ_FALSE;
                         lo = thresh;
                         continue;
                     }
 
+#ifdef DEBUG_RATE_ALLOC
+                    if (!layer_allocation_is_same) {
+                        opj_event_msg(p_manager, EVT_INFO,
+                                      "--> check rate alloc success (len=%u <= maxlen=%u)\n", *p_data_written,
+                                      maxlen);
+                    }
+#endif
+
+                    last_layer_allocation_ok = OPJ_TRUE;
                     hi = thresh;
                     stable_thresh = thresh;
                 }
index 340c2bf8a64639328217cd2bda0e14b16a97705d..7a4f3dcb783c0aba10a15ad2db5090cadd8ba525 100644 (file)
@@ -369,23 +369,6 @@ OPJ_BOOL opj_tcd_init(opj_tcd_t *p_tcd,
 OPJ_BOOL opj_tcd_init_decode_tile(opj_tcd_t *p_tcd, OPJ_UINT32 p_tile_no,
                                   opj_event_mgr_t* p_manager);
 
-void opj_tcd_makelayer_fixed(opj_tcd_t *tcd, OPJ_UINT32 layno,
-                             OPJ_UINT32 final);
-
-void opj_tcd_rateallocate_fixed(opj_tcd_t *tcd);
-
-void opj_tcd_makelayer(opj_tcd_t *tcd,
-                       OPJ_UINT32 layno,
-                       OPJ_FLOAT64 thresh,
-                       OPJ_UINT32 final);
-
-OPJ_BOOL opj_tcd_rateallocate(opj_tcd_t *tcd,
-                              OPJ_BYTE *dest,
-                              OPJ_UINT32 * p_data_written,
-                              OPJ_UINT32 len,
-                              opj_codestream_info_t *cstr_info,
-                              opj_event_mgr_t *p_manager);
-
 /**
  * Gets the maximum tile size that will be taken by the tile once decoded.
  */