Use thread-pool for T1 decoding
author Even Rouault <even.rouault@spatialys.com>
Wed, 25 May 2016 14:38:44 +0000 (16:38 +0200)
committer Even Rouault <even.rouault@spatialys.com>
Wed, 25 May 2016 19:02:07 +0000 (21:02 +0200)
src/lib/openjp2/opj_includes.h
src/lib/openjp2/t1.c
src/lib/openjp2/t1.h
src/lib/openjp2/tcd.c
src/lib/openjp2/tls_keys.h [new file with mode: 0644]

index c2cc31fa61f09e26911970a77a4a3db83dca0137..e835fae47eac9e0ade48275fcd55819a3ac5f7c4 100644 (file)
@@ -183,6 +183,7 @@ static INLINE long opj_lrintf(float f) {
 #include "cio.h"
 
 #include "thread.h"
+#include "tls_keys.h"
 
 #include "image.h"
 #include "invert.h"
index e1097bf5bed8c7ed6d4ff1881af842bf3f42c732..adf64bb4e2c7846739ee87a3168321891efb600d 100644 (file)
@@ -1540,13 +1540,140 @@ void opj_t1_destroy(opj_t1_t *p_t1)
        opj_free(p_t1);
 }
 
-OPJ_BOOL opj_t1_decode_cblks(   opj_t1_t* t1,
-                            opj_tcd_tilecomp_t* tilec,
-                            opj_tccp_t* tccp
-                            )
+typedef struct /* arguments of one code-block decoding job submitted to the thread pool */
+{
+    OPJ_UINT32 resno;            /* index of the code-block's resolution in tilec->resolutions */
+    opj_tcd_cblk_dec_t* cblk;    /* code-block to decode */
+    opj_tcd_band_t* band;        /* band the code-block coordinates are taken relative to */
+    opj_tcd_tilecomp_t* tilec;   /* tile component whose data buffer receives the decoded samples */
+    opj_tccp_t* tccp;            /* coding parameters read by the job (roishift, cblksty, qmfbid) */
+    volatile OPJ_BOOL* pret;     /* shared status flag; a job sets it to OPJ_FALSE when decoding fails */
+} opj_t1_cblk_decode_processing_job_t;
+
+static void opj_t1_destroy_wrapper(void* t1) /* void*-typed adapter around opj_t1_destroy() */
+{
+    opj_t1_destroy( (opj_t1_t*) t1 ); /* handed to opj_tls_set() together with the OPJ_TLS_KEY_T1 value */
+}
+
+/*
+ * Thread-pool job: decodes one code-block with the opj_t1_t cached in tls,
+ * undoes the ROI shift if any, then stores the samples into tilec->data.
+ * Frees user_data on every path; sets *job->pret to OPJ_FALSE on failure.
+ */
+static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls)
+{
+    opj_t1_cblk_decode_processing_job_t* job = (opj_t1_cblk_decode_processing_job_t*) user_data;
+    OPJ_UINT32 resno = job->resno;
+    opj_tcd_cblk_dec_t* cblk = job->cblk;
+    opj_tcd_band_t* band = job->band;
+    opj_tcd_tilecomp_t* tilec = job->tilec;
+    opj_tccp_t* tccp = job->tccp;
+    OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
+    OPJ_INT32* restrict datap;
+    OPJ_UINT32 cblk_w, cblk_h;
+    OPJ_INT32 x, y;
+    OPJ_UINT32 i, j;
+    opj_t1_t* t1;
+
+    /* A previous job already failed: skip the work, only release the job. */
+    if( !*(job->pret) ) {
+        opj_free(job);
+        return;
+    }
+
+    /* Reuse the opj_t1_t stored under OPJ_TLS_KEY_T1, creating it on first use. */
+    t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1);
+    if( t1 == NULL ) {
+        t1 = opj_t1_create( OPJ_FALSE );
+        if( t1 == NULL ) {
+            /* Creation failed: flag the error rather than decode with a NULL t1. */
+            *(job->pret) = OPJ_FALSE;
+            opj_free(job);
+            return;
+        }
+        opj_tls_set( tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper );
+    }
+
+    if (OPJ_FALSE == opj_t1_decode_cblk(t1, cblk, band->bandno,
+                                        (OPJ_UINT32)tccp->roishift,
+                                        tccp->cblksty)) {
+        *(job->pret) = OPJ_FALSE;
+        opj_free(job);
+        return;
+    }
+
+    x = cblk->x0 - band->x0;
+    y = cblk->y0 - band->y0;
+    if (band->bandno & 1) {
+        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
+        x += pres->x1 - pres->x0;
+    }
+    if (band->bandno & 2) {
+        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
+        y += pres->y1 - pres->y0;
+    }
+
+    datap=t1->data;
+    cblk_w = t1->w;
+    cblk_h = t1->h;
+
+    if (tccp->roishift) {
+        OPJ_INT32 thresh = 1 << tccp->roishift;
+        for (j = 0; j < cblk_h; ++j) {
+            for (i = 0; i < cblk_w; ++i) {
+                OPJ_INT32 val = datap[(j * cblk_w) + i];
+                OPJ_INT32 mag = abs(val);
+                if (mag >= thresh) {
+                    mag >>= tccp->roishift;
+                    datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
+                }
+            }
+        }
+    }
+    if (tccp->qmfbid == 1) {
+        OPJ_INT32* restrict tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
+        for (j = 0; j < cblk_h; ++j) {
+            i = 0;
+            for (; i < (cblk_w & ~3); i += 4) {
+                OPJ_INT32 tmp0 = datap[(j * cblk_w) + i];
+                OPJ_INT32 tmp1 = datap[(j * cblk_w) + i+1];
+                OPJ_INT32 tmp2 = datap[(j * cblk_w) + i+2];
+                OPJ_INT32 tmp3 = datap[(j * cblk_w) + i+3];
+                ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp0/2;
+                ((OPJ_INT32*)tiledp)[(j * tile_w) + i+1] = tmp1/2;
+                ((OPJ_INT32*)tiledp)[(j * tile_w) + i+2] = tmp2/2;
+                ((OPJ_INT32*)tiledp)[(j * tile_w) + i+3] = tmp3/2;
+            }
+            for (; i < cblk_w; ++i) {
+                OPJ_INT32 tmp = datap[(j * cblk_w) + i];
+                ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp/2;
+            }
+        }
+    } else {        /* if (tccp->qmfbid == 0) */
+        OPJ_FLOAT32* restrict tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
+        for (j = 0; j < cblk_h; ++j) {
+            OPJ_FLOAT32* restrict tiledp2 = tiledp;
+            for (i = 0; i < cblk_w; ++i) {
+                OPJ_FLOAT32 tmp = (OPJ_FLOAT32)*datap * band->stepsize;
+                *tiledp2 = tmp;
+                datap++;
+                tiledp2++;
+            }
+            tiledp += tile_w;
+        }
+    }
+
+    opj_free(job);
+}
+
+
+void opj_t1_decode_cblks( opj_thread_pool_t* tp,
+                          volatile OPJ_BOOL* pret,
+                          opj_tcd_tilecomp_t* tilec,
+                          opj_tccp_t* tccp
+                         )
 {
        OPJ_UINT32 resno, bandno, precno, cblkno;
-       OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
 
        for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
                opj_tcd_resolution_t* res = &tilec->resolutions[resno];
@@ -1559,85 +1686,24 @@ OPJ_BOOL opj_t1_decode_cblks(   opj_t1_t* t1,
 
                                for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
                                        opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
-                                       OPJ_INT32* restrict datap;
-                                       OPJ_UINT32 cblk_w, cblk_h;
-                                       OPJ_INT32 x, y;
-                                       OPJ_UINT32 i, j;
-
-                    if (OPJ_FALSE == opj_t1_decode_cblk(
-                                            t1,
-                                            cblk,
-                                            band->bandno,
-                                            (OPJ_UINT32)tccp->roishift,
-                                            tccp->cblksty)) {
-                            return OPJ_FALSE;
-                    }
-
-                                       x = cblk->x0 - band->x0;
-                                       y = cblk->y0 - band->y0;
-                                       if (band->bandno & 1) {
-                                               opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
-                                               x += pres->x1 - pres->x0;
-                                       }
-                                       if (band->bandno & 2) {
-                                               opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
-                                               y += pres->y1 - pres->y0;
-                                       }
-
-                                       datap=t1->data;
-                                       cblk_w = t1->w;
-                                       cblk_h = t1->h;
-
-                                       if (tccp->roishift) {
-                                               OPJ_INT32 thresh = 1 << tccp->roishift;
-                                               for (j = 0; j < cblk_h; ++j) {
-                                                       for (i = 0; i < cblk_w; ++i) {
-                                                               OPJ_INT32 val = datap[(j * cblk_w) + i];
-                                                               OPJ_INT32 mag = abs(val);
-                                                               if (mag >= thresh) {
-                                                                       mag >>= tccp->roishift;
-                                                                       datap[(j * cblk_w) + i] = val < 0 ? -mag : mag;
-                                                               }
-                                                       }
-                                               }
-                                       }
-                                       if (tccp->qmfbid == 1) {
-                        OPJ_INT32* restrict tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
-                                               for (j = 0; j < cblk_h; ++j) {
-                                                       i = 0;
-                                                       for (; i < (cblk_w & ~3); i += 4) {
-                                                               OPJ_INT32 tmp0 = datap[(j * cblk_w) + i];
-                                                               OPJ_INT32 tmp1 = datap[(j * cblk_w) + i+1];
-                                                               OPJ_INT32 tmp2 = datap[(j * cblk_w) + i+2];
-                                                               OPJ_INT32 tmp3 = datap[(j * cblk_w) + i+3];
-                                                               ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp0/2;
-                                                               ((OPJ_INT32*)tiledp)[(j * tile_w) + i+1] = tmp1/2;
-                                                               ((OPJ_INT32*)tiledp)[(j * tile_w) + i+2] = tmp2/2;
-                                                               ((OPJ_INT32*)tiledp)[(j * tile_w) + i+3] = tmp3/2;
-                                                       }
-                                                       for (; i < cblk_w; ++i) {
-                                                               OPJ_INT32 tmp = datap[(j * cblk_w) + i];
-                                                               ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp/2;
-                                                       }
-                                               }
-                                       } else {                /* if (tccp->qmfbid == 0) */
-                        OPJ_FLOAT32* restrict tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
-                                               for (j = 0; j < cblk_h; ++j) {
-                            OPJ_FLOAT32* restrict tiledp2 = tiledp;
-                                                       for (i = 0; i < cblk_w; ++i) {
-                                OPJ_FLOAT32 tmp = (OPJ_FLOAT32)*datap * band->stepsize;
-                                *tiledp2 = tmp;
-                                datap++;
-                                tiledp2++;
-                                                       }
-                            tiledp += tile_w;
-                                               }
-                                       }
+                    opj_t1_cblk_decode_processing_job_t* job;
+
+                    job = (opj_t1_cblk_decode_processing_job_t*) opj_calloc(1, sizeof(opj_t1_cblk_decode_processing_job_t));
+                    job->resno = resno;
+                    job->cblk = cblk;
+                    job->band = band;
+                    job->tilec = tilec;
+                    job->tccp = tccp;
+                    job->pret = pret;
+                    opj_thread_pool_submit_job( tp, opj_t1_clbl_decode_processor, job );
+                    if( !(*pret) )
+                        return;
                                } /* cblkno */
                        } /* precno */
                } /* bandno */
        } /* resno */
-        return OPJ_TRUE;
+
+    return;
 }
 
 
index 22557d964e9c17868cd62c5d50801432fafc1531..5afc64900c756d9c77d72047dd59d387db6e6e58 100644 (file)
@@ -172,7 +172,8 @@ Decode the code-blocks of a tile
 @param tilec The tile to decode
 @param tccp Tile coding parameters
 */
-OPJ_BOOL opj_t1_decode_cblks(   opj_t1_t* t1,
+void opj_t1_decode_cblks(   opj_thread_pool_t* tp,
+                                volatile OPJ_BOOL* pret,
                                 opj_tcd_tilecomp_t* tilec,
                                 opj_tccp_t* tccp);
 
index d76a3f9d701ae9cf188d2a627fff614d8e1aa24d..a34fa18c75a459970b4e69c262ec7856924c5ccc 100644 (file)
@@ -1568,30 +1568,22 @@ static OPJ_BOOL opj_tcd_t2_decode (opj_tcd_t *p_tcd,
 static OPJ_BOOL opj_tcd_t1_decode ( opj_tcd_t *p_tcd )
 {
         OPJ_UINT32 compno;
-        opj_t1_t * l_t1;
         opj_tcd_tile_t * l_tile = p_tcd->tcd_image->tiles;
         opj_tcd_tilecomp_t* l_tile_comp = l_tile->comps;
         opj_tccp_t * l_tccp = p_tcd->tcp->tccps;
-
-
-        l_t1 = opj_t1_create(OPJ_FALSE);
-        if (l_t1 == 00) {
-                return OPJ_FALSE;
-        }
+        volatile OPJ_BOOL ret = OPJ_TRUE; /* shared status flag; a decoding job sets it to OPJ_FALSE when a code-block fails */
 
         for (compno = 0; compno < l_tile->numcomps; ++compno) {
-                /* The +3 is headroom required by the vectorized DWT */
-                if (OPJ_FALSE == opj_t1_decode_cblks(l_t1, l_tile_comp, l_tccp)) {
-                        opj_t1_destroy(l_t1);
-                        return OPJ_FALSE;
-                }
+                opj_t1_decode_cblks(p_tcd->thread_pool, &ret, l_tile_comp, l_tccp); /* submits one job per code-block of this component */
+                if( !ret ) /* a failure has already been reported: stop submitting further components */
+                    break;
                 ++l_tile_comp;
                 ++l_tccp;
         }
 
-        opj_t1_destroy(l_t1);
+        opj_thread_pool_wait_completion(p_tcd->thread_pool, 0); /* jobs hold &ret, so wait before returning; NOTE(review): confirm that 0 means "until no job is pending" */
 
-        return OPJ_TRUE;
+        return ret; /* OPJ_TRUE unless some job cleared the flag */
 }
 
 
diff --git a/src/lib/openjp2/tls_keys.h b/src/lib/openjp2/tls_keys.h
new file mode 100644 (file)
index 0000000..fb26498
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * The copyright in this software is being made available under the 2-clauses 
+ * BSD License, included below. This software may be subject to other third 
+ * party and contributor rights, including patent rights, and no such rights
+ * are granted under this license.
+ *
+ * Copyright (c) 2016, Even Rouault
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef TLS_KEYS_H
+#define TLS_KEYS_H
+
+#define OPJ_TLS_KEY_T1  0 /* key under which t1.c stores its opj_t1_t via opj_tls_set()/opj_tls_get() */
+
+#endif /* TLS_KEYS_H */