Merge branch 'master' of https://github.com/uclouvain/openjpeg into tier1_optimizatio... 786/head
author Even Rouault <even.rouault@spatialys.com>
Thu, 8 Sep 2016 08:30:09 +0000 (10:30 +0200)
committer Even Rouault <even.rouault@spatialys.com>
Thu, 8 Sep 2016 08:30:09 +0000 (10:30 +0200)
Conflicts:
src/lib/openjp2/t1.c

1  2 
.travis.yml
src/lib/openjp2/dwt.c
src/lib/openjp2/dwt.h
src/lib/openjp2/j2k.c
src/lib/openjp2/jp2.c
src/lib/openjp2/opj_includes.h
src/lib/openjp2/t1.c
src/lib/openjp2/tcd.c

diff --cc .travis.yml
Simple merge
index 18f8d9c34d390854103d839e7ee82caa6b195a42,4fce8b209c085dcf90742cdb675825376e5c95de..2e28effc855cbb8b749f61acfcee8e7dcc0b87f5
@@@ -556,68 -556,6 +556,68 @@@ static OPJ_UINT32 opj_dwt_max_resolutio
        return mr ;
  }
  
-     OPJ_INT32 * restrict tiledp;
 +typedef struct /* Job descriptor consumed by opj_dwt_decode_h_func: one batch of tile rows to transform. */
 +{
 +    opj_dwt_t h; /* work structure handed to opj_dwt_interleave_h and dwt_1D; h.mem is freed by the job function */
 +    DWT1DFN dwt_1D; /* 1-D transform routine invoked once per row */
 +    OPJ_UINT32 rw; /* number of OPJ_INT32 values copied from h.mem back into each row */
 +    OPJ_UINT32 w; /* distance, in OPJ_INT32 elements, between consecutive rows of tiledp */
-     OPJ_INT32 * restrict tiledp;
++    OPJ_INT32 * OPJ_RESTRICT tiledp; /* buffer that is read and overwritten in place, indexed as tiledp[j*w] */
 +    int min_j; /* first row index processed (inclusive) */
 +    int max_j; /* end row index (exclusive) */
 +} opj_dwd_decode_h_job_t;
 +
 +static void opj_dwt_decode_h_func(void* user_data, opj_tls_t* tls) /* Job callback: applies the job's 1-D transform to rows [min_j, max_j) of the buffer, then releases the job. user_data must point to an opj_dwd_decode_h_job_t. */
 +{
 +    int j;
 +    opj_dwd_decode_h_job_t* job;
 +    (void)tls; /* tls is not used by this callback */
 +
 +    job = (opj_dwd_decode_h_job_t*)user_data;
 +    for( j = job->min_j; j < job->max_j; j++ )
 +    {
 +          opj_dwt_interleave_h(&job->h, &job->tiledp[j*job->w]); /* NOTE(review): j is int and w is OPJ_UINT32, so the index is presumably computed in 32-bit unsigned arithmetic and could wrap for very large buffers - confirm */
 +          (job->dwt_1D)(&job->h); /* run the 1-D transform on the work structure */
 +          memcpy(&job->tiledp[j*job->w], job->h.mem, job->rw * sizeof(OPJ_INT32)); /* write the first rw values of h.mem back over the start of row j */
 +    }
 +
 +    opj_aligned_free(job->h.mem); /* the scratch buffer and the job descriptor are both released here, so the caller must not reuse them */
 +    opj_free(job);
 +}
 +
 +typedef struct /* Job descriptor consumed by opj_dwt_decode_v_func: one batch of tile columns to transform. */
 +{
 +    opj_dwt_t v; /* work structure handed to opj_dwt_interleave_v and dwt_1D; v.mem is freed by the job function */
 +    DWT1DFN dwt_1D; /* 1-D transform routine invoked once per column */
 +    OPJ_UINT32 rh; /* number of values copied from v.mem back into each column */
 +    OPJ_UINT32 w; /* distance, in OPJ_INT32 elements, between consecutive rows of tiledp */
++    OPJ_INT32 * OPJ_RESTRICT tiledp; /* buffer that is read and overwritten in place, indexed as tiledp[k*w + j] */
 +    int min_j; /* first column index processed (inclusive) */
 +    int max_j; /* end column index (exclusive) */
 +} opj_dwd_decode_v_job_t;
 +
 +static void opj_dwt_decode_v_func(void* user_data, opj_tls_t* tls) /* Job callback: applies the job's 1-D transform to columns [min_j, max_j) of the buffer, then releases the job. user_data must point to an opj_dwd_decode_v_job_t. */
 +{
 +    int j;
 +    opj_dwd_decode_v_job_t* job;
 +    (void)tls; /* tls is not used by this callback */
 +
 +    job = (opj_dwd_decode_v_job_t*)user_data;
 +    for( j = job->min_j; j < job->max_j; j++ )
 +    {
 +        OPJ_UINT32 k;
 +        opj_dwt_interleave_v(&job->v, &job->tiledp[j], (OPJ_INT32)job->w); /* column j starts at tiledp[j]; w is passed as the row stride */
 +        (job->dwt_1D)(&job->v); /* run the 1-D transform on the work structure */
 +        for(k = 0; k < job->rh; ++k) {
 +            job->tiledp[k * job->w + j] = job->v.mem[k]; /* scatter the first rh values of v.mem back down column j; NOTE(review): index is presumably 32-bit unsigned arithmetic and could wrap for very large buffers - confirm */
 +        }
 +    }
 +
 +    opj_aligned_free(job->v.mem); /* the scratch buffer and the job descriptor are both released here, so the caller must not reuse them */
 +    opj_free(job);
 +}
 +
 +
  /* <summary>                            */
  /* Inverse wavelet transform in 2-D.     */
  /* </summary>                           */
Simple merge
Simple merge
index e156ebfc421a5159b73018019ef6625f74f3fc19,a344a0e67c64df66574f386b66bf446443c93530..ea81d0f5d61bf6fb1ac8f4ecc3d82eec7971d925
@@@ -1764,14 -1774,9 +1774,14 @@@ void opj_jp2_setup_decoder(opj_jp2_t *j
  
        /* further JP2 initializations go here */
        jp2->color.jp2_has_colr = 0;
-     jp2->ignore_pclr_cmap_cdef = parameters->flags & OPJ_DPARAMETERS_IGNORE_PCLR_CMAP_CDEF_FLAG;
+       jp2->ignore_pclr_cmap_cdef = parameters->flags & OPJ_DPARAMETERS_IGNORE_PCLR_CMAP_CDEF_FLAG;
  }
  
 +OPJ_BOOL opj_jp2_set_threads(opj_jp2_t *jp2, OPJ_UINT32 num_threads) /* Passes the requested thread count to the J2K codec held in jp2->j2k and returns that call's result. */
 +{
 +     return opj_j2k_set_threads(jp2->j2k, num_threads); /* NOTE(review): jp2 is dereferenced without a NULL check - confirm callers never pass NULL */
 +}
 +
  /* ----------------------------------------------------------------------- */
  /* JP2 encoder interface                                             */
  /* ----------------------------------------------------------------------- */
Simple merge
index b1c6128d091b6e31436ca43e9a69d92f089a709f,cb5a1cefd30d460f4d307bea686d39e64a0ea105..66884e3b52cce477b95807b48a83187b3f27b655
@@@ -1540,140 -1271,13 +1540,140 @@@ void opj_t1_destroy(opj_t1_t *p_t1
        opj_free(p_t1);
  }
  
 -OPJ_BOOL opj_t1_decode_cblks(   opj_t1_t* t1,
 -                            opj_tcd_tilecomp_t* tilec,
 -                            opj_tccp_t* tccp
 -                            )
 +typedef struct /* Job descriptor consumed by opj_t1_clbl_decode_processor: one code-block to decode. */
 +{
 +    OPJ_UINT32 resno; /* resolution index; the processor reads tilec->resolutions[resno - 1] for bands with bandno bit 0 or 1 set */
 +    opj_tcd_cblk_dec_t* cblk; /* code-block passed to opj_t1_decode_cblk */
 +    opj_tcd_band_t* band; /* band containing the code-block; supplies bandno, x0/y0 and stepsize */
 +    opj_tcd_tilecomp_t* tilec; /* tile component whose data buffer receives the decoded samples */
 +    opj_tccp_t* tccp; /* coding parameters read by the processor: roishift, cblksty, qmfbid */
 +    volatile OPJ_BOOL* pret; /* shared result flag: checked before decoding and set to OPJ_FALSE on decode failure */
 +} opj_t1_cblk_decode_processing_job_t;
 +
 +static void opj_t1_destroy_wrapper(void* t1) /* Adapter giving opj_t1_destroy a void* signature so it can be passed to opj_tls_set as the value's release callback. */
 +{
 +    opj_t1_destroy( (opj_t1_t*) t1 ); /* t1 must actually point to an opj_t1_t */
 +}
 +
 +static void opj_t1_clbl_decode_processor(void* user_data, opj_tls_t* tls) /* Job callback: decodes one code-block with an opj_t1_t kept in tls and writes the samples into tilec->data; user_data is an opj_t1_cblk_decode_processing_job_t that is freed on every exit path. */
 +{
 +    opj_tcd_cblk_dec_t* cblk;
 +    opj_tcd_band_t* band;
 +    opj_tcd_tilecomp_t* tilec;
 +    opj_tccp_t* tccp;
-     OPJ_INT32* restrict datap;
++    OPJ_INT32* OPJ_RESTRICT datap;
 +    OPJ_UINT32 cblk_w, cblk_h;
 +    OPJ_INT32 x, y;
 +    OPJ_UINT32 i, j;
 +    opj_t1_cblk_decode_processing_job_t* job;
 +    opj_t1_t* t1;
 +    OPJ_UINT32 resno;
 +    OPJ_UINT32 tile_w;
 +
 +    job = (opj_t1_cblk_decode_processing_job_t*) user_data;
 +    resno = job->resno;
 +    cblk = job->cblk;
 +    band = job->band;
 +    tilec = job->tilec;
 +    tccp = job->tccp;
 +    tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0); /* row stride of tilec->data, in elements */
 +
 +    if( !*(job->pret) ) /* the shared result flag is already false: skip decoding and just release the job */
 +    {
 +        opj_free(job);
 +        return;
 +    }
 +
 +    t1 = (opj_t1_t*) opj_tls_get(tls, OPJ_TLS_KEY_T1); /* reuse the opj_t1_t stored in tls, creating it on first use */
 +    if( t1 == NULL )
 +    {
 +        t1 = opj_t1_create( OPJ_FALSE ); /* NOTE(review): the result is stored and used without a NULL check - confirm whether opj_t1_create can fail */
 +        opj_tls_set( tls, OPJ_TLS_KEY_T1, t1, opj_t1_destroy_wrapper ); /* presumably the wrapper is called when the tls entry is released - confirm; the return value of opj_tls_set is ignored */
 +    }
 +
 +    if (OPJ_FALSE == opj_t1_decode_cblk(
 +                            t1,
 +                            cblk,
 +                            band->bandno,
 +                            (OPJ_UINT32)tccp->roishift,
 +                            tccp->cblksty)) {
 +            *(job->pret) = OPJ_FALSE; /* report the failure through the shared flag */
 +            opj_free(job);
 +            return;
 +    }
 +
 +    x = cblk->x0 - band->x0; /* code-block origin relative to its band */
 +    y = cblk->y0 - band->y0;
 +    if (band->bandno & 1) { /* bandno bit 0 set: shift x by the width of resolution resno - 1 */
 +        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1]; /* NOTE(review): assumes resno >= 1 whenever this bit is set - confirm */
 +        x += pres->x1 - pres->x0;
 +    }
 +    if (band->bandno & 2) { /* bandno bit 1 set: shift y by the height of resolution resno - 1 */
 +        opj_tcd_resolution_t* pres = &tilec->resolutions[resno - 1];
 +        y += pres->y1 - pres->y0;
 +    }
 +
 +    datap=t1->data; /* decoded samples, cblk_w values per row */
 +    cblk_w = t1->w;
 +    cblk_h = t1->h;
 +
 +    if (tccp->roishift) { /* non-zero ROI shift: scale down, in place, every sample whose magnitude reaches the threshold */
 +        OPJ_INT32 thresh = 1 << tccp->roishift; /* NOTE(review): the shift is undefined if roishift is negative or >= the width of int - confirm the value is range-checked upstream */
 +        for (j = 0; j < cblk_h; ++j) {
 +            for (i = 0; i < cblk_w; ++i) {
 +                OPJ_INT32 val = datap[(j * cblk_w) + i];
 +                OPJ_INT32 mag = abs(val);
 +                if (mag >= thresh) {
 +                    mag >>= tccp->roishift;
 +                    datap[(j * cblk_w) + i] = val < 0 ? -mag : mag; /* keep the original sign */
 +                }
 +            }
 +        }
 +    }
 +    if (tccp->qmfbid == 1) { /* qmfbid == 1: store each sample divided by 2 as an integer */
-         OPJ_INT32* restrict tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
++        OPJ_INT32* OPJ_RESTRICT tiledp = &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
 +        for (j = 0; j < cblk_h; ++j) {
 +            i = 0;
 +            for (; i < (cblk_w & ~3); i += 4) { /* four samples per iteration, up to the largest multiple of 4 not exceeding cblk_w */
 +                OPJ_INT32 tmp0 = datap[(j * cblk_w) + i];
 +                OPJ_INT32 tmp1 = datap[(j * cblk_w) + i+1];
 +                OPJ_INT32 tmp2 = datap[(j * cblk_w) + i+2];
 +                OPJ_INT32 tmp3 = datap[(j * cblk_w) + i+3];
 +                ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp0/2;
 +                ((OPJ_INT32*)tiledp)[(j * tile_w) + i+1] = tmp1/2;
 +                ((OPJ_INT32*)tiledp)[(j * tile_w) + i+2] = tmp2/2;
 +                ((OPJ_INT32*)tiledp)[(j * tile_w) + i+3] = tmp3/2;
 +            }
 +            for (; i < cblk_w; ++i) { /* remaining 0 to 3 samples of the row */
 +                OPJ_INT32 tmp = datap[(j * cblk_w) + i];
 +                ((OPJ_INT32*)tiledp)[(j * tile_w) + i] = tmp/2;
 +            }
 +        }
 +    } else {        /* if (tccp->qmfbid == 0) */
-         OPJ_FLOAT32* restrict tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x];
++        OPJ_FLOAT32* OPJ_RESTRICT tiledp = (OPJ_FLOAT32*) &tilec->data[(OPJ_UINT32)y * tile_w + (OPJ_UINT32)x]; /* the same tile storage is written as floats on this path */
 +        for (j = 0; j < cblk_h; ++j) {
-             OPJ_FLOAT32* restrict tiledp2 = tiledp;
++            OPJ_FLOAT32* OPJ_RESTRICT tiledp2 = tiledp;
 +            for (i = 0; i < cblk_w; ++i) {
 +                OPJ_FLOAT32 tmp = (OPJ_FLOAT32)*datap * band->stepsize; /* scale the sample by the band's stepsize */
 +                *tiledp2 = tmp;
 +                datap++; /* datap is never reset per row: it walks the decoded block contiguously */
 +                tiledp2++;
 +            }
 +            tiledp += tile_w; /* move to the same column on the next tile row */
 +        }
 +    }
 +
 +    opj_free(job); /* normal completion: the job descriptor is released here as well */
 +}
 +
 +
 +void opj_t1_decode_cblks( opj_thread_pool_t* tp,
 +                          volatile OPJ_BOOL* pret,
 +                          opj_tcd_tilecomp_t* tilec,
 +                          opj_tccp_t* tccp
 +                         )
  {
        OPJ_UINT32 resno, bandno, precno, cblkno;
 -      OPJ_UINT32 tile_w = (OPJ_UINT32)(tilec->x1 - tilec->x0);
  
        for (resno = 0; resno < tilec->minimum_num_resolutions; ++resno) {
                opj_tcd_resolution_t* res = &tilec->resolutions[resno];
Simple merge