From 3810e943b18e0ab0e2def73c54e2580c327dbb31 Mon Sep 17 00:00:00 2001 From: Francois-Olivier Devaux Date: Tue, 21 Aug 2007 12:13:54 +0000 Subject: [PATCH] Aligned malloc using Intel's _mm_malloc(), cleanup on the t1 memory allocation, getting rid of some leftover debug code --- ChangeLog | 1 + libopenjpeg/j2k_lib.h | 17 +++++++++++-- libopenjpeg/t1.c | 58 +++++++++++++++++++++---------------------- libopenjpeg/tcd.c | 18 +++++++------- 4 files changed, 54 insertions(+), 40 deletions(-) diff --git a/ChangeLog b/ChangeLog index 71699c79..e4c72ec7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -10,6 +10,7 @@ August 21, 2007 * [FOD] Robustified MJ2 codecs * [Parvatha] Solved problems with codec reading from image file directory when filename had more than one "." in name * [Callum Lerwick] Minor cleanup patch, that gets rid of a bunch of "old style declaration" warnings from Intel's compiler +* [Callum Lerwick] Aligned malloc using Intel's _mm_malloc(). Cleanup on the t1 memory allocation, getting rid of some leftover debug code August 20, 2007 + [FOD] Added support for the TGA file format in the codec diff --git a/libopenjpeg/j2k_lib.h b/libopenjpeg/j2k_lib.h index 0db63c60..84a68c06 100644 --- a/libopenjpeg/j2k_lib.h +++ b/libopenjpeg/j2k_lib.h @@ -32,6 +32,10 @@ The functions in J2K_LIB.C are internal utilities mainly used for memory management. 
*/ +#ifndef __GCC__ +#define __attribute__(x) /* */ +#endif + /** @defgroup MISC MISC - Miscellaneous internal functions */ /*@{*/ @@ -50,7 +54,16 @@ Allocate a memory block with elements initialized to 0 @param size Bytes to allocate @return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available */ -void* opj_malloc( size_t size ); +void* __attribute__ ((malloc)) opj_malloc( size_t size ); + +/** +Allocate memory aligned to a 16 byte boundary +@param size Bytes to allocate +@return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available +*/ +#include <xmmintrin.h> +#define opj_aligned_malloc(size) _mm_malloc(size, 16) +#define opj_aligned_free(m) _mm_free(m) /** Reallocate memory blocks. @@ -58,7 +71,7 @@ Reallocate memory blocks. @param size New size in bytes @return Returns a void pointer to the reallocated (and possibly moved) memory block */ -void* opj_realloc( void *memblock, size_t size ); +void* __attribute__ ((malloc)) opj_realloc( void *memblock, size_t size ); /** Deallocates or frees a memory block. 
diff --git a/libopenjpeg/t1.c b/libopenjpeg/t1.c index b5d95994..724ec04d 100644 --- a/libopenjpeg/t1.c +++ b/libopenjpeg/t1.c @@ -758,47 +758,41 @@ static double t1_getwmsedec( return wmsedec; } -static void allocate_buffers( +static bool allocate_buffers( opj_t1_t *t1, int w, int h) { - int datasize; + int datasize=w * h; int flagssize; - datasize=w * h; - //fprintf(stderr,"w=%i h=%i datasize=%i flagssize=%i\n",w,h,datasize,flagssize); - if(datasize > t1->datasize){ - //fprintf(stderr,"Allocating t1->data: datasize=%i\n",datasize); - free(t1->data); - t1->data=malloc(datasize * sizeof(int)); + opj_aligned_free(t1->data); + t1->data=opj_aligned_malloc(datasize * sizeof(int)); if(!t1->data){ - return; + return false; } t1->datasize=datasize; } - //memset(t1->data,0xff,t1->datasize); memset(t1->data,0,datasize * sizeof(int)); t1->flags_stride=w+2; - flagssize=t1->flags_stride * (h+2); + flagssize = t1->flags_stride * (h+2); if(flagssize > t1->flagssize){ - //fprintf(stderr,"Allocating t1->flags: flagssize=%i\n",flagssize); - free(t1->flags); - t1->flags=malloc(flagssize * sizeof(flag_t)); + opj_aligned_free(t1->flags); + t1->flags=opj_aligned_malloc(flagssize * sizeof(flag_t)); if(!t1->flags){ - fprintf(stderr,"Allocating t1->flags FAILED!\n"); - return; + return false; } t1->flagssize=flagssize; } - //memset(t1->flags,0xff,t1->flagssize); memset(t1->flags,0,flagssize * sizeof(flag_t)); t1->w=w; t1->h=h; + + return true; } /** mod fixed_quality */ @@ -948,12 +942,15 @@ static void t1_decode_cblk( opj_raw_t *raw = t1->raw; /* RAW component */ opj_mqc_t *mqc = t1->mqc; /* MQC component */ - - allocate_buffers( - t1, - cblk->x1 - cblk->x0, - cblk->y1 - cblk->y0); - + + if(!allocate_buffers( + t1, + cblk->x1 - cblk->x0, + cblk->y1 - cblk->y0)) + { + return; + } + bpno = roishift + cblk->numbps - 1; passtype = 2; @@ -1025,8 +1022,8 @@ void t1_destroy(opj_t1_t *t1) { /* destroy MQC and RAW handles */ mqc_destroy(t1->mqc); raw_destroy(t1->raw); - free(t1->data); - 
free(t1->flags); + opj_aligned_free(t1->data); + opj_aligned_free(t1->flags); free(t1); } } @@ -1067,10 +1064,13 @@ void t1_encode_cblks( y += pres->y1 - pres->y0; } - allocate_buffers( - t1, - cblk->x1 - cblk->x0, - cblk->y1 - cblk->y0); + if(!allocate_buffers( + t1, + cblk->x1 - cblk->x0, + cblk->y1 - cblk->y0)) + { + return; + } w = tilec->x1 - tilec->x0; if (tcp->tccps[compno].qmfbid == 1) { diff --git a/libopenjpeg/tcd.c b/libopenjpeg/tcd.c index 4c9f0c93..7479822a 100644 --- a/libopenjpeg/tcd.c +++ b/libopenjpeg/tcd.c @@ -192,7 +192,7 @@ void tcd_malloc_encode(opj_tcd_t *tcd, opj_image_t * image, opj_cp_t * cp, int c tilec->x1 = int_ceildiv(tile->x1, image->comps[compno].dx); tilec->y1 = int_ceildiv(tile->y1, image->comps[compno].dy); - tilec->data = (int *) opj_malloc((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0) * sizeof(int)); + tilec->data = (int *) opj_aligned_malloc((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0) * sizeof(int)); tilec->numresolutions = tccp->numresolutions; tilec->resolutions = (opj_tcd_resolution_t *) opj_malloc(tilec->numresolutions * sizeof(opj_tcd_resolution_t)); @@ -437,7 +437,7 @@ void tcd_init_encode(opj_tcd_t *tcd, opj_image_t * image, opj_cp_t * cp, int cur tilec->x1 = int_ceildiv(tile->x1, image->comps[compno].dx); tilec->y1 = int_ceildiv(tile->y1, image->comps[compno].dy); - tilec->data = (int *) opj_malloc((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0) * sizeof(int)); + tilec->data = (int *) opj_aligned_malloc((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0) * sizeof(int)); tilec->numresolutions = tccp->numresolutions; /* tilec->resolutions=(opj_tcd_resolution_t*)opj_realloc(tilec->resolutions,tilec->numresolutions*sizeof(opj_tcd_resolution_t)); */ for (resno = 0; resno < tilec->numresolutions; resno++) { @@ -676,7 +676,7 @@ void tcd_malloc_decode_tile(opj_tcd_t *tcd, opj_image_t * image, opj_cp_t * cp, tilec->x1 = int_ceildiv(tile->x1, image->comps[compno].dx); tilec->y1 = int_ceildiv(tile->y1, 
image->comps[compno].dy); - tilec->data = (int *) opj_malloc((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0) * sizeof(int)); + tilec->data = (int*) opj_aligned_malloc((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0) * sizeof(int)); tilec->numresolutions = tccp->numresolutions; tilec->resolutions = (opj_tcd_resolution_t *) opj_malloc(tilec->numresolutions * sizeof(opj_tcd_resolution_t)); @@ -1279,14 +1279,14 @@ int tcd_encode_tile(opj_tcd_t *tcd, int tileno, unsigned char *dest, int len, op if(tcd->cur_tp_num == tcd->cur_totnum_tp - 1){ tcd->encoding_time = opj_clock() - tcd->encoding_time; opj_event_msg(tcd->cinfo, EVT_INFO, "- tile encoded in %f s\n", tcd->encoding_time); - + /* cleaning memory */ for (compno = 0; compno < tile->numcomps; compno++) { opj_tcd_tilecomp_t *tilec = &tile->comps[compno]; - opj_free(tilec->data); + opj_aligned_free(tilec->data); } } - + return l; } @@ -1401,12 +1401,12 @@ bool tcd_decode_tile(opj_tcd_t *tcd, unsigned char *src, int len, int tileno) { tile_time = opj_clock() - tile_time; /* time needed to decode a tile */ opj_event_msg(tcd->cinfo, EVT_INFO, "- tile decoded in %f s\n", tile_time); - + for (compno = 0; compno < tile->numcomps; compno++) { - opj_free(tcd->tcd_image->tiles[tileno].comps[compno].data); + opj_aligned_free(tcd->tcd_image->tiles[tileno].comps[compno].data); tcd->tcd_image->tiles[tileno].comps[compno].data = NULL; } - + if (eof) { return false; } -- 2.30.2