Aligned malloc using Intel's _mm_malloc(), cleanup on the t1 memory allocation, getting rid of some leftover debug code
author Francois-Olivier Devaux <fodevaux@users.noreply.github.com>
Tue, 21 Aug 2007 12:13:54 +0000 (12:13 +0000)
committer Francois-Olivier Devaux <fodevaux@users.noreply.github.com>
Tue, 21 Aug 2007 12:13:54 +0000 (12:13 +0000)
ChangeLog
libopenjpeg/j2k_lib.h
libopenjpeg/t1.c
libopenjpeg/tcd.c

index 71699c79bd54c710bd44ec610857edcdc4a89949..e4c72ec7745c5157792239a2a2bfc8993f9f0e45 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -10,6 +10,7 @@ August 21, 2007
 * [FOD] Robustified MJ2 codecs
 * [Parvatha] Solved problems with codec reading from image file directory when filename had more than one "." in name
 * [Callum Lerwick] Minor cleanup patch, that gets rid of a bunch of "old style declaration" warnings from Intel's compiler
+* [Callum Lerwick] Aligned malloc using Intel's _mm_malloc(). Cleanup on the t1 memory allocation, getting rid of some leftover debug code
 
 August 20, 2007
 + [FOD] Added support for the TGA file format in the codec
index 0db63c60fb810aebecfe615b9e7065d9ddbf77b3..84a68c06667cf9fa3c0e4e1dbe8b6af21aa42079 100644 (file)
 The functions in J2K_LIB.C are internal utilities mainly used for memory management.
 */
 
+#ifndef __GNUC__ /* GCC-compatible compilers predefine __GNUC__ (there is no __GCC__ macro) */
+#define __attribute__(x) /* expands to nothing when attributes are unsupported */
+#endif
+
 /** @defgroup MISC MISC - Miscellaneous internal functions */
 /*@{*/
 
@@ -50,7 +54,16 @@ Allocate a memory block with elements initialized to 0
 @param size Bytes to allocate
 @return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available
 */
-void* opj_malloc( size_t size );
+void* __attribute__ ((malloc)) opj_malloc( size_t size );
+
+/**
+Allocate memory aligned to a 16-byte boundary
+@param size Bytes to allocate
+@return Returns a void pointer to the allocated space, or NULL if there is insufficient memory available
+*/
+#include <xmmintrin.h>
+#define opj_aligned_malloc(size)       _mm_malloc(size, 16)
+#define opj_aligned_free(m) _mm_free(m)
 
 /**
 Reallocate memory blocks.
@@ -58,7 +71,7 @@ Reallocate memory blocks.
 @param size New size in bytes
 @return Returns a void pointer to the reallocated (and possibly moved) memory block
 */
-void* opj_realloc( void *memblock, size_t size );
+void* opj_realloc( void *memblock, size_t size ); /* no 'malloc' attribute: the result may alias memblock and keeps its old contents */
 
 /**
 Deallocates or frees a memory block.
index b5d9599425192810b7bd8ba89a1f503f969479c9..724ec04d0c23e25234c3582bd6af0d3f61162935 100644 (file)
@@ -758,47 +758,54 @@ static double t1_getwmsedec(
        return wmsedec;
 }
 
-static void allocate_buffers(
+/**
+Make sure the T1 working buffers are large enough for a w x h code-block.
+t1->data is grown to hold at least w*h ints and t1->flags to hold at least
+(w+2)*(h+2) flag_t entries; the part of each buffer in use is then zeroed.
+@param t1 T1 handle whose data and flags buffers are (re)allocated
+@param w Width passed by the caller (cblk->x1 - cblk->x0)
+@param h Height passed by the caller (cblk->y1 - cblk->y0)
+@return Returns true on success, false if an aligned allocation failed
+*/
+static bool allocate_buffers(
                opj_t1_t *t1,
                int w,
                int h)
 {
-       int datasize;
+       int datasize=w * h;
        int flagssize;
 
-       datasize=w * h;
-       //fprintf(stderr,"w=%i h=%i datasize=%i flagssize=%i\n",w,h,datasize,flagssize);
-
        if(datasize > t1->datasize){
-               //fprintf(stderr,"Allocating t1->data: datasize=%i\n",datasize);
-               free(t1->data);
-               t1->data=malloc(datasize * sizeof(int));
+               opj_aligned_free(t1->data);
+               t1->data=opj_aligned_malloc(datasize * sizeof(int));
                if(!t1->data){
-                       return;
+                       /* old buffer was freed above: forget its capacity so a later call reallocates instead of zeroing NULL */
+                       t1->datasize=0;
+                       return false;
                }
                t1->datasize=datasize;
        }
-       //memset(t1->data,0xff,t1->datasize);
        memset(t1->data,0,datasize * sizeof(int));
 
        t1->flags_stride=w+2;
-       flagssize=t1->flags_stride * (h+2);
+       flagssize = t1->flags_stride * (h+2);
 
        if(flagssize > t1->flagssize){
-               //fprintf(stderr,"Allocating t1->flags: flagssize=%i\n",flagssize);
-               free(t1->flags);
-               t1->flags=malloc(flagssize * sizeof(flag_t));
+               opj_aligned_free(t1->flags);
+               t1->flags=opj_aligned_malloc(flagssize * sizeof(flag_t));
                if(!t1->flags){
-                       fprintf(stderr,"Allocating t1->flags FAILED!\n");
-                       return;
+                       /* same reasoning as for t1->data: the recorded capacity must not outlive the freed buffer */
+                       t1->flagssize=0;
+                       return false;
                }
                t1->flagssize=flagssize;
        }
-       //memset(t1->flags,0xff,t1->flagssize);
        memset(t1->flags,0,flagssize * sizeof(flag_t));
 
        t1->w=w;
        t1->h=h;
+
+       return true;
 }
 
 /** mod fixed_quality */
@@ -948,12 +942,15 @@ static void t1_decode_cblk(
        
        opj_raw_t *raw = t1->raw;       /* RAW component */
        opj_mqc_t *mqc = t1->mqc;       /* MQC component */
-       
-       allocate_buffers(
-                       t1,
-                       cblk->x1 - cblk->x0,
-                       cblk->y1 - cblk->y0);
-       
+
+       if(!allocate_buffers(
+                               t1,
+                               cblk->x1 - cblk->x0,
+                               cblk->y1 - cblk->y0))
+       {
+               return;
+       }
+
        bpno = roishift + cblk->numbps - 1;
        passtype = 2;
        
@@ -1025,8 +1022,8 @@ void t1_destroy(opj_t1_t *t1) {
                /* destroy MQC and RAW handles */
                mqc_destroy(t1->mqc);
                raw_destroy(t1->raw);
-               free(t1->data);
-               free(t1->flags);
+               opj_aligned_free(t1->data);
+               opj_aligned_free(t1->flags);
                free(t1);
        }
 }
@@ -1067,10 +1064,13 @@ void t1_encode_cblks(
                                                        y += pres->y1 - pres->y0;
                                                }
 
-                                               allocate_buffers(
-                                                               t1,
-                                                               cblk->x1 - cblk->x0,
-                                                               cblk->y1 - cblk->y0);
+                                               if(!allocate_buffers(
+                                                                       t1,
+                                                                       cblk->x1 - cblk->x0,
+                                                                       cblk->y1 - cblk->y0))
+                                               {
+                                                       return;
+                                               }
 
                                                w = tilec->x1 - tilec->x0;
                                                if (tcp->tccps[compno].qmfbid == 1) {
index 4c9f0c937946fa3f92243e81b64685671eb63ad6..7479822afa12902db311a88991f4b20fbd92340f 100644 (file)
@@ -192,7 +192,7 @@ void tcd_malloc_encode(opj_tcd_t *tcd, opj_image_t * image, opj_cp_t * cp, int c
                        tilec->x1 = int_ceildiv(tile->x1, image->comps[compno].dx);
                        tilec->y1 = int_ceildiv(tile->y1, image->comps[compno].dy);
                        
-                       tilec->data = (int *) opj_malloc((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0) * sizeof(int));
+                       tilec->data = (int *) opj_aligned_malloc((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0) * sizeof(int));
                        tilec->numresolutions = tccp->numresolutions;
 
                        tilec->resolutions = (opj_tcd_resolution_t *) opj_malloc(tilec->numresolutions * sizeof(opj_tcd_resolution_t));
@@ -437,7 +437,7 @@ void tcd_init_encode(opj_tcd_t *tcd, opj_image_t * image, opj_cp_t * cp, int cur
                        tilec->x1 = int_ceildiv(tile->x1, image->comps[compno].dx);
                        tilec->y1 = int_ceildiv(tile->y1, image->comps[compno].dy);
                        
-                       tilec->data = (int *) opj_malloc((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0) * sizeof(int));
+                       tilec->data = (int *) opj_aligned_malloc((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0) * sizeof(int));
                        tilec->numresolutions = tccp->numresolutions;
                        /* tilec->resolutions=(opj_tcd_resolution_t*)opj_realloc(tilec->resolutions,tilec->numresolutions*sizeof(opj_tcd_resolution_t)); */
                        for (resno = 0; resno < tilec->numresolutions; resno++) {
@@ -676,7 +676,7 @@ void tcd_malloc_decode_tile(opj_tcd_t *tcd, opj_image_t * image, opj_cp_t * cp,
                tilec->x1 = int_ceildiv(tile->x1, image->comps[compno].dx);
                tilec->y1 = int_ceildiv(tile->y1, image->comps[compno].dy);
                
-               tilec->data = (int *) opj_malloc((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0) * sizeof(int));
+               tilec->data = (int*) opj_aligned_malloc((tilec->x1 - tilec->x0) * (tilec->y1 - tilec->y0) * sizeof(int));
                tilec->numresolutions = tccp->numresolutions;
                tilec->resolutions = (opj_tcd_resolution_t *) opj_malloc(tilec->numresolutions * sizeof(opj_tcd_resolution_t));
                
@@ -1279,14 +1279,14 @@ int tcd_encode_tile(opj_tcd_t *tcd, int tileno, unsigned char *dest, int len, op
        if(tcd->cur_tp_num == tcd->cur_totnum_tp - 1){
                tcd->encoding_time = opj_clock() - tcd->encoding_time;
                opj_event_msg(tcd->cinfo, EVT_INFO, "- tile encoded in %f s\n", tcd->encoding_time);
-       
+
                /* cleaning memory */
                for (compno = 0; compno < tile->numcomps; compno++) {
                        opj_tcd_tilecomp_t *tilec = &tile->comps[compno];
-                       opj_free(tilec->data);
+                       opj_aligned_free(tilec->data);
                }
        }
-       
+
        return l;
 }
 
@@ -1401,12 +1401,12 @@ bool tcd_decode_tile(opj_tcd_t *tcd, unsigned char *src, int len, int tileno) {
        
        tile_time = opj_clock() - tile_time;    /* time needed to decode a tile */
        opj_event_msg(tcd->cinfo, EVT_INFO, "- tile decoded in %f s\n", tile_time);
-               
+
        for (compno = 0; compno < tile->numcomps; compno++) {
-               opj_free(tcd->tcd_image->tiles[tileno].comps[compno].data);
+               opj_aligned_free(tcd->tcd_image->tiles[tileno].comps[compno].data);
                tcd->tcd_image->tiles[tileno].comps[compno].data = NULL;
        }
-       
+
        if (eof) {
                return false;
        }