November 14, 2007
-! [FOD] First Patch by Callum Lerwick. Instead of reinventing realloc, j2k_read_sod now just uses opj_realloc in j2k.c
- Second Patch by Callum Lerwick. This patch rearranges the largest memory allocations so they're allocated as
+! [FOD] - First Patch by Callum Lerwick. Instead of reinventing realloc, j2k_read_sod now just uses opj_realloc in j2k.c
+ - Second Patch by Callum Lerwick. This patch rearranges the largest memory allocations so they're allocated as
late as possible, and freed as soon as possible. This cuts memory usage by about half on two large test images.
+ - Third Patch by Callum Lerwick. The opj_tcd_cblk array is one of the largest allocations, because it
+ contains a bunch of static buffers. This also makes it a major source of cache thrashing. This patch allocates
+ the buffers from the heap, and dynamically sizes them in the decoder. I have not yet managed to dynamically size
+ them in the encoder, getting the decoder to do it was tricky enough... I also split opj_tcd_cblk_t into separate
+ encode and decode versions. A lot of fields were not used by both, so this cuts its size even further.
November 13, 2007
! [FOD] Patch by Dzonatas and Callum Lerwick.
*/
static void t1_encode_cblk(
opj_t1_t *t1,
- opj_tcd_cblk_t * cblk,
+ opj_tcd_cblk_enc_t* cblk,
int orient,
int compno,
int level,
*/
static void t1_decode_cblk(
opj_t1_t *t1,
- opj_tcd_cblk_t * cblk,
+ opj_tcd_cblk_dec_t* cblk,
int orient,
int roishift,
int cblksty);
/** mod fixed_quality */
static void t1_encode_cblk(
opj_t1_t *t1,
- opj_tcd_cblk_t * cblk,
+ opj_tcd_cblk_enc_t* cblk,
int orient,
int compno,
int level,
static void t1_decode_cblk(
opj_t1_t *t1,
- opj_tcd_cblk_t * cblk,
+ opj_tcd_cblk_dec_t* cblk,
int orient,
int roishift,
int cblksty)
/* BYPASS mode */
type = ((bpno <= (cblk->numbps - 1) - 4) && (passtype < 2) && (cblksty & J2K_CCP_CBLKSTY_LAZY)) ? T1_TYPE_RAW : T1_TYPE_MQ;
+ /* FIXME: slviewer gets here with a null pointer. Why? Partially downloaded and/or corrupt textures? */
+ if(seg->data == NULL){
+ continue;
+ }
if (type == T1_TYPE_RAW) {
- raw_init_dec(raw, seg->data, seg->len);
+ raw_init_dec(raw, (*seg->data) + seg->dataindex, seg->len);
} else {
- mqc_init_dec(mqc, seg->data, seg->len);
+ mqc_init_dec(mqc, (*seg->data) + seg->dataindex, seg->len);
}
for (passno = 0; passno < seg->numpasses; ++passno) {
opj_tcd_precinct_t *prc = &band->precincts[precno];
for (cblkno = 0; cblkno < prc->cw * prc->ch; ++cblkno) {
- opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
+ opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
int* restrict datap;
int* restrict tiledp;
int cblk_w;
opj_tcd_precinct_t* precinct = &band->precincts[precno];
for (cblkno = 0; cblkno < precinct->cw * precinct->ch; ++cblkno) {
- opj_tcd_cblk_t* cblk = &precinct->cblks[cblkno];
+ opj_tcd_cblk_dec_t* cblk = &precinct->cblks.dec[cblkno];
int* restrict datap;
void* restrict tiledp;
int cblk_w, cblk_h;
}
}
}
+ opj_free(cblk->data);
+ opj_free(cblk->segs);
} /* cblkno */
- opj_free(precinct->cblks);
+ opj_free(precinct->cblks.dec);
} /* precno */
} /* bandno */
} /* resno */
@param cblksty
@param first
*/
-static void t2_init_seg(opj_tcd_seg_t *seg, int cblksty, int first);
+static void t2_init_seg(opj_tcd_cblk_dec_t* cblk, int index, int cblksty, int first);
/**
Decode a packet of a tile from a source buffer
@param t2 T2 handle
tgt_reset(prc->incltree);
tgt_reset(prc->imsbtree);
for (cblkno = 0; cblkno < prc->cw * prc->ch; cblkno++) {
- opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
+ opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
cblk->numpasses = 0;
tgt_setvalue(prc->imsbtree, cblkno, band->numbps - cblk->numbps);
}
opj_tcd_band_t *band = &res->bands[bandno];
opj_tcd_precinct_t *prc = &band->precincts[precno];
for (cblkno = 0; cblkno < prc->cw * prc->ch; cblkno++) {
- opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
+ opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
opj_tcd_layer_t *layer = &cblk->layers[layno];
if (!cblk->numpasses && layer->numpasses) {
tgt_setvalue(prc->incltree, cblkno, layno);
}
}
for (cblkno = 0; cblkno < prc->cw * prc->ch; cblkno++) {
- opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
+ opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
opj_tcd_layer_t *layer = &cblk->layers[layno];
int increment = 0;
int nump = 0;
opj_tcd_band_t *band = &res->bands[bandno];
opj_tcd_precinct_t *prc = &band->precincts[precno];
for (cblkno = 0; cblkno < prc->cw * prc->ch; cblkno++) {
- opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
+ opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
opj_tcd_layer_t *layer = &cblk->layers[layno];
if (!layer->numpasses) {
continue;
return (c - dest);
}
-static void t2_init_seg(opj_tcd_seg_t * seg, int cblksty, int first) {
+static void t2_init_seg(opj_tcd_cblk_dec_t* cblk, int index, int cblksty, int first) {
+ opj_tcd_seg_t* seg;
+ cblk->segs = (opj_tcd_seg_t*) opj_realloc(cblk->segs, (index + 1) * sizeof(opj_tcd_seg_t));
+ seg = &cblk->segs[index];
+ seg->data = NULL;
+ seg->dataindex = 0;
seg->numpasses = 0;
seg->len = 0;
if (cblksty & J2K_CCP_CBLKSTY_TERMALL) {
tgt_reset(prc->incltree);
tgt_reset(prc->imsbtree);
for (cblkno = 0; cblkno < prc->cw * prc->ch; cblkno++) {
- opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
+ opj_tcd_cblk_dec_t* cblk = &prc->cblks.dec[cblkno];
cblk->numsegs = 0;
}
}
if ((band->x1-band->x0 == 0)||(band->y1-band->y0 == 0)) continue;
for (cblkno = 0; cblkno < prc->cw * prc->ch; cblkno++) {
- int included, increment, n;
- opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
- opj_tcd_seg_t *seg = NULL;
+ int included, increment, n, segno;
+ opj_tcd_cblk_dec_t* cblk = &prc->cblks.dec[cblkno];
/* if cblk not yet included before --> inclusion tagtree */
if (!cblk->numsegs) {
included = tgt_decode(bio, prc->incltree, cblkno, layno + 1);
increment = t2_getcommacode(bio);
/* length indicator increment */
cblk->numlenbits += increment;
+ segno = 0;
if (!cblk->numsegs) {
- seg = &cblk->segs[0];
- t2_init_seg(seg, tcp->tccps[compno].cblksty, 1);
+ t2_init_seg(cblk, segno, tcp->tccps[compno].cblksty, 1);
} else {
- seg = &cblk->segs[cblk->numsegs - 1];
- if (seg->numpasses == seg->maxpasses) {
- t2_init_seg(++seg, tcp->tccps[compno].cblksty, 0);
+ segno = cblk->numsegs - 1;
+ if (cblk->segs[segno].numpasses == cblk->segs[segno].maxpasses) {
+ ++segno;
+ t2_init_seg(cblk, segno, tcp->tccps[compno].cblksty, 0);
}
}
n = cblk->numnewpasses;
do {
- seg->numnewpasses = int_min(seg->maxpasses - seg->numpasses, n);
- seg->newlen = bio_read(bio, cblk->numlenbits + int_floorlog2(seg->numnewpasses));
- n -= seg->numnewpasses;
+ cblk->segs[segno].numnewpasses = int_min(cblk->segs[segno].maxpasses - cblk->segs[segno].numpasses, n);
+ cblk->segs[segno].newlen = bio_read(bio, cblk->numlenbits + int_floorlog2(cblk->segs[segno].numnewpasses));
+ n -= cblk->segs[segno].numnewpasses;
if (n > 0) {
- t2_init_seg(++seg, tcp->tccps[compno].cblksty, 0);
+ ++segno;
+ t2_init_seg(cblk, segno, tcp->tccps[compno].cblksty, 0);
}
} while (n > 0);
}
if ((band->x1-band->x0 == 0)||(band->y1-band->y0 == 0)) continue;
for (cblkno = 0; cblkno < prc->cw * prc->ch; cblkno++) {
- opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
+ opj_tcd_cblk_dec_t* cblk = &prc->cblks.dec[cblkno];
opj_tcd_seg_t *seg = NULL;
if (!cblk->numnewpasses)
continue;
#endif /* USE_JPWL */
+ cblk->data = (unsigned char*) opj_realloc(cblk->data, (cblk->len + seg->newlen) * sizeof(unsigned char*));
memcpy(cblk->data + cblk->len, c, seg->newlen);
if (seg->numpasses == 0) {
- seg->data = cblk->data + cblk->len;
+ seg->data = &cblk->data;
+ seg->dataindex = cblk->len;
}
c += seg->newlen;
cblk->len += seg->newlen;
#include "opj_includes.h"
void tcd_dump(FILE *fd, opj_tcd_t *tcd, opj_tcd_image_t * img) {
- int tileno, compno, resno, bandno, precno, cblkno;
+ int tileno, compno, resno, bandno, precno;//, cblkno;
fprintf(fd, "image {\n");
fprintf(fd, " tw=%d, th=%d x0=%d x1=%d y0=%d y1=%d\n",
fprintf(fd,
" x0=%d, y0=%d, x1=%d, y1=%d, cw=%d, ch=%d\n",
prec->x0, prec->y0, prec->x1, prec->y1, prec->cw, prec->ch);
+ /*
for (cblkno = 0; cblkno < prec->cw * prec->ch; cblkno++) {
opj_tcd_cblk_t *cblk = &prec->cblks[cblkno];
fprintf(fd, " cblk {\n");
cblk->x0, cblk->y0, cblk->x1, cblk->y1);
fprintf(fd, " }\n");
}
+ */
fprintf(fd, " }\n");
}
fprintf(fd, " }\n");
prc->cw = (brcblkxend - tlcblkxstart) >> cblkwidthexpn;
prc->ch = (brcblkyend - tlcblkystart) >> cblkheightexpn;
- prc->cblks = (opj_tcd_cblk_t*) opj_calloc((prc->cw * prc->ch), sizeof(opj_tcd_cblk_t));
+ prc->cblks.enc = (opj_tcd_cblk_enc_t*) opj_calloc((prc->cw * prc->ch), sizeof(opj_tcd_cblk_enc_t));
prc->incltree = tgt_create(prc->cw, prc->ch);
prc->imsbtree = tgt_create(prc->cw, prc->ch);
int cblkxend = cblkxstart + (1 << cblkwidthexpn);
int cblkyend = cblkystart + (1 << cblkheightexpn);
- opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
+ opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
/* code-block size (global) */
cblk->x0 = int_max(cblkxstart, prc->x0);
cblk->y0 = int_max(cblkystart, prc->y0);
cblk->x1 = int_min(cblkxend, prc->x1);
cblk->y1 = int_min(cblkyend, prc->y1);
+ cblk->data = (unsigned char*) opj_calloc(8192+2, sizeof(unsigned char));
+ /* FIXME: mqc_init_enc and mqc_byteout underrun the buffer if we don't do this. Why? */
+ cblk->data += 2;
+ cblk->layers = (opj_tcd_layer_t*) opj_calloc(100, sizeof(opj_tcd_layer_t));
+ cblk->passes = (opj_tcd_pass_t*) opj_calloc(100, sizeof(opj_tcd_pass_t));
}
}
}
}
void tcd_free_encode(opj_tcd_t *tcd) {
- int tileno, compno, resno, bandno, precno;
+ int tileno, compno, resno, bandno, precno, cblkno;
for (tileno = 0; tileno < 1; tileno++) {
opj_tcd_tile_t *tile = tcd->tcd_image->tiles;
tgt_destroy(prc->imsbtree);
prc->imsbtree = NULL;
}
- opj_free(prc->cblks);
- prc->cblks = NULL;
+ for (cblkno = 0; cblkno < prc->cw * prc->ch; cblkno++) {
+ opj_free(prc->cblks.enc[cblkno].data - 2);
+ opj_free(prc->cblks.enc[cblkno].layers);
+ opj_free(prc->cblks.enc[cblkno].passes);
+ }
+ opj_free(prc->cblks.enc);
} /* for (precno */
opj_free(band->precincts);
band->precincts = NULL;
prc->cw = (brcblkxend - tlcblkxstart) >> cblkwidthexpn;
prc->ch = (brcblkyend - tlcblkystart) >> cblkheightexpn;
- opj_free(prc->cblks);
- prc->cblks = (opj_tcd_cblk_t*) opj_calloc(prc->cw * prc->ch, sizeof(opj_tcd_cblk_t));
+ opj_free(prc->cblks.enc);
+ prc->cblks.enc = (opj_tcd_cblk_enc_t*) opj_calloc(prc->cw * prc->ch, sizeof(opj_tcd_cblk_enc_t));
if (prc->incltree != NULL) {
tgt_destroy(prc->incltree);
int cblkxend = cblkxstart + (1 << cblkwidthexpn);
int cblkyend = cblkystart + (1 << cblkheightexpn);
- opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
-
+ opj_tcd_cblk_enc_t* cblk = &prc->cblks.enc[cblkno];
+
/* code-block size (global) */
cblk->x0 = int_max(cblkxstart, prc->x0);
cblk->y0 = int_max(cblkystart, prc->y0);
cblk->x1 = int_min(cblkxend, prc->x1);
cblk->y1 = int_min(cblkyend, prc->y1);
+ cblk->data = (unsigned char*) opj_calloc(8192, sizeof(unsigned char));
+ cblk->layers = (opj_tcd_layer_t*) opj_calloc(100, sizeof(opj_tcd_layer_t));
+ cblk->passes = (opj_tcd_pass_t*) opj_calloc(100, sizeof(opj_tcd_pass_t));
}
} /* precno */
} /* bandno */
brcblkyend = int_ceildivpow2(prc->y1, cblkheightexpn) << cblkheightexpn;
prc->cw = (brcblkxend - tlcblkxstart) >> cblkwidthexpn;
prc->ch = (brcblkyend - tlcblkystart) >> cblkheightexpn;
-
- prc->cblks = (opj_tcd_cblk_t *) opj_malloc(prc->cw * prc->ch * sizeof(opj_tcd_cblk_t));
-
+
+ prc->cblks.dec = (opj_tcd_cblk_dec_t*) opj_malloc(prc->cw * prc->ch * sizeof(opj_tcd_cblk_dec_t));
+
prc->incltree = tgt_create(prc->cw, prc->ch);
prc->imsbtree = tgt_create(prc->cw, prc->ch);
int cblkystart = tlcblkystart + (cblkno / prc->cw) * (1 << cblkheightexpn);
int cblkxend = cblkxstart + (1 << cblkwidthexpn);
int cblkyend = cblkystart + (1 << cblkheightexpn);
-
+
+ opj_tcd_cblk_dec_t* cblk = &prc->cblks.dec[cblkno];
+ cblk->data = NULL;
+ cblk->segs = NULL;
/* code-block size (global) */
- opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
cblk->x0 = int_max(cblkxstart, prc->x0);
cblk->y0 = int_max(cblkystart, prc->y0);
cblk->x1 = int_min(cblkxend, prc->x1);
for (precno = 0; precno < res->pw * res->ph; precno++) {
opj_tcd_precinct_t *prc = &band->precincts[precno];
for (cblkno = 0; cblkno < prc->cw * prc->ch; cblkno++) {
- opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
+ opj_tcd_cblk_enc_t *cblk = &prc->cblks.enc[cblkno];
opj_tcd_layer_t *layer = &cblk->layers[layno];
int n;
int imsb = tcd->image->comps[compno].prec - cblk->numbps; /* number of bit-plan equal to zero */
for (precno = 0; precno < res->pw * res->ph; precno++) {
opj_tcd_precinct_t *prc = &band->precincts[precno];
for (cblkno = 0; cblkno < prc->cw * prc->ch; cblkno++) {
- opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
+ opj_tcd_cblk_enc_t *cblk = &prc->cblks.enc[cblkno];
opj_tcd_layer_t *layer = &cblk->layers[layno];
int n;
opj_tcd_precinct_t *prc = &band->precincts[precno];
for (cblkno = 0; cblkno < prc->cw * prc->ch; cblkno++) {
- opj_tcd_cblk_t *cblk = &prc->cblks[cblkno];
+ opj_tcd_cblk_enc_t *cblk = &prc->cblks.enc[cblkno];
for (passno = 0; passno < cblk->totalpasses; passno++) {
opj_tcd_pass_t *pass = &cblk->passes[passno];
FIXME: documentation
*/
typedef struct opj_tcd_seg {
+ unsigned char** data;
+ int dataindex;
int numpasses;
int len;
- unsigned char *data;
int maxpasses;
int numnewpasses;
int newlen;
/**
FIXME: documentation
*/
-typedef struct opj_tcd_cblk {
+typedef struct opj_tcd_cblk_enc {
+ /* NOTE(review): one visible allocation site advances data by 2 bytes and tcd_free_encode frees data - 2, but a second visible allocation site stores the unadvanced pointer -- confirm the two agree. */
+ unsigned char* data; /* code-block data buffer, heap-allocated (8192 bytes at the visible allocation sites) */
+ opj_tcd_layer_t* layers; /* per-layer information, heap-allocated (100 entries at the visible allocation sites) */
+ opj_tcd_pass_t* passes; /* per-pass information, heap-allocated (100 entries at the visible allocation sites) */
int x0, y0, x1, y1; /* dimension of the code-blocks : left upper corner (x0, y0) right low corner (x1,y1) */
int numbps;
int numlenbits;
- int len; /* length */
int numpasses; /* number of pass already done for the code-blocks */
- int numnewpasses; /* number of pass added to the code-blocks */
- int numsegs; /* number of segments */
- opj_tcd_seg_t segs[100]; /* segments informations */
- unsigned char data[8192]; /* Data */
int numpassesinlayers; /* number of passes in the layer */
- opj_tcd_layer_t layers[100]; /* layer information */
int totalpasses; /* total number of passes */
- opj_tcd_pass_t passes[100]; /* information about the passes */
-} opj_tcd_cblk_t;
+} opj_tcd_cblk_enc_t;
+
+typedef struct opj_tcd_cblk_dec {
+ /* NOTE(review): the visible opj_realloc call that grows data multiplies the byte count by sizeof(unsigned char*) rather than sizeof(unsigned char) -- looks like an over-allocation, confirm. */
+ unsigned char* data; /* code-block data; set to NULL at creation and grown with opj_realloc as segment bytes are appended */
+ opj_tcd_seg_t* segs; /* segment array; set to NULL at creation and grown to (index + 1) entries by t2_init_seg */
+ int x0, y0, x1, y1; /* code-block bounds: upper-left corner (x0, y0), lower-right corner (x1, y1) */
+ int numbps;
+ int numlenbits;
+ int len; /* number of bytes currently stored in data */
+ int numnewpasses; /* number of passes added to the code-block */
+ int numsegs; /* number of segments */
+} opj_tcd_cblk_dec_t;
/**
FIXME: documentation
typedef struct opj_tcd_precinct {
int x0, y0, x1, y1; /* dimension of the precinct : left upper corner (x0, y0) right low corner (x1,y1) */
int cw, ch; /* number of precinct in width and heigth */
- opj_tcd_cblk_t *cblks; /* code-blocks informations */
+ union{ /* code-block array of cw * ch entries: enc is used on the encoder paths, dec on the decoder paths */
+ opj_tcd_cblk_enc_t* enc;
+ opj_tcd_cblk_dec_t* dec;
+ } cblks;
opj_tgt_tree_t *incltree; /* inclusion tree */
opj_tgt_tree_t *imsbtree; /* IMSB tree */
} opj_tcd_precinct_t;