libopenjpeg/mct.c

   1 /*
   2  * Copyright (c) 2002-2007, Communications and Remote Sensing Laboratory, Universite catholique de Louvain (UCL), Belgium
   3  * Copyright (c) 2002-2007, Professor Benoit Macq
   4  * Copyright (c) 2001-2003, David Janssens
   5  * Copyright (c) 2002-2003, Yannick Verschueren
   6  * Copyright (c) 2003-2007, Francois-Olivier Devaux and Antonin Descampe
   7  * Copyright (c) 2005, Herve Drolon, FreeImage Team
   8  * All rights reserved.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
  20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29  * POSSIBILITY OF SUCH DAMAGE.
  30  */
  31
  32 #ifdef __SSE__
  33 #include <xmmintrin.h>
  34 #endif
  35
  36 #include "opj_includes.h"
  37
  38 /* <summary> */
  39 /* This table contains the norms of the basis function of the reversible MCT. */
  40 /* </summary> */
  41 static const double mct_norms[3] = { 1.732, .8292, .8292 };
  42
  43 /* <summary> */
  44 /* This table contains the norms of the basis function of the irreversible MCT. */
  45 /* </summary> */
  46 static const double mct_norms_real[3] = { 1.732, 1.805, 1.573 };
  47
  48 const OPJ_FLOAT64 * get_mct_norms ()
  49 {
  50         return mct_norms;
  51 }
  52
  53 const OPJ_FLOAT64 * get_mct_norms_real ()
  54 {
  55         return mct_norms_real;
  56 }
  57
  58 /* <summary> */
  59 /* Foward reversible MCT. */
  60 /* </summary> */
  61 void mct_encode(
  62                 int* restrict c0,
  63                 int* restrict c1,
  64                 int* restrict c2,
  65                 int n)
  66 {
  67         int i;
  68         for(i = 0; i < n; ++i) {
  69                 int r = c0[i];
  70                 int g = c1[i];
  71                 int b = c2[i];
  72                 int y = (r + (g * 2) + b) >> 2;
  73                 int u = b - g;
  74                 int v = r - g;
  75                 c0[i] = y;
  76                 c1[i] = u;
  77                 c2[i] = v;
  78         }
  79 }
  80
  81 /* <summary> */
  82 /* Inverse reversible MCT. */
  83 /* </summary> */
  84 void mct_decode(
  85                 int* restrict c0,
  86                 int* restrict c1,
  87                 int* restrict c2,
  88                 int n)
  89 {
  90         int i;
  91         for (i = 0; i < n; ++i) {
  92                 int y = c0[i];
  93                 int u = c1[i];
  94                 int v = c2[i];
  95                 int g = y - ((u + v) >> 2);
  96                 int r = v + g;
  97                 int b = u + g;
  98                 c0[i] = r;
  99                 c1[i] = g;
 100                 c2[i] = b;
 101         }
 102 }
 103
 104 /* <summary> */
 105 /* Get norm of basis function of reversible MCT. */
 106 /* </summary> */
 107 double mct_getnorm(int compno) {
 108         return mct_norms[compno];
 109 }
 110
 111 /* <summary> */
 112 /* Foward irreversible MCT. */
 113 /* </summary> */
 114 void mct_encode_real(
 115                 int* restrict c0,
 116                 int* restrict c1,
 117                 int* restrict c2,
 118                 int n)
 119 {
 120         int i;
 121         for(i = 0; i < n; ++i) {
 122                 int r = c0[i];
 123                 int g = c1[i];
 124                 int b = c2[i];
 125                 int y =  fix_mul(r, 2449) + fix_mul(g, 4809) + fix_mul(b, 934);
 126                 int u = -fix_mul(r, 1382) - fix_mul(g, 2714) + fix_mul(b, 4096);
 127                 int v =  fix_mul(r, 4096) - fix_mul(g, 3430) - fix_mul(b, 666);
 128                 c0[i] = y;
 129                 c1[i] = u;
 130                 c2[i] = v;
 131         }
 132 }
 133
 134 /* <summary> */
 135 /* Inverse irreversible MCT. */
 136 /* </summary> */
 137 void mct_decode_real(
 138                 float* restrict c0,
 139                 float* restrict c1,
 140                 float* restrict c2,
 141                 int n)
 142 {
 143         int i;
 144 #ifdef __SSE__
 145         __m128 vrv, vgu, vgv, vbu;
 146         vrv = _mm_set1_ps(1.402f);
 147         vgu = _mm_set1_ps(0.34413f);
 148         vgv = _mm_set1_ps(0.71414f);
 149         vbu = _mm_set1_ps(1.772f);
 150         for (i = 0; i < (n >> 3); ++i) {
 151                 __m128 vy, vu, vv;
 152                 __m128 vr, vg, vb;
 153
 154                 vy = _mm_load_ps(c0);
 155                 vu = _mm_load_ps(c1);
 156                 vv = _mm_load_ps(c2);
 157                 vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
 158                 vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
 159                 vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
 160                 _mm_store_ps(c0, vr);
 161                 _mm_store_ps(c1, vg);
 162                 _mm_store_ps(c2, vb);
 163                 c0 += 4;
 164                 c1 += 4;
 165                 c2 += 4;
 166
 167                 vy = _mm_load_ps(c0);
 168                 vu = _mm_load_ps(c1);
 169                 vv = _mm_load_ps(c2);
 170                 vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
 171                 vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
 172                 vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
 173                 _mm_store_ps(c0, vr);
 174                 _mm_store_ps(c1, vg);
 175                 _mm_store_ps(c2, vb);
 176                 c0 += 4;
 177                 c1 += 4;
 178                 c2 += 4;
 179         }
 180         n &= 7;
 181 #endif
 182         for(i = 0; i < n; ++i) {
 183                 float y = c0[i];
 184                 float u = c1[i];
 185                 float v = c2[i];
 186                 float r = y + (v * 1.402f);
 187                 float g = y - (u * 0.34413f) - (v * (0.71414f));
 188                 float b = y + (u * 1.772f);
 189                 c0[i] = r;
 190                 c1[i] = g;
 191                 c2[i] = b;
 192         }
 193 }
 194
 195 /* <summary> */
 196 /* Get norm of basis function of irreversible MCT. */
 197 /* </summary> */
 198 double mct_getnorm_real(int compno) {
 199         return mct_norms_real[compno];
 200 }
 201
 202
 203 opj_bool mct_encode_custom(
 204                                            // MCT data
 205                                            OPJ_BYTE * pCodingdata,
 206                                            // size of components
 207                                            OPJ_UINT32 n,
 208                                            // components
 209                                            OPJ_BYTE ** pData,
 210                                            // nb of components (i.e. size of pData)
 211                                            OPJ_UINT32 pNbComp,
 212                                            // tells if the data is signed
 213                                            OPJ_UINT32 isSigned)
 214 {
 215         OPJ_FLOAT32 * lMct = (OPJ_FLOAT32 *) pCodingdata;
 216         OPJ_UINT32 i;
 217         OPJ_UINT32 j;
 218         OPJ_UINT32 k;
 219         OPJ_UINT32 lNbMatCoeff = pNbComp * pNbComp;
 220         OPJ_INT32 * lCurrentData = 00;
 221         OPJ_INT32 * lCurrentMatrix = 00;
 222         OPJ_INT32 ** lData = (OPJ_INT32 **) pData;
 223         OPJ_UINT32 lMultiplicator = 1 << 13;
 224         OPJ_INT32 * lMctPtr;
 225
 226         lCurrentData = (OPJ_INT32 *) opj_malloc((pNbComp + lNbMatCoeff) * sizeof(OPJ_INT32));
 227         if (! lCurrentData) {
 228                 return OPJ_FALSE;
 229         }
 230
 231         lCurrentMatrix = lCurrentData + pNbComp;
 232
 233         for (i =0;i<lNbMatCoeff;++i) {
 234                 lCurrentMatrix[i] = (OPJ_INT32) (*(lMct++) * lMultiplicator);
 235         }
 236
 237         for (i = 0; i < n; ++i)  {
 238                 lMctPtr = lCurrentMatrix;
 239                 for (j=0;j<pNbComp;++j) {
 240                         lCurrentData[j] = (*(lData[j]));
 241                 }
 242
 243                 for (j=0;j<pNbComp;++j) {
 244                         *(lData[j]) = 0;
 245                         for (k=0;k<pNbComp;++k) {
 246                                 *(lData[j]) += fix_mul(*lMctPtr, lCurrentData[k]);
 247                                 ++lMctPtr;
 248                         }
 249
 250                         ++lData[j];
 251                 }
 252         }
 253
 254         opj_free(lCurrentData);
 255
 256         return OPJ_TRUE;
 257 }
 258
 259 opj_bool mct_decode_custom(
 260                                            /* MCT data */
 261                                            OPJ_BYTE * pDecodingData,
 262                                            /* size of components */
 263                                            OPJ_UINT32 n,
 264                                            /* components */
 265                                            OPJ_BYTE ** pData,
 266                                            /* nb of components (i.e. size of pData) */
 267                                            OPJ_UINT32 pNbComp,
 268                                            /* tells if the data is signed */
 269                                            OPJ_UINT32 isSigned)
 270 {
 271         OPJ_FLOAT32 * lMct;
 272         OPJ_UINT32 i;
 273         OPJ_UINT32 j;
 274         OPJ_UINT32 k;
 275
 276         OPJ_FLOAT32 * lCurrentData = 00;
 277         OPJ_FLOAT32 * lCurrentResult = 00;
 278         OPJ_FLOAT32 ** lData = (OPJ_FLOAT32 **) pData;
 279
 280         lCurrentData = (OPJ_FLOAT32 *) opj_malloc (2 * pNbComp * sizeof(OPJ_FLOAT32));
 281         if
 282                 (! lCurrentData)
 283         {
 284                 return OPJ_FALSE;
 285         }
 286         lCurrentResult = lCurrentData + pNbComp;
 287
 288         for
 289                 (i = 0; i < n; ++i)
 290         {
 291                 lMct = (OPJ_FLOAT32 *) pDecodingData;
 292                 for
 293                         (j=0;j<pNbComp;++j)
 294                 {
 295                         lCurrentData[j] = (OPJ_FLOAT32) (*(lData[j]));
 296                 }
 297                 for
 298                         (j=0;j<pNbComp;++j)
 299                 {
 300                         lCurrentResult[j] = 0;
 301                         for
 302                                 (k=0;k<pNbComp;++k)
 303                         {
 304                                 lCurrentResult[j] += *(lMct++) * lCurrentData[k];
 305                         }
 306                         *(lData[j]++) = (OPJ_FLOAT32) (lCurrentResult[j]);
 307                 }
 308         }
 309         opj_free(lCurrentData);
 310         return OPJ_TRUE;
 311 }
 312
 313 void opj_calculate_norms(       OPJ_FLOAT64 * pNorms,
 314                                                         OPJ_UINT32 pNbComps,
 315                                                         OPJ_FLOAT32 * pMatrix)
 316 {
 317         OPJ_UINT32 i,j,lIndex;
 318         OPJ_FLOAT32 lCurrentValue;
 319         OPJ_FLOAT64 * lNorms = (OPJ_FLOAT64 *) pNorms;
 320         OPJ_FLOAT32 * lMatrix = (OPJ_FLOAT32 *) pMatrix;
 321
 322         for     (i=0;i<pNbComps;++i) {
 323                 lNorms[i] = 0;
 324                 lIndex = i;
 325
 326                 for     (j=0;j<pNbComps;++j) {
 327                         lCurrentValue = lMatrix[lIndex];
 328                         lIndex += pNbComps;
 329                         lNorms[i] += lCurrentValue * lCurrentValue;
 330                 }
 331                 lNorms[i] = sqrt(lNorms[i]);
 332         }
 333 }