src/lib/openjp2/mct.c

   1 /*
   2  * Copyright (c) 2002-2007, Communications and Remote Sensing Laboratory, Universite catholique de Louvain (UCL), Belgium
   3  * Copyright (c) 2002-2007, Professor Benoit Macq
   4  * Copyright (c) 2001-2003, David Janssens
   5  * Copyright (c) 2002-2003, Yannick Verschueren
   6  * Copyright (c) 2003-2007, Francois-Olivier Devaux and Antonin Descampe
   7  * Copyright (c) 2005, Herve Drolon, FreeImage Team
   8  * All rights reserved.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  *
  19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
  20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29  * POSSIBILITY OF SUCH DAMAGE.
  30  */
  31
  32 #ifdef __SSE__
  33 #include <xmmintrin.h>
  34 #endif
  35
  36 #include "opj_includes.h"
  37
  38 /* <summary> */
  39 /* This table contains the norms of the basis function of the reversible MCT. */
  40 /* </summary> */
  41 static const OPJ_FLOAT64 opj_mct_norms[3] = { 1.732, .8292, .8292 };
  42
  43 /* <summary> */
  44 /* This table contains the norms of the basis function of the irreversible MCT. */
  45 /* </summary> */
  46 static const OPJ_FLOAT64 opj_mct_norms_real[3] = { 1.732, 1.805, 1.573 };
  47
  48 const OPJ_FLOAT64 * opj_mct_get_mct_norms ()
  49 {
  50         return opj_mct_norms;
  51 }
  52
  53 const OPJ_FLOAT64 * opj_mct_get_mct_norms_real ()
  54 {
  55         return opj_mct_norms_real;
  56 }
  57
  58 /* <summary> */
  59 /* Foward reversible MCT. */
  60 /* </summary> */
  61 void opj_mct_encode(
  62                 OPJ_INT32* restrict c0,
  63                 OPJ_INT32* restrict c1,
  64                 OPJ_INT32* restrict c2,
  65                 OPJ_UINT32 n)
  66 {
  67         OPJ_UINT32 i;
  68         for(i = 0; i < n; ++i) {
  69                 OPJ_INT32 r = c0[i];
  70                 OPJ_INT32 g = c1[i];
  71                 OPJ_INT32 b = c2[i];
  72                 OPJ_INT32 y = (r + (g * 2) + b) >> 2;
  73                 OPJ_INT32 u = b - g;
  74                 OPJ_INT32 v = r - g;
  75                 c0[i] = y;
  76                 c1[i] = u;
  77                 c2[i] = v;
  78         }
  79 }
  80
  81 /* <summary> */
  82 /* Inverse reversible MCT. */
  83 /* </summary> */
  84 void opj_mct_decode(
  85                 OPJ_INT32* restrict c0,
  86                 OPJ_INT32* restrict c1,
  87                 OPJ_INT32* restrict c2,
  88                 OPJ_UINT32 n)
  89 {
  90         OPJ_UINT32 i;
  91         for (i = 0; i < n; ++i) {
  92                 OPJ_INT32 y = c0[i];
  93                 OPJ_INT32 u = c1[i];
  94                 OPJ_INT32 v = c2[i];
  95                 OPJ_INT32 g = y - ((u + v) >> 2);
  96                 OPJ_INT32 r = v + g;
  97                 OPJ_INT32 b = u + g;
  98                 c0[i] = r;
  99                 c1[i] = g;
 100                 c2[i] = b;
 101         }
 102 }
 103
 104 /* <summary> */
 105 /* Get norm of basis function of reversible MCT. */
 106 /* </summary> */
 107 OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) {
 108         return opj_mct_norms[compno];
 109 }
 110
 111 /* <summary> */
 112 /* Foward irreversible MCT. */
 113 /* </summary> */
 114 void opj_mct_encode_real(
 115                 OPJ_INT32* restrict c0,
 116                 OPJ_INT32* restrict c1,
 117                 OPJ_INT32* restrict c2,
 118                 OPJ_UINT32 n)
 119 {
 120         OPJ_UINT32 i;
 121         for(i = 0; i < n; ++i) {
 122                 OPJ_INT32 r = c0[i];
 123                 OPJ_INT32 g = c1[i];
 124                 OPJ_INT32 b = c2[i];
 125                 OPJ_INT32 y =  opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, 4809) + opj_int_fix_mul(b, 934);
 126                 OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, 2714) + opj_int_fix_mul(b, 4096);
 127                 OPJ_INT32 v =  opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, 3430) - opj_int_fix_mul(b, 666);
 128                 c0[i] = y;
 129                 c1[i] = u;
 130                 c2[i] = v;
 131         }
 132 }
 133
 134 /* <summary> */
 135 /* Inverse irreversible MCT. */
 136 /* </summary> */
 137 void opj_mct_decode_real(
 138                 OPJ_FLOAT32* restrict c0,
 139                 OPJ_FLOAT32* restrict c1,
 140                 OPJ_FLOAT32* restrict c2,
 141                 OPJ_UINT32 n)
 142 {
 143         OPJ_UINT32 i;
 144 #ifdef __SSE__
 145         __m128 vrv, vgu, vgv, vbu;
 146         vrv = _mm_set1_ps(1.402f);
 147         vgu = _mm_set1_ps(0.34413f);
 148         vgv = _mm_set1_ps(0.71414f);
 149         vbu = _mm_set1_ps(1.772f);
 150         for (i = 0; i < (n >> 3); ++i) {
 151                 __m128 vy, vu, vv;
 152                 __m128 vr, vg, vb;
 153
 154                 vy = _mm_load_ps(c0);
 155                 vu = _mm_load_ps(c1);
 156                 vv = _mm_load_ps(c2);
 157                 vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
 158                 vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
 159                 vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
 160                 _mm_store_ps(c0, vr);
 161                 _mm_store_ps(c1, vg);
 162                 _mm_store_ps(c2, vb);
 163                 c0 += 4;
 164                 c1 += 4;
 165                 c2 += 4;
 166
 167                 vy = _mm_load_ps(c0);
 168                 vu = _mm_load_ps(c1);
 169                 vv = _mm_load_ps(c2);
 170                 vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
 171                 vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
 172                 vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
 173                 _mm_store_ps(c0, vr);
 174                 _mm_store_ps(c1, vg);
 175                 _mm_store_ps(c2, vb);
 176                 c0 += 4;
 177                 c1 += 4;
 178                 c2 += 4;
 179         }
 180         n &= 7;
 181 #endif
 182         for(i = 0; i < n; ++i) {
 183                 OPJ_FLOAT32 y = c0[i];
 184                 OPJ_FLOAT32 u = c1[i];
 185                 OPJ_FLOAT32 v = c2[i];
 186                 OPJ_FLOAT32 r = y + (v * 1.402f);
 187                 OPJ_FLOAT32 g = y - (u * 0.34413f) - (v * (0.71414f));
 188                 OPJ_FLOAT32 b = y + (u * 1.772f);
 189                 c0[i] = r;
 190                 c1[i] = g;
 191                 c2[i] = b;
 192         }
 193 }
 194
 195 /* <summary> */
 196 /* Get norm of basis function of irreversible MCT. */
 197 /* </summary> */
 198 OPJ_FLOAT64 opj_mct_getnorm_real(OPJ_UINT32 compno) {
 199         return opj_mct_norms_real[compno];
 200 }
 201
 202
 203 opj_bool opj_mct_encode_custom(
 204                                            OPJ_BYTE * pCodingdata,
 205                                            OPJ_UINT32 n,
 206                                            OPJ_BYTE ** pData,
 207                                            OPJ_UINT32 pNbComp,
 208                                            OPJ_UINT32 isSigned)
 209 {
 210         OPJ_FLOAT32 * lMct = (OPJ_FLOAT32 *) pCodingdata;
 211         OPJ_UINT32 i;
 212         OPJ_UINT32 j;
 213         OPJ_UINT32 k;
 214         OPJ_UINT32 lNbMatCoeff = pNbComp * pNbComp;
 215         OPJ_INT32 * lCurrentData = 00;
 216         OPJ_INT32 * lCurrentMatrix = 00;
 217         OPJ_INT32 ** lData = (OPJ_INT32 **) pData;
 218         OPJ_UINT32 lMultiplicator = 1 << 13;
 219         OPJ_INT32 * lMctPtr;
 220
 221     OPJ_ARG_NOT_USED(isSigned);
 222
 223         lCurrentData = (OPJ_INT32 *) opj_malloc((pNbComp + lNbMatCoeff) * sizeof(OPJ_INT32));
 224         if (! lCurrentData) {
 225                 return OPJ_FALSE;
 226         }
 227
 228         lCurrentMatrix = lCurrentData + pNbComp;
 229
 230         for (i =0;i<lNbMatCoeff;++i) {
 231                 lCurrentMatrix[i] = (OPJ_INT32) (*(lMct++) * lMultiplicator);
 232         }
 233
 234         for (i = 0; i < n; ++i)  {
 235                 lMctPtr = lCurrentMatrix;
 236                 for (j=0;j<pNbComp;++j) {
 237                         lCurrentData[j] = (*(lData[j]));
 238                 }
 239
 240                 for (j=0;j<pNbComp;++j) {
 241                         *(lData[j]) = 0;
 242                         for (k=0;k<pNbComp;++k) {
 243                                 *(lData[j]) += opj_int_fix_mul(*lMctPtr, lCurrentData[k]);
 244                                 ++lMctPtr;
 245                         }
 246
 247                         ++lData[j];
 248                 }
 249         }
 250
 251         opj_free(lCurrentData);
 252
 253         return OPJ_TRUE;
 254 }
 255
 256 opj_bool opj_mct_decode_custom(
 257                                            OPJ_BYTE * pDecodingData,
 258                                            OPJ_UINT32 n,
 259                                            OPJ_BYTE ** pData,
 260                                            OPJ_UINT32 pNbComp,
 261                                            OPJ_UINT32 isSigned)
 262 {
 263         OPJ_FLOAT32 * lMct;
 264         OPJ_UINT32 i;
 265         OPJ_UINT32 j;
 266         OPJ_UINT32 k;
 267
 268         OPJ_FLOAT32 * lCurrentData = 00;
 269         OPJ_FLOAT32 * lCurrentResult = 00;
 270         OPJ_FLOAT32 ** lData = (OPJ_FLOAT32 **) pData;
 271
 272     OPJ_ARG_NOT_USED(isSigned);
 273
 274         lCurrentData = (OPJ_FLOAT32 *) opj_malloc (2 * pNbComp * sizeof(OPJ_FLOAT32));
 275         if (! lCurrentData) {
 276                 return OPJ_FALSE;
 277         }
 278         lCurrentResult = lCurrentData + pNbComp;
 279
 280         for (i = 0; i < n; ++i) {
 281                 lMct = (OPJ_FLOAT32 *) pDecodingData;
 282                 for (j=0;j<pNbComp;++j) {
 283                         lCurrentData[j] = (OPJ_FLOAT32) (*(lData[j]));
 284                 }
 285                 for (j=0;j<pNbComp;++j) {
 286                         lCurrentResult[j] = 0;
 287                         for     (k=0;k<pNbComp;++k)     {
 288                                 lCurrentResult[j] += *(lMct++) * lCurrentData[k];
 289                         }
 290                         *(lData[j]++) = (OPJ_FLOAT32) (lCurrentResult[j]);
 291                 }
 292         }
 293         opj_free(lCurrentData);
 294         return OPJ_TRUE;
 295 }
 296
 297 void opj_calculate_norms(       OPJ_FLOAT64 * pNorms,
 298                                                         OPJ_UINT32 pNbComps,
 299                                                         OPJ_FLOAT32 * pMatrix)
 300 {
 301         OPJ_UINT32 i,j,lIndex;
 302         OPJ_FLOAT32 lCurrentValue;
 303         OPJ_FLOAT64 * lNorms = (OPJ_FLOAT64 *) pNorms;
 304         OPJ_FLOAT32 * lMatrix = (OPJ_FLOAT32 *) pMatrix;
 305
 306         for     (i=0;i<pNbComps;++i) {
 307                 lNorms[i] = 0;
 308                 lIndex = i;
 309
 310                 for     (j=0;j<pNbComps;++j) {
 311                         lCurrentValue = lMatrix[lIndex];
 312                         lIndex += pNbComps;
 313                         lNorms[i] += lCurrentValue * lCurrentValue;
 314                 }
 315                 lNorms[i] = sqrt(lNorms[i]);
 316         }
 317 }