src/lib/openjp2/mct.c

   1 /*
   2  * The copyright in this software is being made available under the 2-clauses
   3  * BSD License, included below. This software may be subject to other third
   4  * party and contributor rights, including patent rights, and no such rights
   5  * are granted under this license.
   6  *
   7  * Copyright (c) 2002-2014, Universite catholique de Louvain (UCL), Belgium
   8  * Copyright (c) 2002-2014, Professor Benoit Macq
   9  * Copyright (c) 2001-2003, David Janssens
  10  * Copyright (c) 2002-2003, Yannick Verschueren
  11  * Copyright (c) 2003-2007, Francois-Olivier Devaux
  12  * Copyright (c) 2003-2014, Antonin Descampe
  13  * Copyright (c) 2005, Herve Drolon, FreeImage Team
  14  * Copyright (c) 2008, 2011-2012, Centre National d'Etudes Spatiales (CNES), FR
  15  * Copyright (c) 2012, CS Systemes d'Information, France
  16  * All rights reserved.
  17  *
  18  * Redistribution and use in source and binary forms, with or without
  19  * modification, are permitted provided that the following conditions
  20  * are met:
  21  * 1. Redistributions of source code must retain the above copyright
  22  *    notice, this list of conditions and the following disclaimer.
  23  * 2. Redistributions in binary form must reproduce the above copyright
  24  *    notice, this list of conditions and the following disclaimer in the
  25  *    documentation and/or other materials provided with the distribution.
  26  *
  27  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
  28  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  31  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  37  * POSSIBILITY OF SUCH DAMAGE.
  38  */
  39
  40 #ifdef __SSE__
  41 #include <xmmintrin.h>
  42 #endif
  43 #ifdef __SSE2__
  44 #include <emmintrin.h>
  45 #endif
  46 #ifdef __SSE4_1__
  47 #include <smmintrin.h>
  48 #endif
  49
  50 #include "opj_includes.h"
  51
  52 /* <summary> */
  53 /* This table contains the norms of the basis function of the reversible MCT. */
  54 /* </summary> */
  55 static const OPJ_FLOAT64 opj_mct_norms[3] = { 1.732, .8292, .8292 };
  56
  57 /* <summary> */
  58 /* This table contains the norms of the basis function of the irreversible MCT. */
  59 /* </summary> */
  60 static const OPJ_FLOAT64 opj_mct_norms_real[3] = { 1.732, 1.805, 1.573 };
  61
  62 const OPJ_FLOAT64 * opj_mct_get_mct_norms ()
  63 {
  64         return opj_mct_norms;
  65 }
  66
  67 const OPJ_FLOAT64 * opj_mct_get_mct_norms_real ()
  68 {
  69         return opj_mct_norms_real;
  70 }
  71
  72 /* <summary> */
  73 /* Forward reversible MCT. */
  74 /* </summary> */
  75 #ifdef __SSE2__
  76 void opj_mct_encode(
  77                 OPJ_INT32* restrict c0,
  78                 OPJ_INT32* restrict c1,
  79                 OPJ_INT32* restrict c2,
  80                 OPJ_UINT32 n)
  81 {
  82         OPJ_SIZE_T i;
  83         const OPJ_SIZE_T len = n;
  84         /* buffer are aligned on 16 bytes */
  85         assert( ((size_t)c0 & 0xf) == 0 );
  86         assert( ((size_t)c1 & 0xf) == 0 );
  87         assert( ((size_t)c2 & 0xf) == 0 );
  88
  89         for(i = 0; i < (len & ~3U); i += 4) {
  90                 __m128i y, u, v;
  91                 __m128i r = _mm_load_si128((const __m128i *)&(c0[i]));
  92                 __m128i g = _mm_load_si128((const __m128i *)&(c1[i]));
  93                 __m128i b = _mm_load_si128((const __m128i *)&(c2[i]));
  94                 y = _mm_add_epi32(g, g);
  95                 y = _mm_add_epi32(y, b);
  96                 y = _mm_add_epi32(y, r);
  97                 y = _mm_srai_epi32(y, 2);
  98                 u = _mm_sub_epi32(b, g);
  99                 v = _mm_sub_epi32(r, g);
 100                 _mm_store_si128((__m128i *)&(c0[i]), y);
 101                 _mm_store_si128((__m128i *)&(c1[i]), u);
 102                 _mm_store_si128((__m128i *)&(c2[i]), v);
 103         }
 104
 105         for(; i < len; ++i) {
 106                 OPJ_INT32 r = c0[i];
 107                 OPJ_INT32 g = c1[i];
 108                 OPJ_INT32 b = c2[i];
 109                 OPJ_INT32 y = (r + (g * 2) + b) >> 2;
 110                 OPJ_INT32 u = b - g;
 111                 OPJ_INT32 v = r - g;
 112                 c0[i] = y;
 113                 c1[i] = u;
 114                 c2[i] = v;
 115         }
 116 }
 117 #else
 118 void opj_mct_encode(
 119                 OPJ_INT32* restrict c0,
 120                 OPJ_INT32* restrict c1,
 121                 OPJ_INT32* restrict c2,
 122                 OPJ_UINT32 n)
 123 {
 124         OPJ_SIZE_T i;
 125         const OPJ_SIZE_T len = n;
 126
 127         for(i = 0; i < len; ++i) {
 128                 OPJ_INT32 r = c0[i];
 129                 OPJ_INT32 g = c1[i];
 130                 OPJ_INT32 b = c2[i];
 131                 OPJ_INT32 y = (r + (g * 2) + b) >> 2;
 132                 OPJ_INT32 u = b - g;
 133                 OPJ_INT32 v = r - g;
 134                 c0[i] = y;
 135                 c1[i] = u;
 136                 c2[i] = v;
 137         }
 138 }
 139 #endif
 140
 141 /* <summary> */
 142 /* Inverse reversible MCT. */
 143 /* </summary> */
 144 #ifdef __SSE2__
 145 void opj_mct_decode(
 146                 OPJ_INT32* restrict c0,
 147                 OPJ_INT32* restrict c1,
 148                 OPJ_INT32* restrict c2,
 149                 OPJ_UINT32 n)
 150 {
 151         OPJ_SIZE_T i;
 152         const OPJ_SIZE_T len = n;
 153
 154         for(i = 0; i < (len & ~3U); i += 4) {
 155                 __m128i r, g, b;
 156                 __m128i y = _mm_load_si128((const __m128i *)&(c0[i]));
 157                 __m128i u = _mm_load_si128((const __m128i *)&(c1[i]));
 158                 __m128i v = _mm_load_si128((const __m128i *)&(c2[i]));
 159                 g = y;
 160                 g = _mm_sub_epi32(g, _mm_srai_epi32(_mm_add_epi32(u, v), 2));
 161                 r = _mm_add_epi32(v, g);
 162                 b = _mm_add_epi32(u, g);
 163                 _mm_store_si128((__m128i *)&(c0[i]), r);
 164                 _mm_store_si128((__m128i *)&(c1[i]), g);
 165                 _mm_store_si128((__m128i *)&(c2[i]), b);
 166         }
 167         for (; i < len; ++i) {
 168                 OPJ_INT32 y = c0[i];
 169                 OPJ_INT32 u = c1[i];
 170                 OPJ_INT32 v = c2[i];
 171                 OPJ_INT32 g = y - ((u + v) >> 2);
 172                 OPJ_INT32 r = v + g;
 173                 OPJ_INT32 b = u + g;
 174                 c0[i] = r;
 175                 c1[i] = g;
 176                 c2[i] = b;
 177         }
 178 }
 179 #else
 180 void opj_mct_decode(
 181                 OPJ_INT32* restrict c0,
 182                 OPJ_INT32* restrict c1,
 183                 OPJ_INT32* restrict c2,
 184                 OPJ_UINT32 n)
 185 {
 186         OPJ_UINT32 i;
 187         for (i = 0; i < n; ++i) {
 188                 OPJ_INT32 y = c0[i];
 189                 OPJ_INT32 u = c1[i];
 190                 OPJ_INT32 v = c2[i];
 191                 OPJ_INT32 g = y - ((u + v) >> 2);
 192                 OPJ_INT32 r = v + g;
 193                 OPJ_INT32 b = u + g;
 194                 c0[i] = r;
 195                 c1[i] = g;
 196                 c2[i] = b;
 197         }
 198 }
 199 #endif
 200
 201 /* <summary> */
 202 /* Get norm of basis function of reversible MCT. */
 203 /* </summary> */
 204 OPJ_FLOAT64 opj_mct_getnorm(OPJ_UINT32 compno) {
 205         return opj_mct_norms[compno];
 206 }
 207
 208 /* <summary> */
 209 /* Forward irreversible MCT. */
 210 /* </summary> */
 211 #ifdef __SSE4_1__
 212 void opj_mct_encode_real(
 213                                                                                                  OPJ_INT32* restrict c0,
 214                                                                                                  OPJ_INT32* restrict c1,
 215                                                                                                  OPJ_INT32* restrict c2,
 216                                                                                                  OPJ_UINT32 n)
 217 {
 218         OPJ_SIZE_T i;
 219         const OPJ_SIZE_T len = n;
 220
 221         const __m128i ry = _mm_set1_epi32(2449);
 222         const __m128i gy = _mm_set1_epi32(4809);
 223         const __m128i by = _mm_set1_epi32(934);
 224         const __m128i ru = _mm_set1_epi32(1382);
 225         const __m128i gu = _mm_set1_epi32(2714);
 226         /* const __m128i bu = _mm_set1_epi32(4096); */
 227         /* const __m128i rv = _mm_set1_epi32(4096); */
 228         const __m128i gv = _mm_set1_epi32(3430);
 229         const __m128i bv = _mm_set1_epi32(666);
 230         const __m128i mulround = _mm_shuffle_epi32(_mm_cvtsi32_si128(4096), _MM_SHUFFLE(1, 0, 1, 0));
 231
 232         for(i = 0; i < (len & ~3U); i += 4) {
 233                 __m128i lo, hi;
 234                 __m128i y, u, v;
 235                 __m128i r = _mm_load_si128((const __m128i *)&(c0[i]));
 236                 __m128i g = _mm_load_si128((const __m128i *)&(c1[i]));
 237                 __m128i b = _mm_load_si128((const __m128i *)&(c2[i]));
 238
 239                 lo = r;
 240                 hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
 241                 lo = _mm_mul_epi32(lo, ry);
 242                 hi = _mm_mul_epi32(hi, ry);
 243                 lo = _mm_add_epi64(lo, mulround);
 244                 hi = _mm_add_epi64(hi, mulround);
 245                 lo = _mm_srli_epi64(lo, 13);
 246                 hi = _mm_slli_epi64(hi, 32-13);
 247                 y = _mm_blend_epi16(lo, hi, 0xCC);
 248
 249                 lo = g;
 250                 hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
 251                 lo = _mm_mul_epi32(lo, gy);
 252                 hi = _mm_mul_epi32(hi, gy);
 253                 lo = _mm_add_epi64(lo, mulround);
 254                 hi = _mm_add_epi64(hi, mulround);
 255                 lo = _mm_srli_epi64(lo, 13);
 256                 hi = _mm_slli_epi64(hi, 32-13);
 257                 y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
 258
 259                 lo = b;
 260                 hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
 261                 lo = _mm_mul_epi32(lo, by);
 262                 hi = _mm_mul_epi32(hi, by);
 263                 lo = _mm_add_epi64(lo, mulround);
 264                 hi = _mm_add_epi64(hi, mulround);
 265                 lo = _mm_srli_epi64(lo, 13);
 266                 hi = _mm_slli_epi64(hi, 32-13);
 267                 y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
 268                 _mm_store_si128((__m128i *)&(c0[i]), y);
 269
 270                 /*lo = b;
 271                 hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
 272                 lo = _mm_mul_epi32(lo, mulround);
 273                 hi = _mm_mul_epi32(hi, mulround);*/
 274                 lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 2, 0)));
 275                 hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 3, 1)));
 276                 lo = _mm_slli_epi64(lo, 12);
 277                 hi = _mm_slli_epi64(hi, 12);
 278                 lo = _mm_add_epi64(lo, mulround);
 279                 hi = _mm_add_epi64(hi, mulround);
 280                 lo = _mm_srli_epi64(lo, 13);
 281                 hi = _mm_slli_epi64(hi, 32-13);
 282                 u = _mm_blend_epi16(lo, hi, 0xCC);
 283
 284                 lo = r;
 285                 hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
 286                 lo = _mm_mul_epi32(lo, ru);
 287                 hi = _mm_mul_epi32(hi, ru);
 288                 lo = _mm_add_epi64(lo, mulround);
 289                 hi = _mm_add_epi64(hi, mulround);
 290                 lo = _mm_srli_epi64(lo, 13);
 291                 hi = _mm_slli_epi64(hi, 32-13);
 292                 u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
 293
 294                 lo = g;
 295                 hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
 296                 lo = _mm_mul_epi32(lo, gu);
 297                 hi = _mm_mul_epi32(hi, gu);
 298                 lo = _mm_add_epi64(lo, mulround);
 299                 hi = _mm_add_epi64(hi, mulround);
 300                 lo = _mm_srli_epi64(lo, 13);
 301                 hi = _mm_slli_epi64(hi, 32-13);
 302                 u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
 303                 _mm_store_si128((__m128i *)&(c1[i]), u);
 304
 305                 /*lo = r;
 306                 hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
 307                 lo = _mm_mul_epi32(lo, mulround);
 308                 hi = _mm_mul_epi32(hi, mulround);*/
 309                 lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 2, 0)));
 310                 hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 3, 1)));
 311                 lo = _mm_slli_epi64(lo, 12);
 312                 hi = _mm_slli_epi64(hi, 12);
 313                 lo = _mm_add_epi64(lo, mulround);
 314                 hi = _mm_add_epi64(hi, mulround);
 315                 lo = _mm_srli_epi64(lo, 13);
 316                 hi = _mm_slli_epi64(hi, 32-13);
 317                 v = _mm_blend_epi16(lo, hi, 0xCC);
 318
 319                 lo = g;
 320                 hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
 321                 lo = _mm_mul_epi32(lo, gv);
 322                 hi = _mm_mul_epi32(hi, gv);
 323                 lo = _mm_add_epi64(lo, mulround);
 324                 hi = _mm_add_epi64(hi, mulround);
 325                 lo = _mm_srli_epi64(lo, 13);
 326                 hi = _mm_slli_epi64(hi, 32-13);
 327                 v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
 328
 329                 lo = b;
 330                 hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
 331                 lo = _mm_mul_epi32(lo, bv);
 332                 hi = _mm_mul_epi32(hi, bv);
 333                 lo = _mm_add_epi64(lo, mulround);
 334                 hi = _mm_add_epi64(hi, mulround);
 335                 lo = _mm_srli_epi64(lo, 13);
 336                 hi = _mm_slli_epi64(hi, 32-13);
 337                 v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
 338                 _mm_store_si128((__m128i *)&(c2[i]), v);
 339         }
 340         for(; i < len; ++i) {
 341                 OPJ_INT32 r = c0[i];
 342                 OPJ_INT32 g = c1[i];
 343                 OPJ_INT32 b = c2[i];
 344                 OPJ_INT32 y =  opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, 4809) + opj_int_fix_mul(b, 934);
 345                 OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, 2714) + opj_int_fix_mul(b, 4096);
 346                 OPJ_INT32 v =  opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, 3430) - opj_int_fix_mul(b, 666);
 347                 c0[i] = y;
 348                 c1[i] = u;
 349                 c2[i] = v;
 350         }
 351 }
 352 #else
 353 void opj_mct_encode_real(
 354                 OPJ_INT32* restrict c0,
 355                 OPJ_INT32* restrict c1,
 356                 OPJ_INT32* restrict c2,
 357                 OPJ_UINT32 n)
 358 {
 359         OPJ_UINT32 i;
 360         for(i = 0; i < n; ++i) {
 361                 OPJ_INT32 r = c0[i];
 362                 OPJ_INT32 g = c1[i];
 363                 OPJ_INT32 b = c2[i];
 364                 OPJ_INT32 y =  opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g, 4809) + opj_int_fix_mul(b, 934);
 365                 OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g, 2714) + opj_int_fix_mul(b, 4096);
 366                 OPJ_INT32 v =  opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g, 3430) - opj_int_fix_mul(b, 666);
 367                 c0[i] = y;
 368                 c1[i] = u;
 369                 c2[i] = v;
 370         }
 371 }
 372 #endif
 373
 374 /* <summary> */
 375 /* Inverse irreversible MCT. */
 376 /* </summary> */
 377 void opj_mct_decode_real(
 378                 OPJ_FLOAT32* restrict c0,
 379                 OPJ_FLOAT32* restrict c1,
 380                 OPJ_FLOAT32* restrict c2,
 381                 OPJ_UINT32 n)
 382 {
 383         OPJ_UINT32 i;
 384 #ifdef __SSE__
 385         __m128 vrv, vgu, vgv, vbu;
 386         vrv = _mm_set1_ps(1.402f);
 387         vgu = _mm_set1_ps(0.34413f);
 388         vgv = _mm_set1_ps(0.71414f);
 389         vbu = _mm_set1_ps(1.772f);
 390         for (i = 0; i < (n >> 3); ++i) {
 391                 __m128 vy, vu, vv;
 392                 __m128 vr, vg, vb;
 393
 394                 vy = _mm_load_ps(c0);
 395                 vu = _mm_load_ps(c1);
 396                 vv = _mm_load_ps(c2);
 397                 vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
 398                 vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
 399                 vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
 400                 _mm_store_ps(c0, vr);
 401                 _mm_store_ps(c1, vg);
 402                 _mm_store_ps(c2, vb);
 403                 c0 += 4;
 404                 c1 += 4;
 405                 c2 += 4;
 406
 407                 vy = _mm_load_ps(c0);
 408                 vu = _mm_load_ps(c1);
 409                 vv = _mm_load_ps(c2);
 410                 vr = _mm_add_ps(vy, _mm_mul_ps(vv, vrv));
 411                 vg = _mm_sub_ps(_mm_sub_ps(vy, _mm_mul_ps(vu, vgu)), _mm_mul_ps(vv, vgv));
 412                 vb = _mm_add_ps(vy, _mm_mul_ps(vu, vbu));
 413                 _mm_store_ps(c0, vr);
 414                 _mm_store_ps(c1, vg);
 415                 _mm_store_ps(c2, vb);
 416                 c0 += 4;
 417                 c1 += 4;
 418                 c2 += 4;
 419         }
 420         n &= 7;
 421 #endif
 422         for(i = 0; i < n; ++i) {
 423                 OPJ_FLOAT32 y = c0[i];
 424                 OPJ_FLOAT32 u = c1[i];
 425                 OPJ_FLOAT32 v = c2[i];
 426                 OPJ_FLOAT32 r = y + (v * 1.402f);
 427                 OPJ_FLOAT32 g = y - (u * 0.34413f) - (v * (0.71414f));
 428                 OPJ_FLOAT32 b = y + (u * 1.772f);
 429                 c0[i] = r;
 430                 c1[i] = g;
 431                 c2[i] = b;
 432         }
 433 }
 434
 435 /* <summary> */
 436 /* Get norm of basis function of irreversible MCT. */
 437 /* </summary> */
 438 OPJ_FLOAT64 opj_mct_getnorm_real(OPJ_UINT32 compno) {
 439         return opj_mct_norms_real[compno];
 440 }
 441
 442
 443 OPJ_BOOL opj_mct_encode_custom(
 444                                            OPJ_BYTE * pCodingdata,
 445                                            OPJ_UINT32 n,
 446                                            OPJ_BYTE ** pData,
 447                                            OPJ_UINT32 pNbComp,
 448                                            OPJ_UINT32 isSigned)
 449 {
 450         OPJ_FLOAT32 * lMct = (OPJ_FLOAT32 *) pCodingdata;
 451         OPJ_UINT32 i;
 452         OPJ_UINT32 j;
 453         OPJ_UINT32 k;
 454         OPJ_UINT32 lNbMatCoeff = pNbComp * pNbComp;
 455         OPJ_INT32 * lCurrentData = 00;
 456         OPJ_INT32 * lCurrentMatrix = 00;
 457         OPJ_INT32 ** lData = (OPJ_INT32 **) pData;
 458         OPJ_UINT32 lMultiplicator = 1 << 13;
 459         OPJ_INT32 * lMctPtr;
 460
 461     OPJ_ARG_NOT_USED(isSigned);
 462
 463         lCurrentData = (OPJ_INT32 *) opj_malloc((pNbComp + lNbMatCoeff) * sizeof(OPJ_INT32));
 464         if (! lCurrentData) {
 465                 return OPJ_FALSE;
 466         }
 467
 468         lCurrentMatrix = lCurrentData + pNbComp;
 469
 470         for (i =0;i<lNbMatCoeff;++i) {
 471                 lCurrentMatrix[i] = (OPJ_INT32) (*(lMct++) * (OPJ_FLOAT32)lMultiplicator);
 472         }
 473
 474         for (i = 0; i < n; ++i)  {
 475                 lMctPtr = lCurrentMatrix;
 476                 for (j=0;j<pNbComp;++j) {
 477                         lCurrentData[j] = (*(lData[j]));
 478                 }
 479
 480                 for (j=0;j<pNbComp;++j) {
 481                         *(lData[j]) = 0;
 482                         for (k=0;k<pNbComp;++k) {
 483                                 *(lData[j]) += opj_int_fix_mul(*lMctPtr, lCurrentData[k]);
 484                                 ++lMctPtr;
 485                         }
 486
 487                         ++lData[j];
 488                 }
 489         }
 490
 491         opj_free(lCurrentData);
 492
 493         return OPJ_TRUE;
 494 }
 495
 496 OPJ_BOOL opj_mct_decode_custom(
 497                                            OPJ_BYTE * pDecodingData,
 498                                            OPJ_UINT32 n,
 499                                            OPJ_BYTE ** pData,
 500                                            OPJ_UINT32 pNbComp,
 501                                            OPJ_UINT32 isSigned)
 502 {
 503         OPJ_FLOAT32 * lMct;
 504         OPJ_UINT32 i;
 505         OPJ_UINT32 j;
 506         OPJ_UINT32 k;
 507
 508         OPJ_FLOAT32 * lCurrentData = 00;
 509         OPJ_FLOAT32 * lCurrentResult = 00;
 510         OPJ_FLOAT32 ** lData = (OPJ_FLOAT32 **) pData;
 511
 512     OPJ_ARG_NOT_USED(isSigned);
 513
 514         lCurrentData = (OPJ_FLOAT32 *) opj_malloc (2 * pNbComp * sizeof(OPJ_FLOAT32));
 515         if (! lCurrentData) {
 516                 return OPJ_FALSE;
 517         }
 518         lCurrentResult = lCurrentData + pNbComp;
 519
 520         for (i = 0; i < n; ++i) {
 521                 lMct = (OPJ_FLOAT32 *) pDecodingData;
 522                 for (j=0;j<pNbComp;++j) {
 523                         lCurrentData[j] = (OPJ_FLOAT32) (*(lData[j]));
 524                 }
 525                 for (j=0;j<pNbComp;++j) {
 526                         lCurrentResult[j] = 0;
 527                         for     (k=0;k<pNbComp;++k)     {
 528                                 lCurrentResult[j] += *(lMct++) * lCurrentData[k];
 529                         }
 530                         *(lData[j]++) = (OPJ_FLOAT32) (lCurrentResult[j]);
 531                 }
 532         }
 533         opj_free(lCurrentData);
 534         return OPJ_TRUE;
 535 }
 536
 537 void opj_calculate_norms(       OPJ_FLOAT64 * pNorms,
 538                                                         OPJ_UINT32 pNbComps,
 539                                                         OPJ_FLOAT32 * pMatrix)
 540 {
 541         OPJ_UINT32 i,j,lIndex;
 542         OPJ_FLOAT32 lCurrentValue;
 543         OPJ_FLOAT64 * lNorms = (OPJ_FLOAT64 *) pNorms;
 544         OPJ_FLOAT32 * lMatrix = (OPJ_FLOAT32 *) pMatrix;
 545
 546         for     (i=0;i<pNbComps;++i) {
 547                 lNorms[i] = 0;
 548                 lIndex = i;
 549
 550                 for     (j=0;j<pNbComps;++j) {
 551                         lCurrentValue = lMatrix[lIndex];
 552                         lIndex += pNbComps;
 553                         lNorms[i] += lCurrentValue * lCurrentValue;
 554                 }
 555                 lNorms[i] = sqrt(lNorms[i]);
 556         }
 557 }