/**
Forward 5-3 wavelet transform in 1-D
*/
-static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
+static void opj_dwt_encode_1(void *a, OPJ_INT32 dn, OPJ_INT32 sn,
OPJ_INT32 cas);
/**
Forward 9-7 wavelet transform in 1-D
*/
-static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
+static void opj_dwt_encode_1_real(void *a, OPJ_INT32 dn, OPJ_INT32 sn,
OPJ_INT32 cas);
/**
Explicit calculation of the Quantization Stepsizes
opj_tcd_tilecomp_t* tilec,
OPJ_UINT32 numres);
+/* Where void* is a OPJ_INT32* for 5x3 and OPJ_FLOAT32* for 9x7 */
+typedef void (*opj_encode_one_row_fnptr_type)(void *, OPJ_INT32, OPJ_INT32,
+ OPJ_INT32);
+
static OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp,
opj_tcd_tilecomp_t * tilec,
- void (*p_function)(OPJ_INT32 *, OPJ_INT32, OPJ_INT32, OPJ_INT32));
+ opj_encode_one_row_fnptr_type p_function);
static OPJ_UINT32 opj_dwt_max_resolution(opj_tcd_resolution_t* OPJ_RESTRICT r,
OPJ_UINT32 i);
/* <summary> */
/* Forward 5-3 wavelet transform in 1-D. */
/* </summary> */
-static void opj_dwt_encode_1(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
+static void opj_dwt_encode_1(void *aIn, OPJ_INT32 dn, OPJ_INT32 sn,
OPJ_INT32 cas)
{
OPJ_INT32 i;
+ OPJ_INT32* a = (OPJ_INT32*)aIn;
if (!cas) {
if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */
/* <summary> */
/* Forward 9-7 wavelet transform in 1-D. */
/* </summary> */
-static void opj_dwt_encode_1_real(OPJ_INT32 *a, OPJ_INT32 dn, OPJ_INT32 sn,
+static void opj_dwt_encode_1_real(void *aIn, OPJ_INT32 dn, OPJ_INT32 sn,
OPJ_INT32 cas)
{
OPJ_INT32 i;
+ OPJ_FLOAT32* a = (OPJ_FLOAT32*)aIn;
+
if (!cas) {
if ((dn > 0) || (sn > 1)) { /* NEW : CASE ONE ELEMENT */
for (i = 0; i < dn; i++) {
- OPJ_D(i) -= opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 12993);
+ OPJ_D(i) += opj_dwt_alpha * (OPJ_S_(i) + OPJ_S_(i + 1));
}
for (i = 0; i < sn; i++) {
- OPJ_S(i) -= opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 434);
+ OPJ_S(i) += opj_dwt_beta * (OPJ_D_(i - 1) + OPJ_D_(i));
}
for (i = 0; i < dn; i++) {
- OPJ_D(i) += opj_int_fix_mul(OPJ_S_(i) + OPJ_S_(i + 1), 7233);
+ OPJ_D(i) += opj_dwt_gamma * (OPJ_S_(i) + OPJ_S_(i + 1));
}
for (i = 0; i < sn; i++) {
- OPJ_S(i) += opj_int_fix_mul(OPJ_D_(i - 1) + OPJ_D_(i), 3633);
+ OPJ_S(i) += opj_dwt_delta * (OPJ_D_(i - 1) + OPJ_D_(i));
}
for (i = 0; i < dn; i++) {
- OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 5038); /*5038 */
+ OPJ_D(i) = opj_K / 2 * OPJ_D(i);
}
for (i = 0; i < sn; i++) {
- OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 6659); /*6660 */
+ OPJ_S(i) = opj_c13318 / 2 * OPJ_S(i);
}
}
} else {
if ((sn > 0) || (dn > 1)) { /* NEW : CASE ONE ELEMENT */
for (i = 0; i < dn; i++) {
- OPJ_S(i) -= opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 12993);
+ OPJ_S(i) += opj_dwt_alpha * (OPJ_DD_(i) + OPJ_DD_(i - 1));
}
for (i = 0; i < sn; i++) {
- OPJ_D(i) -= opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 434);
+ OPJ_D(i) += opj_dwt_beta * (OPJ_SS_(i) + OPJ_SS_(i + 1));
}
for (i = 0; i < dn; i++) {
- OPJ_S(i) += opj_int_fix_mul(OPJ_DD_(i) + OPJ_DD_(i - 1), 7233);
+ OPJ_S(i) += opj_dwt_gamma * (OPJ_DD_(i) + OPJ_DD_(i - 1));
}
for (i = 0; i < sn; i++) {
- OPJ_D(i) += opj_int_fix_mul(OPJ_SS_(i) + OPJ_SS_(i + 1), 3633);
+ OPJ_D(i) += opj_dwt_delta * (OPJ_SS_(i) + OPJ_SS_(i + 1));
}
for (i = 0; i < dn; i++) {
- OPJ_S(i) = opj_int_fix_mul(OPJ_S(i), 5038); /*5038 */
+ OPJ_S(i) = opj_K / 2 * OPJ_S(i);
}
for (i = 0; i < sn; i++) {
- OPJ_D(i) = opj_int_fix_mul(OPJ_D(i), 6659); /*6660 */
+ OPJ_D(i) = opj_c13318 / 2 * OPJ_D(i);
}
}
}
OPJ_INT32 * OPJ_RESTRICT tiledp;
OPJ_UINT32 min_j;
OPJ_UINT32 max_j;
- void (*p_function)(OPJ_INT32 *, OPJ_INT32, OPJ_INT32, OPJ_INT32);
+ opj_encode_one_row_fnptr_type p_function;
} opj_dwt_encode_h_job_t;
static void opj_dwt_encode_h_func(void* user_data, opj_tls_t* tls)
OPJ_INT32 * OPJ_RESTRICT tiledp;
OPJ_UINT32 min_j;
OPJ_UINT32 max_j;
- void (*p_function)(OPJ_INT32 *, OPJ_INT32, OPJ_INT32, OPJ_INT32);
+ opj_encode_one_row_fnptr_type p_function;
} opj_dwt_encode_v_job_t;
static void opj_dwt_encode_v_func(void* user_data, opj_tls_t* tls)
/* </summary> */
static INLINE OPJ_BOOL opj_dwt_encode_procedure(opj_thread_pool_t* tp,
opj_tcd_tilecomp_t * tilec,
- void (*p_function)(OPJ_INT32 *, OPJ_INT32, OPJ_INT32, OPJ_INT32))
+ opj_encode_one_row_fnptr_type p_function)
{
OPJ_INT32 i;
OPJ_INT32 *bj = 00;
/* <summary> */
/* Forward irreversible MCT. */
/* </summary> */
-#ifdef __SSE4_1__
void opj_mct_encode_real(
- OPJ_INT32* OPJ_RESTRICT c0,
- OPJ_INT32* OPJ_RESTRICT c1,
- OPJ_INT32* OPJ_RESTRICT c2,
- OPJ_SIZE_T n)
-{
- OPJ_SIZE_T i;
- const OPJ_SIZE_T len = n;
-
- const __m128i ry = _mm_set1_epi32(2449);
- const __m128i gy = _mm_set1_epi32(4809);
- const __m128i by = _mm_set1_epi32(934);
- const __m128i ru = _mm_set1_epi32(1382);
- const __m128i gu = _mm_set1_epi32(2714);
- /* const __m128i bu = _mm_set1_epi32(4096); */
- /* const __m128i rv = _mm_set1_epi32(4096); */
- const __m128i gv = _mm_set1_epi32(3430);
- const __m128i bv = _mm_set1_epi32(666);
- const __m128i mulround = _mm_shuffle_epi32(_mm_cvtsi32_si128(4096),
- _MM_SHUFFLE(1, 0, 1, 0));
-
- for (i = 0; i < (len & ~3U); i += 4) {
- __m128i lo, hi;
- __m128i y, u, v;
- __m128i r = _mm_load_si128((const __m128i *) & (c0[i]));
- __m128i g = _mm_load_si128((const __m128i *) & (c1[i]));
- __m128i b = _mm_load_si128((const __m128i *) & (c2[i]));
-
- lo = r;
- hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
- lo = _mm_mul_epi32(lo, ry);
- hi = _mm_mul_epi32(hi, ry);
- lo = _mm_add_epi64(lo, mulround);
- hi = _mm_add_epi64(hi, mulround);
- lo = _mm_srli_epi64(lo, 13);
- hi = _mm_slli_epi64(hi, 32 - 13);
- y = _mm_blend_epi16(lo, hi, 0xCC);
-
- lo = g;
- hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
- lo = _mm_mul_epi32(lo, gy);
- hi = _mm_mul_epi32(hi, gy);
- lo = _mm_add_epi64(lo, mulround);
- hi = _mm_add_epi64(hi, mulround);
- lo = _mm_srli_epi64(lo, 13);
- hi = _mm_slli_epi64(hi, 32 - 13);
- y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
-
- lo = b;
- hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
- lo = _mm_mul_epi32(lo, by);
- hi = _mm_mul_epi32(hi, by);
- lo = _mm_add_epi64(lo, mulround);
- hi = _mm_add_epi64(hi, mulround);
- lo = _mm_srli_epi64(lo, 13);
- hi = _mm_slli_epi64(hi, 32 - 13);
- y = _mm_add_epi32(y, _mm_blend_epi16(lo, hi, 0xCC));
- _mm_store_si128((__m128i *) & (c0[i]), y);
-
- /*lo = b;
- hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
- lo = _mm_mul_epi32(lo, mulround);
- hi = _mm_mul_epi32(hi, mulround);*/
- lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 2, 0)));
- hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(b, _MM_SHUFFLE(3, 2, 3, 1)));
- lo = _mm_slli_epi64(lo, 12);
- hi = _mm_slli_epi64(hi, 12);
- lo = _mm_add_epi64(lo, mulround);
- hi = _mm_add_epi64(hi, mulround);
- lo = _mm_srli_epi64(lo, 13);
- hi = _mm_slli_epi64(hi, 32 - 13);
- u = _mm_blend_epi16(lo, hi, 0xCC);
-
- lo = r;
- hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
- lo = _mm_mul_epi32(lo, ru);
- hi = _mm_mul_epi32(hi, ru);
- lo = _mm_add_epi64(lo, mulround);
- hi = _mm_add_epi64(hi, mulround);
- lo = _mm_srli_epi64(lo, 13);
- hi = _mm_slli_epi64(hi, 32 - 13);
- u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
-
- lo = g;
- hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
- lo = _mm_mul_epi32(lo, gu);
- hi = _mm_mul_epi32(hi, gu);
- lo = _mm_add_epi64(lo, mulround);
- hi = _mm_add_epi64(hi, mulround);
- lo = _mm_srli_epi64(lo, 13);
- hi = _mm_slli_epi64(hi, 32 - 13);
- u = _mm_sub_epi32(u, _mm_blend_epi16(lo, hi, 0xCC));
- _mm_store_si128((__m128i *) & (c1[i]), u);
-
- /*lo = r;
- hi = _mm_shuffle_epi32(r, _MM_SHUFFLE(3, 3, 1, 1));
- lo = _mm_mul_epi32(lo, mulround);
- hi = _mm_mul_epi32(hi, mulround);*/
- lo = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 2, 0)));
- hi = _mm_cvtepi32_epi64(_mm_shuffle_epi32(r, _MM_SHUFFLE(3, 2, 3, 1)));
- lo = _mm_slli_epi64(lo, 12);
- hi = _mm_slli_epi64(hi, 12);
- lo = _mm_add_epi64(lo, mulround);
- hi = _mm_add_epi64(hi, mulround);
- lo = _mm_srli_epi64(lo, 13);
- hi = _mm_slli_epi64(hi, 32 - 13);
- v = _mm_blend_epi16(lo, hi, 0xCC);
-
- lo = g;
- hi = _mm_shuffle_epi32(g, _MM_SHUFFLE(3, 3, 1, 1));
- lo = _mm_mul_epi32(lo, gv);
- hi = _mm_mul_epi32(hi, gv);
- lo = _mm_add_epi64(lo, mulround);
- hi = _mm_add_epi64(hi, mulround);
- lo = _mm_srli_epi64(lo, 13);
- hi = _mm_slli_epi64(hi, 32 - 13);
- v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
-
- lo = b;
- hi = _mm_shuffle_epi32(b, _MM_SHUFFLE(3, 3, 1, 1));
- lo = _mm_mul_epi32(lo, bv);
- hi = _mm_mul_epi32(hi, bv);
- lo = _mm_add_epi64(lo, mulround);
- hi = _mm_add_epi64(hi, mulround);
- lo = _mm_srli_epi64(lo, 13);
- hi = _mm_slli_epi64(hi, 32 - 13);
- v = _mm_sub_epi32(v, _mm_blend_epi16(lo, hi, 0xCC));
- _mm_store_si128((__m128i *) & (c2[i]), v);
- }
- for (; i < len; ++i) {
- OPJ_INT32 r = c0[i];
- OPJ_INT32 g = c1[i];
- OPJ_INT32 b = c2[i];
- OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g,
- 4809) + opj_int_fix_mul(b, 934);
- OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g,
- 2714) + opj_int_fix_mul(b, 4096);
- OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g,
- 3430) - opj_int_fix_mul(b, 666);
- c0[i] = y;
- c1[i] = u;
- c2[i] = v;
- }
-}
-#else
-void opj_mct_encode_real(
- OPJ_INT32* OPJ_RESTRICT c0,
- OPJ_INT32* OPJ_RESTRICT c1,
- OPJ_INT32* OPJ_RESTRICT c2,
+ OPJ_FLOAT32* OPJ_RESTRICT c0,
+ OPJ_FLOAT32* OPJ_RESTRICT c1,
+ OPJ_FLOAT32* OPJ_RESTRICT c2,
OPJ_SIZE_T n)
{
OPJ_SIZE_T i;
for (i = 0; i < n; ++i) {
- OPJ_INT32 r = c0[i];
- OPJ_INT32 g = c1[i];
- OPJ_INT32 b = c2[i];
- OPJ_INT32 y = opj_int_fix_mul(r, 2449) + opj_int_fix_mul(g,
- 4809) + opj_int_fix_mul(b, 934);
- OPJ_INT32 u = -opj_int_fix_mul(r, 1382) - opj_int_fix_mul(g,
- 2714) + opj_int_fix_mul(b, 4096);
- OPJ_INT32 v = opj_int_fix_mul(r, 4096) - opj_int_fix_mul(g,
- 3430) - opj_int_fix_mul(b, 666);
+ OPJ_FLOAT32 r = c0[i];
+ OPJ_FLOAT32 g = c1[i];
+ OPJ_FLOAT32 b = c2[i];
+ OPJ_FLOAT32 y = 0.299f * r + 0.587f * g + 0.114f * b;
+ OPJ_FLOAT32 u = -0.16875f * r - 0.331260f * g + 0.5f * b;
+ OPJ_FLOAT32 v = 0.5f * r - 0.41869f * g - 0.08131f * b;
c0[i] = y;
c1[i] = u;
c2[i] = v;
}
}
-#endif
/* <summary> */
/* Inverse irreversible MCT. */