Optimize reading/writing into sparse array
author Even Rouault <even.rouault@spatialys.com>
Fri, 1 Sep 2017 14:30:50 +0000 (16:30 +0200)
committer Even Rouault <even.rouault@spatialys.com>
Fri, 1 Sep 2017 14:30:50 +0000 (16:30 +0200)
src/lib/openjp2/sparse_array.c
src/lib/openjp2/sparse_array.h
src/lib/openjp2/test_sparse_array.c

index 3402dca2d18c00bfb8a6c89db4fd49c5453320e7..b0634f67e8ecc4b9e653416dbd6db0f5778a51a7 100644 (file)
@@ -91,7 +91,7 @@ void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa)
     }
 }
 
-OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa,
+OPJ_BOOL opj_sparse_array_is_region_valid(const opj_sparse_array_int32_t* sa,
         OPJ_UINT32 x0,
         OPJ_UINT32 y0,
         OPJ_UINT32 x1,
@@ -102,7 +102,7 @@ OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa,
 }
 
 static OPJ_BOOL opj_sparse_array_int32_read_or_write(
-    opj_sparse_array_int32_t* sa,
+    const opj_sparse_array_int32_t* sa,
     OPJ_UINT32 x0,
     OPJ_UINT32 y0,
     OPJ_UINT32 x1,
@@ -115,6 +115,8 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
 {
     OPJ_UINT32 y, block_y;
     OPJ_UINT32 y_incr = 0;
+    const OPJ_UINT32 block_width = sa->block_width;
+
     if (!opj_sparse_array_is_region_valid(sa, x0, y0, x1, y1)) {
         return forgiving;
     }
@@ -128,43 +130,64 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
                  sa->block_height;
         block_y_offset = sa->block_height - y_incr;
         y_incr = opj_uint_min(y_incr, y1 - y);
-        block_x = x0 / sa->block_width;
+        block_x = x0 / block_width;
         for (x = x0; x < x1; block_x ++, x += x_incr) {
             OPJ_UINT32 j;
             OPJ_UINT32 block_x_offset;
             OPJ_INT32* src_block;
-            x_incr = (x == x0) ? sa->block_width - (x0 % sa->block_width) : sa->block_width;
-            block_x_offset = sa->block_width - x_incr;
+            x_incr = (x == x0) ? block_width - (x0 % block_width) : block_width;
+            block_x_offset = block_width - x_incr;
             x_incr = opj_uint_min(x_incr, x1 - x);
             src_block = sa->data_blocks[block_y * sa->block_count_hor + block_x];
             if (is_read_op) {
                 if (src_block == NULL) {
-                    for (j = 0; j < y_incr; j++) {
-                        if (buf_col_stride == 1) {
-                            memset(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride,
-                                   0,
-                                   sizeof(OPJ_INT32) * x_incr);
-                        } else {
+                    if (buf_col_stride == 1) {
+                        OPJ_INT32* dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
+                                              (x - x0) * buf_col_stride;
+                        for (j = 0; j < y_incr; j++) {
+                            memset(dest_ptr, 0, sizeof(OPJ_INT32) * x_incr);
+                            dest_ptr += buf_line_stride;
+                        }
+                    } else {
+                        OPJ_INT32* dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
+                                              (x - x0) * buf_col_stride;
+                        for (j = 0; j < y_incr; j++) {
                             OPJ_UINT32 k;
                             for (k = 0; k < x_incr; k++) {
-                                *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride)
-                                    = 0;
+                                dest_ptr[k * buf_col_stride] = 0;
                             }
+                            dest_ptr += buf_line_stride;
                         }
                     }
                 } else {
-                    for (j = 0; j < y_incr; j++) {
-                        if (buf_col_stride == 1) {
-                            memcpy(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride,
-                                   src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset,
-                                   sizeof(OPJ_INT32) * x_incr);
+                    const OPJ_INT32* OPJ_RESTRICT src_ptr = src_block + block_y_offset *
+                                                            (size_t)block_width + block_x_offset;
+                    if (buf_col_stride == 1) {
+                        OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
+                                                           (x - x0) * buf_col_stride;
+                        for (j = 0; j < y_incr; j++) {
+                            memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+                            dest_ptr += buf_line_stride;
+                            src_ptr += block_width;
+                        }
+                    } else {
+                        OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (size_t)buf_line_stride +
+                                                           (x - x0) * buf_col_stride;
+                        if (x_incr == 1) {
+                            for (j = 0; j < y_incr; j++) {
+                                *dest_ptr = *src_ptr;
+                                dest_ptr += buf_line_stride;
+                                src_ptr += block_width;
+                            }
                         } else {
-                            OPJ_UINT32 k;
-                            for (k = 0; k < x_incr; k++) {
-                                *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride)
-                                    =
-                                        *(src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset +
-                                          k);
+                            /* General case */
+                            for (j = 0; j < y_incr; j++) {
+                                OPJ_UINT32 k;
+                                for (k = 0; k < x_incr; k++) {
+                                    dest_ptr[k * buf_col_stride] = src_ptr[k];
+                                }
+                                dest_ptr += buf_line_stride;
+                                src_ptr += block_width;
                             }
                         }
                     }
@@ -179,18 +202,36 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
                     sa->data_blocks[block_y * sa->block_count_hor + block_x] = src_block;
                 }
 
-                for (j = 0; j < y_incr; j++) {
-                    if (buf_col_stride == 1) {
-                        memcpy(src_block + (block_y_offset + j) * (size_t)sa->block_width +
-                               block_x_offset,
-                               buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0) * buf_col_stride,
-                               sizeof(OPJ_INT32) * x_incr);
+                if (buf_col_stride == 1) {
+                    OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset *
+                                                       (size_t)block_width + block_x_offset;
+                    const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) *
+                                                            (size_t)buf_line_stride + (x - x0) * buf_col_stride;
+                    for (j = 0; j < y_incr; j++) {
+                        memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
+                        dest_ptr += block_width;
+                        src_ptr += buf_line_stride;
+                    }
+                } else {
+                    OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset *
+                                                       (size_t)block_width + block_x_offset;
+                    const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) *
+                                                            (size_t)buf_line_stride + (x - x0) * buf_col_stride;
+                    if (x_incr == 1) {
+                        for (j = 0; j < y_incr; j++) {
+                            *dest_ptr = *src_ptr;
+                            src_ptr += buf_line_stride;
+                            dest_ptr += block_width;
+                        }
                     } else {
-                        OPJ_UINT32 k;
-                        for (k = 0; k < x_incr; k++) {
-                            *(src_block + (block_y_offset + j) * (size_t)sa->block_width + block_x_offset +
-                              k) =
-                                  *(buf + (y - y0 + j) * (size_t)buf_line_stride + (x - x0 + k) * buf_col_stride);
+                        /* General case */
+                        for (j = 0; j < y_incr; j++) {
+                            OPJ_UINT32 k;
+                            for (k = 0; k < x_incr; k++) {
+                                dest_ptr[k] = src_ptr[k * buf_col_stride];
+                            }
+                            src_ptr += buf_line_stride;
+                            dest_ptr += block_width;
                         }
                     }
                 }
@@ -201,7 +242,7 @@ static OPJ_BOOL opj_sparse_array_int32_read_or_write(
     return OPJ_TRUE;
 }
 
-OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa,
+OPJ_BOOL opj_sparse_array_int32_read(const opj_sparse_array_int32_t* sa,
                                      OPJ_UINT32 x0,
                                      OPJ_UINT32 y0,
                                      OPJ_UINT32 x1,
@@ -211,12 +252,13 @@ OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa,
                                      OPJ_UINT32 dest_line_stride,
                                      OPJ_BOOL forgiving)
 {
-    return opj_sparse_array_int32_read_or_write(sa, x0, y0, x1, y1,
-            dest,
-            dest_col_stride,
-            dest_line_stride,
-            forgiving,
-            OPJ_TRUE);
+    return opj_sparse_array_int32_read_or_write(
+               (opj_sparse_array_int32_t*)sa, x0, y0, x1, y1,
+               dest,
+               dest_col_stride,
+               dest_line_stride,
+               forgiving,
+               OPJ_TRUE);
 }
 
 OPJ_BOOL opj_sparse_array_int32_write(opj_sparse_array_int32_t* sa,
index 485cafeaef629a81009bb217cc12b8814c8de5c6..130fe13ef58f94036fd4ab45436d6fd01a627cf1 100644 (file)
@@ -77,7 +77,7 @@ void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa);
  * @param y1 bottom y coordinate (not included) of the region. Must be greater than y0.
  * @return OPJ_TRUE or OPJ_FALSE.
  */
-OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa,
+OPJ_BOOL opj_sparse_array_is_region_valid(const opj_sparse_array_int32_t* sa,
         OPJ_UINT32 x0,
         OPJ_UINT32 y0,
         OPJ_UINT32 x1,
@@ -99,7 +99,7 @@ OPJ_BOOL opj_sparse_array_is_region_valid(opj_sparse_array_int32_t* sa,
  * @param forgiving if set to TRUE and the region is invalid, OPJ_TRUE will still be returned.
  * @return OPJ_TRUE in case of success.
  */
-OPJ_BOOL opj_sparse_array_int32_read(opj_sparse_array_int32_t* sa,
+OPJ_BOOL opj_sparse_array_int32_read(const opj_sparse_array_int32_t* sa,
                                      OPJ_UINT32 x0,
                                      OPJ_UINT32 y0,
                                      OPJ_UINT32 x1,
index 0b49110fb04f8b53607de75e80271ab05a0b62e8..8e1364515d757e86a38a08e3b1b7b8c652e09552 100644 (file)
@@ -92,6 +92,7 @@ int main()
     ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1,
                                        OPJ_FALSE);
     assert(ret);
+
     buffer[0] = 2;
     ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1,
                                        OPJ_FALSE);
@@ -105,6 +106,29 @@ int main()
     assert(buffer[0] == 2);
     assert(buffer[1] == 0xFF);
 
+    buffer[0] = 0xFF;
+    buffer[1] = 0xFF;
+    buffer[2] = 0xFF;
+    ret = opj_sparse_array_int32_read(sa, 4, 5, 4 + 1, 5 + 2, buffer, 0, 1,
+                                      OPJ_FALSE);
+    assert(ret);
+    assert(buffer[0] == 2);
+    assert(buffer[1] == 0);
+    assert(buffer[2] == 0xFF);
+
+    buffer[0] = 3;
+    ret = opj_sparse_array_int32_write(sa, 4, 5, 4 + 1, 5 + 1, buffer, 0, 1,
+                                       OPJ_FALSE);
+    assert(ret);
+
+    buffer[0] = 0;
+    buffer[1] = 0xFF;
+    ret = opj_sparse_array_int32_read(sa, 4, 5, 4 + 1, 5 + 1, buffer, 1, 1,
+                                      OPJ_FALSE);
+    assert(ret);
+    assert(buffer[0] == 3);
+    assert(buffer[1] == 0xFF);
+
     w = 15 + 1;
     h = 17 + 1;
     memset(buffer, 0xFF, sizeof(buffer));
@@ -114,7 +138,7 @@ int main()
     for (j = 0; j < h; j++) {
         for (i = 0; i < w; i++) {
             if (i == 4 - 2 && j == 5 - 1) {
-                assert(buffer[ j * w + i ] == 2);
+                assert(buffer[ j * w + i ] == 3);
             } else {
                 assert(buffer[ j * w + i ] == 0);
             }