Merge pull request #1164 from sebras/master
[openjpeg.git] / src / lib / openjp2 / sparse_array.c
1 /*
2  * The copyright in this software is being made available under the 2-clauses
3  * BSD License, included below. This software may be subject to other third
4  * party and contributor rights, including patent rights, and no such rights
5  * are granted under this license.
6  *
7  * Copyright (c) 2017, IntoPix SA <contact@intopix.com>
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS'
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31
32 #include "opj_includes.h"
33
34
35 struct opj_sparse_array_int32 {
36     OPJ_UINT32 width;
37     OPJ_UINT32 height;
38     OPJ_UINT32 block_width;
39     OPJ_UINT32 block_height;
40     OPJ_UINT32 block_count_hor;
41     OPJ_UINT32 block_count_ver;
42     OPJ_INT32** data_blocks;
43 };
44
45 opj_sparse_array_int32_t* opj_sparse_array_int32_create(OPJ_UINT32 width,
46         OPJ_UINT32 height,
47         OPJ_UINT32 block_width,
48         OPJ_UINT32 block_height)
49 {
50     opj_sparse_array_int32_t* sa;
51
52     if (width == 0 || height == 0 || block_width == 0 || block_height == 0) {
53         return NULL;
54     }
55     if (block_width > ((OPJ_UINT32)~0U) / block_height / sizeof(OPJ_INT32)) {
56         return NULL;
57     }
58
59     sa = (opj_sparse_array_int32_t*) opj_calloc(1,
60             sizeof(opj_sparse_array_int32_t));
61     sa->width = width;
62     sa->height = height;
63     sa->block_width = block_width;
64     sa->block_height = block_height;
65     sa->block_count_hor = opj_uint_ceildiv(width, block_width);
66     sa->block_count_ver = opj_uint_ceildiv(height, block_height);
67     if (sa->block_count_hor > ((OPJ_UINT32)~0U) / sa->block_count_ver) {
68         opj_free(sa);
69         return NULL;
70     }
71     sa->data_blocks = (OPJ_INT32**) opj_calloc(sizeof(OPJ_INT32*),
72                       sa->block_count_hor * sa->block_count_ver);
73     if (sa->data_blocks == NULL) {
74         opj_free(sa);
75         return NULL;
76     }
77
78     return sa;
79 }
80
81 void opj_sparse_array_int32_free(opj_sparse_array_int32_t* sa)
82 {
83     if (sa) {
84         OPJ_UINT32 i;
85         for (i = 0; i < sa->block_count_hor * sa->block_count_ver; i++) {
86             if (sa->data_blocks[i]) {
87                 opj_free(sa->data_blocks[i]);
88             }
89         }
90         opj_free(sa->data_blocks);
91         opj_free(sa);
92     }
93 }
94
95 OPJ_BOOL opj_sparse_array_is_region_valid(const opj_sparse_array_int32_t* sa,
96         OPJ_UINT32 x0,
97         OPJ_UINT32 y0,
98         OPJ_UINT32 x1,
99         OPJ_UINT32 y1)
100 {
101     return !(x0 >= sa->width || x1 <= x0 || x1 > sa->width ||
102              y0 >= sa->height || y1 <= y0 || y1 > sa->height);
103 }
104
105 static OPJ_BOOL opj_sparse_array_int32_read_or_write(
106     const opj_sparse_array_int32_t* sa,
107     OPJ_UINT32 x0,
108     OPJ_UINT32 y0,
109     OPJ_UINT32 x1,
110     OPJ_UINT32 y1,
111     OPJ_INT32* buf,
112     OPJ_UINT32 buf_col_stride,
113     OPJ_UINT32 buf_line_stride,
114     OPJ_BOOL forgiving,
115     OPJ_BOOL is_read_op)
116 {
117     OPJ_UINT32 y, block_y;
118     OPJ_UINT32 y_incr = 0;
119     const OPJ_UINT32 block_width = sa->block_width;
120
121     if (!opj_sparse_array_is_region_valid(sa, x0, y0, x1, y1)) {
122         return forgiving;
123     }
124
125     block_y = y0 / sa->block_height;
126     for (y = y0; y < y1; block_y ++, y += y_incr) {
127         OPJ_UINT32 x, block_x;
128         OPJ_UINT32 x_incr = 0;
129         OPJ_UINT32 block_y_offset;
130         y_incr = (y == y0) ? sa->block_height - (y0 % sa->block_height) :
131                  sa->block_height;
132         block_y_offset = sa->block_height - y_incr;
133         y_incr = opj_uint_min(y_incr, y1 - y);
134         block_x = x0 / block_width;
135         for (x = x0; x < x1; block_x ++, x += x_incr) {
136             OPJ_UINT32 j;
137             OPJ_UINT32 block_x_offset;
138             OPJ_INT32* src_block;
139             x_incr = (x == x0) ? block_width - (x0 % block_width) : block_width;
140             block_x_offset = block_width - x_incr;
141             x_incr = opj_uint_min(x_incr, x1 - x);
142             src_block = sa->data_blocks[block_y * sa->block_count_hor + block_x];
143             if (is_read_op) {
144                 if (src_block == NULL) {
145                     if (buf_col_stride == 1) {
146                         OPJ_INT32* dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride +
147                                               (x - x0) * buf_col_stride;
148                         for (j = 0; j < y_incr; j++) {
149                             memset(dest_ptr, 0, sizeof(OPJ_INT32) * x_incr);
150                             dest_ptr += buf_line_stride;
151                         }
152                     } else {
153                         OPJ_INT32* dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride +
154                                               (x - x0) * buf_col_stride;
155                         for (j = 0; j < y_incr; j++) {
156                             OPJ_UINT32 k;
157                             for (k = 0; k < x_incr; k++) {
158                                 dest_ptr[k * buf_col_stride] = 0;
159                             }
160                             dest_ptr += buf_line_stride;
161                         }
162                     }
163                 } else {
164                     const OPJ_INT32* OPJ_RESTRICT src_ptr = src_block + block_y_offset *
165                                                             (OPJ_SIZE_T)block_width + block_x_offset;
166                     if (buf_col_stride == 1) {
167                         OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride
168                                                            +
169                                                            (x - x0) * buf_col_stride;
170                         if (x_incr == 4) {
171                             /* Same code as general branch, but the compiler */
172                             /* can have an efficient memcpy() */
173                             (void)(x_incr); /* trick to silent cppcheck duplicateBranch warning */
174                             for (j = 0; j < y_incr; j++) {
175                                 memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
176                                 dest_ptr += buf_line_stride;
177                                 src_ptr += block_width;
178                             }
179                         } else {
180                             for (j = 0; j < y_incr; j++) {
181                                 memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
182                                 dest_ptr += buf_line_stride;
183                                 src_ptr += block_width;
184                             }
185                         }
186                     } else {
187                         OPJ_INT32* OPJ_RESTRICT dest_ptr = buf + (y - y0) * (OPJ_SIZE_T)buf_line_stride
188                                                            +
189                                                            (x - x0) * buf_col_stride;
190                         if (x_incr == 1) {
191                             for (j = 0; j < y_incr; j++) {
192                                 *dest_ptr = *src_ptr;
193                                 dest_ptr += buf_line_stride;
194                                 src_ptr += block_width;
195                             }
196                         } else if (y_incr == 1 && buf_col_stride == 2) {
197                             OPJ_UINT32 k;
198                             for (k = 0; k < (x_incr & ~3U); k += 4) {
199                                 dest_ptr[k * buf_col_stride] = src_ptr[k];
200                                 dest_ptr[(k + 1) * buf_col_stride] = src_ptr[k + 1];
201                                 dest_ptr[(k + 2) * buf_col_stride] = src_ptr[k + 2];
202                                 dest_ptr[(k + 3) * buf_col_stride] = src_ptr[k + 3];
203                             }
204                             for (; k < x_incr; k++) {
205                                 dest_ptr[k * buf_col_stride] = src_ptr[k];
206                             }
207                         } else if (x_incr >= 8 && buf_col_stride == 8) {
208                             for (j = 0; j < y_incr; j++) {
209                                 OPJ_UINT32 k;
210                                 for (k = 0; k < (x_incr & ~3U); k += 4) {
211                                     dest_ptr[k * buf_col_stride] = src_ptr[k];
212                                     dest_ptr[(k + 1) * buf_col_stride] = src_ptr[k + 1];
213                                     dest_ptr[(k + 2) * buf_col_stride] = src_ptr[k + 2];
214                                     dest_ptr[(k + 3) * buf_col_stride] = src_ptr[k + 3];
215                                 }
216                                 for (; k < x_incr; k++) {
217                                     dest_ptr[k * buf_col_stride] = src_ptr[k];
218                                 }
219                                 dest_ptr += buf_line_stride;
220                                 src_ptr += block_width;
221                             }
222                         } else {
223                             /* General case */
224                             for (j = 0; j < y_incr; j++) {
225                                 OPJ_UINT32 k;
226                                 for (k = 0; k < x_incr; k++) {
227                                     dest_ptr[k * buf_col_stride] = src_ptr[k];
228                                 }
229                                 dest_ptr += buf_line_stride;
230                                 src_ptr += block_width;
231                             }
232                         }
233                     }
234                 }
235             } else {
236                 if (src_block == NULL) {
237                     src_block = (OPJ_INT32*) opj_calloc(1,
238                                                         sa->block_width * sa->block_height * sizeof(OPJ_INT32));
239                     if (src_block == NULL) {
240                         return OPJ_FALSE;
241                     }
242                     sa->data_blocks[block_y * sa->block_count_hor + block_x] = src_block;
243                 }
244
245                 if (buf_col_stride == 1) {
246                     OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset *
247                                                        (OPJ_SIZE_T)block_width + block_x_offset;
248                     const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) *
249                                                             (OPJ_SIZE_T)buf_line_stride + (x - x0) * buf_col_stride;
250                     if (x_incr == 4) {
251                         /* Same code as general branch, but the compiler */
252                         /* can have an efficient memcpy() */
253                         (void)(x_incr); /* trick to silent cppcheck duplicateBranch warning */
254                         for (j = 0; j < y_incr; j++) {
255                             memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
256                             dest_ptr += block_width;
257                             src_ptr += buf_line_stride;
258                         }
259                     } else {
260                         for (j = 0; j < y_incr; j++) {
261                             memcpy(dest_ptr, src_ptr, sizeof(OPJ_INT32) * x_incr);
262                             dest_ptr += block_width;
263                             src_ptr += buf_line_stride;
264                         }
265                     }
266                 } else {
267                     OPJ_INT32* OPJ_RESTRICT dest_ptr = src_block + block_y_offset *
268                                                        (OPJ_SIZE_T)block_width + block_x_offset;
269                     const OPJ_INT32* OPJ_RESTRICT src_ptr = buf + (y - y0) *
270                                                             (OPJ_SIZE_T)buf_line_stride + (x - x0) * buf_col_stride;
271                     if (x_incr == 1) {
272                         for (j = 0; j < y_incr; j++) {
273                             *dest_ptr = *src_ptr;
274                             src_ptr += buf_line_stride;
275                             dest_ptr += block_width;
276                         }
277                     } else if (x_incr >= 8 && buf_col_stride == 8) {
278                         for (j = 0; j < y_incr; j++) {
279                             OPJ_UINT32 k;
280                             for (k = 0; k < (x_incr & ~3U); k += 4) {
281                                 dest_ptr[k] = src_ptr[k * buf_col_stride];
282                                 dest_ptr[k + 1] = src_ptr[(k + 1) * buf_col_stride];
283                                 dest_ptr[k + 2] = src_ptr[(k + 2) * buf_col_stride];
284                                 dest_ptr[k + 3] = src_ptr[(k + 3) * buf_col_stride];
285                             }
286                             for (; k < x_incr; k++) {
287                                 dest_ptr[k] = src_ptr[k * buf_col_stride];
288                             }
289                             src_ptr += buf_line_stride;
290                             dest_ptr += block_width;
291                         }
292                     } else {
293                         /* General case */
294                         for (j = 0; j < y_incr; j++) {
295                             OPJ_UINT32 k;
296                             for (k = 0; k < x_incr; k++) {
297                                 dest_ptr[k] = src_ptr[k * buf_col_stride];
298                             }
299                             src_ptr += buf_line_stride;
300                             dest_ptr += block_width;
301                         }
302                     }
303                 }
304             }
305         }
306     }
307
308     return OPJ_TRUE;
309 }
310
311 OPJ_BOOL opj_sparse_array_int32_read(const opj_sparse_array_int32_t* sa,
312                                      OPJ_UINT32 x0,
313                                      OPJ_UINT32 y0,
314                                      OPJ_UINT32 x1,
315                                      OPJ_UINT32 y1,
316                                      OPJ_INT32* dest,
317                                      OPJ_UINT32 dest_col_stride,
318                                      OPJ_UINT32 dest_line_stride,
319                                      OPJ_BOOL forgiving)
320 {
321     return opj_sparse_array_int32_read_or_write(
322                (opj_sparse_array_int32_t*)sa, x0, y0, x1, y1,
323                dest,
324                dest_col_stride,
325                dest_line_stride,
326                forgiving,
327                OPJ_TRUE);
328 }
329
330 OPJ_BOOL opj_sparse_array_int32_write(opj_sparse_array_int32_t* sa,
331                                       OPJ_UINT32 x0,
332                                       OPJ_UINT32 y0,
333                                       OPJ_UINT32 x1,
334                                       OPJ_UINT32 y1,
335                                       const OPJ_INT32* src,
336                                       OPJ_UINT32 src_col_stride,
337                                       OPJ_UINT32 src_line_stride,
338                                       OPJ_BOOL forgiving)
339 {
340     return opj_sparse_array_int32_read_or_write(sa, x0, y0, x1, y1,
341             (OPJ_INT32*)src,
342             src_col_stride,
343             src_line_stride,
344             forgiving,
345             OPJ_FALSE);
346 }