Project Ne10
An open, optimized software library for the ARM architecture.
NE10_resize.c
Go to the documentation of this file.
1 /*
2  * Copyright 2013-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /* license of OpenCV */
29 /*M///////////////////////////////////////////////////////////////////////////////////////
30 //
31 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
32 //
33 // By downloading, copying, installing or using the software you agree to this license.
34 // If you do not agree to this license, do not download, install,
35 // copy or use the software.
36 //
37 //
38 // License Agreement
39 // For Open Source Computer Vision Library
40 //
41 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
42 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
43 // Third party copyrights are property of their respective owners.
44 //
45 // Redistribution and use in source and binary forms, with or without modification,
46 // are permitted provided that the following conditions are met:
47 //
48 // * Redistribution's of source code must retain the above copyright notice,
49 // this list of conditions and the following disclaimer.
50 //
51 // * Redistribution's in binary form must reproduce the above copyright notice,
52 // this list of conditions and the following disclaimer in the documentation
53 // and/or other materials provided with the distribution.
54 //
55 // * The name of the copyright holders may not be used to endorse or promote products
56 // derived from this software without specific prior written permission.
57 //
58 // This software is provided by the copyright holders and contributors "as is" and
59 // any express or implied warranties, including, but not limited to, the implied
60 // warranties of merchantability and fitness for a particular purpose are disclaimed.
61 // In no event shall the Intel Corporation or contributors be liable for any direct,
62 // indirect, incidental, special, exemplary, or consequential damages
63 // (including, but not limited to, procurement of substitute goods or services;
64 // loss of use, data, or profits; or business interruption) however caused
65 // and on any theory of liability, whether in contract, strict liability,
66 // or tort (including negligence or otherwise) arising in any way out of
67 // the use of this software, even if advised of the possibility of such damage.
68 //
69 //M*/
70 
71 /*
72  * NE10 Library : imgproc/NE10_resize.c
73  */
74 
75 #include "NE10.h"
76 
/* Number of fractional bits used by the fixed-point interpolation weights. */
#define INTER_RESIZE_COEF_BITS 11
/* Fixed-point value of 1.0; derived from the bit count so the two cannot drift apart. */
#define INTER_RESIZE_COEF_SCALE (1 << INTER_RESIZE_COEF_BITS)
/* Capacity of the per-kernel-row scratch arrays (row pointers, cached row indices, weights). */
#define NE10_MAX_ESIZE 16
93 
94 static inline ne10_uint32_t ne10_align_size (ne10_int32_t sz, ne10_int32_t n)
95 {
96  return (sz + n - 1) & -n;
97 }
98 
99 static inline ne10_int32_t ne10_floor (ne10_float32_t a)
100 {
101  return ( ( (a) >= 0) ? ( (ne10_int32_t) a) : ( (ne10_int32_t) a - 1));
102 }
103 
104 static inline ne10_int32_t ne10_clip (ne10_int32_t x, ne10_int32_t a, ne10_int32_t b)
105 {
106  return (x >= a ? (x < b ? x : b - 1) : a);
107 }
108 
109 static inline ne10_uint8_t ne10_cast_op (ne10_int32_t val)
110 {
112  ne10_int32_t SHIFT = bits;
113  ne10_int32_t DELTA = 1 << (bits - 1) ;
114  ne10_int32_t temp = NE10_MIN (255, NE10_MAX (0, (val + DELTA) >> SHIFT));
115  return (ne10_uint8_t) (temp);
116 };
117 
118 static void ne10_img_hresize_linear_c (const ne10_uint8_t** src,
119  ne10_int32_t** dst,
120  ne10_int32_t count,
121  const ne10_int32_t* xofs,
122  const ne10_int16_t* alpha,
123  ne10_int32_t swidth,
124  ne10_int32_t dwidth,
125  ne10_int32_t cn,
126  ne10_int32_t xmin,
127  ne10_int32_t xmax)
128 {
129  ne10_int32_t dx, k;
130 
131  ne10_int32_t dx0 = 0;
132 
133  //for (k = 0; k <= count - 2; k++)
134  if (count == 2)
135  {
136  k = 0;
137  const ne10_uint8_t *S0 = src[k], *S1 = src[k + 1];
138  ne10_int32_t *D0 = dst[k], *D1 = dst[k + 1];
139  for (dx = dx0; dx < xmax; dx++)
140  {
141  ne10_int32_t sx = xofs[dx];
142  ne10_int32_t a0 = alpha[dx * 2], a1 = alpha[dx * 2 + 1];
143  ne10_int32_t t0 = S0[sx] * a0 + S0[sx + cn] * a1;
144  ne10_int32_t t1 = S1[sx] * a0 + S1[sx + cn] * a1;
145  D0[dx] = t0;
146  D1[dx] = t1;
147  }
148 
149  for (; dx < dwidth; dx++)
150  {
151  ne10_int32_t sx = xofs[dx];
152  D0[dx] = (ne10_int32_t) S0[sx] * INTER_RESIZE_COEF_SCALE;
153  D1[dx] = (ne10_int32_t) S1[sx] * INTER_RESIZE_COEF_SCALE;
154  }
155  }
156 
157  //for (; k < count; k++)
158  if (count == 1)
159  {
160  k = 0;
161  const ne10_uint8_t *S = src[k];
162  ne10_int32_t *D = dst[k];
163  for (dx = 0; dx < xmax; dx++)
164  {
165  ne10_int32_t sx = xofs[dx];
166  D[dx] = S[sx] * alpha[dx * 2] + S[sx + cn] * alpha[dx * 2 + 1];
167  }
168 
169  for (; dx < dwidth; dx++)
170  D[dx] = (ne10_int32_t) S[xofs[dx]] * INTER_RESIZE_COEF_SCALE;
171  }
172 }
173 
174 
175 static void ne10_img_vresize_linear_c (const ne10_int32_t** src, ne10_uint8_t* dst, const ne10_int16_t* beta, ne10_int32_t width)
176 {
177  ne10_int32_t b0 = beta[0], b1 = beta[1];
178  const ne10_int32_t *S0 = src[0], *S1 = src[1];
179 
180  ne10_int32_t x = 0;
181  for (; x <= width - 4; x += 4)
182  {
183  ne10_int32_t t0, t1;
184  t0 = S0[x] * b0 + S1[x] * b1;
185  t1 = S0[x + 1] * b0 + S1[x + 1] * b1;
186  dst[x] = ne10_cast_op (t0);
187  dst[x + 1] = ne10_cast_op (t1);
188  t0 = S0[x + 2] * b0 + S1[x + 2] * b1;
189  t1 = S0[x + 3] * b0 + S1[x + 3] * b1;
190  dst[x + 2] = ne10_cast_op (t0);
191  dst[x + 3] = ne10_cast_op (t1);
192  }
193 
194  for (; x < width; x++)
195  dst[x] = ne10_cast_op (S0[x] * b0 + S1[x] * b1);
196 }
197 
198 static void ne10_img_resize_generic_linear_c (ne10_uint8_t* src,
199  ne10_uint8_t* dst,
200  const ne10_int32_t* xofs,
201  const ne10_int16_t* _alpha,
202  const ne10_int32_t* yofs,
203  const ne10_int16_t* _beta,
204  ne10_int32_t xmin,
205  ne10_int32_t xmax,
206  ne10_int32_t ksize,
207  ne10_int32_t srcw,
208  ne10_int32_t srch,
209  ne10_int32_t srcstep,
210  ne10_int32_t dstw,
211  ne10_int32_t dsth,
212  ne10_int32_t channels)
213 {
214 
215  const ne10_int16_t* alpha = _alpha;
216  const ne10_int16_t* beta = _beta;
217  ne10_int32_t cn = channels;
218  srcw *= cn;
219  dstw *= cn;
220 
221  ne10_int32_t bufstep = (ne10_int32_t) ne10_align_size (dstw, 16);
222  ne10_int32_t dststep = (ne10_int32_t) ne10_align_size (dstw, 4);
223 
224 
225  ne10_int32_t *buffer_ = (ne10_int32_t*) NE10_MALLOC (bufstep * ksize * sizeof (ne10_int32_t));
226 
227  const ne10_uint8_t* srows[NE10_MAX_ESIZE];
229  ne10_int32_t prev_sy[NE10_MAX_ESIZE];
230  ne10_int32_t k, dy;
231  xmin *= cn;
232  xmax *= cn;
233 
234  for (k = 0; k < ksize; k++)
235  {
236  prev_sy[k] = -1;
237  rows[k] = (ne10_int32_t*) buffer_ + bufstep * k;
238  }
239 
240  // image resize is a separable operation. In case of not too strong
241  for (dy = 0; dy < dsth; dy++, beta += ksize)
242  {
243  ne10_int32_t sy0 = yofs[dy], k, k0 = ksize, k1 = 0, ksize2 = ksize / 2;
244 
245  for (k = 0; k < ksize; k++)
246  {
247  ne10_int32_t sy = ne10_clip (sy0 - ksize2 + 1 + k, 0, srch);
248  for (k1 = NE10_MAX (k1, k); k1 < ksize; k1++)
249  {
250  if (sy == prev_sy[k1]) // if the sy-th row has been computed already, reuse it.
251  {
252  if (k1 > k)
253  memcpy (rows[k], rows[k1], bufstep * sizeof (rows[0][0]));
254  break;
255  }
256  }
257  if (k1 == ksize)
258  k0 = NE10_MIN (k0, k); // remember the first row that needs to be computed
259  srows[k] = (const ne10_uint8_t*) (src + srcstep * sy);
260  prev_sy[k] = sy;
261  }
262 
263  if (k0 < ksize)
264  ne10_img_hresize_linear_c (srows + k0, rows + k0, ksize - k0, xofs, alpha,
265  srcw, dstw, cn, xmin, xmax);
266 
267  ne10_img_vresize_linear_c ( (const ne10_int32_t**) rows, (ne10_uint8_t*) (dst + dststep * dy), beta, dstw);
268  }
269 
270  NE10_FREE (buffer_);
271 }
272 
273 static void ne10_img_resize_cal_offset_linear (ne10_int32_t* xofs,
274  ne10_int16_t* ialpha,
275  ne10_int32_t* yofs,
276  ne10_int16_t* ibeta,
277  ne10_int32_t *xmin,
278  ne10_int32_t *xmax,
279  ne10_int32_t ksize,
280  ne10_int32_t ksize2,
281  ne10_int32_t srcw,
282  ne10_int32_t srch,
283  ne10_int32_t dstw,
284  ne10_int32_t dsth,
285  ne10_int32_t channels)
286 {
287  ne10_float32_t inv_scale_x = (ne10_float32_t) dstw / srcw;
288  ne10_float32_t inv_scale_y = (ne10_float32_t) dsth / srch;
289 
290  ne10_int32_t cn = channels;
291  ne10_float32_t scale_x = 1. / inv_scale_x;
292  ne10_float32_t scale_y = 1. / inv_scale_y;
293  ne10_int32_t k, sx, sy, dx, dy;
294 
295 
296  ne10_float32_t fx, fy;
297 
299 
300  for (dx = 0; dx < dstw; dx++)
301  {
302  fx = (ne10_float32_t) ( (dx + 0.5) * scale_x - 0.5);
303  sx = ne10_floor (fx);
304  fx -= sx;
305 
306  if (sx < ksize2 - 1)
307  {
308  *xmin = dx + 1;
309  if (sx < 0)
310  fx = 0, sx = 0;
311  }
312 
313  if (sx + ksize2 >= srcw)
314  {
315  *xmax = NE10_MIN (*xmax, dx);
316  if (sx >= srcw - 1)
317  fx = 0, sx = srcw - 1;
318  }
319 
320  for (k = 0, sx *= cn; k < cn; k++)
321  xofs[dx * cn + k] = sx + k;
322 
323  cbuf[0] = 1.f - fx;
324  cbuf[1] = fx;
325 
326  for (k = 0; k < ksize; k++)
327  ialpha[dx * cn * ksize + k] = (ne10_int16_t) (cbuf[k] * INTER_RESIZE_COEF_SCALE);
328  for (; k < cn * ksize; k++)
329  ialpha[dx * cn * ksize + k] = ialpha[dx * cn * ksize + k - ksize];
330  }
331 
332  for (dy = 0; dy < dsth; dy++)
333  {
334  fy = (ne10_float32_t) ( (dy + 0.5) * scale_y - 0.5);
335  sy = ne10_floor (fy);
336  fy -= sy;
337 
338  yofs[dy] = sy;
339 
340  cbuf[0] = 1.f - fy;
341  cbuf[1] = fy;
342 
343  for (k = 0; k < ksize; k++)
344  ibeta[dy * ksize + k] = (ne10_int16_t) (cbuf[k] * INTER_RESIZE_COEF_SCALE);
345 
346  }
347 
348 }
349 
351  ne10_int32_t** dst,
352  ne10_int32_t count,
353  const ne10_int32_t* xofs,
354  const ne10_int16_t* alpha,
355  ne10_int32_t swidth,
356  ne10_int32_t dwidth,
357  ne10_int32_t cn,
358  ne10_int32_t xmin,
359  ne10_int32_t xmax);
360 extern void ne10_img_vresize_linear_neon (const ne10_int32_t** src, ne10_uint8_t* dst, const ne10_int16_t* beta, ne10_int32_t width);
361 
362 static void ne10_img_resize_generic_linear_neon (ne10_uint8_t* src,
363  ne10_uint8_t* dst,
364  const ne10_int32_t* xofs,
365  const ne10_int16_t* _alpha,
366  const ne10_int32_t* yofs,
367  const ne10_int16_t* _beta,
368  ne10_int32_t xmin,
369  ne10_int32_t xmax,
370  ne10_int32_t ksize,
371  ne10_int32_t srcw,
372  ne10_int32_t srch,
373  ne10_int32_t srcstep,
374  ne10_int32_t dstw,
375  ne10_int32_t dsth,
376  ne10_int32_t channels)
377 {
378 
379  const ne10_int16_t* alpha = _alpha;
380  const ne10_int16_t* beta = _beta;
381  ne10_int32_t cn = channels;
382  srcw *= cn;
383  dstw *= cn;
384 
385  ne10_int32_t bufstep = (ne10_int32_t) ne10_align_size (dstw, 16);
386  ne10_int32_t dststep = (ne10_int32_t) ne10_align_size (dstw, 4);
387 
388 
389  ne10_int32_t *buffer_ = (ne10_int32_t*) NE10_MALLOC (bufstep * ksize * sizeof (ne10_int32_t));
390 
391  const ne10_uint8_t* srows[NE10_MAX_ESIZE];
393  ne10_int32_t prev_sy[NE10_MAX_ESIZE];
394  ne10_int32_t k, dy;
395  xmin *= cn;
396  xmax *= cn;
397 
398  for (k = 0; k < ksize; k++)
399  {
400  prev_sy[k] = -1;
401  rows[k] = (ne10_int32_t*) buffer_ + bufstep * k;
402  }
403 
404  // image resize is a separable operation. In case of not too strong
405  for (dy = 0; dy < dsth; dy++, beta += ksize)
406  {
407  ne10_int32_t sy0 = yofs[dy], k, k0 = ksize, k1 = 0, ksize2 = ksize / 2;
408 
409  for (k = 0; k < ksize; k++)
410  {
411  ne10_int32_t sy = ne10_clip (sy0 - ksize2 + 1 + k, 0, srch);
412  for (k1 = NE10_MAX (k1, k); k1 < ksize; k1++)
413  {
414  if (sy == prev_sy[k1]) // if the sy-th row has been computed already, reuse it.
415  {
416  if (k1 > k)
417  memcpy (rows[k], rows[k1], bufstep * sizeof (rows[0][0]));
418  break;
419  }
420  }
421  if (k1 == ksize)
422  k0 = NE10_MIN (k0, k); // remember the first row that needs to be computed
423  srows[k] = (const ne10_uint8_t*) (src + srcstep * sy);
424  prev_sy[k] = sy;
425  }
426 
427  if (k0 < ksize)
428  {
429  if (cn == 4)
430  ne10_img_hresize_4channels_linear_neon (srows + k0, rows + k0, ksize - k0, xofs, alpha,
431  srcw, dstw, cn, xmin, xmax);
432  else
433  ne10_img_hresize_linear_c (srows + k0, rows + k0, ksize - k0, xofs, alpha,
434  srcw, dstw, cn, xmin, xmax);
435  }
436  ne10_img_vresize_linear_neon ( (const ne10_int32_t**) rows, (ne10_uint8_t*) (dst + dststep * dy), beta, dstw);
437  }
438 
439  NE10_FREE (buffer_);
440 }
441 
447  ne10_uint32_t dst_width,
448  ne10_uint32_t dst_height,
449  ne10_uint8_t* src,
450  ne10_uint32_t src_width,
451  ne10_uint32_t src_height,
452  ne10_uint32_t src_stride)
453 {
454  ne10_int32_t dstw = dst_width;
455  ne10_int32_t dsth = dst_height;
456  ne10_int32_t srcw = src_width;
457  ne10_int32_t srch = src_height;
458 
459  ne10_int32_t cn = 4;
460 
461 
462  ne10_int32_t xmin = 0;
463  ne10_int32_t xmax = dstw;
464  ne10_int32_t width = dstw * cn;
465 
466  ne10_int32_t ksize = 0, ksize2;
467  ksize = 2;
468  ksize2 = ksize / 2;
469 
470  ne10_uint8_t *buffer_ = (ne10_uint8_t*) NE10_MALLOC ( (width + dsth) * (sizeof (ne10_int32_t) + sizeof (ne10_float32_t) * ksize));
471 
472  ne10_int32_t* xofs = (ne10_int32_t*) buffer_;
473  ne10_int32_t* yofs = xofs + width;
474  ne10_int16_t* ialpha = (ne10_int16_t*) (yofs + dsth);
475  ne10_int16_t* ibeta = ialpha + width * ksize;
476 
477  ne10_img_resize_cal_offset_linear (xofs, ialpha, yofs, ibeta, &xmin, &xmax, ksize, ksize2, srcw, srch, dstw, dsth, cn);
478 
479  ne10_img_resize_generic_linear_c (src, dst, xofs, ialpha, yofs, ibeta, xmin, xmax, ksize, srcw, srch, src_stride, dstw, dsth, cn);
480  NE10_FREE (buffer_);
481 }
482 
488  ne10_uint32_t dst_width,
489  ne10_uint32_t dst_height,
490  ne10_uint8_t* src,
491  ne10_uint32_t src_width,
492  ne10_uint32_t src_height,
493  ne10_uint32_t src_stride)
494 {
495  ne10_int32_t dstw = dst_width;
496  ne10_int32_t dsth = dst_height;
497  ne10_int32_t srcw = src_width;
498  ne10_int32_t srch = src_height;
499 
500  ne10_int32_t cn = 4;
501 
502 
503  ne10_int32_t xmin = 0;
504  ne10_int32_t xmax = dstw;
505  ne10_int32_t width = dstw * cn;
506 
507  ne10_int32_t ksize = 0, ksize2;
508  ksize = 2;
509  ksize2 = ksize / 2;
510 
511  ne10_uint8_t *buffer_ = (ne10_uint8_t*) NE10_MALLOC ( (width + dsth) * (sizeof (ne10_int32_t) + sizeof (ne10_float32_t) * ksize));
512 
513  ne10_int32_t* xofs = (ne10_int32_t*) buffer_;
514  ne10_int32_t* yofs = xofs + width;
515  ne10_int16_t* ialpha = (ne10_int16_t*) (yofs + dsth);
516  ne10_int16_t* ibeta = ialpha + width * ksize;
517 
518  ne10_img_resize_cal_offset_linear (xofs, ialpha, yofs, ibeta, &xmin, &xmax, ksize, ksize2, srcw, srch, dstw, dsth, cn);
519 
520  ne10_img_resize_generic_linear_neon (src, dst, xofs, ialpha, yofs, ibeta, xmin, xmax, ksize, srcw, srch, src_stride, dstw, dsth, cn);
521  NE10_FREE (buffer_);
522 }
523 
void ne10_img_resize_bilinear_rgba_c(ne10_uint8_t *dst, ne10_uint32_t dst_width, ne10_uint32_t dst_height, ne10_uint8_t *src, ne10_uint32_t src_width, ne10_uint32_t src_height, ne10_uint32_t src_stride)
Specific implementation of ne10_img_resize_bilinear_rgba using plain C.
Definition: NE10_resize.c:446
uint8_t ne10_uint8_t
Definition: NE10_types.h:73
int32_t ne10_int32_t
Definition: NE10_types.h:76
void ne10_img_resize_bilinear_rgba_neon(ne10_uint8_t *dst, ne10_uint32_t dst_width, ne10_uint32_t dst_height, ne10_uint8_t *src, ne10_uint32_t src_width, ne10_uint32_t src_height, ne10_uint32_t src_stride)
Specific implementation of ne10_img_resize_bilinear_rgba using NEON SIMD capabilities.
Definition: NE10_resize.c:487
#define NE10_MIN(a, b)
Definition: NE10_macros.h:60
#define INTER_RESIZE_COEF_BITS
Definition: NE10_resize.c:90
float ne10_float32_t
Definition: NE10_types.h:80
void ne10_img_hresize_4channels_linear_neon(const ne10_uint8_t **src, ne10_int32_t **dst, ne10_int32_t count, const ne10_int32_t *xofs, const ne10_int16_t *alpha, ne10_int32_t swidth, ne10_int32_t dwidth, ne10_int32_t cn, ne10_int32_t xmin, ne10_int32_t xmax)
#define INTER_RESIZE_COEF_SCALE
Definition: NE10_resize.c:91
void ne10_img_vresize_linear_neon(const ne10_int32_t **src, ne10_uint8_t *dst, const ne10_int16_t *beta, ne10_int32_t width)
uint32_t ne10_uint32_t
Definition: NE10_types.h:77
#define NE10_FREE(p)
Definition: NE10_macros.h:54
#define DELTA
#define NE10_MALLOC
Definition: NE10_macros.h:53
#define D1
#define NE10_MAX(a, b)
Definition: NE10_macros.h:61
int16_t ne10_int16_t
Definition: NE10_types.h:74
#define NE10_MAX_ESIZE
Definition: NE10_resize.c:92