Project Ne10
An open, optimized software library for the ARM architecture.
test_suite_boxfilter.c
Go to the documentation of this file.
1 /*
2  * Copyright 2013-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : test_suite_boxfilter.c
30  */
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <math.h>
35 #include <string.h>
36 
37 #include "NE10_imgproc.h"
38 #include "seatest.h"
39 #include "unit_test_common.h"
40 
/* Side length of the exhaustive kernel grid: every (x, y) kernel with
 * 1 <= x, y <= BASIC_KERNEL_SIZE is tested. */
#define BASIC_KERNEL_SIZE 5
/* Number of kernels in that grid. The expansion is parenthesized so the
 * product keeps its value when used inside a larger expression. */
#define KERNEL_COUNT (BASIC_KERNEL_SIZE * BASIC_KERNEL_SIZE)
43 
45  const ne10_uint8_t *pTest,
46  const ne10_uint32_t buffSize)
47 {
48  ne10_float64_t mse = 0.0, max = 255.0;
49  ne10_uint32_t i;
50  ne10_float32_t psnr_value;
51 
52  for (i = 0; i < buffSize; i++)
53  {
54  mse += (pRef[i] - pTest[i]) * (pRef[i] - pTest[i]);
55  }
56  mse = mse / buffSize / 4;
57  psnr_value = 10 * log10 (max * max / mse);
58  return psnr_value;
59 }
60 
/**
 * Return a pseudo-random integer in the closed interval [min, max].
 *
 * @param min lowest value that may be returned
 * @param max highest value that may be returned; callers must pass max >= min
 * @return a value r with min <= r <= max, drawn from rand()
 */
int rand_range (int min, int max)
{
    /* Width of the interval, computed in double so max - min cannot overflow. */
    const double span = (double) max - (double) min + 1.0;

    /* Dividing by RAND_MAX + 1 keeps the fraction strictly below 1.0, so the
     * scaled value never reaches span and the result never exceeds max. */
    const double fraction = rand() / ( (double) RAND_MAX + 1.0);

    /* Truncate the non-negative offset first, then shift by min: this keeps
     * the distribution even when min is negative. */
    return (int) (span * fraction) + min;
}
66 
67 int valid_kernels (ne10_size_t *kernels, int size)
68 {
69  int i;
70  for (i = 0; i < size; i++)
71  {
72  if (kernels[i].x < 1 || kernels[i].y < 1)
73  return NE10_ERR;
74  }
75  return NE10_OK;
76 }
77 
78 /*
79  * this function check whether there is big difference between image1
80  * and image2. Here we employ 2 kinds of check: diff() and
81  * cal_psnr_uint8_rgba_color()
82  */
83 int valid_result (const ne10_uint8_t *image1,
84  const ne10_uint8_t *image2,
85  ne10_size_t src_sz,
86  ne10_int32_t src_stride,
87  ne10_int32_t channel)
88 {
89  assert ((image1 != 0) && (image2 != 0));
90  assert ((src_sz.x != 0) && (src_sz.y != 0)
91  && (src_stride != 0) && (channel != 0));
92 
93  ne10_int32_t *diff_mat = (ne10_int32_t *) malloc (sizeof (ne10_int32_t)
94  * channel
95  * src_sz.x
96  * src_sz.y);
97  ne10_int32_t diff_mat_stride = sizeof (ne10_int32_t) * channel * src_sz.x;
98 
99  if (diff_mat == 0)
100  {
101  printf ("**ERROR**: allocating %d bytes memory for kernels fails!",
102  sizeof (ne10_int32_t)
103  * src_sz.x
104  * src_sz.y
105  * channel);
106  return NE10_ERR;
107  }
108 
109  diff (image1,
110  image2,
111  diff_mat,
112  diff_mat_stride,
113  src_sz.x,
114  src_sz.y,
115  src_stride,
116  4);
117 
118  ne10_int32_t diff_nu = diff_count ( (const ne10_int32_t *) diff_mat,
119  src_sz.x,
120  src_sz.y,
121  diff_mat_stride,
122  4);
123  free (diff_mat);
124 
125  ne10_float32_t psnr_value = cal_psnr_uint8_rgba (image1,
126  image2,
127  src_sz.x
128  * src_sz.y
129  * channel);
130  if (diff_nu != 0 && psnr_value < PSNR_THRESHOLD)
131  {
132  printf ("\ndifferent point is:%d\t PSNR value is:%f\n",
133  diff_nu, psnr_value);
134  return NE10_ERR;
135  }
136  else
137  {
138  return NE10_OK;
139  }
140 }
141 
142 void boxfilter_get_kernels (size_t max_kernel_length,
143  ne10_size_t **kernels_ptr,
144  int *size)
145 {
146 
147  if (max_kernel_length > BASIC_KERNEL_SIZE)
148  {
149  *size = KERNEL_COUNT + 3;
150  }
151  else if (max_kernel_length < BASIC_KERNEL_SIZE)
152  {
153  *size = max_kernel_length * max_kernel_length;
154  }
155  else
156  {
157  *size = KERNEL_COUNT;
158  }
159 
160  *kernels_ptr = (ne10_size_t *) malloc (sizeof (ne10_size_t) * (*size));
161  if (*kernels_ptr == 0)
162  {
163  printf ("**ERROR**: allocating %d bytes memory for kernels fails!\n",
164  sizeof (ne10_size_t) * (*size));
165  }
166 
167  int x, y, first_part_size;
168 
169  if (max_kernel_length < BASIC_KERNEL_SIZE)
170  {
171  first_part_size = max_kernel_length;
172  }
173  else
174  {
175  first_part_size = BASIC_KERNEL_SIZE;
176  }
177 
178  for (x = 0; x < first_part_size; x++)
179  {
180  for (y = 0; y < first_part_size; y++)
181  {
182  (*kernels_ptr) [x * first_part_size + y].x = x + 1;
183  (*kernels_ptr) [x * first_part_size + y].y = y + 1;
184  }
185  }
186  /* add:
187  * max_kernel_length x 1
188  * 1 x max_kernel_length
189  * max_kernel_length x max_kernel_length
190  * to kernels.
191  */
192  if (max_kernel_length > BASIC_KERNEL_SIZE)
193  {
194  (*kernels_ptr) [*size - 3].x = max_kernel_length;
195  (*kernels_ptr) [*size - 3].y = 1;
196  (*kernels_ptr) [*size - 2].x = 1;
197  (*kernels_ptr) [*size - 2].y = max_kernel_length;
198  (*kernels_ptr) [*size - 1].x = max_kernel_length;
199  (*kernels_ptr) [*size - 1].y = max_kernel_length;
200  }
201 
202  assert (valid_kernels (*kernels_ptr, *size) == NE10_OK);
203 }
204 
206 {
207  assert ( (src_sz.x != 0) || (src_sz.y != 0));
208 
209  int size = sizeof (ne10_uint8_t) * src_sz.x * src_sz.y * 4;
210 
211  *img = (ne10_uint8_t *) NE10_MALLOC (sizeof (ne10_uint8_t) *
212  src_sz.x *
213  src_sz.y * 4);
214  int i;
215  for (i = 0; i < size; i++)
216  {
217  * (*img + i) = rand_range (0, 255);
218  }
219 
220  assert (*img != NULL);
221 }
222 
224 {
225  assert ( (src_sz.x != 0) || (src_sz.y != 0));
226 
227  printf ("\ntest boxfilter on image with size:%d x %d:\n",
228  src_sz.x, src_sz.y);
229 
230  int max_kernel_length = src_sz.x < src_sz.y ?
231  src_sz.x : src_sz.y;
232  max_kernel_length = max_kernel_length < ( (1 << 7) - 1) ?
233  max_kernel_length : ( (1 << 7) - 1);
234 
235  ne10_size_t *kernels;
236  int kernels_size;
237  boxfilter_get_kernels (max_kernel_length, &kernels, &kernels_size);
238 
239  ne10_uint8_t *src, *neon_dst, *c_dst;
240  create_rgba8888_image (&src, src_sz);
241  create_rgba8888_image (&neon_dst, src_sz);
242  create_rgba8888_image (&c_dst, src_sz);
243  ne10_int32_t stride = src_sz.x * 4 * sizeof (ne10_uint8_t);
244 
245  int i;
246  for (i = 0; i < kernels_size; i++)
247  {
248  printf ("test kernel size(%d x %d):",
249  kernels[i].x, kernels[i].y);
250  //use ne10 neon version
252  neon_dst,
253  src_sz,
254  stride,
255  stride,
256  kernels[i]);
257  //use ne10 c version
259  c_dst,
260  src_sz,
261  stride,
262  stride,
263  kernels[i]);
264  assert_true (valid_result (c_dst,
265  neon_dst,
266  src_sz,
267  stride,
268  4) == NE10_OK);
269  printf (" OK.\n");
270  }
271 
272  free (kernels);
273  free (src);
274  free (c_dst);
275  free (neon_dst);
276  return NE10_OK;
277 }
278 
280  ne10_size_t kernel_size,
281  long int *neon_ticks,
282  long int *c_ticks)
283 {
284  int run_loop = 10;
285  int i;
286  ne10_uint8_t *src, *neon_dst, *c_dst;
287  create_rgba8888_image (&src, img_size);
288  create_rgba8888_image (&neon_dst, img_size);
289  create_rgba8888_image (&c_dst, img_size);
290  ne10_int32_t stride = img_size.x * 4 * sizeof (ne10_uint8_t);
291 
292  long int ticks;
293  /* boxfilter c version, run multiple times to get average time */
294  for (i = 0; i < run_loop; i++)
295  {
296  GET_TIME (ticks,
298  c_dst,
299  img_size,
300  stride,
301  stride,
302  kernel_size););
303  ticks += ticks;
304  }
305  *c_ticks = ticks / run_loop;
306 
307  /* boxfilter c version, run multiple times to get average time */
308  for (i = 0; i < run_loop; i++)
309  {
310  GET_TIME (ticks,
312  c_dst,
313  img_size,
314  stride,
315  stride,
316  kernel_size););
317  ticks += ticks;
318  }
319  *neon_ticks = ticks / run_loop;
320 }
321 
322 void test_boxfilter_performance_case()
323 {
324  ne10_size_t img_sizes[] = {{240, 320}, {480, 320}, {960, 1280},
325  {1200, 1600}, {2000, 2000}
326  };
327  ne10_size_t kernel_sizes[] = {{3, 3}, {5, 5}, {7, 7}, {9, 9}};
328 
329  int i, j, n_img, n_kernel;
330  n_img = sizeof (img_sizes) / sizeof (img_sizes[0]);
331  n_kernel = sizeof (kernel_sizes) / sizeof (kernel_sizes[0]);
332  long int neon_ticks, c_ticks;
333 
334  char info[100];
335  for (i = 0; i < n_img; i++)
336  {
337  for (j = 0; j < n_kernel; j++)
338  {
339  boxfilter_performance_test (img_sizes[i],
340  kernel_sizes[j],
341  &neon_ticks,
342  &c_ticks);
343  sprintf (info,
344  "name:box filter\n"
345  "image size:%dx%d\n"
346  "kernel size:%dx%d",
347  img_sizes[i].x, img_sizes[i].y,
348  kernel_sizes[j].x, kernel_sizes[j].y);
349 
350  ne10_performance_print (UBUNTU_COMMAND_LINE,
351  neon_ticks,
352  c_ticks,
353  info);
354  }
355  }
356 }
357 
358 void test_boxfilter_smoke_case()
359 {
360  ne10_size_t img_sizes[] = {{1, 1}, {2, 2}, {8, 3}, {10, 19},
361  {240, 320}
362  };
363  int n = sizeof (img_sizes) / sizeof (img_sizes[0]);
364  int i;
365  for (i = 0; i < n; i++)
366  {
367  boxfilter_conformance_test (img_sizes[i]);
368  //progress_bar((float)(i + 1) / n);
369  }
370 }
371 
372 void test_boxfilter_regression_case()
373 {
374  ne10_size_t img_sizes[] = {{1, 1}, {2, 2}, {8, 3}, {10, 19},
375  {239, 319}, {240, 320}, {480, 640},
376  {969, 1280}, {1200, 1600}
377  };
378  int n = sizeof (img_sizes) / sizeof (img_sizes[0]);
379  int i;
380  for (i = 0; i < n; i++)
381  {
382  boxfilter_conformance_test (img_sizes[i]);
383  }
384 }
385 
/**
 * Entry point of the box filter tests: runs each test case whose build-time
 * switch (SMOKE_TEST, REGRESSION_TEST, PERFORMANCE_TEST) is defined.
 */
void test_boxfilter()
{
#ifdef SMOKE_TEST
    test_boxfilter_smoke_case();
#endif

#ifdef REGRESSION_TEST
    test_boxfilter_regression_case();
#endif

#ifdef PERFORMANCE_TEST
    test_boxfilter_performance_case();
#endif
}
400 
401 static void my_test_setup (void)
402 {
403  ne10_log_buffer_ptr = ne10_log_buffer;
404 }
405 
406 void test_fixture_boxfilter (void)
407 {
408  test_fixture_start();
409 
410  fixture_setup (my_test_setup);
411 
412  run_test (test_boxfilter);
413 
414  test_fixture_end();
415 }
uint8_t ne10_uint8_t
Definition: NE10_types.h:73
#define KERNEL_COUNT
ne10_uint32_t y
Definition: NE10_types.h:440
int32_t ne10_int32_t
Definition: NE10_types.h:76
void my_test_setup(void)
float ne10_float32_t
Definition: NE10_types.h:80
void ne10_img_boxfilter_rgba8888_neon(const ne10_uint8_t *src, ne10_uint8_t *dst, ne10_size_t src_size, ne10_int32_t src_stride, ne10_int32_t dst_stride, ne10_size_t kernel_size)
Specific implementation of ne10_img_boxfilter_rgba8888 using NEON SIMD capabilities.
void boxfilter_performance_test(ne10_size_t img_size, ne10_size_t kernel_size, long int *neon_ticks, long int *c_ticks)
ne10_uint32_t x
Definition: NE10_types.h:439
uint32_t ne10_uint32_t
Definition: NE10_types.h:77
void ne10_img_boxfilter_rgba8888_c(const ne10_uint8_t *src, ne10_uint8_t *dst, ne10_size_t src_size, ne10_int32_t src_stride, ne10_int32_t dst_stride, ne10_size_t kernel_size)
Specific implementation of ne10_img_boxfilter_rgba8888 using plain C.
void create_rgba8888_image(ne10_uint8_t **img, ne10_size_t src_sz)
int boxfilter_conformance_test(ne10_size_t src_sz)
int rand_range(int min, int max)
void boxfilter_get_kernels(size_t max_kernel_length, ne10_size_t **kernels_ptr, int *size)
int valid_kernels(ne10_size_t *kernels, int size)
#define BASIC_KERNEL_SIZE
ne10_float32_t cal_psnr_uint8_rgba(const ne10_uint8_t *pRef, const ne10_uint8_t *pTest, const ne10_uint32_t buffSize)
#define NE10_MALLOC
Definition: NE10_macros.h:53
#define NE10_ERR
Definition: NE10_types.h:66
int valid_result(const ne10_uint8_t *image1, const ne10_uint8_t *image2, ne10_size_t src_sz, ne10_int32_t src_stride, ne10_int32_t channel)
double ne10_float64_t
Definition: NE10_types.h:81
#define NE10_OK
Definition: NE10_types.h:65