Project Ne10
An open, optimized software library for the ARM architecture.
test_suite_fir_interpolate.c
Go to the documentation of this file.
1 /*
2  * Copyright 2012-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : test_suite_fir_interpolate.c
30  */
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <math.h>
35 
36 #include "NE10_dsp.h"
37 #include "seatest.h"
38 
39 
40 /* ----------------------------------------------------------------------
41 ** Global defines
42 ** ------------------------------------------------------------------- */
43 
/* Buffer sizes and repetition count used by the FIR interpolation tests */
#define TEST_LENGTH_SAMPLES 480     /* elements allocated for each input and each output buffer */
#define MAX_BLOCKSIZE       320     /* MAX_NUMTAPS + MAX_BLOCKSIZE elements are allocated per state buffer */
#define MAX_NUMTAPS         100

#define TEST_COUNT          5000    /* repetitions of each timed run in the performance test */
50 
51 //input and output
52 static ne10_float32_t * guarded_in_c = NULL;
53 static ne10_float32_t * guarded_in_neon = NULL;
54 static ne10_float32_t * in_c = NULL;
55 static ne10_float32_t * in_neon = NULL;
56 
57 static ne10_float32_t * guarded_out_c = NULL;
58 static ne10_float32_t * guarded_out_neon = NULL;
59 static ne10_float32_t * out_c = NULL;
60 static ne10_float32_t * out_neon = NULL;
61 
62 static ne10_float32_t * guarded_fir_state_c = NULL;
63 static ne10_float32_t * guarded_fir_state_neon = NULL;
64 static ne10_float32_t * fir_state_c = NULL;
65 static ne10_float32_t * fir_state_neon = NULL;
66 
/*
 * Result variables, compiled only for the test flavours that use them.
 * Both macros are checked with defined(): a bare (REGRESSION_TEST) would be
 * evaluated as an expression and fails to preprocess when the macro is
 * defined without a value.
 */
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
/* signal-to-noise ratio between the C and NEON outputs */
static ne10_float32_t snr = 0.0f;
#endif
#ifdef PERFORMANCE_TEST
/* measured run times of the C and NEON implementations */
static ne10_int64_t time_c = 0;
static ne10_int64_t time_neon = 0;
/* time_c / time_neon */
static ne10_float32_t time_speedup = 0.0f;
/* percentage of time_c saved by the NEON implementation */
static ne10_float32_t time_savings = 0.0f;
#endif
76 
77 /* ----------------------------------------------------------------------
78 * Coefficients for 32-tap filter for F32
79 * ------------------------------------------------------------------- */
80 
81 static ne10_float32_t testCoeffs32_f32[32] =
82 {
83  0.068186, 0.064344, -0.162450, 0.057015, 0.029743, 0.010066, 0.047792, 0.021273,
84  -0.096447, -0.211652, -0.086613, 0.057501, -0.187605, -0.167199, -0.026983, -0.025464,
85  -0.061495, 0.110914, -0.081973, -0.055231, -0.074430, -0.196536, 0.016845, -0.096493,
86  0.039625, -0.110273, -0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440
87 };
88 
89 /* ----------------------------------------------------------------------
90 * Coefficients for 8-tap filter for F32
91 * ------------------------------------------------------------------- */
92 
93 static ne10_float32_t testCoeffs8_f32[8] =
94 {
95  0.039625, -0.110273, -0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440
96 };
97 
98 /* ----------------------------------------------------------------------
99 ** Coefficients for 1-tap filter for F32
100 ** ------------------------------------------------------------------- */
101 
102 // static ne10_float32_t testCoeffs1_f32 = 0.086440;
103 
104 /* ----------------------------------------------------------------------
105 ** Coefficients for 27-tap filter for F32
106 ** ------------------------------------------------------------------- */
107 static ne10_float32_t testCoeffs27_f32[27] =
108 {
109  0.010066, 0.047792, 0.021273, -0.096447, -0.211652, -0.086613, 0.057501, -0.187605,
110  -0.167199, -0.026983, -0.025464, -0.061495, 0.110914, -0.081973, -0.055231, -0.074430,
111  -0.196536, 0.016845, -0.096493, 0.039625, -0.110273, -0.042966, -0.043804, 0.087350,
112  -0.085191, 0.009420, 0.086440
113 };
114 
115 static ne10_float32_t testCoeffs6_f32[6] =
116 {
117  -0.042966, -0.043804, 0.087350, -0.085191, 0.009420, 0.086440
118 };
119 
120 /* ----------------------------------------------------------------------
121 ** Test input data for F32
122 ** Generated by the MATLAB rand() function
123 ** ------------------------------------------------------------------- */
124 
125 static ne10_float32_t testInput_f32[80] =
126 {
127  -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633,
128  0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931,
129  1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562,
130  -0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997,
131  0.815622, 0.711908, 1.290250, 0.668601, 1.190838, -1.202457, -0.019790, -0.156717,
132  -1.604086, 0.257304, -1.056473, 1.415141, -0.805090, 0.528743, 0.219321, -0.921902,
133  -2.170674, -0.059188, -1.010634, 0.614463, 0.507741, 1.692430, 0.591283, -0.643595,
134  0.380337, -1.009116, -0.019511, -0.048221, 0.000043, -0.317859, 1.095004, -1.873990,
135  0.428183, 0.895638, 0.730957, 0.577857, 0.040314, 0.677089, 0.568900, -0.255645,
136  -0.377469, -0.295887, -1.475135, -0.234004, 0.118445, 0.314809, 1.443508, -0.350975
137 };
138 
139 /* ----------------------------------------------------------------------
140 ** Defines each of the tests performed
141 ** ------------------------------------------------------------------- */
142 typedef struct
143 {
144  ne10_uint32_t blockSize;
145  ne10_uint32_t numTaps;
146  ne10_uint32_t D;
147  ne10_uint32_t numFrames;
148  ne10_float32_t *coeffsF32;
149  ne10_float32_t *inputF32;
150 } test_config;
151 
/*
 * Configurations exercised by the conformance (smoke / regression) test.
 * Columns: blockSize, numTaps, D, numFrames, coefficients, input.
 * Both macros are checked with defined() so that the condition also works
 * when REGRESSION_TEST is defined without a value.
 */
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
static test_config CONFIG[] =
{
    {0, 1, 1, 10, &testCoeffs6_f32[0], &testInput_f32[0]},
    {8, 6, 6, 10, &testCoeffs6_f32[0], &testInput_f32[0]},
    {8, 8, 2, 10, &testCoeffs8_f32[0], &testInput_f32[0]},
    {8, 27, 4, 10, &testCoeffs27_f32[0], &testInput_f32[0]},
    {8, 32, 4, 10, &testCoeffs32_f32[0], &testInput_f32[0]},
    {80, 6, 6, 1, &testCoeffs6_f32[0], &testInput_f32[0]},
    {80, 8, 2, 1, &testCoeffs8_f32[0], &testInput_f32[0]},
    {80, 27, 4, 1, &testCoeffs27_f32[0], &testInput_f32[0]},
    {80, 32, 4, 1, &testCoeffs32_f32[0], &testInput_f32[0]}
};
#define NUM_TESTS (sizeof(CONFIG) / sizeof(CONFIG[0]) )
#endif
#ifdef PERFORMANCE_TEST
/*
 * Configurations timed by the performance test.
 * Columns: blockSize, numTaps, D, numFrames, coefficients, input.
 */
static test_config CONFIG_PERF[] =
{
    {8,  27, 3, 10, testCoeffs27_f32, testInput_f32},
    {8,  32, 4, 10, testCoeffs32_f32, testInput_f32},
    {80, 27, 3, 1,  testCoeffs27_f32, testInput_f32},
    {80, 32, 4, 1,  testCoeffs32_f32, testInput_f32}
};
#define NUM_PERF_TESTS (sizeof(CONFIG_PERF) / sizeof(CONFIG_PERF[0]) )
#endif
176 
177 
178 void test_fir_interpolate_case0()
179 {
181 
182  ne10_uint16_t loop = 0;
183  ne10_uint16_t block = 0;
184  ne10_uint16_t i = 0;
185  ne10_uint16_t length = 0;
186 
187  test_config *config;
188  ne10_result_t status_c = NE10_OK;
189  ne10_result_t status_neon = NE10_OK;
190 
191  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
192 
193  /* init input memory */
194  NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end
195  NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end
196 
197  /* init dst memory */
198  NE10_DST_ALLOC (out_c, guarded_out_c, TEST_LENGTH_SAMPLES);
199  NE10_DST_ALLOC (out_neon, guarded_out_neon, TEST_LENGTH_SAMPLES);
200 
201  /* init state memory */
202  NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_NUMTAPS + MAX_BLOCKSIZE);
203  NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_NUMTAPS + MAX_BLOCKSIZE);
204 
205 #ifdef ENABLE_NE10_FIR_INTERPOLATE_FLOAT_NEON
206 #if defined (SMOKE_TEST)||(REGRESSION_TEST)
207  ne10_uint16_t pos = 0;
208  for (loop = 0; loop < NUM_TESTS; loop++)
209  {
210  config = &CONFIG[loop];
211  length = config->numFrames * config->blockSize * config->D;
212 
213  /* Initialize the CFFT/CIFFT module */
214  status_c = ne10_fir_interpolate_init_float (&SC, config->D, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize);
215  status_neon = ne10_fir_interpolate_init_float (&SN, config->D, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize);
216 
217  if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR)))
218  {
219  if (config->numTaps == 27)
220  {
221  fprintf (stdout, "length of input data is wrong!\n");
222  continue;
223  }
224  else
225  {
226  fprintf (stdout, "initialization error\n");
227  }
228  }
229  /* copy input to input buffer */
230  for (i = 0; i < 80; i++)
231  {
232  in_c[i] = testInput_f32[i];
233  in_neon[i] = testInput_f32[i];
234  }
235 
236  GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES);
237  GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES);
238 
239  for (block = 0; block < config->numFrames; block++)
240  {
241  ne10_fir_interpolate_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize * config->D), config->blockSize);
242  }
243  for (block = 0; block < config->numFrames; block++)
244  {
245  ne10_fir_interpolate_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize * config->D), config->blockSize);
246  }
247 
248  assert_true (CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES));
249  assert_true (CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES));
250 
251  //conformance test 1: compare snr
252  snr = CAL_SNR_FLOAT32 (out_c, out_neon, length);
253  assert_false ( (snr < SNR_THRESHOLD));
254 
255  //conformance test 2: compare output of C and neon
256 #if defined (DEBUG_TRACE)
257  printf ("--------------------config %d\n", loop);
258  printf ("snr %f\n", snr);
259 #endif
260  for (pos = 0; pos < length; pos++)
261  {
262 #if defined (DEBUG_TRACE)
263  printf ("pos %d \n", pos);
264  printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
265 #endif
266  assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_SMALL, 1);
267  }
268 
269  }
270 #endif
271 #endif // ENABLE_NE10_FIR_INTERPOLATE_FLOAT_NEON
272 
273 #ifdef PERFORMANCE_TEST
274  ne10_uint16_t k;
275  fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio");
276  for (loop = 0; loop < NUM_PERF_TESTS; loop++)
277  {
278  config = &CONFIG_PERF[loop];
279  length = config->numFrames * config->blockSize * config->D;
280 
281  /* Initialize the CFFT/CIFFT module */
282  status_c = ne10_fir_interpolate_init_float (&SC, config->D, config->numTaps, config->coeffsF32, fir_state_c, config->blockSize);
283  status_neon = ne10_fir_interpolate_init_float (&SN, config->D, config->numTaps, config->coeffsF32, fir_state_neon, config->blockSize);
284 
285  if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR)))
286  {
287  if (config->numTaps == 27)
288  {
289  fprintf (stdout, "length of input data is wrong!\n");
290  continue;
291  }
292  else
293  {
294  fprintf (stdout, "initialization error\n");
295  }
296  }
297 
298  /* copy input to input buffer */
299  for (i = 0; i < 80; i++)
300  {
301  in_c[i] = testInput_f32[i];
302  in_neon[i] = testInput_f32[i];
303  }
304 
305  GET_TIME
306  (
307  time_c,
308  {
309  for (k = 0; k < TEST_COUNT; k++)
310  {
311  for (block = 0; block < config->numFrames; block++)
312  {
313  ne10_fir_interpolate_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize * config->D), config->blockSize);
314  }
315  }
316  }
317  );
318 #ifdef ENABLE_NE10_FIR_INTERPOLATE_FLOAT_NEON
319  GET_TIME
320  (
321  time_neon,
322  {
323  for (k = 0; k < TEST_COUNT; k++)
324  {
325  for (block = 0; block < config->numFrames; block++)
326  {
327  ne10_fir_interpolate_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize * config->D), config->blockSize);
328  }
329  }
330  }
331  );
332 #endif // ENABLE_NE10_FIR_INTERPOLATE_FLOAT_NEON
333 
334  time_speedup = (ne10_float32_t) time_c / time_neon;
335  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
336  ne10_log (__FUNCTION__, "%20d,%4d%20lld%20lld%19.2f%%%18.2f:1\n", config->numTaps, time_c, time_neon, time_savings, time_speedup);
337  }
338 #endif
339 
340  free (guarded_in_c);
341  free (guarded_in_neon);
342  free (guarded_out_c);
343  free (guarded_out_neon);
344  free (guarded_fir_state_c);
345  free (guarded_fir_state_neon);
346  fprintf (stdout, "----------%30s end\n", __FUNCTION__);
347 }
348 
/* Test entry point handed to run_test(); it runs the single FIR interpolation case. */
void test_fir_interpolate()
{
    test_fir_interpolate_case0();
}
353 
354 static void my_test_setup (void)
355 {
356  ne10_log_buffer_ptr = ne10_log_buffer;
357 }
358 
359 void test_fixture_fir_interpolate (void)
360 {
361  test_fixture_start(); // starts a fixture
362 
363  fixture_setup (my_test_setup);
364 
365  run_test (test_fir_interpolate); // run tests
366 
367  test_fixture_end(); // ends a fixture
368 }
#define TEST_LENGTH_SAMPLES
#define TEST_COUNT
ne10_result_t ne10_fir_interpolate_init_float(ne10_fir_interpolate_instance_f32_t *S, ne10_uint8_t L, ne10_uint16_t numTaps, ne10_float32_t *pCoeffs, ne10_float32_t *pState, ne10_uint32_t blockSize)
Initialization function for the floating-point FIR interpolator.
void ne10_fir_interpolate_float_neon(const ne10_fir_interpolate_instance_f32_t *S, ne10_float32_t *pSrc, ne10_float32_t *pDst, ne10_uint32_t blockSize) asm("ne10_fir_interpolate_float_neon")
Specific implementation of ne10_fir_interpolate_float using NEON SIMD capabilities.
void my_test_setup(void)
float ne10_float32_t
Definition: NE10_types.h:80
int64_t ne10_int64_t
Definition: NE10_types.h:78
uint16_t ne10_uint16_t
Definition: NE10_types.h:75
uint32_t ne10_uint32_t
Definition: NE10_types.h:77
Instance structure for the floating-point FIR Interpolation.
Definition: NE10_types.h:392
#define MAX_BLOCKSIZE
void ne10_fir_interpolate_float_c(const ne10_fir_interpolate_instance_f32_t *S, ne10_float32_t *pSrc, ne10_float32_t *pDst, ne10_uint32_t blockSize)
Specific implementation of ne10_fir_interpolate_float using plain C.
Definition: NE10_fir.c:679
#define MAX_NUMTAPS
#define NE10_ERR
Definition: NE10_types.h:66
#define NE10_OK
Definition: NE10_types.h:65
int ne10_result_t
Definition: NE10_types.h:82