Project Ne10
An open, optimized software library for the ARM architecture.
test_suite_fir_sparse.c
Go to the documentation of this file.
1 /*
2  * Copyright 2012-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : test_suite_fir_sparse.c
30  */
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <math.h>
35 
36 #include "NE10_dsp.h"
37 #include "seatest.h"
38 
39 
40 /* ----------------------------------------------------------------------
41 ** Global defines
42 ** ------------------------------------------------------------------- */
43 
44 /* Size limits for the sparse FIR tests: samples per run, largest block size, largest tap count and largest tap delay */
45 #define TEST_LENGTH_SAMPLES 320
46 #define MAX_BLOCKSIZE 320
47 #define MAX_NUMTAPS 100
48 #define MAX_DELAY 500
49 
50 #define TEST_COUNT 5000
51 
52 //input and output
53 static ne10_float32_t * guarded_in_c = NULL;
54 static ne10_float32_t * guarded_in_neon = NULL;
55 static ne10_float32_t * in_c = NULL;
56 static ne10_float32_t * in_neon = NULL;
57 
58 static ne10_float32_t * guarded_out_c = NULL;
59 static ne10_float32_t * guarded_out_neon = NULL;
60 static ne10_float32_t * out_c = NULL;
61 static ne10_float32_t * out_neon = NULL;
62 
63 static ne10_float32_t * guarded_fir_state_c = NULL;
64 static ne10_float32_t * guarded_fir_state_neon = NULL;
65 static ne10_float32_t * fir_state_c = NULL;
66 static ne10_float32_t * fir_state_neon = NULL;
67 
68 static ne10_float32_t scratch_c[MAX_BLOCKSIZE] = {0};
69 static ne10_float32_t scratch_neon[MAX_BLOCKSIZE] = {0};
70 
71 #if defined (SMOKE_TEST)||(REGRESSION_TEST)
72 static ne10_float32_t snr = 0.0f;
73 #endif
74 #ifdef PERFORMANCE_TEST
75 static ne10_int64_t time_c = 0;
76 static ne10_int64_t time_neon = 0;
77 static ne10_float32_t time_speedup = 0.0f;
78 static ne10_float32_t time_savings = 0.0f;
79 #endif
80 
81 /* ----------------------------------------------------------------------
82 ** Coefficients for 5-tap filter for F32
83 ** ------------------------------------------------------------------- */
84 
85 static ne10_float32_t testCoeffs5_f32[5] =
86 {
87  1.749140, 0.132598, 0.325228, -0.793809, 0.314924
88 };
89 
90 /* ----------------------------------------------------------------------
91 ** Coefficients for 32-tap filter for F32
92 ** ------------------------------------------------------------------- */
93 // static ne10_float32_t testCoeffs32_f32[32] =
94 // {
95 // 1.749140, 0.132598, 0.325228, -0.793809, 0.314924, -0.527270, 0.932267, 1.164664,
96 // -2.045669, -0.644373, 1.741066, 0.486768, 1.048829, 1.488575, 1.270501, -1.856124,
97 // 2.134321, 1.435847, -0.917302, -1.106077, 0.810571, 0.698543, -0.401583, 1.268751,
98 // -0.783608, 0.213266, 0.787898, 0.896682, -0.186917, 1.013182, 0.248435, 0.059608
99 // };
100 
101 /* ----------------------------------------------------------------------
102 ** Delay offsets for 5-tap Sparse filter for F32
103 ** ------------------------------------------------------------------- */
104 static ne10_int32_t tapDelay5_f32[5] =
105 {
106  95, 23, 61, 49, 89
107 };
108 
109 /* ----------------------------------------------------------------------
110 ** Delay offsets for 32-tap Sparse filter for F32
111 ** ------------------------------------------------------------------- */
112 // static ne10_int32_t tapDelay32_f32[32] =
113 // {
114 // 95, 23, 61, 49, 89, 76, 46, 2,
115 // 82, 44, 62, 79, 92, 74, 18, 41,
116 // 94, 92, 41, 89, 6, 35, 81, 1,
117 // 14, 20, 20, 60, 27, 20, 2, 75
118 // };
119 
120 /* ----------------------------------------------------------------------
121 ** Test input data for F32: TEST_LENGTH_SAMPLES values that the test copies into in_c and in_neon
122 ** NOTE(review): credited to the MATLAB rand() function, but the values are signed and exceed 1 in magnitude, so randn() looks more likely - confirm
123 ** ------------------------------------------------------------------- */
124 static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES] =
125 {
126  -0.432565, -1.665584, 0.125332, 0.287676, -1.146471, 1.190915, 1.189164, -0.037633,
127  0.327292, 0.174639, -0.186709, 0.725791, -0.588317, 2.183186, -0.136396, 0.113931,
128  1.066768, 0.059281, -0.095648, -0.832349, 0.294411, -1.336182, 0.714325, 1.623562,
129  -0.691776, 0.857997, 1.254001, -1.593730, -1.440964, 0.571148, -0.399886, 0.689997,
130  0.815622, 0.711908, 1.290250, 0.668601, 1.190838, -1.202457, -0.019790, -0.156717,
131  -1.604086, 0.257304, -1.056473, 1.415141, -0.805090, 0.528743, 0.219321, -0.921902,
132  -2.170674, -0.059188, -1.010634, 0.614463, 0.507741, 1.692430, 0.591283, -0.643595,
133  0.380337, -1.009116, -0.019511, -0.048221, 0.000043, -0.317859, 1.095004, -1.873990,
134  0.428183, 0.895638, 0.730957, 0.577857, 0.040314, 0.677089, 0.568900, -0.255645,
135  -0.377469, -0.295887, -1.475135, -0.234004, 0.118445, 0.314809, 1.443508, -0.350975,
136  0.623234, 0.799049, 0.940890, -0.992092, 0.212035, 0.237882, -1.007763, -0.742045,
137  1.082295, -0.131500, 0.389880, 0.087987, -0.635465, -0.559573, 0.443653, -0.949904,
138  0.781182, 0.568961, -0.821714, -0.265607, -1.187777, -2.202321, 0.986337, -0.518635,
139  0.327368, 0.234057, 0.021466, -1.003944, -0.947146, -0.374429, -1.185886, -1.055903,
140  1.472480, 0.055744, -1.217317, -0.041227, -1.128344, -1.349278, -0.261102, 0.953465,
141  0.128644, 0.656468, -1.167819, -0.460605, -0.262440, -1.213152, -1.319437, 0.931218,
142  0.011245, -0.645146, 0.805729, 0.231626, -0.989760, 1.339586, 0.289502, 1.478917,
143  1.138028, -0.684139, -1.291936, -0.072926, -0.330599, -0.843628, 0.497770, 1.488490,
144  -0.546476, -0.846758, -0.246337, 0.663024, -0.854197, -1.201315, -0.119869, -0.065294,
145  0.485296, -0.595491, -0.149668, -0.434752, -0.079330, 1.535152, -0.606483, -1.347363,
146  0.469383, -0.903567, 0.035880, -0.627531, 0.535398, 0.552884, -0.203690, -2.054325,
147  0.132561, 1.592941, 1.018412, -1.580402, -0.078662, -0.681657, -1.024553, -1.234353,
148  0.288807, -0.429303, 0.055801, -0.367874, -0.464973, 0.370961, 0.728283, 2.112160,
149  -1.357298, -1.022610, 1.037834, -0.389800, -1.381266, 0.315543, 1.553243, 0.707894,
150  1.957385, 0.504542, 1.864529, -0.339812, -1.139779, -0.211123, 1.190245, -1.116209,
151  0.635274, -0.601412, 0.551185, -1.099840, 0.085991, -2.004563, -0.493088, 0.462048,
152  -0.321005, 1.236556, -0.631280, -2.325211, -1.231637, 1.055648, -0.113224, 0.379224,
153  0.944200, -2.120427, -0.644679, -0.704302, -1.018137, -0.182082, 1.521013, -0.038439,
154  1.227448, -0.696205, 0.007524, -0.782893, 0.586939, -0.251207, 0.480136, 0.668155,
155  -0.078321, 0.889173, 2.309287, 0.524639, -0.011787, 0.913141, 0.055941, -1.107070,
156  0.485498, -0.005005, -0.276218, 1.276452, 1.863401, -0.522559, 0.103424, -0.807649,
157  0.680439, -2.364590, 0.990115, 0.218899, 0.261662, 1.213444, -0.274667, -0.133134,
158  -1.270500, -1.663606, -0.703554, 0.280880, -0.541209, -1.333531, 1.072686, -0.712085,
159  -0.011286, -0.000817, -0.249436, 0.396575, -0.264013, -1.664011, -1.028975, 0.243095,
160  -1.256590, -0.347183, -0.941372, -1.174560, -1.021142, -0.401667, 0.173666, -0.116118,
161  1.064119, -0.245386, -1.517539, 0.009734, 0.071373, 0.316536, 0.499826, 1.278084,
162  -0.547816, 0.260808, -0.013177, -0.580264, 2.136308, -0.257617, -1.409528, 1.770101,
163  0.325546, -1.119040, 0.620350, 1.269782, -0.896043, 0.135175, -0.139040, -1.163395,
164  1.183720, -0.015430, 0.536219, -0.716429, -0.655559, 0.314363, 0.106814, 1.848216,
165  -0.275106, 2.212554, 1.508526, -1.945079, -1.680543, -0.573534, -0.185817, 0.008934
166 };
167 
168 /* ----------------------------------------------------------------------
169 ** Defines each of the tests performed
170 ** ------------------------------------------------------------------- */
171 typedef struct
172 {
173  ne10_uint32_t blockSize;
174  ne10_uint32_t numTaps;
175  ne10_uint32_t numFrames;
176  ne10_uint32_t maxDelay;
177  ne10_int32_t *tapDelay;
178  ne10_float32_t *coeffsF32;
179  ne10_float32_t *inputF32;
180 } test_config;
181 
/*
 * All test configurations.
 * Initialiser order: blockSize, numTaps, numFrames, maxDelay, tapDelay,
 * coeffsF32, inputF32.
 */
#if defined (SMOKE_TEST)||(REGRESSION_TEST)
/* Conformance runs; the first entry exercises a block size of 0. */
static test_config CONFIG[] =
{
    { 0,  5, 160, 100, tapDelay5_f32, testCoeffs5_f32, testInput_f32},
    { 2,  5, 160, 100, tapDelay5_f32, testCoeffs5_f32, testInput_f32},
    //{2, 0, 160, 100, &tapDelay5_f32[0], &testCoeffs5_f32[0], &testInput_f32[0]},
    {64,  5,   5, 100, tapDelay5_f32, testCoeffs5_f32, testInput_f32},
    { 5,  5,  64, 100, tapDelay5_f32, testCoeffs5_f32, testInput_f32},
    //{64, 32, 5, 100, &tapDelay32_f32[0], &testCoeffs32_f32[0], &testInput_f32[0]}
};
#define NUM_TESTS (sizeof (CONFIG) / sizeof (CONFIG[0]))
#endif
#ifdef PERFORMANCE_TEST
/* Timing runs. */
static test_config CONFIG_PERF[] =
{
    { 2,  5, 160, 100, tapDelay5_f32, testCoeffs5_f32, testInput_f32},
    {64,  5,   5, 100, tapDelay5_f32, testCoeffs5_f32, testInput_f32},
    { 5,  5,  64, 100, tapDelay5_f32, testCoeffs5_f32, testInput_f32},
};
#define NUM_PERF_TESTS (sizeof (CONFIG_PERF) / sizeof (CONFIG_PERF[0]))
#endif
204 
205 
206 void test_fir_sparse_case0()
207 {
209 
210  ne10_uint16_t loop = 0;
211  ne10_uint16_t block = 0;
212  ne10_uint16_t i = 0;
213 
214  test_config *config;
215  ne10_result_t status_c = NE10_OK;
216  ne10_result_t status_neon = NE10_OK;
217 
218  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
219 
220  /* init input memory */
221  NE10_SRC_ALLOC (in_c, guarded_in_c, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end
222  NE10_SRC_ALLOC (in_neon, guarded_in_neon, TEST_LENGTH_SAMPLES); // 16 extra bytes at the begining and 16 extra bytes at the end
223 
224  /* init dst memory */
225  NE10_DST_ALLOC (out_c, guarded_out_c, MAX_DELAY + TEST_LENGTH_SAMPLES);
226  NE10_DST_ALLOC (out_neon, guarded_out_neon, MAX_DELAY + TEST_LENGTH_SAMPLES);
227 
228  /* init state memory */
229  NE10_DST_ALLOC (fir_state_c, guarded_fir_state_c, MAX_DELAY + MAX_BLOCKSIZE);
230  NE10_DST_ALLOC (fir_state_neon, guarded_fir_state_neon, MAX_DELAY + MAX_BLOCKSIZE);
231 
232 #ifdef ENABLE_NE10_FIR_SPARSE_FLOAT_NEON
233 #if defined (SMOKE_TEST)||(REGRESSION_TEST)
234  ne10_uint16_t pos = 0;
235  for (loop = 0; loop < NUM_TESTS; loop++)
236  {
237  config = &CONFIG[loop];
238 
239  /* Initialize the CFFT/CIFFT module */
240  status_c = ne10_fir_sparse_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c, config->tapDelay, config->maxDelay, config->blockSize);
241  status_neon = ne10_fir_sparse_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->tapDelay, config->maxDelay, config->blockSize);
242 
243  if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR)))
244  {
245  fprintf (stdout, "initialization error\n");
246  }
247 
248  /* copy input to input buffer */
249  for (i = 0; i < TEST_LENGTH_SAMPLES; i++)
250  {
251  in_c[i] = testInput_f32[i];
252  in_neon[i] = testInput_f32[i];
253  scratch_c[i] = 0;
254  scratch_neon[i] = 0;
255  }
256 
257  GUARD_ARRAY (out_c, TEST_LENGTH_SAMPLES);
258  GUARD_ARRAY (out_neon, TEST_LENGTH_SAMPLES);
259 
260  for (block = 0; block < config->numFrames; block++)
261  {
262  ne10_fir_sparse_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), scratch_c, config->blockSize);
263  }
264  for (block = 0; block < config->numFrames; block++)
265  {
266  ne10_fir_sparse_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), scratch_neon, config->blockSize);
267  }
268 
269  assert_true (CHECK_ARRAY_GUARD (out_c, TEST_LENGTH_SAMPLES));
270  assert_true (CHECK_ARRAY_GUARD (out_neon, TEST_LENGTH_SAMPLES));
271 
272  //conformance test 1: compare snr
273  snr = CAL_SNR_FLOAT32 (out_c, out_neon, TEST_LENGTH_SAMPLES);
274  assert_false ( (snr < SNR_THRESHOLD));
275 
276  //conformance test 2: compare output of C and neon
277 #if defined (DEBUG_TRACE)
278  printf ("--------------------config %d\n", loop);
279  printf ("snr %f\n", snr);
280 #endif
281  for (pos = 0; pos < TEST_LENGTH_SAMPLES; pos++)
282  {
283 #if defined (DEBUG_TRACE)
284  printf ("pos %d \n", pos);
285  printf ("c %f (0x%04X) neon %f (0x%04X)\n", out_c[pos], * (ne10_uint32_t*) &out_c[pos], out_neon[pos], * (ne10_uint32_t*) &out_neon[pos]);
286 #endif
287  assert_float_vec_equal (&out_c[pos], &out_neon[pos], ERROR_MARGIN_SMALL, 1);
288  }
289 
290  }
291 #endif
292 #endif // ENABLE_NE10_FIR_SPARSE_FLOAT_NEON
293 
294 #ifdef PERFORMANCE_TEST
295  ne10_uint16_t k;
296  fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FIR Length&Taps", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio");
297  for (loop = 0; loop < NUM_PERF_TESTS; loop++)
298  {
299  config = &CONFIG_PERF[loop];
300 
301  /* Initialize the CFFT/CIFFT module */
302  status_c = ne10_fir_sparse_init_float (&SC, config->numTaps, config->coeffsF32, fir_state_c, config->tapDelay, config->maxDelay, config->blockSize);
303  status_neon = ne10_fir_sparse_init_float (&SN, config->numTaps, config->coeffsF32, fir_state_neon, config->tapDelay, config->maxDelay, config->blockSize);
304 
305  if ( ( (status_c == NE10_ERR) || (status_neon == NE10_ERR)))
306  {
307  fprintf (stdout, "initialization error\n");
308  }
309 
310  /* copy input to input buffer */
311  for (i = 0; i < TEST_LENGTH_SAMPLES; i++)
312  {
313  in_c[i] = testInput_f32[i];
314  in_neon[i] = testInput_f32[i];
315  }
316 
317  GET_TIME
318  (
319  time_c,
320  {
321  for (k = 0; k < TEST_COUNT; k++)
322  {
323  for (block = 0; block < config->numFrames; block++)
324  {
325  ne10_fir_sparse_float_c (&SC, in_c + (block * config->blockSize), out_c + (block * config->blockSize), scratch_c, config->blockSize);
326  }
327  }
328  }
329  );
330 
331 #ifdef ENABLE_NE10_FIR_SPARSE_FLOAT_NEON
332  GET_TIME
333  (
334  time_neon,
335  {
336  for (k = 0; k < TEST_COUNT; k++)
337  {
338  for (block = 0; block < config->numFrames; block++)
339  {
340  ne10_fir_sparse_float_neon (&SN, in_neon + (block * config->blockSize), out_neon + (block * config->blockSize), scratch_neon, config->blockSize);
341  }
342  }
343  }
344  );
345 #endif // ENABLE_NE10_FIR_SPARSE_FLOAT_NEON
346 
347  time_speedup = (ne10_float32_t) time_c / time_neon;
348  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
349  ne10_log (__FUNCTION__, "%20d,%4d%20lld%20lld%19.2f%%%18.2f:1\n", config->numTaps, time_c, time_neon, time_savings, time_speedup);
350  }
351 #endif
352 
353  free (guarded_in_c);
354  free (guarded_in_neon);
355  free (guarded_out_c);
356  free (guarded_out_neon);
357  free (guarded_fir_state_c);
358  free (guarded_fir_state_neon);
359  fprintf (stdout, "----------%30s end\n", __FUNCTION__);
360 }
361 
/**
 * Entry point of the sparse FIR test group; runs test_fir_sparse_case0().
 */
void test_fir_sparse (void)
{
    test_fir_sparse_case0();
}
366 
367 static void my_test_setup (void)
368 {
369  ne10_log_buffer_ptr = ne10_log_buffer;
370 }
371 
372 void test_fixture_fir_sparse (void)
373 {
374  test_fixture_start(); // starts a fixture
375 
376  fixture_setup (my_test_setup);
377 
378  run_test (test_fir_sparse); // run tests
379 
380  test_fixture_end(); // ends a fixture
381 }
Instance structure for the floating-point FIR Sparse filter.
Definition: NE10_types.h:403
ne10_result_t ne10_fir_sparse_init_float(ne10_fir_sparse_instance_f32_t *S, ne10_uint16_t numTaps, ne10_float32_t *pCoeffs, ne10_float32_t *pState, ne10_int32_t *pTapDelay, ne10_uint16_t maxDelay, ne10_uint32_t blockSize)
Initialization function for the floating-point sparse FIR filter.
int32_t ne10_int32_t
Definition: NE10_types.h:76
void my_test_setup(void)
float ne10_float32_t
Definition: NE10_types.h:80
int64_t ne10_int64_t
Definition: NE10_types.h:78
uint16_t ne10_uint16_t
Definition: NE10_types.h:75
uint32_t ne10_uint32_t
Definition: NE10_types.h:77
#define MAX_DELAY
void ne10_fir_sparse_float_neon(ne10_fir_sparse_instance_f32_t *S, ne10_float32_t *pSrc, ne10_float32_t *pDst, ne10_float32_t *pScratch, ne10_uint32_t blockSize) asm("ne10_fir_sparse_float_neon")
Specific implementation of ne10_fir_sparse_float using NEON SIMD capabilities.
#define TEST_LENGTH_SAMPLES
ne10_uint16_t numTaps
Length of the filter.
Definition: NE10_types.h:405
#define NE10_ERR
Definition: NE10_types.h:66
#define NE10_OK
Definition: NE10_types.h:65
#define TEST_COUNT
#define MAX_BLOCKSIZE
void ne10_fir_sparse_float_c(ne10_fir_sparse_instance_f32_t *S, ne10_float32_t *pSrc, ne10_float32_t *pDst, ne10_float32_t *pScratchIn, ne10_uint32_t blockSize)
Specific implementation of ne10_fir_sparse_float using plain C.
Definition: NE10_fir.c:1386
int ne10_result_t
Definition: NE10_types.h:82