Project Ne10
An open, optimized software library for the ARM architecture.
test_suite_fft_int16.c
Go to the documentation of this file.
1 /*
2  * Copyright 2013-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : test_suite_fft_int16.c
30  */
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <math.h>
35 #include <string.h>
36 
37 #include "NE10_dsp.h"
38 #include "seatest.h"
39 #include "unit_test_common.h"
40 
41 
42 /* ----------------------------------------------------------------------
43 ** Global defines
44 ** ------------------------------------------------------------------- */
45 
46 /* Largest FFT length exercised by the size loops; every sample buffer is sized for twice this many elements (real and imaginary parts) */
47 #define TEST_LENGTH_SAMPLES (4096)
/* FFT length at which the complex-to-complex size loops start */
48 #define MIN_LENGTH_SAMPLES_CPX (2)
/* FFT length at which the real-to-complex size loops start (same value as the complex minimum) */
49 #define MIN_LENGTH_SAMPLES_REAL (MIN_LENGTH_SAMPLES_CPX)
50 
/* A conformance check fails when the SNR computed between the C and NEON outputs is below this value */
51 #define SNR_THRESHOLD_INT16 15.0f
52 
/* The performance tests run each kernel TEST_COUNT / fftSize times per measurement */
53 #define TEST_COUNT 250000
54 
55 /* ----------------------------------------------------------------------
56 ** Defines each of the tests performed
57 ** ------------------------------------------------------------------- */
58 
59 //input and output
60 static ne10_int32_t testInput_i16_unscaled[TEST_LENGTH_SAMPLES * 2];
61 static ne10_int32_t testInput_i16_scaled[TEST_LENGTH_SAMPLES * 2];
62 static ne10_int16_t * guarded_in_c = NULL;
63 static ne10_int16_t * guarded_in_neon = NULL;
64 static ne10_int16_t * in_c = NULL;
65 static ne10_int16_t * in_neon = NULL;
66 
67 static ne10_int16_t * guarded_out_c = NULL;
68 static ne10_int16_t * guarded_out_neon = NULL;
69 static ne10_int16_t * out_c = NULL;
70 static ne10_int16_t * out_neon = NULL;
71 
72 static ne10_float32_t snr = 0.0f;
73 
74 static ne10_int64_t time_c = 0;
75 static ne10_int64_t time_neon = 0;
76 static ne10_float32_t time_speedup = 0.0f;
77 static ne10_float32_t time_savings = 0.0f;
78 
79 void test_fft_c2c_1d_int16_conformance()
80 {
81 
82  ne10_int32_t i = 0;
83  ne10_int32_t fftSize = 0;
85  ne10_float32_t * out_c_tmp = NULL;
86  ne10_float32_t * out_neon_tmp = NULL;
87 
88  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
89 
90  /* init input memory */
91  guarded_in_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
92  guarded_in_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
93  in_c = guarded_in_c + ARRAY_GUARD_LEN;
94  in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
95 
96  /* init dst memory */
97  guarded_out_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
98  guarded_out_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
99  out_c = guarded_out_c + ARRAY_GUARD_LEN;
100  out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
101 
102  out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
103  out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
104 
105  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
106  {
107  testInput_i16_unscaled[i] = (ne10_int32_t) (drand48() * 1024) - 512;
108  testInput_i16_scaled[i] = (ne10_int16_t) (drand48() * NE10_F2I16_MAX) - NE10_F2I16_MAX / 2;
109  }
110  for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
111  {
112  fprintf (stdout, "FFT size %d\n", fftSize);
113  cfg = ne10_fft_alloc_c2c_int16 (fftSize);
114  if (cfg == NULL)
115  {
116  fprintf (stdout, "======ERROR, FFT alloc fails\n");
117  return;
118  }
119 
120  /* unscaled FFT test */
121  memcpy (in_c, testInput_i16_unscaled, 2 * fftSize * sizeof (ne10_int16_t));
122  memcpy (in_neon, testInput_i16_unscaled, 2 * fftSize * sizeof (ne10_int16_t));
123 
124  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t));
125  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t));
126 
127  ne10_fft_c2c_1d_int16_c ( (ne10_fft_cpx_int16_t*) out_c, (ne10_fft_cpx_int16_t*) in_c, cfg, 0, 0);
128  ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 0, 0);
129 
130  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t)));
131  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t)));
132 
133  //conformance test
134  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
135  {
136  out_c_tmp[i] = (ne10_float32_t) out_c[i];
137  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
138  }
139  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
140  assert_false ( (snr < SNR_THRESHOLD_INT16));
141 
142  /* IFFT test */
143  memcpy (in_c, testInput_i16_unscaled, 2 * fftSize * sizeof (ne10_int16_t));
144  memcpy (in_neon, testInput_i16_unscaled, 2 * fftSize * sizeof (ne10_int16_t));
145 
146  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t));
147  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t));
148 
149  ne10_fft_c2c_1d_int16_c ( (ne10_fft_cpx_int16_t*) out_c, (ne10_fft_cpx_int16_t*) in_c, cfg, 1, 0);
150  ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 1, 0);
151 
152  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t)));
153  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t)));
154 
155  //conformance test
156  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
157  {
158  out_c_tmp[i] = (ne10_float32_t) out_c[i];
159  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
160  }
161  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
162  assert_false ( (snr < SNR_THRESHOLD_INT16));
163 
164  /* scaled FFT test */
165  memcpy (in_c, testInput_i16_scaled, 2 * fftSize * sizeof (ne10_int16_t));
166  memcpy (in_neon, testInput_i16_scaled, 2 * fftSize * sizeof (ne10_int16_t));
167 
168  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t));
169  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t));
170 
171  ne10_fft_c2c_1d_int16_c ( (ne10_fft_cpx_int16_t*) out_c, (ne10_fft_cpx_int16_t*) in_c, cfg, 0, 1);
172  ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 0, 1);
173 
174  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t)));
175  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t)));
176 
177  //conformance test
178  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
179  {
180  out_c_tmp[i] = (ne10_float32_t) out_c[i];
181  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
182  }
183  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
184  assert_false ( (snr < SNR_THRESHOLD_INT16));
185 
186  /* IFFT test */
187  memcpy (in_c, testInput_i16_scaled, 2 * fftSize * sizeof (ne10_int16_t));
188  memcpy (in_neon, testInput_i16_scaled, 2 * fftSize * sizeof (ne10_int16_t));
189 
190  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t));
191  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t));
192 
193  ne10_fft_c2c_1d_int16_c ( (ne10_fft_cpx_int16_t*) out_c, (ne10_fft_cpx_int16_t*) in_c, cfg, 1, 1);
194  ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 1, 1);
195 
196  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int16_t)));
197  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int16_t)));
198 
199  //conformance test
200  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
201  {
202  out_c_tmp[i] = (ne10_float32_t) out_c[i];
203  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
204  }
205  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
206  assert_false ( (snr < SNR_THRESHOLD_INT16));
207 
208  NE10_FREE (cfg);
209  }
210 
211  NE10_FREE (guarded_in_c);
212  NE10_FREE (guarded_in_neon);
213  NE10_FREE (guarded_out_c);
214  NE10_FREE (guarded_out_neon);
215  NE10_FREE (out_c_tmp);
216  NE10_FREE (out_neon_tmp);
217 }
218 
219 void test_fft_c2c_1d_int16_performance()
220 {
221 
222  ne10_int32_t i = 0;
223  ne10_int32_t fftSize = 0;
225  ne10_int32_t test_loop = 0;
226 
227  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
228  fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio");
229 
230  /* init input memory */
231  guarded_in_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
232  guarded_in_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
233  in_c = guarded_in_c + ARRAY_GUARD_LEN;
234  in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
235 
236  /* init dst memory */
237  guarded_out_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
238  guarded_out_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
239  out_c = guarded_out_c + ARRAY_GUARD_LEN;
240  out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
241 
242  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
243  {
244  testInput_i16_unscaled[i] = (ne10_int16_t) (drand48() * 1024) - 512;
245  testInput_i16_scaled[i] = (ne10_int16_t) (drand48() * NE10_F2I16_MAX) - NE10_F2I16_MAX / 2;
246  }
247  for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
248  {
249  fprintf (stdout, "FFT size %d\n", fftSize);
250  cfg = ne10_fft_alloc_c2c_int16 (fftSize);
251  if (cfg == NULL)
252  {
253  fprintf (stdout, "======ERROR, FFT alloc fails\n");
254  return;
255  }
256  test_loop = TEST_COUNT / fftSize;
257 
258  /* unscaled FFT test */
259  memcpy (in_c, testInput_i16_unscaled, 2 * fftSize * sizeof (ne10_int16_t));
260  memcpy (in_neon, testInput_i16_unscaled, 2 * fftSize * sizeof (ne10_int16_t));
261 
262  GET_TIME
263  (
264  time_c,
265  {
266  for (i = 0; i < test_loop; i++)
267  ne10_fft_c2c_1d_int16_c ( (ne10_fft_cpx_int16_t*) out_c, (ne10_fft_cpx_int16_t*) in_c, cfg, 0, 0);
268  }
269  );
270  GET_TIME
271  (
272  time_neon,
273  {
274  for (i = 0; i < test_loop; i++)
275  ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 0, 0);
276  }
277  );
278 
279  time_speedup = (ne10_float32_t) time_c / time_neon;
280  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
281  ne10_log (__FUNCTION__, "Int16 unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
282 
283  /* IFFT test */
284  memcpy (in_c, out_c, 2 * fftSize * sizeof (ne10_int16_t));
285  memcpy (in_neon, out_c, 2 * fftSize * sizeof (ne10_int16_t));
286 
287  GET_TIME
288  (
289  time_c,
290  {
291  for (i = 0; i < test_loop; i++)
292  ne10_fft_c2c_1d_int16_c ( (ne10_fft_cpx_int16_t*) out_c, (ne10_fft_cpx_int16_t*) in_c, cfg, 1, 0);
293  }
294  );
295  GET_TIME
296  (
297  time_neon,
298  {
299  for (i = 0; i < test_loop; i++)
300  ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 1, 0);
301  }
302  );
303 
304  time_speedup = (ne10_float32_t) time_c / time_neon;
305  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
306  ne10_log (__FUNCTION__, "Int16 unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
307  /* scaled FFT test */
308  memcpy (in_c, testInput_i16_scaled, 2 * fftSize * sizeof (ne10_int16_t));
309  memcpy (in_neon, testInput_i16_scaled, 2 * fftSize * sizeof (ne10_int16_t));
310 
311  GET_TIME
312  (
313  time_c,
314  {
315  for (i = 0; i < test_loop; i++)
316  ne10_fft_c2c_1d_int16_c ( (ne10_fft_cpx_int16_t*) out_c, (ne10_fft_cpx_int16_t*) in_c, cfg, 0, 1);
317  }
318  );
319  GET_TIME
320  (
321  time_neon,
322  {
323  for (i = 0; i < test_loop; i++)
324  ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 0, 1);
325  }
326  );
327 
328  time_speedup = (ne10_float32_t) time_c / time_neon;
329  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
330  ne10_log (__FUNCTION__, "Int16 scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
331 
332  /* IFFT test */
333  memcpy (in_c, out_c, 2 * fftSize * sizeof (ne10_int16_t));
334  memcpy (in_neon, out_c, 2 * fftSize * sizeof (ne10_int16_t));
335 
336  GET_TIME
337  (
338  time_c,
339  {
340  for (i = 0; i < test_loop; i++)
341  ne10_fft_c2c_1d_int16_c ( (ne10_fft_cpx_int16_t*) out_c, (ne10_fft_cpx_int16_t*) in_c, cfg, 1, 1);
342  }
343  );
344  GET_TIME
345  (
346  time_neon,
347  {
348  for (i = 0; i < test_loop; i++)
349  ne10_fft_c2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 1, 1);
350  }
351  );
352 
353  time_speedup = (ne10_float32_t) time_c / time_neon;
354  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
355  ne10_log (__FUNCTION__, "Int16 scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
356 
357  NE10_FREE (cfg);
358  }
359 
360  NE10_FREE (guarded_in_c);
361  NE10_FREE (guarded_in_neon);
362  NE10_FREE (guarded_out_c);
363  NE10_FREE (guarded_out_neon);
364 }
365 
366 void test_fft_r2c_1d_int16_conformance()
367 {
368 
369  ne10_int32_t i = 0;
370  ne10_int32_t fftSize = 0;
372  ne10_float32_t * out_c_tmp = NULL;
373  ne10_float32_t * out_neon_tmp = NULL;
374 
375  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
376 
377  /* init input memory */
378  guarded_in_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
379  guarded_in_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
380  in_c = guarded_in_c + ARRAY_GUARD_LEN;
381  in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
382 
383  /* init dst memory */
384  guarded_out_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
385  guarded_out_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
386  out_c = guarded_out_c + ARRAY_GUARD_LEN;
387  out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
388 
389  out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
390  out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
391 
392  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
393  {
394  testInput_i16_unscaled[i] = (ne10_int16_t) (drand48() * 1024) - 512;
395  testInput_i16_scaled[i] = (ne10_int16_t) (drand48() * NE10_F2I16_MAX) - NE10_F2I16_MAX / 2;
396  }
397  for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
398  {
399  fprintf (stdout, "RFFT size %d\n", fftSize);
400  cfg = ne10_fft_alloc_r2c_int16 (fftSize);
401  if (cfg == NULL)
402  {
403  fprintf (stdout, "======ERROR, FFT alloc fails\n");
404  return;
405  }
406 
407  /* unscaled FFT test */
408  memcpy (in_c, testInput_i16_unscaled, fftSize * sizeof (ne10_int16_t));
409  memcpy (in_neon, testInput_i16_unscaled, fftSize * sizeof (ne10_int16_t));
410 
411  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int16_t));
412  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int16_t));
413 
414  ne10_fft_r2c_1d_int16_c ( (ne10_fft_cpx_int16_t*) out_c, in_c, cfg, 0);
415  ne10_fft_r2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, in_neon, cfg, 0);
416 
417  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int16_t)));
418  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int16_t)));
419 
420  //conformance test
421  for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
422  {
423  out_c_tmp[i] = (ne10_float32_t) out_c[i];
424  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
425  }
426  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
427  assert_false ( (snr < SNR_THRESHOLD_INT16));
428 
429  /* IFFT test */
430  for (i = 1; i < (fftSize / 2); i++)
431  {
432  in_c[2 * i] = testInput_i16_unscaled[2 * i];
433  in_c[2 * i + 1] = testInput_i16_unscaled[2 * i + 1];
434  in_c[2 * (fftSize - i)] = in_c[2 * i];
435  in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
436  }
437  in_c[0] = testInput_i16_unscaled[0];
438  in_c[1] = 0;
439  in_c[fftSize] = testInput_i16_unscaled[1];
440  in_c[fftSize + 1] = 0;
441  memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int16_t));
442 
443  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int16_t));
444  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int16_t));
445 
446  ne10_fft_c2r_1d_int16_c (out_c, (ne10_fft_cpx_int16_t*) in_c, cfg, 0);
447  ne10_fft_c2r_1d_int16_neon (out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 0);
448 
449  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int16_t)));
450  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int16_t)));
451 
452  //conformance test
453  for (i = 0; i < fftSize; i++)
454  {
455  out_c_tmp[i] = (ne10_float32_t) out_c[i];
456  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
457  }
458  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
459  assert_false ( (snr < SNR_THRESHOLD_INT16));
460 
461  /* scaled FFT test */
462  memcpy (in_c, testInput_i16_scaled, fftSize * sizeof (ne10_int16_t));
463  memcpy (in_neon, testInput_i16_scaled, fftSize * sizeof (ne10_int16_t));
464 
465  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int16_t));
466  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int16_t));
467 
468  ne10_fft_r2c_1d_int16_c ( (ne10_fft_cpx_int16_t*) out_c, in_c, cfg, 1);
469  ne10_fft_r2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, in_neon, cfg, 1);
470 
471  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int16_t)));
472  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int16_t)));
473 
474  //conformance test
475  for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
476  {
477  out_c_tmp[i] = (ne10_float32_t) out_c[i];
478  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
479  }
480  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
481  assert_false ( (snr < SNR_THRESHOLD_INT16));
482 
483  /* IFFT test */
484  for (i = 1; i < (fftSize / 2); i++)
485  {
486  in_c[2 * i] = testInput_i16_scaled[2 * i];
487  in_c[2 * i + 1] = testInput_i16_scaled[2 * i + 1];
488  in_c[2 * (fftSize - i)] = in_c[2 * i];
489  in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
490  }
491  in_c[0] = testInput_i16_scaled[0];
492  in_c[1] = 0;
493  in_c[fftSize] = testInput_i16_scaled[1];
494  in_c[fftSize + 1] = 0;
495  memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int16_t));
496 
497  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int16_t));
498  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int16_t));
499 
500  ne10_fft_c2r_1d_int16_c (out_c, (ne10_fft_cpx_int16_t*) in_c, cfg, 1);
501  ne10_fft_c2r_1d_int16_neon (out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 1);
502 
503  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int16_t)));
504  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int16_t)));
505 
506  //conformance test
507  for (i = 0; i < fftSize; i++)
508  {
509  out_c_tmp[i] = (ne10_float32_t) out_c[i];
510  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
511  }
512  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
513  assert_false ( (snr < SNR_THRESHOLD_INT16));
514 
515  NE10_FREE (cfg);
516  }
517 
518  NE10_FREE (guarded_in_c);
519  NE10_FREE (guarded_in_neon);
520  NE10_FREE (guarded_out_c);
521  NE10_FREE (guarded_out_neon);
522  NE10_FREE (out_c_tmp);
523  NE10_FREE (out_neon_tmp);
524 }
525 
526 void test_fft_r2c_1d_int16_performance()
527 {
528 
529  ne10_int32_t i = 0;
530  ne10_int32_t fftSize = 0;
532  ne10_int32_t test_loop = 0;
533 
534  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
535  fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio");
536 
537  /* init input memory */
538  guarded_in_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
539  guarded_in_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
540  in_c = guarded_in_c + ARRAY_GUARD_LEN;
541  in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
542 
543  /* init dst memory */
544  guarded_out_c = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
545  guarded_out_neon = (ne10_int16_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int16_t));
546  out_c = guarded_out_c + ARRAY_GUARD_LEN;
547  out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
548 
549  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
550  {
551  testInput_i16_unscaled[i] = (ne10_int16_t) (drand48() * 1024) - 512;
552  testInput_i16_scaled[i] = (ne10_int16_t) (drand48() * NE10_F2I16_MAX) - NE10_F2I16_MAX / 2;
553  }
554  for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
555  {
556  fprintf (stdout, "FFT size %d\n", fftSize);
557  cfg = ne10_fft_alloc_r2c_int16 (fftSize);
558  if (cfg == NULL)
559  {
560  fprintf (stdout, "======ERROR, FFT alloc fails\n");
561  return;
562  }
563  test_loop = TEST_COUNT / fftSize;
564 
565  /* unscaled FFT test */
566  memcpy (in_c, testInput_i16_unscaled , fftSize * sizeof (ne10_int16_t));
567  memcpy (in_neon, testInput_i16_unscaled , fftSize * sizeof (ne10_int16_t));
568 
569  GET_TIME
570  (
571  time_c,
572  {
573  for (i = 0; i < test_loop; i++)
574  ne10_fft_r2c_1d_int16_c ( (ne10_fft_cpx_int16_t*) out_c, in_c, cfg, 0);
575  }
576  );
577  GET_TIME
578  (
579  time_neon,
580  {
581  for (i = 0; i < test_loop; i++)
582  ne10_fft_r2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, in_neon, cfg, 0);
583  }
584  );
585 
586  time_speedup = (ne10_float32_t) time_c / time_neon;
587  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
588  ne10_log (__FUNCTION__, "Int16 unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
589 
590  /* IFFT test */
591  for (i = 1; i < (fftSize / 2); i++)
592  {
593  in_c[2 * i] = testInput_i16_unscaled[2 * i];
594  in_c[2 * i + 1] = testInput_i16_unscaled[2 * i + 1];
595  in_c[2 * (fftSize - i)] = in_c[2 * i];
596  in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
597  }
598  in_c[0] = testInput_i16_unscaled[0];
599  in_c[1] = 0;
600  in_c[fftSize] = testInput_i16_unscaled[1];
601  in_c[fftSize + 1] = 0;
602  memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int16_t));
603 
604  GET_TIME
605  (
606  time_c,
607  {
608  for (i = 0; i < test_loop; i++)
609  ne10_fft_c2r_1d_int16_c (out_c, (ne10_fft_cpx_int16_t*) in_c, cfg, 0);
610  }
611  );
612  GET_TIME
613  (
614  time_neon,
615  {
616  for (i = 0; i < test_loop; i++)
617  ne10_fft_c2r_1d_int16_neon (out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 0);
618  }
619  );
620 
621  time_speedup = (ne10_float32_t) time_c / time_neon;
622  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
623  ne10_log (__FUNCTION__, "Int16 unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
624 
625  /* scaled FFT test */
626  memcpy (in_c, testInput_i16_scaled , fftSize * sizeof (ne10_int16_t));
627  memcpy (in_neon, testInput_i16_scaled , fftSize * sizeof (ne10_int16_t));
628 
629  GET_TIME
630  (
631  time_c,
632  {
633  for (i = 0; i < test_loop; i++)
634  ne10_fft_r2c_1d_int16_c ( (ne10_fft_cpx_int16_t*) out_c, in_c, cfg, 1);
635  }
636  );
637  GET_TIME
638  (
639  time_neon,
640  {
641  for (i = 0; i < test_loop; i++)
642  ne10_fft_r2c_1d_int16_neon ( (ne10_fft_cpx_int16_t*) out_neon, in_neon, cfg, 1);
643  }
644  );
645 
646  time_speedup = (ne10_float32_t) time_c / time_neon;
647  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
648  ne10_log (__FUNCTION__, "Int16 scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
649 
650  /* IFFT test */
651  for (i = 1; i < (fftSize / 2); i++)
652  {
653  in_c[2 * i] = testInput_i16_scaled[2 * i];
654  in_c[2 * i + 1] = testInput_i16_scaled[2 * i + 1];
655  in_c[2 * (fftSize - i)] = in_c[2 * i];
656  in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
657  }
658  in_c[0] = testInput_i16_scaled[0];
659  in_c[1] = 0;
660  in_c[fftSize] = testInput_i16_scaled[1];
661  in_c[fftSize + 1] = 0;
662  memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int16_t));
663 
664  GET_TIME
665  (
666  time_c,
667  {
668  for (i = 0; i < test_loop; i++)
669  ne10_fft_c2r_1d_int16_c (out_c, (ne10_fft_cpx_int16_t*) in_c, cfg, 1);
670  }
671  );
672  GET_TIME
673  (
674  time_neon,
675  {
676  for (i = 0; i < test_loop; i++)
677  ne10_fft_c2r_1d_int16_neon (out_neon, (ne10_fft_cpx_int16_t*) in_neon, cfg, 1);
678  }
679  );
680 
681  time_speedup = (ne10_float32_t) time_c / time_neon;
682  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
683  ne10_log (__FUNCTION__, "Int16 scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
684 
685  NE10_FREE (cfg);
686  }
687 
688  NE10_FREE (guarded_in_c);
689  NE10_FREE (guarded_in_neon);
690  NE10_FREE (guarded_out_c);
691  NE10_FREE (guarded_out_neon);
692 }
693 
/*
 * Test entry point for the int16 complex-to-complex FFT.
 *
 * Runs the conformance test when SMOKE_TEST or REGRESSION_TEST is defined and
 * the performance test when PERFORMANCE_TEST is defined. Both macros are
 * checked with `defined` so the condition does not depend on the value (or
 * emptiness) of REGRESSION_TEST.
 */
void test_fft_c2c_1d_int16()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_c2c_1d_int16_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_c2c_1d_int16_performance();
#endif
}
704 
/*
 * Test entry point for the int16 real-to-complex FFT.
 *
 * Runs the conformance test when SMOKE_TEST or REGRESSION_TEST is defined and
 * the performance test when PERFORMANCE_TEST is defined. Both macros are
 * checked with `defined` so the condition does not depend on the value (or
 * emptiness) of REGRESSION_TEST.
 */
void test_fft_r2c_1d_int16()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_r2c_1d_int16_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_r2c_1d_int16_performance();
#endif
}
715 
716 static void my_test_setup (void)
717 {
718  ne10_log_buffer_ptr = ne10_log_buffer;
719 }
720 
721 void test_fixture_fft_c2c_1d_int16 (void)
722 {
723  test_fixture_start(); // starts a fixture
724 
725  fixture_setup (my_test_setup);
726 
727  run_test (test_fft_c2c_1d_int16); // run tests
728 
729  test_fixture_end(); // ends a fixture
730 }
731 
732 void test_fixture_fft_r2c_1d_int16 (void)
733 {
734  test_fixture_start(); // starts a fixture
735 
736  fixture_setup (my_test_setup);
737 
738  run_test (test_fft_r2c_1d_int16); // run tests
739 
740  test_fixture_end(); // ends a fixture
741 }
Structure for the 16-bit fixed point FFT function.
Definition: NE10_types.h:294
#define NE10_F2I16_MAX
Definition: NE10_macros.h:71
uint8_t ne10_uint8_t
Definition: NE10_types.h:73
int32_t ne10_int32_t
Definition: NE10_types.h:76
#define MIN_LENGTH_SAMPLES_REAL
void my_test_setup(void)
void ne10_fft_c2r_1d_int16_neon(ne10_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_c2r_1d_int16 using NEON SIMD capabilities.
void ne10_fft_c2r_1d_int16_c(ne10_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_c2r_1d_int16 using plain C.
float ne10_float32_t
Definition: NE10_types.h:80
void ne10_fft_r2c_1d_int16_neon(ne10_fft_cpx_int16_t *fout, ne10_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_r2c_1d_int16 using NEON SIMD capabilities.
void ne10_fft_c2c_1d_int16_c(ne10_fft_cpx_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_cfg_int16_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_c2c_1d_int16 using plain C.
void ne10_fft_r2c_1d_int16_c(ne10_fft_cpx_int16_t *fout, ne10_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_r2c_1d_int16 using plain C.
#define TEST_COUNT
int64_t ne10_int64_t
Definition: NE10_types.h:78
#define NE10_FREE(p)
Definition: NE10_macros.h:54
#define TEST_LENGTH_SAMPLES
#define MIN_LENGTH_SAMPLES_CPX
ne10_fft_r2c_cfg_int16_t ne10_fft_alloc_r2c_int16(ne10_int32_t nfft)
Creates a configuration structure for variants of ne10_fft_r2c_1d_int16 and ne10_fft_c2r_1d_int16.
#define NE10_MALLOC
Definition: NE10_macros.h:53
#define SNR_THRESHOLD_INT16
ne10_fft_cfg_int16_t ne10_fft_alloc_c2c_int16(ne10_int32_t nfft)
Creates a configuration structure for variants of ne10_fft_c2c_1d_int16.
int16_t ne10_int16_t
Definition: NE10_types.h:74
void ne10_fft_c2c_1d_int16_neon(ne10_fft_cpx_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_cfg_int16_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_c2c_1d_int16 using NEON SIMD capabilities.