Project Ne10
An open, optimized software library for the ARM architecture.
test_suite_fft_int32.c
Go to the documentation of this file.
1 /*
2  * Copyright 2013-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : test_suite_fft_int32.c
30  */
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <math.h>
35 #include <string.h>
36 
37 #include "NE10_dsp.h"
38 #include "seatest.h"
39 #include "unit_test_common.h"
40 
41 
42 /* ----------------------------------------------------------------------
43 ** Global defines
44 ** ------------------------------------------------------------------- */
45 
46 /* Max FFT Length and double buffer for real and imag */
47 #define TEST_LENGTH_SAMPLES (32768)
48 #define MIN_LENGTH_SAMPLES_CPX (2)
49 #define MIN_LENGTH_SAMPLES_REAL (MIN_LENGTH_SAMPLES_CPX)
50 
51 #define SNR_THRESHOLD_INT32 25.0f
52 
53 #define TEST_COUNT 250000
54 
55 #define NE10_FFT_PARA_LEVEL 4
56 
57 /* ----------------------------------------------------------------------
58 ** Defines each of the tests performed
59 ** ------------------------------------------------------------------- */
60 
61 //input and output
62 static ne10_int32_t testInput_i32_unscaled[TEST_LENGTH_SAMPLES * 2];
63 static ne10_int32_t testInput_i32_scaled[TEST_LENGTH_SAMPLES * 2];
64 static ne10_int32_t * guarded_in_c = NULL;
65 static ne10_int32_t * guarded_in_neon = NULL;
66 static ne10_int32_t * in_c = NULL;
67 static ne10_int32_t * in_neon = NULL;
68 
69 static ne10_int32_t * guarded_out_c = NULL;
70 static ne10_int32_t * guarded_out_neon = NULL;
71 static ne10_int32_t * out_c = NULL;
72 static ne10_int32_t * out_neon = NULL;
73 
74 static ne10_float32_t snr = 0.0f;
75 
76 static ne10_int64_t time_c = 0;
77 static ne10_int64_t time_neon = 0;
78 static ne10_float32_t time_speedup = 0.0f;
79 static ne10_float32_t time_savings = 0.0f;
80 
81 void test_fft_c2c_1d_int32_conformance()
82 {
83 
84  ne10_int32_t i = 0;
85  ne10_int32_t baseSize = 0, fftSize = 0;
86  ne10_int32_t factor;
88  ne10_fft_cfg_int32_t cfg_neon;
89  ne10_float32_t * out_c_tmp = NULL;
90  ne10_float32_t * out_neon_tmp = NULL;
91 
92  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
93 
94  /* init input memory */
95  guarded_in_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
96  guarded_in_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
97  in_c = guarded_in_c + ARRAY_GUARD_LEN;
98  in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
99 
100  /* init dst memory */
101  guarded_out_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
102  guarded_out_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
103  out_c = guarded_out_c + ARRAY_GUARD_LEN;
104  out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
105 
106  out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
107  out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
108 
109  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
110  {
111  testInput_i32_unscaled[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
112  testInput_i32_scaled[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
113  }
114 
115  for (baseSize = MIN_LENGTH_SAMPLES_CPX; baseSize <= TEST_LENGTH_SAMPLES; baseSize *= 2)
116  {
117  factor = 2;
118  fftSize = baseSize;
119  while (factor && fftSize <= TEST_LENGTH_SAMPLES)
120  {
121  if (fftSize == 2 || fftSize % NE10_FFT_PARA_LEVEL == 0)
122  {
123  fprintf (stdout, "FFT size %d\n", fftSize);
124  /* FFT init */
125  cfg_c = ne10_fft_alloc_c2c_int32_c (fftSize);
126  if (cfg_c == NULL)
127  {
128  fprintf (stdout, "======ERROR, FFT alloc fails\n");
129  return;
130  }
131 
132  cfg_neon = ne10_fft_alloc_c2c_int32_neon (fftSize);
133  if (cfg_neon == NULL)
134  {
135  NE10_FREE (cfg_c);
136  fprintf (stdout, "======ERROR, FFT alloc fails\n");
137  return;
138  }
139 
140  /* unscaled FFT test */
141  memcpy (in_c, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
142  memcpy (in_neon, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
143 
144  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
145  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
146  ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 0, 0);
147  ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 0, 0);
148  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)));
149  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)));
150 
151  //conformance test
152  for (i = 0; i < fftSize * 2; i++)
153  {
154  out_c_tmp[i] = (ne10_float32_t) out_c[i];
155  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
156  }
157  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
158  assert_false ( (snr < SNR_THRESHOLD_INT32));
159 
160  /* IFFT test */
161  memcpy (in_c, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
162  memcpy (in_neon, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
163 
164  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
165  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
166  ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 1, 0);
167  ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 1, 0);
168  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)));
169  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)));
170 
171  //conformance test
172  for (i = 0; i < fftSize * 2; i++)
173  {
174  out_c_tmp[i] = (ne10_float32_t) out_c[i];
175  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
176  }
177  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
178  assert_false ( (snr < SNR_THRESHOLD_INT32));
179 
180  /* scaled FFT test */
181  memcpy (in_c, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
182  memcpy (in_neon, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
183 
184  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
185  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
186  ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 0, 1);
187  ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 0, 1);
188  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)));
189  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)));
190 
191  //conformance test
192  for (i = 0; i < fftSize * 2; i++)
193  {
194  out_c_tmp[i] = (ne10_float32_t) out_c[i];
195  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
196  }
197  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
198  assert_false ( (snr < SNR_THRESHOLD_INT32));
199 
200  /* IFFT test */
201  memcpy (in_c, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
202  memcpy (in_neon, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
203 
204  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t));
205  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t));
206  ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 1, 1);
207  ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 1, 1);
208  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * 2 * sizeof (ne10_int32_t)));
209  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * 2 * sizeof (ne10_int32_t)));
210 
211  //conformance test
212  for (i = 0; i < fftSize * 2; i++)
213  {
214  out_c_tmp[i] = (ne10_float32_t) out_c[i];
215  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
216  }
217  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
218  assert_false ( (snr < SNR_THRESHOLD_INT32));
219 
220  NE10_FREE (cfg_c);
221  NE10_FREE (cfg_neon);
222  }
223 
224  switch (factor)
225  {
226  case 2:
227  factor = 3;
228  fftSize += (fftSize / 2);
229  break;
230  case 3:
231  factor = 5;
232  fftSize += (fftSize / 3) * 2;
233  break;
234  case 5:
235  factor = 15;
236  fftSize += (fftSize * 2);
237  break;
238  case 15:
239  factor = 0;
240  break;
241  }
242  }
243  }
244 
245  NE10_FREE (guarded_in_c);
246  NE10_FREE (guarded_in_neon);
247  NE10_FREE (guarded_out_c);
248  NE10_FREE (guarded_out_neon);
249  NE10_FREE (out_c_tmp);
250  NE10_FREE (out_neon_tmp);
251 }
252 
253 void test_fft_c2c_1d_int32_performance()
254 {
255 
256  ne10_int32_t i = 0;
257  ne10_int32_t fftSize = 0;
258  ne10_fft_cfg_int32_t cfg_c;
259  ne10_fft_cfg_int32_t cfg_neon;
260  ne10_int32_t test_loop = 0;
261 
262  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
263  fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio");
264 
265  /* init input memory */
266  guarded_in_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
267  guarded_in_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
268  in_c = guarded_in_c + ARRAY_GUARD_LEN;
269  in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
270 
271  /* init dst memory */
272  guarded_out_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
273  guarded_out_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
274  out_c = guarded_out_c + ARRAY_GUARD_LEN;
275  out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
276 
277  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
278  {
279  testInput_i32_unscaled[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
280  testInput_i32_scaled[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
281  }
282  for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
283  {
284  fprintf (stdout, "FFT size %d\n", fftSize);
285 
286  /* FFT test */
287  memcpy (in_c, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
288  memcpy (in_neon, testInput_i32_unscaled, 2 * fftSize * sizeof (ne10_int32_t));
289  cfg_c = ne10_fft_alloc_c2c_int32_c (fftSize);
290  if (cfg_c == NULL)
291  {
292  fprintf (stdout, "======ERROR, FFT alloc fails\n");
293  return;
294  }
295 
296  cfg_neon = ne10_fft_alloc_c2c_int32_neon (fftSize);
297  if (cfg_neon == NULL)
298  {
299  NE10_FREE (cfg_c);
300  fprintf (stdout, "======ERROR, FFT alloc fails\n");
301  return;
302  }
303 
304  test_loop = TEST_COUNT / fftSize;
305 
306  GET_TIME
307  (
308  time_c,
309  {
310  for (i = 0; i < test_loop; i++)
311  ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 0, 0);
312  }
313  );
314  GET_TIME
315  (
316  time_neon,
317  {
318  for (i = 0; i < test_loop; i++)
319  ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_neon, 0, 0);
320  }
321  );
322  time_speedup = (ne10_float32_t) time_c / time_neon;
323  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
324  ne10_log (__FUNCTION__, " unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
325 
326  /* IFFT test */
327  memcpy (in_c, out_c, 2 * fftSize * sizeof (ne10_int32_t));
328  memcpy (in_neon, out_c, 2 * fftSize * sizeof (ne10_int32_t));
329 
330  GET_TIME
331  (
332  time_c,
333  {
334  for (i = 0; i < test_loop; i++)
335  ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 1, 0);
336  }
337  );
338  GET_TIME
339  (
340  time_neon,
341  {
342  for (i = 0; i < test_loop; i++)
343  ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 1, 0);
344  }
345  );
346 
347  time_speedup = (ne10_float32_t) time_c / time_neon;
348  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
349  ne10_log (__FUNCTION__, "unscaled IFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
350 
351  /* FFT test */
352  memcpy (in_c, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
353  memcpy (in_neon, testInput_i32_scaled, 2 * fftSize * sizeof (ne10_int32_t));
354 
355  GET_TIME
356  (
357  time_c,
358  {
359  for (i = 0; i < test_loop; i++)
360  ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 0, 1);
361  }
362  );
363  GET_TIME
364  (
365  time_neon,
366  {
367  for (i = 0; i < test_loop; i++)
368  ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_neon, 0, 1);
369  }
370  );
371  time_speedup = (ne10_float32_t) time_c / time_neon;
372  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
373  ne10_log (__FUNCTION__, " scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
374 
375  /* IFFT test */
376  memcpy (in_c, out_c, 2 * fftSize * sizeof (ne10_int32_t));
377  memcpy (in_neon, out_c, 2 * fftSize * sizeof (ne10_int32_t));
378 
379  GET_TIME
380  (
381  time_c,
382  {
383  for (i = 0; i < test_loop; i++)
384  ne10_fft_c2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, (ne10_fft_cpx_int32_t*) in_c, cfg_c, 1, 1);
385  }
386  );
387  GET_TIME
388  (
389  time_neon,
390  {
391  for (i = 0; i < test_loop; i++)
392  ne10_fft_c2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg_neon, 1, 1);
393  }
394  );
395 
396  time_speedup = (ne10_float32_t) time_c / time_neon;
397  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
398  ne10_log (__FUNCTION__, " scaled IFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
399 
400  NE10_FREE (cfg_c);
401  NE10_FREE (cfg_neon);
402  }
403 
404  NE10_FREE (guarded_in_c);
405  NE10_FREE (guarded_in_neon);
406  NE10_FREE (guarded_out_c);
407  NE10_FREE (guarded_out_neon);
408 }
409 
410 void test_fft_r2c_1d_int32_conformance()
411 {
412 
413  ne10_int32_t i = 0;
414  ne10_int32_t fftSize = 0;
416  ne10_float32_t * out_c_tmp = NULL;
417  ne10_float32_t * out_neon_tmp = NULL;
418 
419  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
420 
421  /* init input memory */
422  guarded_in_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
423  guarded_in_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
424  in_c = guarded_in_c + ARRAY_GUARD_LEN;
425  in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
426 
427  /* init dst memory */
428  guarded_out_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
429  guarded_out_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
430  out_c = guarded_out_c + ARRAY_GUARD_LEN;
431  out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
432 
433  out_c_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
434  out_neon_tmp = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2) * sizeof (ne10_float32_t));
435 
436  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
437  {
438  testInput_i32_unscaled[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
439  testInput_i32_scaled[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
440  }
441  for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
442  {
443  fprintf (stdout, "FFT size %d\n", fftSize);
444  /* FFT init */
445  cfg = ne10_fft_alloc_r2c_int32 (fftSize);
446  if (cfg == NULL)
447  {
448  fprintf (stdout, "======ERROR, FFT alloc fails\n");
449  return;
450  }
451 
452  /* unscaled FFT test */
453  memcpy (in_c, testInput_i32_unscaled, fftSize * sizeof (ne10_int32_t));
454  memcpy (in_neon, testInput_i32_unscaled, fftSize * sizeof (ne10_int32_t));
455 
456  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
457  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
458 
459  ne10_fft_r2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, in_c, cfg, 0);
460  ne10_fft_r2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, in_neon, cfg, 0);
461 
462  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t)));
463  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t)));
464 
465  //conformance test
466  for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
467  {
468  out_c_tmp[i] = (ne10_float32_t) out_c[i];
469  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
470  }
471  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
472  assert_false ( (snr < SNR_THRESHOLD_INT32));
473 
474  /* IFFT test */
475  for (i = 1; i < (fftSize / 2); i++)
476  {
477  in_c[2 * i] = testInput_i32_unscaled[2 * i];
478  in_c[2 * i + 1] = testInput_i32_unscaled[2 * i + 1];
479  in_c[2 * (fftSize - i)] = in_c[2 * i];
480  in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
481  }
482  in_c[0] = testInput_i32_unscaled[0];
483  in_c[1] = 0;
484  in_c[fftSize] = testInput_i32_unscaled[1];
485  in_c[fftSize + 1] = 0;
486  memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int32_t));
487 
488  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int32_t));
489  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int32_t));
490 
491  ne10_fft_c2r_1d_int32_c (out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 0);
492  ne10_fft_c2r_1d_int32_neon (out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 0);
493 
494  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int32_t)));
495  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int32_t)));
496 
497  //conformance test
498  for (i = 0; i < fftSize; i++)
499  {
500  out_c_tmp[i] = (ne10_float32_t) out_c[i];
501  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
502  }
503  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
504  assert_false ( (snr < SNR_THRESHOLD_INT32));
505 
506  /* scaled FFT test */
507  memcpy (in_c, testInput_i32_scaled, fftSize * sizeof (ne10_int32_t));
508  memcpy (in_neon, testInput_i32_scaled, fftSize * sizeof (ne10_int32_t));
509 
510  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
511  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t));
512 
513  ne10_fft_r2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, in_c, cfg, 1);
514  ne10_fft_r2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, in_neon, cfg, 1);
515 
516  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t)));
517  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, (fftSize / 2 + 1) * 2 * sizeof (ne10_int32_t)));
518 
519  //conformance test
520  for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
521  {
522  out_c_tmp[i] = (ne10_float32_t) out_c[i];
523  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
524  }
525  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
526  assert_false ( (snr < SNR_THRESHOLD_INT32));
527 
528  /* IFFT test */
529  for (i = 1; i < (fftSize / 2); i++)
530  {
531  in_c[2 * i] = testInput_i32_scaled[2 * i];
532  in_c[2 * i + 1] = testInput_i32_scaled[2 * i + 1];
533  in_c[2 * (fftSize - i)] = in_c[2 * i];
534  in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
535  }
536  in_c[0] = testInput_i32_scaled[0];
537  in_c[1] = 0;
538  in_c[fftSize] = testInput_i32_scaled[1];
539  in_c[fftSize + 1] = 0;
540  memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int32_t));
541 
542  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int32_t));
543  GUARD_ARRAY_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int32_t));
544 
545  ne10_fft_c2r_1d_int32_c (out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 1);
546  ne10_fft_c2r_1d_int32_neon (out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 1);
547 
548  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_c, fftSize * sizeof (ne10_int32_t)));
549  assert_true (CHECK_ARRAY_GUARD_UINT8 ( (ne10_uint8_t*) out_neon, fftSize * sizeof (ne10_int32_t)));
550 
551  //conformance test
552  for (i = 0; i < fftSize; i++)
553  {
554  out_c_tmp[i] = (ne10_float32_t) out_c[i];
555  out_neon_tmp[i] = (ne10_float32_t) out_neon[i];
556  }
557  snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
558  assert_false ( (snr < SNR_THRESHOLD_INT32));
559 
560 
561  NE10_FREE (cfg);
562  }
563 
564  NE10_FREE (guarded_in_c);
565  NE10_FREE (guarded_in_neon);
566  NE10_FREE (guarded_out_c);
567  NE10_FREE (guarded_out_neon);
568  NE10_FREE (out_c_tmp);
569  NE10_FREE (out_neon_tmp);
570 }
571 
572 void test_fft_r2c_1d_int32_performance()
573 {
574 
575  ne10_int32_t i = 0;
576  ne10_int32_t fftSize = 0;
578  ne10_int32_t test_loop = 0;
579 
580  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
581  fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio");
582 
583  /* init input memory */
584  guarded_in_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
585  guarded_in_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
586  in_c = guarded_in_c + ARRAY_GUARD_LEN;
587  in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
588 
589  /* init dst memory */
590  guarded_out_c = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
591  guarded_out_neon = (ne10_int32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_int32_t));
592  out_c = guarded_out_c + ARRAY_GUARD_LEN;
593  out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
594 
595  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
596  {
597  testInput_i32_unscaled[i] = (ne10_int32_t) (drand48() * 8192) - 4096;
598  testInput_i32_scaled[i] = (ne10_int32_t) (drand48() * NE10_F2I32_MAX) - NE10_F2I32_MAX / 2;
599  }
600  for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
601  {
602  fprintf (stdout, "FFT size %d\n", fftSize);
603 
604  cfg = ne10_fft_alloc_r2c_int32 (fftSize);
605  if (cfg == NULL)
606  {
607  fprintf (stdout, "======ERROR, FFT alloc fails\n");
608  return;
609  }
610  test_loop = TEST_COUNT / fftSize;
611  /* unscaled FFT test */
612  memcpy (in_c, testInput_i32_unscaled, fftSize * sizeof (ne10_int32_t));
613  memcpy (in_neon, testInput_i32_unscaled, fftSize * sizeof (ne10_int32_t));
614 
615  GET_TIME
616  (
617  time_c,
618  {
619  for (i = 0; i < test_loop; i++)
620  ne10_fft_r2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, in_c, cfg, 0);
621  }
622  );
623  GET_TIME
624  (
625  time_neon,
626  {
627  for (i = 0; i < test_loop; i++)
628  ne10_fft_r2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, in_neon, cfg, 0);
629  }
630  );
631 
632  time_speedup = (ne10_float32_t) time_c / time_neon;
633  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
634  ne10_log (__FUNCTION__, "Int32 unscaled RFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
635 
636  /* IFFT test */
637  for (i = 1; i < (fftSize / 2); i++)
638  {
639  in_c[2 * i] = testInput_i32_unscaled[2 * i];
640  in_c[2 * i + 1] = testInput_i32_unscaled[2 * i + 1];
641  in_c[2 * (fftSize - i)] = in_c[2 * i];
642  in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
643  }
644  in_c[0] = testInput_i32_unscaled[0];
645  in_c[1] = 0;
646  in_c[fftSize] = testInput_i32_unscaled[1];
647  in_c[fftSize + 1] = 0;
648  memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int32_t));
649 
650  GET_TIME
651  (
652  time_c,
653  {
654  for (i = 0; i < test_loop; i++)
655  ne10_fft_c2r_1d_int32_c (out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 0);
656  }
657  );
658  GET_TIME
659  (
660  time_neon,
661  {
662  for (i = 0; i < test_loop; i++)
663  ne10_fft_c2r_1d_int32_neon (out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 0);
664  }
665  );
666 
667  time_speedup = (ne10_float32_t) time_c / time_neon;
668  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
669  ne10_log (__FUNCTION__, "Int32 unscaled RIFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
670 
671  /* scaled FFT test */
672  memcpy (in_c, testInput_i32_scaled, fftSize * sizeof (ne10_int32_t));
673  memcpy (in_neon, testInput_i32_scaled, fftSize * sizeof (ne10_int32_t));
674 
675  GET_TIME
676  (
677  time_c,
678  {
679  for (i = 0; i < test_loop; i++)
680  ne10_fft_r2c_1d_int32_c ( (ne10_fft_cpx_int32_t*) out_c, in_c, cfg, 1);
681  }
682  );
683  GET_TIME
684  (
685  time_neon,
686  {
687  for (i = 0; i < test_loop; i++)
688  ne10_fft_r2c_1d_int32_neon ( (ne10_fft_cpx_int32_t*) out_neon, in_neon, cfg, 1);
689  }
690  );
691 
692  time_speedup = (ne10_float32_t) time_c / time_neon;
693  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
694  ne10_log (__FUNCTION__, "Int32 scaled RFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
695 
696  /* IFFT test */
697  for (i = 1; i < (fftSize / 2); i++)
698  {
699  in_c[2 * i] = testInput_i32_scaled[2 * i];
700  in_c[2 * i + 1] = testInput_i32_scaled[2 * i + 1];
701  in_c[2 * (fftSize - i)] = in_c[2 * i];
702  in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
703  }
704  in_c[0] = testInput_i32_scaled[0];
705  in_c[1] = 0;
706  in_c[fftSize] = testInput_i32_scaled[1];
707  in_c[fftSize + 1] = 0;
708  memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_int32_t));
709 
710  GET_TIME
711  (
712  time_c,
713  {
714  for (i = 0; i < test_loop; i++)
715  ne10_fft_c2r_1d_int32_c (out_c, (ne10_fft_cpx_int32_t*) in_c, cfg, 1);
716  }
717  );
718  GET_TIME
719  (
720  time_neon,
721  {
722  for (i = 0; i < test_loop; i++)
723  ne10_fft_c2r_1d_int32_neon (out_neon, (ne10_fft_cpx_int32_t*) in_neon, cfg, 1);
724  }
725  );
726 
727  time_speedup = (ne10_float32_t) time_c / time_neon;
728  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
729  ne10_log (__FUNCTION__, "Int32 scaled RIFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
730 
731  NE10_FREE (cfg);
732  }
733 
734  NE10_FREE (guarded_in_c);
735  NE10_FREE (guarded_in_neon);
736  NE10_FREE (guarded_out_c);
737  NE10_FREE (guarded_out_neon);
738 }
739 
/*
 * Entry point for the complex-to-complex int32 FFT tests.
 *
 * Runs the conformance test when SMOKE_TEST or REGRESSION_TEST is defined and
 * the performance test when PERFORMANCE_TEST is defined.
 */
void test_fft_c2c_1d_int32 (void)
{
    /* Both macros are tested with defined(), so a macro defined without a
       value is accepted as well. */
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_c2c_1d_int32_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_c2c_1d_int32_performance();
#endif
}
750 
/*
 * Entry point for the real-to-complex int32 FFT tests.
 *
 * Runs the conformance test when SMOKE_TEST or REGRESSION_TEST is defined and
 * the performance test when PERFORMANCE_TEST is defined.
 */
void test_fft_r2c_1d_int32 (void)
{
    /* Both macros are tested with defined(), so a macro defined without a
       value is accepted as well. */
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_r2c_1d_int32_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_r2c_1d_int32_performance();
#endif
}
761 
762 static void my_test_setup (void)
763 {
764  ne10_log_buffer_ptr = ne10_log_buffer;
765 }
766 
767 void test_fixture_fft_c2c_1d_int32 (void)
768 {
769  test_fixture_start(); // starts a fixture
770 
771  fixture_setup (my_test_setup);
772 
773  run_test (test_fft_c2c_1d_int32); // run tests
774 
775  test_fixture_end(); // ends a fixture
776 }
777 
778 void test_fixture_fft_r2c_1d_int32 (void)
779 {
780  test_fixture_start(); // starts a fixture
781 
782  fixture_setup (my_test_setup);
783 
784  run_test (test_fft_r2c_1d_int32); // run tests
785 
786  test_fixture_end(); // ends a fixture
787 }
void ne10_fft_c2c_1d_int32_neon(ne10_fft_cpx_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_cfg_int32_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_c2c_1d_int32 using NEON SIMD capabilities.
uint8_t ne10_uint8_t
Definition: NE10_types.h:73
void ne10_fft_c2r_1d_int32_neon(ne10_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_c2r_1d_int32 using NEON SIMD capabilities.
#define TEST_LENGTH_SAMPLES
int32_t ne10_int32_t
Definition: NE10_types.h:76
void my_test_setup(void)
float ne10_float32_t
Definition: NE10_types.h:80
void ne10_fft_r2c_1d_int32_c(ne10_fft_cpx_int32_t *fout, ne10_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_r2c_1d_int32 using plain C.
int64_t ne10_int64_t
Definition: NE10_types.h:78
ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_neon(ne10_int32_t nfft)
Specific implementation of ne10_fft_alloc_c2c_int32 for ne10_fft_c2c_1d_int32_neon.
Definition: NE10_fft.c:451
#define SNR_THRESHOLD_INT32
void ne10_fft_r2c_1d_int32_neon(ne10_fft_cpx_int32_t *fout, ne10_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_r2c_1d_int32 using NEON SIMD capabilities.
void ne10_fft_c2c_1d_int32_c(ne10_fft_cpx_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_cfg_int32_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_c2c_1d_int32 using plain C.
#define NE10_FREE(p)
Definition: NE10_macros.h:54
Structure for the 32-bit fixed point FFT function.
Definition: NE10_types.h:325
ne10_fft_r2c_cfg_int32_t ne10_fft_alloc_r2c_int32(ne10_int32_t nfft)
Creates a configuration structure for variants of ne10_fft_r2c_1d_int32 and ne10_fft_c2r_1d_int32.
#define NE10_FFT_PARA_LEVEL
#define TEST_COUNT
#define NE10_F2I32_MAX
Definition: NE10_macros.h:81
#define MIN_LENGTH_SAMPLES_REAL
#define NE10_MALLOC
Definition: NE10_macros.h:53
#define MIN_LENGTH_SAMPLES_CPX
ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_c(ne10_int32_t nfft)
Specific implementation of ne10_fft_alloc_c2c_int32 for ne10_fft_c2c_1d_int32_c.
void ne10_fft_c2r_1d_int32_c(ne10_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_c2r_1d_int32 using plain C.