Project Ne10
An open, optimized software library for the ARM architecture.
test_suite_fft_float32.c
Go to the documentation of this file.
1 /*
2  * Copyright 2013-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : test_suite_fft_float32.c
30  */
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <math.h>
35 #include <string.h>
36 
37 #include "NE10_dsp.h"
38 #include "NE10_macros.h"
39 #include "seatest.h"
40 #include "unit_test_common.h"
41 
42 
43 /* ----------------------------------------------------------------------
44 ** Global defines
45 ** ------------------------------------------------------------------- */
46 
47 /* Max FFT Length and double buffer for real and imag */
48 #define TEST_LENGTH_SAMPLES (32768)
49 #define MIN_LENGTH_SAMPLES_CPX (2)
50 #define MIN_LENGTH_SAMPLES_REAL (MIN_LENGTH_SAMPLES_CPX)
51 #define SNR_THRESHOLD_FLOAT32 90.0f
52 
53 #define TEST_COUNT 10000000
54 
55 #define NE10_FFT_PARA_LEVEL 4
56 
57 /* ----------------------------------------------------------------------
58 ** Test input data for F32
59 ** Generated by the MATLAB rand() function
60 ** ------------------------------------------------------------------- */
61 
62 static ne10_float32_t testInput_f32[TEST_LENGTH_SAMPLES * 2];
63 
64 /* ----------------------------------------------------------------------
65 ** Defines each of the tests performed
66 ** ------------------------------------------------------------------- */
67 
68 //input and output
69 static ne10_float32_t * guarded_in_c = NULL;
70 static ne10_float32_t * guarded_in_neon = NULL;
71 static ne10_float32_t * in_c = NULL;
72 static ne10_float32_t * in_neon = NULL;
73 
74 static ne10_float32_t * guarded_out_c = NULL;
75 static ne10_float32_t * guarded_out_neon = NULL;
76 static ne10_float32_t * out_c = NULL;
77 static ne10_float32_t * out_neon = NULL;
78 
79 static ne10_float32_t snr = 0.0f;
80 
81 static ne10_int64_t time_c = 0;
82 static ne10_int64_t time_neon = 0;
83 static ne10_float32_t time_speedup = 0.0f;
84 static ne10_float32_t time_savings = 0.0f;
85 
86 static ne10_fft_cfg_float32_t cfg_c;
87 static ne10_fft_cfg_float32_t cfg_neon;
88 
89 static ne10_int32_t test_c2c_alloc (ne10_int32_t fftSize);
90 
91 void test_fft_c2c_1d_float32_conformance()
92 {
93  ne10_int32_t baseSize = 0, fftSize = 0;
94  ne10_int32_t flag_result = NE10_OK;
95  ne10_int32_t factor;
96 
97  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
98 
99  for (baseSize = MIN_LENGTH_SAMPLES_CPX; baseSize <= TEST_LENGTH_SAMPLES; baseSize *= 2)
100  {
101  factor = 2;
102  fftSize = baseSize;
103  while (factor && fftSize <= TEST_LENGTH_SAMPLES)
104  {
105  if (fftSize == 2 || fftSize % NE10_FFT_PARA_LEVEL == 0)
106  {
107  fprintf (stdout, "FFT size %d\n", fftSize);
108  flag_result = test_c2c_alloc (fftSize);
109  if (flag_result == NE10_ERR)
110  {
111  return;
112  }
113 
114  /* FFT test */
115  memcpy (in_c, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
116  memcpy (in_neon, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
117 
118  GUARD_ARRAY (out_c, fftSize * 2);
119  GUARD_ARRAY (out_neon, fftSize * 2);
120 
122  ne10_fft_c2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg_neon, 0);
123 
124  assert_true (CHECK_ARRAY_GUARD (out_c, fftSize * 2));
125  assert_true (CHECK_ARRAY_GUARD (out_neon, fftSize * 2));
126 
127  //conformance test
128  snr = CAL_SNR_FLOAT32 (out_c, out_neon, fftSize * 2);
129  assert_false ( (snr < SNR_THRESHOLD));
130 
131  /* IFFT test */
132  memcpy (in_c, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
133  memcpy (in_neon, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
134 
135  GUARD_ARRAY (out_c, fftSize * 2);
136  GUARD_ARRAY (out_neon, fftSize * 2);
137 
139  ne10_fft_c2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg_neon, 1);
140 
141  assert_true (CHECK_ARRAY_GUARD (out_c, fftSize * 2));
142  assert_true (CHECK_ARRAY_GUARD (out_neon, fftSize * 2));
143 
144  CHECK_ARRAY_GUARD (out_c, fftSize * 2);
145  CHECK_ARRAY_GUARD (out_neon, fftSize * 2);
146 
147  //conformance test
148  snr = CAL_SNR_FLOAT32 (out_c, out_neon, fftSize * 2);
149  assert_false ( (snr < SNR_THRESHOLD));
150 
151  NE10_FREE (cfg_c);
152  NE10_FREE (cfg_neon);
153  }
154 
155  switch (factor)
156  {
157  case 2:
158  factor = 3;
159  fftSize += (fftSize / 2);
160  break;
161  case 3:
162  factor = 5;
163  fftSize += (fftSize / 3) * 2;
164  break;
165  case 5:
166  factor = 15;
167  fftSize += (fftSize * 2);
168  break;
169  case 15:
170  factor = 0;
171  break;
172  }
173  }
174 
175  }
176 }
177 
178 void test_fft_c2c_1d_float32_performance()
179 {
180  ne10_int32_t i = 0;
181  ne10_int32_t fftSize = 0;
182  ne10_int32_t flag_result = NE10_OK;
183  ne10_int32_t test_loop = 0;
184 
185  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
186  fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio");
187 
188  for (fftSize = MIN_LENGTH_SAMPLES_CPX; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
189  {
190  fprintf (stdout, "FFT size %d\n", fftSize);
191 
192  /* FFT test */
193  memcpy (in_c, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
194  memcpy (in_neon, testInput_f32, 2 * fftSize * sizeof (ne10_float32_t));
195  flag_result = test_c2c_alloc (fftSize);
196  if (flag_result == NE10_ERR)
197  {
198  return;
199  }
200 
201  test_loop = TEST_COUNT / fftSize;
202 
203  GET_TIME
204  (
205  time_c,
206  {
207  for (i = 0; i < test_loop; i++)
209  }
210  );
211  GET_TIME
212  (
213  time_neon,
214  {
215  for (i = 0; i < test_loop; i++)
216  ne10_fft_c2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg_neon, 0);
217  }
218  );
219 
220  time_speedup = (ne10_float32_t) time_c / time_neon;
221  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
222  ne10_log (__FUNCTION__, "Float FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
223 
224  /* IFFT test */
225  memcpy (in_c, out_c, 2 * fftSize * sizeof (ne10_float32_t));
226  memcpy (in_neon, out_c, 2 * fftSize * sizeof (ne10_float32_t));
227 
228  GET_TIME
229  (
230  time_c,
231  {
232  for (i = 0; i < test_loop; i++)
234  }
235  );
236  GET_TIME
237  (
238  time_neon,
239  {
240  for (i = 0; i < test_loop; i++)
241  ne10_fft_c2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg_neon, 1);
242  }
243  );
244 
245  time_speedup = (ne10_float32_t) time_c / time_neon;
246  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
247  ne10_log (__FUNCTION__, "Float FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
248 
249  NE10_FREE (cfg_c);
250  NE10_FREE (cfg_neon);
251  }
252 }
253 
254 void test_fft_r2c_1d_float32_conformance()
255 {
256 
257  ne10_int32_t i = 0;
258  ne10_int32_t fftSize = 0;
260 
261  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
262 
263  for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
264  {
265  fprintf (stdout, "FFT size %d\n", fftSize);
266 
267  /* FFT test */
268  memcpy (in_c, testInput_f32, fftSize * sizeof (ne10_float32_t));
269  memcpy (in_neon, testInput_f32, fftSize * sizeof (ne10_float32_t));
270  cfg = ne10_fft_alloc_r2c_float32 (fftSize);
271  if (cfg == NULL)
272  {
273  fprintf (stdout, "======ERROR, FFT alloc fails\n");
274  return;
275  }
276 
277  GUARD_ARRAY (out_c, (fftSize / 2 + 1) * 2);
278  GUARD_ARRAY (out_neon, (fftSize / 2 + 1) * 2);
279 
280  ne10_fft_r2c_1d_float32_c ( (ne10_fft_cpx_float32_t*) out_c, in_c, cfg);
281  ne10_fft_r2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, in_neon, cfg);
282 
283  assert_true (CHECK_ARRAY_GUARD (out_c, (fftSize / 2 + 1) * 2));
284  assert_true (CHECK_ARRAY_GUARD (out_neon, (fftSize / 2 + 1) * 2));
285 
286  //conformance test
287  snr = CAL_SNR_FLOAT32 (out_c, out_neon, (fftSize / 2 + 1) * 2);
288  assert_false ( (snr < SNR_THRESHOLD_FLOAT32));
289 
290  /* IFFT test */
291  for (i = 1; i < (fftSize / 2); i++)
292  {
293  in_c[2 * i] = testInput_f32[2 * i];
294  in_c[2 * i + 1] = testInput_f32[2 * i + 1];
295  in_c[2 * (fftSize - i)] = in_c[2 * i];
296  in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
297  }
298  in_c[0] = testInput_f32[0];
299  in_c[1] = 0;
300  in_c[fftSize] = testInput_f32[1];
301  in_c[fftSize + 1] = 0;
302  memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_float32_t));
303 
304  GUARD_ARRAY (out_c, fftSize);
305  GUARD_ARRAY (out_neon, fftSize);
306 
308  ne10_fft_c2r_1d_float32_neon (out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg);
309 
310  assert_true (CHECK_ARRAY_GUARD (out_c, fftSize));
311  assert_true (CHECK_ARRAY_GUARD (out_neon, fftSize));
312 
313  //conformance test
314  snr = CAL_SNR_FLOAT32 (out_c, out_neon, fftSize);
315  assert_false ( (snr < SNR_THRESHOLD_FLOAT32));
316 
317  NE10_FREE (cfg);
318  }
319 }
320 
321 void test_fft_r2c_1d_float32_performance()
322 {
323 
324  ne10_int32_t i = 0;
325  ne10_int32_t fftSize = 0;
327  ne10_int32_t test_loop = 0;
328 
329  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
330  fprintf (stdout, "%25s%20s%20s%20s%20s\n", "FFT Length", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio");
331 
332  for (fftSize = MIN_LENGTH_SAMPLES_REAL; fftSize <= TEST_LENGTH_SAMPLES; fftSize *= 2)
333  {
334  fprintf (stdout, "FFT size %d\n", fftSize);
335 
336  /* FFT test */
337  memcpy (in_c, testInput_f32, fftSize * sizeof (ne10_float32_t));
338  memcpy (in_neon, testInput_f32, fftSize * sizeof (ne10_float32_t));
339  cfg = ne10_fft_alloc_r2c_float32 (fftSize);
340  if (cfg == NULL)
341  {
342  fprintf (stdout, "======ERROR, FFT alloc fails\n");
343  return;
344  }
345  test_loop = TEST_COUNT / fftSize;
346 
347  GET_TIME
348  (
349  time_c,
350  {
351  for (i = 0; i < test_loop; i++)
352  ne10_fft_r2c_1d_float32_c ( (ne10_fft_cpx_float32_t*) out_c, in_c, cfg);
353  }
354  );
355  GET_TIME
356  (
357  time_neon,
358  {
359  for (i = 0; i < test_loop; i++)
360  ne10_fft_r2c_1d_float32_neon ( (ne10_fft_cpx_float32_t*) out_neon, in_neon, cfg);
361  }
362  );
363 
364  time_speedup = (ne10_float32_t) time_c / time_neon;
365  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
366  ne10_log (__FUNCTION__, "Float FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
367 
368  /* IFFT test */
369  for (i = 1; i < (fftSize / 2); i++)
370  {
371  in_c[2 * i] = testInput_f32[2 * i];
372  in_c[2 * i + 1] = testInput_f32[2 * i + 1];
373  in_c[2 * (fftSize - i)] = in_c[2 * i];
374  in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
375  }
376  in_c[0] = testInput_f32[0];
377  in_c[1] = 0;
378  in_c[fftSize] = testInput_f32[1];
379  in_c[fftSize + 1] = 0;
380  memcpy (in_neon, in_c, fftSize * 2 * sizeof (ne10_float32_t));
381 
382  GET_TIME
383  (
384  time_c,
385  {
386  for (i = 0; i < test_loop; i++)
388  }
389  );
390  GET_TIME
391  (
392  time_neon,
393  {
394  for (i = 0; i < test_loop; i++)
395  ne10_fft_c2r_1d_float32_neon (out_neon, (ne10_fft_cpx_float32_t*) in_neon, cfg);
396  }
397  );
398 
399  time_speedup = (ne10_float32_t) time_c / time_neon;
400  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
401  ne10_log (__FUNCTION__, "Float FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
402 
403  NE10_FREE (cfg);
404  }
405 }
406 
407 static void my_test_setup (void)
408 {
409  ne10_log_buffer_ptr = ne10_log_buffer;
410  ne10_int32_t i;
411 
412  /* init input memory */
413  guarded_in_c = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_float32_t));
414  guarded_in_neon = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_float32_t));
415  in_c = guarded_in_c + ARRAY_GUARD_LEN;
416  in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
417 
418  /* init dst memory */
419  guarded_out_c = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_float32_t));
420  guarded_out_neon = (ne10_float32_t*) NE10_MALLOC ( (TEST_LENGTH_SAMPLES * 2 + ARRAY_GUARD_LEN * 2) * sizeof (ne10_float32_t));
421  out_c = guarded_out_c + ARRAY_GUARD_LEN;
422  out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
423 
424  for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
425  {
426  testInput_f32[i] = (ne10_float32_t) (drand48() * 32768.0f - 16384.0f);
427  }
428 }
429 
430 static void my_test_teardown (void)
431 {
432  NE10_FREE (guarded_in_c);
433  NE10_FREE (guarded_in_neon);
434  NE10_FREE (guarded_out_c);
435  NE10_FREE (guarded_out_neon);
436 }
437 
/**
 * Entry point for the c2c float32 FFT tests: runs the conformance test when
 * SMOKE_TEST or REGRESSION_TEST is defined and the performance test when
 * PERFORMANCE_TEST is defined.
 */
void test_fft_c2c_1d_float32()
{
    /* Both operands need `defined`; a bare (REGRESSION_TEST) expands the macro's value instead */
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_c2c_1d_float32_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_c2c_1d_float32_performance();
#endif
}
448 
/**
 * Entry point for the r2c / c2r float32 FFT tests: runs the conformance test
 * when SMOKE_TEST or REGRESSION_TEST is defined and the performance test when
 * PERFORMANCE_TEST is defined.
 */
void test_fft_r2c_1d_float32()
{
    /* Both operands need `defined`; a bare (REGRESSION_TEST) expands the macro's value instead */
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_r2c_1d_float32_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_r2c_1d_float32_performance();
#endif
}
459 
460 void test_fixture_fft_c2c_1d_float32 (void)
461 {
462  test_fixture_start(); // starts a fixture
463 
464  fixture_setup (my_test_setup);
465 
466  run_test (test_fft_c2c_1d_float32); // run tests
467 
468  fixture_teardown(my_test_teardown);
469 
470  test_fixture_end(); // ends a fixture
471 }
472 
473 void test_fixture_fft_r2c_1d_float32 (void)
474 {
475  test_fixture_start(); // starts a fixture
476 
477  fixture_setup (my_test_setup);
478 
479  run_test (test_fft_r2c_1d_float32); // run tests
480 
481  fixture_teardown(my_test_teardown);
482 
483  test_fixture_end(); // ends a fixture
484 }
485 
486 ne10_int32_t test_c2c_alloc (ne10_int32_t fftSize)
487 {
488  NE10_FREE (cfg_c);
489  NE10_FREE (cfg_neon);
490 
491  cfg_c = ne10_fft_alloc_c2c_float32_c (fftSize);
492  if (cfg_c == NULL)
493  {
494  fprintf (stdout, "======ERROR, FFT alloc fails\n");
495  return NE10_ERR;
496  }
497 
498  cfg_neon = ne10_fft_alloc_c2c_float32_neon (fftSize);
499  if (cfg_neon == NULL)
500  {
501  NE10_FREE (cfg_c);
502  fprintf (stdout, "======ERROR, FFT alloc fails\n");
503  return NE10_ERR;
504  }
505  return NE10_OK;
506 }
void ne10_fft_c2r_1d_float32_c(ne10_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Specific implementation of ne10_fft_c2r_1d_float32 using plain C.
ne10_fft_r2c_cfg_float32_t ne10_fft_alloc_r2c_float32(ne10_int32_t nfft)
Creates a configuration structure for variants of ne10_fft_r2c_1d_float32 and ne10_fft_c2r_1d_float32...
int32_t ne10_int32_t
Definition: NE10_types.h:76
void ne10_fft_c2c_1d_float32_neon(ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_cfg_float32_t cfg, ne10_int32_t inverse_fft)
Specific implementation of ne10_fft_c2c_1d_float32 using NEON SIMD capabilities.
void my_test_setup(void)
float ne10_float32_t
Definition: NE10_types.h:80
ne10_fft_cfg_float32_t ne10_fft_alloc_c2c_float32_neon(ne10_int32_t nfft)
Specific implementation of ne10_fft_alloc_c2c_float32 for ne10_fft_c2c_1d_float32_neon.
Definition: NE10_fft.c:348
Structure for the floating point FFT state.
Definition: NE10_types.h:239
int64_t ne10_int64_t
Definition: NE10_types.h:78
ne10_fft_cfg_float32_t ne10_fft_alloc_c2c_float32_c(ne10_int32_t nfft)
Specific implementation of ne10_fft_alloc_c2c_float32 for ne10_fft_c2c_1d_float32_c.
#define NE10_FREE(p)
Definition: NE10_macros.h:54
#define MIN_LENGTH_SAMPLES_CPX
void ne10_fft_c2c_1d_float32_c(ne10_fft_cpx_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_cfg_float32_t cfg, ne10_int32_t inverse_fft)
Specific implementation of ne10_fft_c2c_1d_float32 using plain C.
#define SNR_THRESHOLD_FLOAT32
#define NE10_MALLOC
Definition: NE10_macros.h:53
void ne10_fft_c2r_1d_float32_neon(ne10_float32_t *fout, ne10_fft_cpx_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Specific implementation of ne10_fft_c2r_1d_float32 using NEON SIMD capabilities.
#define NE10_ERR
Definition: NE10_types.h:66
#define NE10_OK
Definition: NE10_types.h:65
#define TEST_LENGTH_SAMPLES
#define MIN_LENGTH_SAMPLES_REAL
void ne10_fft_r2c_1d_float32_c(ne10_fft_cpx_float32_t *fout, ne10_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Specific implementation of ne10_fft_r2c_1d_float32 using plain C.
#define TEST_COUNT
#define NE10_FFT_PARA_LEVEL
void ne10_fft_r2c_1d_float32_neon(ne10_fft_cpx_float32_t *fout, ne10_float32_t *fin, ne10_fft_r2c_cfg_float32_t cfg)
Specific implementation of ne10_fft_r2c_1d_float32 using NEON SIMD capabilities.