39 #include "unit_test_common.h" 47 #define TEST_LENGTH_SAMPLES (4096) 48 #define MIN_LENGTH_SAMPLES_CPX (2) 49 #define MIN_LENGTH_SAMPLES_REAL (MIN_LENGTH_SAMPLES_CPX) 51 #define SNR_THRESHOLD_INT16 15.0f 53 #define TEST_COUNT 250000 79 void test_fft_c2c_1d_int16_conformance()
88 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
93 in_c = guarded_in_c + ARRAY_GUARD_LEN;
94 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
99 out_c = guarded_out_c + ARRAY_GUARD_LEN;
100 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
107 testInput_i16_unscaled[i] = (
ne10_int32_t) (drand48() * 1024) - 512;
112 fprintf (stdout,
"FFT size %d\n", fftSize);
116 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
121 memcpy (in_c, testInput_i16_unscaled, 2 * fftSize *
sizeof (
ne10_int16_t));
122 memcpy (in_neon, testInput_i16_unscaled, 2 * fftSize *
sizeof (
ne10_int16_t));
134 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
139 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
143 memcpy (in_c, testInput_i16_unscaled, 2 * fftSize *
sizeof (
ne10_int16_t));
144 memcpy (in_neon, testInput_i16_unscaled, 2 * fftSize *
sizeof (
ne10_int16_t));
156 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
161 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
165 memcpy (in_c, testInput_i16_scaled, 2 * fftSize *
sizeof (
ne10_int16_t));
166 memcpy (in_neon, testInput_i16_scaled, 2 * fftSize *
sizeof (
ne10_int16_t));
178 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
183 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
187 memcpy (in_c, testInput_i16_scaled, 2 * fftSize *
sizeof (
ne10_int16_t));
188 memcpy (in_neon, testInput_i16_scaled, 2 * fftSize *
sizeof (
ne10_int16_t));
200 for (i = 0; i < TEST_LENGTH_SAMPLES * 2; i++)
205 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
219 void test_fft_c2c_1d_int16_performance()
227 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
228 fprintf (stdout,
"%25s%20s%20s%20s%20s\n",
"FFT Length",
"C Time (micro-s)",
"NEON Time (micro-s)",
"Time Savings",
"Performance Ratio");
233 in_c = guarded_in_c + ARRAY_GUARD_LEN;
234 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
239 out_c = guarded_out_c + ARRAY_GUARD_LEN;
240 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
244 testInput_i16_unscaled[i] = (
ne10_int16_t) (drand48() * 1024) - 512;
249 fprintf (stdout,
"FFT size %d\n", fftSize);
253 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
259 memcpy (in_c, testInput_i16_unscaled, 2 * fftSize *
sizeof (
ne10_int16_t));
260 memcpy (in_neon, testInput_i16_unscaled, 2 * fftSize *
sizeof (
ne10_int16_t));
266 for (i = 0; i < test_loop; i++)
274 for (i = 0; i < test_loop; i++)
280 time_savings = ( ( (
ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
281 ne10_log (__FUNCTION__,
"Int16 unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
284 memcpy (in_c, out_c, 2 * fftSize *
sizeof (
ne10_int16_t));
285 memcpy (in_neon, out_c, 2 * fftSize *
sizeof (
ne10_int16_t));
291 for (i = 0; i < test_loop; i++)
299 for (i = 0; i < test_loop; i++)
305 time_savings = ( ( (
ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
306 ne10_log (__FUNCTION__,
"Int16 unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
308 memcpy (in_c, testInput_i16_scaled, 2 * fftSize *
sizeof (
ne10_int16_t));
309 memcpy (in_neon, testInput_i16_scaled, 2 * fftSize *
sizeof (
ne10_int16_t));
315 for (i = 0; i < test_loop; i++)
323 for (i = 0; i < test_loop; i++)
329 time_savings = ( ( (
ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
330 ne10_log (__FUNCTION__,
"Int16 scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
333 memcpy (in_c, out_c, 2 * fftSize *
sizeof (
ne10_int16_t));
334 memcpy (in_neon, out_c, 2 * fftSize *
sizeof (
ne10_int16_t));
340 for (i = 0; i < test_loop; i++)
348 for (i = 0; i < test_loop; i++)
354 time_savings = ( ( (
ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
355 ne10_log (__FUNCTION__,
"Int16 scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
366 void test_fft_r2c_1d_int16_conformance()
375 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
380 in_c = guarded_in_c + ARRAY_GUARD_LEN;
381 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
386 out_c = guarded_out_c + ARRAY_GUARD_LEN;
387 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
394 testInput_i16_unscaled[i] = (
ne10_int16_t) (drand48() * 1024) - 512;
399 fprintf (stdout,
"RFFT size %d\n", fftSize);
403 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
408 memcpy (in_c, testInput_i16_unscaled, fftSize *
sizeof (
ne10_int16_t));
409 memcpy (in_neon, testInput_i16_unscaled, fftSize *
sizeof (
ne10_int16_t));
421 for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
426 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
430 for (i = 1; i < (fftSize / 2); i++)
432 in_c[2 * i] = testInput_i16_unscaled[2 * i];
433 in_c[2 * i + 1] = testInput_i16_unscaled[2 * i + 1];
434 in_c[2 * (fftSize - i)] = in_c[2 * i];
435 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
437 in_c[0] = testInput_i16_unscaled[0];
439 in_c[fftSize] = testInput_i16_unscaled[1];
440 in_c[fftSize + 1] = 0;
441 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (
ne10_int16_t));
453 for (i = 0; i < fftSize; i++)
458 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
462 memcpy (in_c, testInput_i16_scaled, fftSize *
sizeof (
ne10_int16_t));
463 memcpy (in_neon, testInput_i16_scaled, fftSize *
sizeof (
ne10_int16_t));
475 for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
480 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
484 for (i = 1; i < (fftSize / 2); i++)
486 in_c[2 * i] = testInput_i16_scaled[2 * i];
487 in_c[2 * i + 1] = testInput_i16_scaled[2 * i + 1];
488 in_c[2 * (fftSize - i)] = in_c[2 * i];
489 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
491 in_c[0] = testInput_i16_scaled[0];
493 in_c[fftSize] = testInput_i16_scaled[1];
494 in_c[fftSize + 1] = 0;
495 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (
ne10_int16_t));
507 for (i = 0; i < fftSize; i++)
512 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
526 void test_fft_r2c_1d_int16_performance()
534 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
535 fprintf (stdout,
"%25s%20s%20s%20s%20s\n",
"FFT Length",
"C Time (micro-s)",
"NEON Time (micro-s)",
"Time Savings",
"Performance Ratio");
540 in_c = guarded_in_c + ARRAY_GUARD_LEN;
541 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
546 out_c = guarded_out_c + ARRAY_GUARD_LEN;
547 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
551 testInput_i16_unscaled[i] = (
ne10_int16_t) (drand48() * 1024) - 512;
556 fprintf (stdout,
"FFT size %d\n", fftSize);
560 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
566 memcpy (in_c, testInput_i16_unscaled , fftSize *
sizeof (
ne10_int16_t));
567 memcpy (in_neon, testInput_i16_unscaled , fftSize *
sizeof (
ne10_int16_t));
573 for (i = 0; i < test_loop; i++)
581 for (i = 0; i < test_loop; i++)
587 time_savings = ( ( (
ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
588 ne10_log (__FUNCTION__,
"Int16 unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
591 for (i = 1; i < (fftSize / 2); i++)
593 in_c[2 * i] = testInput_i16_unscaled[2 * i];
594 in_c[2 * i + 1] = testInput_i16_unscaled[2 * i + 1];
595 in_c[2 * (fftSize - i)] = in_c[2 * i];
596 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
598 in_c[0] = testInput_i16_unscaled[0];
600 in_c[fftSize] = testInput_i16_unscaled[1];
601 in_c[fftSize + 1] = 0;
602 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (
ne10_int16_t));
608 for (i = 0; i < test_loop; i++)
616 for (i = 0; i < test_loop; i++)
622 time_savings = ( ( (
ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
623 ne10_log (__FUNCTION__,
"Int16 unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
626 memcpy (in_c, testInput_i16_scaled , fftSize *
sizeof (
ne10_int16_t));
627 memcpy (in_neon, testInput_i16_scaled , fftSize *
sizeof (
ne10_int16_t));
633 for (i = 0; i < test_loop; i++)
641 for (i = 0; i < test_loop; i++)
647 time_savings = ( ( (
ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
648 ne10_log (__FUNCTION__,
"Int16 scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
651 for (i = 1; i < (fftSize / 2); i++)
653 in_c[2 * i] = testInput_i16_scaled[2 * i];
654 in_c[2 * i + 1] = testInput_i16_scaled[2 * i + 1];
655 in_c[2 * (fftSize - i)] = in_c[2 * i];
656 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
658 in_c[0] = testInput_i16_scaled[0];
660 in_c[fftSize] = testInput_i16_scaled[1];
661 in_c[fftSize + 1] = 0;
662 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (
ne10_int16_t));
668 for (i = 0; i < test_loop; i++)
676 for (i = 0; i < test_loop; i++)
682 time_savings = ( ( (
ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
683 ne10_log (__FUNCTION__,
"Int16 scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
694 void test_fft_c2c_1d_int16()
696 #if defined (SMOKE_TEST)||(REGRESSION_TEST) 697 test_fft_c2c_1d_int16_conformance();
700 #if defined (PERFORMANCE_TEST) 701 test_fft_c2c_1d_int16_performance();
705 void test_fft_r2c_1d_int16()
707 #if defined (SMOKE_TEST)||(REGRESSION_TEST) 708 test_fft_r2c_1d_int16_conformance();
711 #if defined (PERFORMANCE_TEST) 712 test_fft_r2c_1d_int16_performance();
718 ne10_log_buffer_ptr = ne10_log_buffer;
721 void test_fixture_fft_c2c_1d_int16 (
void)
723 test_fixture_start();
727 run_test (test_fft_c2c_1d_int16);
732 void test_fixture_fft_r2c_1d_int16 (
void)
734 test_fixture_start();
738 run_test (test_fft_r2c_1d_int16);
Structure for the 16-bit fixed point FFT function.
#define MIN_LENGTH_SAMPLES_REAL
void ne10_fft_c2r_1d_int16_neon(ne10_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_c2r_1d_int16 using NEON SIMD capabilities.
void ne10_fft_c2r_1d_int16_c(ne10_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_c2r_1d_int16 using plain C.
void ne10_fft_r2c_1d_int16_neon(ne10_fft_cpx_int16_t *fout, ne10_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_r2c_1d_int16 using NEON SIMD capabilities.
void ne10_fft_c2c_1d_int16_c(ne10_fft_cpx_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_cfg_int16_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_c2c_1d_int16 using plain C.
void ne10_fft_r2c_1d_int16_c(ne10_fft_cpx_int16_t *fout, ne10_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_r2c_1d_int16 using plain C.
#define TEST_LENGTH_SAMPLES
#define MIN_LENGTH_SAMPLES_CPX
ne10_fft_r2c_cfg_int16_t ne10_fft_alloc_r2c_int16(ne10_int32_t nfft)
Creates a configuration structure for variants of ne10_fft_r2c_1d_int16 and ne10_fft_c2r_1d_int16.
#define SNR_THRESHOLD_INT16
ne10_fft_cfg_int16_t ne10_fft_alloc_c2c_int16(ne10_int32_t nfft)
Creates a configuration structure for variants of ne10_fft_c2c_1d_int16.
void ne10_fft_c2c_1d_int16_neon(ne10_fft_cpx_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_cfg_int16_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_c2c_1d_int16 using NEON SIMD capabilities.