/*
 * test_fft_c2c_1d_int32_conformance: conformance check for the 1-D
 * complex-to-complex int32 FFT. For each FFT size it loads identical data
 * into the C and NEON input buffers and computes an SNR between the two
 * float output copies (out_c_tmp vs. out_neon_tmp).
 *
 * NOTE(review): this listing keeps only some of the original statements
 * (each is prefixed with its original line number). Braces, the transform
 * calls and the SNR pass/fail checks are not visible here -- confirm
 * against the full file before relying on these notes.
 */
39 #include "unit_test_common.h" 47 #define TEST_LENGTH_SAMPLES (32768) 48 #define MIN_LENGTH_SAMPLES_CPX (2) 49 #define MIN_LENGTH_SAMPLES_REAL (MIN_LENGTH_SAMPLES_CPX) 51 #define SNR_THRESHOLD_INT32 25.0f 53 #define TEST_COUNT 250000 55 #define NE10_FFT_PARA_LEVEL 4 81 void test_fft_c2c_1d_int32_conformance()
/* Announce the start of this test on stdout. */
92 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Working pointers begin ARRAY_GUARD_LEN elements into each guarded buffer. */
97 in_c = guarded_in_c + ARRAY_GUARD_LEN;
98 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
103 out_c = guarded_out_c + ARRAY_GUARD_LEN;
104 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
/*
 * Random sample: drand48() * 8192 is truncated to ne10_int32_t first, then
 * 4096 is subtracted. drand48() returns values in [0.0, 1.0), so samples lie
 * in [-4096, 4095].
 */
111 testInput_i32_unscaled[i] = (
ne10_int32_t) (drand48() * 8192) - 4096;
/* Keep testing while factor is non-zero and fftSize <= TEST_LENGTH_SAMPLES. */
119 while (factor && fftSize <= TEST_LENGTH_SAMPLES)
123 fprintf (stdout,
"FFT size %d\n", fftSize);
/* Allocation-failure report (the guarding condition for this one is not visible). */
128 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
/* Same report when the NEON configuration pointer is NULL. */
133 if (cfg_neon == NULL)
136 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
/* Pass 1: both inputs get the same 2 * fftSize unscaled int32 values. */
141 memcpy (in_c, testInput_i32_unscaled, 2 * fftSize *
sizeof (
ne10_int32_t));
142 memcpy (in_neon, testInput_i32_unscaled, 2 * fftSize *
sizeof (
ne10_int32_t));
/* Loop over all fftSize * 2 values (body not visible), then SNR of C vs. NEON. */
152 for (i = 0; i < fftSize * 2; i++)
157 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
/*
 * Pass 2: inputs reloaded from the unscaled data.
 * NOTE(review): presumably the inverse transform -- the call is not visible.
 */
161 memcpy (in_c, testInput_i32_unscaled, 2 * fftSize *
sizeof (
ne10_int32_t));
162 memcpy (in_neon, testInput_i32_unscaled, 2 * fftSize *
sizeof (
ne10_int32_t));
172 for (i = 0; i < fftSize * 2; i++)
177 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
/* Pass 3: inputs loaded from the scaled test data. */
181 memcpy (in_c, testInput_i32_scaled, 2 * fftSize *
sizeof (
ne10_int32_t));
182 memcpy (in_neon, testInput_i32_scaled, 2 * fftSize *
sizeof (
ne10_int32_t));
192 for (i = 0; i < fftSize * 2; i++)
197 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
/* Pass 4: inputs reloaded from the scaled test data. */
201 memcpy (in_c, testInput_i32_scaled, 2 * fftSize *
sizeof (
ne10_int32_t));
202 memcpy (in_neon, testInput_i32_scaled, 2 * fftSize *
sizeof (
ne10_int32_t));
212 for (i = 0; i < fftSize * 2; i++)
217 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize * 2);
/*
 * Three different ways of growing fftSize for the next iteration.
 * NOTE(review): presumably selected by conditions on lines not shown here.
 */
228 fftSize += (fftSize / 2);
232 fftSize += (fftSize / 3) * 2;
236 fftSize += (fftSize * 2);
/*
 * test_fft_c2c_1d_int32_performance: timing comparison of the C and NEON
 * complex-to-complex int32 FFT. It prints a result table header, then logs
 * one row per variant (unscaled FFT, unscaled IFFT, scaled FFT, scaled IFFT)
 * through ne10_log.
 *
 * NOTE(review): this listing keeps only some of the original statements.
 * The bodies of the test_loop loops, the timing calls and the assignment
 * of time_speedup are not visible here.
 */
253 void test_fft_c2c_1d_int32_performance()
/* Test banner followed by the column headings of the result table. */
262 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
263 fprintf (stdout,
"%25s%20s%20s%20s%20s\n",
"FFT Length",
"C Time (micro-s)",
"NEON Time (micro-s)",
"Time Savings",
"Performance Ratio");
/* Working pointers begin ARRAY_GUARD_LEN elements into each guarded buffer. */
268 in_c = guarded_in_c + ARRAY_GUARD_LEN;
269 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
274 out_c = guarded_out_c + ARRAY_GUARD_LEN;
275 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
/* Random sample in [-4096, 4095]: the cast applies before the subtraction. */
279 testInput_i32_unscaled[i] = (
ne10_int32_t) (drand48() * 8192) - 4096;
284 fprintf (stdout,
"FFT size %d\n", fftSize);
/* Unscaled FFT: both inputs get the same 2 * fftSize unscaled int32 values. */
287 memcpy (in_c, testInput_i32_unscaled, 2 * fftSize *
sizeof (
ne10_int32_t));
288 memcpy (in_neon, testInput_i32_unscaled, 2 * fftSize *
sizeof (
ne10_int32_t));
/* Allocation-failure reports (the first guarding condition is not visible). */
292 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
297 if (cfg_neon == NULL)
300 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
/* Two repetition loops of test_loop iterations each (bodies not visible). */
310 for (i = 0; i < test_loop; i++)
318 for (i = 0; i < test_loop; i++)
/*
 * Percentage of the C time saved by NEON: the difference is cast to
 * ne10_float32_t before dividing by time_c.
 */
323 time_savings = ( ( (
ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
324 ne10_log (__FUNCTION__,
" unscaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Unscaled IFFT: both inputs are seeded from out_c so they are identical. */
327 memcpy (in_c, out_c, 2 * fftSize *
sizeof (
ne10_int32_t));
328 memcpy (in_neon, out_c, 2 * fftSize *
sizeof (
ne10_int32_t));
334 for (i = 0; i < test_loop; i++)
342 for (i = 0; i < test_loop; i++)
348 time_savings = ( ( (
ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
349 ne10_log (__FUNCTION__,
"unscaled IFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Scaled FFT: both inputs loaded from the scaled test data. */
352 memcpy (in_c, testInput_i32_scaled, 2 * fftSize *
sizeof (
ne10_int32_t));
353 memcpy (in_neon, testInput_i32_scaled, 2 * fftSize *
sizeof (
ne10_int32_t));
359 for (i = 0; i < test_loop; i++)
367 for (i = 0; i < test_loop; i++)
372 time_savings = ( ( (
ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
373 ne10_log (__FUNCTION__,
" scaled FFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Scaled IFFT: both inputs are again seeded from out_c. */
376 memcpy (in_c, out_c, 2 * fftSize *
sizeof (
ne10_int32_t));
377 memcpy (in_neon, out_c, 2 * fftSize *
sizeof (
ne10_int32_t));
383 for (i = 0; i < test_loop; i++)
391 for (i = 0; i < test_loop; i++)
397 time_savings = ( ( (
ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
398 ne10_log (__FUNCTION__,
" scaled IFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/*
 * test_fft_r2c_1d_int32_conformance: conformance check for the 1-D
 * real-to-complex int32 FFT and its inverse. Forward passes copy fftSize
 * values into both inputs and compute an SNR over (fftSize / 2 + 1) * 2
 * output values. Inverse passes build a mirrored input in in_c, copy it to
 * in_neon and compute an SNR over fftSize output values.
 *
 * NOTE(review): this listing keeps only some of the original statements.
 * Braces, the transform calls and the SNR pass/fail checks are not visible
 * here.
 */
410 void test_fft_r2c_1d_int32_conformance()
419 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
/* Working pointers begin ARRAY_GUARD_LEN elements into each guarded buffer. */
424 in_c = guarded_in_c + ARRAY_GUARD_LEN;
425 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
430 out_c = guarded_out_c + ARRAY_GUARD_LEN;
431 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
/* Random sample in [-4096, 4095]: the cast applies before the subtraction. */
438 testInput_i32_unscaled[i] = (
ne10_int32_t) (drand48() * 8192) - 4096;
443 fprintf (stdout,
"FFT size %d\n", fftSize);
/* Allocation-failure report (the guarding condition is not visible). */
448 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
/* Unscaled forward pass: fftSize int32 values go into each input. */
453 memcpy (in_c, testInput_i32_unscaled, fftSize *
sizeof (
ne10_int32_t));
454 memcpy (in_neon, testInput_i32_unscaled, fftSize *
sizeof (
ne10_int32_t));
/* (fftSize / 2 + 1) * 2 output values are visited and compared. */
466 for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
471 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
/*
 * Unscaled inverse pass input: for i in [1, fftSize / 2) pair i is taken
 * from the test data and pair (fftSize - i) mirrors it with the second
 * value negated.
 * NOTE(review): assumes in_c holds interleaved re/im pairs, which would
 * make this a conjugate-symmetric spectrum -- confirm.
 */
475 for (i = 1; i < (fftSize / 2); i++)
477 in_c[2 * i] = testInput_i32_unscaled[2 * i];
478 in_c[2 * i + 1] = testInput_i32_unscaled[2 * i + 1];
479 in_c[2 * (fftSize - i)] = in_c[2 * i];
480 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
/* Pair 0 and pair fftSize / 2 are set separately; the latter's second value is 0. */
482 in_c[0] = testInput_i32_unscaled[0];
484 in_c[fftSize] = testInput_i32_unscaled[1];
485 in_c[fftSize + 1] = 0;
/* NEON gets an exact copy of the fftSize * 2 values built above. */
486 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (
ne10_int32_t));
/* fftSize output values are visited and compared. */
498 for (i = 0; i < fftSize; i++)
503 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
/* Scaled forward pass: same steps with testInput_i32_scaled. */
507 memcpy (in_c, testInput_i32_scaled, fftSize *
sizeof (
ne10_int32_t));
508 memcpy (in_neon, testInput_i32_scaled, fftSize *
sizeof (
ne10_int32_t));
520 for (i = 0; i < (fftSize / 2 + 1) * 2; i++)
525 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, (fftSize / 2 + 1) * 2);
/* Scaled inverse pass input: same mirrored construction from the scaled data. */
529 for (i = 1; i < (fftSize / 2); i++)
531 in_c[2 * i] = testInput_i32_scaled[2 * i];
532 in_c[2 * i + 1] = testInput_i32_scaled[2 * i + 1];
533 in_c[2 * (fftSize - i)] = in_c[2 * i];
534 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
536 in_c[0] = testInput_i32_scaled[0];
538 in_c[fftSize] = testInput_i32_scaled[1];
539 in_c[fftSize + 1] = 0;
540 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (
ne10_int32_t));
552 for (i = 0; i < fftSize; i++)
557 snr = CAL_SNR_FLOAT32 (out_c_tmp, out_neon_tmp, fftSize);
/*
 * test_fft_r2c_1d_int32_performance: timing comparison of the C and NEON
 * real-to-complex int32 FFT and its inverse. It prints a result table
 * header, then logs one row per variant (unscaled RFFT, unscaled RIFFT,
 * scaled RFFT, scaled RIFFT) through ne10_log.
 *
 * NOTE(review): this listing keeps only some of the original statements.
 * The bodies of the test_loop loops, the timing calls and the assignment
 * of time_speedup are not visible here.
 */
572 void test_fft_r2c_1d_int32_performance()
/* Test banner followed by the column headings of the result table. */
580 fprintf (stdout,
"----------%30s start\n", __FUNCTION__);
581 fprintf (stdout,
"%25s%20s%20s%20s%20s\n",
"FFT Length",
"C Time (micro-s)",
"NEON Time (micro-s)",
"Time Savings",
"Performance Ratio");
/* Working pointers begin ARRAY_GUARD_LEN elements into each guarded buffer. */
586 in_c = guarded_in_c + ARRAY_GUARD_LEN;
587 in_neon = guarded_in_neon + ARRAY_GUARD_LEN;
592 out_c = guarded_out_c + ARRAY_GUARD_LEN;
593 out_neon = guarded_out_neon + ARRAY_GUARD_LEN;
/* Random sample in [-4096, 4095]: the cast applies before the subtraction. */
597 testInput_i32_unscaled[i] = (
ne10_int32_t) (drand48() * 8192) - 4096;
602 fprintf (stdout,
"FFT size %d\n", fftSize);
/* Allocation-failure report (the guarding condition is not visible). */
607 fprintf (stdout,
"======ERROR, FFT alloc fails\n");
/* Unscaled RFFT: fftSize int32 values go into each input. */
612 memcpy (in_c, testInput_i32_unscaled, fftSize *
sizeof (
ne10_int32_t));
613 memcpy (in_neon, testInput_i32_unscaled, fftSize *
sizeof (
ne10_int32_t));
/* Two repetition loops of test_loop iterations each (bodies not visible). */
619 for (i = 0; i < test_loop; i++)
627 for (i = 0; i < test_loop; i++)
/*
 * Percentage of the C time saved by NEON: the difference is cast to
 * ne10_float32_t before dividing by time_c.
 */
633 time_savings = ( ( (
ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
634 ne10_log (__FUNCTION__,
"Int32 unscaled RFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/*
 * Unscaled RIFFT input: for i in [1, fftSize / 2) pair i is taken from the
 * test data and pair (fftSize - i) mirrors it with the second value negated.
 * NOTE(review): assumes in_c holds interleaved re/im pairs -- confirm.
 */
637 for (i = 1; i < (fftSize / 2); i++)
639 in_c[2 * i] = testInput_i32_unscaled[2 * i];
640 in_c[2 * i + 1] = testInput_i32_unscaled[2 * i + 1];
641 in_c[2 * (fftSize - i)] = in_c[2 * i];
642 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
/* Pair 0 and pair fftSize / 2 are set separately; the latter's second value is 0. */
644 in_c[0] = testInput_i32_unscaled[0];
646 in_c[fftSize] = testInput_i32_unscaled[1];
647 in_c[fftSize + 1] = 0;
/* NEON gets an exact copy of the fftSize * 2 values built above. */
648 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (
ne10_int32_t));
654 for (i = 0; i < test_loop; i++)
662 for (i = 0; i < test_loop; i++)
668 time_savings = ( ( (
ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
669 ne10_log (__FUNCTION__,
"Int32 unscaled RIFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Scaled RFFT: fftSize values from the scaled test data go into each input. */
672 memcpy (in_c, testInput_i32_scaled, fftSize *
sizeof (
ne10_int32_t));
673 memcpy (in_neon, testInput_i32_scaled, fftSize *
sizeof (
ne10_int32_t));
679 for (i = 0; i < test_loop; i++)
687 for (i = 0; i < test_loop; i++)
693 time_savings = ( ( (
ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
694 ne10_log (__FUNCTION__,
"Int32 scaled RFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/* Scaled RIFFT input: same mirrored construction from the scaled data. */
697 for (i = 1; i < (fftSize / 2); i++)
699 in_c[2 * i] = testInput_i32_scaled[2 * i];
700 in_c[2 * i + 1] = testInput_i32_scaled[2 * i + 1];
701 in_c[2 * (fftSize - i)] = in_c[2 * i];
702 in_c[2 * (fftSize - i) + 1] = -in_c[2 * i + 1];
704 in_c[0] = testInput_i32_scaled[0];
706 in_c[fftSize] = testInput_i32_scaled[1];
707 in_c[fftSize + 1] = 0;
708 memcpy (in_neon, in_c, fftSize * 2 *
sizeof (
ne10_int32_t));
714 for (i = 0; i < test_loop; i++)
722 for (i = 0; i < test_loop; i++)
728 time_savings = ( ( (
ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
729 ne10_log (__FUNCTION__,
"Int32 scaled RIFFT%21d%20lld%20lld%19.2f%%%18.2f:1\n", fftSize, time_c, time_neon, time_savings, time_speedup);
/**
 * Entry point for the complex-to-complex int32 FFT tests.
 *
 * Runs the conformance test when SMOKE_TEST or REGRESSION_TEST is defined,
 * and the performance test when PERFORMANCE_TEST is defined. With none of
 * these macros defined the function does nothing.
 *
 * The original condition read `defined (SMOKE_TEST)||(REGRESSION_TEST)`,
 * which tested the *value* of REGRESSION_TEST instead of whether it is
 * defined. An empty `#define REGRESSION_TEST` made the directive
 * ill-formed. Both macros are now checked with `defined`, so
 * `-DREGRESSION_TEST=0` also enables the conformance test.
 */
void test_fft_c2c_1d_int32()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_c2c_1d_int32_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_c2c_1d_int32_performance();
#endif
}
/**
 * Entry point for the real-to-complex int32 FFT tests.
 *
 * Runs the conformance test when SMOKE_TEST or REGRESSION_TEST is defined,
 * and the performance test when PERFORMANCE_TEST is defined. With none of
 * these macros defined the function does nothing.
 *
 * The original condition read `defined (SMOKE_TEST)||(REGRESSION_TEST)`,
 * which tested the *value* of REGRESSION_TEST instead of whether it is
 * defined. An empty `#define REGRESSION_TEST` made the directive
 * ill-formed. Both macros are now checked with `defined`, so
 * `-DREGRESSION_TEST=0` also enables the conformance test.
 */
void test_fft_r2c_1d_int32()
{
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_fft_r2c_1d_int32_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_fft_r2c_1d_int32_performance();
#endif
}
764 ne10_log_buffer_ptr = ne10_log_buffer;
/*
 * test_fixture_fft_c2c_1d_int32: test fixture for the complex-to-complex
 * int32 FFT. It calls test_fixture_start() and then hands
 * test_fft_c2c_1d_int32 to run_test.
 *
 * NOTE(review): this listing omits some statements of the function (for
 * example anything between and after the two visible calls) -- confirm
 * against the full file.
 */
767 void test_fixture_fft_c2c_1d_int32 (
void)
/* Open the fixture. */
769 test_fixture_start();
/* Execute the c2c test entry point through the test runner. */
773 run_test (test_fft_c2c_1d_int32);
/*
 * test_fixture_fft_r2c_1d_int32: test fixture for the real-to-complex
 * int32 FFT. It calls test_fixture_start() and then hands
 * test_fft_r2c_1d_int32 to run_test.
 *
 * NOTE(review): this listing omits some statements of the function (for
 * example anything between and after the two visible calls) -- confirm
 * against the full file.
 */
778 void test_fixture_fft_r2c_1d_int32 (
void)
/* Open the fixture. */
780 test_fixture_start();
/* Execute the r2c test entry point through the test runner. */
784 run_test (test_fft_r2c_1d_int32);
void ne10_fft_c2c_1d_int32_neon(ne10_fft_cpx_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_cfg_int32_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_c2c_1d_int32 using NEON SIMD capabilities.
void ne10_fft_c2r_1d_int32_neon(ne10_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_c2r_1d_int32 using NEON SIMD capabilities.
#define TEST_LENGTH_SAMPLES
void ne10_fft_r2c_1d_int32_c(ne10_fft_cpx_int32_t *fout, ne10_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_r2c_1d_int32 using plain C.
ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_neon(ne10_int32_t nfft)
Specific implementation of ne10_fft_alloc_c2c_int32 for ne10_fft_c2c_1d_int32_neon.
#define SNR_THRESHOLD_INT32
void ne10_fft_r2c_1d_int32_neon(ne10_fft_cpx_int32_t *fout, ne10_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_r2c_1d_int32 using NEON SIMD capabilities.
void ne10_fft_c2c_1d_int32_c(ne10_fft_cpx_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_cfg_int32_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_c2c_1d_int32 using plain C.
Structure for the 32-bit fixed point FFT function.
ne10_fft_r2c_cfg_int32_t ne10_fft_alloc_r2c_int32(ne10_int32_t nfft)
Creates a configuration structure for variants of ne10_fft_r2c_1d_int32 and ne10_fft_c2r_1d_int32.
#define NE10_FFT_PARA_LEVEL
#define MIN_LENGTH_SAMPLES_REAL
#define MIN_LENGTH_SAMPLES_CPX
ne10_fft_cfg_int32_t ne10_fft_alloc_c2c_int32_c(ne10_int32_t nfft)
Specific implementation of ne10_fft_alloc_c2c_int32 for ne10_fft_c2c_1d_int32_c.
void ne10_fft_c2r_1d_int32_c(ne10_int32_t *fout, ne10_fft_cpx_int32_t *fin, ne10_fft_r2c_cfg_int32_t cfg, ne10_int32_t scaled_flag)
Specific implementation of ne10_fft_c2r_1d_int32 using plain C.