Project Ne10
An open, optimized software library for the ARM architecture.
NE10_fft_generic_int32.neonintrinsic.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2015-16 ARM Limited
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /* license of Kiss FFT */
29 /*
30 Copyright (c) 2003-2010, Mark Borgerding
31 
32 All rights reserved.
33 
34 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
35 
36  * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
37  * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
38  * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
39 
40 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 */
42 
43 /*
44  * NE10 Library : dsp/NE10_fft_generic_int32.neonintrinsic.cpp
45  *
46  * This file must be compiled by a C++ toolchain because some functions are
47  * written as template functions, which makes it easier for the compiler to
48  * eliminate branches.
49  */
50 
52 
53 template<bool is_inverse>
56  const ne10_fft_cpx_int32_t *Fin,
57  const ne10_int32_t *factors,
58  const ne10_fft_cpx_int32_t *twiddles,
59  ne10_fft_cpx_int32_t *buffer,
60  ne10_int32_t is_scaled_flag)
61 {
62  ne10_int32_t stage_count = factors[0];
63  ne10_int32_t fstride = factors[1];
64  ne10_int32_t radix = factors[stage_count << 1]; // radix of first stage
65 
66  // nfft below is not the actual length of FFT, it is 1/4 of the actual one
67  // instead.
68  ne10_int32_t nfft = fstride * radix;
69 
70  void (*ne10_mixed_butterfly_f) (CPLX *, const CPLX *, const ne10_int32_t *,
71  const ne10_fft_cpx_int32_t *, CPLX *) = NULL;
72 
73  void (*ne10_last_stage_f) (CPLX *, const CPLX *, const ne10_fft_cpx_int32_t *,
75 
76  if (is_scaled_flag == 1)
77  {
78  ne10_mixed_butterfly_f =
79  ne10_mixed_radix_generic_butterfly_int32_neon_impl<is_inverse, true>;
80  }
81  else
82  {
83  ne10_mixed_butterfly_f =
84  ne10_mixed_radix_generic_butterfly_int32_neon_impl<is_inverse, false>;
85  }
86 
87  if (is_scaled_flag == 1)
88  {
89  ne10_last_stage_f =
90  ne10_c2c_1d_last_stage_neon<is_inverse, true>;
91  }
92  else
93  {
94  ne10_last_stage_f =
95  ne10_c2c_1d_last_stage_neon<is_inverse, false>;
96  }
97 
98  ne10_mixed_butterfly_f ((CPLX *) buffer,
99  (const CPLX *) Fin, // From Fin to buffer
100  factors,
101  twiddles,
102  (CPLX *) Fout); // Fout is "buffer" for these stages.
103 
104  ne10_last_stage_f ((CPLX *) Fout,
105  (const CPLX *) buffer, // From buffer to Fout
106  twiddles + nfft,
107  1, // out_step == fstride == 1
108  nfft, // in_step == mstride == nfft
109  nfft * 4); // Actual length of FFT
110 }
111 
113  ne10_fft_cpx_int32_t *Fout,
114  const ne10_fft_cpx_int32_t *Fin,
115  const ne10_int32_t *factors,
116  const ne10_fft_cpx_int32_t *twiddles,
117  ne10_fft_cpx_int32_t *buffer,
118  ne10_int32_t is_scaled_flag)
119 {
120  ne10_mixed_radix_generic_butterfly_int32_neon_dispatch <false> (
121  Fout, Fin, factors, twiddles, buffer, is_scaled_flag);
122 }
123 
125  ne10_fft_cpx_int32_t *Fout,
126  const ne10_fft_cpx_int32_t *Fin,
127  const ne10_int32_t *factors,
128  const ne10_fft_cpx_int32_t *twiddles,
129  ne10_fft_cpx_int32_t *buffer,
130  ne10_int32_t is_scaled_flag)
131 {
132  ne10_mixed_radix_generic_butterfly_int32_neon_dispatch <true> (
133  Fout, Fin, factors, twiddles, buffer, is_scaled_flag);
134 }
int32_t ne10_int32_t
Definition: NE10_types.h:76
void ne10_mixed_radix_generic_butterfly_inverse_int32_neon(ne10_fft_cpx_int32_t *Fout, const ne10_fft_cpx_int32_t *Fin, const ne10_int32_t *factors, const ne10_fft_cpx_int32_t *twiddles, ne10_fft_cpx_int32_t *buffer, ne10_int32_t is_scaled_flag)
Structure for the 32-bit fixed point FFT function.
Definition: NE10_types.h:325
void ne10_mixed_radix_generic_butterfly_int32_neon(ne10_fft_cpx_int32_t *Fout, const ne10_fft_cpx_int32_t *Fin, const ne10_int32_t *factors, const ne10_fft_cpx_int32_t *twiddles, ne10_fft_cpx_int32_t *buffer, ne10_int32_t is_scaled_flag)
void ne10_mixed_radix_generic_butterfly_int32_neon_dispatch(ne10_fft_cpx_int32_t *Fout, const ne10_fft_cpx_int32_t *Fin, const ne10_int32_t *factors, const ne10_fft_cpx_int32_t *twiddles, ne10_fft_cpx_int32_t *buffer, ne10_int32_t is_scaled_flag)