Project Ne10
An open, optimized software library for the ARM architecture.
NE10_fft_generic_int32.h
Go to the documentation of this file.
1 /*
2  * Copyright 2015-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /* license of Kiss FFT */
29 /*
30 Copyright (c) 2003-2010, Mark Borgerding
31 
32 All rights reserved.
33 
34 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
35 
36  * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
37  * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
38  * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
39 
40 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 */
42 
43 /*
44  * NE10 Library : dsp/NE10_fft_generic_int32.h
45  */
46 
47 #ifndef NE10_FFT_GENERIC_INT32_H
48 #define NE10_FFT_GENERIC_INT32_H
49 
50 #include "NE10_types.h"
51 #include "NE10_macros.h"
52 #include "NE10_fft.h"
53 
54 #define NE10_CPX_MUL_S32(Z,A,B) \
55  do { \
56  ne10_int32_t ARBR = ((NE10_F2I32_SAMPPROD) A.r * B.r) >> 31; \
57  ne10_int32_t ARBI = ((NE10_F2I32_SAMPPROD) A.r * B.i) >> 31; \
58  ne10_int32_t AIBR = ((NE10_F2I32_SAMPPROD) A.i * B.r) >> 31; \
59  ne10_int32_t AIBI = ((NE10_F2I32_SAMPPROD) A.i * B.i) >> 31; \
60  Z.r = ARBR - AIBI; \
61  Z.i = ARBI + AIBR; \
62  } while (0)
63 
64 #define NE10_S_MUL_S32(A,S) (((NE10_F2I32_SAMPPROD) (A) * (S)) >> 31)
65 
73 template<int RADIX>
74 inline void FFT_MUL_TW (ne10_fft_cpx_int32_t out[RADIX],
75  const ne10_fft_cpx_int32_t in[RADIX],
76  const ne10_fft_cpx_int32_t tw[RADIX]);
77 
78 template<>
79 inline void FFT_MUL_TW<2> (ne10_fft_cpx_int32_t out[2],
80  const ne10_fft_cpx_int32_t in[2],
81  const ne10_fft_cpx_int32_t tw[1])
82 {
83  out[0] = in[0];
84  NE10_CPX_MUL_S32 (out[1], in[1], tw[0]);
85 }
86 
87 template<int RADIX>
88 inline void FFT_MUL_TW (ne10_fft_cpx_int32_t out[RADIX],
89  const ne10_fft_cpx_int32_t in[RADIX],
90  const ne10_fft_cpx_int32_t tw[RADIX - 1])
91 {
92  FFT_MUL_TW<RADIX - 1> (out, in, tw);
93  NE10_CPX_MUL_S32 (out[RADIX - 1], in[RADIX - 1], tw[RADIX - 2]);
94 }
95 
97 // FFT Kernel
98 // F: Forward
99 // C: Complex
100 // U: Unscaled
102 
108 template<int RADIX>
109 inline void FFT_FCU (ne10_fft_cpx_int32_t scratch_out[RADIX],
110  const ne10_fft_cpx_int32_t scratch_in[RADIX]);
111 
117 template<>
118 inline void FFT_FCU<2> (ne10_fft_cpx_int32_t scratch_out[2],
119  const ne10_fft_cpx_int32_t scratch_in[2])
120 {
121  NE10_CPX_ADD (scratch_out[0], scratch_in[0], scratch_in[1]);
122  NE10_CPX_SUB (scratch_out[1], scratch_in[0], scratch_in[1]);
123 }
124 
130 template<>
131 inline void FFT_FCU<3> (ne10_fft_cpx_int32_t Fout[3],
132  const ne10_fft_cpx_int32_t Fin[3])
133 {
134  ne10_fft_cpx_int32_t scratch[4];
135  ne10_fft_cpx_int32_t scratch_in[3];
136 
137  scratch_in[0] = Fin[0];
138  scratch_in[1] = Fin[1];
139  scratch_in[2] = Fin[2];
140 
141  scratch[1] = scratch_in[1];
142  scratch[2] = scratch_in[2];
143 
144  NE10_CPX_ADD (scratch[3], scratch[1], scratch[2]);
145  NE10_CPX_SUB (scratch[0], scratch[1], scratch[2]);
146 
147  scratch_in[1].r = scratch_in[0].r - (scratch[3].r >> 1);
148  scratch_in[1].i = scratch_in[0].i - (scratch[3].i >> 1);
149 
150  scratch[0].r = NE10_S_MUL_S32 (scratch[0].r , -TW_3I_S32);
151  scratch[0].i = NE10_S_MUL_S32 (scratch[0].i , -TW_3I_S32);
152 
153  scratch_in[0].r += scratch[3].r;
154  scratch_in[0].i += scratch[3].i;
155 
156  scratch_in[2].r = scratch_in[1].r + scratch[0].i;
157  scratch_in[2].i = scratch_in[1].i - scratch[0].r;
158 
159  scratch_in[1].r -= scratch[0].i;
160  scratch_in[1].i += scratch[0].r;
161 
162  Fout[0] = scratch_in[0];
163  Fout[1] = scratch_in[1];
164  Fout[2] = scratch_in[2];
165 }
166 
172 template<>
173 inline void FFT_FCU<4> (ne10_fft_cpx_int32_t scratch_out[4],
174  const ne10_fft_cpx_int32_t scratch_in[4])
175 {
176  ne10_fft_cpx_int32_t scratch[4];
177 
178  NE10_CPX_ADD (scratch[0], scratch_in[0], scratch_in[2]);
179  NE10_CPX_SUB (scratch[1], scratch_in[0], scratch_in[2]);
180  NE10_CPX_ADD (scratch[2], scratch_in[1], scratch_in[3]);
181  NE10_CPX_SUB (scratch[3], scratch_in[1], scratch_in[3]);
182 
183  NE10_CPX_SUB (scratch_out[2], scratch[0], scratch[2]);
184  NE10_CPX_ADD (scratch_out[0], scratch[0], scratch[2]);
185 
186  scratch_out[1].r = scratch[1].r + scratch[3].i;
187  scratch_out[1].i = scratch[1].i - scratch[3].r;
188  scratch_out[3].r = scratch[1].r - scratch[3].i;
189  scratch_out[3].i = scratch[1].i + scratch[3].r;
190 }
191 
197 template<>
198 inline void FFT_FCU<5> (ne10_fft_cpx_int32_t Fout[5],
199  const ne10_fft_cpx_int32_t Fin[5])
200 {
201  ne10_fft_cpx_int32_t scratch[13], scratch_in[5];
202 
203  scratch_in[0] = Fin[0];
204  scratch_in[1] = Fin[1];
205  scratch_in[2] = Fin[2];
206  scratch_in[3] = Fin[3];
207  scratch_in[4] = Fin[4];
208 
209  scratch[0] = scratch_in[0];
210  scratch[1] = scratch_in[1];
211  scratch[2] = scratch_in[2];
212  scratch[3] = scratch_in[3];
213  scratch[4] = scratch_in[4];
214 
215  NE10_CPX_ADD (scratch[ 7], scratch[1], scratch[4]);
216  NE10_CPX_SUB (scratch[10], scratch[1], scratch[4]);
217  NE10_CPX_ADD (scratch[ 8], scratch[2], scratch[3]);
218  NE10_CPX_SUB (scratch[ 9], scratch[2], scratch[3]);
219 
220  scratch_in[0].r += scratch[7].r + scratch[8].r;
221  scratch_in[0].i += scratch[7].i + scratch[8].i;
222 
223  scratch[5].r = scratch[0].r
224  + NE10_S_MUL_S32 (scratch[7].r, TW_5A_S32.r)
225  + NE10_S_MUL_S32 (scratch[8].r, TW_5B_S32.r);
226  scratch[5].i = scratch[0].i
227  + NE10_S_MUL_S32 (scratch[7].i, TW_5A_S32.r)
228  + NE10_S_MUL_S32 (scratch[8].i, TW_5B_S32.r);
229 
230  scratch[6].r = NE10_S_MUL_S32 (scratch[10].i, TW_5A_S32.i)
231  + NE10_S_MUL_S32 (scratch[9].i, TW_5B_S32.i);
232  scratch[6].i = -NE10_S_MUL_S32 (scratch[10].r, TW_5A_S32.i)
233  - NE10_S_MUL_S32 (scratch[9].r, TW_5B_S32.i);
234 
235  NE10_CPX_SUB (scratch_in[1], scratch[5], scratch[6]);
236  NE10_CPX_ADD (scratch_in[4], scratch[5], scratch[6]);
237 
238  scratch[11].r = scratch[0].r
239  + NE10_S_MUL_S32 (scratch[7].r, TW_5B_S32.r)
240  + NE10_S_MUL_S32 (scratch[8].r, TW_5A_S32.r);
241  scratch[11].i = scratch[0].i
242  + NE10_S_MUL_S32 (scratch[7].i, TW_5B_S32.r)
243  + NE10_S_MUL_S32 (scratch[8].i, TW_5A_S32.r);
244 
245  scratch[12].r = -NE10_S_MUL_S32 (scratch[10].i, TW_5B_S32.i)
246  + NE10_S_MUL_S32 (scratch[9].i, TW_5A_S32.i);
247  scratch[12].i = NE10_S_MUL_S32 (scratch[10].r, TW_5B_S32.i)
248  - NE10_S_MUL_S32 (scratch[9].r, TW_5A_S32.i);
249 
250  NE10_CPX_ADD (scratch_in[2], scratch[11], scratch[12]);
251  NE10_CPX_SUB (scratch_in[3], scratch[11], scratch[12]);
252 
253  Fout[0] = scratch_in[0];
254  Fout[1] = scratch_in[1];
255  Fout[2] = scratch_in[2];
256  Fout[3] = scratch_in[3];
257  Fout[4] = scratch_in[4];
258 }
259 
263 template<class T>
264 inline void NE10_CONJ_S (T &);
265 
266 template<>
268 {
269  scalar.i = -scalar.i;
270 }
271 
277 template<int RADIX, class T>
278 inline void NE10_CONJ (T in[RADIX])
279 {
280  NE10_CONJ<RADIX - 1> (in);
281  NE10_CONJ_S<T> (in[RADIX - 1]);
282 }
283 
284 template<>
286 {
288 }
289 
290 template<class T>
291 inline T NE10_CPX_LOAD_S (const T *ptr)
292 {
293  return *ptr;
294 }
295 
296 template<class T>
297 inline void NE10_CPX_STORE_S (T *Fout, const T in)
298 {
299  *Fout = in;
300 }
301 
309 template<int RADIX, class T>
310 inline void NE10_LOAD_BY_STEP (T out[RADIX],
311  const T *Fin,
312  const ne10_int32_t in_step);
313 
314 template<>
316  ne10_fft_cpx_int32_t out[0],
317  const ne10_fft_cpx_int32_t *Fin,
318  const ne10_int32_t)
319 {
320  out[0] = NE10_CPX_LOAD_S<ne10_fft_cpx_int32_t> (Fin);
321 }
322 
323 template<int RADIX, class T>
324 inline void NE10_LOAD_BY_STEP (T out[RADIX],
325  const T *Fin,
326  const ne10_int32_t in_step)
327 {
328  out[0] = NE10_CPX_LOAD_S<T> (Fin);
329  NE10_LOAD_BY_STEP<RADIX - 1, T> (out + 1, Fin + in_step, in_step);
330 }
331 
339 template<int RADIX, class T>
340 inline void NE10_STORE_BY_STEP (T *Fout,
341  const T in[RADIX],
342  const ne10_int32_t out_step)
343 {
344  NE10_CPX_STORE_S<T> (Fout, in[0]);
345  NE10_STORE_BY_STEP<RADIX - 1, T> (Fout + out_step, in + 1, out_step);
346 }
347 
348 template<>
350  ne10_fft_cpx_int32_t *Fout,
351  const ne10_fft_cpx_int32_t in[1],
352  const ne10_int32_t)
353 {
354  Fout[0] = in[0];
355 }
356 
363 template<int RADIX>
364 inline void NE10_SCALED (ne10_fft_cpx_int32_t out[RADIX],
365  const ne10_int32_t scaling)
366 {
367  NE10_F2I32_FIXDIV (out[0], scaling);
368  NE10_SCALED<RADIX - 1> (out + 1, scaling);
369 }
370 
371 template<>
373  const ne10_int32_t scaling)
374 {
375  NE10_F2I32_FIXDIV (out[0], scaling);
376 }
377 
378 #endif // NE10_FFT_GENERIC_INT32_H
void FFT_FCU(ne10_fft_cpx_int32_t scratch_out[RADIX], const ne10_fft_cpx_int32_t scratch_in[RADIX])
Basic fixed-point butterfly used in each stage.
int32_t ne10_int32_t
Definition: NE10_types.h:76
void NE10_SCALED< 1 >(ne10_fft_cpx_int32_t out[1], const ne10_int32_t scaling)
void NE10_STORE_BY_STEP< 1, ne10_fft_cpx_int32_t >(ne10_fft_cpx_int32_t *Fout, const ne10_fft_cpx_int32_t in[1], const ne10_int32_t)
T NE10_CPX_LOAD_S(const T *ptr)
#define NE10_S_MUL_S32(A, S)
void NE10_LOAD_BY_STEP< 1, ne10_fft_cpx_int32_t >(ne10_fft_cpx_int32_t out[0], const ne10_fft_cpx_int32_t *Fin, const ne10_int32_t)
void NE10_SCALED(ne10_fft_cpx_int32_t out[RADIX], const ne10_int32_t scaling)
Scale a fixed-size array by given divider.
#define NE10_F2I32_FIXDIV(c, div)
Definition: NE10_macros.h:87
void FFT_FCU< 2 >(ne10_fft_cpx_int32_t scratch_out[2], const ne10_fft_cpx_int32_t scratch_in[2])
Basic fixed-point Radix-2 butterfly used in each stage.
void NE10_CONJ< 1, ne10_fft_cpx_int32_t >(ne10_fft_cpx_int32_t in[1])
void FFT_MUL_TW< 2 >(ne10_fft_cpx_int32_t out[2], const ne10_fft_cpx_int32_t in[2], const ne10_fft_cpx_int32_t tw[1])
void NE10_STORE_BY_STEP(T *Fout, const T in[RADIX], const ne10_int32_t out_step)
Store a fixed-size array to given buffer, by given step.
Structure for the 32-bit fixed point FFT function.
Definition: NE10_types.h:325
ne10_int32_t i
Definition: NE10_types.h:328
void NE10_LOAD_BY_STEP(T out[RADIX], const T *Fin, const ne10_int32_t in_step)
Load a fixed-size array from given buffer, by given step.
void NE10_CONJ_S(T &)
Conjugate a fix-point complex scalar/NEON vector.
#define NE10_CPX_MUL_S32(Z, A, B)
void NE10_CONJ(T in[RADIX])
Conjugate a fix-point complex array.
void FFT_FCU< 5 >(ne10_fft_cpx_int32_t Fout[5], const ne10_fft_cpx_int32_t Fin[5])
Basic fixed-point radix-5 butterfly used in each stage.
void NE10_CONJ_S< ne10_fft_cpx_int32_t >(ne10_fft_cpx_int32_t &scalar)
void FFT_FCU< 3 >(ne10_fft_cpx_int32_t Fout[3], const ne10_fft_cpx_int32_t Fin[3])
Basic fixed-point radix-3 butterfly used in each stage.
ne10_int32_t r
Definition: NE10_types.h:327
#define NE10_CPX_ADD(Z, A, B)
void FFT_FCU< 4 >(ne10_fft_cpx_int32_t scratch_out[4], const ne10_fft_cpx_int32_t scratch_in[4])
Basic fixed-point radix-4 butterfly used in each stage.
void NE10_CPX_STORE_S(T *Fout, const T in)
void FFT_MUL_TW(ne10_fft_cpx_int32_t out[RADIX], const ne10_fft_cpx_int32_t in[RADIX], const ne10_fft_cpx_int32_t tw[RADIX])
Multiply input with twiddles.
#define NE10_CPX_SUB(Z, A, B)