Project Ne10
An open, optimized software library for the ARM architecture.
NE10_fft_bfly.h
Go to the documentation of this file.
1 /*
2  * Copyright 2014-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : dsp/NE10_fft_bfly.h
30  */
31 
32 #include "NE10_types.h"
33 #include "NE10_fft_cplx_ops.h"
35 
36 #ifndef NE10_FFT_BFLY_H
37 #define NE10_FFT_BFLY_H
38 
// Real-to-complex butterfly, FFT size == 4.
//   IN [4]: real samples     R[0], R[1], R[2], R[3]
//   OUT[4]: packed spectrum  R[0], R[1], I[1], R[2]
// Both arguments are expanded several times, so pass expressions without
// side effects. IN is still read after OUT[0] has been stored, so OUT and IN
// must not refer to the same storage.
#define NE10_FFT_R2C_4R_RCR(OUT,IN) \
    do { \
        ne10_float32_t SCRATCH [2]; \
        /* Even-indexed and odd-indexed partial sums. */ \
        SCRATCH[0] = (IN)[0] + (IN)[2]; \
        SCRATCH[1] = (IN)[1] + (IN)[3]; \
        (OUT)[0] = SCRATCH[0] + SCRATCH[1]; \
        (OUT)[1] = (IN)[0] - (IN)[2]; \
        (OUT)[2] = (IN)[3] - (IN)[1]; \
        (OUT)[3] = SCRATCH[0] - SCRATCH[1]; \
    } while (0)
52 
// Complex-to-real butterfly, FFT size == 4; inverse of NE10_FFT_R2C_4R_RCR
// without normalisation (a round trip yields 4x the original samples).
//   IN [4]: packed spectrum  R[0], R[1], I[1], R[2]
//   OUT[4]: real samples     R[0], R[1], R[2], R[3]
// Both arguments are expanded several times, so pass expressions without
// side effects. Every IN element is read into SCRATCH before the first store
// to OUT.
#define NE10_FFT_C2R_RCR_4R(OUT,IN) \
    do { \
        ne10_float32_t SCRATCH [4]; \
        SCRATCH[0] = (IN)[0] + (IN)[3]; \
        SCRATCH[1] = (IN)[0] - (IN)[3]; \
        /* The middle bin is counted twice (x + x). */ \
        SCRATCH[2] = (IN)[1] + (IN)[1]; \
        SCRATCH[3] = (IN)[2] + (IN)[2]; \
        (OUT)[0] = SCRATCH[0] + SCRATCH[2]; \
        (OUT)[1] = SCRATCH[1] - SCRATCH[3]; \
        (OUT)[2] = SCRATCH[0] - SCRATCH[2]; \
        (OUT)[3] = SCRATCH[1] + SCRATCH[3]; \
    } while (0)
68 
// Real-to-complex butterfly, FFT size == 4, producing two complex values.
//   IN [4]: real samples  R[0], R[1], R[2], R[3]
//   OUT[4]: complex pair  R[0], I[0], R[1], I[1]
// TW_81N_F32 is defined outside this header (presumably -sqrt(2)/2; confirm
// against its definition).
// Both arguments are expanded several times, so pass expressions without
// side effects. IN[0] and IN[2] are read after OUT[0] has been stored, so OUT
// and IN must not refer to the same storage.
#define NE10_FFT_R2C_4R_CC(OUT,IN) \
    do { \
        ne10_float32_t SCRATCH [2]; \
        /* Difference and sum of the odd samples, scaled by the twiddle. */ \
        SCRATCH[0] = ((IN)[3] - (IN)[1]) * TW_81N_F32; \
        SCRATCH[1] = ((IN)[3] + (IN)[1]) * TW_81N_F32; \
        (OUT)[0] = (IN)[0] + SCRATCH[0]; \
        (OUT)[2] = (IN)[0] - SCRATCH[0]; \
        (OUT)[1] = SCRATCH[1] - (IN)[2]; \
        (OUT)[3] = SCRATCH[1] + (IN)[2]; \
    } while (0)
83 
// Complex-to-real butterfly, FFT size == 4; counterpart of NE10_FFT_R2C_4R_CC.
//   IN [4]: complex pair  R[0], I[0], R[1], I[1]
//   OUT[4]: real samples  R[0], R[1], R[2], R[3]
// TW_81_F32 and TW_81N_F32 are defined outside this header (presumably
// +sqrt(2)/2 and -sqrt(2)/2; confirm against their definitions).
// Both arguments are expanded several times, so pass expressions without
// side effects. OUT[0] and OUT[2] are stored before IN has been fully read,
// so OUT and IN must not refer to the same storage.
#define NE10_FFT_C2R_CC_4R(OUT,IN) \
    do { \
        ne10_float32_t SCRATCH [4]; \
        (OUT)[0] = ( (IN)[0] + (IN)[2]); \
        (OUT)[2] = (-(IN)[1] + (IN)[3]); \
        /* Double the even outputs in place (x + x). */ \
        (OUT)[0] = (OUT)[0] + (OUT)[0]; \
        (OUT)[2] = (OUT)[2] + (OUT)[2]; \
        SCRATCH[0] = ((IN)[0] - (IN)[2]); \
        SCRATCH[1] = ((IN)[1] + (IN)[3]); \
        SCRATCH[2] = (SCRATCH[0] + SCRATCH[1]); \
        SCRATCH[3] = (SCRATCH[0] - SCRATCH[1]); \
        /* Odd outputs: divide by the twiddle constants. */ \
        (OUT)[3] = SCRATCH[2] / TW_81N_F32; \
        (OUT)[1] = SCRATCH[3] / TW_81_F32; \
    } while (0)
101 
// Radix-4 butterfly on four complex values (real-to-complex path).
//   IN [4]: four ne10_fft_cpx_float32_t values, read through .r / .i
//   OUT[4]: four ne10_fft_cpx_float32_t values, written through .r / .i
// With A = IN[0]+IN[2], B = IN[0]-IN[2], C = IN[1]+IN[3], D = IN[1]-IN[3]
// and D' = (D.i, -D.r):
//   OUT[0] = A + C          OUT[2] = conj (A - C)
//   OUT[1] = B + D'         OUT[3] = conj (B - D')
// NE10_CPX_ADD / NE10_CPX_SUB are defined outside this header (presumably
// component-wise complex add / subtract; confirm against their definitions).
// Both arguments are expanded several times, so pass expressions without
// side effects. All of IN is read into TMP before the first store to OUT.
#define NE10_FFT_R2C_CC_CC(OUT,IN) \
    do { \
        ne10_fft_cpx_float32_t TMP[4]; \
        ne10_float32_t TMP_SWAP; \
        NE10_CPX_ADD (TMP[0], (IN)[0], (IN)[2]); \
        NE10_CPX_SUB (TMP[1], (IN)[0], (IN)[2]); \
        NE10_CPX_ADD (TMP[2], (IN)[1], (IN)[3]); \
        NE10_CPX_SUB (TMP[3], (IN)[1], (IN)[3]); \
        /* Turn TMP[3] = (r, i) into (i, -r). */ \
        TMP_SWAP = TMP[3].i; \
        TMP[3].i = - TMP[3].r; \
        TMP[3].r = TMP_SWAP; \
        (OUT)[0].r = TMP[0].r + TMP[2].r; \
        (OUT)[0].i = TMP[0].i + TMP[2].i; \
        (OUT)[2].r = TMP[0].r - TMP[2].r; \
        (OUT)[2].i = -(TMP[0].i - TMP[2].i); \
        (OUT)[1].r = TMP[1].r + TMP[3].r; \
        (OUT)[1].i = TMP[1].i + TMP[3].i; \
        (OUT)[3].r = TMP[1].r - TMP[3].r; \
        (OUT)[3].i = -(TMP[1].i - TMP[3].i); \
    } while (0)
125 
// Radix-4 butterfly on four complex values (complex-to-real path);
// counterpart of NE10_FFT_R2C_CC_CC.
//   IN [4]: four ne10_fft_cpx_float32_t values, read through .r / .i
//   OUT[4]: four ne10_fft_cpx_float32_t values, written through .r / .i
// With P = IN[0] + conj (IN[1]), Q = IN[0] - conj (IN[1]),
//      S = IN[2] + conj (IN[3]), T = IN[2] - conj (IN[3]) and T' = (-T.i, T.r):
//   OUT[0] = P + S          OUT[2] = P - S
//   OUT[1] = Q + T'         OUT[3] = Q - T'
// Note: passing NE10_FFT_R2C_CC_CC's output with elements 1 and 2 exchanged
// returns 4x the values originally given to that macro (no normalisation).
// Both arguments are expanded several times, so pass expressions without
// side effects. All of IN is read into SCRATCH before the first store to OUT.
#define NE10_FFT_C2R_CC_CC(OUT,IN) \
    do { \
        ne10_fft_cpx_float32_t SCRATCH[4]; \
        /* First pair: IN[0] combined with conj (IN[1]). */ \
        SCRATCH[0].r = ((IN)[0].r + (IN)[1].r); \
        SCRATCH[0].i = ((IN)[0].i - (IN)[1].i); \
        SCRATCH[2].r = ((IN)[0].r - (IN)[1].r); \
        SCRATCH[2].i = ((IN)[0].i + (IN)[1].i); \
        /* Second pair: IN[2] combined with conj (IN[3]); SCRATCH[3] holds */ \
        /* the difference with its parts swapped and the real part negated. */ \
        SCRATCH[1].r = ((IN)[2].r + (IN)[3].r); \
        SCRATCH[1].i = ((IN)[2].i - (IN)[3].i); \
        SCRATCH[3].r = -((IN)[2].i + (IN)[3].i); \
        SCRATCH[3].i = ((IN)[2].r - (IN)[3].r); \
        (OUT)[0].r = (SCRATCH[0].r + SCRATCH[1].r); \
        (OUT)[0].i = (SCRATCH[0].i + SCRATCH[1].i); \
        (OUT)[2].r = (SCRATCH[0].r - SCRATCH[1].r); \
        (OUT)[2].i = (SCRATCH[0].i - SCRATCH[1].i); \
        (OUT)[1].r = (SCRATCH[2].r + SCRATCH[3].r); \
        (OUT)[1].i = (SCRATCH[2].i + SCRATCH[3].i); \
        (OUT)[3].r = (SCRATCH[2].r - SCRATCH[3].r); \
        (OUT)[3].i = (SCRATCH[2].i - SCRATCH[3].i); \
    } while (0)
149 
150 #endif // NE10_FFT_BFLY_H