Project Ne10
An open, optimized software library for the ARM architecture.
NE10_iir.c
Go to the documentation of this file.
1 /*
2  * Copyright 2012-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : dsp/NE10_iir.c
30  */
31 
32 #include "NE10_types.h"
33 
113  ne10_float32_t * pSrc,
114  ne10_float32_t * pDst,
115  ne10_uint32_t blockSize)
116 {
117  ne10_float32_t fcurr, fnext = 0, gcurr, gnext; /* Temporary variables for lattice stages */
118  ne10_float32_t acc; /* Accumlator */
119  ne10_uint32_t blkCnt, tapCnt; /* temporary variables for counts */
120  ne10_float32_t *px1, *px2, *pk, *pv; /* temporary pointers for state and coef */
121  ne10_uint32_t numStages = S->numStages; /* number of stages */
122  ne10_float32_t *pState; /* State pointer */
123  ne10_float32_t *pStateCurnt; /* State current pointer */
124 
125 
126  /* Run the below code for Cortex-M4 and Cortex-M3 */
127 
128  gcurr = 0.0f;
129  blkCnt = blockSize;
130 
131  pState = &S->pState[0];
132 
133  /* Sample processing */
134  while (blkCnt > 0u)
135  {
136  /* Read Sample from input buffer */
137  /* fN(n) = x(n) */
138  fcurr = *pSrc++;
139 
140  /* Initialize state read pointer */
141  px1 = pState;
142  /* Initialize state write pointer */
143  px2 = pState;
144  /* Set accumulator to zero */
145  acc = 0.0f;
146  /* Initialize Ladder coeff pointer */
147  pv = &S->pvCoeffs[S->numStages];
148  /* Initialize Reflection coeff pointer */
149  pk = &S->pkCoeffs[0];
150 
151 
152  /* Process sample for first tap */
153  gcurr = *px1++;
154  /* fN-1(n) = fN(n) - kN * gN-1(n-1) */
155  fnext = fcurr - ( (*pk) * gcurr);
156  /* gN(n) = kN * fN-1(n) + gN-1(n-1) */
157  gnext = (fnext * (*pk++)) + gcurr;
158  /* write gN(n) into state for next sample processing */
159  *px2++ = gnext;
160  /* y(n) += gN(n) * vN */
161  acc += (gnext * (*pv--));
162 
163  /* Update f values for next coefficient processing */
164  fcurr = fnext;
165 
166  /* Loop unrolling. Process 4 taps at a time. */
167  tapCnt = (numStages - 1u) >> 2;
168 
169  while (tapCnt > 0u)
170  {
171  /* Process sample for 2nd, 6th ...taps */
172  /* Read gN-2(n-1) from state buffer */
173  gcurr = *px1++;
174  /* Process sample for 2nd, 6th .. taps */
175  /* fN-2(n) = fN-1(n) - kN-1 * gN-2(n-1) */
176  fnext = fcurr - ( (*pk) * gcurr);
177  /* gN-1(n) = kN-1 * fN-2(n) + gN-2(n-1) */
178  gnext = (fnext * (*pk++)) + gcurr;
179  /* y(n) += gN-1(n) * vN-1 */
180  /* process for gN-5(n) * vN-5, gN-9(n) * vN-9 ... */
181  acc += (gnext * (*pv--));
182  /* write gN-1(n) into state for next sample processing */
183  *px2++ = gnext;
184 
185 
186  /* Process sample for 3rd, 7th ...taps */
187  /* Read gN-3(n-1) from state buffer */
188  gcurr = *px1++;
189  /* Process sample for 3rd, 7th .. taps */
190  /* fN-3(n) = fN-2(n) - kN-2 * gN-3(n-1) */
191  fcurr = fnext - ( (*pk) * gcurr);
192  /* gN-2(n) = kN-2 * fN-3(n) + gN-3(n-1) */
193  gnext = (fcurr * (*pk++)) + gcurr;
194  /* y(n) += gN-2(n) * vN-2 */
195  /* process for gN-6(n) * vN-6, gN-10(n) * vN-10 ... */
196  acc += (gnext * (*pv--));
197  /* write gN-2(n) into state for next sample processing */
198  *px2++ = gnext;
199 
200 
201  /* Process sample for 4th, 8th ...taps */
202  /* Read gN-4(n-1) from state buffer */
203  gcurr = *px1++;
204  /* Process sample for 4th, 8th .. taps */
205  /* fN-4(n) = fN-3(n) - kN-3 * gN-4(n-1) */
206  fnext = fcurr - ( (*pk) * gcurr);
207  /* gN-3(n) = kN-3 * fN-4(n) + gN-4(n-1) */
208  gnext = (fnext * (*pk++)) + gcurr;
209  /* y(n) += gN-3(n) * vN-3 */
210  /* process for gN-7(n) * vN-7, gN-11(n) * vN-11 ... */
211  acc += (gnext * (*pv--));
212  /* write gN-3(n) into state for next sample processing */
213  *px2++ = gnext;
214 
215 
216  /* Process sample for 5th, 9th ...taps */
217  /* Read gN-5(n-1) from state buffer */
218  gcurr = *px1++;
219  /* Process sample for 5th, 9th .. taps */
220  /* fN-5(n) = fN-4(n) - kN-4 * gN-5(n-1) */
221  fcurr = fnext - ( (*pk) * gcurr);
222  /* gN-4(n) = kN-4 * fN-5(n) + gN-5(n-1) */
223  gnext = (fcurr * (*pk++)) + gcurr;
224  /* y(n) += gN-4(n) * vN-4 */
225  /* process for gN-8(n) * vN-8, gN-12(n) * vN-12 ... */
226  acc += (gnext * (*pv--));
227  /* write gN-4(n) into state for next sample processing */
228  *px2++ = gnext;
229 
230  tapCnt--;
231 
232  }
233 
234  fnext = fcurr;
235 
236  /* If the filter length is not a multiple of 4, compute the remaining filter taps */
237  tapCnt = (numStages - 1u) % 0x4u;
238 
239  while (tapCnt > 0u)
240  {
241  gcurr = *px1++;
242  /* Process sample for last taps */
243  fnext = fcurr - ( (*pk) * gcurr);
244  gnext = (fnext * (*pk++)) + gcurr;
245  /* Output samples for last taps */
246  acc += (gnext * (*pv--));
247  *px2++ = gnext;
248  fcurr = fnext;
249 
250  tapCnt--;
251 
252  }
253 
254 
255  /* y(n) += g0(n) * v0 */
256  acc += (fnext * (*pv));
257 
258  *px2++ = fnext;
259 
260  /* write out into pDst */
261  *pDst++ = acc;
262 
263  /* Advance the state pointer by 1 to process the next sample */
264  pState = pState + 1u;
265  blkCnt--;
266 
267  }
268 
269  /* Processing is complete. Now copy last S->numStages samples to start of the buffer
270  in preparation for processing the next frame */
271 
272  /* Points to the start of the state buffer */
273  pStateCurnt = &S->pState[0];
274  pState = &S->pState[blockSize];
275 
276  tapCnt = numStages >> 2u;
277 
278  /* copy data */
279  while (tapCnt > 0u)
280  {
281  *pStateCurnt++ = *pState++;
282  *pStateCurnt++ = *pState++;
283  *pStateCurnt++ = *pState++;
284  *pStateCurnt++ = *pState++;
285 
286  /* Decrement the loop counter */
287  tapCnt--;
288 
289  }
290 
291  /* Calculate remaining number of copies */
292  tapCnt = (numStages) % 0x4u;
293 
294  /* Copy the remaining floating-point state samples */
295  while (tapCnt > 0u)
296  {
297  *pStateCurnt++ = *pState++;
298 
299  /* Decrement the loop counter */
300  tapCnt--;
301  }
302 
303 } //end of IIR_Lattice group
float ne10_float32_t
Definition: NE10_types.h:80
ne10_uint16_t numStages
Number of stages of the lattice filter.
Definition: NE10_types.h:418
ne10_float32_t * pkCoeffs
Points to the reflection coefficient array.
Definition: NE10_types.h:420
void ne10_iir_lattice_float_c(const ne10_iir_lattice_instance_f32_t *S, ne10_float32_t *pSrc, ne10_float32_t *pDst, ne10_uint32_t blockSize)
Specific implementation of ne10_iir_lattice_float using plain C.
Definition: NE10_iir.c:112
uint32_t ne10_uint32_t
Definition: NE10_types.h:77
Instance structure for the floating point IIR Lattice filter.
Definition: NE10_types.h:416
ne10_float32_t * pvCoeffs
Points to the ladder coefficient array.
Definition: NE10_types.h:421
ne10_float32_t * pState
Points to the state variable array.
Definition: NE10_types.h:419