/* Radix-2 kernels.  For N = 2 the forward and inverse butterflies
 * coincide: X[0] = x[0] + x[1], X[1] = x[0] - x[1].  The scaled
 * variants shift right by 1, returning X[k] / 2 so that Q15 values
 * cannot overflow. */

/* forward, unscaled */
Fout[0].r = Fin[0].r + Fin[1].r;
Fout[0].i = Fin[0].i + Fin[1].i;
Fout[1].r = Fin[0].r - Fin[1].r;
Fout[1].i = Fin[0].i - Fin[1].i;

/* backward, unscaled (same arithmetic) */
Fout[0].r = Fin[0].r + Fin[1].r;
Fout[0].i = Fin[0].i + Fin[1].i;
Fout[1].r = Fin[0].r - Fin[1].r;
Fout[1].i = Fin[0].i - Fin[1].i;

/* forward, scaled */
Fout[0].r = (Fin[0].r + Fin[1].r) >> 1;
Fout[0].i = (Fin[0].i + Fin[1].i) >> 1;
Fout[1].r = (Fin[0].r - Fin[1].r) >> 1;
Fout[1].i = (Fin[0].i - Fin[1].i) >> 1;

/* backward, scaled */
Fout[0].r = (Fin[0].r + Fin[1].r) >> 1;
Fout[0].i = (Fin[0].i + Fin[1].i) >> 1;
Fout[1].r = (Fin[0].r - Fin[1].r) >> 1;
Fout[1].i = (Fin[0].i - Fin[1].i) >> 1;
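Taken together, these bodies implement the 2-point DFT, with the scaled variants returning X[k]/2. Below is a minimal standalone check of the scaled kernel against hand-computed values; this is my harness, not library code, and the cpx16 struct merely stands in for ne10_fft_cpx_int16_t. Like the library, it relies on arithmetic right shift of negative values.

#include <stdio.h>
#include <stdint.h>

typedef struct { int16_t r; int16_t i; } cpx16;   /* stand-in for ne10_fft_cpx_int16_t */

static void fft2_forward_scaled (cpx16 *Fout, const cpx16 *Fin)
{
    Fout[0].r = (Fin[0].r + Fin[1].r) >> 1;   /* X[0] = (x[0] + x[1]) / 2 */
    Fout[0].i = (Fin[0].i + Fin[1].i) >> 1;
    Fout[1].r = (Fin[0].r - Fin[1].r) >> 1;   /* X[1] = (x[0] - x[1]) / 2 */
    Fout[1].i = (Fin[0].i - Fin[1].i) >> 1;
}

int main (void)
{
    cpx16 in[2] = { { 1000, -200 }, { 300, 400 } };
    cpx16 out[2];
    fft2_forward_scaled (out, in);
    printf ("X[0] = (%d, %d)  expect (650, 100)\n", out[0].r, out[0].i);
    printf ("X[1] = (%d, %d)  expect (350, -300)\n", out[1].r, out[1].i);
    return 0;
}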
/* Radix-4 kernels.  tmp/s0 hold the sums and s2/s1 the differences of
 * inputs two apart; the forward kernels rotate s1 by -j for the odd
 * outputs, the backward kernels by +j.  The scaled variants shift
 * every term right by 2, dividing by the radix. */

/* forward, unscaled */
s2_r  = Fin[0].r - Fin[2].r;
s2_i  = Fin[0].i - Fin[2].i;
tmp_r = Fin[0].r + Fin[2].r;
tmp_i = Fin[0].i + Fin[2].i;
s0_r  = Fin[1].r + Fin[3].r;
s0_i  = Fin[1].i + Fin[3].i;
s1_r  = Fin[1].r - Fin[3].r;
s1_i  = Fin[1].i - Fin[3].i;
Fout[2].r = tmp_r - s0_r;
Fout[2].i = tmp_i - s0_i;
Fout[0].r = tmp_r + s0_r;
Fout[0].i = tmp_i + s0_i;
Fout[1].r = s2_r + s1_i;    /* s2 - j*s1 */
Fout[1].i = s2_i - s1_r;
Fout[3].r = s2_r - s1_i;    /* s2 + j*s1 */
Fout[3].i = s2_i + s1_r;

/* backward, unscaled */
s2_r  = Fin[0].r - Fin[2].r;
s2_i  = Fin[0].i - Fin[2].i;
tmp_r = Fin[0].r + Fin[2].r;
tmp_i = Fin[0].i + Fin[2].i;
s0_r  = Fin[1].r + Fin[3].r;
s0_i  = Fin[1].i + Fin[3].i;
s1_r  = Fin[1].r - Fin[3].r;
s1_i  = Fin[1].i - Fin[3].i;
Fout[2].r = tmp_r - s0_r;
Fout[2].i = tmp_i - s0_i;
Fout[0].r = tmp_r + s0_r;
Fout[0].i = tmp_i + s0_i;
Fout[1].r = s2_r - s1_i;    /* s2 + j*s1 */
Fout[1].i = s2_i + s1_r;
Fout[3].r = s2_r + s1_i;    /* s2 - j*s1 */
Fout[3].i = s2_i - s1_r;

/* forward, scaled (every term divided by 4) */
s2_r  = (Fin[0].r - Fin[2].r) >> 2;
s2_i  = (Fin[0].i - Fin[2].i) >> 2;
tmp_r = (Fin[0].r + Fin[2].r) >> 2;
tmp_i = (Fin[0].i + Fin[2].i) >> 2;
s0_r  = (Fin[1].r + Fin[3].r) >> 2;
s0_i  = (Fin[1].i + Fin[3].i) >> 2;
s1_r  = (Fin[1].r - Fin[3].r) >> 2;
s1_i  = (Fin[1].i - Fin[3].i) >> 2;
Fout[2].r = tmp_r - s0_r;
Fout[2].i = tmp_i - s0_i;
Fout[0].r = tmp_r + s0_r;
Fout[0].i = tmp_i + s0_i;
Fout[1].r = s2_r + s1_i;
Fout[1].i = s2_i - s1_r;
Fout[3].r = s2_r - s1_i;
Fout[3].i = s2_i + s1_r;

/* backward, scaled */
s2_r  = (Fin[0].r - Fin[2].r) >> 2;
s2_i  = (Fin[0].i - Fin[2].i) >> 2;
tmp_r = (Fin[0].r + Fin[2].r) >> 2;
tmp_i = (Fin[0].i + Fin[2].i) >> 2;
s0_r  = (Fin[1].r + Fin[3].r) >> 2;
s0_i  = (Fin[1].i + Fin[3].i) >> 2;
s1_r  = (Fin[1].r - Fin[3].r) >> 2;
s1_i  = (Fin[1].i - Fin[3].i) >> 2;
Fout[2].r = tmp_r - s0_r;
Fout[2].i = tmp_i - s0_i;
Fout[0].r = tmp_r + s0_r;
Fout[0].i = tmp_i + s0_i;
Fout[1].r = s2_r - s1_i;
Fout[1].i = s2_i + s1_r;
Fout[3].r = s2_r + s1_i;
Fout[3].i = s2_i - s1_r;
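The forward and backward radix-4 bodies differ only in the sign of the j-rotation applied to the odd difference s1: multiplying a complex (r, i) by -j gives (i, -r), which is exactly why the forward kernels write Fout[1].r = s2_r + s1_i while the backward kernels write s2_r - s1_i. A small double-precision check of that identity follows; it is my example, not library code, and assumes only C99 <complex.h>.

#include <complex.h>
#include <stdio.h>

int main (void)
{
    double complex x[4] = { 1 + 2*I, 3 - 1*I, -2 + 0*I, 0 + 4*I };
    /* kernel form: X[1] = (x0 - x2) - j*(x1 - x3), i.e. s2 - j*s1 */
    double complex X1_kernel = (x[0] - x[2]) + (-I) * (x[1] - x[3]);
    /* naive form: X[1] = x0 + W*x1 + W^2*x2 + W^3*x3 with W = e^{-j*pi/2} = -j */
    double complex X1_naive  = x[0] + (-I) * x[1] + (-1) * x[2] + I * x[3];
    printf ("kernel (%g, %g) vs naive (%g, %g)\n",
            creal (X1_kernel), cimag (X1_kernel),
            creal (X1_naive),  cimag (X1_naive));   /* both print (-2, -1) */
    return 0;
}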
/* Radix-8 kernels.  Each kernel first forms the eight sums and
 * differences s0..s7 of inputs four apart.  The t0..t3 values used in
 * the output stages are derived from the s-values (with the radix-8
 * twiddle applied where needed) in lines that are not part of this
 * excerpt; the t variables are recomputed between the even-indexed
 * and odd-indexed output groups. */

/* forward, unscaled */
ne10_int16_t s0_r, s0_i, s1_r, s1_i, s2_r, s2_i, s3_r, s3_i, s4_r, s4_i, s5_r, s5_i, s6_r, s6_i, s7_r, s7_i;
ne10_int16_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i;

s0_r = Fin[0].r + Fin[4].r;
s0_i = Fin[0].i + Fin[4].i;
s1_r = Fin[0].r - Fin[4].r;
s1_i = Fin[0].i - Fin[4].i;
s2_r = Fin[1].r + Fin[5].r;
s2_i = Fin[1].i + Fin[5].i;
s3_r = Fin[1].r - Fin[5].r;
s3_i = Fin[1].i - Fin[5].i;
s4_r = Fin[2].r + Fin[6].r;
s4_i = Fin[2].i + Fin[6].i;
s5_r = Fin[2].r - Fin[6].r;
s5_i = Fin[2].i - Fin[6].i;
s6_r = Fin[3].r + Fin[7].r;
s6_i = Fin[3].i + Fin[7].i;
s7_r = Fin[3].r - Fin[7].r;
s7_i = Fin[3].i - Fin[7].i;

/* (t0..t3 formed from the s-values; not shown in this excerpt) */
Fout[0].r = t1_r + t2_r;
Fout[0].i = t1_i + t2_i;
Fout[4].r = t1_r - t2_r;
Fout[4].i = t1_i - t2_i;
Fout[2].r = t0_r + t3_i;    /* -j rotation, as in the radix-4 forward kernel */
Fout[2].i = t0_i - t3_r;
Fout[6].r = t0_r - t3_i;
Fout[6].i = t0_i + t3_r;
/* (t0..t3 recomputed for the odd-indexed outputs; not shown) */
Fout[1].r = t1_r + t2_r;
Fout[1].i = t1_i + t2_i;
Fout[5].r = t1_r - t2_r;
Fout[5].i = t1_i - t2_i;
Fout[3].r = t0_r + t3_i;
Fout[3].i = t0_i - t3_r;
Fout[7].r = t0_r - t3_i;
Fout[7].i = t0_i + t3_r;

/* backward, unscaled */
ne10_int16_t s0_r, s0_i, s1_r, s1_i, s2_r, s2_i, s3_r, s3_i, s4_r, s4_i, s5_r, s5_i, s6_r, s6_i, s7_r, s7_i;
ne10_int16_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i;

s0_r = Fin[0].r + Fin[4].r;
s0_i = Fin[0].i + Fin[4].i;
s1_r = Fin[0].r - Fin[4].r;
s1_i = Fin[0].i - Fin[4].i;
s2_r = Fin[1].r + Fin[5].r;
s2_i = Fin[1].i + Fin[5].i;
s3_r = Fin[1].r - Fin[5].r;
s3_i = Fin[1].i - Fin[5].i;
s4_r = Fin[2].r + Fin[6].r;
s4_i = Fin[2].i + Fin[6].i;
s5_r = Fin[2].r - Fin[6].r;
s5_i = Fin[2].i - Fin[6].i;
s6_r = Fin[3].r + Fin[7].r;
s6_i = Fin[3].i + Fin[7].i;
s7_r = Fin[3].r - Fin[7].r;
s7_i = Fin[3].i - Fin[7].i;

/* (t0..t3 formed from the s-values; not shown) */
Fout[0].r = t1_r + t2_r;
Fout[0].i = t1_i + t2_i;
Fout[4].r = t1_r - t2_r;
Fout[4].i = t1_i - t2_i;
Fout[2].r = t0_r - t3_i;    /* +j rotation (inverse transform) */
Fout[2].i = t0_i + t3_r;
Fout[6].r = t0_r + t3_i;
Fout[6].i = t0_i - t3_r;
/* (t0..t3 recomputed for the odd-indexed outputs; not shown) */
Fout[1].r = t1_r + t2_r;
Fout[1].i = t1_i + t2_i;
Fout[5].r = t1_r - t2_r;
Fout[5].i = t1_i - t2_i;
Fout[3].r = t0_r - t3_i;
Fout[3].i = t0_i + t3_r;
Fout[7].r = t0_r + t3_i;
Fout[7].i = t0_i - t3_r;

/* forward, scaled (every term divided by 8) */
ne10_int16_t s0_r, s0_i, s1_r, s1_i, s2_r, s2_i, s3_r, s3_i, s4_r, s4_i, s5_r, s5_i, s6_r, s6_i, s7_r, s7_i;
ne10_int16_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i;

s0_r = (Fin[0].r + Fin[4].r) >> 3;
s0_i = (Fin[0].i + Fin[4].i) >> 3;
s1_r = (Fin[0].r - Fin[4].r) >> 3;
s1_i = (Fin[0].i - Fin[4].i) >> 3;
s2_r = (Fin[1].r + Fin[5].r) >> 3;
s2_i = (Fin[1].i + Fin[5].i) >> 3;
s3_r = (Fin[1].r - Fin[5].r) >> 3;
s3_i = (Fin[1].i - Fin[5].i) >> 3;
s4_r = (Fin[2].r + Fin[6].r) >> 3;
s4_i = (Fin[2].i + Fin[6].i) >> 3;
s5_r = (Fin[2].r - Fin[6].r) >> 3;
s5_i = (Fin[2].i - Fin[6].i) >> 3;
s6_r = (Fin[3].r + Fin[7].r) >> 3;
s6_i = (Fin[3].i + Fin[7].i) >> 3;
s7_r = (Fin[3].r - Fin[7].r) >> 3;
s7_i = (Fin[3].i - Fin[7].i) >> 3;

/* (t0..t3 formed from the s-values; not shown) */
Fout[0].r = t1_r + t2_r;
Fout[0].i = t1_i + t2_i;
Fout[4].r = t1_r - t2_r;
Fout[4].i = t1_i - t2_i;
Fout[2].r = t0_r + t3_i;
Fout[2].i = t0_i - t3_r;
Fout[6].r = t0_r - t3_i;
Fout[6].i = t0_i + t3_r;
/* (t0..t3 recomputed for the odd-indexed outputs; not shown) */
Fout[1].r = t1_r + t2_r;
Fout[1].i = t1_i + t2_i;
Fout[5].r = t1_r - t2_r;
Fout[5].i = t1_i - t2_i;
Fout[3].r = t0_r + t3_i;
Fout[3].i = t0_i - t3_r;
Fout[7].r = t0_r - t3_i;
Fout[7].i = t0_i + t3_r;

/* backward, scaled */
ne10_int16_t s0_r, s0_i, s1_r, s1_i, s2_r, s2_i, s3_r, s3_i, s4_r, s4_i, s5_r, s5_i, s6_r, s6_i, s7_r, s7_i;
ne10_int16_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i;

s0_r = (Fin[0].r + Fin[4].r) >> 3;
s0_i = (Fin[0].i + Fin[4].i) >> 3;
s1_r = (Fin[0].r - Fin[4].r) >> 3;
s1_i = (Fin[0].i - Fin[4].i) >> 3;
s2_r = (Fin[1].r + Fin[5].r) >> 3;
s2_i = (Fin[1].i + Fin[5].i) >> 3;
s3_r = (Fin[1].r - Fin[5].r) >> 3;
s3_i = (Fin[1].i - Fin[5].i) >> 3;
s4_r = (Fin[2].r + Fin[6].r) >> 3;
s4_i = (Fin[2].i + Fin[6].i) >> 3;
s5_r = (Fin[2].r - Fin[6].r) >> 3;
s5_i = (Fin[2].i - Fin[6].i) >> 3;
s6_r = (Fin[3].r + Fin[7].r) >> 3;
s6_i = (Fin[3].i + Fin[7].i) >> 3;
s7_r = (Fin[3].r - Fin[7].r) >> 3;
s7_i = (Fin[3].i - Fin[7].i) >> 3;

/* (t0..t3 formed from the s-values; not shown) */
Fout[0].r = t1_r + t2_r;
Fout[0].i = t1_i + t2_i;
Fout[4].r = t1_r - t2_r;
Fout[4].i = t1_i - t2_i;
Fout[2].r = t0_r - t3_i;
Fout[2].i = t0_i + t3_r;
Fout[6].r = t0_r + t3_i;
Fout[6].i = t0_i - t3_r;
/* (t0..t3 recomputed for the odd-indexed outputs; not shown) */
Fout[1].r = t1_r + t2_r;
Fout[1].i = t1_i + t2_i;
Fout[5].r = t1_r - t2_r;
Fout[5].i = t1_i - t2_i;
Fout[3].r = t0_r - t3_i;
Fout[3].i = t0_i + t3_r;
Fout[7].r = t0_r + t3_i;
Fout[7].i = t0_i - t3_r;
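A pattern worth noting across these fixed kernels: the scaled radix-2, radix-4, and radix-8 bodies shift by 1, 2, and 3 bits respectively, i.e. each stage divides by its own radix, so a fully scaled N-point forward transform returns X[k]/N and Q15 intermediates cannot overflow. The sketch below illustrates how the per-stage shifts accumulate to 1/N; the assumption that larger sizes are factored into radix-4 and radix-2 stages is mine, inferred from the mixed-radix function names later in this file.

#include <stdio.h>

int main (void)
{
    int n = 16, shift = 0;
    /* factor n into radix-4 and radix-2 stages, accumulating the
     * per-stage right shifts of a scaled fixed-point pipeline */
    while (n > 1) {
        if (n % 4 == 0) { shift += 2; n /= 4; }
        else            { shift += 1; n /= 2; }
    }
    printf ("total scale = 1/%d\n", 1 << shift);   /* prints 1/16 */
    return 0;
}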
int16x8x2_t q2_fpk, q2_fpnk, q2_tw, q2_dst, q2_dst2;
int16x8_t q_fpnk_r, q_fpnk_i;
int16x8_t q_f1k_r, q_f1k_i, q_f2k_r, q_f2k_i;
int16x8_t q_tw_r, q_tw_i;
int16x8_t q_tmp0, q_tmp1, q_tmp2, q_tmp3;
int16x8_t q_dst2_r, q_dst2_i;
int16_t *p_src, *p_src2, *p_dst, *p_dst2, *p_twiddles;

/* DC and Nyquist bins of a real spectrum are purely real; tdc holds
 * bin 0 of the half-size complex FFT. */
dst[0].r = tdc.r + tdc.i;
dst[ncfft].r = tdc.r - tdc.i;
dst[ncfft].i = dst[0].i = 0;
/* scaled path: 8 bin pairs per iteration */
for (k = 1; k <= count; k += 8)
{
    p_src      = (int16_t*) (& (src[k]));
    p_src2     = (int16_t*) (& (src[ncfft - k - 7]));
    p_twiddles = (int16_t*) (& (twiddles[k - 1]));
    p_dst      = (int16_t*) (& (dst[k]));
    p_dst2     = (int16_t*) (& (dst[ncfft - k - 7]));

    q2_fpk  = vld2q_s16 (p_src);    /* de-interleave 8 bins F[k..k+7] */
    q2_fpnk = vld2q_s16 (p_src2);   /* the 8 mirrored bins */

    q2_tw = vld2q_s16 (p_twiddles);
    /* reverse the mirrored bins so lane j holds F[ncfft - k - j] */
    q2_fpnk.val[0] = vrev32q_s16 (q2_fpnk.val[0]);
    q2_fpnk.val[1] = vrev32q_s16 (q2_fpnk.val[1]);
    q2_fpnk.val[0] = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q2_fpnk.val[0])));
    q2_fpnk.val[1] = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q2_fpnk.val[1])));
    q_fpnk_r = vcombine_s16 (vget_high_s16 (q2_fpnk.val[0]), vget_low_s16 (q2_fpnk.val[0]));
    q_fpnk_i = vcombine_s16 (vget_high_s16 (q2_fpnk.val[1]), vget_low_s16 (q2_fpnk.val[1]));
    q_fpnk_i = vnegq_s16 (q_fpnk_i);    /* conjugate F[ncfft - k] */

    q_f1k_r = vhaddq_s16 (q2_fpk.val[0], q_fpnk_r);   /* halving add: (a + b) >> 1 */
    q_f1k_i = vhaddq_s16 (q2_fpk.val[1], q_fpnk_i);

    q_f2k_r = vhsubq_s16 (q2_fpk.val[0], q_fpnk_r);   /* halving sub: (a - b) >> 1 */
    q_f2k_i = vhsubq_s16 (q2_fpk.val[1], q_fpnk_i);

    /* complex multiply f2k * twiddle in Q15 (vqdmulhq_s16 = saturating
     * doubling multiply, high half) */
    q_tmp0 = vqdmulhq_s16 (q_f2k_r, q2_tw.val[0]);
    q_tmp1 = vqdmulhq_s16 (q_f2k_i, q2_tw.val[1]);
    q_tmp2 = vqdmulhq_s16 (q_f2k_r, q2_tw.val[1]);
    q_tmp3 = vqdmulhq_s16 (q_f2k_i, q2_tw.val[0]);
    q_tw_r = vsubq_s16 (q_tmp0, q_tmp1);
    q_tw_i = vaddq_s16 (q_tmp2, q_tmp3);

    q_dst2_r = vhsubq_s16 (q_f1k_r, q_tw_r);
    q_dst2_i = vhsubq_s16 (q_tw_i, q_f1k_i);          /* conjugated second half */
    q2_dst.val[0] = vhaddq_s16 (q_f1k_r, q_tw_r);
    q2_dst.val[1] = vhaddq_s16 (q_f1k_i, q_tw_i);
    /* reverse the second-half results back into ascending bin order */
    q_dst2_r = vrev32q_s16 (q_dst2_r);
    q_dst2_i = vrev32q_s16 (q_dst2_i);
    q_dst2_r = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q_dst2_r)));
    q_dst2_i = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q_dst2_i)));
    q2_dst2.val[0] = vcombine_s16 (vget_high_s16 (q_dst2_r), vget_low_s16 (q_dst2_r));
    q2_dst2.val[1] = vcombine_s16 (vget_high_s16 (q_dst2_i), vget_low_s16 (q_dst2_i));
    vst2q_s16 (p_dst, q2_dst);
    vst2q_s16 (p_dst2, q2_dst2);
}
/* unscaled path: identical to the loop above except that f1k and f2k
 * use full-width vaddq/vsubq instead of the halving forms */
for (k = 1; k <= count; k += 8)
{
    p_src      = (int16_t*) (& (src[k]));
    p_src2     = (int16_t*) (& (src[ncfft - k - 7]));
    p_twiddles = (int16_t*) (& (twiddles[k - 1]));
    p_dst      = (int16_t*) (& (dst[k]));
    p_dst2     = (int16_t*) (& (dst[ncfft - k - 7]));

    q2_fpk  = vld2q_s16 (p_src);
    q2_fpnk = vld2q_s16 (p_src2);

    q2_tw = vld2q_s16 (p_twiddles);
    q2_fpnk.val[0] = vrev32q_s16 (q2_fpnk.val[0]);
    q2_fpnk.val[1] = vrev32q_s16 (q2_fpnk.val[1]);
    q2_fpnk.val[0] = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q2_fpnk.val[0])));
    q2_fpnk.val[1] = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q2_fpnk.val[1])));
    q_fpnk_r = vcombine_s16 (vget_high_s16 (q2_fpnk.val[0]), vget_low_s16 (q2_fpnk.val[0]));
    q_fpnk_i = vcombine_s16 (vget_high_s16 (q2_fpnk.val[1]), vget_low_s16 (q2_fpnk.val[1]));
    q_fpnk_i = vnegq_s16 (q_fpnk_i);

    q_f1k_r = vaddq_s16 (q2_fpk.val[0], q_fpnk_r);
    q_f1k_i = vaddq_s16 (q2_fpk.val[1], q_fpnk_i);

    q_f2k_r = vsubq_s16 (q2_fpk.val[0], q_fpnk_r);
    q_f2k_i = vsubq_s16 (q2_fpk.val[1], q_fpnk_i);

    q_tmp0 = vqdmulhq_s16 (q_f2k_r, q2_tw.val[0]);
    q_tmp1 = vqdmulhq_s16 (q_f2k_i, q2_tw.val[1]);
    q_tmp2 = vqdmulhq_s16 (q_f2k_r, q2_tw.val[1]);
    q_tmp3 = vqdmulhq_s16 (q_f2k_i, q2_tw.val[0]);
    q_tw_r = vsubq_s16 (q_tmp0, q_tmp1);
    q_tw_i = vaddq_s16 (q_tmp2, q_tmp3);

    /* the final halving combine is inherent to the recombination, so
     * it remains vhadd/vhsub even in the unscaled path */
    q_dst2_r = vhsubq_s16 (q_f1k_r, q_tw_r);
    q_dst2_i = vhsubq_s16 (q_tw_i, q_f1k_i);
    q2_dst.val[0] = vhaddq_s16 (q_f1k_r, q_tw_r);
    q2_dst.val[1] = vhaddq_s16 (q_f1k_i, q_tw_i);
    q_dst2_r = vrev32q_s16 (q_dst2_r);
    q_dst2_i = vrev32q_s16 (q_dst2_i);
    q_dst2_r = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q_dst2_r)));
    q_dst2_i = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q_dst2_i)));
    q2_dst2.val[0] = vcombine_s16 (vget_high_s16 (q_dst2_r), vget_low_s16 (q_dst2_r));
    q2_dst2.val[1] = vcombine_s16 (vget_high_s16 (q_dst2_i), vget_low_s16 (q_dst2_i));
    vst2q_s16 (p_dst, q2_dst);
    vst2q_s16 (p_dst2, q2_dst2);
}
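Both loops above use the same three-step shuffle to reverse the eight complex values loaded from the mirrored end of the spectrum. The standalone snippet below (my demonstration, not library code) shows why vrev32q_s16 (swap 16-bit lanes within each 32-bit word), then vrev64q_s32 (swap 32-bit words within each 64-bit half), then exchanging the quadword halves with vcombine amounts to a full reversal of all eight lanes.

#include <arm_neon.h>
#include <stdio.h>

int main (void)
{
    int16_t in[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }, out[8];
    int16x8_t q = vld1q_s16 (in);
    q = vrev32q_s16 (q);                                     /* 1 0 3 2 5 4 7 6 */
    q = vreinterpretq_s16_s32 (
            vrev64q_s32 (vreinterpretq_s32_s16 (q)));        /* 3 2 1 0 7 6 5 4 */
    q = vcombine_s16 (vget_high_s16 (q), vget_low_s16 (q));  /* 7 6 5 4 3 2 1 0 */
    vst1q_s16 (out, q);
    for (int k = 0; k < 8; k++)
        printf ("%d ", out[k]);   /* prints 7 6 5 4 3 2 1 0 */
    printf ("\n");
    return 0;
}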
/* scalar path (the excerpt suggests it serves sizes too small for the
 * 8-wide NEON loops above) */
for (k = 1; k <= ncfft / 2; ++k)
{
    fpnk.r =  src[ncfft - k].r;    /* fpnk = conj(F[ncfft - k]) */
    fpnk.i = -src[ncfft - k].i;
    /* (fpk = F[k] is loaded here; not part of this excerpt) */
    f1k.r = fpk.r + fpnk.r;        /* even part */
    f1k.i = fpk.i + fpnk.i;

    f2k.r = fpk.r - fpnk.r;        /* odd part, before twiddling */
    f2k.i = fpk.i - fpnk.i;
    /* (tw = f2k * twiddles[k - 1] in Q15 is computed here; not shown) */
    dst[k].r = (f1k.r + tw.r) >> 1;
    dst[k].i = (f1k.i + tw.i) >> 1;
    dst[ncfft - k].r = (f1k.r - tw.r) >> 1;   /* conj(f1k - tw) / 2 */
    dst[ncfft - k].i = (tw.i - f1k.i) >> 1;
}
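What the NEON loops and this scalar path compute is the standard recombination that turns the FFT Z of an even/odd-packed real signal into the real signal's spectrum: f1k = Z[k] + conj(Z[N-k]) is the even part, f2k = Z[k] - conj(Z[N-k]) the odd part, and the outputs are (f1k + W^k·f2k)/2 and conj(f1k - W^k·f2k)/2. A floating-point rendering of one bin pair follows, under that reading of the code; it is my reference sketch and the names are mine.

#include <complex.h>
#include <stdio.h>

/* One bin pair of the r2c recombination: z_k = Z[k], z_nk = Z[ncfft-k],
 * w = twiddles[k-1].  Mirrors f1k/f2k/tw/dst in the loop above. */
static void split_r2c_bin (double complex z_k, double complex z_nk,
                           double complex w,
                           double complex *x_k, double complex *x_nk)
{
    double complex f1k = z_k + conj (z_nk);   /* even part */
    double complex f2k = z_k - conj (z_nk);   /* odd part, before twiddling */
    double complex tw  = f2k * w;
    *x_k  = (f1k + tw) / 2.0;                 /* the >> 1 in the fixed-point code */
    *x_nk = conj (f1k - tw) / 2.0;            /* note the (tw.i - f1k.i) above */
}

int main (void)
{
    double complex xk, xnk;
    split_r2c_bin (4 + 1*I, 2 - 3*I, 1.0, &xk, &xnk);   /* w = W^k, here 1 */
    printf ("(%g, %g) (%g, %g)\n", creal (xk), cimag (xk),
            creal (xnk), cimag (xnk));
    return 0;
}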
int16x8x2_t q2_fk, q2_fnkc, q2_tw, q2_dst, q2_dst2;
int16x8_t q_fnkc_r, q_fnkc_i;
int16x8_t q_fek_r, q_fek_i, q_fok_r, q_fok_i;
int16x8_t q_tmp0, q_tmp1, q_tmp2, q_tmp3;
int16x8_t q_dst2_r, q_dst2_i;
int16_t *p_src, *p_src2, *p_dst, *p_dst2, *p_twiddles;

/* rebuild bin 0 of the half-size complex spectrum from the packed
 * real DC and Nyquist values */
dst[0].r = src[0].r + src[ncfft].r;
dst[0].i = src[0].r - src[ncfft].r;
/* scaled path */
for (k = 1; k <= count; k += 8)
{
    p_src      = (int16_t*) (& (src[k]));
    p_src2     = (int16_t*) (& (src[ncfft - k - 7]));
    p_twiddles = (int16_t*) (& (twiddles[k - 1]));
    p_dst      = (int16_t*) (& (dst[k]));
    p_dst2     = (int16_t*) (& (dst[ncfft - k - 7]));

    q2_fk   = vld2q_s16 (p_src);
    q2_fnkc = vld2q_s16 (p_src2);
    q2_tw   = vld2q_s16 (p_twiddles);
    /* reverse and conjugate the mirrored bins, as in the r2c loops */
    q2_fnkc.val[0] = vrev32q_s16 (q2_fnkc.val[0]);
    q2_fnkc.val[1] = vrev32q_s16 (q2_fnkc.val[1]);
    q2_fnkc.val[0] = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q2_fnkc.val[0])));
    q2_fnkc.val[1] = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q2_fnkc.val[1])));
    q_fnkc_r = vcombine_s16 (vget_high_s16 (q2_fnkc.val[0]), vget_low_s16 (q2_fnkc.val[0]));
    q_fnkc_i = vcombine_s16 (vget_high_s16 (q2_fnkc.val[1]), vget_low_s16 (q2_fnkc.val[1]));
    q_fnkc_i = vnegq_s16 (q_fnkc_i);

    q_fek_r = vhaddq_s16 (q2_fk.val[0], q_fnkc_r);   /* even part, halved */
    q_fek_i = vhaddq_s16 (q2_fk.val[1], q_fnkc_i);
    q_tmp0  = vhsubq_s16 (q2_fk.val[0], q_fnkc_r);
    q_tmp1  = vhsubq_s16 (q2_fk.val[1], q_fnkc_i);

    /* odd part: the add/sub pattern below multiplies by the conjugate
     * twiddle in Q15 */
    q_fok_r = vqdmulhq_s16 (q_tmp0, q2_tw.val[0]);
    q_fok_i = vqdmulhq_s16 (q_tmp1, q2_tw.val[0]);
    q_tmp2  = vqdmulhq_s16 (q_tmp1, q2_tw.val[1]);
    q_tmp3  = vqdmulhq_s16 (q_tmp0, q2_tw.val[1]);
    q_fok_r = vaddq_s16 (q_fok_r, q_tmp2);
    q_fok_i = vsubq_s16 (q_fok_i, q_tmp3);

    q_dst2_r = vsubq_s16 (q_fek_r, q_fok_r);
    q_dst2_i = vsubq_s16 (q_fok_i, q_fek_i);
    q2_dst.val[0] = vaddq_s16 (q_fek_r, q_fok_r);
    q2_dst.val[1] = vaddq_s16 (q_fek_i, q_fok_i);
    q_dst2_r = vrev32q_s16 (q_dst2_r);
    q_dst2_i = vrev32q_s16 (q_dst2_i);
    q_dst2_r = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q_dst2_r)));
    q_dst2_i = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q_dst2_i)));
    q2_dst2.val[0] = vcombine_s16 (vget_high_s16 (q_dst2_r), vget_low_s16 (q_dst2_r));
    q2_dst2.val[1] = vcombine_s16 (vget_high_s16 (q_dst2_i), vget_low_s16 (q_dst2_i));
    vst2q_s16 (p_dst, q2_dst);
    vst2q_s16 (p_dst2, q2_dst2);
}
/* unscaled path */
for (k = 1; k <= count; k += 8)
{
    p_src      = (int16_t*) (& (src[k]));
    p_src2     = (int16_t*) (& (src[ncfft - k - 7]));
    p_twiddles = (int16_t*) (& (twiddles[k - 1]));
    p_dst      = (int16_t*) (& (dst[k]));
    p_dst2     = (int16_t*) (& (dst[ncfft - k - 7]));

    q2_fk   = vld2q_s16 (p_src);
    q2_fnkc = vld2q_s16 (p_src2);
    q2_tw   = vld2q_s16 (p_twiddles);
    q2_fnkc.val[0] = vrev32q_s16 (q2_fnkc.val[0]);
    q2_fnkc.val[1] = vrev32q_s16 (q2_fnkc.val[1]);
    q2_fnkc.val[0] = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q2_fnkc.val[0])));
    q2_fnkc.val[1] = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q2_fnkc.val[1])));
    q_fnkc_r = vcombine_s16 (vget_high_s16 (q2_fnkc.val[0]), vget_low_s16 (q2_fnkc.val[0]));
    q_fnkc_i = vcombine_s16 (vget_high_s16 (q2_fnkc.val[1]), vget_low_s16 (q2_fnkc.val[1]));
    q_fnkc_i = vnegq_s16 (q_fnkc_i);

    q_fek_r = vaddq_s16 (q2_fk.val[0], q_fnkc_r);
    q_fek_i = vaddq_s16 (q2_fk.val[1], q_fnkc_i);
    q_tmp0  = vsubq_s16 (q2_fk.val[0], q_fnkc_r);
    q_tmp1  = vsubq_s16 (q2_fk.val[1], q_fnkc_i);

    q_fok_r = vqdmulhq_s16 (q_tmp0, q2_tw.val[0]);
    q_fok_i = vqdmulhq_s16 (q_tmp1, q2_tw.val[0]);
    q_tmp2  = vqdmulhq_s16 (q_tmp1, q2_tw.val[1]);
    q_tmp3  = vqdmulhq_s16 (q_tmp0, q2_tw.val[1]);
    q_fok_r = vaddq_s16 (q_fok_r, q_tmp2);
    q_fok_i = vsubq_s16 (q_fok_i, q_tmp3);

    q_dst2_r = vsubq_s16 (q_fek_r, q_fok_r);
    q_dst2_i = vsubq_s16 (q_fok_i, q_fek_i);
    q2_dst.val[0] = vaddq_s16 (q_fek_r, q_fok_r);
    q2_dst.val[1] = vaddq_s16 (q_fek_i, q_fok_i);
    q_dst2_r = vrev32q_s16 (q_dst2_r);
    q_dst2_i = vrev32q_s16 (q_dst2_i);
    q_dst2_r = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q_dst2_r)));
    q_dst2_i = vreinterpretq_s16_s32 (vrev64q_s32 (vreinterpretq_s32_s16 (q_dst2_i)));
    q2_dst2.val[0] = vcombine_s16 (vget_high_s16 (q_dst2_r), vget_low_s16 (q_dst2_r));
    q2_dst2.val[1] = vcombine_s16 (vget_high_s16 (q_dst2_i), vget_low_s16 (q_dst2_i));
    vst2q_s16 (p_dst, q2_dst);
    vst2q_s16 (p_dst2, q2_dst2);
}
/* scalar path */
for (k = 1; k <= ncfft / 2; k++)
{
    fnkc.r =  src[ncfft - k].r;    /* fnkc = conj(X[ncfft - k]) */
    fnkc.i = -src[ncfft - k].i;
    /* (fk = X[k] is loaded here; not part of this excerpt) */
    fek.r = fk.r + fnkc.r;         /* even part */
    fek.i = fk.i + fnkc.i;

    tmp.r = fk.r - fnkc.r;         /* odd part, before un-twiddling */
    tmp.i = fk.i - fnkc.i;
    /* (fok = tmp * conj(twiddles[k - 1]) is computed here; not shown) */
    dst[k].r = fek.r + fok.r;
    dst[k].i = fek.i + fok.i;

    dst[ncfft - k].r = fek.r - fok.r;    /* conj(fek - fok) */
    dst[ncfft - k].i = fok.i - fek.i;
}
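The c2r path is the exact inverse of the r2c recombination: it rebuilds the even part fek and the odd part fok (un-twiddling with the conjugate twiddle, visible in the vqdmulhq add/sub pattern of the NEON loops above), then reassembles the two half-spectrum bins for the inverse complex FFT. A double-precision rendering of one bin pair follows; it is my sketch, with names of my choosing.

#include <complex.h>
#include <stdio.h>

/* Inverse of the r2c recombination: x_k = X[k], x_nk = X[ncfft-k],
 * w = twiddles[k-1]; produces z_k, z_nk for the inverse half-size FFT. */
static void split_c2r_bin (double complex x_k, double complex x_nk,
                           double complex w,
                           double complex *z_k, double complex *z_nk)
{
    double complex fek = x_k + conj (x_nk);               /* even part */
    double complex fok = (x_k - conj (x_nk)) * conj (w);  /* odd part, un-twiddled */
    *z_k  = fek + fok;
    *z_nk = conj (fek - fok);   /* matches dst[ncfft-k].i = fok.i - fek.i above */
}

int main (void)
{
    double complex zk, znk;
    split_c2r_bin (4 + 1*I, 2 - 3*I, 1.0, &zk, &znk);
    printf ("(%g, %g) (%g, %g)\n", creal (zk), cimag (zk),
            creal (znk), cimag (znk));
    return 0;
}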
/* small-size dispatch excerpted from the c2c entry point: scaled... */
ne10_fft2_backward_int16_scaled (fout, fin);
ne10_fft4_backward_int16_scaled (fout, fin);
ne10_fft8_backward_int16_scaled (fout, fin);
ne10_fft2_forward_int16_scaled (fout, fin);
ne10_fft4_forward_int16_scaled (fout, fin);
ne10_fft8_forward_int16_scaled (fout, fin);
/* ...and unscaled */
ne10_fft2_backward_int16_unscaled (fout, fin);
ne10_fft4_backward_int16_unscaled (fout, fin);
ne10_fft8_backward_int16_unscaled (fout, fin);
ne10_fft2_forward_int16_unscaled (fout, fin);
ne10_fft4_forward_int16_unscaled (fout, fin);
ne10_fft8_forward_int16_unscaled (fout, fin);
/* r2c wrapper: complex sub-FFT into the temporary buffer, then the
 * split recombination */
c2c_state.buffer = tmpbuf2;
ne10_fft_split_r2c_1d_int16_neon (fout, tmpbuf1, cfg->super_twiddles, cfg->ncfft, scaled_flag);

/* c2r wrapper: split pre-processing, then the inverse complex sub-FFT */
c2c_state.buffer = tmpbuf2;
ne10_fft_split_c2r_1d_int16_neon (tmpbuf1, fin, cfg->super_twiddles, cfg->ncfft, scaled_flag);
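For context, here is a hedged usage sketch of the public entry point these wrappers sit behind. The signature of ne10_fft_c2c_1d_int16_neon matches the declarations listed below; the allocation call ne10_fft_alloc_c2c_int16, the NE10.h header, and the NE10_FREE macro are assumptions drawn from the public Ne10 headers and may differ by version.

#include "NE10.h"   /* assumed top-level Ne10 header */

void run_fft_example (void)
{
    ne10_fft_cfg_int16_t cfg = ne10_fft_alloc_c2c_int16 (1024);  /* assumed allocator */
    ne10_fft_cpx_int16_t in[1024], out[1024];
    /* ... fill in[] with Q15 samples ... */
    ne10_fft_c2c_1d_int16_neon (out, in, cfg, 0 /* forward */, 1 /* scaled */);
    ne10_fft_c2c_1d_int16_neon (in, out, cfg, 1 /* inverse */, 1 /* scaled */);
    NE10_FREE (cfg);   /* assumed deallocation macro */
}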
Related declarations referenced by this file:

void ne10_mixed_radix_fft_forward_int16_scaled_neon (ne10_fft_cpx_int16_t *Fout, ne10_fft_cpx_int16_t *fin, ne10_int32_t *factors, ne10_fft_cpx_int16_t *twiddles, ne10_fft_cpx_int16_t *buffer) asm ("ne10_mixed_radix_fft_forward_int16_scaled_neon");
void ne10_mixed_radix_fft_backward_int16_scaled_neon (ne10_fft_cpx_int16_t *Fout, ne10_fft_cpx_int16_t *fin, ne10_int32_t *factors, ne10_fft_cpx_int16_t *twiddles, ne10_fft_cpx_int16_t *buffer) asm ("ne10_mixed_radix_fft_backward_int16_scaled_neon");
void ne10_mixed_radix_fft_forward_int16_unscaled_neon (ne10_fft_cpx_int16_t *Fout, ne10_fft_cpx_int16_t *fin, ne10_int32_t *factors, ne10_fft_cpx_int16_t *twiddles, ne10_fft_cpx_int16_t *buffer) asm ("ne10_mixed_radix_fft_forward_int16_unscaled_neon");
void ne10_mixed_radix_fft_backward_int16_unscaled_neon (ne10_fft_cpx_int16_t *Fout, ne10_fft_cpx_int16_t *fin, ne10_int32_t *factors, ne10_fft_cpx_int16_t *twiddles, ne10_fft_cpx_int16_t *buffer) asm ("ne10_mixed_radix_fft_backward_int16_unscaled_neon");

void ne10_fft_c2c_1d_int16_neon (ne10_fft_cpx_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_cfg_int16_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag);
    Specific implementation of ne10_fft_c2c_1d_int16 using NEON SIMD capabilities.
void ne10_fft_r2c_1d_int16_neon (ne10_fft_cpx_int16_t *fout, ne10_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag);
    Specific implementation of ne10_fft_r2c_1d_int16 using NEON SIMD capabilities.
void ne10_fft_c2r_1d_int16_neon (ne10_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag);
    Specific implementation of ne10_fft_c2r_1d_int16 using NEON SIMD capabilities.

Fixed-point helper macros: NE10_F2I16_SAMPPROD, NE10_F2I16_FIXDIV(c, div).
Configuration structure for the 16-bit fixed-point FFT functions; members referenced above include ne10_fft_cpx_int16_t *twiddles, *super_twiddles, and *buffer.