Project Ne10
An open, optimized software library for the ARM architecture.
Macros | Functions
NE10_fft_int16.neonintrinsic.c File Reference
#include <arm_neon.h>
#include "NE10_types.h"
#include "NE10_macros.h"
#include "NE10_fft.h"

Go to the source code of this file.

Macros

#define FFT4_FS_START
 
#define FFT4_FS
 
#define FFT4_FS_SCALED
 
#define FFT4_FWD_LS
 
#define FFT4_INV_LS
 
#define FFT8_FS_START
 
#define FFT8_FS
 
#define FFT8_FS_SCALED
 
#define FFT8_FWD_LS
 
#define FFT8_INV_LS
 
#define RADIX8x4_START
 
#define RADIX8x4_LOAD
 
#define RADIX8x4_STORE
 
#define RADIX8x4_FS_S0
 
#define RADIX8x4_FWD_S357
 
#define RADIX8x4_INV_S357
 
#define RADIX8x4_LS_02
 
#define RADIX8x4_FS_S0_SCALED
 
#define RADIX8x4_LS_02_SCALED
 
#define RADIX4x4_WITHOUT_TW_START
 
#define RADIX4x4_WITHOUT_TW_LOAD
 
#define RADIX4x4_WITHOUT_TW_STORE
 
#define RADIX4x4_WITHOUT_TW_S0
 
#define RADIX4x4_WITHOUT_TW_S0_SCALED
 
#define RADIX4x4_WITH_TW_START
 
#define RADIX4x4_WITH_TW_LOAD
 
#define RADIX4x4_WITH_TW_STORE
 
#define RADIX4x4_WITH_TW_S1_FWD
 
#define RADIX4x4_WITH_TW_S1_INV
 
#define RADIX4x4_WITH_TW_LS_02
 
#define RADIX4x4_WITH_TW_LS_02_SCALED
 
#define ne10_mixed_radix_fft_forward_int16_neon(scaled)
 
#define ne10_mixed_radix_fft_backward_int16_neon(scaled)
 

Functions

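 (Note: the documentation generator fused the following file-scope macro instantiations and the start of the next static function into a single declaration; they are listed separately here.)
 ne10_mixed_radix_fft_forward_int16_neon (unscaled)
 ne10_mixed_radix_fft_forward_int16_neon (scaled)
 ne10_mixed_radix_fft_backward_int16_neon (unscaled)
 ne10_mixed_radix_fft_backward_int16_neon (scaled)
 static void ne10_fft_split_r2c_1d_int16_neon (ne10_fft_cpx_int16_t *dst, ...)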
 
void ne10_fft_c2c_1d_int16_neon (ne10_fft_cpx_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_cfg_int16_t cfg, ne10_int32_t inverse_fft, ne10_int32_t scaled_flag)
 Specific implementation of ne10_fft_c2c_1d_int16 using NEON SIMD capabilities. More...
 
void ne10_fft_r2c_1d_int16_neon (ne10_fft_cpx_int16_t *fout, ne10_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
 Specific implementation of ne10_fft_r2c_1d_int16 using NEON SIMD capabilities. More...
 
void ne10_fft_c2r_1d_int16_neon (ne10_int16_t *fout, ne10_fft_cpx_int16_t *fin, ne10_fft_r2c_cfg_int16_t cfg, ne10_int32_t scaled_flag)
 Specific implementation of ne10_fft_c2r_1d_int16 using NEON SIMD capabilities. More...
 

Macro Definition Documentation

#define FFT4_FS
Value:
s2_r = Fin[0].r - Fin[2].r; \
s2_i = Fin[0].i - Fin[2].i; \
tmp_r = Fin[0].r + Fin[2].r; \
tmp_i = Fin[0].i + Fin[2].i; \
s0_r = Fin[1].r + Fin[3].r; \
s0_i = Fin[1].i + Fin[3].i; \
s1_r = Fin[1].r - Fin[3].r; \
s1_i = Fin[1].i - Fin[3].i;

Definition at line 82 of file NE10_fft_int16.neonintrinsic.c.

#define FFT4_FS_SCALED
Value:
s2_r = (Fin[0].r - Fin[2].r) >> 2; \
s2_i = (Fin[0].i - Fin[2].i) >> 2; \
tmp_r = (Fin[0].r + Fin[2].r) >> 2; \
tmp_i = (Fin[0].i + Fin[2].i) >> 2; \
s0_r = (Fin[1].r + Fin[3].r) >> 2; \
s0_i = (Fin[1].i + Fin[3].i) >> 2; \
s1_r = (Fin[1].r - Fin[3].r) >> 2; \
s1_i = (Fin[1].i - Fin[3].i) >> 2;

Definition at line 92 of file NE10_fft_int16.neonintrinsic.c.

#define FFT4_FS_START
Value:
ne10_int16_t s0_r, s0_i, s1_r, s1_i, s2_r, s2_i; \
ne10_int16_t tmp_r, tmp_i;
References: ne10_int16_t (typedef int16_t ne10_int16_t; Definition: NE10_types.h:74).

Definition at line 78 of file NE10_fft_int16.neonintrinsic.c.

#define FFT4_FWD_LS
Value:
Fout[2].r = tmp_r - s0_r; \
Fout[2].i = tmp_i - s0_i; \
Fout[0].r = tmp_r + s0_r; \
Fout[0].i = tmp_i + s0_i; \
Fout[1].r = s2_r + s1_i; \
Fout[1].i = s2_i - s1_r; \
Fout[3].r = s2_r - s1_i; \
Fout[3].i = s2_i + s1_r;

Definition at line 102 of file NE10_fft_int16.neonintrinsic.c.

#define FFT4_INV_LS
Value:
Fout[2].r = tmp_r - s0_r; \
Fout[2].i = tmp_i - s0_i; \
Fout[0].r = tmp_r + s0_r; \
Fout[0].i = tmp_i + s0_i; \
Fout[1].r = s2_r - s1_i; \
Fout[1].i = s2_i + s1_r; \
Fout[3].r = s2_r + s1_i; \
Fout[3].i = s2_i - s1_r;

Definition at line 112 of file NE10_fft_int16.neonintrinsic.c.

#define FFT8_FS
Value:
s0_r = Fin[0].r + Fin[4].r; \
s0_i = Fin[0].i + Fin[4].i; \
s1_r = Fin[0].r - Fin[4].r; \
s1_i = Fin[0].i - Fin[4].i; \
s2_r = Fin[1].r + Fin[5].r; \
s2_i = Fin[1].i + Fin[5].i; \
s3_r = Fin[1].r - Fin[5].r; \
s3_i = Fin[1].i - Fin[5].i; \
s4_r = Fin[2].r + Fin[6].r; \
s4_i = Fin[2].i + Fin[6].i; \
s5_r = Fin[2].r - Fin[6].r; \
s5_i = Fin[2].i - Fin[6].i; \
s6_r = Fin[3].r + Fin[7].r; \
s6_i = Fin[3].i + Fin[7].i; \
s7_r = Fin[3].r - Fin[7].r; \
s7_i = Fin[3].i - Fin[7].i;

Definition at line 162 of file NE10_fft_int16.neonintrinsic.c.

#define FFT8_FS_SCALED
Value:
s0_r = (Fin[0].r + Fin[4].r) >> 3; \
s0_i = (Fin[0].i + Fin[4].i) >> 3; \
s1_r = (Fin[0].r - Fin[4].r) >> 3; \
s1_i = (Fin[0].i - Fin[4].i) >> 3; \
s2_r = (Fin[1].r + Fin[5].r) >> 3; \
s2_i = (Fin[1].i + Fin[5].i) >> 3; \
s3_r = (Fin[1].r - Fin[5].r) >> 3; \
s3_i = (Fin[1].i - Fin[5].i) >> 3; \
s4_r = (Fin[2].r + Fin[6].r) >> 3; \
s4_i = (Fin[2].i + Fin[6].i) >> 3; \
s5_r = (Fin[2].r - Fin[6].r) >> 3; \
s5_i = (Fin[2].i - Fin[6].i) >> 3; \
s6_r = (Fin[3].r + Fin[7].r) >> 3; \
s6_i = (Fin[3].i + Fin[7].i) >> 3; \
s7_r = (Fin[3].r - Fin[7].r) >> 3; \
s7_i = (Fin[3].i - Fin[7].i) >> 3;

Definition at line 180 of file NE10_fft_int16.neonintrinsic.c.

#define FFT8_FS_START
Value:
ne10_int16_t s0_r, s0_i, s1_r, s1_i, s2_r, s2_i, s3_r, s3_i, s4_r, s4_i, s5_r, s5_i, s6_r, s6_i, s7_r, s7_i; \
ne10_int16_t t0_r, t0_i, t1_r, t1_i, t2_r, t2_i, t3_r, t3_i, t4_r, t4_i, t5_r, t5_i; \
const ne10_int16_t TW_81 = 23169;
References: ne10_int16_t (typedef int16_t ne10_int16_t; Definition: NE10_types.h:74).

Definition at line 157 of file NE10_fft_int16.neonintrinsic.c.

#define FFT8_FWD_LS

Definition at line 199 of file NE10_fft_int16.neonintrinsic.c.

#define FFT8_INV_LS

Definition at line 237 of file NE10_fft_int16.neonintrinsic.c.

#define ne10_mixed_radix_fft_backward_int16_neon (   scaled)

Definition at line 1087 of file NE10_fft_int16.neonintrinsic.c.

#define ne10_mixed_radix_fft_forward_int16_neon (   scaled)

Definition at line 1003 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX4x4_WITH_TW_LOAD
Value:
d2_in0 = vld2_s16 (p_src); \
p_src += src_step; \
d2_in1 = vld2_s16 (p_src); \
p_src += src_step; \
d2_in2 = vld2_s16 (p_src); \
p_src += src_step; \
d2_in3 = vld2_s16 (p_src); \
p_src += src_step; \
d2_tw0 = vld2_s16 (p_tw); \
p_tw += tw_step; \
d2_tw1 = vld2_s16 (p_tw); \
p_tw += tw_step; \
d2_tw2 = vld2_s16 (p_tw); \
d_s1_r = vqdmulh_s16 (d2_in1.val[0], d2_tw0.val[0]); \
d_s1_i = vqdmulh_s16 (d2_in1.val[1], d2_tw0.val[0]); \
d_s2_r = vqdmulh_s16 (d2_in2.val[0], d2_tw1.val[0]); \
d_s2_i = vqdmulh_s16 (d2_in2.val[1], d2_tw1.val[0]); \
d_s3_r = vqdmulh_s16 (d2_in3.val[0], d2_tw2.val[0]); \
d_s3_i = vqdmulh_s16 (d2_in3.val[1], d2_tw2.val[0]); \
d_tmp0 = vqdmulh_s16 (d2_in1.val[1], d2_tw0.val[1]); \
d_tmp1 = vqdmulh_s16 (d2_in1.val[0], d2_tw0.val[1]); \
d_tmp2 = vqdmulh_s16 (d2_in2.val[1], d2_tw1.val[1]); \
d_tmp3 = vqdmulh_s16 (d2_in2.val[0], d2_tw1.val[1]); \
d_tmp4 = vqdmulh_s16 (d2_in3.val[1], d2_tw2.val[1]); \
d_tmp5 = vqdmulh_s16 (d2_in3.val[0], d2_tw2.val[1]);

Definition at line 805 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX4x4_WITH_TW_LS_02
Value:
d_s4_r = vadd_s16 (d2_in0.val[0], d_s2_r); \
d_s4_i = vadd_s16 (d2_in0.val[1], d_s2_i); \
d_s5_r = vsub_s16 (d2_in0.val[0], d_s2_r); \
d_s5_i = vsub_s16 (d2_in0.val[1], d_s2_i); \
d_s6_r = vadd_s16 (d_s1_r, d_s3_r); \
d_s6_i = vadd_s16 (d_s1_i, d_s3_i); \
d_s7_r = vsub_s16 (d_s1_r, d_s3_r); \
d_s7_i = vsub_s16 (d_s1_i, d_s3_i); \
d2_out2.val[0] = vsub_s16 (d_s4_r, d_s6_r); \
d2_out2.val[1] = vsub_s16 (d_s4_i, d_s6_i); \
d2_out0.val[0] = vadd_s16 (d_s4_r, d_s6_r); \
d2_out0.val[1] = vadd_s16 (d_s4_i, d_s6_i);

Definition at line 862 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX4x4_WITH_TW_LS_02_SCALED
Value:
d_s4_r = vhadd_s16 (d2_in0.val[0], d_s2_r); \
d_s4_i = vhadd_s16 (d2_in0.val[1], d_s2_i); \
d_s5_r = vhsub_s16 (d2_in0.val[0], d_s2_r); \
d_s5_i = vhsub_s16 (d2_in0.val[1], d_s2_i); \
d_s6_r = vhadd_s16 (d_s1_r, d_s3_r); \
d_s6_i = vhadd_s16 (d_s1_i, d_s3_i); \
d_s7_r = vhsub_s16 (d_s1_r, d_s3_r); \
d_s7_i = vhsub_s16 (d_s1_i, d_s3_i); \
d2_out2.val[0] = vhsub_s16 (d_s4_r, d_s6_r); \
d2_out2.val[1] = vhsub_s16 (d_s4_i, d_s6_i); \
d2_out0.val[0] = vhadd_s16 (d_s4_r, d_s6_r); \
d2_out0.val[1] = vhadd_s16 (d_s4_i, d_s6_i);

Definition at line 876 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX4x4_WITH_TW_S1_FWD
Value:
d_s1_r = vsub_s16 (d_s1_r, d_tmp0); \
d_s1_i = vadd_s16 (d_s1_i, d_tmp1); \
d_s2_r = vsub_s16 (d_s2_r, d_tmp2); \
d_s2_i = vadd_s16 (d_s2_i, d_tmp3); \
d_s3_r = vsub_s16 (d_s3_r, d_tmp4); \
d_s3_i = vadd_s16 (d_s3_i, d_tmp5);

Definition at line 845 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX4x4_WITH_TW_S1_INV
Value:
d_s1_r = vadd_s16 (d_s1_r, d_tmp0); \
d_s1_i = vsub_s16 (d_s1_i, d_tmp1); \
d_s2_r = vadd_s16 (d_s2_r, d_tmp2); \
d_s2_i = vsub_s16 (d_s2_i, d_tmp3); \
d_s3_r = vadd_s16 (d_s3_r, d_tmp4); \
d_s3_i = vsub_s16 (d_s3_i, d_tmp5);

Definition at line 853 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX4x4_WITH_TW_START
Value:
ne10_int32_t m_count; \
ne10_int32_t src_step = src_stride << 1; \
ne10_int32_t dst_step = dst_stride << 1; \
ne10_int32_t tw_step = mstride << 1; \
int16_t *p_src, *p_dst, *p_tw; \
int16x4x2_t d2_in0, d2_in1, d2_in2, d2_in3; \
int16x4x2_t d2_tw0, d2_tw1, d2_tw2; \
int16x4_t d_s1_r, d_s1_i, d_s2_r, d_s2_i, d_s3_r, d_s3_i; \
int16x4_t d_tmp0, d_tmp1, d_tmp2, d_tmp3, d_tmp4, d_tmp5; \
int16x4_t d_s4_r, d_s4_i, d_s5_r, d_s5_i, d_s6_r, d_s6_i, d_s7_r, d_s7_i; \
int16x4x2_t d2_out0, d2_out1, d2_out2, d2_out3; \
p_src = (int16_t *) Fin; \
p_dst = (int16_t *) Fout; \
p_tw = (int16_t *) tw;
References: ne10_int32_t (typedef int32_t ne10_int32_t; Definition: NE10_types.h:76).

Definition at line 789 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX4x4_WITH_TW_STORE
Value:
vst2_s16 (p_dst, d2_out0); \
p_dst += dst_step; \
vst2_s16 (p_dst, d2_out1); \
p_dst += dst_step; \
vst2_s16 (p_dst, d2_out2); \
p_dst += dst_step; \
vst2_s16 (p_dst, d2_out3); \
p_dst += dst_step; \
p_src = p_src - src_step * 4 + 8; \
p_dst = p_dst - dst_step * 4 + 8; \
p_tw = p_tw - tw_step * 2 + 8;

Definition at line 832 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX4x4_WITHOUT_TW_LOAD
Value:
d2_in0 = vld2_s16 (p_src); \
p_src += src_step; \
d2_in1 = vld2_s16 (p_src); \
p_src += src_step; \
d2_in2 = vld2_s16 (p_src); \
p_src += src_step; \
d2_in3 = vld2_s16 (p_src); \
p_src += src_step;

Definition at line 635 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX4x4_WITHOUT_TW_S0
Value:
d_s0_r = vadd_s16 (d2_in0.val[0], d2_in2.val[0]); \
d_s0_i = vadd_s16 (d2_in0.val[1], d2_in2.val[1]); \
d_s1_r = vsub_s16 (d2_in0.val[0], d2_in2.val[0]); \
d_s1_i = vsub_s16 (d2_in0.val[1], d2_in2.val[1]); \
d_s2_r = vadd_s16 (d2_in1.val[0], d2_in3.val[0]); \
d_s2_i = vadd_s16 (d2_in1.val[1], d2_in3.val[1]); \
d_s3_r = vsub_s16 (d2_in1.val[0], d2_in3.val[0]); \
d_s3_i = vsub_s16 (d2_in1.val[1], d2_in3.val[1]); \
d_out2_r = vsub_s16 (d_s0_r, d_s2_r); \
d_out2_i = vsub_s16 (d_s0_i, d_s2_i); \
d_out0_r = vadd_s16 (d_s0_r, d_s2_r); \
d_out0_i = vadd_s16 (d_s0_i, d_s2_i);

Definition at line 668 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX4x4_WITHOUT_TW_S0_SCALED
Value:
d_s0_r = vhadd_s16 (d2_in0.val[0], d2_in2.val[0]); \
d_s0_i = vhadd_s16 (d2_in0.val[1], d2_in2.val[1]); \
d_s1_r = vhsub_s16 (d2_in0.val[0], d2_in2.val[0]); \
d_s1_i = vhsub_s16 (d2_in0.val[1], d2_in2.val[1]); \
d_s2_r = vhadd_s16 (d2_in1.val[0], d2_in3.val[0]); \
d_s2_i = vhadd_s16 (d2_in1.val[1], d2_in3.val[1]); \
d_s3_r = vhsub_s16 (d2_in1.val[0], d2_in3.val[0]); \
d_s3_i = vhsub_s16 (d2_in1.val[1], d2_in3.val[1]); \
d_out2_r = vhsub_s16 (d_s0_r, d_s2_r); \
d_out2_i = vhsub_s16 (d_s0_i, d_s2_i); \
d_out0_r = vhadd_s16 (d_s0_r, d_s2_r); \
d_out0_i = vhadd_s16 (d_s0_i, d_s2_i);

Definition at line 682 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX4x4_WITHOUT_TW_START
Value:
ne10_int32_t f_count; \
ne10_int32_t src_step = stride << 1; \
int16_t *p_src, *p_dst; \
int16x4x2_t d2_in0, d2_in1, d2_in2, d2_in3; \
int16x4_t d_s0_r, d_s0_i, d_s1_r, d_s1_i, d_s2_r, d_s2_i, d_s3_r, d_s3_i; \
int16x4_t d_out0_r, d_out0_i, d_out1_r, d_out1_i, d_out2_r, d_out2_i, d_out3_r, d_out3_i; \
int16x4x2_t d2_out0, d2_out1, d2_out2, d2_out3; \
int16x8x2_t q2_tmp0, q2_tmp1; \
int32x4x2_t q2_tmp2, q2_tmp3; \
p_src = (int16_t *) Fin; \
p_dst = (int16_t *) Fout;
References: ne10_int32_t (typedef int32_t ne10_int32_t; Definition: NE10_types.h:76).

Definition at line 622 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX4x4_WITHOUT_TW_STORE
Value:
q2_tmp0 = vtrnq_s16 (vcombine_s16(d_out0_r, d_out0_i), vcombine_s16(d_out1_r, d_out1_i)); \
q2_tmp1 = vtrnq_s16 (vcombine_s16(d_out2_r, d_out2_i), vcombine_s16(d_out3_r, d_out3_i)); \
q2_tmp2 = vtrnq_s32 (vreinterpretq_s32_s16(q2_tmp0.val[0]), vreinterpretq_s32_s16(q2_tmp1.val[0])); \
q2_tmp3 = vtrnq_s32 (vreinterpretq_s32_s16(q2_tmp0.val[1]), vreinterpretq_s32_s16(q2_tmp1.val[1])); \
d2_out0.val[0] = vget_low_s16 (vreinterpretq_s16_s32(q2_tmp2.val[0])); \
d2_out0.val[1] = vget_high_s16 (vreinterpretq_s16_s32(q2_tmp2.val[0])); \
d2_out1.val[0] = vget_low_s16 (vreinterpretq_s16_s32(q2_tmp3.val[0])); \
d2_out1.val[1] = vget_high_s16 (vreinterpretq_s16_s32(q2_tmp3.val[0])); \
d2_out2.val[0] = vget_low_s16 (vreinterpretq_s16_s32(q2_tmp2.val[1])); \
d2_out2.val[1] = vget_high_s16 (vreinterpretq_s16_s32(q2_tmp2.val[1])); \
d2_out3.val[0] = vget_low_s16 (vreinterpretq_s16_s32(q2_tmp3.val[1])); \
d2_out3.val[1] = vget_high_s16 (vreinterpretq_s16_s32(q2_tmp3.val[1])); \
vst2_s16 (p_dst, d2_out0); \
p_dst += 8; \
vst2_s16 (p_dst, d2_out1); \
p_dst += 8; \
vst2_s16 (p_dst, d2_out2); \
p_dst += 8; \
vst2_s16 (p_dst, d2_out3); \
p_dst += 8; \
p_src = p_src - src_step * 4 + 8;

Definition at line 645 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX8x4_FS_S0
Value:
d_sin0_r = vadd_s16 (d2_in0.val[0], d2_in1.val[0]); \
d_sin0_i = vadd_s16 (d2_in0.val[1], d2_in1.val[1]); \
d_sin1_r = vsub_s16 (d2_in0.val[0], d2_in1.val[0]); \
d_sin1_i = vsub_s16 (d2_in0.val[1], d2_in1.val[1]); \
d_sin2_r = vadd_s16 (d2_in2.val[0], d2_in3.val[0]); \
d_sin2_i = vadd_s16 (d2_in2.val[1], d2_in3.val[1]); \
d_sin3_r = vsub_s16 (d2_in2.val[0], d2_in3.val[0]); \
d_sin3_i = vsub_s16 (d2_in2.val[1], d2_in3.val[1]); \
d_sin4_r = vadd_s16 (d2_in4.val[0], d2_in5.val[0]); \
d_sin4_i = vadd_s16 (d2_in4.val[1], d2_in5.val[1]); \
d_sin5_r = vsub_s16 (d2_in4.val[0], d2_in5.val[0]); \
d_sin5_i = vsub_s16 (d2_in4.val[1], d2_in5.val[1]); \
d_sin6_r = vadd_s16 (d2_in6.val[0], d2_in7.val[0]); \
d_sin6_i = vadd_s16 (d2_in6.val[1], d2_in7.val[1]); \
d_sin7_r = vsub_s16 (d2_in6.val[0], d2_in7.val[0]); \
d_sin7_i = vsub_s16 (d2_in6.val[1], d2_in7.val[1]);

Definition at line 393 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX8x4_FS_S0_SCALED
Value:
d_sin0_r = vhadd_s16 (d2_in0.val[0], d2_in1.val[0]); \
d_sin0_i = vhadd_s16 (d2_in0.val[1], d2_in1.val[1]); \
d_sin1_r = vhsub_s16 (d2_in0.val[0], d2_in1.val[0]); \
d_sin1_i = vhsub_s16 (d2_in0.val[1], d2_in1.val[1]); \
d_sin2_r = vhadd_s16 (d2_in2.val[0], d2_in3.val[0]); \
d_sin2_i = vhadd_s16 (d2_in2.val[1], d2_in3.val[1]); \
d_sin3_r = vhsub_s16 (d2_in2.val[0], d2_in3.val[0]); \
d_sin3_i = vhsub_s16 (d2_in2.val[1], d2_in3.val[1]); \
d_sin4_r = vhadd_s16 (d2_in4.val[0], d2_in5.val[0]); \
d_sin4_i = vhadd_s16 (d2_in4.val[1], d2_in5.val[1]); \
d_sin5_r = vhsub_s16 (d2_in4.val[0], d2_in5.val[0]); \
d_sin5_i = vhsub_s16 (d2_in4.val[1], d2_in5.val[1]); \
d_sin6_r = vhadd_s16 (d2_in6.val[0], d2_in7.val[0]); \
d_sin6_i = vhadd_s16 (d2_in6.val[1], d2_in7.val[1]); \
d_sin7_r = vhsub_s16 (d2_in6.val[0], d2_in7.val[0]); \
d_sin7_i = vhsub_s16 (d2_in6.val[1], d2_in7.val[1]);

Definition at line 465 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX8x4_FWD_S357
Value:
d_tw_81 = vdup_n_s16 (TW_81); \
d_tw_81n = vdup_n_s16 (TW_81N); \
d_s5_r = d_sin5_i; \
d_s5_i = vneg_s16 (d_sin5_r); \
d_s3_r = vadd_s16 (d_sin3_r, d_sin3_i); \
d_s3_i = vsub_s16 (d_sin3_i, d_sin3_r); \
d_s7_r = vsub_s16 (d_sin7_r, d_sin7_i); \
d_s7_i = vadd_s16 (d_sin7_i, d_sin7_r); \
d_s3_r = vqdmulh_s16 (d_s3_r, d_tw_81); \
d_s3_i = vqdmulh_s16 (d_s3_i, d_tw_81); \
d_s7_r = vqdmulh_s16 (d_s7_r, d_tw_81n); \
d_s7_i = vqdmulh_s16 (d_s7_i, d_tw_81n);

Definition at line 411 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX8x4_INV_S357
Value:
d_tw_81 = vdup_n_s16 (TW_81); \
d_tw_81n = vdup_n_s16 (TW_81N); \
d_s5_r = vneg_s16 (d_sin5_i); \
d_s5_i = d_sin5_r; \
d_s3_r = vsub_s16 (d_sin3_r, d_sin3_i); \
d_s3_i = vadd_s16 (d_sin3_i, d_sin3_r); \
d_s7_r = vadd_s16 (d_sin7_r, d_sin7_i); \
d_s7_i = vsub_s16 (d_sin7_i, d_sin7_r); \
d_s3_r = vqdmulh_s16 (d_s3_r, d_tw_81); \
d_s3_i = vqdmulh_s16 (d_s3_i, d_tw_81); \
d_s7_r = vqdmulh_s16 (d_s7_r, d_tw_81n); \
d_s7_i = vqdmulh_s16 (d_s7_i, d_tw_81n);

Definition at line 425 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX8x4_LOAD
Value:
d2_in0 = vld2_s16 (p_src); \
p_src += src_step; \
d2_in2 = vld2_s16 (p_src); \
p_src += src_step; \
d2_in4 = vld2_s16 (p_src); \
p_src += src_step; \
d2_in6 = vld2_s16 (p_src); \
p_src += src_step; \
d2_in1 = vld2_s16 (p_src); \
p_src += src_step; \
d2_in3 = vld2_s16 (p_src); \
p_src += src_step; \
d2_in5 = vld2_s16 (p_src); \
p_src += src_step; \
d2_in7 = vld2_s16 (p_src); \
p_src += src_step;

Definition at line 332 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX8x4_LS_02
Value:
d_s8_r = vadd_s16 (d_sin0_r, d_sin4_r); \
d_s8_i = vadd_s16 (d_sin0_i, d_sin4_i); \
d_s9_r = vadd_s16 (d_sin1_r, d_s5_r); \
d_s9_i = vadd_s16 (d_sin1_i, d_s5_i); \
d_s10_r = vsub_s16 (d_sin0_r, d_sin4_r); \
d_s10_i = vsub_s16 (d_sin0_i, d_sin4_i); \
d_s11_r = vsub_s16 (d_sin1_r, d_s5_r); \
d_s11_i = vsub_s16 (d_sin1_i, d_s5_i); \
d_s12_r = vadd_s16 (d_sin2_r, d_sin6_r); \
d_s12_i = vadd_s16 (d_sin2_i, d_sin6_i); \
d_s13_r = vadd_s16 (d_s3_r, d_s7_r); \
d_s13_i = vadd_s16 (d_s3_i, d_s7_i); \
d_s14_r = vsub_s16 (d_sin2_r, d_sin6_r); \
d_s14_i = vsub_s16 (d_sin2_i, d_sin6_i); \
d_s15_r = vsub_s16 (d_s3_r, d_s7_r); \
d_s15_i = vsub_s16 (d_s3_i, d_s7_i); \
d_out4_r = vsub_s16 (d_s8_r, d_s12_r); \
d_out4_i = vsub_s16 (d_s8_i, d_s12_i); \
d_out5_r = vsub_s16 (d_s9_r, d_s13_r); \
d_out5_i = vsub_s16 (d_s9_i, d_s13_i); \
d_out0_r = vadd_s16 (d_s8_r, d_s12_r); \
d_out0_i = vadd_s16 (d_s8_i, d_s12_i); \
d_out1_r = vadd_s16 (d_s9_r, d_s13_r); \
d_out1_i = vadd_s16 (d_s9_i, d_s13_i);

Definition at line 439 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX8x4_LS_02_SCALED
Value:
d_s8_r = vhadd_s16 (d_sin0_r, d_sin4_r); \
d_s8_i = vhadd_s16 (d_sin0_i, d_sin4_i); \
d_s9_r = vhadd_s16 (d_sin1_r, d_s5_r); \
d_s9_i = vhadd_s16 (d_sin1_i, d_s5_i); \
d_s10_r = vhsub_s16 (d_sin0_r, d_sin4_r); \
d_s10_i = vhsub_s16 (d_sin0_i, d_sin4_i); \
d_s11_r = vhsub_s16 (d_sin1_r, d_s5_r); \
d_s11_i = vhsub_s16 (d_sin1_i, d_s5_i); \
d_s12_r = vhadd_s16 (d_sin2_r, d_sin6_r); \
d_s12_i = vhadd_s16 (d_sin2_i, d_sin6_i); \
d_s13_r = vhadd_s16 (d_s3_r, d_s7_r); \
d_s13_i = vhadd_s16 (d_s3_i, d_s7_i); \
d_s14_r = vhsub_s16 (d_sin2_r, d_sin6_r); \
d_s14_i = vhsub_s16 (d_sin2_i, d_sin6_i); \
d_s15_r = vhsub_s16 (d_s3_r, d_s7_r); \
d_s15_i = vhsub_s16 (d_s3_i, d_s7_i); \
d_out4_r = vhsub_s16 (d_s8_r, d_s12_r); \
d_out4_i = vhsub_s16 (d_s8_i, d_s12_i); \
d_out5_r = vhsub_s16 (d_s9_r, d_s13_r); \
d_out5_i = vhsub_s16 (d_s9_i, d_s13_i); \
d_out0_r = vhadd_s16 (d_s8_r, d_s12_r); \
d_out0_i = vhadd_s16 (d_s8_i, d_s12_i); \
d_out1_r = vhadd_s16 (d_s9_r, d_s13_r); \
d_out1_i = vhadd_s16 (d_s9_i, d_s13_i);

Definition at line 483 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX8x4_START
Value:
ne10_int32_t f_count; \
ne10_int32_t src_step = stride << 1; \
const ne10_int16_t TW_81 = 23169; \
const ne10_int16_t TW_81N = -23169; \
int16_t *p_src, *p_dst; \
int16x4x2_t d2_in0, d2_in1, d2_in2, d2_in3, d2_in4, d2_in5, d2_in6, d2_in7; \
int16x4_t d_sin0_r, d_sin0_i, d_sin1_r, d_sin1_i, d_sin2_r, d_sin2_i, d_sin3_r, d_sin3_i; \
int16x4_t d_sin4_r, d_sin4_i, d_sin5_r, d_sin5_i, d_sin6_r, d_sin6_i, d_sin7_r, d_sin7_i; \
int16x4_t d_s3_r, d_s3_i, d_s5_r, d_s5_i, d_s7_r, d_s7_i; \
int16x4_t d_s8_r, d_s8_i, d_s9_r, d_s9_i, d_s10_r, d_s10_i, d_s11_r, d_s11_i; \
int16x4_t d_s12_r, d_s12_i, d_s13_r, d_s13_i, d_s14_r, d_s14_i, d_s15_r, d_s15_i; \
int16x4_t d_out0_r, d_out0_i, d_out1_r, d_out1_i, d_out2_r, d_out2_i, d_out3_r, d_out3_i; \
int16x4_t d_out4_r, d_out4_i, d_out5_r, d_out5_i, d_out6_r, d_out6_i, d_out7_r, d_out7_i; \
int16x4x2_t d2_out0, d2_out1, d2_out2, d2_out3, d2_out4, d2_out5, d2_out6, d2_out7; \
int16x8x2_t q2_tmp0, q2_tmp1, q2_tmp2, q2_tmp3; \
int32x4x2_t q2_tmp4, q2_tmp5, q2_tmp6, q2_tmp7; \
int16x4_t d_tw_81, d_tw_81n; \
p_src = (int16_t *) Fin; \
p_dst = (int16_t *) Fout;
References: ne10_int32_t (typedef int32_t ne10_int32_t; Definition: NE10_types.h:76) and ne10_int16_t (typedef int16_t ne10_int16_t; Definition: NE10_types.h:74).

Definition at line 310 of file NE10_fft_int16.neonintrinsic.c.

#define RADIX8x4_STORE

Definition at line 350 of file NE10_fft_int16.neonintrinsic.c.

Function Documentation

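(Note: the documentation generator fused the following file-scope macro instantiations and the start of the next static function into a single declaration; they are listed separately here.)
ne10_mixed_radix_fft_forward_int16_neon (unscaled)
ne10_mixed_radix_fft_forward_int16_neon (scaled)
ne10_mixed_radix_fft_backward_int16_neon (unscaled)
ne10_mixed_radix_fft_backward_int16_neon (scaled)
static void ne10_fft_split_r2c_1d_int16_neon (ne10_fft_cpx_int16_t *dst, ...)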

Definition at line 1172 of file NE10_fft_int16.neonintrinsic.c.