/*
 * Lane-wise division of n_src by n_cst without a divide instruction:
 *   1. vrecpeq_f32 produces a coarse reciprocal estimate of n_cst;
 *   2. vrecpsq_f32 (n_cst, rec) yields (2 - n_cst * rec), so each multiply
 *      below is one Newton-Raphson refinement  rec <- rec * (2 - n_cst * rec);
 *      two refinements are applied;
 *   3. the source is multiplied by the refined reciprocal.
 * The result approximates n_src / n_cst and may differ from an exact
 * division in the last bits.
 * n_src, n_cst and n_dst are declared outside this fragment.
 */
46 float32x4_t rec = vrecpeq_f32 (n_cst);
47 rec = vmulq_f32 (vrecpsq_f32 (n_cst, rec), rec);
48 rec = vmulq_f32 (vrecpsq_f32 (n_cst, rec), rec);
49 n_dst = vmulq_f32 (n_src , rec);
/*
 * 2-lane (64-bit) variant of the reciprocal-based division: n_rest is
 * divided in place by n_rest_cst. vrecpe_f32 gives the initial estimate and
 * each vmul_f32 (vrecps_f32 (...), rec) line is one Newton-Raphson step
 * rec <- rec * (2 - n_rest_cst * rec).
 * n_rest and n_rest_cst are declared outside this fragment.
 */
52 float32x2_t rec = vrecpe_f32 (n_rest_cst);
53 rec = vmul_f32 (vrecps_f32 (n_rest_cst, rec), rec);
54 rec = vmul_f32 (vrecps_f32 (n_rest_cst, rec), rec);
/* Overwrite n_rest with n_rest * (1 / n_rest_cst). */
55 n_rest = vmul_f32 (n_rest, rec);
/*
 * n_dst = n_src / n_cst, computed per lane as n_src * (1 / n_cst).
 * The reciprocal starts from the vrecpeq_f32 estimate and is sharpened by
 * two Newton-Raphson iterations; vrecpsq_f32 (n_cst, rec) supplies the
 * (2 - n_cst * rec) correction factor for each iteration.
 * n_src, n_cst and n_dst come from code outside this fragment.
 */
64 float32x4_t rec = vrecpeq_f32 (n_cst);
65 rec = vmulq_f32 (vrecpsq_f32 (n_cst, rec), rec);
66 rec = vmulq_f32 (vrecpsq_f32 (n_cst, rec), rec);
67 n_dst = vmulq_f32 (n_src , rec);
/*
 * In-place division of the 2-lane value n_rest by n_rest_cst using a
 * refined reciprocal: estimate (vrecpe_f32), two Newton-Raphson steps
 * (vrecps_f32 returns 2 - n_rest_cst * rec), then a final multiply.
 * n_rest and n_rest_cst come from code outside this fragment.
 */
70 float32x2_t rec = vrecpe_f32 (n_rest_cst);
71 rec = vmul_f32 (vrecps_f32 (n_rest_cst, rec), rec);
72 rec = vmul_f32 (vrecps_f32 (n_rest_cst, rec), rec);
73 n_rest = vmul_f32 (n_rest, rec);
/*
 * Three independent 4-lane divisions, n_dstK = n_srcK / n_cstK for K = 1..3.
 * Each one follows the same recipe: reciprocal estimate (vrecpeq_f32), two
 * Newton-Raphson refinements rec <- rec * (2 - n_cstK * rec) built from
 * vrecpsq_f32, then a multiply by the source. The single temporary `rec` is
 * declared once and reused for all three divisions.
 * The n_srcK / n_cstK / n_dstK variables are declared outside this fragment.
 */
/* n_dst1 = n_src1 / n_cst1 */
82 float32x4_t rec = vrecpeq_f32 (n_cst1);
83 rec = vmulq_f32 (vrecpsq_f32 (n_cst1, rec), rec);
84 rec = vmulq_f32 (vrecpsq_f32 (n_cst1, rec), rec);
85 n_dst1 = vmulq_f32 (n_src1 , rec);
/* n_dst2 = n_src2 / n_cst2 (rec is overwritten with the new estimate) */
87 rec = vrecpeq_f32 (n_cst2);
88 rec = vmulq_f32 (vrecpsq_f32 (n_cst2, rec), rec);
89 rec = vmulq_f32 (vrecpsq_f32 (n_cst2, rec), rec);
90 n_dst2 = vmulq_f32 (n_src2 , rec);
/* n_dst3 = n_src3 / n_cst3 */
92 rec = vrecpeq_f32 (n_cst3);
93 rec = vmulq_f32 (vrecpsq_f32 (n_cst3, rec), rec);
94 rec = vmulq_f32 (vrecpsq_f32 (n_cst3, rec), rec);
95 n_dst3 = vmulq_f32 (n_src3 , rec);
/*
 * In-place division of the three 2-lane members n_rest.val[0..2] by the
 * matching members of n_rest_cst. For each member: reciprocal estimate
 * (vrecpe_f32), two Newton-Raphson refinements using vrecps_f32 (which
 * returns 2 - divisor * rec), then multiply the member by the reciprocal.
 * `rec` is declared once and reused for every member.
 * n_rest and n_rest_cst are declared outside this fragment.
 */
/* n_rest.val[0] /= n_rest_cst.val[0] */
98 float32x2_t rec = vrecpe_f32 (n_rest_cst.val[0]);
99 rec = vmul_f32 (vrecps_f32 (n_rest_cst.val[0], rec), rec);
100 rec = vmul_f32 (vrecps_f32 (n_rest_cst.val[0], rec), rec);
101 n_rest.val[0] = vmul_f32 (n_rest.val[0] , rec);
/* n_rest.val[1] /= n_rest_cst.val[1] */
103 rec = vrecpe_f32 (n_rest_cst.val[1]);
104 rec = vmul_f32 (vrecps_f32 (n_rest_cst.val[1], rec), rec);
105 rec = vmul_f32 (vrecps_f32 (n_rest_cst.val[1], rec), rec);
106 n_rest.val[1] = vmul_f32 (n_rest.val[1] , rec);
/* n_rest.val[2] /= n_rest_cst.val[2] */
108 rec = vrecpe_f32 (n_rest_cst.val[2]);
109 rec = vmul_f32 (vrecps_f32 (n_rest_cst.val[2], rec), rec);
110 rec = vmul_f32 (vrecps_f32 (n_rest_cst.val[2], rec), rec);
111 n_rest.val[2] = vmul_f32 (n_rest.val[2] , rec);
/*
 * Per-lane n_dst = n_src / n_cst via a refined reciprocal rather than a
 * divide: vrecpeq_f32 seeds the reciprocal, the two vmulq_f32/vrecpsq_f32
 * lines each apply one Newton-Raphson step rec <- rec * (2 - n_cst * rec),
 * and the last line scales the source by that reciprocal.
 * n_src, n_cst and n_dst are declared outside this fragment.
 */
120 float32x4_t rec = vrecpeq_f32 (n_cst);
121 rec = vmulq_f32 (vrecpsq_f32 (n_cst, rec), rec);
122 rec = vmulq_f32 (vrecpsq_f32 (n_cst, rec), rec);
123 n_dst = vmulq_f32 (n_src , rec);
#define NE10_DstSrcCst_DO_COUNT_TIMES_FLOAT_NEON(loopCode1, loopCode2)
ne10_result_t ne10_divc_vec4f_neon(ne10_vec4f_t *dst, ne10_vec4f_t *src, const ne10_vec4f_t *cst, ne10_uint32_t count)
Specific implementation of ne10_divc_vec4f using NEON intrinsics.
A 2-tuple of ne10_float32_t values.
ne10_result_t ne10_divc_vec3f_neon(ne10_vec3f_t *dst, ne10_vec3f_t *src, const ne10_vec3f_t *cst, ne10_uint32_t count)
Specific implementation of ne10_divc_vec3f using NEON intrinsics.
#define NE10_DstSrcCst_DO_COUNT_TIMES_VEC3F_NEON(loopCode1, loopCode2)
A 3-tuple of ne10_float32_t values.
ne10_result_t ne10_divc_float_neon(ne10_float32_t *dst, ne10_float32_t *src, const ne10_float32_t cst, ne10_uint32_t count)
Specific implementation of ne10_divc_float using NEON intrinsics.
#define NE10_DstSrcCst_DO_COUNT_TIMES_VEC2F_NEON(loopCode1, loopCode2)
#define NE10_DstSrcCst_DO_COUNT_TIMES_VEC4F_NEON(loopCode)
ne10_result_t ne10_divc_vec2f_neon(ne10_vec2f_t *dst, ne10_vec2f_t *src, const ne10_vec2f_t *cst, ne10_uint32_t count)
Specific implementation of ne10_divc_vec2f using NEON intrinsics.
A 4-tuple of ne10_float32_t values.