43 n_dst = vmlaq_f32 (n_acc, n_src, n_cst);
45 n_rest = vmla_f32 (n_rest_acc, n_rest, n_rest_cst);
53 n_dst = vmlaq_f32 (n_acc, n_src , n_cst);
55 n_rest = vmla_f32 (n_rest_acc, n_rest, n_rest_cst);
63 n_dst1 = vmlaq_f32 (n_acc1, n_src1 , n_cst1);
64 n_dst2 = vmlaq_f32 (n_acc2, n_src2 , n_cst2);
65 n_dst3 = vmlaq_f32 (n_acc3, n_src3 , n_cst3);
67 n_rest.val[0] = vmla_f32 (n_rest_acc.val[0], n_rest.val[0], n_rest_cst.val[0]);
68 n_rest.val[1] = vmla_f32 (n_rest_acc.val[1], n_rest.val[1], n_rest_cst.val[1]);
69 n_rest.val[2] = vmla_f32 (n_rest_acc.val[2], n_rest.val[2], n_rest_cst.val[2]);
77 n_dst = vmlaq_f32 (n_acc, n_src , n_cst);
#define NE10_DstAccSrcCst_DO_COUNT_TIMES_VEC4F_NEON(loopCode)
#define NE10_DstAccSrcCst_DO_COUNT_TIMES_VEC3F_NEON(loopCode1, loopCode2)
A 2-tuple of ne10_float32_t values.
#define NE10_DstAccSrcCst_DO_COUNT_TIMES_VEC2F_NEON(loopCode1, loopCode2)
ne10_result_t ne10_mlac_vec3f_neon(ne10_vec3f_t *dst, ne10_vec3f_t *acc, ne10_vec3f_t *src, const ne10_vec3f_t *cst, ne10_uint32_t count)
Specific implementation of ne10_mlac_vec3f using NEON intrinsics.
A 3-tuple of ne10_float32_t values.
ne10_result_t ne10_mlac_float_neon(ne10_float32_t *dst, ne10_float32_t *acc, ne10_float32_t *src, const ne10_float32_t cst, ne10_uint32_t count)
Specific implementation of ne10_mlac_float using NEON intrinsics.
#define NE10_DstAccSrcCst_DO_COUNT_TIMES_FLOAT_NEON(loopCode1, loopCode2)
ne10_result_t ne10_mlac_vec4f_neon(ne10_vec4f_t *dst, ne10_vec4f_t *acc, ne10_vec4f_t *src, const ne10_vec4f_t *cst, ne10_uint32_t count)
Specific implementation of ne10_mlac_vec4f using NEON intrinsics.
ne10_result_t ne10_mlac_vec2f_neon(ne10_vec2f_t *dst, ne10_vec2f_t *acc, ne10_vec2f_t *src, const ne10_vec2f_t *cst, ne10_uint32_t count)
Specific implementation of ne10_mlac_vec2f using NEON intrinsics.
A 4-tuple of ne10_float32_t values.