Project Ne10
An open, optimized software library for the ARM architecture.
NE10_mlac.neon.c
Go to the documentation of this file.
1 /*
2  * Copyright 2011-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : math/NE10_mlac.neon.c
30  */
31 
32 #include "NE10_types.h"
33 #include "macros.h"
34 
35 #include <assert.h>
36 #include <arm_neon.h>
37 
38 
40 {
42  (
43  n_dst = vmlaq_f32 (n_acc, n_src, n_cst);
44  ,
45  n_rest = vmla_f32 (n_rest_acc, n_rest, n_rest_cst);
46  );
47 }
48 
50 {
52  (
53  n_dst = vmlaq_f32 (n_acc, n_src , n_cst);
54  ,
55  n_rest = vmla_f32 (n_rest_acc, n_rest, n_rest_cst);
56  );
57 }
58 
60 {
62  (
63  n_dst1 = vmlaq_f32 (n_acc1, n_src1 , n_cst1);
64  n_dst2 = vmlaq_f32 (n_acc2, n_src2 , n_cst2);
65  n_dst3 = vmlaq_f32 (n_acc3, n_src3 , n_cst3);
66  ,
67  n_rest.val[0] = vmla_f32 (n_rest_acc.val[0], n_rest.val[0], n_rest_cst.val[0]); /* the X lane */
68  n_rest.val[1] = vmla_f32 (n_rest_acc.val[1], n_rest.val[1], n_rest_cst.val[1]); /* the Y lane */
69  n_rest.val[2] = vmla_f32 (n_rest_acc.val[2], n_rest.val[2], n_rest_cst.val[2]); /* the Z lane */
70  );
71 }
72 
74 {
76  (
77  n_dst = vmlaq_f32 (n_acc, n_src , n_cst);
78  );
79 }
#define NE10_DstAccSrcCst_DO_COUNT_TIMES_VEC4F_NEON(loopCode)
Definition: macros.h:105
#define NE10_DstAccSrcCst_DO_COUNT_TIMES_VEC3F_NEON(loopCode1, loopCode2)
Definition: macros.h:96
ne10_vec2f_t: a 2-tuple of ne10_float32_t values.
Definition: NE10_types.h:87
float ne10_float32_t
Definition: NE10_types.h:80
#define NE10_DstAccSrcCst_DO_COUNT_TIMES_VEC2F_NEON(loopCode1, loopCode2)
Definition: macros.h:87
ne10_result_t ne10_mlac_vec3f_neon(ne10_vec3f_t *dst, ne10_vec3f_t *acc, ne10_vec3f_t *src, const ne10_vec3f_t *cst, ne10_uint32_t count)
Specific implementation of ne10_mlac_vec3f using NEON intrinsics.
uint32_t ne10_uint32_t
Definition: NE10_types.h:77
ne10_vec3f_t: a 3-tuple of ne10_float32_t values.
Definition: NE10_types.h:96
ne10_result_t ne10_mlac_float_neon(ne10_float32_t *dst, ne10_float32_t *acc, ne10_float32_t *src, const ne10_float32_t cst, ne10_uint32_t count)
Specific implementation of ne10_mlac_float using NEON intrinsics.
#define NE10_DstAccSrcCst_DO_COUNT_TIMES_FLOAT_NEON(loopCode1, loopCode2)
Definition: macros.h:77
ne10_result_t ne10_mlac_vec4f_neon(ne10_vec4f_t *dst, ne10_vec4f_t *acc, ne10_vec4f_t *src, const ne10_vec4f_t *cst, ne10_uint32_t count)
Specific implementation of ne10_mlac_vec4f using NEON intrinsics.
ne10_result_t ne10_mlac_vec2f_neon(ne10_vec2f_t *dst, ne10_vec2f_t *acc, ne10_vec2f_t *src, const ne10_vec2f_t *cst, ne10_uint32_t count)
Specific implementation of ne10_mlac_vec2f using NEON intrinsics.
int ne10_result_t
Definition: NE10_types.h:82
ne10_vec4f_t: a 4-tuple of ne10_float32_t values.
Definition: NE10_types.h:106