Project Ne10
An open, optimized software library for the ARM architecture.
NE10_mulcmatvec.c
Go to the documentation of this file.
1 /*
2  * Copyright 2011-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : math/NE10_mulcmatvec.c
30  */
31 
32 #include "NE10_types.h"
33 #include "macros.h"
34 
35 #include <assert.h>
36 
/*
 * Body of the 2x2 matrix-times-2-vector routine: for each of the `count`
 * elements, dst[i] is computed from src[i] and the single matrix *cst.
 *
 * NOTE(review): the signature line is absent from this listing; the
 * cross-reference list at the end of the page gives
 * ne10_mulcmatvec_cm2x2f_v2f_c(dst, cst, src, count) for the 2x2 case --
 * confirm against the original file.
 */
38 {
/* Local shorthand for the four members of *cst that are read below. */
39 #define A1 cst->c1.r1
40 #define B1 cst->c1.r2
41 #define C1 cst->c2.r1
42 #define D1 cst->c2.r2
43 
/*
 * NOTE(review): listing line 44 is missing here (dropped by the extraction);
 * the cross-reference list mentions NE10_CHECKPOINTER_DstSrc -- presumably a
 * dst/src pointer check belongs at this point; verify in the original source.
 */
45  for ( unsigned int itr = 0; itr < count; itr++ )
46  {
/*
 * dst = src.x * (c1.r1, c1.r2) + src.y * (c2.r1, c2.r2), i.e. c1 and c2 are
 * used as the columns of the matrix.
 * NOTE(review): dst[itr].x is stored before src[itr].x is read again for the
 * .y row; if dst and src refer to the same array the second row would see the
 * already-updated x -- confirm whether in-place calls are supported.
 */
47  dst[ itr ].x = A1 * src[ itr ].x + C1 * src[ itr ].y;
48  dst[ itr ].y = B1 * src[ itr ].x + D1 * src[ itr ].y;
49  }
/* All `count` elements processed; this path always reports NE10_OK. */
50  return NE10_OK;
51 
/* Drop the shorthand macros so they do not leak into the code that follows. */
52 #undef A1
53 #undef B1
54 #undef C1
55 #undef D1
56 }
57 
/*
 * Body of the 3x3 matrix-times-3-vector routine: for each of the `count`
 * elements, dst[i] is computed from src[i] and the single matrix *cst.
 *
 * NOTE(review): the signature line is absent from this listing; the
 * cross-reference list at the end of the page gives
 * ne10_mulcmatvec_cm3x3f_v3f_c(dst, cst, src, count) for the 3x3 case --
 * confirm against the original file.
 */
59 {
/* Local shorthand for the nine members of *cst: A1..C1 = c1, D1..F1 = c2, G1..I1 = c3. */
60 #define A1 cst->c1.r1
61 #define B1 cst->c1.r2
62 #define C1 cst->c1.r3
63 #define D1 cst->c2.r1
64 #define E1 cst->c2.r2
65 #define F1 cst->c2.r3
66 #define G1 cst->c3.r1
67 #define H1 cst->c3.r2
68 #define I1 cst->c3.r3
69 
/*
 * NOTE(review): listing line 70 is missing here (dropped by the extraction);
 * the cross-reference list mentions NE10_CHECKPOINTER_DstSrc -- presumably a
 * dst/src pointer check belongs at this point; verify in the original source.
 */
71  for ( unsigned int itr = 0; itr < count; itr++ )
72  {
/*
 * dst = src.x * c1 + src.y * c2 + src.z * c3, i.e. c1..c3 are used as the
 * columns of the matrix and each output component takes one rN from each.
 * NOTE(review): dst[itr].x and .y are stored before src[itr].x / .y are read
 * again for the later rows; if dst and src refer to the same array those rows
 * would see already-updated inputs -- confirm whether in-place calls are
 * supported.
 */
73  dst[ itr ].x = A1 * src[ itr ].x + D1 * src[ itr ].y + G1 * src[ itr ].z;
74  dst[ itr ].y = B1 * src[ itr ].x + E1 * src[ itr ].y + H1 * src[ itr ].z;
75  dst[ itr ].z = C1 * src[ itr ].x + F1 * src[ itr ].y + I1 * src[ itr ].z;
76  }
/* All `count` elements processed; this path always reports NE10_OK. */
77  return NE10_OK;
78 
/* Drop the shorthand macros so they do not leak into the code that follows. */
79 #undef A1
80 #undef B1
81 #undef C1
82 #undef D1
83 #undef E1
84 #undef F1
85 #undef G1
86 #undef H1
87 #undef I1
88 }
89 
/*
 * Body of the 4x4 matrix-times-4-vector routine: for each of the `count`
 * elements, dst[i] is computed from src[i] and the single matrix *cst.
 *
 * NOTE(review): the signature line is absent from this listing; the
 * cross-reference list at the end of the page gives
 * ne10_mulcmatvec_cm4x4f_v4f_c(dst, cst, src, count) for the 4x4 case --
 * confirm against the original file.
 */
91 {
/*
 * Local shorthand for the sixteen members of *cst:
 * A1..D1 = c1, E1..H1 = c2, I1..L1 = c3, M1..P1 = c4 (r1..r4 within each).
 */
92 #define A1 cst->c1.r1
93 #define B1 cst->c1.r2
94 #define C1 cst->c1.r3
95 #define D1 cst->c1.r4
96 #define E1 cst->c2.r1
97 #define F1 cst->c2.r2
98 #define G1 cst->c2.r3
99 #define H1 cst->c2.r4
100 #define I1 cst->c3.r1
101 #define J1 cst->c3.r2
102 #define K1 cst->c3.r3
103 #define L1 cst->c3.r4
104 #define M1 cst->c4.r1
105 #define N1 cst->c4.r2
106 #define O1 cst->c4.r3
107 #define P1 cst->c4.r4
108 
/*
 * NOTE(review): listing line 109 is missing here (dropped by the extraction);
 * the cross-reference list mentions NE10_CHECKPOINTER_DstSrc -- presumably a
 * dst/src pointer check belongs at this point; verify in the original source.
 */
110  for ( unsigned int itr = 0; itr < count; itr++ )
111  {
/*
 * dst = src.x * c1 + src.y * c2 + src.z * c3 + src.w * c4, i.e. c1..c4 are
 * used as the columns of the matrix and each output component takes one rN
 * from each.
 * NOTE(review): dst[itr].x/.y/.z are stored before the same src[itr] fields
 * are read again for the later rows; if dst and src refer to the same array
 * those rows would see already-updated inputs -- confirm whether in-place
 * calls are supported.
 */
112  dst[ itr ].x = A1 * src[ itr ].x + E1 * src[ itr ].y + I1 * src[ itr ].z + M1 * src[ itr ].w;
113  dst[ itr ].y = B1 * src[ itr ].x + F1 * src[ itr ].y + J1 * src[ itr ].z + N1 * src[ itr ].w;
114  dst[ itr ].z = C1 * src[ itr ].x + G1 * src[ itr ].y + K1 * src[ itr ].z + O1 * src[ itr ].w;
115  dst[ itr ].w = D1 * src[ itr ].x + H1 * src[ itr ].y + L1 * src[ itr ].z + P1 * src[ itr ].w;
116  }
/* All `count` elements processed; this path always reports NE10_OK. */
117  return NE10_OK;
118 
/* Drop the shorthand macros so they do not leak into the code that follows. */
119 #undef A1
120 #undef B1
121 #undef C1
122 #undef D1
123 #undef E1
124 #undef F1
125 #undef G1
126 #undef H1
127 #undef I1
128 #undef J1
129 #undef K1
130 #undef L1
131 #undef M1
132 #undef N1
133 #undef O1
134 #undef P1
135 }
ne10_float32_t x
Definition: NE10_types.h:108
ne10_float32_t y
Definition: NE10_types.h:109
#define C1
#define L1
A 2-tuple of ne10_float32_t values.
Definition: NE10_types.h:87
#define G1
#define F1
#define K1
ne10_result_t ne10_mulcmatvec_cm4x4f_v4f_c(ne10_vec4f_t *dst, const ne10_mat4x4f_t *cst, ne10_vec4f_t *src, ne10_uint32_t count)
Specific implementation of ne10_mulcmatvec_cm4x4f_v4f using plain C code.
uint32_t ne10_uint32_t
Definition: NE10_types.h:77
#define H1
ne10_float32_t z
Definition: NE10_types.h:110
#define M1
#define J1
ne10_float32_t x
Definition: NE10_types.h:98
#define A1
ne10_result_t ne10_mulcmatvec_cm2x2f_v2f_c(ne10_vec2f_t *dst, const ne10_mat2x2f_t *cst, ne10_vec2f_t *src, ne10_uint32_t count)
Specific implementation of ne10_mulcmatvec_cm2x2f_v2f using plain C code.
ne10_float32_t x
Definition: NE10_types.h:89
A 3-tuple of ne10_float32_t values.
Definition: NE10_types.h:96
#define I1
ne10_float32_t z
Definition: NE10_types.h:100
ne10_float32_t y
Definition: NE10_types.h:90
#define D1
#define O1
ne10_float32_t y
Definition: NE10_types.h:99
#define E1
#define NE10_OK
Definition: NE10_types.h:65
#define N1
#define B1
#define P1
#define NE10_CHECKPOINTER_DstSrc
Definition: factor.h:56
int ne10_result_t
Definition: NE10_types.h:82
A 4-tuple of ne10_float32_t values.
Definition: NE10_types.h:106
ne10_result_t ne10_mulcmatvec_cm3x3f_v3f_c(ne10_vec3f_t *dst, const ne10_mat3x3f_t *cst, ne10_vec3f_t *src, ne10_uint32_t count)
Specific implementation of ne10_mulcmatvec_cm3x3f_v3f using plain C code.
ne10_float32_t w
Definition: NE10_types.h:111