Project Ne10
An open, optimized software library for the ARM architecture.
NE10_mulmat.c
Go to the documentation of this file.
1 /*
2  * Copyright 2011-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : math/NE10_mulmat.c
30  */
31 
32 #include "NE10_types.h"
33 #include "macros.h"
34 
35 #include <assert.h>
36 
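37 ne10_result_t ne10_mulmat_2x2f_c(ne10_mat2x2f_t *dst, ne10_mat2x2f_t *src1, ne10_mat2x2f_t *src2, ne10_uint32_t count)
38 {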
39 #define A1 src1[ itr ].c1.r1
40 #define A2 src2[ itr ].c1.r1
41 #define B1 src1[ itr ].c1.r2
42 #define B2 src2[ itr ].c1.r2
43 #define C1 src1[ itr ].c2.r1
44 #define C2 src2[ itr ].c2.r1
45 #define D1 src1[ itr ].c2.r2
46 #define D2 src2[ itr ].c2.r2
47 
49  for ( unsigned int itr = 0; itr < count; itr++ )
50  {
51  dst[ itr ].c1.r1 = (A1 * A2) + (C1 * B2);
52  dst[ itr ].c1.r2 = (B1 * A2) + (D1 * B2);
53 
54  dst[ itr ].c2.r1 = (A1 * C2) + (C1 * D2);
55  dst[ itr ].c2.r2 = (B1 * C2) + (D1 * D2);
56  }
57  return NE10_OK;
58 
59 #undef A1
60 #undef A2
61 #undef B1
62 #undef B2
63 #undef C1
64 #undef C2
65 #undef D1
66 #undef D2
67 }
68 
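69 ne10_result_t ne10_mulmat_3x3f_c(ne10_mat3x3f_t *dst, ne10_mat3x3f_t *src1, ne10_mat3x3f_t *src2, ne10_uint32_t count)
70 {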
71 #define A1 src1[ itr ].c1.r1
72 #define A2 src2[ itr ].c1.r1
73 #define B1 src1[ itr ].c1.r2
74 #define B2 src2[ itr ].c1.r2
75 #define C1 src1[ itr ].c1.r3
76 #define C2 src2[ itr ].c1.r3
77 #define D1 src1[ itr ].c2.r1
78 #define D2 src2[ itr ].c2.r1
79 #define E1 src1[ itr ].c2.r2
80 #define E2 src2[ itr ].c2.r2
81 #define F1 src1[ itr ].c2.r3
82 #define F2 src2[ itr ].c2.r3
83 #define G1 src1[ itr ].c3.r1
84 #define G2 src2[ itr ].c3.r1
85 #define H1 src1[ itr ].c3.r2
86 #define H2 src2[ itr ].c3.r2
87 #define I1 src1[ itr ].c3.r3
88 #define I2 src2[ itr ].c3.r3
89 
91  for ( unsigned int itr = 0; itr < count; itr++ )
92  {
93  dst[ itr ].c1.r1 = (A1 * A2) + (D1 * B2) + (G1 * C2);
94  dst[ itr ].c1.r2 = (B1 * A2) + (E1 * B2) + (H1 * C2);
95  dst[ itr ].c1.r3 = (C1 * A2) + (F1 * B2) + (I1 * C2);
96 
97  dst[ itr ].c2.r1 = (A1 * D2) + (D1 * E2) + (G1 * F2);
98  dst[ itr ].c2.r2 = (B1 * D2) + (E1 * E2) + (H1 * F2);
99  dst[ itr ].c2.r3 = (C1 * D2) + (F1 * E2) + (I1 * F2);
100 
101  dst[ itr ].c3.r1 = (A1 * G2) + (D1 * H2) + (G1 * I2);
102  dst[ itr ].c3.r2 = (B1 * G2) + (E1 * H2) + (H1 * I2);
103  dst[ itr ].c3.r3 = (C1 * G2) + (F1 * H2) + (I1 * I2);
104  }
105  return NE10_OK;
106 
107 #undef A1
108 #undef A2
109 #undef B1
110 #undef B2
111 #undef C1
112 #undef C2
113 #undef D1
114 #undef D2
115 #undef E1
116 #undef E2
117 #undef F1
118 #undef F2
119 #undef G1
120 #undef G2
121 #undef H1
122 #undef H2
123 #undef I1
124 #undef I2
125 }
126 
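127 ne10_result_t ne10_mulmat_4x4f_c(ne10_mat4x4f_t *dst, ne10_mat4x4f_t *src1, ne10_mat4x4f_t *src2, ne10_uint32_t count)
128 {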
129 #define A1 src1[ itr ].c1.r1
130 #define A2 src2[ itr ].c1.r1
131 #define B1 src1[ itr ].c1.r2
132 #define B2 src2[ itr ].c1.r2
133 #define C1 src1[ itr ].c1.r3
134 #define C2 src2[ itr ].c1.r3
135 #define D1 src1[ itr ].c1.r4
136 #define D2 src2[ itr ].c1.r4
137 
138 #define E1 src1[ itr ].c2.r1
139 #define E2 src2[ itr ].c2.r1
140 #define F1 src1[ itr ].c2.r2
141 #define F2 src2[ itr ].c2.r2
142 #define G1 src1[ itr ].c2.r3
143 #define G2 src2[ itr ].c2.r3
144 #define H1 src1[ itr ].c2.r4
145 #define H2 src2[ itr ].c2.r4
146 
147 #define I1 src1[ itr ].c3.r1
148 #define I2 src2[ itr ].c3.r1
149 #define J1 src1[ itr ].c3.r2
150 #define J2 src2[ itr ].c3.r2
151 #define K1 src1[ itr ].c3.r3
152 #define K2 src2[ itr ].c3.r3
153 #define L1 src1[ itr ].c3.r4
154 #define L2 src2[ itr ].c3.r4
155 
156 #define M1 src1[ itr ].c4.r1
157 #define M2 src2[ itr ].c4.r1
158 #define N1 src1[ itr ].c4.r2
159 #define N2 src2[ itr ].c4.r2
160 #define O1 src1[ itr ].c4.r3
161 #define O2 src2[ itr ].c4.r3
162 #define P1 src1[ itr ].c4.r4
163 #define P2 src2[ itr ].c4.r4
164 
166  for ( unsigned int itr = 0; itr < count; itr++ )
167  {
168  dst[ itr ].c1.r1 = (A1 * A2) + (E1 * B2) + (I1 * C2) + (M1 * D2);
169  dst[ itr ].c1.r2 = (B1 * A2) + (F1 * B2) + (J1 * C2) + (N1 * D2);
170  dst[ itr ].c1.r3 = (C1 * A2) + (G1 * B2) + (K1 * C2) + (O1 * D2);
171  dst[ itr ].c1.r4 = (D1 * A2) + (H1 * B2) + (L1 * C2) + (P1 * D2);
172 
173  dst[ itr ].c2.r1 = (A1 * E2) + (E1 * F2) + (I1 * G2) + (M1 * H2);
174  dst[ itr ].c2.r2 = (B1 * E2) + (F1 * F2) + (J1 * G2) + (N1 * H2);
175  dst[ itr ].c2.r3 = (C1 * E2) + (G1 * F2) + (K1 * G2) + (O1 * H2);
176  dst[ itr ].c2.r4 = (D1 * E2) + (H1 * F2) + (L1 * G2) + (P1 * H2);
177 
178  dst[ itr ].c3.r1 = (A1 * I2) + (E1 * J2) + (I1 * K2) + (M1 * L2);
179  dst[ itr ].c3.r2 = (B1 * I2) + (F1 * J2) + (J1 * K2) + (N1 * L2);
180  dst[ itr ].c3.r3 = (C1 * I2) + (G1 * J2) + (K1 * K2) + (O1 * L2);
181  dst[ itr ].c3.r4 = (D1 * I2) + (H1 * J2) + (L1 * K2) + (P1 * L2);
182 
183  dst[ itr ].c4.r1 = (A1 * M2) + (E1 * N2) + (I1 * O2) + (M1 * P2);
184  dst[ itr ].c4.r2 = (B1 * M2) + (F1 * N2) + (J1 * O2) + (N1 * P2);
185  dst[ itr ].c4.r3 = (C1 * M2) + (G1 * N2) + (K1 * O2) + (O1 * P2);
186  dst[ itr ].c4.r4 = (D1 * M2) + (H1 * N2) + (L1 * O2) + (P1 * P2);
187  }
188  return NE10_OK;
189 
190 #undef A1
191 #undef A2
192 #undef B1
193 #undef B2
194 #undef C1
195 #undef C2
196 #undef D1
197 #undef D2
198 #undef E1
199 #undef E2
200 #undef F1
201 #undef F2
202 #undef G1
203 #undef G2
204 #undef H1
205 #undef H2
206 #undef I1
207 #undef I2
208 #undef J1
209 #undef J2
210 #undef K1
211 #undef K2
212 #undef L1
213 #undef L2
214 #undef M1
215 #undef M2
216 #undef N1
217 #undef N2
218 #undef O1
219 #undef O2
220 #undef P1
221 #undef P2
222 }
ne10_float32_t r2
Definition: NE10_types.h:145
ne10_mat_row4f c1
Definition: NE10_types.h:187
#define C2
#define H1
#define F2
#define O2
#define M1
#define P1
#define O1
#define A1
#define H2
#define P2
ne10_mat_row3f c3
Definition: NE10_types.h:153
#define K1
#define L2
ne10_float32_t r1
Definition: NE10_types.h:144
#define I1
ne10_mat_row4f c4
Definition: NE10_types.h:190
#define K2
uint32_t ne10_uint32_t
Definition: NE10_types.h:77
#define G2
#define D1
ne10_float32_t r3
Definition: NE10_types.h:181
#define B1
#define F1
ne10_mat_row2f c2
Definition: NE10_types.h:127
ne10_mat_row4f c2
Definition: NE10_types.h:188
#define N2
ne10_float32_t r3
Definition: NE10_types.h:146
ne10_result_t ne10_mulmat_2x2f_c(ne10_mat2x2f_t *dst, ne10_mat2x2f_t *src1, ne10_mat2x2f_t *src2, ne10_uint32_t count)
Specific implementation of ne10_mulmat_2x2f using plain C code.
Definition: NE10_mulmat.c:37
ne10_float32_t r2
Definition: NE10_types.h:180
#define G1
ne10_mat_row4f c3
Definition: NE10_types.h:189
#define C1
ne10_float32_t r2
Definition: NE10_types.h:121
#define A2
#define E1
#define I2
ne10_result_t ne10_mulmat_3x3f_c(ne10_mat3x3f_t *dst, ne10_mat3x3f_t *src1, ne10_mat3x3f_t *src2, ne10_uint32_t count)
Specific implementation of ne10_mulmat_3x3f using plain C code.
Definition: NE10_mulmat.c:69
#define D2
#define N1
#define NE10_OK
Definition: NE10_types.h:65
#define J1
#define NE10_CHECKPOINTER_DstSrc1Src2
Definition: factor.h:94
#define E2
#define M2
#define B2
ne10_float32_t r1
Definition: NE10_types.h:120
ne10_mat_row3f c1
Definition: NE10_types.h:151
ne10_mat_row2f c1
Definition: NE10_types.h:126
#define J2
ne10_float32_t r4
Definition: NE10_types.h:182
int ne10_result_t
Definition: NE10_types.h:82
ne10_float32_t r1
Definition: NE10_types.h:179
ne10_result_t ne10_mulmat_4x4f_c(ne10_mat4x4f_t *dst, ne10_mat4x4f_t *src1, ne10_mat4x4f_t *src2, ne10_uint32_t count)
Specific implementation of ne10_mulmat_4x4f using plain C code.
Definition: NE10_mulmat.c:127
#define L1
ne10_mat_row3f c2
Definition: NE10_types.h:152