Project Ne10
An open, optimized software library for the ARM architecture.
Main Page
Related Pages
Modules
Classes
Files
Examples
File List
File Members
common
macros.h
Go to the documentation of this file.
1
/*
2
* Copyright 2011-16 ARM Limited and Contributors.
3
* All rights reserved.
4
*
5
* Redistribution and use in source and binary forms, with or without
6
* modification, are permitted provided that the following conditions are met:
7
* * Redistributions of source code must retain the above copyright
8
* notice, this list of conditions and the following disclaimer.
9
* * Redistributions in binary form must reproduce the above copyright
10
* notice, this list of conditions and the following disclaimer in the
11
* documentation and/or other materials provided with the distribution.
12
* * Neither the name of the <organization> nor the
13
* names of its contributors may be used to endorse or promote products
14
* derived from this software without specific prior written permission.
15
*
16
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19
* DISCLAIMED. IN NO EVENT SHALL ARM Limited and Contributors. BE LIABLE FOR ANY
20
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
*/
27
28
/*
29
* NE10 Library : common/macros.h
30
*/
31
32
#include "factor.h"
33
34
/*
 * The following macros are used within some of the NEON math function implementations.
 *
 * With the exception of the VEC4F macros, which can always wholly fill NEON SIMD vectors,
 * each macro takes two parameters -- loopCode1 for the code to be run within the main
 * SIMD loop, and loopCode2 for the code to be run in processing any leftover elements.
 * The details of the variables that are exposed within these macros can be viewed in the
 * specific MAINLOOP and SECONDLOOP sub-macros that each macro utilizes.
 */
43
44
/*
 * NE10_DstSrcCst_DO_COUNT_TIMES_FLOAT_NEON(loopCode1, loopCode2)
 *
 * Expands to a brace-enclosed block that:
 *   1. runs NE10_CHECKPOINTER_DstSrcCst;
 *   2. declares n_cst, a float32x4_t whose four lanes all hold `cst`;
 *   3. invokes NE10_DstSrcCst_OPERATION_FLOAT_NEON with two code arguments.
 *
 * loopCode1: code wrapped by NE10_DstSrcCst_MAINLOOP_FLOAT_NEON (main SIMD loop).
 * loopCode2: code wrapped by NE10_DstSrcCst_SECONDLOOP_FLOAT_NEON (leftover elements).
 *
 * The expansion site must have a variable named `cst` in scope; any further
 * requirements come from the CHECKPOINTER/OPERATION/MAINLOOP/SECONDLOOP
 * macros, which are defined elsewhere.
 */
#define NE10_DstSrcCst_DO_COUNT_TIMES_FLOAT_NEON(loopCode1, loopCode2) { \
    NE10_CHECKPOINTER_DstSrcCst; \
    float32x4_t n_cst = { cst, cst, cst, cst }; \
    NE10_DstSrcCst_OPERATION_FLOAT_NEON( \
        NE10_DstSrcCst_MAINLOOP_FLOAT_NEON(loopCode1); , \
        NE10_DstSrcCst_SECONDLOOP_FLOAT_NEON(loopCode2); \
    ); \
}
52
53
/*
 * NE10_DstSrcCst_DO_COUNT_TIMES_VEC2F_NEON(loopCode1, loopCode2)
 *
 * Expands to a brace-enclosed block that runs NE10_CHECKPOINTER_DstSrcCst and
 * then invokes NE10_DstSrcCst_OPERATION_VEC2F_NEON with two code arguments.
 *
 * loopCode1: code wrapped by NE10_DstSrcCst_MAINLOOP_VEC2F_NEON (main SIMD loop).
 * loopCode2: code wrapped by NE10_DstSrcCst_SECONDLOOP_VEC2F_NEON (leftover elements).
 *
 * No locals are declared here; the variables visible to the loop code are
 * whatever the OPERATION/MAINLOOP/SECONDLOOP macros (defined elsewhere) expose.
 */
#define NE10_DstSrcCst_DO_COUNT_TIMES_VEC2F_NEON(loopCode1, loopCode2) { \
    NE10_CHECKPOINTER_DstSrcCst; \
    NE10_DstSrcCst_OPERATION_VEC2F_NEON( \
        NE10_DstSrcCst_MAINLOOP_VEC2F_NEON(loopCode1); , \
        NE10_DstSrcCst_SECONDLOOP_VEC2F_NEON(loopCode2); \
    ); \
}
60
61
/*
 * NE10_DstSrcCst_DO_COUNT_TIMES_VEC3F_NEON(loopCode1, loopCode2)
 *
 * This macro uses interleaving to boost the performance.
 *
 * Expands to a brace-enclosed block that runs NE10_CHECKPOINTER_DstSrcCst and
 * then invokes NE10_DstSrcCst_OPERATION_VEC3F_NEON with two code arguments.
 *
 * loopCode1: code wrapped by NE10_DstSrcCst_MAINLOOP_VEC3F_NEON (main SIMD loop).
 * loopCode2: code wrapped by NE10_DstSrcCst_SECONDLOOP_VEC3F_NEON (leftover elements).
 */
#define NE10_DstSrcCst_DO_COUNT_TIMES_VEC3F_NEON(loopCode1, loopCode2) { \
    NE10_CHECKPOINTER_DstSrcCst; \
    NE10_DstSrcCst_OPERATION_VEC3F_NEON( \
        NE10_DstSrcCst_MAINLOOP_VEC3F_NEON(loopCode1); , \
        NE10_DstSrcCst_SECONDLOOP_VEC3F_NEON(loopCode2); \
    ); \
}
69
70
/*
 * NE10_DstSrcCst_DO_COUNT_TIMES_VEC4F_NEON(loopCode)
 *
 * Expands to a brace-enclosed block that runs NE10_CHECKPOINTER_DstSrcCst and
 * then invokes NE10_DstSrcCst_OPERATION_VEC4F_NEON with a single code argument.
 *
 * loopCode: code wrapped by NE10_DstSrcCst_MAINLOOP_VEC4F_NEON.
 *
 * Unlike the FLOAT/VEC2F/VEC3F variants there is no second (leftover) loop:
 * only a MAINLOOP argument is passed.
 */
#define NE10_DstSrcCst_DO_COUNT_TIMES_VEC4F_NEON(loopCode) { \
    NE10_CHECKPOINTER_DstSrcCst; \
    NE10_DstSrcCst_OPERATION_VEC4F_NEON( \
        NE10_DstSrcCst_MAINLOOP_VEC4F_NEON(loopCode); \
    ); \
}
76
77
/*
 * NE10_DstAccSrcCst_DO_COUNT_TIMES_FLOAT_NEON(loopCode1, loopCode2)
 *
 * Expands to a brace-enclosed block that:
 *   1. declares n_acc (float32x4_t, not initialised here);
 *   2. declares n_cst, a float32x4_t whose four lanes all hold `cst`;
 *   3. runs NE10_CHECKPOINTER_DstAccSrcCst;
 *   4. invokes NE10_DstAccSrcCst_OPERATION_FLOAT_NEON with two code arguments.
 *
 * loopCode1: code wrapped by NE10_DstAccSrcCst_MAINLOOP_FLOAT_NEON (main SIMD loop).
 * loopCode2: code wrapped by NE10_DstAccSrcCst_SECONDLOOP_FLOAT_NEON (leftover elements).
 *
 * The expansion site must have a variable named `cst` in scope.
 * NOTE(review): n_acc is presumably loaded by the MAINLOOP macro before
 * loopCode1 reads it -- confirm against that macro's definition.
 */
#define NE10_DstAccSrcCst_DO_COUNT_TIMES_FLOAT_NEON(loopCode1, loopCode2) { \
    float32x4_t n_acc; \
    float32x4_t n_cst = { cst, cst, cst, cst }; \
    NE10_CHECKPOINTER_DstAccSrcCst; \
    NE10_DstAccSrcCst_OPERATION_FLOAT_NEON( \
        NE10_DstAccSrcCst_MAINLOOP_FLOAT_NEON(loopCode1); , \
        NE10_DstAccSrcCst_SECONDLOOP_FLOAT_NEON(loopCode2); \
    ); \
}
86
87
/*
 * NE10_DstAccSrcCst_DO_COUNT_TIMES_VEC2F_NEON(loopCode1, loopCode2)
 *
 * Expands to a brace-enclosed block that declares n_acc (float32x4_t, not
 * initialised here), runs NE10_CHECKPOINTER_DstAccSrcCst, and then invokes
 * NE10_DstAccSrcCst_OPERATION_VEC2F_NEON with two code arguments.
 *
 * loopCode1: code wrapped by NE10_DstAccSrcCst_MAINLOOP_VEC2F_NEON (main SIMD loop).
 * loopCode2: code wrapped by NE10_DstAccSrcCst_SECONDLOOP_VEC2F_NEON (leftover elements).
 *
 * NOTE(review): n_acc is presumably loaded by the MAINLOOP macro before
 * loopCode1 reads it -- confirm against that macro's definition.
 */
#define NE10_DstAccSrcCst_DO_COUNT_TIMES_VEC2F_NEON(loopCode1, loopCode2) { \
    float32x4_t n_acc; \
    NE10_CHECKPOINTER_DstAccSrcCst; \
    NE10_DstAccSrcCst_OPERATION_VEC2F_NEON( \
        NE10_DstAccSrcCst_MAINLOOP_VEC2F_NEON(loopCode1); , \
        NE10_DstAccSrcCst_SECONDLOOP_VEC2F_NEON(loopCode2); \
    ); \
}
95
96
/*
 * NE10_DstAccSrcCst_DO_COUNT_TIMES_VEC3F_NEON(loopCode1, loopCode2)
 *
 * Expands to a brace-enclosed block that declares three float32x4_t locals
 * (n_acc1, n_acc2, n_acc3; not initialised here), runs
 * NE10_CHECKPOINTER_DstAccSrcCst, and then invokes
 * NE10_DstAccSrcCst_OPERATION_VEC3F_NEON with two code arguments.
 *
 * loopCode1: code wrapped by NE10_DstAccSrcCst_MAINLOOP_VEC3F_NEON (main SIMD loop).
 * loopCode2: code wrapped by NE10_DstAccSrcCst_SECONDLOOP_VEC3F_NEON (leftover elements).
 *
 * NOTE(review): the three n_acc* registers are presumably loaded by the
 * MAINLOOP macro before loopCode1 reads them -- confirm against its definition.
 */
#define NE10_DstAccSrcCst_DO_COUNT_TIMES_VEC3F_NEON(loopCode1, loopCode2) { \
    float32x4_t n_acc1, n_acc2, n_acc3; \
    NE10_CHECKPOINTER_DstAccSrcCst; \
    NE10_DstAccSrcCst_OPERATION_VEC3F_NEON( \
        NE10_DstAccSrcCst_MAINLOOP_VEC3F_NEON(loopCode1); , \
        NE10_DstAccSrcCst_SECONDLOOP_VEC3F_NEON(loopCode2); \
    ); \
}
104
105
/*
 * NE10_DstAccSrcCst_DO_COUNT_TIMES_VEC4F_NEON(loopCode)
 *
 * Expands to a brace-enclosed block that declares n_acc (float32x4_t, not
 * initialised here), runs NE10_CHECKPOINTER_DstAccSrcCst, and then invokes
 * NE10_DstAccSrcCst_OPERATION_VEC4F_NEON with a single code argument.
 *
 * loopCode: code wrapped by NE10_DstAccSrcCst_MAINLOOP_VEC4F_NEON.
 *
 * Unlike the FLOAT/VEC2F/VEC3F variants there is no second (leftover) loop:
 * only a MAINLOOP argument is passed.
 */
#define NE10_DstAccSrcCst_DO_COUNT_TIMES_VEC4F_NEON(loopCode) { \
    float32x4_t n_acc; \
    NE10_CHECKPOINTER_DstAccSrcCst; \
    NE10_DstAccSrcCst_OPERATION_VEC4F_NEON( \
        NE10_DstAccSrcCst_MAINLOOP_VEC4F_NEON(loopCode); \
    ); \
}
112
113
/*
 * NE10_DstCst_DO_COUNT_TIMES_FLOAT_NEON(loopCode1, loopCode2)
 *
 * Expands to a brace-enclosed block that:
 *   1. declares n_cst, a float32x4_t whose four lanes all hold `cst`;
 *   2. runs NE10_CHECKPOINTER_DstCst;
 *   3. invokes NE10_DstCst_OPERATION_FLOAT_NEON with two code arguments.
 *
 * loopCode1: code wrapped by NE10_DstCst_MAINLOOP_FLOAT_NEON (main SIMD loop).
 * loopCode2: code wrapped by NE10_DstCst_SECONDLOOP_FLOAT_NEON (leftover elements).
 *
 * The expansion site must have a variable named `cst` in scope.
 */
#define NE10_DstCst_DO_COUNT_TIMES_FLOAT_NEON(loopCode1, loopCode2) { \
    float32x4_t n_cst = { cst, cst, cst, cst }; \
    NE10_CHECKPOINTER_DstCst; \
    NE10_DstCst_OPERATION_FLOAT_NEON( \
        NE10_DstCst_MAINLOOP_FLOAT_NEON(loopCode1); , \
        NE10_DstCst_SECONDLOOP_FLOAT_NEON(loopCode2); \
    ); \
}
121
122
/*
 * NE10_DstCst_DO_COUNT_TIMES_VEC2F_NEON(loopCode1, loopCode2)
 *
 * Expands to a brace-enclosed block that runs NE10_CHECKPOINTER_DstCst and
 * then invokes NE10_DstCst_OPERATION_VEC2F_NEON with two code arguments.
 *
 * loopCode1: code wrapped by NE10_DstCst_MAINLOOP_VEC2F_NEON (main SIMD loop).
 * loopCode2: code wrapped by NE10_DstCst_SECONDLOOP_VEC2F_NEON (leftover elements).
 */
#define NE10_DstCst_DO_COUNT_TIMES_VEC2F_NEON(loopCode1, loopCode2) { \
    NE10_CHECKPOINTER_DstCst; \
    NE10_DstCst_OPERATION_VEC2F_NEON( \
        NE10_DstCst_MAINLOOP_VEC2F_NEON(loopCode1); , \
        NE10_DstCst_SECONDLOOP_VEC2F_NEON(loopCode2); \
    ); \
}
129
130
/*
 * NE10_DstCst_DO_COUNT_TIMES_VEC3F_NEON(loopCode1, loopCode2)
 *
 * This macro uses interleaving to boost the performance.
 *
 * Expands to a brace-enclosed block that runs NE10_CHECKPOINTER_DstCst and
 * then invokes NE10_DstCst_OPERATION_VEC3F_NEON with two code arguments.
 *
 * loopCode1: code wrapped by NE10_DstCst_MAINLOOP_VEC3F_NEON (main SIMD loop).
 * loopCode2: code wrapped by NE10_DstCst_SECONDLOOP_VEC3F_NEON (leftover elements).
 */
#define NE10_DstCst_DO_COUNT_TIMES_VEC3F_NEON(loopCode1, loopCode2) { \
    NE10_CHECKPOINTER_DstCst; \
    NE10_DstCst_OPERATION_VEC3F_NEON( \
        NE10_DstCst_MAINLOOP_VEC3F_NEON(loopCode1); , \
        NE10_DstCst_SECONDLOOP_VEC3F_NEON(loopCode2); \
    ); \
}
138
139
/*
 * NE10_DstCst_DO_COUNT_TIMES_VEC4F_NEON(loopCode)
 *
 * Expands to a brace-enclosed block that runs NE10_CHECKPOINTER_DstCst and
 * then invokes NE10_DstCst_OPERATION_VEC4F_NEON with a single code argument.
 *
 * loopCode: code wrapped by NE10_DstCst_MAINLOOP_VEC4F_NEON.
 *
 * Unlike the FLOAT/VEC2F/VEC3F variants there is no second (leftover) loop:
 * only a MAINLOOP argument is passed.
 */
#define NE10_DstCst_DO_COUNT_TIMES_VEC4F_NEON(loopCode) { \
    NE10_CHECKPOINTER_DstCst; \
    NE10_DstCst_OPERATION_VEC4F_NEON( \
        NE10_DstCst_MAINLOOP_VEC4F_NEON(loopCode); \
    ); \
}
145
146
/*
 * NE10_DstSrc1Src2_DO_COUNT_TIMES_FLOAT_NEON(loopCode1, loopCode2)
 *
 * Expands to a brace-enclosed block that declares n_src2 (float32x4_t, not
 * initialised here), runs NE10_CHECKPOINTER_DstSrc1Src2, and then invokes
 * NE10_DstSrc1Src2_OPERATION_FLOAT_NEON with two code arguments.
 *
 * loopCode1: code wrapped by NE10_DstSrc1Src2_MAINLOOP_FLOAT_NEON (main SIMD loop).
 * loopCode2: code wrapped by NE10_DstSrc1Src2_SECONDLOOP_FLOAT_NEON (leftover elements).
 *
 * NOTE(review): n_src2 is presumably loaded by the MAINLOOP macro before
 * loopCode1 reads it -- confirm against that macro's definition.
 */
#define NE10_DstSrc1Src2_DO_COUNT_TIMES_FLOAT_NEON(loopCode1, loopCode2) { \
    float32x4_t n_src2; \
    NE10_CHECKPOINTER_DstSrc1Src2; \
    NE10_DstSrc1Src2_OPERATION_FLOAT_NEON( \
        NE10_DstSrc1Src2_MAINLOOP_FLOAT_NEON(loopCode1); , \
        NE10_DstSrc1Src2_SECONDLOOP_FLOAT_NEON(loopCode2); \
    ); \
}
154
155
/*
 * NE10_DstAccSrc1Src2_DO_COUNT_TIMES_FLOAT_NEON(loopCode1, loopCode2)
 *
 * Expands to a brace-enclosed block that declares n_acc and n_src2 (both
 * float32x4_t, not initialised here), runs NE10_CHECKPOINTER_DstAccSrc1Src2,
 * and then invokes NE10_DstAccSrc1Src2_OPERATION_FLOAT_NEON with two code
 * arguments.
 *
 * loopCode1: code wrapped by NE10_DstAccSrc1Src2_MAINLOOP_FLOAT_NEON (main SIMD loop).
 * loopCode2: code wrapped by NE10_DstAccSrc1Src2_SECONDLOOP_FLOAT_NEON (leftover elements).
 *
 * NOTE(review): n_acc and n_src2 are presumably loaded by the MAINLOOP macro
 * before loopCode1 reads them -- confirm against that macro's definition.
 */
#define NE10_DstAccSrc1Src2_DO_COUNT_TIMES_FLOAT_NEON(loopCode1, loopCode2) { \
    float32x4_t n_acc; \
    float32x4_t n_src2; \
    NE10_CHECKPOINTER_DstAccSrc1Src2; \
    NE10_DstAccSrc1Src2_OPERATION_FLOAT_NEON( \
        NE10_DstAccSrc1Src2_MAINLOOP_FLOAT_NEON(loopCode1); , \
        NE10_DstAccSrc1Src2_SECONDLOOP_FLOAT_NEON(loopCode2); \
    ); \
}
factor.h
Generated on Fri Jun 30 2017 10:50:54 for Project Ne10 by
1.8.11