Project Ne10
An open, optimized software library for the ARM architecture.
NE10_physics.neon.c
Go to the documentation of this file.
1 /*
2  * Copyright 2014-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : physics/NE10_physics.neon.c
30  */
31 
32 #include "NE10_types.h"
33 
#ifdef ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
/*
 * Declaration of the kernel called by ne10_physics_compute_aabb_vec2f_neon.
 * No definition appears in this file; the asm label fixes the symbol name the
 * compiler emits for this function, so it has to match the name used by
 * whatever object provides the implementation.
 *
 * NOTE(review): the "vertex4" suffix and the only caller in this file (which
 * always passes a multiple of four) suggest the kernel consumes vertices in
 * groups of four -- confirm against the kernel's own source.
 */
extern void ne10_physics_compute_aabb_vertex4_vec2f_neon (ne10_mat2x2f_t *aabb,
        ne10_vec2f_t *vertices,
        ne10_mat2x2f_t *xf,
        ne10_vec2f_t *radius,
        ne10_uint32_t vertex_count)
asm ("ne10_physics_compute_aabb_vertex4_vec2f_neon");
#endif // ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
42 
43 static inline ne10_vec2f_t ne10_mul_matvec_float (ne10_mat2x2f_t T, ne10_vec2f_t v)
44 {
45  ne10_vec2f_t tmp;
46  ne10_float32_t x = (T.c2.r2 * v.x - T.c2.r1 * v.y) + T.c1.r1;
47  ne10_float32_t y = (T.c2.r1 * v.x + T.c2.r2 * v.y) + T.c1.r2;
48  tmp.x = x;
49  tmp.y = y;
50  return tmp;
51 }
52 
53 static inline ne10_float32_t min (float a, ne10_float32_t b)
54 {
55  return a < b ? a : b;
56 }
57 
58 static inline ne10_vec2f_t min_2f (ne10_vec2f_t a, ne10_vec2f_t b)
59 {
60  ne10_vec2f_t tmp = {min (a.x, b.x), min (a.y, b.y) };
61  return tmp;
62 }
63 
64 static inline ne10_float32_t max (float a, ne10_float32_t b)
65 {
66  return a > b ? a : b;
67 }
68 
69 static inline ne10_vec2f_t max_2f (ne10_vec2f_t a, ne10_vec2f_t b)
70 {
71  ne10_vec2f_t tmp = {max (a.x, b.x), max (a.y, b.y) };
72  return tmp;
73 }
74 
#ifdef ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
/**
 * Specific implementation of ne10_physics_compute_aabb_vec2f using NEON SIMD capabilities.
 *
 * The leading block of vertices (the largest multiple of four not exceeding
 * vertex_count) is handed to ne10_physics_compute_aabb_vertex4_vec2f_neon.
 * The remaining one to three vertices are transformed here in plain C and
 * merged with the kernel's result.
 *
 * @param[in,out] aabb          on return, c1 = (lower bound - radius) and
 *                              c2 = (upper bound + radius); left untouched by
 *                              this function when vertex_count is 0
 * @param[in]     vertices      array of vertex_count points
 * @param[in]     xf            transform applied to every vertex
 * @param[in]     radius        per-axis margin subtracted from the lower bound
 *                              and added to the upper bound
 * @param[in]     vertex_count  number of entries in vertices
 */
void ne10_physics_compute_aabb_vec2f_neon (ne10_mat2x2f_t *aabb,
        ne10_vec2f_t *vertices,
        ne10_mat2x2f_t *xf,
        ne10_vec2f_t *radius,
        ne10_uint32_t vertex_count)
{
    /* Counters stay unsigned like vertex_count: a signed 32-bit copy would go
     * negative for counts above INT32_MAX, skipping the kernel and indexing
     * vertices[] backwards. */
    const ne10_uint32_t residual_loops = vertex_count & 0x3u;
    const ne10_uint32_t main_loops = vertex_count - residual_loops;
    ne10_vec2f_t lower;
    ne10_vec2f_t upper;
    ne10_vec2f_t v;
    ne10_uint32_t i;

    if (main_loops > 0)
    {
        ne10_physics_compute_aabb_vertex4_vec2f_neon (aabb, vertices, xf, radius, main_loops);
    }

    if (residual_loops == 0)
    {
        /* Either no vertices at all, or the kernel already covered all of them. */
        return;
    }

    /* Seed the bounds with the first vertex the kernel did not consume. */
    lower = ne10_mul_matvec_float (*xf, vertices[main_loops]);
    upper = lower;

    if (main_loops > 0)
    {
        ne10_vec2f_t kernel_lower;
        ne10_vec2f_t kernel_upper;

        /* Undo the radius margin on the box currently stored in aabb
         * (presumably written by the kernel with the margin already applied --
         * TODO confirm) so that the margin is applied exactly once below. */
        kernel_lower.x = aabb->c1.r1 + radius->x;
        kernel_lower.y = aabb->c1.r2 + radius->y;
        kernel_upper.x = aabb->c2.r1 - radius->x;
        kernel_upper.y = aabb->c2.r2 - radius->y;

        lower = min_2f (lower, kernel_lower);
        upper = max_2f (upper, kernel_upper);
    }

    /* Fold in the remaining residual vertices (at most two more). */
    for (i = main_loops + 1; i < vertex_count; ++i)
    {
        v = ne10_mul_matvec_float (*xf, vertices[i]);
        lower = min_2f (lower, v);
        upper = max_2f (upper, v);
    }

    aabb->c1.r1 = lower.x - radius->x;
    aabb->c1.r2 = lower.y - radius->y;
    aabb->c2.r1 = upper.x + radius->x;
    aabb->c2.r2 = upper.y + radius->y;
}
#endif // ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
void ne10_physics_compute_aabb_vertex4_vec2f_neon(ne10_mat2x2f_t *aabb, ne10_vec2f_t *vertices, ne10_mat2x2f_t *xf, ne10_vec2f_t *radius, ne10_uint32_t vertex_count) asm("ne10_physics_compute_aabb_vertex4_vec2f_neon")
int32_t ne10_int32_t
Definition: NE10_types.h:76
A 2-tuple of ne10_float32_t values.
Definition: NE10_types.h:87
float ne10_float32_t
Definition: NE10_types.h:80
void ne10_physics_compute_aabb_vec2f_neon(ne10_mat2x2f_t *aabb, ne10_vec2f_t *vertices, ne10_mat2x2f_t *xf, ne10_vec2f_t *radius, ne10_uint32_t vertex_count)
Specific implementation of ne10_physics_compute_aabb_vec2f using NEON SIMD capabilities.
uint32_t ne10_uint32_t
Definition: NE10_types.h:77
ne10_float32_t x
Definition: NE10_types.h:89
ne10_mat_row2f c2
Definition: NE10_types.h:127
ne10_float32_t y
Definition: NE10_types.h:90
ne10_float32_t r2
Definition: NE10_types.h:121
ne10_float32_t r1
Definition: NE10_types.h:120
ne10_mat_row2f c1
Definition: NE10_types.h:126