Project Ne10
An open, optimized software library for the ARM architecture.
test_suite_physics.c
Go to the documentation of this file.
1 /*
2  * Copyright 2014-16 ARM Limited and Contributors.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  * * Redistributions of source code must retain the above copyright
8  * notice, this list of conditions and the following disclaimer.
9  * * Redistributions in binary form must reproduce the above copyright
10  * notice, this list of conditions and the following disclaimer in the
11  * documentation and/or other materials provided with the distribution.
12  * * Neither the name of ARM Limited nor the
13  * names of its contributors may be used to endorse or promote products
14  * derived from this software without specific prior written permission.
15  *
16  * THIS SOFTWARE IS PROVIDED BY ARM LIMITED AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL ARM LIMITED AND CONTRIBUTORS BE LIABLE FOR ANY
20  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 
28 /*
29  * NE10 Library : test/test_suite_physics.c
30  */
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <math.h>
35 
36 #include "NE10_physics.h"
37 #include "seatest.h"
38 #include "unit_test_common.h"
39 
40 /* ----------------------------------------------------------------------
41 ** Global defines
42 ** ------------------------------------------------------------------- */
43 #define TEST_LENGTH_SAMPLES 1024
44 #define TEST_COUNT 5000
45 
46 static ne10_int64_t time_c = 0;
47 static ne10_int64_t time_neon = 0;
48 static ne10_float32_t time_speedup = 0.0f;
49 static ne10_float32_t time_savings = 0.0f;
50 
51 static void float_array_assignment (ne10_float32_t *array, ne10_int32_t len)
52 {
53  int i;
54  for (i = 0; i < len; i++)
55  {
56  array[i] = (ne10_float32_t) (drand48() * 32768.0f - 16384.0f);
57  }
58 }
59 
60 
/*
 * Conformance test for the AABB kernel.
 *
 * Runs ne10_physics_compute_aabb_vec2f_c and ne10_physics_compute_aabb_vec2f_neon
 * on identical random vertices and the same transform for a range of vertex
 * counts, and asserts that both resulting boxes match within ERROR_MARGIN_LARGE.
 * The whole body is compiled out unless the NEON kernel is enabled.
 */
void test_compute_aabb_vec2f_conformance()
{
#if defined ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
    /* Regression builds visit every vertex count; all other builds (smoke) visit every third one. */
#if defined (REGRESSION_TEST)
    const ne10_int32_t count_step = 1;
#else // defined (SMOKE_TEST)
    const ne10_int32_t count_step = 3;
#endif
    const ne10_int32_t floats_per_box = sizeof (ne10_mat2x2f_t) / sizeof (ne10_float32_t);
    ne10_vec2f_t radius = {0.2f, 0.2f};
    ne10_mat2x2f_t box_c, box_neon;
    ne10_mat2x2f_t xf;
    ne10_vec2f_t *vertices_c, *vertices_neon;
    ne10_float32_t angle;
    ne10_int32_t vertex_count;

    fprintf (stdout, "----------%30s start\n", __FUNCTION__);

    /* Both kernels get their own copy of the same random vertex data. */
    vertices_c = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    vertices_neon = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    float_array_assignment ( (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
    memcpy ( (ne10_float32_t *) vertices_neon, (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));

    /* Transform: first column is a random pair in [-8, 8), second column is sin/cos of a random angle. */
    angle = (ne10_float32_t) (drand48() * 64.0f - 32.0f);
    xf.c1.r1 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
    xf.c1.r2 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
    xf.c2.r1 = sin (angle);
    xf.c2.r2 = cos (angle);

    for (vertex_count = 1; vertex_count < TEST_LENGTH_SAMPLES; vertex_count += count_step)
    {
        /* reference result, then the NEON result for the same input */
        ne10_physics_compute_aabb_vec2f_c (&box_c, vertices_c, &xf, &radius, vertex_count);
        ne10_physics_compute_aabb_vec2f_neon (&box_neon, vertices_neon, &xf, &radius, vertex_count);

        printf ("----vertex_count %d\n", vertex_count);
        assert_float_vec_equal ( (ne10_float32_t*) &box_c, (ne10_float32_t*) &box_neon, ERROR_MARGIN_LARGE, floats_per_box);
    }

    free (vertices_c);
    free (vertices_neon);
#endif
}
110 
111 void test_compute_aabb_vec2f_performance()
112 {
113  ne10_vec2f_t radius = {0.2f, 0.2f};
114  ne10_vec2f_t *vertices_c, *vertices_neon;
115  ne10_mat2x2f_t aabb_c;
116  ne10_mat2x2f_t xf;
117  ne10_int32_t i;
118  ne10_int32_t vertex_count;
119  // ne10_int32_t vec_size = sizeof (ne10_mat2x2f_t) / sizeof (ne10_float32_t);
120 
121  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
122  fprintf (stdout, "%25s%20s%20s%20s%20s\n", "vertex count", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio");
123 
124  /* init input memory */
125  vertices_c = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
126  vertices_neon = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
127  float_array_assignment ( (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
128  memcpy ( (ne10_float32_t *) vertices_neon, (ne10_float32_t *) vertices_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
129 
130  ne10_float32_t tmp = (ne10_float32_t) (drand48() * 64.0f - 32.0f);
131  xf.c1.r1 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
132  xf.c1.r2 = (ne10_float32_t) (drand48() * 16.0f - 8.0f);
133  xf.c2.r1 = sin (tmp);
134  xf.c2.r2 = cos (tmp);
135 
136  for (vertex_count = 4; vertex_count < TEST_LENGTH_SAMPLES; vertex_count += 4)
137  {
138  //C version
139  GET_TIME
140  (time_c,
141  {
142  for (i = 0; i < TEST_COUNT; i++)
143  ne10_physics_compute_aabb_vec2f_c (&aabb_c, vertices_c, &xf, &radius, vertex_count);
144  }
145  );
146 
147 #ifdef ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
148  //neon version
149  ne10_mat2x2f_t aabb_neon;
150  GET_TIME
151  (time_neon,
152  {
153  for (i = 0; i < TEST_COUNT; i++)
154  ne10_physics_compute_aabb_vec2f_neon (&aabb_neon, vertices_neon, &xf, &radius, vertex_count);
155  }
156  );
157 #endif // ENABLE_NE10_PHYSICS_COMPUTE_AABB_VEC2F_NEON
158  time_speedup = (ne10_float32_t) time_c / time_neon;
159  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
160  printf ("vertax count: %10d time C: %10lld time NEON: %10lld\n", vertex_count, time_c, time_neon);
161  //ne10_log (__FUNCTION__, "Compute aabb%21d%20lld%20lld%19.2f%%%18.2f:1\n", vertex_count, time_c, time_neon, time_savings, time_speedup);
162  }
163  free (vertices_c);
164  free (vertices_neon);
165 }
166 
/*
 * Conformance test for the relative-velocity kernel.
 *
 * Runs ne10_physics_relative_v_vec2f_c and ne10_physics_relative_v_vec2f_neon on
 * the same random inputs for a range of element counts, checks that neither
 * kernel wrote outside its destination array (guard words on both sides), and
 * asserts that the outputs match element by element within ERROR_MARGIN_LARGE.
 * The whole body is compiled out unless the NEON kernel is enabled.
 */
void test_relative_v_vec2f_conformance()
{
#if defined ENABLE_NE10_PHYSICS_RELATIVE_V_VEC2F_NEON
    /* Regression builds visit every count; all other builds (smoke) visit every fifth one. */
#if defined (REGRESSION_TEST)
    const ne10_int32_t count_step = 1;
#else // defined (SMOKE_TEST)
    const ne10_int32_t count_step = 5;
#endif
    ne10_vec2f_t *guarded_dv_c, *guarded_dv_neon;
    ne10_vec2f_t *dv_c, *dv_neon;
    ne10_vec3f_t *v_wa, *v_wb;
    ne10_vec2f_t *ra, *rb;
    ne10_int32_t i;
    ne10_int32_t count;
    ne10_int32_t vec_size = sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t);

    fprintf (stdout, "----------%30s start\n", __FUNCTION__);

    /* init input memory: the inputs must be allocated before they are filled
     * below and released with free() at the end of the test */
    v_wa = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
    v_wb = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
    ra = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    rb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    float_array_assignment ( (ne10_float32_t *) v_wa, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
    float_array_assignment ( (ne10_float32_t *) v_wb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
    float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
    float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));

    /* init dst memory: ARRAY_GUARD_LEN extra floats in front of and behind the payload */
    guarded_dv_c = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
    guarded_dv_neon = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
    dv_c = (ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_c + ARRAY_GUARD_LEN);
    dv_neon = (ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_neon + ARRAY_GUARD_LEN);

    for (count = 1; count < TEST_LENGTH_SAMPLES; count += count_step)
    {
        GUARD_ARRAY ( (ne10_float32_t*) dv_c, count * vec_size);
        GUARD_ARRAY ( (ne10_float32_t*) dv_neon, count * vec_size);

        //C version
        ne10_physics_relative_v_vec2f_c (dv_c, v_wa, ra, v_wb, rb, count);
        //neon version
        ne10_physics_relative_v_vec2f_neon (dv_neon, v_wa, ra, v_wb, rb, count);

        assert_true (CHECK_ARRAY_GUARD ( (ne10_float32_t*) dv_c, count * vec_size));
        assert_true (CHECK_ARRAY_GUARD ( (ne10_float32_t*) dv_neon, count * vec_size));
        printf ("----count %d\n", count);
        for (i = 0; i < count; i++)
            assert_float_vec_equal ( (ne10_float32_t*) &dv_c[i], (ne10_float32_t*) &dv_neon[i], ERROR_MARGIN_LARGE, vec_size);
    }

    free (v_wa);
    free (v_wb);
    free (ra);
    free (rb);
    free (guarded_dv_c);
    free (guarded_dv_neon);
#endif
}
239 
240 void test_relative_v_vec2f_performance()
241 {
242  ne10_vec2f_t *guarded_dv_c, *guarded_dv_neon;
243  ne10_vec2f_t *dv_c, *dv_neon;
244  ne10_vec3f_t *v_wa, *v_wb;
245  ne10_vec2f_t *ra, *rb;
246  ne10_int32_t i;
247  ne10_int32_t count;
248  // ne10_int32_t vec_size = sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t);
249 
250  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
251  fprintf (stdout, "%25s%20s%20s%20s%20s\n", "count", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio");
252 
253  /* init input memory */
258  float_array_assignment ( (ne10_float32_t *) v_wa, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
259  float_array_assignment ( (ne10_float32_t *) v_wb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
260  float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
261  float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
262 
263  /* init dst memory */
264  guarded_dv_c = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
265  guarded_dv_neon = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) + + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
266  dv_c = (ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_c + ARRAY_GUARD_LEN);
267  dv_neon = (ne10_vec2f_t*) ( (ne10_float32_t*) guarded_dv_neon + ARRAY_GUARD_LEN);
268 
269  for (count = 2; count < TEST_LENGTH_SAMPLES; count += 4)
270  {
271  //C version
272  GET_TIME
273  (time_c,
274  {
275  for (i = 0; i < TEST_COUNT; i++)
276  ne10_physics_relative_v_vec2f_c (dv_c, v_wa, ra, v_wb, rb, count);
277  }
278  );
279 #ifdef ENABLE_NE10_PHYSICS_RELATIVE_V_VEC2F_NEON
280  //neon version
281  GET_TIME
282  (time_neon,
283  {
284  for (i = 0; i < TEST_COUNT; i++)
285  ne10_physics_relative_v_vec2f_neon (dv_neon, v_wa, ra, v_wb, rb, count);
286  }
287  );
288  time_speedup = (ne10_float32_t) time_c / time_neon;
289  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
290  printf ("count: %10d time C: %10lld time NEON: %10lld\n", count, time_c, time_neon);
291  //ne10_log (__FUNCTION__, "Compute aabb%21d%20lld%20lld%19.2f%%%18.2f:1\n", count, time_c, time_neon, time_savings, time_speedup);
292 #endif // ENABLE_NE10_PHYSICS_RELATIVE_V_VEC2F_NEON
293  }
294 
295  free (v_wa);
296  free (v_wb);
297  free (ra);
298  free (rb);
299  free (guarded_dv_c);
300  free (guarded_dv_neon);
301 }
302 
/*
 * Conformance test for the apply-impulse kernel.
 *
 * Runs ne10_physics_apply_impulse_vec2f_c and ne10_physics_apply_impulse_vec2f_neon
 * on the same random inputs for a range of element counts. The v_wa/v_wb arrays
 * start out identical for both kernels; after each pair of calls the test checks
 * the guard words around all four arrays and asserts that the C and NEON arrays
 * match element by element within ERROR_MARGIN_LARGE.
 * The whole body is compiled out unless the NEON kernel is enabled.
 */
void test_apply_impulse_vec2f_conformance()
{
#if defined ENABLE_NE10_PHYSICS_APPLY_IMPULSE_VEC2F_NEON
    /* Regression builds visit every count; all other builds (smoke) visit every fifth one. */
#if defined (REGRESSION_TEST)
    const ne10_int32_t count_step = 1;
#else // defined (SMOKE_TEST)
    const ne10_int32_t count_step = 5;
#endif
    ne10_vec3f_t *guarded_v_wa_c, *guarded_v_wa_neon, *guarded_v_wb_c, *guarded_v_wb_neon;
    ne10_vec3f_t *v_wa_c, *v_wa_neon, *v_wb_c, *v_wb_neon;
    ne10_vec2f_t *ra, *rb, *ima, *imb, *p;
    ne10_int32_t i;
    ne10_int32_t count;
    ne10_int32_t vec_size = sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t);

    fprintf (stdout, "----------%30s start\n", __FUNCTION__);

    /* init input memory: the inputs must be allocated before they are filled
     * below and released with free() at the end of the test */
    ra = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    rb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    ima = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    imb = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    p = (ne10_vec2f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t));
    float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
    float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
    float_array_assignment ( (ne10_float32_t *) ima, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
    float_array_assignment ( (ne10_float32_t *) imb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
    float_array_assignment ( (ne10_float32_t *) p, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));

    /* init dst memory: ARRAY_GUARD_LEN extra floats in front of and behind each payload */
    guarded_v_wa_c = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
    guarded_v_wa_neon = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
    guarded_v_wb_c = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
    guarded_v_wb_neon = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
    v_wa_c = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_c + ARRAY_GUARD_LEN);
    v_wa_neon = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_neon + ARRAY_GUARD_LEN);
    v_wb_c = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_c + ARRAY_GUARD_LEN);
    v_wb_neon = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_neon + ARRAY_GUARD_LEN);

    /* the C and NEON copies of v_wa / v_wb start from the same random contents */
    float_array_assignment ( (ne10_float32_t *) v_wa_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
    float_array_assignment ( (ne10_float32_t *) v_wb_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
    memcpy (v_wa_neon, v_wa_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
    memcpy (v_wb_neon, v_wb_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));

    for (count = 1; count < TEST_LENGTH_SAMPLES; count += count_step)
    {
        GUARD_ARRAY ( (ne10_float32_t*) v_wa_c, count * vec_size);
        GUARD_ARRAY ( (ne10_float32_t*) v_wa_neon, count * vec_size);
        GUARD_ARRAY ( (ne10_float32_t*) v_wb_c, count * vec_size);
        GUARD_ARRAY ( (ne10_float32_t*) v_wb_neon, count * vec_size);

        //C version
        ne10_physics_apply_impulse_vec2f_c (v_wa_c, v_wb_c, ra, rb, ima, imb, p, count);
        //neon version
        ne10_physics_apply_impulse_vec2f_neon (v_wa_neon, v_wb_neon, ra, rb, ima, imb, p, count);

        assert_true (CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wa_c, count * vec_size));
        assert_true (CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wa_neon, count * vec_size));
        assert_true (CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wb_c, count * vec_size));
        assert_true (CHECK_ARRAY_GUARD ( (ne10_float32_t*) v_wb_neon, count * vec_size));

        printf ("----count %d\n", count);
        for (i = 0; i < count; i++)
        {
            assert_float_vec_equal ( (ne10_float32_t*) &v_wa_c[i], (ne10_float32_t*) &v_wa_neon[i], ERROR_MARGIN_LARGE, vec_size);
            assert_float_vec_equal ( (ne10_float32_t*) &v_wb_c[i], (ne10_float32_t*) &v_wb_neon[i], ERROR_MARGIN_LARGE, vec_size);
        }
    }

    free (ra);
    free (rb);
    free (ima);
    free (imb);
    free (p);
    free (guarded_v_wa_c);
    free (guarded_v_wa_neon);
    free (guarded_v_wb_c);
    free (guarded_v_wb_neon);
#endif
}
402 
403 void test_apply_impulse_vec2f_performance()
404 {
405  ne10_vec3f_t *guarded_v_wa_c, *guarded_v_wa_neon, *guarded_v_wb_c, *guarded_v_wb_neon;
406  ne10_vec3f_t *v_wa_c, *v_wa_neon, *v_wb_c, *v_wb_neon;
407  ne10_vec2f_t *ra, *rb, *ima, *imb, *p;
408  ne10_int32_t i;
409  ne10_int32_t count;
410  // ne10_int32_t vec_size = sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t);
411 
412  fprintf (stdout, "----------%30s start\n", __FUNCTION__);
413  fprintf (stdout, "%25s%20s%20s%20s%20s\n", "count", "C Time (micro-s)", "NEON Time (micro-s)", "Time Savings", "Performance Ratio");
414 
415  /* init input memory */
421  float_array_assignment ( (ne10_float32_t *) ra, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
422  float_array_assignment ( (ne10_float32_t *) rb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
423  float_array_assignment ( (ne10_float32_t *) ima, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
424  float_array_assignment ( (ne10_float32_t *) imb, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
425  float_array_assignment ( (ne10_float32_t *) p, TEST_LENGTH_SAMPLES * sizeof (ne10_vec2f_t) / sizeof (ne10_float32_t));
426 
427  /* init dst memory */
428  guarded_v_wa_c = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
429  guarded_v_wa_neon = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
430  guarded_v_wb_c = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
431  guarded_v_wb_neon = (ne10_vec3f_t*) NE10_MALLOC (TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) + + ARRAY_GUARD_LEN * 2 * sizeof (ne10_float32_t));
432  v_wa_c = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_c + ARRAY_GUARD_LEN);
433  v_wa_neon = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wa_neon + ARRAY_GUARD_LEN);
434  v_wb_c = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_c + ARRAY_GUARD_LEN);
435  v_wb_neon = (ne10_vec3f_t*) ( (ne10_float32_t*) guarded_v_wb_neon + ARRAY_GUARD_LEN);
436  float_array_assignment ( (ne10_float32_t *) v_wa_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
437  float_array_assignment ( (ne10_float32_t *) v_wb_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t) / sizeof (ne10_float32_t));
438  memcpy (v_wa_neon, v_wa_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
439  memcpy (v_wb_neon, v_wb_c, TEST_LENGTH_SAMPLES * sizeof (ne10_vec3f_t));
440 
441  for (count = 2; count < TEST_LENGTH_SAMPLES; count += 4)
442  {
443  //C version
444  GET_TIME
445  (time_c,
446  {
447  for (i = 0; i < TEST_COUNT; i++)
448  ne10_physics_apply_impulse_vec2f_c (v_wa_c, v_wb_c, ra, rb, ima, imb, p, count);
449  }
450  );
451 
452 #ifdef ENABLE_NE10_PHYSICS_APPLY_IMPULSE_VEC2F_NEON
453  //neon version
454  GET_TIME
455  (time_neon,
456  {
457  for (i = 0; i < TEST_COUNT; i++)
458  ne10_physics_apply_impulse_vec2f_neon (v_wa_neon, v_wb_neon, ra, rb, ima, imb, p, count);
459  }
460  );
461 #endif // ENABLE_NE10_PHYSICS_APPLY_IMPULSE_VEC2F_NEON
462  time_speedup = (ne10_float32_t) time_c / time_neon;
463  time_savings = ( ( (ne10_float32_t) (time_c - time_neon)) / time_c) * 100;
464  printf ("count: %10d time C: %10lld time NEON: %10lld\n", count, time_c, time_neon);
465  //ne10_log (__FUNCTION__, "Compute aabb%21d%20lld%20lld%19.2f%%%18.2f:1\n", count, time_c, time_neon, time_savings, time_speedup);
466 
467  }
468  free (ra);
469  free (rb);
470  free (ima);
471  free (imb);
472  free (p);
473  free (guarded_v_wa_c);
474  free (guarded_v_wa_neon);
475  free (guarded_v_wb_c);
476  free (guarded_v_wb_neon);
477 }
478 
/*
 * Entry point for the AABB tests: runs the conformance test in smoke or
 * regression builds and the performance test in performance builds.
 */
void test_compute_aabb_vec2f()
{
    /* Both macros are tested with `defined` so that a macro defined without a
     * value (or as 0) still selects the conformance test. */
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_compute_aabb_vec2f_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_compute_aabb_vec2f_performance();
#endif
}
489 
/*
 * Entry point for the relative-velocity tests: runs the conformance test in
 * smoke or regression builds and the performance test in performance builds.
 */
void test_relative_v_vec2f()
{
    /* Both macros are tested with `defined` so that a macro defined without a
     * value (or as 0) still selects the conformance test. */
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_relative_v_vec2f_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_relative_v_vec2f_performance();
#endif
}
500 
/*
 * Entry point for the apply-impulse tests: runs the conformance test in smoke
 * or regression builds and the performance test in performance builds.
 */
void test_apply_impulse_vec2f()
{
    /* Both macros are tested with `defined` so that a macro defined without a
     * value (or as 0) still selects the conformance test. */
#if defined (SMOKE_TEST) || defined (REGRESSION_TEST)
    test_apply_impulse_vec2f_conformance();
#endif

#if defined (PERFORMANCE_TEST)
    test_apply_impulse_vec2f_performance();
#endif
}
511 
/* Fixture setup hook registered through fixture_setup; intentionally does nothing. */
void my_test_setup (void)
{
}
516 
/* Fixture teardown hook registered through fixture_teardown; intentionally does nothing. */
void my_test_teardown (void)
{
}
521 
522 void test_fixture_physics (void)
523 {
524  test_fixture_start(); // starts a fixture
525 
526  fixture_setup (my_test_setup);
527  fixture_teardown (my_test_teardown);
528 
529  run_test (test_compute_aabb_vec2f); // run tests
530  run_test (test_relative_v_vec2f);
531  run_test (test_apply_impulse_vec2f);
532 
533  test_fixture_end(); // ends a fixture
534 }
int32_t ne10_int32_t
Definition: NE10_types.h:76
A 2-tuple of ne10_float32_t values.
Definition: NE10_types.h:87
void my_test_setup(void)
float ne10_float32_t
Definition: NE10_types.h:80
#define TEST_COUNT
void ne10_physics_compute_aabb_vec2f_neon(ne10_mat2x2f_t *aabb, ne10_vec2f_t *vertices, ne10_mat2x2f_t *xf, ne10_vec2f_t *radius, ne10_uint32_t vertex_count)
Specific implementation of ne10_physics_compute_aabb_vec2f using NEON SIMD capabilities.
int64_t ne10_int64_t
Definition: NE10_types.h:78
void ne10_physics_apply_impulse_vec2f_c(ne10_vec3f_t *v_wa, ne10_vec3f_t *v_wb, ne10_vec2f_t *ra, ne10_vec2f_t *rb, ne10_vec2f_t *ima, ne10_vec2f_t *imb, ne10_vec2f_t *p, ne10_uint32_t count)
Specific implementation of ne10_physics_apply_impulse_vec2f using plain C.
Definition: NE10_physics.c:146
void ne10_physics_compute_aabb_vec2f_c(ne10_mat2x2f_t *aabb, ne10_vec2f_t *vertices, ne10_mat2x2f_t *xf, ne10_vec2f_t *radius, ne10_uint32_t vertex_count)
Specific implementation of ne10_physics_compute_aabb_vec2f using plain C.
Definition: NE10_physics.c:83
#define TEST_LENGTH_SAMPLES
void ne10_physics_relative_v_vec2f_c(ne10_vec2f_t *dv, ne10_vec3f_t *v_wa, ne10_vec2f_t *ra, ne10_vec3f_t *v_wb, ne10_vec2f_t *rb, ne10_uint32_t count)
Specific implementation of ne10_physics_relative_v_vec2f using plain C.
Definition: NE10_physics.c:112
void ne10_physics_relative_v_vec2f_neon(ne10_vec2f_t *dv, ne10_vec3f_t *v_wa, ne10_vec2f_t *ra, ne10_vec3f_t *v_wb, ne10_vec2f_t *rb, ne10_uint32_t count) asm("ne10_physics_relative_v_vec2f_neon")
Specific implementation of ne10_physics_relative_v_vec2f using NEON SIMD capabilities.
void my_test_teardown(void)
ne10_mat_row2f c2
Definition: NE10_types.h:127
A 3-tuple of ne10_float32_t values.
Definition: NE10_types.h:96
#define NE10_MALLOC
Definition: NE10_macros.h:53
ne10_float32_t r2
Definition: NE10_types.h:121
void ne10_physics_apply_impulse_vec2f_neon(ne10_vec3f_t *v_wa, ne10_vec3f_t *v_wb, ne10_vec2f_t *ra, ne10_vec2f_t *rb, ne10_vec2f_t *ima, ne10_vec2f_t *imb, ne10_vec2f_t *p, ne10_uint32_t count) asm("ne10_physics_apply_impulse_vec2f_neon")
Specific implementation of ne10_physics_apply_impulse_vec2f using NEON SIMD capabilities.
ne10_float32_t r1
Definition: NE10_types.h:120
ne10_mat_row2f c1
Definition: NE10_types.h:126