Project Ne10
An open, optimized software library for the ARM architecture.
Macros
factor.h File Reference

Go to the source code of this file.

Macros

#define FLOAT32_2x3(x1, y1, x2, y2, x3, y3)
 
#define NE10_CHECKPOINTER_DstSrcCst
 
#define NE10_CHECKPOINTER_DstSrc   NE10_CHECKPOINTER_DstSrcCst
 
#define NE10_CHECKPOINTER_3POINTER(arg1, arg2, arg3)
 
#define NE10_CHECKPOINTER_4POINTER(arg1, arg2, arg3, arg4)
 
#define NE10_CHECKPOINTER_DstAccSrcCst
 
#define NE10_CHECKPOINTER_DstCst   {}
 
#define NE10_CHECKPOINTER_DstSrc1Src2
 
#define NE10_CHECKPOINTER_DstAccSrc1Src2
 
#define NE10_DstSrcCst_MAINLOOP_FLOAT_NEON(loopCode)
 
#define NE10_DstSrcCst_SECONDLOOP_FLOAT_NEON(loopCode)
 
#define NE10_DstSrcCst_OPERATION_FLOAT_NEON(loopCode1, loopCode2)
 
#define NE10_DstSrcCst_MAINLOOP_VEC2F_NEON(loopCode)
 
#define NE10_DstSrcCst_SECONDLOOP_VEC2F_NEON(loopCode)
 
#define NE10_DstSrcCst_OPERATION_VEC2F_NEON(loopCode1, loopCode2)
 
#define NE10_DstSrcCst_MAINLOOP_VEC3F_NEON(loopCode)
 
#define NE10_DstSrcCst_SECONDLOOP_VEC3F_NEON(loopCode)
 
#define NE10_DstSrcCst_OPERATION_VEC3F_NEON(loopCode1, loopCode2)
 
#define NE10_DstSrcCst_MAINLOOP_VEC4F_NEON(loopCode)
 
#define NE10_DstSrcCst_OPERATION_VEC4F_NEON(loopCode)
 
#define NE10_DstAccSrcCst_MAINLOOP_FLOAT_NEON(loopCode)
 
#define NE10_DstAccSrcCst_SECONDLOOP_FLOAT_NEON(loopCode)
 
#define NE10_DstAccSrcCst_OPERATION_FLOAT_NEON   NE10_DstSrcCst_OPERATION_FLOAT_NEON
 
#define NE10_DstAccSrcCst_MAINLOOP_VEC2F_NEON(loopCode)
 
#define NE10_DstAccSrcCst_SECONDLOOP_VEC2F_NEON(loopCode)
 
#define NE10_DstAccSrcCst_OPERATION_VEC2F_NEON   NE10_DstSrcCst_OPERATION_VEC2F_NEON
 
#define NE10_DstAccSrcCst_MAINLOOP_VEC3F_NEON(loopCode)
 
#define NE10_DstAccSrcCst_SECONDLOOP_VEC3F_NEON(loopCode)
 
#define NE10_DstAccSrcCst_OPERATION_VEC3F_NEON   NE10_DstSrcCst_OPERATION_VEC3F_NEON
 
#define NE10_DstAccSrcCst_MAINLOOP_VEC4F_NEON(loopCode)
 
#define NE10_DstAccSrcCst_OPERATION_VEC4F_NEON   NE10_DstSrcCst_OPERATION_VEC4F_NEON
 
#define NE10_DstCst_MAINLOOP_FLOAT_NEON(loopCode)
 
#define NE10_DstCst_SECONDLOOP_FLOAT_NEON(loopCode)
 
#define NE10_DstCst_OPERATION_FLOAT_NEON(loopCode1, loopCode2)
 
#define NE10_DstCst_MAINLOOP_VEC2F_NEON(loopCode)
 
#define NE10_DstCst_SECONDLOOP_VEC2F_NEON(loopCode)
 
#define NE10_DstCst_OPERATION_VEC2F_NEON(loopCode1, loopCode2)
 
#define NE10_DstCst_MAINLOOP_VEC3F_NEON(loopCode)
 
#define NE10_DstCst_SECONDLOOP_VEC3F_NEON(loopCode)
 
#define NE10_DstCst_OPERATION_VEC3F_NEON(loopCode1, loopCode2)
 
#define NE10_DstCst_MAINLOOP_VEC4F_NEON(loopCode)
 
#define NE10_DstCst_OPERATION_VEC4F_NEON(loopCode)
 
#define NE10_DstSrc1Src2_MAINLOOP_FLOAT_NEON(loopCode)
 
#define NE10_DstSrc1Src2_SECONDLOOP_FLOAT_NEON(loopCode)
 
#define NE10_DstSrc1Src2_OPERATION_FLOAT_NEON   NE10_DstSrcCst_OPERATION_FLOAT_NEON
 
#define NE10_DstAccSrc1Src2_MAINLOOP_FLOAT_NEON(loopCode)
 
#define NE10_DstAccSrc1Src2_SECONDLOOP_FLOAT_NEON(loopCode)
 
#define NE10_DstAccSrc1Src2_OPERATION_FLOAT_NEON   NE10_DstAccSrcCst_OPERATION_FLOAT_NEON
 

Macro Definition Documentation

#define FLOAT32_2x3 (   x1,
  y1,
  x2,
  y2,
  x3,
  y3 
)
Value:
{{ \
{x1, y1}, {x2,y2}, {x3,y3} \
}}

Definition at line 33 of file factor.h.

#define NE10_CHECKPOINTER_3POINTER (   arg1,
  arg2,
  arg3 
)
Value:
if ( (void *)arg1 < (void *)arg2 ) \
{ assert ( (void *)arg1 + count <= (void *)arg2 ); } \
else if ( (void *)arg1 > (void *)arg2 ) \
{ assert ( (void *)arg2 + count <= (void *)arg1 ); } \
if ( (void *)arg1 < (void *)arg3 ) \
{ assert ( (void *)arg1 + count <= (void *)arg3 ); } \
else if ( (void *)arg1 > (void *)arg3 ) \
{ assert ( (void *)arg3 + count <= (void *)arg1 ); } \
if ( (void *)arg3 < (void *)arg2 ) \
{ assert ( (void *)arg3 + count <= (void *)arg2 ); } \
else if ( (void *)arg3 > (void *)arg2 ) \
{ assert ( (void *)arg2 + count <= (void *)arg3 ); }

Definition at line 58 of file factor.h.

#define NE10_CHECKPOINTER_4POINTER (   arg1,
  arg2,
  arg3,
  arg4 
)
Value:
NE10_CHECKPOINTER_3POINTER(arg1, arg2, arg3) \
if ( (void *)arg1 < (void *)arg4 ) \
{ assert ( (void *)arg1 + count <= (void *)arg4 ); } \
else if ( (void *)arg1 > (void *)arg4 ) \
{ assert ( (void *)arg4 + count <= (void *)arg1 ); } \
if ( (void *)arg2 < (void *)arg4 ) \
{ assert ( (void *)arg2 + count <= (void *)arg4 ); } \
else if ( (void *)arg2 > (void *)arg4 ) \
{ assert ( (void *)arg4 + count <= (void *)arg2 ); } \
if ( (void *)arg4 < (void *)arg3 ) \
{ assert ( (void *)arg4 + count <= (void *)arg3 ); } \
else if ( (void *)arg4 > (void *)arg3 ) \
{ assert ( (void *)arg3 + count <= (void *)arg4 ); }
#define NE10_CHECKPOINTER_3POINTER(arg1, arg2, arg3)
Definition: factor.h:58

Definition at line 72 of file factor.h.

#define NE10_CHECKPOINTER_DstAccSrc1Src2
Value:
{ \
NE10_CHECKPOINTER_4POINTER(dst, acc, src1, src2); }
#define NE10_CHECKPOINTER_4POINTER(arg1, arg2, arg3, arg4)
Definition: factor.h:72

Definition at line 97 of file factor.h.

#define NE10_CHECKPOINTER_DstAccSrcCst
Value:
{ \
NE10_CHECKPOINTER_3POINTER(dst, acc, src); }
#define NE10_CHECKPOINTER_3POINTER(arg1, arg2, arg3)
Definition: factor.h:58

Definition at line 89 of file factor.h.

#define NE10_CHECKPOINTER_DstCst   {}

Definition at line 92 of file factor.h.

#define NE10_CHECKPOINTER_DstSrc   NE10_CHECKPOINTER_DstSrcCst

Definition at line 56 of file factor.h.

#define NE10_CHECKPOINTER_DstSrc1Src2
Value:
{ \
NE10_CHECKPOINTER_3POINTER(dst, src1, src2); }
#define NE10_CHECKPOINTER_3POINTER(arg1, arg2, arg3)
Definition: factor.h:58

Definition at line 94 of file factor.h.

#define NE10_CHECKPOINTER_DstSrcCst
Value:
if ( (void *)dst < (void *)src ) \
{ assert ( (void *)dst + count <= (void *)src ); } \
else if ( (void *)dst > (void *)src ) \
{ assert ( (void *)src + count <= (void *)dst ); }

Definition at line 50 of file factor.h.

#define NE10_DstAccSrc1Src2_MAINLOOP_FLOAT_NEON (   loopCode)
Value:
{ \
/* load 4 values */ \
n_acc = vld1q_f32( (float32_t*)acc ); \
n_src = vld1q_f32( (float32_t*)src1 ); \
n_src2 = vld1q_f32( (float32_t*)src2 ); \
acc += 4; /* move to the next 4 float items; 4*float */ \
src1 += 4; \
src2 += 4; \
loopCode; /* the actual operation is placed here... */ /* The main loop iterates through four float values each time */ \
vst1q_f32 ( (float32_t*)dst , n_dst ); /* store the results back */ \
dst += 4; /* move to the next items; 4*float */ \
}

Definition at line 549 of file factor.h.

#define NE10_DstAccSrc1Src2_OPERATION_FLOAT_NEON   NE10_DstAccSrcCst_OPERATION_FLOAT_NEON

Definition at line 578 of file factor.h.

#define NE10_DstAccSrc1Src2_SECONDLOOP_FLOAT_NEON (   loopCode)
Value:
{ \
float32x2_t n_rest_acc = { 0.0f , 0.0f }; /* temporary storage to be used with NEON load/store intrinsics */ \
float32x2_t n_rest = { 0.0f , 0.0f }; \
float32x2_t n_rest2 = { 0.0f, 0.0f }; \
n_rest_acc = vld1_lane_f32 ( (float32_t*)acc, n_rest_acc, 0); /* load into the first lane of d0 */ \
n_rest = vld1_lane_f32 ( (float32_t*)src1, n_rest, 0); /* load into the first lane of d1 */ \
n_rest2 = vld1_lane_f32 ( (float32_t*)src2, n_rest2, 0); /* load into the first lane of d2 */ \
loopCode; /* the actual operation is placed here ... */ /* exceptional cases where the count is not a multiple of 4 */ \
vst1_lane_f32( (float32_t*)dst, n_rest, 0); /* store the lane back into the memory */ \
/* move to the next item in the stream */ \
acc++; \
src1++; \
src2++; \
dst++; \
}

Definition at line 562 of file factor.h.

#define NE10_DstAccSrcCst_MAINLOOP_FLOAT_NEON (   loopCode)
Value:
{ \
/* load 4 values */ \
n_acc = vld1q_f32( (float32_t*)acc ); \
n_src = vld1q_f32( (float32_t*)src ); \
acc += 4; /* move to the next 4 float items; 4*float */ \
src += 4; \
loopCode; /* the actual operation is placed here... */ /* The main loop iterates through four float values each time */ \
vst1q_f32 ( (float32_t*)dst , n_dst ); /* store the results back */ \
dst += 4; /* move to the next items; 4*float */ \
}

Definition at line 271 of file factor.h.

#define NE10_DstAccSrcCst_MAINLOOP_VEC2F_NEON (   loopCode)
Value:
{ \
n_acc = vld1q_f32( (float32_t*)acc ); /* load two vectors */ \
n_src = vld1q_f32( (float32_t*)src ); /* load two vectors */ \
acc += 2; /* move to the next two vectors */ \
src += 2; \
loopCode; /* actual operation */ /* The main loop iterates through two 2D vectors each time */ \
vst1q_f32 ( (float32_t*)dst , n_dst ); /* store back */ \
dst += 2; /* move to the next 2 vectors */ \
}

Definition at line 300 of file factor.h.

#define NE10_DstAccSrcCst_MAINLOOP_VEC3F_NEON (   loopCode)
Value:
{ \
n_acc1 = vld1q_f32( (float32_t*)acc ); /* Load accumulator values */ \
acc = ((void*)acc)+(4*sizeof(ne10_float32_t)); \
n_acc2 = vld1q_f32( (float32_t*)acc ); \
acc = ((void*)acc)+(4*sizeof(ne10_float32_t)); \
n_acc3 = vld1q_f32( (float32_t*)acc ); \
acc = ((void*)acc)+(4*sizeof(ne10_float32_t)); \
n_src1 = vld1q_f32( (float32_t*)src ); /* Load source values */ \
src = ((void*)src)+(4*sizeof(ne10_float32_t)); \
n_src2 = vld1q_f32( (float32_t*)src ); \
src = ((void*)src)+(4*sizeof(ne10_float32_t)); \
n_src3 = vld1q_f32( (float32_t*)src ); \
src = ((void*)src)+(4*sizeof(ne10_float32_t)); \
loopCode; /* The main loop iterates through four 3D vectors (three 4-float registers) each time */ \
vst1q_f32 ( (float32_t*)dst , n_dst1 ); /* Store the results back into the memory */ \
dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
vst1q_f32 ( (float32_t*)dst , n_dst2 ); \
dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
vst1q_f32 ( (float32_t*)dst , n_dst3 ); \
dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
}
float ne10_float32_t
Definition: NE10_types.h:80

Definition at line 324 of file factor.h.

#define NE10_DstAccSrcCst_MAINLOOP_VEC4F_NEON (   loopCode)
Value:
{ \
n_acc = vld1q_f32( (float32_t*)acc ); \
n_src = vld1q_f32( (float32_t*)src ); \
acc ++; \
src ++; \
loopCode; \
vst1q_f32 ( (float32_t*)dst , n_dst ); /* The main loop iterates through one 4D vector each time */ \
dst ++; \
}

Definition at line 373 of file factor.h.

#define NE10_DstAccSrcCst_OPERATION_FLOAT_NEON   NE10_DstSrcCst_OPERATION_FLOAT_NEON

Definition at line 296 of file factor.h.

#define NE10_DstAccSrcCst_OPERATION_VEC2F_NEON   NE10_DstSrcCst_OPERATION_VEC2F_NEON

Definition at line 320 of file factor.h.

#define NE10_DstAccSrcCst_OPERATION_VEC3F_NEON   NE10_DstSrcCst_OPERATION_VEC3F_NEON

Definition at line 369 of file factor.h.

#define NE10_DstAccSrcCst_OPERATION_VEC4F_NEON   NE10_DstSrcCst_OPERATION_VEC4F_NEON

Definition at line 383 of file factor.h.

#define NE10_DstAccSrcCst_SECONDLOOP_FLOAT_NEON (   loopCode)
Value:
{ \
float32x2_t n_rest_acc = { 0.0f , 0.0f }; /* temporary storage to be used with NEON load/store intrinsics */ \
float32x2_t n_rest = { 0.0f , 0.0f }; /* temporary storage to be used with NEON load/store intrinsics */ \
float32x2_t n_rest_cst = { cst, cst }; /* temporary constant value for use in the main NEON operation */ \
n_rest_acc = vld1_lane_f32 ( (float32_t*)acc, n_rest_acc, 0); /* load into the first lane of d0 */ \
n_rest = vld1_lane_f32 ( (float32_t*)src, n_rest, 0); /* load into the first lane of d1 */ \
loopCode; /* the actual operation is placed here ... */ /* exceptional cases where the count is not a multiple of 4 */ \
vst1_lane_f32( (float32_t*)dst, n_rest, 0); /* store the lane back into the memory */ \
/* move to the next item in the stream */ \
acc++; \
src++; \
dst++; \
}

Definition at line 282 of file factor.h.

#define NE10_DstAccSrcCst_SECONDLOOP_VEC2F_NEON (   loopCode)
Value:
{ \
float32x2_t n_rest_acc; \
float32x2_t n_rest; \
float32x2_t n_rest_cst = { cst->x, cst->y }; \
n_rest_acc = vld1_f32( (float32_t*)acc ); \
n_rest = vld1_f32( (float32_t*)src ); \
loopCode; /* exceptional cases where the count isn't a multiple of 2 */ \
vst1_f32( (float32_t*)dst, n_rest); \
}

Definition at line 310 of file factor.h.

#define NE10_DstAccSrcCst_SECONDLOOP_VEC3F_NEON (   loopCode)
Value:
{ \
float32x2x3_t n_rest_acc = FLOAT32_2x3( \
0.0f, 0.0f, \
0.0f, 0.0f, \
0.0f, 0.0f \
); \
float32x2x3_t n_rest = FLOAT32_2x3( \
0.0f, 0.0f, \
0.0f, 0.0f, \
0.0f, 0.0f \
); \
float32x2x3_t n_rest_cst = { (const float32x2_t){cst->x, 0}, \
(const float32x2_t){cst->y, 0}, \
(const float32x2_t){cst->z, 0} }; \
n_rest_acc = vld3_lane_f32 ( (float32_t*)acc, n_rest_acc, 0); \
n_rest = vld3_lane_f32 ( (float32_t*)src, n_rest, 0); \
loopCode; /* exceptional cases where the count isn't a multiple of 4 */ \
vst3_lane_f32( (float32_t*)dst, n_rest, 0); \
acc++; \
src++; \
dst++; \
}
#define FLOAT32_2x3(x1, y1, x2, y2, x3, y3)
Definition: factor.h:33

Definition at line 346 of file factor.h.

#define NE10_DstCst_MAINLOOP_FLOAT_NEON (   loopCode)
Value:
{ \
/* load 4 values */ \
loopCode; /* the actual operation is placed here... */ /* The main loop iterates through four float values each time */ \
vst1q_f32 ( (float32_t*)dst , n_cst ); /* store the results back */ \
dst += 4; /* move to the next items; 4*float */ \
}

Definition at line 393 of file factor.h.

#define NE10_DstCst_MAINLOOP_VEC2F_NEON (   loopCode)
Value:
{ \
loopCode; /* actual operation */ /* The main loop iterates through two 2D vectors each time */ \
vst1q_f32 ( (float32_t*)dst , n_cst ); /* store back */ \
dst += 2; /* move to the next 2 vectors */ \
}

Definition at line 427 of file factor.h.

#define NE10_DstCst_MAINLOOP_VEC3F_NEON (   loopCode)
Value:
{ \
loopCode; /* The main loop iterates through four 3D vectors (three 4-float registers) each time */ \
vst1q_f32 ( (float32_t*)dst , n_cst1 ); \
dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
vst1q_f32 ( (float32_t*)dst , n_cst2 ); \
dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
vst1q_f32 ( (float32_t*)dst , n_cst3 ); \
dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
}
float ne10_float32_t
Definition: NE10_types.h:80

Definition at line 454 of file factor.h.

#define NE10_DstCst_MAINLOOP_VEC4F_NEON (   loopCode)
Value:
{ \
loopCode; \
vst1q_f32 ( (float32_t*)dst , n_cst ); /* The main loop iterates through one 4D vector each time */ \
dst ++; \
}

Definition at line 492 of file factor.h.

#define NE10_DstCst_OPERATION_FLOAT_NEON (   loopCode1,
  loopCode2 
)
Value:
{ \
int dif = 0; \
dif = count % 4; /* either 0 or one of 1,2,3; in the latter cases the second path is taken */ \
for (; count > dif; count -= 4) { \
loopCode1; \
} \
if ( 0 != dif ) { \
unsigned int idx; \
for ( idx = 0 ; idx < dif; idx++ ) { \
loopCode2; \
} \
} \
return res; \
}
#define NE10_OK
Definition: NE10_types.h:65
int ne10_result_t
Definition: NE10_types.h:82

Definition at line 408 of file factor.h.

#define NE10_DstCst_OPERATION_VEC2F_NEON (   loopCode1,
  loopCode2 
)
Value:
{ \
float32x4_t n_cst = { cst->x, cst->y, cst->x, cst->y }; \
int dif = count % 2; \
for (; count > dif; count -= 2) { \
loopCode1; \
} \
if ( 0 != dif ) { \
loopCode2; \
} \
return res; \
}
#define NE10_OK
Definition: NE10_types.h:65
int ne10_result_t
Definition: NE10_types.h:82

Definition at line 439 of file factor.h.

#define NE10_DstCst_OPERATION_VEC3F_NEON (   loopCode1,
  loopCode2 
)
Value:
{ \
float32x4_t n_cst1 = { cst->x, cst->y, cst->z, cst->x }; \
float32x4_t n_cst2 = { cst->y, cst->z, cst->x, cst->y }; \
float32x4_t n_cst3 = { cst->z, cst->x, cst->y, cst->z }; \
int dif = count % 4; \
for (; count > dif; count -= 4) { \
loopCode1; \
} \
if ( 0 != dif ) { \
unsigned int idx; \
for ( idx = 0 ; idx < dif; idx++ ) { \
loopCode2; \
} \
} \
return res; \
}
#define NE10_OK
Definition: NE10_types.h:65
int ne10_result_t
Definition: NE10_types.h:82

Definition at line 472 of file factor.h.

#define NE10_DstCst_OPERATION_VEC4F_NEON (   loopCode)
Value:
{ \
float32x4_t n_cst = { cst->x, cst->y, cst->z, cst->w }; \
for (; count != 0; count --) { \
loopCode; \
} \
return res; \
}
#define NE10_OK
Definition: NE10_types.h:65
int ne10_result_t
Definition: NE10_types.h:82

Definition at line 498 of file factor.h.

#define NE10_DstCst_SECONDLOOP_FLOAT_NEON (   loopCode)
Value:
{ \
float32x2_t n_rest_cst = { cst, cst }; /* temporary constant value for use in the main NEON operation */ \
loopCode; /* the actual operation is placed here ... */ /* exceptional cases where the count is not a multiple of 4 */ \
vst1_lane_f32( (float32_t*)dst, n_rest_cst, 0); /* store the lane back into the memory */ \
/* move to the next item in the stream */ \
dst++; \
}

Definition at line 400 of file factor.h.

#define NE10_DstCst_SECONDLOOP_VEC2F_NEON (   loopCode)
Value:
{ \
float32x2_t n_rest_cst = { cst->x, cst->y }; \
loopCode; /* exceptional cases where the count isn't a multiple of 2 */ \
vst1_f32( (float32_t*)dst, n_rest_cst); \
}

Definition at line 433 of file factor.h.

#define NE10_DstCst_SECONDLOOP_VEC3F_NEON (   loopCode)
Value:
{ \
float32x2x3_t n_rest_cst = { (const float32x2_t){cst->x, 0}, \
(const float32x2_t){cst->y, 0}, (const float32x2_t){cst->z, 0} }; \
loopCode; /* exceptional cases where the count isn't a multiple of 4 */ \
vst3_lane_f32( (float32_t*)dst, n_rest_cst, 0); \
dst++; \
}

Definition at line 464 of file factor.h.

#define NE10_DstSrc1Src2_MAINLOOP_FLOAT_NEON (   loopCode)
Value:
{ \
/* load 4 values */ \
n_src = vld1q_f32( (float32_t*)src1 ); \
src1 += 4; /* move to the next 4 float items; 4*float */ \
n_src2 = vld1q_f32( (float32_t*)src2 ); \
src2 += 4; /* move to the next 4 float items; 4*float */ \
loopCode; /* the actual operation is placed here... */ /* The main loop iterates through four float values each time */ \
vst1q_f32 ( (float32_t*)dst , n_dst ); /* store the results back */ \
dst += 4; /* move to the next items; 4*float */ \
}

Definition at line 515 of file factor.h.

#define NE10_DstSrc1Src2_OPERATION_FLOAT_NEON   NE10_DstSrcCst_OPERATION_FLOAT_NEON

Definition at line 539 of file factor.h.

#define NE10_DstSrc1Src2_SECONDLOOP_FLOAT_NEON (   loopCode)
Value:
{ \
float32x2_t n_rest = { 0.0f , 0.0f }; /* temporary storage to be used with NEON load/store intrinsics */ \
float32x2_t n_rest2 = { 0.0f , 0.0f }; \
n_rest = vld1_lane_f32 ( (float32_t*)src1, n_rest, 0); /* load into the first lane of d0 */ \
n_rest2 = vld1_lane_f32 ( (float32_t*)src2, n_rest, 0); \
loopCode; /* the actual operation is placed here ... */ /* exceptional cases where the count is not a multiple of 4 */ \
vst1_lane_f32( (float32_t*)dst, n_rest, 0); /* store the lane back into the memory */ \
/* move to the next item in the stream */ \
src1++; \
src2++; \
dst++; \
}

Definition at line 526 of file factor.h.

#define NE10_DstSrcCst_MAINLOOP_FLOAT_NEON (   loopCode)
Value:
{ \
/* load 4 values */ \
n_src = vld1q_f32( (float32_t*)src ); \
src += 4; /* move to the next 4 float items; 4*float */ \
loopCode; /* the actual operation is placed here... */ /* The main loop iterates through four float values each time */ \
vst1q_f32 ( (float32_t*)dst , n_dst ); /* store the results back */ \
dst += 4; /* move to the next items; 4*float */ \
}

Definition at line 118 of file factor.h.

#define NE10_DstSrcCst_MAINLOOP_VEC2F_NEON (   loopCode)
Value:
{ \
n_src = vld1q_f32( (float32_t*)src ); /* load two vectors */ \
src += 2; /* move to the next two vectors */ \
loopCode; /* actual operation */ /* The main loop iterates through two 2D vectors each time */ \
vst1q_f32 ( (float32_t*)dst , n_dst ); /* store back */ \
dst += 2; /* move to the next 2 vectors */ \
}

Definition at line 158 of file factor.h.

#define NE10_DstSrcCst_MAINLOOP_VEC3F_NEON (   loopCode)
Value:
{ \
n_src1 = vld1q_f32( (float32_t*)src ); \
src = ((void*)src)+(4*sizeof(ne10_float32_t)); \
n_src2 = vld1q_f32( (float32_t*)src ); \
src = ((void*)src)+(4*sizeof(ne10_float32_t)); \
n_src3 = vld1q_f32( (float32_t*)src ); \
src = ((void*)src)+(4*sizeof(ne10_float32_t)); \
loopCode; /* The main loop iterates through four 3D vectors (three 4-float registers) each time */ \
vst1q_f32 ( (float32_t*)dst , n_dst1 ); \
dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
vst1q_f32 ( (float32_t*)dst , n_dst2 ); \
dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
vst1q_f32 ( (float32_t*)dst , n_dst3 ); \
dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
}
float ne10_float32_t
Definition: NE10_types.h:80

Definition at line 191 of file factor.h.

#define NE10_DstSrcCst_MAINLOOP_VEC4F_NEON (   loopCode)
Value:
{ \
n_src = vld1q_f32( (float32_t*)src ); \
src ++; \
loopCode; \
vst1q_f32 ( (float32_t*)dst , n_dst ); /* The main loop iterates through one 4D vector each time */ \
dst ++; \
}

Definition at line 244 of file factor.h.

#define NE10_DstSrcCst_OPERATION_FLOAT_NEON (   loopCode1,
  loopCode2 
)
Value:
{ \
float32x4_t n_src; \
float32x4_t n_dst; \
int dif = 0; \
dif = count % 4; /* either 0 or one of 1,2,3; in the latter cases the second path is taken */ \
for (; count > dif; count -= 4) { \
loopCode1; \
} \
if ( 0 != dif ) { \
unsigned int idx; \
for ( idx = 0 ; idx < dif; idx++ ) { \
loopCode2; \
} \
} \
return res; \
}
#define NE10_OK
Definition: NE10_types.h:65
int ne10_result_t
Definition: NE10_types.h:82

Definition at line 138 of file factor.h.

#define NE10_DstSrcCst_OPERATION_VEC2F_NEON (   loopCode1,
  loopCode2 
)
Value:
{ \
float32x4_t n_cst = { cst->x, cst->y, cst->x, cst->y }; \
float32x4_t n_src; \
float32x4_t n_dst; \
int dif = count % 2; \
for (; count > dif; count -= 2) { \
loopCode1; \
} \
if ( 0 != dif ) { \
loopCode2; \
} \
return res; \
}
#define NE10_OK
Definition: NE10_types.h:65
int ne10_result_t
Definition: NE10_types.h:82

Definition at line 174 of file factor.h.

#define NE10_DstSrcCst_OPERATION_VEC3F_NEON (   loopCode1,
  loopCode2 
)
Value:
{ \
float32x4_t n_cst1 = { cst->x, cst->y, cst->z, cst->x }; \
float32x4_t n_cst2 = { cst->y, cst->z, cst->x, cst->y }; \
float32x4_t n_cst3 = { cst->z, cst->x, cst->y, cst->z }; \
float32x4_t n_src1, n_src2, n_src3; \
float32x4_t n_dst1, n_dst2, n_dst3; \
int dif = count % 4; \
for (; count > dif; count -= 4) { \
loopCode1; \
} \
if ( 0 != dif ) { \
unsigned int idx; \
for ( idx = 0 ; idx < dif; idx++ ) { \
loopCode2; \
} \
} \
return res; \
}
#define NE10_OK
Definition: NE10_types.h:65
int ne10_result_t
Definition: NE10_types.h:82

Definition at line 219 of file factor.h.

#define NE10_DstSrcCst_OPERATION_VEC4F_NEON (   loopCode)
Value:
{ \
float32x4_t n_cst = { cst->x, cst->y, cst->z, cst->w }; \
float32x4_t n_src; \
float32x4_t n_dst; \
for (; count != 0; count --) { \
loopCode; \
} \
return res; \
}
#define NE10_OK
Definition: NE10_types.h:65
int ne10_result_t
Definition: NE10_types.h:82

Definition at line 252 of file factor.h.

#define NE10_DstSrcCst_SECONDLOOP_FLOAT_NEON (   loopCode)
Value:
{ \
float32x2_t n_rest = { 0.0f , 0.0f }; /* temporary storage to be used with NEON load/store intrinsics */ \
float32x2_t n_rest_cst = { cst, cst }; /* temporary constant value for use in the main NEON operation */ \
n_rest = vld1_lane_f32 ( (float32_t*)src, n_rest, 0); /* load into the first lane of d0 */ \
loopCode; /* the actual operation is placed here ... */ /* exceptional cases where the count is not a multiple of 4 */ \
vst1_lane_f32( (float32_t*)dst, n_rest, 0); /* store the lane back into the memory */ \
/* move to the next item in the stream */ \
src++; \
dst++; \
}

Definition at line 127 of file factor.h.

#define NE10_DstSrcCst_SECONDLOOP_VEC2F_NEON (   loopCode)
Value:
{ \
float32x2_t n_rest; \
float32x2_t n_rest_cst = { cst->x, cst->y }; \
n_rest = vld1_f32( (float32_t*)src ); \
loopCode; /* exceptional cases where the count isn't a multiple of 2 */ \
vst1_f32( (float32_t*)dst, n_rest); \
}

Definition at line 166 of file factor.h.

#define NE10_DstSrcCst_SECONDLOOP_VEC3F_NEON (   loopCode)
Value:
{ \
float32x2x3_t n_rest = FLOAT32_2x3( \
0.0f, 0.0f, 0.0f , 0.0f, 0.0f , 0.0f); \
float32x2x3_t n_rest_cst = { (const float32x2_t){cst->x, 0}, \
(const float32x2_t){cst->y, 0}, (const float32x2_t){cst->z, 0} }; \
n_rest = vld3_lane_f32 ( (float32_t*)src, n_rest, 0); \
loopCode; /* exceptional cases where the count isn't a multiple of 4 */ \
vst3_lane_f32( (float32_t*)dst, n_rest, 0); \
src++; \
dst++; \
}
#define FLOAT32_2x3(x1, y1, x2, y2, x3, y3)
Definition: factor.h:33

Definition at line 207 of file factor.h.