Macros
#define	FLOAT32_2x3(x1, y1, x2, y2, x3, y3)

#define	NE10_CHECKPOINTER_DstSrcCst

#define	NE10_CHECKPOINTER_DstSrc NE10_CHECKPOINTER_DstSrcCst

#define	NE10_CHECKPOINTER_3POINTER(arg1, arg2, arg3)

#define	NE10_CHECKPOINTER_4POINTER(arg1, arg2, arg3, arg4)

#define	NE10_CHECKPOINTER_DstAccSrcCst

#define	NE10_CHECKPOINTER_DstCst {}

#define	NE10_CHECKPOINTER_DstSrc1Src2

#define	NE10_CHECKPOINTER_DstAccSrc1Src2

#define	NE10_DstSrcCst_MAINLOOP_FLOAT_NEON(loopCode)

#define	NE10_DstSrcCst_SECONDLOOP_FLOAT_NEON(loopCode)

#define	NE10_DstSrcCst_OPERATION_FLOAT_NEON(loopCode1, loopCode2)

#define	NE10_DstSrcCst_MAINLOOP_VEC2F_NEON(loopCode)

#define	NE10_DstSrcCst_SECONDLOOP_VEC2F_NEON(loopCode)

#define	NE10_DstSrcCst_OPERATION_VEC2F_NEON(loopCode1, loopCode2)

#define	NE10_DstSrcCst_MAINLOOP_VEC3F_NEON(loopCode)

#define	NE10_DstSrcCst_SECONDLOOP_VEC3F_NEON(loopCode)

#define	NE10_DstSrcCst_OPERATION_VEC3F_NEON(loopCode1, loopCode2)

#define	NE10_DstSrcCst_MAINLOOP_VEC4F_NEON(loopCode)

#define	NE10_DstSrcCst_OPERATION_VEC4F_NEON(loopCode)

#define	NE10_DstAccSrcCst_MAINLOOP_FLOAT_NEON(loopCode)

#define	NE10_DstAccSrcCst_SECONDLOOP_FLOAT_NEON(loopCode)

#define	NE10_DstAccSrcCst_OPERATION_FLOAT_NEON NE10_DstSrcCst_OPERATION_FLOAT_NEON

#define	NE10_DstAccSrcCst_MAINLOOP_VEC2F_NEON(loopCode)

#define	NE10_DstAccSrcCst_SECONDLOOP_VEC2F_NEON(loopCode)

#define	NE10_DstAccSrcCst_OPERATION_VEC2F_NEON NE10_DstSrcCst_OPERATION_VEC2F_NEON

#define	NE10_DstAccSrcCst_MAINLOOP_VEC3F_NEON(loopCode)

#define	NE10_DstAccSrcCst_SECONDLOOP_VEC3F_NEON(loopCode)

#define	NE10_DstAccSrcCst_OPERATION_VEC3F_NEON NE10_DstSrcCst_OPERATION_VEC3F_NEON

#define	NE10_DstAccSrcCst_MAINLOOP_VEC4F_NEON(loopCode)

#define	NE10_DstAccSrcCst_OPERATION_VEC4F_NEON NE10_DstSrcCst_OPERATION_VEC4F_NEON

#define	NE10_DstCst_MAINLOOP_FLOAT_NEON(loopCode)

#define	NE10_DstCst_SECONDLOOP_FLOAT_NEON(loopCode)

#define	NE10_DstCst_OPERATION_FLOAT_NEON(loopCode1, loopCode2)

#define	NE10_DstCst_MAINLOOP_VEC2F_NEON(loopCode)

#define	NE10_DstCst_SECONDLOOP_VEC2F_NEON(loopCode)

#define	NE10_DstCst_OPERATION_VEC2F_NEON(loopCode1, loopCode2)

#define	NE10_DstCst_MAINLOOP_VEC3F_NEON(loopCode)

#define	NE10_DstCst_SECONDLOOP_VEC3F_NEON(loopCode)

#define	NE10_DstCst_OPERATION_VEC3F_NEON(loopCode1, loopCode2)

#define	NE10_DstCst_MAINLOOP_VEC4F_NEON(loopCode)

#define	NE10_DstCst_OPERATION_VEC4F_NEON(loopCode)

#define	NE10_DstSrc1Src2_MAINLOOP_FLOAT_NEON(loopCode)

#define	NE10_DstSrc1Src2_SECONDLOOP_FLOAT_NEON(loopCode)

#define	NE10_DstSrc1Src2_OPERATION_FLOAT_NEON NE10_DstSrcCst_OPERATION_FLOAT_NEON

#define	NE10_DstAccSrc1Src2_MAINLOOP_FLOAT_NEON(loopCode)

#define	NE10_DstAccSrc1Src2_SECONDLOOP_FLOAT_NEON(loopCode)

#define	NE10_DstAccSrc1Src2_OPERATION_FLOAT_NEON NE10_DstAccSrcCst_OPERATION_FLOAT_NEON

Macro Definition Documentation

#define FLOAT32_2x3	(	x1,
		y1,
		x2,
		y2,
		x3,
		y3
	)

Value:

{{ \
        {x1, y1}, {x2,y2}, {x3,y3} \
    }}

Definition at line 33 of file factor.h.

#define NE10_CHECKPOINTER_3POINTER	(	arg1,
		arg2,
		arg3
	)

Value:

if ( (void *)arg1 < (void *)arg2 ) \
    { assert ( (void *)arg1 + count <= (void *)arg2 ); } \
   else if ( (void *)arg1 > (void *)arg2 ) \
    { assert ( (void *)arg2 + count <= (void *)arg1 ); } \
   if ( (void *)arg1 < (void *)arg3 ) \
    { assert ( (void *)arg1 + count <= (void *)arg3 ); } \
   else if ( (void *)arg1 > (void *)arg3 ) \
    { assert ( (void *)arg3 + count <= (void *)arg1 ); } \
   if ( (void *)arg3 < (void *)arg2 ) \
    { assert ( (void *)arg3 + count <= (void *)arg2 ); } \
   else if ( (void *)arg3 > (void *)arg2 ) \
    { assert ( (void *)arg2 + count <= (void *)arg3 ); }

Definition at line 58 of file factor.h.

#define NE10_CHECKPOINTER_4POINTER	(	arg1,
		arg2,
		arg3,
		arg4
	)

Value:

NE10_CHECKPOINTER_3POINTER(arg1, arg2, arg3) \
   if ( (void *)arg1 < (void *)arg4 ) \
    { assert ( (void *)arg1 + count <= (void *)arg4 ); } \
   else if ( (void *)arg1 > (void *)arg4 ) \
    { assert ( (void *)arg4 + count <= (void *)arg1 ); } \
   if ( (void *)arg2 < (void *)arg4 ) \
    { assert ( (void *)arg2 + count <= (void *)arg4 ); } \
   else if ( (void *)arg2 > (void *)arg4 ) \
    { assert ( (void *)arg4 + count <= (void *)arg2 ); } \
   if ( (void *)arg4 < (void *)arg3 ) \
    { assert ( (void *)arg4 + count <= (void *)arg3 ); } \
   else if ( (void *)arg4 > (void *)arg3 ) \
    { assert ( (void *)arg3 + count <= (void *)arg4 ); }

Definition at line 72 of file factor.h.

#define NE10_CHECKPOINTER_DstAccSrc1Src2

Value:

{ \
   NE10_CHECKPOINTER_4POINTER(dst, acc, src1, src2); }

See also: NE10_CHECKPOINTER_4POINTER(arg1, arg2, arg3, arg4), defined at line 72 of file factor.h.

Definition at line 97 of file factor.h.

#define NE10_CHECKPOINTER_DstAccSrcCst

Value:

{ \
   NE10_CHECKPOINTER_3POINTER(dst, acc, src); }

See also: NE10_CHECKPOINTER_3POINTER(arg1, arg2, arg3), defined at line 58 of file factor.h.

Definition at line 89 of file factor.h.

#define NE10_CHECKPOINTER_DstCst {}

Definition at line 92 of file factor.h.

#define NE10_CHECKPOINTER_DstSrc NE10_CHECKPOINTER_DstSrcCst

Definition at line 56 of file factor.h.

#define NE10_CHECKPOINTER_DstSrc1Src2

Value:

{ \
   NE10_CHECKPOINTER_3POINTER(dst, src1, src2); }

See also: NE10_CHECKPOINTER_3POINTER(arg1, arg2, arg3), defined at line 58 of file factor.h.

Definition at line 94 of file factor.h.

#define NE10_CHECKPOINTER_DstSrcCst

Value:

if ( (void *)dst < (void *)src ) \
    { assert ( (void *)dst + count <= (void *)src ); } \
   else if ( (void *)dst > (void *)src ) \
    { assert ( (void *)src + count <= (void *)dst ); }

Definition at line 50 of file factor.h.

#define NE10_DstAccSrc1Src2_MAINLOOP_FLOAT_NEON ( loopCode )

Value:

{ \
     /* load 4 values  */ \
     n_acc = vld1q_f32( (float32_t*)acc ); \
     n_src = vld1q_f32( (float32_t*)src1 ); \
     n_src2 = vld1q_f32( (float32_t*)src2 ); \
     acc += 4; /* move to the next 4 float items; 4*float */ \
     src1 += 4; \
     src2 += 4; \
     loopCode; /* the actual operation is placed here... */ /* The main loop iterates through four float values each time */ \
     vst1q_f32 ( (float32_t*)dst , n_dst ); /* store the results back */ \
     dst += 4; /* move to the next items; 4*float */ \
    }

Definition at line 549 of file factor.h.

#define NE10_DstAccSrc1Src2_OPERATION_FLOAT_NEON NE10_DstAccSrcCst_OPERATION_FLOAT_NEON

Definition at line 578 of file factor.h.

#define NE10_DstAccSrc1Src2_SECONDLOOP_FLOAT_NEON ( loopCode )

Value:

{ \
      float32x2_t n_rest_acc = { 0.0f , 0.0f }; /* temporary storage to be used with NEON load/store intrinsics */ \
      float32x2_t n_rest = { 0.0f , 0.0f }; \
      float32x2_t n_rest2 = { 0.0f, 0.0f }; \
      n_rest_acc = vld1_lane_f32 ( (float32_t*)acc, n_rest_acc, 0); /* load into the first lane of d0 */ \
      n_rest = vld1_lane_f32 ( (float32_t*)src1, n_rest, 0); /* load into the first lane of d1 */ \
      n_rest2 = vld1_lane_f32 ( (float32_t*)src2, n_rest2, 0); /* load into the first lane of d2 */ \
      loopCode; /* the actual operation is placed here ... */ /* exceptional cases where the count is not a multiple of 4 */ \
      vst1_lane_f32( (float32_t*)dst, n_rest, 0); /* store the lane back into the memory */ \
      /* move to the next item in the stream */ \
      acc++; \
      src1++; \
      src2++; \
      dst++; \
     }

Definition at line 562 of file factor.h.

#define NE10_DstAccSrcCst_MAINLOOP_FLOAT_NEON ( loopCode )

Value:

{ \
     /* load 4 values  */ \
     n_acc = vld1q_f32( (float32_t*)acc ); \
     n_src = vld1q_f32( (float32_t*)src ); \
     acc += 4; /* move to the next 4 float items; 4*float */ \
     src += 4; \
     loopCode; /* the actual operation is placed here... */ /* The main loop iterates through four float values each time */ \
     vst1q_f32 ( (float32_t*)dst , n_dst ); /* store the results back */ \
     dst += 4; /* move to the next items; 4*float */ \
    }

Definition at line 271 of file factor.h.

#define NE10_DstAccSrcCst_MAINLOOP_VEC2F_NEON ( loopCode )

Value:

{ \
     n_acc = vld1q_f32( (float32_t*)acc ); /* load two vectors */ \
     n_src = vld1q_f32( (float32_t*)src ); /* load two vectors */ \
     acc += 2; /* move to the next two vectors */ \
     src += 2; \
     loopCode; /* actual operation */ /* The main loop iterates through two 2D vectors each time */ \
     vst1q_f32 ( (float32_t*)dst , n_dst ); /* store back */ \
     dst += 2; /* move to the next 2 vectors */ \
    }

Definition at line 300 of file factor.h.

#define NE10_DstAccSrcCst_MAINLOOP_VEC3F_NEON ( loopCode )

Value:

{ \
     n_acc1 = vld1q_f32( (float32_t*)acc ); /* Load accumulator values */ \
     acc = ((void*)acc)+(4*sizeof(ne10_float32_t)); \
     n_acc2 = vld1q_f32( (float32_t*)acc ); \
     acc = ((void*)acc)+(4*sizeof(ne10_float32_t)); \
     n_acc3 = vld1q_f32( (float32_t*)acc ); \
     acc = ((void*)acc)+(4*sizeof(ne10_float32_t)); \
     n_src1 = vld1q_f32( (float32_t*)src ); /* Load source values */ \
     src = ((void*)src)+(4*sizeof(ne10_float32_t)); \
     n_src2 = vld1q_f32( (float32_t*)src ); \
     src = ((void*)src)+(4*sizeof(ne10_float32_t)); \
     n_src3 = vld1q_f32( (float32_t*)src ); \
     src = ((void*)src)+(4*sizeof(ne10_float32_t)); \
     loopCode; /* The main loop iterates through four 3D vectors (three 4-float registers) each time */ \
     vst1q_f32 ( (float32_t*)dst , n_dst1 ); /* Store the results back into the memory */ \
     dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
     vst1q_f32 ( (float32_t*)dst , n_dst2 ); \
     dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
     vst1q_f32 ( (float32_t*)dst , n_dst3 ); \
     dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
  }

Definition at line 324 of file factor.h.

#define NE10_DstAccSrcCst_MAINLOOP_VEC4F_NEON ( loopCode )

Value:

{ \
     n_acc = vld1q_f32( (float32_t*)acc ); \
     n_src = vld1q_f32( (float32_t*)src ); \
     acc ++; \
     src ++; \
     loopCode; \
     vst1q_f32 ( (float32_t*)dst , n_dst );  /* The main loop iterates through one 4D vector each time */ \
     dst ++; \
  }

Definition at line 373 of file factor.h.

#define NE10_DstAccSrcCst_OPERATION_FLOAT_NEON NE10_DstSrcCst_OPERATION_FLOAT_NEON

Definition at line 296 of file factor.h.

#define NE10_DstAccSrcCst_OPERATION_VEC2F_NEON NE10_DstSrcCst_OPERATION_VEC2F_NEON

Definition at line 320 of file factor.h.

#define NE10_DstAccSrcCst_OPERATION_VEC3F_NEON NE10_DstSrcCst_OPERATION_VEC3F_NEON

Definition at line 369 of file factor.h.

#define NE10_DstAccSrcCst_OPERATION_VEC4F_NEON NE10_DstSrcCst_OPERATION_VEC4F_NEON

Definition at line 383 of file factor.h.

#define NE10_DstAccSrcCst_SECONDLOOP_FLOAT_NEON ( loopCode )

Value:

{ \
      float32x2_t n_rest_acc = { 0.0f , 0.0f }; /* temporary storage to be used with NEON load/store intrinsics */ \
      float32x2_t n_rest = { 0.0f , 0.0f }; /* temporary storage to be used with NEON load/store intrinsics */ \
      float32x2_t n_rest_cst = { cst, cst }; /* temporary constant value for use in the main NEON operation */ \
      n_rest_acc = vld1_lane_f32 ( (float32_t*)acc, n_rest_acc, 0); /* load into the first lane of d0 */ \
      n_rest = vld1_lane_f32 ( (float32_t*)src, n_rest, 0); /* load into the first lane of d1 */ \
      loopCode; /* the actual operation is placed here ... */ /* exceptional cases where the count is not a multiple of 4 */ \
      vst1_lane_f32( (float32_t*)dst, n_rest, 0); /* store the lane back into the memory */ \
      /* move to the next item in the stream */ \
      acc++; \
      src++; \
      dst++; \
     }

Definition at line 282 of file factor.h.

#define NE10_DstAccSrcCst_SECONDLOOP_VEC2F_NEON ( loopCode )

Value:

{ \
     float32x2_t n_rest_acc; \
     float32x2_t n_rest; \
     float32x2_t n_rest_cst = { cst->x, cst->y }; \
     n_rest_acc = vld1_f32( (float32_t*)acc  ); \
     n_rest = vld1_f32( (float32_t*)src  ); \
     loopCode; /* exceptional cases where the count isn't a multiple of 2 */ \
     vst1_f32( (float32_t*)dst, n_rest); \
    }

Definition at line 310 of file factor.h.

#define NE10_DstAccSrcCst_SECONDLOOP_VEC3F_NEON ( loopCode )

Value:

{ \
      float32x2x3_t n_rest_acc = FLOAT32_2x3( \
         0.0f, 0.0f, \
         0.0f, 0.0f, \
         0.0f, 0.0f  \
      ); \
      float32x2x3_t n_rest = FLOAT32_2x3( \
        0.0f, 0.0f, \
        0.0f, 0.0f, \
        0.0f, 0.0f  \
      ); \
      float32x2x3_t n_rest_cst = { (const float32x2_t){cst->x, 0}, \
                                  (const float32x2_t){cst->y, 0}, \
                                  (const float32x2_t){cst->z, 0} };     \
      n_rest_acc = vld3_lane_f32 ( (float32_t*)acc, n_rest_acc, 0);       \
      n_rest = vld3_lane_f32 ( (float32_t*)src, n_rest, 0);       \
      loopCode; /* exceptional cases where the count isn't a multiple of 4 */ \
      vst3_lane_f32( (float32_t*)dst, n_rest, 0); \
      acc++; \
      src++; \
      dst++; \
  }

Definition at line 346 of file factor.h.

#define NE10_DstCst_MAINLOOP_FLOAT_NEON ( loopCode )

Value:

{ \
     /* load 4 values  */ \
     loopCode; /* the actual operation is placed here... */ /* The main loop iterates through four float values each time */ \
     vst1q_f32 ( (float32_t*)dst , n_cst ); /* store the results back */ \
     dst += 4; /* move to the next items; 4*float */ \
    }

Definition at line 393 of file factor.h.

#define NE10_DstCst_MAINLOOP_VEC2F_NEON ( loopCode )

Value:

{ \
     loopCode; /* actual operation */ /* The main loop iterates through two 2D vectors each time */ \
     vst1q_f32 ( (float32_t*)dst , n_cst ); /* store back */ \
     dst += 2; /* move to the next 2 vectors */ \
    }

Definition at line 427 of file factor.h.

#define NE10_DstCst_MAINLOOP_VEC3F_NEON ( loopCode )

Value:

{ \
     loopCode; /* The main loop iterates through four 3D vectors (three 4-float registers) each time */ \
     vst1q_f32 ( (float32_t*)dst , n_cst1 ); \
     dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
     vst1q_f32 ( (float32_t*)dst , n_cst2 ); \
     dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
     vst1q_f32 ( (float32_t*)dst , n_cst3 ); \
     dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
  }

Definition at line 454 of file factor.h.

#define NE10_DstCst_MAINLOOP_VEC4F_NEON ( loopCode )

Value:

{ \
     loopCode; \
     vst1q_f32 ( (float32_t*)dst , n_cst );  /* The main loop iterates through one 4D vector each time */ \
     dst ++; \
   }

Definition at line 492 of file factor.h.

#define NE10_DstCst_OPERATION_FLOAT_NEON	(	loopCode1,
		loopCode2
	)

Value:

{ \
   ne10_result_t res = NE10_OK; \
   int dif = 0; \
   dif = count % 4; /* either 0 or one of 1,2,3; in the latter cases the second path is taken */ \
   for (; count > dif; count -= 4) { \
     loopCode1; \
    } \
   if ( 0 != dif ) { \
    unsigned int idx; \
    for ( idx = 0 ; idx < dif; idx++ ) { \
      loopCode2; \
     } \
    } \
   return res; \
  }

Definition at line 408 of file factor.h.

#define NE10_DstCst_OPERATION_VEC2F_NEON	(	loopCode1,
		loopCode2
	)

Value:

{ \
   ne10_result_t res = NE10_OK; \
   float32x4_t n_cst = { cst->x, cst->y, cst->x, cst->y }; \
   int dif = count % 2; \
   for (; count > dif; count -= 2) { \
    loopCode1; \
   } \
   if ( 0 != dif ) { \
    loopCode2; \
   } \
   return res; \
  }

Definition at line 439 of file factor.h.

#define NE10_DstCst_OPERATION_VEC3F_NEON	(	loopCode1,
		loopCode2
	)

Value:

{ \
   ne10_result_t res = NE10_OK; \
   float32x4_t n_cst1 = { cst->x, cst->y, cst->z, cst->x }; \
   float32x4_t n_cst2 = { cst->y, cst->z, cst->x, cst->y }; \
   float32x4_t n_cst3 = { cst->z, cst->x, cst->y, cst->z }; \
   int dif = count % 4;  \
   for (; count > dif; count -= 4) { \
    loopCode1; \
  } \
  if ( 0 != dif ) { \
    unsigned int idx; \
    for ( idx = 0 ; idx < dif; idx++ ) { \
      loopCode2; \
     } \
    } \
   return res; \
  }

Definition at line 472 of file factor.h.

#define NE10_DstCst_OPERATION_VEC4F_NEON ( loopCode )

Value:

{ \
   ne10_result_t res = NE10_OK; \
   float32x4_t n_cst = { cst->x, cst->y, cst->z, cst->w }; \
   for (; count != 0; count --) { \
     loopCode; \
    } \
   return res; \
  }

Definition at line 498 of file factor.h.

#define NE10_DstCst_SECONDLOOP_FLOAT_NEON ( loopCode )

Value:

{ \
      float32x2_t n_rest_cst = { cst, cst }; /* temporary constant value for use in the main NEON operation */ \
      loopCode; /* the actual operation is placed here ... */ /* exceptional cases where the count is not a multiple of 4 */ \
      vst1_lane_f32( (float32_t*)dst, n_rest_cst, 0); /* store the lane back into the memory */ \
      /* move to the next item in the stream */ \
      dst++; \
     }

Definition at line 400 of file factor.h.

#define NE10_DstCst_SECONDLOOP_VEC2F_NEON ( loopCode )

Value:

{ \
     float32x2_t n_rest_cst = { cst->x, cst->y }; \
     loopCode; /* exceptional cases where the count isn't a multiple of 2 */ \
     vst1_f32( (float32_t*)dst, n_rest_cst); \
    }

Definition at line 433 of file factor.h.

#define NE10_DstCst_SECONDLOOP_VEC3F_NEON ( loopCode )

Value:

{ \
      float32x2x3_t n_rest_cst = { (const float32x2_t){cst->x, 0}, \
      (const float32x2_t){cst->y, 0}, (const float32x2_t){cst->z, 0} }; \
      loopCode; /* exceptional cases where the count isn't a multiple of 4 */ \
      vst3_lane_f32( (float32_t*)dst, n_rest_cst, 0); \
      dst++; \
     }

Definition at line 464 of file factor.h.

#define NE10_DstSrc1Src2_MAINLOOP_FLOAT_NEON ( loopCode )

Value:

{ \
     /* load 4 values  */ \
     n_src = vld1q_f32( (float32_t*)src1 ); \
     src1 += 4; /* move to the next 4 float items; 4*float */ \
     n_src2 = vld1q_f32( (float32_t*)src2 ); \
     src2 += 4; /* move to the next 4 float items; 4*float */ \
     loopCode; /* the actual operation is placed here... */ /* The main loop iterates through four float values each time */ \
     vst1q_f32 ( (float32_t*)dst , n_dst ); /* store the results back */ \
     dst += 4; /* move to the next items; 4*float */ \
    }

Definition at line 515 of file factor.h.

#define NE10_DstSrc1Src2_OPERATION_FLOAT_NEON NE10_DstSrcCst_OPERATION_FLOAT_NEON

Definition at line 539 of file factor.h.

#define NE10_DstSrc1Src2_SECONDLOOP_FLOAT_NEON ( loopCode )

Value:

{ \
      float32x2_t n_rest = { 0.0f , 0.0f }; /* temporary storage to be used with NEON load/store intrinsics */ \
      float32x2_t n_rest2 = { 0.0f , 0.0f }; \
      n_rest = vld1_lane_f32 ( (float32_t*)src1, n_rest, 0); /* load into the first lane of d0 */ \
      n_rest2 = vld1_lane_f32 ( (float32_t*)src2, n_rest, 0); \
      loopCode; /* the actual operation is placed here ... */ /* exceptional cases where the count is not a multiple of 4 */ \
      vst1_lane_f32( (float32_t*)dst, n_rest, 0); /* store the lane back into the memory */ \
      /* move to the next item in the stream */ \
      src1++; \
      src2++; \
      dst++; \
     }

Definition at line 526 of file factor.h.

#define NE10_DstSrcCst_MAINLOOP_FLOAT_NEON ( loopCode )

Value:

{ \
     /* load 4 values  */ \
     n_src = vld1q_f32( (float32_t*)src ); \
     src += 4; /* move to the next 4 float items; 4*float */ \
     loopCode; /* the actual operation is placed here... */ /* The main loop iterates through four float values each time */ \
     vst1q_f32 ( (float32_t*)dst , n_dst ); /* store the results back */ \
     dst += 4; /* move to the next items; 4*float */ \
    }

Definition at line 118 of file factor.h.

#define NE10_DstSrcCst_MAINLOOP_VEC2F_NEON ( loopCode )

Value:

{ \
     n_src = vld1q_f32( (float32_t*)src ); /* load two vectors */ \
     src += 2; /* move to the next two vectors */ \
     loopCode; /* actual operation */ /* The main loop iterates through two 2D vectors each time */ \
     vst1q_f32 ( (float32_t*)dst , n_dst ); /* store back */ \
     dst += 2; /* move to the next 2 vectors */ \
    }

Definition at line 158 of file factor.h.

#define NE10_DstSrcCst_MAINLOOP_VEC3F_NEON ( loopCode )

Value:

{ \
     n_src1 = vld1q_f32( (float32_t*)src ); \
     src = ((void*)src)+(4*sizeof(ne10_float32_t)); \
     n_src2 = vld1q_f32( (float32_t*)src ); \
     src = ((void*)src)+(4*sizeof(ne10_float32_t)); \
     n_src3 = vld1q_f32( (float32_t*)src ); \
     src = ((void*)src)+(4*sizeof(ne10_float32_t)); \
     loopCode; /* The main loop iterates through four 3D vectors (three 4-float registers) each time */ \
     vst1q_f32 ( (float32_t*)dst , n_dst1 ); \
     dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
     vst1q_f32 ( (float32_t*)dst , n_dst2 ); \
     dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
     vst1q_f32 ( (float32_t*)dst , n_dst3 ); \
     dst = ((void*)dst)+(4*sizeof(ne10_float32_t)); \
  }

Definition at line 191 of file factor.h.

#define NE10_DstSrcCst_MAINLOOP_VEC4F_NEON ( loopCode )

Value:

{ \
     n_src = vld1q_f32( (float32_t*)src ); \
     src ++; \
     loopCode; \
     vst1q_f32 ( (float32_t*)dst , n_dst );  /* The main loop iterates through one 4D vector each time */ \
     dst ++; \
   }

Definition at line 244 of file factor.h.

#define NE10_DstSrcCst_OPERATION_FLOAT_NEON	(	loopCode1,
		loopCode2
	)

Value:

{ \
   ne10_result_t res = NE10_OK; \
   float32x4_t n_src; \
   float32x4_t n_dst; \
   int dif = 0; \
   dif = count % 4; /* either 0 or one of 1,2,3; in the latter cases the second path is taken */ \
   for (; count > dif; count -= 4) { \
     loopCode1; \
    } \
   if ( 0 != dif ) { \
    unsigned int idx; \
    for ( idx = 0 ; idx < dif; idx++ ) { \
      loopCode2; \
     } \
    } \
   return res; \
  }

Definition at line 138 of file factor.h.

#define NE10_DstSrcCst_OPERATION_VEC2F_NEON	(	loopCode1,
		loopCode2
	)

Value:

{ \
   ne10_result_t res = NE10_OK; \
   float32x4_t n_cst = { cst->x, cst->y, cst->x, cst->y }; \
   float32x4_t n_src; \
   float32x4_t n_dst; \
   int dif = count % 2; \
   for (; count > dif; count -= 2) { \
    loopCode1; \
   } \
   if ( 0 != dif ) { \
    loopCode2; \
   } \
   return res; \
  }

Definition at line 174 of file factor.h.

#define NE10_DstSrcCst_OPERATION_VEC3F_NEON	(	loopCode1,
		loopCode2
	)

Value:

{ \
   ne10_result_t res = NE10_OK; \
   float32x4_t n_cst1 = { cst->x, cst->y, cst->z, cst->x }; \
   float32x4_t n_cst2 = { cst->y, cst->z, cst->x, cst->y }; \
   float32x4_t n_cst3 = { cst->z, cst->x, cst->y, cst->z }; \
    float32x4_t n_src1, n_src2, n_src3; \
   float32x4_t n_dst1, n_dst2, n_dst3; \
   int dif = count % 4;  \
   for (; count > dif; count -= 4) { \
    loopCode1; \
  } \
  if ( 0 != dif ) { \
    unsigned int idx; \
    for ( idx = 0 ; idx < dif; idx++ ) { \
      loopCode2; \
     } \
    } \
   return res; \
  }

Definition at line 219 of file factor.h.

#define NE10_DstSrcCst_OPERATION_VEC4F_NEON ( loopCode )

Value:

{ \
   ne10_result_t res = NE10_OK; \
   float32x4_t n_cst = { cst->x, cst->y, cst->z, cst->w }; \
   float32x4_t n_src; \
   float32x4_t n_dst; \
   for (; count != 0; count --) { \
     loopCode; \
    } \
   return res; \
  }

Definition at line 252 of file factor.h.

#define NE10_DstSrcCst_SECONDLOOP_FLOAT_NEON ( loopCode )

Value:

{ \
      float32x2_t n_rest = { 0.0f , 0.0f }; /* temporary storage to be used with NEON load/store intrinsics */ \
      float32x2_t n_rest_cst = { cst, cst }; /* temporary constant value for use in the main NEON operation */ \
      n_rest = vld1_lane_f32 ( (float32_t*)src, n_rest, 0); /* load into the first lane of d0 */ \
      loopCode; /* the actual operation is placed here ... */ /* exceptional cases where the count is not a multiple of 4 */ \
      vst1_lane_f32( (float32_t*)dst, n_rest, 0); /* store the lane back into the memory */ \
      /* move to the next item in the stream */ \
      src++; \
      dst++; \
     }

Definition at line 127 of file factor.h.

#define NE10_DstSrcCst_SECONDLOOP_VEC2F_NEON ( loopCode )

Value:

{ \
     float32x2_t n_rest; \
     float32x2_t n_rest_cst = { cst->x, cst->y }; \
     n_rest = vld1_f32( (float32_t*)src  ); \
     loopCode; /* exceptional cases where the count isn't a multiple of 2 */ \
     vst1_f32( (float32_t*)dst, n_rest); \
    }

Definition at line 166 of file factor.h.

#define NE10_DstSrcCst_SECONDLOOP_VEC3F_NEON ( loopCode )

Value:

{ \
      float32x2x3_t n_rest = FLOAT32_2x3( \
        0.0f, 0.0f, 0.0f , 0.0f, 0.0f , 0.0f); \
      float32x2x3_t n_rest_cst = { (const float32x2_t){cst->x, 0}, \
             (const float32x2_t){cst->y, 0}, (const float32x2_t){cst->z, 0} }; \
      n_rest = vld3_lane_f32 ( (float32_t*)src, n_rest, 0); \
      loopCode; /* exceptional cases where the count isn't a multiple of 4 */ \
      vst3_lane_f32( (float32_t*)dst, n_rest, 0); \
      src++; \
      dst++; \
     }

Definition at line 207 of file factor.h.

Macros

Macro Definition Documentation