Project Ne10
An open, optimized software library for the ARM architecture.
Functions | Variables
Vector Multiply-Accumulate

Functions

ne10_result_t ne10_mlac_float_c (ne10_float32_t *dst, ne10_float32_t *acc, ne10_float32_t *src, const ne10_float32_t cst, ne10_uint32_t count)
 Specific implementation of ne10_mlac_float using plain C code. More...
 
ne10_result_t ne10_mlac_float_neon (ne10_float32_t *dst, ne10_float32_t *acc, ne10_float32_t *src, const ne10_float32_t cst, ne10_uint32_t count)
 Specific implementation of ne10_mlac_float using NEON intrinsics. More...
 
ne10_result_t ne10_mlac_float_asm (ne10_float32_t *dst, ne10_float32_t *acc, ne10_float32_t *src, const ne10_float32_t cst, ne10_uint32_t count)
 Specific implementation of ne10_mlac_float using NEON assembly. More...
 
ne10_result_t ne10_mlac_vec2f_c (ne10_vec2f_t *dst, ne10_vec2f_t *acc, ne10_vec2f_t *src, const ne10_vec2f_t *cst, ne10_uint32_t count)
 Specific implementation of ne10_mlac_vec2f using plain C code. More...
 
ne10_result_t ne10_mlac_vec2f_neon (ne10_vec2f_t *dst, ne10_vec2f_t *acc, ne10_vec2f_t *src, const ne10_vec2f_t *cst, ne10_uint32_t count)
 Specific implementation of ne10_mlac_vec2f using NEON intrinsics. More...
 
ne10_result_t ne10_mlac_vec2f_asm (ne10_vec2f_t *dst, ne10_vec2f_t *acc, ne10_vec2f_t *src, const ne10_vec2f_t *cst, ne10_uint32_t count)
 Specific implementation of ne10_mlac_vec2f using NEON assembly. More...
 
ne10_result_t ne10_mlac_vec3f_c (ne10_vec3f_t *dst, ne10_vec3f_t *acc, ne10_vec3f_t *src, const ne10_vec3f_t *cst, ne10_uint32_t count)
 Specific implementation of ne10_mlac_vec3f using plain C code. More...
 
ne10_result_t ne10_mlac_vec3f_neon (ne10_vec3f_t *dst, ne10_vec3f_t *acc, ne10_vec3f_t *src, const ne10_vec3f_t *cst, ne10_uint32_t count)
 Specific implementation of ne10_mlac_vec3f using NEON intrinsics. More...
 
ne10_result_t ne10_mlac_vec3f_asm (ne10_vec3f_t *dst, ne10_vec3f_t *acc, ne10_vec3f_t *src, const ne10_vec3f_t *cst, ne10_uint32_t count)
 Specific implementation of ne10_mlac_vec3f using NEON assembly. More...
 
ne10_result_t ne10_mlac_vec4f_c (ne10_vec4f_t *dst, ne10_vec4f_t *acc, ne10_vec4f_t *src, const ne10_vec4f_t *cst, ne10_uint32_t count)
 Specific implementation of ne10_mlac_vec4f using plain C code. More...
 
ne10_result_t ne10_mlac_vec4f_neon (ne10_vec4f_t *dst, ne10_vec4f_t *acc, ne10_vec4f_t *src, const ne10_vec4f_t *cst, ne10_uint32_t count)
 Specific implementation of ne10_mlac_vec4f using NEON intrinsics. More...
 
ne10_result_t ne10_mlac_vec4f_asm (ne10_vec4f_t *dst, ne10_vec4f_t *acc, ne10_vec4f_t *src, const ne10_vec4f_t *cst, ne10_uint32_t count)
 Specific implementation of ne10_mlac_vec4f using NEON assembly. More...
 
ne10_result_t ne10_mla_float_c (ne10_float32_t *dst, ne10_float32_t *acc, ne10_float32_t *src1, ne10_float32_t *src2, ne10_uint32_t count)
 Specific implementation of ne10_mla_float using plain C code. More...
 
ne10_result_t ne10_mla_float_neon (ne10_float32_t *dst, ne10_float32_t *acc, ne10_float32_t *src1, ne10_float32_t *src2, ne10_uint32_t count) asm("ne10_mla_float_neon")
 Specific implementation of ne10_mla_float using NEON intrinsics. More...
 
ne10_result_t ne10_mla_float_asm (ne10_float32_t *dst, ne10_float32_t *acc, ne10_float32_t *src1, ne10_float32_t *src2, ne10_uint32_t count)
 Specific implementation of ne10_mla_float using NEON assembly. More...
 
ne10_result_t ne10_vmla_vec2f_c (ne10_vec2f_t *dst, ne10_vec2f_t *acc, ne10_vec2f_t *src1, ne10_vec2f_t *src2, ne10_uint32_t count)
 Specific implementation of ne10_vmla_vec2f using plain C code. More...
 
ne10_result_t ne10_vmla_vec2f_neon (ne10_vec2f_t *dst, ne10_vec2f_t *acc, ne10_vec2f_t *src1, ne10_vec2f_t *src2, ne10_uint32_t count) asm("ne10_vmla_vec2f_neon")
 Specific implementation of ne10_vmla_vec2f using NEON intrinsics. More...
 
ne10_result_t ne10_vmla_vec2f_asm (ne10_vec2f_t *dst, ne10_vec2f_t *acc, ne10_vec2f_t *src1, ne10_vec2f_t *src2, ne10_uint32_t count)
 Specific implementation of ne10_vmla_vec2f using NEON assembly. More...
 
ne10_result_t ne10_vmla_vec3f_c (ne10_vec3f_t *dst, ne10_vec3f_t *acc, ne10_vec3f_t *src1, ne10_vec3f_t *src2, ne10_uint32_t count)
 Specific implementation of ne10_vmla_vec3f using plain C code. More...
 
ne10_result_t ne10_vmla_vec3f_neon (ne10_vec3f_t *dst, ne10_vec3f_t *acc, ne10_vec3f_t *src1, ne10_vec3f_t *src2, ne10_uint32_t count) asm("ne10_vmla_vec3f_neon")
 Specific implementation of ne10_vmla_vec3f using NEON intrinsics. More...
 
ne10_result_t ne10_vmla_vec3f_asm (ne10_vec3f_t *dst, ne10_vec3f_t *acc, ne10_vec3f_t *src1, ne10_vec3f_t *src2, ne10_uint32_t count)
 Specific implementation of ne10_vmla_vec3f using NEON assembly. More...
 
ne10_result_t ne10_vmla_vec4f_c (ne10_vec4f_t *dst, ne10_vec4f_t *acc, ne10_vec4f_t *src1, ne10_vec4f_t *src2, ne10_uint32_t count)
 Specific implementation of ne10_vmla_vec4f using plain C code. More...
 
ne10_result_t ne10_vmla_vec4f_neon (ne10_vec4f_t *dst, ne10_vec4f_t *acc, ne10_vec4f_t *src1, ne10_vec4f_t *src2, ne10_uint32_t count) asm("ne10_vmla_vec4f_neon")
 Specific implementation of ne10_vmla_vec4f using NEON intrinsics. More...
 
ne10_result_t ne10_vmla_vec4f_asm (ne10_vec4f_t *dst, ne10_vec4f_t *acc, ne10_vec4f_t *src1, ne10_vec4f_t *src2, ne10_uint32_t count)
 Specific implementation of ne10_vmla_vec4f using NEON assembly. More...
 

Variables

ne10_result_t(* ne10_mlac_float )(ne10_float32_t *dst, ne10_float32_t *acc, ne10_float32_t *src, const ne10_float32_t cst, ne10_uint32_t count)
 Multiplies all scalar elements of an input array by a constant value, adding this product to a value of the same index in another input array, and storing the results in an output array. More...
 
ne10_result_t(* ne10_mlac_vec2f )(ne10_vec2f_t *dst, ne10_vec2f_t *acc, ne10_vec2f_t *src, const ne10_vec2f_t *cst, ne10_uint32_t count)
 Element-wise multiplies all the 2D vectors of an input array by a constant vector, adding this vector to another of the same index in another input array, and storing the results in an output array. More...
 
ne10_result_t(* ne10_mlac_vec3f )(ne10_vec3f_t *dst, ne10_vec3f_t *acc, ne10_vec3f_t *src, const ne10_vec3f_t *cst, ne10_uint32_t count)
 Element-wise multiplies all the 3D vectors of an input array by a constant vector, adding this vector to another of the same index in another input array, and storing the results in an output array. More...
 
ne10_result_t(* ne10_mlac_vec4f )(ne10_vec4f_t *dst, ne10_vec4f_t *acc, ne10_vec4f_t *src, const ne10_vec4f_t *cst, ne10_uint32_t count)
 Element-wise multiplies all the 4D vectors of an input array by a constant vector, adding this vector to another of the same index in another input array, and storing the results in an output array. More...
 
ne10_result_t(* ne10_mla_float )(ne10_float32_t *dst, ne10_float32_t *acc, ne10_float32_t *src1, ne10_float32_t *src2, ne10_uint32_t count)
 Multiplies all scalar elements of an input array by those of the same index in another, adding this product to a value of the same index in yet another input array, and storing the results in an output array. More...
 
ne10_result_t(* ne10_vmla_vec2f )(ne10_vec2f_t *dst, ne10_vec2f_t *acc, ne10_vec2f_t *src1, ne10_vec2f_t *src2, ne10_uint32_t count)
 Element-wise multiplies all the 2D vector elements of an input array by those of the same index in another, adding this vector to another of the same index in yet another input array, and storing the results in an output array. More...
 
ne10_result_t(* ne10_vmla_vec3f )(ne10_vec3f_t *dst, ne10_vec3f_t *acc, ne10_vec3f_t *src1, ne10_vec3f_t *src2, ne10_uint32_t count)
 Element-wise multiplies all the 3D vector elements of an input array by those of the same index in another, adding this vector to another of the same index in yet another input array, and storing the results in an output array. More...
 
ne10_result_t(* ne10_vmla_vec4f )(ne10_vec4f_t *dst, ne10_vec4f_t *acc, ne10_vec4f_t *src1, ne10_vec4f_t *src2, ne10_uint32_t count)
 Element-wise multiplies all the 4D vector elements of an input array by those of the same index in another, adding this vector to another of the same index in yet another input array, and storing the results in an output array. More...
 

Detailed Description

These functions implement vector multiply-accumulate operations for single precision floating point values.

Function Documentation

ne10_result_t ne10_mla_float_asm ( ne10_float32_t *dst,
ne10_float32_t *acc,
ne10_float32_t *src1,
ne10_float32_t *src2,
ne10_uint32_t  count 
)

Specific implementation of ne10_mla_float using NEON assembly.

ne10_result_t ne10_mla_float_c ( ne10_float32_t *dst,
ne10_float32_t *acc,
ne10_float32_t *src1,
ne10_float32_t *src2,
ne10_uint32_t  count 
)

Specific implementation of ne10_mla_float using plain C code.

Definition at line 37 of file NE10_mla.c.

ne10_result_t ne10_mla_float_neon ( ne10_float32_t *dst,
ne10_float32_t *acc,
ne10_float32_t *src1,
ne10_float32_t *src2,
ne10_uint32_t  count 
)

Specific implementation of ne10_mla_float using NEON intrinsics.

ne10_result_t ne10_mlac_float_asm ( ne10_float32_t *dst,
ne10_float32_t *acc,
ne10_float32_t *src,
const ne10_float32_t  cst,
ne10_uint32_t  count 
)

Specific implementation of ne10_mlac_float using NEON assembly.

ne10_result_t ne10_mlac_float_c ( ne10_float32_t *dst,
ne10_float32_t *acc,
ne10_float32_t *src,
const ne10_float32_t  cst,
ne10_uint32_t  count 
)

Specific implementation of ne10_mlac_float using plain C code.

Definition at line 37 of file NE10_mlac.c.

ne10_result_t ne10_mlac_float_neon ( ne10_float32_t *dst,
ne10_float32_t *acc,
ne10_float32_t *src,
const ne10_float32_t  cst,
ne10_uint32_t  count 
)

Specific implementation of ne10_mlac_float using NEON intrinsics.

Definition at line 39 of file NE10_mlac.neon.c.

ne10_result_t ne10_mlac_vec2f_asm ( ne10_vec2f_t *dst,
ne10_vec2f_t *acc,
ne10_vec2f_t *src,
const ne10_vec2f_t *cst,
ne10_uint32_t  count 
)

Specific implementation of ne10_mlac_vec2f using NEON assembly.

ne10_result_t ne10_mlac_vec2f_c ( ne10_vec2f_t *dst,
ne10_vec2f_t *acc,
ne10_vec2f_t *src,
const ne10_vec2f_t *cst,
ne10_uint32_t  count 
)

Specific implementation of ne10_mlac_vec2f using plain C code.

Definition at line 47 of file NE10_mlac.c.

ne10_result_t ne10_mlac_vec2f_neon ( ne10_vec2f_t *dst,
ne10_vec2f_t *acc,
ne10_vec2f_t *src,
const ne10_vec2f_t *cst,
ne10_uint32_t  count 
)

Specific implementation of ne10_mlac_vec2f using NEON intrinsics.

Definition at line 49 of file NE10_mlac.neon.c.

ne10_result_t ne10_mlac_vec3f_asm ( ne10_vec3f_t *dst,
ne10_vec3f_t *acc,
ne10_vec3f_t *src,
const ne10_vec3f_t *cst,
ne10_uint32_t  count 
)

Specific implementation of ne10_mlac_vec3f using NEON assembly.

ne10_result_t ne10_mlac_vec3f_c ( ne10_vec3f_t *dst,
ne10_vec3f_t *acc,
ne10_vec3f_t *src,
const ne10_vec3f_t *cst,
ne10_uint32_t  count 
)

Specific implementation of ne10_mlac_vec3f using plain C code.

Definition at line 58 of file NE10_mlac.c.

ne10_result_t ne10_mlac_vec3f_neon ( ne10_vec3f_t *dst,
ne10_vec3f_t *acc,
ne10_vec3f_t *src,
const ne10_vec3f_t *cst,
ne10_uint32_t  count 
)

Specific implementation of ne10_mlac_vec3f using NEON intrinsics.

Definition at line 59 of file NE10_mlac.neon.c.

ne10_result_t ne10_mlac_vec4f_asm ( ne10_vec4f_t *dst,
ne10_vec4f_t *acc,
ne10_vec4f_t *src,
const ne10_vec4f_t *cst,
ne10_uint32_t  count 
)

Specific implementation of ne10_mlac_vec4f using NEON assembly.

ne10_result_t ne10_mlac_vec4f_c ( ne10_vec4f_t *dst,
ne10_vec4f_t *acc,
ne10_vec4f_t *src,
const ne10_vec4f_t *cst,
ne10_uint32_t  count 
)

Specific implementation of ne10_mlac_vec4f using plain C code.

Definition at line 70 of file NE10_mlac.c.

ne10_result_t ne10_mlac_vec4f_neon ( ne10_vec4f_t *dst,
ne10_vec4f_t *acc,
ne10_vec4f_t *src,
const ne10_vec4f_t *cst,
ne10_uint32_t  count 
)

Specific implementation of ne10_mlac_vec4f using NEON intrinsics.

Definition at line 73 of file NE10_mlac.neon.c.

ne10_result_t ne10_vmla_vec2f_asm ( ne10_vec2f_t *dst,
ne10_vec2f_t *acc,
ne10_vec2f_t *src1,
ne10_vec2f_t *src2,
ne10_uint32_t  count 
)

Specific implementation of ne10_vmla_vec2f using NEON assembly.

ne10_result_t ne10_vmla_vec2f_c ( ne10_vec2f_t *dst,
ne10_vec2f_t *acc,
ne10_vec2f_t *src1,
ne10_vec2f_t *src2,
ne10_uint32_t  count 
)

Specific implementation of ne10_vmla_vec2f using plain C code.

Definition at line 47 of file NE10_mla.c.

ne10_result_t ne10_vmla_vec2f_neon ( ne10_vec2f_t *dst,
ne10_vec2f_t *acc,
ne10_vec2f_t *src1,
ne10_vec2f_t *src2,
ne10_uint32_t  count 
)

Specific implementation of ne10_vmla_vec2f using NEON intrinsics.

ne10_result_t ne10_vmla_vec3f_asm ( ne10_vec3f_t *dst,
ne10_vec3f_t *acc,
ne10_vec3f_t *src1,
ne10_vec3f_t *src2,
ne10_uint32_t  count 
)

Specific implementation of ne10_vmla_vec3f using NEON assembly.

ne10_result_t ne10_vmla_vec3f_c ( ne10_vec3f_t *dst,
ne10_vec3f_t *acc,
ne10_vec3f_t *src1,
ne10_vec3f_t *src2,
ne10_uint32_t  count 
)

Specific implementation of ne10_vmla_vec3f using plain C code.

Definition at line 58 of file NE10_mla.c.

ne10_result_t ne10_vmla_vec3f_neon ( ne10_vec3f_t *dst,
ne10_vec3f_t *acc,
ne10_vec3f_t *src1,
ne10_vec3f_t *src2,
ne10_uint32_t  count 
)

Specific implementation of ne10_vmla_vec3f using NEON intrinsics.

ne10_result_t ne10_vmla_vec4f_asm ( ne10_vec4f_t *dst,
ne10_vec4f_t *acc,
ne10_vec4f_t *src1,
ne10_vec4f_t *src2,
ne10_uint32_t  count 
)

Specific implementation of ne10_vmla_vec4f using NEON assembly.

ne10_result_t ne10_vmla_vec4f_c ( ne10_vec4f_t *dst,
ne10_vec4f_t *acc,
ne10_vec4f_t *src1,
ne10_vec4f_t *src2,
ne10_uint32_t  count 
)

Specific implementation of ne10_vmla_vec4f using plain C code.

Definition at line 70 of file NE10_mla.c.

ne10_result_t ne10_vmla_vec4f_neon ( ne10_vec4f_t *dst,
ne10_vec4f_t *acc,
ne10_vec4f_t *src1,
ne10_vec4f_t *src2,
ne10_uint32_t  count 
)

Specific implementation of ne10_vmla_vec4f using NEON intrinsics.

Variable Documentation

ne10_result_t(* ne10_mla_float) (ne10_float32_t *dst, ne10_float32_t *acc, ne10_float32_t *src1, ne10_float32_t *src2, ne10_uint32_t count)

Multiplies all scalar elements of an input array by those of the same index in another, adding this product to a value of the same index in yet another input array, and storing the results in an output array.

Points to ne10_mla_float_c, ne10_mla_float_neon, or ne10_mla_float_asm. This operation can be performed in-place.

Parameters
[out] dst: Pointer to the destination array
[in] acc: Pointer to the array of elements to be added post-multiplication
[in] src1: Pointer to the source array
[in] src2: Pointer to the array of elements to multiply by
[in] count: The number of multiply-accumulates to be performed (i.e. the length of the source arrays)

Definition at line 252 of file NE10_init_math.c.

ne10_result_t(* ne10_mlac_float) (ne10_float32_t *dst, ne10_float32_t *acc, ne10_float32_t *src, const ne10_float32_t cst, ne10_uint32_t count)

Multiplies all scalar elements of an input array by a constant value, adding this product to a value of the same index in another input array, and storing the results in an output array.

Points to ne10_mlac_float_c, ne10_mlac_float_neon, or ne10_mlac_float_asm.

This operation can be performed in-place.

Parameters
[out] dst: Pointer to the destination array
[in] acc: Pointer to the array of elements to be added post-multiplication
[in] src: Pointer to the source array
[in] cst: The constant to multiply by
[in] count: The number of multiply-accumulates to be performed (i.e. the length of the source array)

Definition at line 244 of file NE10_init_math.c.

ne10_result_t(* ne10_mlac_vec2f) (ne10_vec2f_t *dst, ne10_vec2f_t *acc, ne10_vec2f_t *src, const ne10_vec2f_t *cst, ne10_uint32_t count)

Element-wise multiplies all the 2D vectors of an input array by a constant vector, adding this vector to another of the same index in another input array, and storing the results in an output array.

Points to ne10_mlac_vec2f_c, ne10_mlac_vec2f_neon, or ne10_mlac_vec2f_asm. This operation can be performed in-place.

Parameters
[out] dst: Pointer to the destination array
[in] acc: Pointer to the array of elements to be added post-multiplication
[in] src: Pointer to the source array
[in] cst: Pointer to the constant 2D vector to multiply by
[in] count: The number of multiply-accumulates to be performed (i.e. the length of the source array)

Definition at line 245 of file NE10_init_math.c.

ne10_result_t(* ne10_mlac_vec3f) (ne10_vec3f_t *dst, ne10_vec3f_t *acc, ne10_vec3f_t *src, const ne10_vec3f_t *cst, ne10_uint32_t count)

Element-wise multiplies all the 3D vectors of an input array by a constant vector, adding this vector to another of the same index in another input array, and storing the results in an output array.

Points to ne10_mlac_vec3f_c, ne10_mlac_vec3f_neon, or ne10_mlac_vec3f_asm. This operation can be performed in-place.

Parameters
[out] dst: Pointer to the destination array
[in] acc: Pointer to the array of elements to be added post-multiplication
[in] src: Pointer to the source array
[in] cst: Pointer to the constant 3D vector to multiply by
[in] count: The number of multiply-accumulates to be performed (i.e. the length of the source array)

Definition at line 246 of file NE10_init_math.c.

ne10_result_t(* ne10_mlac_vec4f) (ne10_vec4f_t *dst, ne10_vec4f_t *acc, ne10_vec4f_t *src, const ne10_vec4f_t *cst, ne10_uint32_t count)

Element-wise multiplies all the 4D vectors of an input array by a constant vector, adding this vector to another of the same index in another input array, and storing the results in an output array.

Points to ne10_mlac_vec4f_c, ne10_mlac_vec4f_neon, or ne10_mlac_vec4f_asm. This operation can be performed in-place.

Parameters
[out] dst: Pointer to the destination array
[in] acc: Pointer to the array of elements to be added post-multiplication
[in] src: Pointer to the source array
[in] cst: Pointer to the constant 4D vector to multiply by
[in] count: The number of multiply-accumulates to be performed (i.e. the length of the source array)

Definition at line 247 of file NE10_init_math.c.

ne10_result_t(* ne10_vmla_vec2f) (ne10_vec2f_t *dst, ne10_vec2f_t *acc, ne10_vec2f_t *src1, ne10_vec2f_t *src2, ne10_uint32_t count)

Element-wise multiplies all the 2D vector elements of an input array by those of the same index in another, adding this vector to another of the same index in yet another input array, and storing the results in an output array.

Points to ne10_vmla_vec2f_c, ne10_vmla_vec2f_neon, or ne10_vmla_vec2f_asm. This operation can be performed in-place.

Parameters
[out] dst: Pointer to the destination array
[in] acc: Pointer to the array of elements to be added post-multiplication
[in] src1: Pointer to the source array
[in] src2: Pointer to the array of elements to multiply by
[in] count: The number of multiply-accumulates to be performed (i.e. the length of the source arrays)

Definition at line 270 of file NE10_init_math.c.

ne10_result_t(* ne10_vmla_vec3f) (ne10_vec3f_t *dst, ne10_vec3f_t *acc, ne10_vec3f_t *src1, ne10_vec3f_t *src2, ne10_uint32_t count)

Element-wise multiplies all the 3D vector elements of an input array by those of the same index in another, adding this vector to another of the same index in yet another input array, and storing the results in an output array.

Points to ne10_vmla_vec3f_c, ne10_vmla_vec3f_neon, or ne10_vmla_vec3f_asm. This operation can be performed in-place.

Parameters
[out] dst: Pointer to the destination array
[in] acc: Pointer to the array of elements to be added post-multiplication
[in] src1: Pointer to the source array
[in] src2: Pointer to the array of elements to multiply by
[in] count: The number of multiply-accumulates to be performed (i.e. the length of the source arrays)

Definition at line 271 of file NE10_init_math.c.

ne10_result_t(* ne10_vmla_vec4f) (ne10_vec4f_t *dst, ne10_vec4f_t *acc, ne10_vec4f_t *src1, ne10_vec4f_t *src2, ne10_uint32_t count)

Element-wise multiplies all the 4D vector elements of an input array by those of the same index in another, adding this vector to another of the same index in yet another input array, and storing the results in an output array.

Points to ne10_vmla_vec4f_c, ne10_vmla_vec4f_neon, or ne10_vmla_vec4f_asm. This operation can be performed in-place.

Parameters
[out] dst: Pointer to the destination array
[in] acc: Pointer to the array of elements to be added post-multiplication
[in] src1: Pointer to the source array
[in] src2: Pointer to the array of elements to multiply by
[in] count: The number of multiply-accumulates to be performed (i.e. the length of the source arrays)

Definition at line 272 of file NE10_init_math.c.