diff --git a/src/Makefile b/src/Makefile index b06b977..2aed327 100644 --- a/src/Makefile +++ b/src/Makefile @@ -10,7 +10,7 @@ OBJS = main.o kernel.o minidexed.o config.o userinterface.o uimenu.o \ mididevice.o midikeyboard.o serialmididevice.o pckeyboard.o \ sysexfileloader.o performanceconfig.o perftimer.o \ effect_platervbstereo.o uibuttons.o midipin.o \ - arm_float_to_q23.o \ + arm_float_to_q23.o arm_scale_zip_f32.o \ net/ftpdaemon.o net/ftpworker.o net/applemidi.o net/udpmidi.o net/mdnspublisher.o udpmididevice.o OPTIMIZE = -O3 diff --git a/src/arm_scale_zip_f32.c b/src/arm_scale_zip_f32.c new file mode 100644 index 0000000..28ff1c7 --- /dev/null +++ b/src/arm_scale_zip_f32.c @@ -0,0 +1,85 @@ +#include "arm_scale_zip_f32.h" + +/** + Scale two vectors by a common scalar and interleave (zip) the results. For floating-point data, the algorithm used is: + +
+ pDst[2*n] = pSrc1[n] * scale, pDst[2*n+1] = pSrc2[n] * scale, for 0 <= n < blockSize. ++ + */ + +/** +* @brief Scale two floating-point vectors by a scalar and interleave (zip) the results. +* @param[in] pSrc1 points to the input vector 1 +* @param[in] pSrc2 points to the input vector 2 +* @param[in] scale scalar factor applied to every sample of both inputs +* @param[out] pDst points to the output vector; 2 * blockSize samples are written +* @param[in] blockSize number of samples in each input vector +*/ + +#if defined(ARM_MATH_NEON_EXPERIMENTAL) +void arm_scale_zip_f32( + const float32_t * pSrc1, + const float32_t * pSrc2, + float32_t scale, + float32_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; /* Loop counter */ + + f32x2x2_t res; + + /* Process 2 samples from each input per iteration, i.e. 4 interleaved outputs */ + blkCnt = blockSize >> 1U; + + while (blkCnt > 0U) + { + res.val[0] = vmul_n_f32(vld1_f32(pSrc1), scale); + res.val[1] = vmul_n_f32(vld1_f32(pSrc2), scale); + vst2_f32(pDst, res); /* vst2 stores val[0] and val[1] interleaved: a0, b0, a1, b1 */ + + /* Increment pointers */ + pSrc1 += 2; + pSrc2 += 2; + pDst += 4; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 2, handle the one remaining sample of each input here. + ** No loop unrolling is used. */ + blkCnt = blockSize & 1; + + while (blkCnt > 0U) + { + *pDst++ = *pSrc1++ * scale; + *pDst++ = *pSrc2++ * scale; + + /* Decrement the loop counter */ + blkCnt--; + } +} +#else +void arm_scale_zip_f32( + const float32_t * pSrc1, + const float32_t * pSrc2, + float32_t scale, + float32_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; /* Loop counter */ + + blkCnt = blockSize; /* One iteration per input sample; each iteration writes two outputs */ + + while (blkCnt > 0U) + { + *pDst++ = *pSrc1++ * scale; + *pDst++ = *pSrc2++ * scale; + + /* Decrement the loop counter */ + blkCnt--; + } +} +#endif diff --git a/src/arm_scale_zip_f32.h b/src/arm_scale_zip_f32.h new file mode 100644 index 0000000..6629b22 --- /dev/null +++ b/src/arm_scale_zip_f32.h @@ -0,0 +1,22 @@ +#pragma once + +#include "arm_math_types.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +/** +* @brief Scale two floating-point vectors by a scalar and interleave (zip) the results. 
+* @param[in] pSrc1 points to the input vector 1 +* @param[in] pSrc2 points to the input vector 2 +* @param[in] scale scalar factor applied to every sample of both inputs +* @param[out] pDst points to the output vector; 2 * blockSize samples are written +* @param[in] blockSize number of samples in each input vector +*/ +void arm_scale_zip_f32(const float32_t * pSrc1, const float32_t * pSrc2, float32_t scale, float32_t * pDst, uint32_t blockSize); + +#ifdef __cplusplus +} +#endif diff --git a/src/minidexed.cpp b/src/minidexed.cpp index acd79a2..257ef55 100644 --- a/src/minidexed.cpp +++ b/src/minidexed.cpp @@ -30,6 +30,7 @@ #include