mirror of https://github.com/probonopd/MiniDexed
Use SoundFormatSigned24_32 with NEON (#852)
* Use the SoundFormatSigned24_32 format instead of SoundFormatSigned16. It is more detailed, and not much slower.
* Fix ARM_MATH_NEON defines.
---------
Co-authored-by: probonopd <probonopd@users.noreply.github.com>
syslog
parent
9d8ed87aab
commit
acf9e11d5f
@ -0,0 +1,88 @@ |
|||||||
|
#include "arm_float_to_q23.h"

/* Limits of the signed 24-bit (Q23) output range. */
enum
{
  Q23_SAT_MAX = 0x007FFFFF,
  Q23_SAT_MIN = -0x00800000
};

/*
 * Converts one float sample to Q23: C = A * 8388608, truncated toward zero
 * and saturated to [Q23_SAT_MIN, Q23_SAT_MAX].
 *
 * The clamp is done on the float value *before* the integer cast: casting a
 * float whose integral part does not fit in the destination integer type is
 * undefined behaviour in C, so saturating after the cast is too late for
 * large inputs, infinities and NaN.
 */
static inline q23_t float_to_q23_sat(float32_t sample)
{
  const float32_t scaled = sample * 8388608.0f;

  if (scaled >= 8388607.0f)
  {
    return (q23_t) Q23_SAT_MAX;
  }

  if (scaled > -8388608.0f)
  {
    /* Strictly inside the Q23 range: the cast is well defined. */
    return (q23_t) scaled;
  }

  if (scaled <= -8388608.0f)
  {
    return (q23_t) Q23_SAT_MIN;
  }

  /* Every comparison above is false only for NaN; give it a defined result. */
  return 0;
}

#if defined(ARM_MATH_NEON_EXPERIMENTAL)

/*
 * NEON implementation: converts blockSize float samples from pSrc into Q23
 * values in pDst, four samples per iteration, then handles the remaining
 * 0 to 3 samples one at a time.
 */
void arm_float_to_q23(const float32_t * pSrc, q23_t * pDst, uint32_t blockSize)
{
  /* Clamp limits are loop invariant, so build the vectors once. */
  const int32x4_t vMax = vdupq_n_s32(Q23_SAT_MAX);
  const int32x4_t vMin = vdupq_n_s32(Q23_SAT_MIN);
  uint32_t blkCnt;

  /* Compute 4 outputs at a time. */
  for (blkCnt = blockSize >> 2U; blkCnt > 0U; blkCnt--)
  {
    /* C = A * 8388608: float to fixed-point with 23 fractional bits. */
    int32x4_t cvt = vcvtq_n_s32_f32(vld1q_f32(pSrc), 23);

    /* Saturate to the signed 24-bit range. */
    cvt = vminq_s32(cvt, vMax);
    cvt = vmaxq_s32(cvt, vMin);

    vst1q_s32(pDst, cvt);

    pSrc += 4;
    pDst += 4;
  }

  /* If blockSize is not a multiple of 4, convert the remaining samples. */
  for (blkCnt = blockSize & 3U; blkCnt > 0U; blkCnt--)
  {
    *pDst++ = float_to_q23_sat(*pSrc++);
  }
}

#else

/*
 * Portable implementation: converts blockSize float samples from pSrc into
 * Q23 values in pDst, one sample per iteration.
 */
void arm_float_to_q23(const float32_t * pSrc, q23_t * pDst, uint32_t blockSize)
{
  uint32_t blkCnt;

  for (blkCnt = blockSize; blkCnt > 0U; blkCnt--)
  {
    /* C = A * 8388608, saturated to the signed 24-bit range. */
    *pDst++ = float_to_q23_sat(*pSrc++);
  }
}

#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
@ -0,0 +1,22 @@ |
|||||||
|
#pragma once

#include "arm_math_types.h"

/** Q23 fixed-point sample: a signed 24-bit value held in a 32-bit integer. */
typedef int32_t q23_t;

#ifdef __cplusplus
extern "C"
{
#endif

/**
 * @brief         Converts the elements of the floating-point vector to Q23 vector.
 * @param[in]     pSrc       points to the floating-point input vector
 * @param[out]    pDst       points to the Q23 output vector
 * @param[in]     blockSize  number of samples to convert
 *
 * Each output is the input multiplied by 8388608 (2^23) and saturated to the
 * signed 24-bit range [0xFF800000, 0x007FFFFF].
 */
void arm_float_to_q23(const float32_t * pSrc, q23_t * pDst, uint32_t blockSize);

#ifdef __cplusplus
}
#endif
Loading…
Reference in new issue