mirror of https://github.com/probonopd/MiniDexed
Use SoundFormatSigned24_32 with NEON (#852)
* use SoundFormatSigned24_32 format instead of SoundFormatSigned16 More detailed, and not much slower. * fix ARM_MATH_NEON defines --------- Co-authored-by: probonopd <probonopd@users.noreply.github.com>syslog
parent
9d8ed87aab
commit
acf9e11d5f
@ -0,0 +1,88 @@ |
||||
#include "arm_float_to_q23.h" |
||||
|
||||
#if defined(ARM_MATH_NEON_EXPERIMENTAL) |
||||
/**
 * @brief Convert one float sample to Q23 for the scalar tail of the NEON path.
 *
 * The sample is multiplied by 8388608 (2^23), truncated toward zero and
 * saturated to the signed 24-bit range [-8388608, 8388607]. The clamp is
 * done in the float domain because casting an out-of-range or NaN float
 * to an integer type is undefined behaviour in C.
 *
 * @param[in] in  floating-point sample
 * @return        saturated Q23 value; NaN yields 0
 */
static inline q23_t float_to_q23_tail(float32_t in)
{
    const float32_t scaled = in * 8388608.0f;

    if (scaled >= 8388607.0f)
    {
        return (q23_t) 0x007FFFFF;
    }
    if (scaled <= -8388608.0f)
    {
        return (q23_t) -0x00800000;
    }
    if (scaled != scaled)
    {
        /* NaN compares unequal to itself; emit silence instead of invoking UB. */
        return 0;
    }

    /* In range: the cast truncates toward zero, like the vector conversion. */
    return (q23_t) scaled;
}

/**
 * @brief Converts the elements of the floating-point vector to Q23 vector (NEON).
 *
 * Four samples are converted per iteration with a fixed-point vector
 * conversion using 23 fractional bits, then clamped to the signed 24-bit
 * range. The remaining blockSize % 4 samples are converted one at a time.
 *
 * @param[in]  pSrc       points to the floating-point input vector
 * @param[out] pDst       points to the Q23 output vector
 * @param[in]  blockSize  number of samples to convert
 */
void arm_float_to_q23(const float32_t * pSrc, q23_t * pDst, uint32_t blockSize)
{
    /* Saturation limits, built once instead of on every iteration.
     * The lower bound is written as a negative literal: 0xff800000 is an
     * unsigned constant that does not fit in int32_t. */
    const int32x4_t q23Max = vdupq_n_s32(0x007FFFFF);
    const int32x4_t q23Min = vdupq_n_s32(-0x00800000);

    const float32_t *pIn = pSrc;    /* Source pointer */
    uint32_t blkCnt;                /* Loop counter */

    /* Compute 4 outputs at a time. */
    for (blkCnt = blockSize >> 2U; blkCnt > 0U; blkCnt--)
    {
        /* C = A * 8388608, converted with 23 fractional bits */
        int32x4_t cvt = vcvtq_n_s32_f32(vld1q_f32(pIn), 23);

        /* Saturate to 24 bits */
        cvt = vminq_s32(cvt, q23Max);
        cvt = vmaxq_s32(cvt, q23Min);

        vst1q_s32(pDst, cvt);
        pDst += 4;
        pIn += 4;
    }

    /* If blockSize is not a multiple of 4, convert the remaining 1 to 3 samples. */
    for (blkCnt = blockSize & 3U; blkCnt > 0U; blkCnt--)
    {
        *pDst++ = float_to_q23_tail(*pIn++);
    }
}
||||
#else |
||||
/**
 * @brief Convert one float sample to a saturated Q23 value.
 *
 * The sample is multiplied by 8388608 (2^23), truncated toward zero and
 * saturated to the signed 24-bit range [-8388608, 8388607]. Saturation is
 * applied before the integer conversion: casting a float that is out of
 * range for the integer type (|in| >= 256 here) or NaN is undefined
 * behaviour in C, so clamping the integer afterwards is too late.
 *
 * @param[in] in  floating-point sample
 * @return        saturated Q23 value; NaN yields 0
 */
static inline q23_t float_to_q23_sat(float32_t in)
{
    const float32_t scaled = in * 8388608.0f;

    if (scaled >= 8388607.0f)
    {
        return (q23_t) 0x007FFFFF;
    }
    if (scaled <= -8388608.0f)
    {
        return (q23_t) -0x00800000;
    }
    if (scaled != scaled)
    {
        /* NaN compares unequal to itself; emit silence instead of invoking UB. */
        return 0;
    }

    /* In range: the cast truncates toward zero. */
    return (q23_t) scaled;
}

/**
 * @brief Converts the elements of the floating-point vector to Q23 vector.
 *
 * Portable scalar implementation. For every input the previous code handled
 * with defined behaviour, the output is unchanged.
 *
 * @param[in]  pSrc       points to the floating-point input vector
 * @param[out] pDst       points to the Q23 output vector
 * @param[in]  blockSize  number of samples to convert
 */
void arm_float_to_q23(const float32_t * pSrc, q23_t * pDst, uint32_t blockSize)
{
    const float32_t *pIn = pSrc;    /* Source pointer */
    uint32_t blkCnt;                /* Loop counter */

    /* Loop unrolling: compute 4 outputs at a time */
    for (blkCnt = blockSize >> 2U; blkCnt > 0U; blkCnt--)
    {
        /* C = A * 8388608, saturated to 24 bits */
        *pDst++ = float_to_q23_sat(*pIn++);
        *pDst++ = float_to_q23_sat(*pIn++);
        *pDst++ = float_to_q23_sat(*pIn++);
        *pDst++ = float_to_q23_sat(*pIn++);
    }

    /* Compute the remaining 0 to 3 outputs */
    for (blkCnt = blockSize % 0x4U; blkCnt > 0U; blkCnt--)
    {
        *pDst++ = float_to_q23_sat(*pIn++);
    }
}
||||
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */ |
@ -0,0 +1,22 @@ |
||||
#pragma once

#include <stdint.h>

#include "arm_math_types.h"

/**
 * @brief Q23 fixed-point sample: a signed 24-bit value (23 fractional bits)
 *        held in a 32-bit integer.
 */
typedef int32_t q23_t;

#ifdef __cplusplus
extern "C"
{
#endif

/**
 * @brief Converts the elements of the floating-point vector to Q23 vector.
 *
 * Each input sample is multiplied by 8388608 (2^23), truncated toward zero
 * and saturated to the signed 24-bit range [-8388608, 8388607].
 *
 * @param[in]  pSrc       points to the floating-point input vector
 * @param[out] pDst       points to the Q23 output vector
 * @param[in]  blockSize  number of samples in the input vector; the same
 *                        number of values is written to pDst
 */
void arm_float_to_q23(const float32_t * pSrc, q23_t * pDst, uint32_t blockSize);

#ifdef __cplusplus
}
#endif
Loading…
Reference in new issue