Use SoundFormatSigned24_32 with NEON (#852)

* use SoundFormatSigned24_32 format instead of SoundFormatSigned16

More detailed, and not much slower.

* fix ARM_MATH_NEON defines

---------

Co-authored-by: probonopd <probonopd@users.noreply.github.com>
syslog
soyer 1 day ago committed by GitHub
parent 9d8ed87aab
commit acf9e11d5f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 1
      src/Makefile
  2. 3
      src/Synth_Dexed.mk
  3. 88
      src/arm_float_to_q23.c
  4. 22
      src/arm_float_to_q23.h
  5. 19
      src/minidexed.cpp

@ -10,6 +10,7 @@ OBJS = main.o kernel.o minidexed.o config.o userinterface.o uimenu.o \
mididevice.o midikeyboard.o serialmididevice.o pckeyboard.o \
sysexfileloader.o performanceconfig.o perftimer.o \
effect_compressor.o effect_platervbstereo.o uibuttons.o midipin.o \
arm_float_to_q23.o \
net/ftpdaemon.o net/ftpworker.o net/applemidi.o net/udpmidi.o net/mdnspublisher.o udpmididevice.o
OPTIMIZE = -O3

@ -40,8 +40,9 @@ INCLUDE += -I $(CMSIS_DSP_COMPUTELIB_INCLUDE_DIR)
DEFINE += -DUSE_FX
ifeq ($(strip $(AARCH)),64)
ifeq ($(RPI), $(filter $(RPI), 3 4 5))
DEFINE += -DARM_MATH_NEON
DEFINE += -DARM_MATH_NEON_EXPERIMENTAL
DEFINE += -DHAVE_NEON
endif

@ -0,0 +1,88 @@
#include "arm_float_to_q23.h"
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
void arm_float_to_q23(const float32_t * pSrc, q23_t * pDst, uint32_t blockSize)
{
const float32_t *pIn = pSrc; /* Src pointer */
uint32_t blkCnt; /* loop counter */
float32x4_t inV;
int32x4_t cvt;
blkCnt = blockSize >> 2U;
/* Compute 4 outputs at a time.
** a second loop below computes the remaining 1 to 3 samples. */
while (blkCnt > 0U)
{
/* C = A * 8388608 */
/* Convert from float to q23 and then store the results in the destination buffer */
inV = vld1q_f32(pIn);
cvt = vcvtq_n_s32_f32(inV, 23);
/* saturate */
cvt = vminq_s32(cvt, vdupq_n_s32(0x007fffff));
cvt = vmaxq_s32(cvt, vdupq_n_s32(0xff800000));
vst1q_s32(pDst, cvt);
pDst += 4;
pIn += 4;
/* Decrement the loop counter */
blkCnt--;
}
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
** No loop unrolling is used. */
blkCnt = blockSize & 3;
while (blkCnt > 0U)
{
/* C = A * 8388608 */
/* Convert from float to q23 and then store the results in the destination buffer */
*pDst++ = (q23_t) __SSAT((q31_t) (*pIn++ * 8388608.0f), 24);
/* Decrement the loop counter */
blkCnt--;
}
}
#else
void arm_float_to_q23(const float32_t * pSrc, q23_t * pDst, uint32_t blockSize)
{
uint32_t blkCnt; /* Loop counter */
const float32_t *pIn = pSrc; /* Source pointer */
/* Loop unrolling: Compute 4 outputs at a time */
blkCnt = blockSize >> 2U;
while (blkCnt > 0U)
{
/* C = A * 8388608 */
/* convert from float to Q23 and store result in destination buffer */
*pDst++ = (q23_t) __SSAT((q31_t) (*pIn++ * 8388608.0f), 24);
*pDst++ = (q23_t) __SSAT((q31_t) (*pIn++ * 8388608.0f), 24);
*pDst++ = (q23_t) __SSAT((q31_t) (*pIn++ * 8388608.0f), 24);
*pDst++ = (q23_t) __SSAT((q31_t) (*pIn++ * 8388608.0f), 24);
/* Decrement loop counter */
blkCnt--;
}
/* Loop unrolling: Compute remaining outputs */
blkCnt = blockSize % 0x4U;
while (blkCnt > 0U)
{
/* C = A * 8388608 */
/* Convert from float to q23 and then store the results in the destination buffer */
*pDst++ = (q23_t) __SSAT((q31_t) (*pIn++ * 8388608.0f), 24);
/* Decrement loop counter */
blkCnt--;
}
}
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */

@ -0,0 +1,22 @@
#pragma once
#include "arm_math_types.h"
typedef int32_t q23_t;
#ifdef __cplusplus
extern "C"
{
#endif
/**
* @brief Converts the elements of the floating-point vector to Q23 vector.
* @param[in] pSrc points to the floating-point input vector
* @param[out] pDst points to the Q23 output vector
* @param[in] blockSize length of the input vector
*/
void arm_float_to_q23(const float32_t * pSrc, q23_t * pDst, uint32_t blockSize);
#ifdef __cplusplus
}
#endif

@ -29,6 +29,7 @@
#include <string.h>
#include <stdio.h>
#include <assert.h>
#include "arm_float_to_q23.h"
const char WLANFirmwarePath[] = "SD:firmware/";
const char WLANConfigFile[] = "SD:wpa_supplicant.conf";
@ -359,7 +360,7 @@ bool CMiniDexed::Initialize (void)
return false;
}
m_pSoundDevice->SetWriteFormat (SoundFormatSigned16, Channels);
m_pSoundDevice->SetWriteFormat (SoundFormatSigned24_32, Channels);
m_nQueueSizeFrames = m_pSoundDevice->GetQueueSizeFrames ();
@ -1260,8 +1261,8 @@ void CMiniDexed::ProcessSound (void)
m_pTG[0]->getSamples (SampleBuffer, nFrames);
// Convert single float array (mono) to int16 array
int16_t tmp_int[nFrames];
arm_float_to_q15(SampleBuffer,tmp_int,nFrames);
int32_t tmp_int[nFrames];
arm_float_to_q23(SampleBuffer,tmp_int,nFrames);
if (m_pSoundDevice->Write (tmp_int, sizeof(tmp_int)) != (int) sizeof(tmp_int))
{
@ -1328,7 +1329,7 @@ void CMiniDexed::ProcessSound (void)
// Note: one TG per audio channel; output=mono; no processing.
const int Channels = 8; // One TG per channel
float32_t tmp_float[nFrames*Channels];
int16_t tmp_int[nFrames*Channels];
int32_t tmp_int[nFrames*Channels];
if(nMasterVolume > 0.0)
{
@ -1350,11 +1351,11 @@ void CMiniDexed::ProcessSound (void)
}
}
}
arm_float_to_q15(tmp_float,tmp_int,nFrames*Channels);
arm_float_to_q23(tmp_float,tmp_int,nFrames*Channels);
}
else
{
arm_fill_q15(0, tmp_int, nFrames*Channels);
arm_fill_q31(0, tmp_int, nFrames*Channels);
}
// Prevent PCM510x analog mute from kicking in
@ -1378,7 +1379,7 @@ void CMiniDexed::ProcessSound (void)
// BEGIN TG mixing
float32_t tmp_float[nFrames*2];
int16_t tmp_int[nFrames*2];
int32_t tmp_int[nFrames*2];
if(nMasterVolume > 0.0)
{
@ -1444,11 +1445,11 @@ void CMiniDexed::ProcessSound (void)
tmp_float[(i*2)+1]=SampleBuffer[indexR][i];
}
}
arm_float_to_q15(tmp_float,tmp_int,nFrames*2);
arm_float_to_q23(tmp_float,tmp_int,nFrames*2);
}
else
{
arm_fill_q15(0, tmp_int, nFrames * 2);
arm_fill_q31(0, tmp_int, nFrames * 2);
}
// Prevent PCM510x analog mute from kicking in

Loading…
Cancel
Save