From c80da12e63b8281d7e1a241f86d8e3cf93d113be Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Wed, 9 Jul 2025 18:22:58 +0200 Subject: [PATCH 1/7] reverb: do not use the reverb mixer if reverb is disabled --- src/minidexed.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/minidexed.cpp b/src/minidexed.cpp index 51e6444..0e1c283 100644 --- a/src/minidexed.cpp +++ b/src/minidexed.cpp @@ -1409,7 +1409,6 @@ void CMiniDexed::ProcessSound (void) for (uint8_t i = 0; i < m_nToneGenerators; i++) { tg_mixer->doAddMix(i,m_OutputLevel[i]); - reverb_send_mixer->doAddMix(i,m_OutputLevel[i]); } // END TG mixing @@ -1425,6 +1424,11 @@ void CMiniDexed::ProcessSound (void) { float32_t ReverbBuffer[2][nFrames]; float32_t ReverbSendBuffer[2][nFrames]; + + for (uint8_t i = 0; i < m_nToneGenerators; i++) + { + reverb_send_mixer->doAddMix(i,m_OutputLevel[i]); + } arm_fill_f32(0.0f, ReverbBuffer[indexL], nFrames); arm_fill_f32(0.0f, ReverbBuffer[indexR], nFrames); From 2e90cb459ef180fe054d6b629616a8f6a55616b7 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Wed, 9 Jul 2025 18:24:59 +0200 Subject: [PATCH 2/7] reverb: do not fill the reverb buffers unnecessarily they will be overwritten later --- src/minidexed.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/minidexed.cpp b/src/minidexed.cpp index 0e1c283..1b5b4f6 100644 --- a/src/minidexed.cpp +++ b/src/minidexed.cpp @@ -1430,11 +1430,6 @@ void CMiniDexed::ProcessSound (void) reverb_send_mixer->doAddMix(i,m_OutputLevel[i]); } - arm_fill_f32(0.0f, ReverbBuffer[indexL], nFrames); - arm_fill_f32(0.0f, ReverbBuffer[indexR], nFrames); - arm_fill_f32(0.0f, ReverbSendBuffer[indexR], nFrames); - arm_fill_f32(0.0f, ReverbSendBuffer[indexL], nFrames); - m_ReverbSpinLock.Acquire (); reverb_send_mixer->getMix(ReverbSendBuffer[indexL], ReverbSendBuffer[indexR]); From f1dce8f6f022d80f2fadd2fd036acd2da68798af Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Wed, 9 Jul 2025 19:15:03 +0200 Subject: [PATCH 
3/7] mixer: use the buffer directly, do not copy --- src/effect_mixer.hpp | 14 ++++++++++++++ src/minidexed.cpp | 22 ++++++++++++---------- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/src/effect_mixer.hpp b/src/effect_mixer.hpp index 44184ab..78efbb4 100644 --- a/src/effect_mixer.hpp +++ b/src/effect_mixer.hpp @@ -168,6 +168,20 @@ public: arm_fill_f32(0.0f, sumbufR, buffer_length); } + void getBuffers(float32_t (*buffers[2])) + { + buffers[0] = sumbufL; + buffers[1] = sumbufR; + } + + void zeroFill() + { + if(sumbufL) + arm_fill_f32(0.0f, sumbufL, buffer_length); + if(sumbufR) + arm_fill_f32(0.0f, sumbufR, buffer_length); + } + protected: using AudioMixer::sumbufL; using AudioMixer::multiplier; diff --git a/src/minidexed.cpp b/src/minidexed.cpp index 1b5b4f6..d8e64df 100644 --- a/src/minidexed.cpp +++ b/src/minidexed.cpp @@ -1406,25 +1406,28 @@ void CMiniDexed::ProcessSound (void) if(nMasterVolume > 0.0) { + // get the mix buffer of all TGs + float32_t *SampleBuffer[2]; + tg_mixer->getBuffers(SampleBuffer); + + tg_mixer->zeroFill(); + for (uint8_t i = 0; i < m_nToneGenerators; i++) { tg_mixer->doAddMix(i,m_OutputLevel[i]); } // END TG mixing - // BEGIN create SampleBuffer for holding audio data - float32_t SampleBuffer[2][nFrames]; - // END create SampleBuffer for holding audio data - - // get the mix of all TGs - tg_mixer->getMix(SampleBuffer[indexL], SampleBuffer[indexR]); - // BEGIN adding reverb if (m_nParameter[ParameterReverbEnable]) { float32_t ReverbBuffer[2][nFrames]; - float32_t ReverbSendBuffer[2][nFrames]; - + + float32_t *ReverbSendBuffer[2]; + reverb_send_mixer->getBuffers(ReverbSendBuffer); + + reverb_send_mixer->zeroFill(); + for (uint8_t i = 0; i < m_nToneGenerators; i++) { reverb_send_mixer->doAddMix(i,m_OutputLevel[i]); @@ -1432,7 +1435,6 @@ void CMiniDexed::ProcessSound (void) m_ReverbSpinLock.Acquire (); - reverb_send_mixer->getMix(ReverbSendBuffer[indexL], ReverbSendBuffer[indexR]); 
reverb->doReverb(ReverbSendBuffer[indexL],ReverbSendBuffer[indexR],ReverbBuffer[indexL], ReverbBuffer[indexR],nFrames); // scale down and add left reverb buffer by reverb level From f264aae58cea7effe2f8156feee6583bb7cf35bc Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Wed, 9 Jul 2025 20:47:53 +0200 Subject: [PATCH 4/7] mixer: remove the unused and buggy 3 parameter doAddMix() It doesn't work well if multiplier[channel]==UNITY_GAIN --- src/effect_mixer.hpp | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/src/effect_mixer.hpp b/src/effect_mixer.hpp index 78efbb4..2cf3586 100644 --- a/src/effect_mixer.hpp +++ b/src/effect_mixer.hpp @@ -135,23 +135,6 @@ public: arm_add_f32(sumbufR, tmp, sumbufR, buffer_length); } - void doAddMix(uint8_t channel, float32_t* inL, float32_t* inR) - { - float32_t tmp[buffer_length]; - - assert(inL); - assert(inR); - - // left - if(multiplier[channel]!=UNITY_GAIN) - arm_scale_f32(inL,multiplier[channel],tmp,buffer_length); - arm_add_f32(sumbufL, tmp, sumbufL, buffer_length); - // right - if(multiplier[channel]!=UNITY_GAIN) - arm_scale_f32(inR,multiplier[channel],tmp,buffer_length); - arm_add_f32(sumbufR, tmp, sumbufR, buffer_length); - } - void getMix(float32_t* bufferL, float32_t* bufferR) { assert(bufferR); From ce9f82ca51ea493f12da693f9cf25351486ef31d Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Wed, 9 Jul 2025 20:52:56 +0200 Subject: [PATCH 5/7] mixer: prescale the scale parameter so one scale per element is enough --- src/effect_mixer.hpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/effect_mixer.hpp b/src/effect_mixer.hpp index 2cf3586..9cf5070 100644 --- a/src/effect_mixer.hpp +++ b/src/effect_mixer.hpp @@ -124,14 +124,10 @@ public: assert(in); // left - arm_scale_f32(in, panorama[channel][0], tmp, buffer_length); - if(multiplier[channel]!=UNITY_GAIN) - arm_scale_f32(tmp,multiplier[channel],tmp,buffer_length); + arm_scale_f32(in, panorama[channel][0] * multiplier[channel], 
tmp, buffer_length); arm_add_f32(sumbufL, tmp, sumbufL, buffer_length); // right - arm_scale_f32(in, panorama[channel][1], tmp, buffer_length); - if(multiplier[channel]!=UNITY_GAIN) - arm_scale_f32(tmp,multiplier[channel],tmp,buffer_length); + arm_scale_f32(in, panorama[channel][1] * multiplier[channel], tmp, buffer_length); arm_add_f32(sumbufR, tmp, sumbufR, buffer_length); } From 7c68142b6b5d8b0a7f40725fd296dab7b9ce67f7 Mon Sep 17 00:00:00 2001 From: Gergo Koteles Date: Tue, 22 Jul 2025 00:02:45 +0200 Subject: [PATCH 6/7] ProcessSound: do not optimize for special values this makes the code easier to understand and the execution more predictable --- src/minidexed.cpp | 131 ++++++++++++++++++---------------------------- 1 file changed, 52 insertions(+), 79 deletions(-) diff --git a/src/minidexed.cpp b/src/minidexed.cpp index d8e64df..acd79a2 100644 --- a/src/minidexed.cpp +++ b/src/minidexed.cpp @@ -1354,33 +1354,20 @@ void CMiniDexed::ProcessSound (void) float32_t tmp_float[nFrames*Channels]; int32_t tmp_int[nFrames*Channels]; - if(nMasterVolume > 0.0) + // Convert dual float array (8 chan) to single int16 array (8 chan) + for(uint16_t i=0; i0.0 && nMasterVolume <1.0) - { - tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i] * nMasterVolume; - } - else if(nMasterVolume == 1.0) - { - tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i]; - } - } + tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i] * nMasterVolume; } - arm_float_to_q23(tmp_float,tmp_int,nFrames*Channels); - } - else - { - arm_fill_q31(0, tmp_int, nFrames*Channels); } + arm_float_to_q23(tmp_float,tmp_int,nFrames*Channels); + // Prevent PCM510x analog mute from kicking in for (uint8_t tg = 0; tg < Channels; tg++) { @@ -1404,78 +1391,64 @@ void CMiniDexed::ProcessSound (void) float32_t tmp_float[nFrames*2]; int32_t tmp_int[nFrames*2]; - if(nMasterVolume > 0.0) - { - // get the mix buffer of all TGs - float32_t *SampleBuffer[2]; - tg_mixer->getBuffers(SampleBuffer); + // get the mix buffer of all TGs + float32_t 
*SampleBuffer[2]; + tg_mixer->getBuffers(SampleBuffer); - tg_mixer->zeroFill(); + tg_mixer->zeroFill(); - for (uint8_t i = 0; i < m_nToneGenerators; i++) - { - tg_mixer->doAddMix(i,m_OutputLevel[i]); - } - // END TG mixing - - // BEGIN adding reverb - if (m_nParameter[ParameterReverbEnable]) - { - float32_t ReverbBuffer[2][nFrames]; + for (uint8_t i = 0; i < m_nToneGenerators; i++) + { + tg_mixer->doAddMix(i,m_OutputLevel[i]); + } + // END TG mixing - float32_t *ReverbSendBuffer[2]; - reverb_send_mixer->getBuffers(ReverbSendBuffer); + // BEGIN adding reverb + if (m_nParameter[ParameterReverbEnable]) + { + float32_t ReverbBuffer[2][nFrames]; - reverb_send_mixer->zeroFill(); + float32_t *ReverbSendBuffer[2]; + reverb_send_mixer->getBuffers(ReverbSendBuffer); - for (uint8_t i = 0; i < m_nToneGenerators; i++) - { - reverb_send_mixer->doAddMix(i,m_OutputLevel[i]); - } + reverb_send_mixer->zeroFill(); - m_ReverbSpinLock.Acquire (); + for (uint8_t i = 0; i < m_nToneGenerators; i++) + { + reverb_send_mixer->doAddMix(i,m_OutputLevel[i]); + } - reverb->doReverb(ReverbSendBuffer[indexL],ReverbSendBuffer[indexR],ReverbBuffer[indexL], ReverbBuffer[indexR],nFrames); + m_ReverbSpinLock.Acquire (); - // scale down and add left reverb buffer by reverb level - arm_scale_f32(ReverbBuffer[indexL], reverb->get_level(), ReverbBuffer[indexL], nFrames); - arm_add_f32(SampleBuffer[indexL], ReverbBuffer[indexL], SampleBuffer[indexL], nFrames); - // scale down and add right reverb buffer by reverb level - arm_scale_f32(ReverbBuffer[indexR], reverb->get_level(), ReverbBuffer[indexR], nFrames); - arm_add_f32(SampleBuffer[indexR], ReverbBuffer[indexR], SampleBuffer[indexR], nFrames); + reverb->doReverb(ReverbSendBuffer[indexL],ReverbSendBuffer[indexR],ReverbBuffer[indexL], ReverbBuffer[indexR],nFrames); - m_ReverbSpinLock.Release (); - } - // END adding reverb + // scale down and add left reverb buffer by reverb level + arm_scale_f32(ReverbBuffer[indexL], reverb->get_level(), 
ReverbBuffer[indexL], nFrames); + arm_add_f32(SampleBuffer[indexL], ReverbBuffer[indexL], SampleBuffer[indexL], nFrames); + // scale down and add right reverb buffer by reverb level + arm_scale_f32(ReverbBuffer[indexR], reverb->get_level(), ReverbBuffer[indexR], nFrames); + arm_add_f32(SampleBuffer[indexR], ReverbBuffer[indexR], SampleBuffer[indexR], nFrames); - // swap stereo channels if needed prior to writing back out - if (m_bChannelsSwapped) - { - indexL=1; - indexR=0; - } + m_ReverbSpinLock.Release (); + } + // END adding reverb - // Convert dual float array (left, right) to single int16 array (left/right) - for(uint16_t i=0; i0.0 && nMasterVolume <1.0) - { - tmp_float[i*2]=SampleBuffer[indexL][i] * nMasterVolume; - tmp_float[(i*2)+1]=SampleBuffer[indexR][i] * nMasterVolume; - } - else if(nMasterVolume == 1.0) - { - tmp_float[i*2]=SampleBuffer[indexL][i]; - tmp_float[(i*2)+1]=SampleBuffer[indexR][i]; - } - } - arm_float_to_q23(tmp_float,tmp_int,nFrames*2); + // swap stereo channels if needed prior to writing back out + if (m_bChannelsSwapped) + { + indexL=1; + indexR=0; } - else + + // Convert dual float array (left, right) to single int16 array (left/right) + for(uint16_t i=0; i Date: Mon, 21 Jul 2025 23:20:48 +0200 Subject: [PATCH 7/7] ProcessSound: use arm_scale_zip_f32 --- src/Makefile | 2 +- src/arm_scale_zip_f32.c | 85 +++++++++++++++++++++++++++++++++++++++++ src/arm_scale_zip_f32.h | 22 +++++++++++ src/minidexed.cpp | 7 +--- 4 files changed, 110 insertions(+), 6 deletions(-) create mode 100644 src/arm_scale_zip_f32.c create mode 100644 src/arm_scale_zip_f32.h diff --git a/src/Makefile b/src/Makefile index b06b977..2aed327 100644 --- a/src/Makefile +++ b/src/Makefile @@ -10,7 +10,7 @@ OBJS = main.o kernel.o minidexed.o config.o userinterface.o uimenu.o \ mididevice.o midikeyboard.o serialmididevice.o pckeyboard.o \ sysexfileloader.o performanceconfig.o perftimer.o \ effect_platervbstereo.o uibuttons.o midipin.o \ - arm_float_to_q23.o \ + 
arm_float_to_q23.o arm_scale_zip_f32.o \ net/ftpdaemon.o net/ftpworker.o net/applemidi.o net/udpmidi.o net/mdnspublisher.o udpmididevice.o OPTIMIZE = -O3 diff --git a/src/arm_scale_zip_f32.c b/src/arm_scale_zip_f32.c new file mode 100644 index 0000000..28ff1c7 --- /dev/null +++ b/src/arm_scale_zip_f32.c @@ -0,0 +1,85 @@ +#include "arm_scale_zip_f32.h" + +/** + Scale two vectors and zip after. For floating-point data, the algorithm used is: + +
+      pDst[2n] = pSrc1[n] * scale, pDst[2n+1] = pSrc2[n] * scale   0 <= n < blockSize.
+  
+ + */ + +/** +* @brief Scale two floating-point vector with a scalar and zip after. +* @param[in] pSrc1 points to the input vector 1 +* @param[in] pSrc2 points to the input vector 2 +* @param[in] scale scale scalar +* @param[out] pDst points to the output vector +* @param[in] blockSize number of samples in the vector +*/ + +#if defined(ARM_MATH_NEON_EXPERIMENTAL) +void arm_scale_zip_f32( + const float32_t * pSrc1, + const float32_t * pSrc2, + float32_t scale, + float32_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; /* Loop counter */ + + f32x2x2_t res; + + /* Compute 2 outputs at a time */ + blkCnt = blockSize >> 1U; + + while (blkCnt > 0U) + { + res.val[0] = vmul_n_f32(vld1_f32(pSrc1), scale); + res.val[1] = vmul_n_f32(vld1_f32(pSrc2), scale); + vst2_f32(pDst, res); + + /* Increment pointers */ + pSrc1 += 2; + pSrc2 += 2; + pDst += 4; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 4, compute any remaining output samples here. + ** No loop unrolling is used. */ + blkCnt = blockSize & 1; + + while (blkCnt > 0U) + { + *pDst++ = *pSrc1++ * scale; + *pDst++ = *pSrc2++ * scale; + + /* Decrement the loop counter */ + blkCnt--; + } +} +#else +void arm_scale_zip_f32( + const float32_t * pSrc1, + const float32_t * pSrc2, + float32_t scale, + float32_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; /* Loop counter */ + + blkCnt = blockSize; + + while (blkCnt > 0U) + { + *pDst++ = *pSrc1++ * scale; + *pDst++ = *pSrc2++ * scale; + + /* Decrement the loop counter */ + blkCnt--; + } +} +#endif diff --git a/src/arm_scale_zip_f32.h b/src/arm_scale_zip_f32.h new file mode 100644 index 0000000..6629b22 --- /dev/null +++ b/src/arm_scale_zip_f32.h @@ -0,0 +1,22 @@ +#pragma once + +#include "arm_math_types.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +/** +* @brief Scale two floating-point vector with a scalar and zip after. 
+* @param[in] pSrc1 points to the input vector 1 +* @param[in] pSrc2 points to the input vector 2 +* @param[in] scale scale scalar +* @param[out] pDst points to the output vector +* @param[in] blockSize number of samples in the vector +*/ +void arm_scale_zip_f32(const float32_t * pSrc1, const float32_t * pSrc2, float32_t scale, float32_t * pDst, uint32_t blockSize); + +#ifdef __cplusplus +} +#endif diff --git a/src/minidexed.cpp b/src/minidexed.cpp index acd79a2..257ef55 100644 --- a/src/minidexed.cpp +++ b/src/minidexed.cpp @@ -30,6 +30,7 @@ #include #include #include "arm_float_to_q23.h" +#include "arm_scale_zip_f32.h" const char WLANFirmwarePath[] = "SD:firmware/"; const char WLANConfigFile[] = "SD:wpa_supplicant.conf"; @@ -1441,11 +1442,7 @@ void CMiniDexed::ProcessSound (void) } // Convert dual float array (left, right) to single int16 array (left/right) - for(uint16_t i=0; i