diff --git a/src/Makefile b/src/Makefile index b06b977..2aed327 100644 --- a/src/Makefile +++ b/src/Makefile @@ -10,7 +10,7 @@ OBJS = main.o kernel.o minidexed.o config.o userinterface.o uimenu.o \ mididevice.o midikeyboard.o serialmididevice.o pckeyboard.o \ sysexfileloader.o performanceconfig.o perftimer.o \ effect_platervbstereo.o uibuttons.o midipin.o \ - arm_float_to_q23.o \ + arm_float_to_q23.o arm_scale_zip_f32.o \ net/ftpdaemon.o net/ftpworker.o net/applemidi.o net/udpmidi.o net/mdnspublisher.o udpmididevice.o OPTIMIZE = -O3 diff --git a/src/arm_scale_zip_f32.c b/src/arm_scale_zip_f32.c new file mode 100644 index 0000000..28ff1c7 --- /dev/null +++ b/src/arm_scale_zip_f32.c @@ -0,0 +1,85 @@ +#include "arm_scale_zip_f32.h" + +/** + Scale two vectors and zip after. For floating-point data, the algorithm used is: + +
+      pDst[2*n] = pSrc1[n] * scale, pDst[2*n+1] = pSrc2[n] * scale   0 <= n < blockSize.
+  
+ + */ + +/** +* @brief Scale two floating-point vectors by a scalar and interleave (zip) the results. +* @param[in] pSrc1 points to the input vector 1 +* @param[in] pSrc2 points to the input vector 2 +* @param[in] scale scale scalar +* @param[out] pDst points to the interleaved output vector (2 * blockSize samples) +* @param[in] blockSize number of samples in each input vector +*/ + +#if defined(ARM_MATH_NEON_EXPERIMENTAL) +void arm_scale_zip_f32( + const float32_t * pSrc1, + const float32_t * pSrc2, + float32_t scale, + float32_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; /* Loop counter */ + + f32x2x2_t res; + + /* Process 2 samples from each input (4 interleaved outputs) at a time */ + blkCnt = blockSize >> 1U; + + while (blkCnt > 0U) + { + res.val[0] = vmul_n_f32(vld1_f32(pSrc1), scale); + res.val[1] = vmul_n_f32(vld1_f32(pSrc2), scale); + vst2_f32(pDst, res); + + /* Increment pointers */ + pSrc1 += 2; + pSrc2 += 2; + pDst += 4; + + /* Decrement the loop counter */ + blkCnt--; + } + + /* If the blockSize is not a multiple of 2, compute the remaining sample pair here. + ** No loop unrolling is used. */ + blkCnt = blockSize & 1; + + while (blkCnt > 0U) + { + *pDst++ = *pSrc1++ * scale; + *pDst++ = *pSrc2++ * scale; + + /* Decrement the loop counter */ + blkCnt--; + } +} +#else +void arm_scale_zip_f32( + const float32_t * pSrc1, + const float32_t * pSrc2, + float32_t scale, + float32_t * pDst, + uint32_t blockSize) +{ + uint32_t blkCnt; /* Loop counter */ + + blkCnt = blockSize; + + while (blkCnt > 0U) + { + *pDst++ = *pSrc1++ * scale; + *pDst++ = *pSrc2++ * scale; + + /* Decrement the loop counter */ + blkCnt--; + } +} +#endif diff --git a/src/arm_scale_zip_f32.h b/src/arm_scale_zip_f32.h new file mode 100644 index 0000000..6629b22 --- /dev/null +++ b/src/arm_scale_zip_f32.h @@ -0,0 +1,22 @@ +#pragma once + +#include "arm_math_types.h" + +#ifdef __cplusplus +extern "C" +{ +#endif + +/** +* @brief Scale two floating-point vector with a scalar and zip after. 
+* @param[in] pSrc1 points to the input vector 1 +* @param[in] pSrc2 points to the input vector 2 +* @param[in] scale scale scalar +* @param[out] pDst points to the output vector +* @param[in] blockSize number of samples in the vector +*/ +void arm_scale_zip_f32(const float32_t * pSrc1, const float32_t * pSrc2, float32_t scale, float32_t * pDst, uint32_t blockSize); + +#ifdef __cplusplus +} +#endif diff --git a/src/effect_mixer.hpp b/src/effect_mixer.hpp index 44184ab..9cf5070 100644 --- a/src/effect_mixer.hpp +++ b/src/effect_mixer.hpp @@ -124,31 +124,10 @@ public: assert(in); // left - arm_scale_f32(in, panorama[channel][0], tmp, buffer_length); - if(multiplier[channel]!=UNITY_GAIN) - arm_scale_f32(tmp,multiplier[channel],tmp,buffer_length); - arm_add_f32(sumbufL, tmp, sumbufL, buffer_length); - // right - arm_scale_f32(in, panorama[channel][1], tmp, buffer_length); - if(multiplier[channel]!=UNITY_GAIN) - arm_scale_f32(tmp,multiplier[channel],tmp,buffer_length); - arm_add_f32(sumbufR, tmp, sumbufR, buffer_length); - } - - void doAddMix(uint8_t channel, float32_t* inL, float32_t* inR) - { - float32_t tmp[buffer_length]; - - assert(inL); - assert(inR); - - // left - if(multiplier[channel]!=UNITY_GAIN) - arm_scale_f32(inL,multiplier[channel],tmp,buffer_length); + arm_scale_f32(in, panorama[channel][0] * multiplier[channel], tmp, buffer_length); arm_add_f32(sumbufL, tmp, sumbufL, buffer_length); // right - if(multiplier[channel]!=UNITY_GAIN) - arm_scale_f32(inR,multiplier[channel],tmp,buffer_length); + arm_scale_f32(in, panorama[channel][1] * multiplier[channel], tmp, buffer_length); arm_add_f32(sumbufR, tmp, sumbufR, buffer_length); } @@ -168,6 +147,20 @@ public: arm_fill_f32(0.0f, sumbufR, buffer_length); } + void getBuffers(float32_t (*buffers[2])) + { + buffers[0] = sumbufL; + buffers[1] = sumbufR; + } + + void zeroFill() + { + if(sumbufL) + arm_fill_f32(0.0f, sumbufL, buffer_length); + if(sumbufR) + arm_fill_f32(0.0f, sumbufR, buffer_length); + } + protected: 
using AudioMixer::sumbufL; using AudioMixer::multiplier; diff --git a/src/minidexed.cpp b/src/minidexed.cpp index 51e6444..257ef55 100644 --- a/src/minidexed.cpp +++ b/src/minidexed.cpp @@ -30,6 +30,7 @@ #include #include #include "arm_float_to_q23.h" +#include "arm_scale_zip_f32.h" const char WLANFirmwarePath[] = "SD:firmware/"; const char WLANConfigFile[] = "SD:wpa_supplicant.conf"; @@ -1354,33 +1355,20 @@ void CMiniDexed::ProcessSound (void) float32_t tmp_float[nFrames*Channels]; int32_t tmp_int[nFrames*Channels]; - if(nMasterVolume > 0.0) + // Convert dual float array (8 chan) to single int16 array (8 chan) + for(uint16_t i=0; i0.0 && nMasterVolume <1.0) - { - tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i] * nMasterVolume; - } - else if(nMasterVolume == 1.0) - { - tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i]; - } - } + tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i] * nMasterVolume; } - arm_float_to_q23(tmp_float,tmp_int,nFrames*Channels); - } - else - { - arm_fill_q31(0, tmp_int, nFrames*Channels); } + arm_float_to_q23(tmp_float,tmp_int,nFrames*Channels); + // Prevent PCM510x analog mute from kicking in for (uint8_t tg = 0; tg < Channels; tg++) { @@ -1404,77 +1392,60 @@ void CMiniDexed::ProcessSound (void) float32_t tmp_float[nFrames*2]; int32_t tmp_int[nFrames*2]; - if(nMasterVolume > 0.0) - { - for (uint8_t i = 0; i < m_nToneGenerators; i++) - { - tg_mixer->doAddMix(i,m_OutputLevel[i]); - reverb_send_mixer->doAddMix(i,m_OutputLevel[i]); - } - // END TG mixing + // get the mix buffer of all TGs + float32_t *SampleBuffer[2]; + tg_mixer->getBuffers(SampleBuffer); - // BEGIN create SampleBuffer for holding audio data - float32_t SampleBuffer[2][nFrames]; - // END create SampleBuffer for holding audio data + tg_mixer->zeroFill(); - // get the mix of all TGs - tg_mixer->getMix(SampleBuffer[indexL], SampleBuffer[indexR]); + for (uint8_t i = 0; i < m_nToneGenerators; i++) + { + tg_mixer->doAddMix(i,m_OutputLevel[i]); + } + // END TG mixing - // BEGIN adding 
reverb - if (m_nParameter[ParameterReverbEnable]) - { - float32_t ReverbBuffer[2][nFrames]; - float32_t ReverbSendBuffer[2][nFrames]; + // BEGIN adding reverb + if (m_nParameter[ParameterReverbEnable]) + { + float32_t ReverbBuffer[2][nFrames]; - arm_fill_f32(0.0f, ReverbBuffer[indexL], nFrames); - arm_fill_f32(0.0f, ReverbBuffer[indexR], nFrames); - arm_fill_f32(0.0f, ReverbSendBuffer[indexR], nFrames); - arm_fill_f32(0.0f, ReverbSendBuffer[indexL], nFrames); + float32_t *ReverbSendBuffer[2]; + reverb_send_mixer->getBuffers(ReverbSendBuffer); - m_ReverbSpinLock.Acquire (); + reverb_send_mixer->zeroFill(); - reverb_send_mixer->getMix(ReverbSendBuffer[indexL], ReverbSendBuffer[indexR]); - reverb->doReverb(ReverbSendBuffer[indexL],ReverbSendBuffer[indexR],ReverbBuffer[indexL], ReverbBuffer[indexR],nFrames); + for (uint8_t i = 0; i < m_nToneGenerators; i++) + { + reverb_send_mixer->doAddMix(i,m_OutputLevel[i]); + } - // scale down and add left reverb buffer by reverb level - arm_scale_f32(ReverbBuffer[indexL], reverb->get_level(), ReverbBuffer[indexL], nFrames); - arm_add_f32(SampleBuffer[indexL], ReverbBuffer[indexL], SampleBuffer[indexL], nFrames); - // scale down and add right reverb buffer by reverb level - arm_scale_f32(ReverbBuffer[indexR], reverb->get_level(), ReverbBuffer[indexR], nFrames); - arm_add_f32(SampleBuffer[indexR], ReverbBuffer[indexR], SampleBuffer[indexR], nFrames); + m_ReverbSpinLock.Acquire (); - m_ReverbSpinLock.Release (); - } - // END adding reverb + reverb->doReverb(ReverbSendBuffer[indexL],ReverbSendBuffer[indexR],ReverbBuffer[indexL], ReverbBuffer[indexR],nFrames); - // swap stereo channels if needed prior to writing back out - if (m_bChannelsSwapped) - { - indexL=1; - indexR=0; - } + // scale down and add left reverb buffer by reverb level + arm_scale_f32(ReverbBuffer[indexL], reverb->get_level(), ReverbBuffer[indexL], nFrames); + arm_add_f32(SampleBuffer[indexL], ReverbBuffer[indexL], SampleBuffer[indexL], nFrames); + // scale down and 
add right reverb buffer by reverb level + arm_scale_f32(ReverbBuffer[indexR], reverb->get_level(), ReverbBuffer[indexR], nFrames); + arm_add_f32(SampleBuffer[indexR], ReverbBuffer[indexR], SampleBuffer[indexR], nFrames); - // Convert dual float array (left, right) to single int16 array (left/right) - for(uint16_t i=0; i0.0 && nMasterVolume <1.0) - { - tmp_float[i*2]=SampleBuffer[indexL][i] * nMasterVolume; - tmp_float[(i*2)+1]=SampleBuffer[indexR][i] * nMasterVolume; - } - else if(nMasterVolume == 1.0) - { - tmp_float[i*2]=SampleBuffer[indexL][i]; - tmp_float[(i*2)+1]=SampleBuffer[indexR][i]; - } - } - arm_float_to_q23(tmp_float,tmp_int,nFrames*2); + m_ReverbSpinLock.Release (); } - else + // END adding reverb + + // swap stereo channels if needed prior to writing back out + if (m_bChannelsSwapped) { - arm_fill_q31(0, tmp_int, nFrames * 2); + indexL=1; + indexR=0; } + // Convert dual float array (left, right) to single int16 array (left/right) + arm_scale_zip_f32(SampleBuffer[indexL], SampleBuffer[indexR], nMasterVolume, tmp_float, nFrames); + + arm_float_to_q23(tmp_float,tmp_int,nFrames*2); + // Prevent PCM510x analog mute from kicking in if (tmp_int[nFrames * 2 - 1] == 0) {