Merge pull request #952 from soyersoyer/reverbspeed

Mixing/reverb speed improvements
3 days ago · 1fa9e56e3b
parent 81e66a788d d5ddae6ac0
commit 1fa9e56e3b
5 changed files with 174 additions and 103 deletions
--- a/src/Makefile
+++ b/src/Makefile
@ -10,7 +10,7 @@ OBJS = main.o kernel.o minidexed.o config.o userinterface.o uimenu.o \
       mididevice.o midikeyboard.o serialmididevice.o pckeyboard.o \
       sysexfileloader.o performanceconfig.o perftimer.o \
       effect_platervbstereo.o uibuttons.o midipin.o \
-       arm_float_to_q23.o \
+       arm_float_to_q23.o arm_scale_zip_f32.o \
       net/ftpdaemon.o net/ftpworker.o net/applemidi.o net/udpmidi.o net/mdnspublisher.o udpmididevice.o
 OPTIMIZE = -O3
--- a/src/arm_scale_zip_f32.c
+++ b/src/arm_scale_zip_f32.c
@ -0,0 +1,85 @@
 #include "arm_scale_zip_f32.h"
 /**
  Scale two vectors and zip after.  For floating-point data, the algorithm used is:
  <pre>
       pDst[2*n] = pSrc1[n] * scale, pDst[2*n+1] = pSrc2[n] * scale   0 <= n < blockSize.
  </pre>
 */
 /**
 * @brief Scale two floating-point vectors by a scalar and interleave (zip) the results.
 * @param[in]  pSrc1      points to the input vector 1
 * @param[in]  pSrc2      points to the input vector 2
 * @param[in]  scale      scale factor applied to both inputs
 * @param[out] pDst       points to the interleaved output vector (2 * blockSize samples)
 * @param[in]  blockSize  number of samples in each input vector
 */
 #if defined(ARM_MATH_NEON_EXPERIMENTAL)
 /*
  * NEON implementation: multiplies both inputs by `scale` and stores the
  * results interleaved, so pDst receives 2 * blockSize samples in the order
  * pSrc1[0]*scale, pSrc2[0]*scale, pSrc1[1]*scale, pSrc2[1]*scale, ...
  */
 void arm_scale_zip_f32(
  const float32_t * pSrc1,
  const float32_t * pSrc2,
        float32_t scale,
        float32_t * pDst,
        uint32_t blockSize)
 {
    uint32_t pairsLeft;                            /* Vector iterations still to run */
    f32x2x2_t zipped;                              /* Scaled lanes of pSrc1 / pSrc2 */

    /* Each vector pass consumes two samples from each input and emits four outputs */
    for (pairsLeft = blockSize >> 1U; pairsLeft != 0U; pairsLeft--)
    {
        zipped.val[0] = vmul_n_f32(vld1_f32(pSrc1), scale);
        zipped.val[1] = vmul_n_f32(vld1_f32(pSrc2), scale);

        /* vst2 writes val[0] and val[1] interleaved: a0, b0, a1, b1 */
        vst2_f32(pDst, zipped);

        pSrc1 += 2;
        pSrc2 += 2;
        pDst += 4;
    }

    /* An odd blockSize (not a multiple of 2) leaves exactly one sample per
    ** input; handle it with plain scalar code. */
    if ((blockSize & 1U) != 0U)
    {
        pDst[0] = *pSrc1 * scale;
        pDst[1] = *pSrc2 * scale;
    }
 }
 #else
 /*
  * Portable scalar implementation: multiplies both inputs by `scale` and
  * writes them interleaved, so pDst receives 2 * blockSize samples in the
  * order pSrc1[0]*scale, pSrc2[0]*scale, pSrc1[1]*scale, pSrc2[1]*scale, ...
  */
 void arm_scale_zip_f32(
  const float32_t * pSrc1,
  const float32_t * pSrc2,
        float32_t scale,
        float32_t * pDst,
        uint32_t blockSize)
 {
   uint32_t remaining;                            /* Input samples still to process */

   /* One pass per input sample: emit the scaled pSrc1 value, then the scaled pSrc2 value */
   for (remaining = blockSize; remaining != 0U; remaining--)
   {
       pDst[0] = (*pSrc1++) * scale;
       pDst[1] = (*pSrc2++) * scale;
       pDst += 2;
   }
 }
 #endif
--- a/src/arm_scale_zip_f32.h
+++ b/src/arm_scale_zip_f32.h
@ -0,0 +1,22 @@
 #pragma once
 #include "arm_math_types.h"
 #ifdef __cplusplus
 extern "C"
 {
 #endif
 /**
 * @brief Scale two floating-point vectors by a scalar and interleave (zip) the results.
 * @param[in]  pSrc1      points to the input vector 1
 * @param[in]  pSrc2      points to the input vector 2
 * @param[in]  scale      scale factor applied to both inputs
 * @param[out] pDst       points to the interleaved output vector (2 * blockSize samples)
 * @param[in]  blockSize  number of samples in each input vector
 */
 void arm_scale_zip_f32(const float32_t * pSrc1, const float32_t * pSrc2, float32_t scale, float32_t * pDst, uint32_t blockSize);
 #ifdef __cplusplus
 }
 #endif
--- a/src/effect_mixer.hpp
+++ b/src/effect_mixer.hpp
@ -124,31 +124,10 @@ public:
 		assert(in);
 		// left
-		arm_scale_f32(in, panorama[channel][0], tmp, buffer_length);
+		arm_scale_f32(in, panorama[channel][0] * multiplier[channel], tmp, buffer_length);
 		if(multiplier[channel]!=UNITY_GAIN)
 			arm_scale_f32(tmp,multiplier[channel],tmp,buffer_length);
 		arm_add_f32(sumbufL, tmp, sumbufL, buffer_length);
 		// right
 		arm_scale_f32(in, panorama[channel][1], tmp, buffer_length);
 		if(multiplier[channel]!=UNITY_GAIN)
 			arm_scale_f32(tmp,multiplier[channel],tmp,buffer_length);
 		arm_add_f32(sumbufR, tmp, sumbufR, buffer_length);
 	}
 	// Adds a stereo input pair (inL, inR) into the sumbufL/sumbufR accumulators,
 	// using multiplier[channel] as the gain for that channel.
 	// NOTE(review): this hunk still carries '-'/'+' diff markers on the right-channel
 	// lines, so it is not compilable as shown — confirm against the real file.
 	void doAddMix(uint8_t channel, float32_t* inL, float32_t* inR)
 	{
 		float32_t tmp[buffer_length];
 		assert(inL);
 		assert(inR);
 		// left
 		// NOTE(review): tmp is only written when the gain differs from UNITY_GAIN,
 		// but it is added to sumbufL unconditionally; at unity gain the add below
 		// reads a buffer this function never filled.
 		if(multiplier[channel]!=UNITY_GAIN)
 			arm_scale_f32(inL,multiplier[channel],tmp,buffer_length);
 		arm_add_f32(sumbufL, tmp, sumbufL, buffer_length);
 		// right
 		// NOTE(review): the '+' line below references `in`, which is not a parameter
 		// of this overload (only inL/inR are) — presumably a mis-rendered diff line.
-		if(multiplier[channel]!=UNITY_GAIN)
+		arm_scale_f32(in, panorama[channel][1] * multiplier[channel], tmp, buffer_length);
 			arm_scale_f32(inR,multiplier[channel],tmp,buffer_length);
 		arm_add_f32(sumbufR, tmp, sumbufR, buffer_length);
 	}
@ -168,6 +147,20 @@ public:
 			arm_fill_f32(0.0f, sumbufR, buffer_length);
 	}
 	// Hands out the two sum buffers without copying them:
 	// slot 0 receives sumbufL and slot 1 receives sumbufR.
 	void getBuffers(float32_t (*buffers[2]))
 	{
 		float32_t** slot = buffers;

 		*slot = sumbufL;
 		++slot;
 		*slot = sumbufR;
 	}
 	void zeroFill()
 	{
 		if(sumbufL)
 			arm_fill_f32(0.0f, sumbufL, buffer_length);
 		if(sumbufR)
 			arm_fill_f32(0.0f, sumbufR, buffer_length);
 	}
 protected:
 	using AudioMixer<NN>::sumbufL;
 	using AudioMixer<NN>::multiplier;
--- a/src/minidexed.cpp
+++ b/src/minidexed.cpp
@ -30,6 +30,7 @@
 #include <stdio.h>
 #include <assert.h>
 #include "arm_float_to_q23.h"
 #include "arm_scale_zip_f32.h"
 const char WLANFirmwarePath[] = "SD:firmware/";
 const char WLANConfigFile[]   = "SD:wpa_supplicant.conf";
@ -1354,33 +1355,20 @@ void CMiniDexed::ProcessSound (void)
 			float32_t tmp_float[nFrames*Channels];
 			int32_t tmp_int[nFrames*Channels];
-			if(nMasterVolume > 0.0)
+			// Convert dual float array (8 chan) to single int16 array (8 chan)
 			for(uint16_t i=0; i<nFrames;i++)
 			{
-				// Convert dual float array (8 chan) to single int16 array (8 chan)
+				// TGs will alternate on L/R channels for each output
-				for(uint16_t i=0; i<nFrames;i++)
+				// reading directly from the TG OutputLevel buffer with
 				// no additional processing.
 				for (uint8_t tg = 0; tg < Channels; tg++)
 				{
-					// TGs will alternate on L/R channels for each output
+					tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i] * nMasterVolume;
 					// reading directly from the TG OutputLevel buffer with
 					// no additional processing.
 					for (uint8_t tg = 0; tg < Channels; tg++)
 					{
 						if(nMasterVolume >0.0 && nMasterVolume <1.0)
 						{
 							tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i] * nMasterVolume;
 						}
 						else if(nMasterVolume == 1.0)
 						{
 							tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i];
 						}
 					}
 				}
 				arm_float_to_q23(tmp_float,tmp_int,nFrames*Channels);
 			}
 			else
 			{
 				arm_fill_q31(0, tmp_int, nFrames*Channels);
 			}
 			arm_float_to_q23(tmp_float,tmp_int,nFrames*Channels);
 			// Prevent PCM510x analog mute from kicking in
 			for (uint8_t tg = 0; tg < Channels; tg++) 
 			{
@ -1404,77 +1392,60 @@ void CMiniDexed::ProcessSound (void)
 			float32_t tmp_float[nFrames*2];
 			int32_t tmp_int[nFrames*2];
-			if(nMasterVolume > 0.0)
+			// get the mix buffer of all TGs
-			{
+			float32_t *SampleBuffer[2];
-				for (uint8_t i = 0; i < m_nToneGenerators; i++)
+			tg_mixer->getBuffers(SampleBuffer);
 				{
 					tg_mixer->doAddMix(i,m_OutputLevel[i]);
 					reverb_send_mixer->doAddMix(i,m_OutputLevel[i]);
 				}
 				// END TG mixing
-				// BEGIN create SampleBuffer for holding audio data
+			tg_mixer->zeroFill();
 				float32_t SampleBuffer[2][nFrames];
 				// END create SampleBuffer for holding audio data
-				// get the mix of all TGs
+			for (uint8_t i = 0; i < m_nToneGenerators; i++)
-				tg_mixer->getMix(SampleBuffer[indexL], SampleBuffer[indexR]);
+			{
 				tg_mixer->doAddMix(i,m_OutputLevel[i]);
 			}
 			// END TG mixing
-				// BEGIN adding reverb
+			// BEGIN adding reverb
-				if (m_nParameter[ParameterReverbEnable])
+			if (m_nParameter[ParameterReverbEnable])
-				{
+			{
-					float32_t ReverbBuffer[2][nFrames];
+				float32_t ReverbBuffer[2][nFrames];
 					float32_t ReverbSendBuffer[2][nFrames];
-					arm_fill_f32(0.0f, ReverbBuffer[indexL], nFrames);
+				float32_t *ReverbSendBuffer[2];
-					arm_fill_f32(0.0f, ReverbBuffer[indexR], nFrames);
+				reverb_send_mixer->getBuffers(ReverbSendBuffer);
 					arm_fill_f32(0.0f, ReverbSendBuffer[indexR], nFrames);
 					arm_fill_f32(0.0f, ReverbSendBuffer[indexL], nFrames);
-					m_ReverbSpinLock.Acquire ();
+				reverb_send_mixer->zeroFill();
-					reverb_send_mixer->getMix(ReverbSendBuffer[indexL], ReverbSendBuffer[indexR]);
+				for (uint8_t i = 0; i < m_nToneGenerators; i++)
-					reverb->doReverb(ReverbSendBuffer[indexL],ReverbSendBuffer[indexR],ReverbBuffer[indexL], ReverbBuffer[indexR],nFrames);
+				{
 					reverb_send_mixer->doAddMix(i,m_OutputLevel[i]);
 				}
-					// scale down and add left reverb buffer by reverb level 
+				m_ReverbSpinLock.Acquire ();
 					arm_scale_f32(ReverbBuffer[indexL], reverb->get_level(), ReverbBuffer[indexL], nFrames);
 					arm_add_f32(SampleBuffer[indexL], ReverbBuffer[indexL], SampleBuffer[indexL], nFrames);
 					// scale down and add right reverb buffer by reverb level 
 					arm_scale_f32(ReverbBuffer[indexR], reverb->get_level(), ReverbBuffer[indexR], nFrames);
 					arm_add_f32(SampleBuffer[indexR], ReverbBuffer[indexR], SampleBuffer[indexR], nFrames);
-					m_ReverbSpinLock.Release ();
+				reverb->doReverb(ReverbSendBuffer[indexL],ReverbSendBuffer[indexR],ReverbBuffer[indexL], ReverbBuffer[indexR],nFrames);
 				}
 				// END adding reverb
-				// swap stereo channels if needed prior to writing back out
+				// scale down and add left reverb buffer by reverb level 
-				if (m_bChannelsSwapped)
+				arm_scale_f32(ReverbBuffer[indexL], reverb->get_level(), ReverbBuffer[indexL], nFrames);
-				{
+				arm_add_f32(SampleBuffer[indexL], ReverbBuffer[indexL], SampleBuffer[indexL], nFrames);
-					indexL=1;
+				// scale down and add right reverb buffer by reverb level 
-					indexR=0;
+				arm_scale_f32(ReverbBuffer[indexR], reverb->get_level(), ReverbBuffer[indexR], nFrames);
-				}
+				arm_add_f32(SampleBuffer[indexR], ReverbBuffer[indexR], SampleBuffer[indexR], nFrames);
-				// Convert dual float array (left, right) to single int16 array (left/right)
+				m_ReverbSpinLock.Release ();
 				for(uint16_t i=0; i<nFrames;i++)
 				{
 					if(nMasterVolume >0.0 && nMasterVolume <1.0)
 					{
 						tmp_float[i*2]=SampleBuffer[indexL][i] * nMasterVolume;
 						tmp_float[(i*2)+1]=SampleBuffer[indexR][i] * nMasterVolume;
 					}
 					else if(nMasterVolume == 1.0)
 					{
 						tmp_float[i*2]=SampleBuffer[indexL][i];
 						tmp_float[(i*2)+1]=SampleBuffer[indexR][i];
 					}
 				}
 				arm_float_to_q23(tmp_float,tmp_int,nFrames*2);
 			}
-			else
+			// END adding reverb
 			// swap stereo channels if needed prior to writing back out
 			if (m_bChannelsSwapped)
 			{
-				arm_fill_q31(0, tmp_int, nFrames * 2);
+				indexL=1;
 				indexR=0;
 			}
 			// Convert dual float array (left, right) to single int16 array (left/right)
 			arm_scale_zip_f32(SampleBuffer[indexL], SampleBuffer[indexR], nMasterVolume, tmp_float, nFrames);
 			arm_float_to_q23(tmp_float,tmp_int,nFrames*2);
 			// Prevent PCM510x analog mute from kicking in
 			if (tmp_int[nFrames * 2 - 1] == 0)
 			{