diff --git a/src/Makefile b/src/Makefile
index b06b977..2aed327 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -10,7 +10,7 @@ OBJS = main.o kernel.o minidexed.o config.o userinterface.o uimenu.o \
mididevice.o midikeyboard.o serialmididevice.o pckeyboard.o \
sysexfileloader.o performanceconfig.o perftimer.o \
effect_platervbstereo.o uibuttons.o midipin.o \
- arm_float_to_q23.o \
+ arm_float_to_q23.o arm_scale_zip_f32.o \
net/ftpdaemon.o net/ftpworker.o net/applemidi.o net/udpmidi.o net/mdnspublisher.o udpmididevice.o
OPTIMIZE = -O3
diff --git a/src/arm_scale_zip_f32.c b/src/arm_scale_zip_f32.c
new file mode 100644
index 0000000..28ff1c7
--- /dev/null
+++ b/src/arm_scale_zip_f32.c
@@ -0,0 +1,85 @@
+#include "arm_scale_zip_f32.h"
+
+/**
+ Scale two vectors and zip after. For floating-point data, the algorithm used is:
+
+
+ pDst[2*n] = pSrc1[n] * scale, pDst[2*n+1] = pSrc2[n] * scale, 0 <= n < blockSize.
+
+
+ */
+
+/**
+* @brief Scale two floating-point vectors by a scalar and interleave (zip) the results.
+* @param[in] pSrc1 points to input vector 1 (written to the even output indices)
+* @param[in] pSrc2 points to input vector 2 (written to the odd output indices)
+* @param[in] scale scalar every input sample is multiplied by
+* @param[out] pDst points to the interleaved output vector (2 * blockSize samples)
+* @param[in] blockSize number of samples in each input vector
+*/
+
+#if defined(ARM_MATH_NEON_EXPERIMENTAL)
+void arm_scale_zip_f32(
+ const float32_t * pSrc1,
+ const float32_t * pSrc2,
+ float32_t scale,
+ float32_t * pDst,
+ uint32_t blockSize)
+{
+ uint32_t blkCnt; /* Loop counter */
+ /* NEON variant: scales both inputs and writes them interleaved (2 * blockSize outputs). */
+ f32x2x2_t res; /* val[0] holds the scaled pSrc1 pair, val[1] the scaled pSrc2 pair */
+
+ /* Process 2 samples from each input (4 interleaved outputs) per iteration */
+ blkCnt = blockSize >> 1U;
+
+ while (blkCnt > 0U)
+ {
+ res.val[0] = vmul_n_f32(vld1_f32(pSrc1), scale);
+ res.val[1] = vmul_n_f32(vld1_f32(pSrc2), scale);
+ vst2_f32(pDst, res); /* interleaving store: elements of val[0] and val[1] alternate in pDst */
+
+ /* Advance the pointers: 2 samples consumed per input, 4 samples written */
+ pSrc1 += 2;
+ pSrc2 += 2;
+ pDst += 4;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+
+ /* If blockSize is odd, one sample per input remains; it is handled with scalar code.
+ ** blockSize & 1 is 0 or 1, so the loop below runs at most once. */
+ blkCnt = blockSize & 1;
+
+ while (blkCnt > 0U)
+ {
+ *pDst++ = *pSrc1++ * scale;
+ *pDst++ = *pSrc2++ * scale;
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+}
+#else
+void arm_scale_zip_f32(
+ const float32_t * pSrc1,
+ const float32_t * pSrc2,
+ float32_t scale,
+ float32_t * pDst,
+ uint32_t blockSize)
+{
+ uint32_t blkCnt; /* Loop counter */
+ /* Scalar fallback: one sample from each input per iteration, 2 * blockSize outputs in total. */
+ blkCnt = blockSize;
+
+ while (blkCnt > 0U)
+ {
+ *pDst++ = *pSrc1++ * scale; /* even output index takes the scaled pSrc1 sample */
+ *pDst++ = *pSrc2++ * scale; /* odd output index takes the scaled pSrc2 sample */
+
+ /* Decrement the loop counter */
+ blkCnt--;
+ }
+}
+#endif
diff --git a/src/arm_scale_zip_f32.h b/src/arm_scale_zip_f32.h
new file mode 100644
index 0000000..6629b22
--- /dev/null
+++ b/src/arm_scale_zip_f32.h
@@ -0,0 +1,22 @@
+#pragma once
+
+#include "arm_math_types.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/**
+* @brief Scale two floating-point vectors by a scalar and interleave (zip) the results.
+* @param[in] pSrc1 points to input vector 1 (written to the even output indices)
+* @param[in] pSrc2 points to input vector 2 (written to the odd output indices)
+* @param[in] scale scalar every input sample is multiplied by
+* @param[out] pDst points to the interleaved output vector (2 * blockSize samples)
+* @param[in] blockSize number of samples in each input vector
+*/
+void arm_scale_zip_f32(const float32_t * pSrc1, const float32_t * pSrc2, float32_t scale, float32_t * pDst, uint32_t blockSize);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/effect_mixer.hpp b/src/effect_mixer.hpp
index 44184ab..9cf5070 100644
--- a/src/effect_mixer.hpp
+++ b/src/effect_mixer.hpp
@@ -124,31 +124,10 @@ public:
assert(in);
// left
- arm_scale_f32(in, panorama[channel][0], tmp, buffer_length);
- if(multiplier[channel]!=UNITY_GAIN)
- arm_scale_f32(tmp,multiplier[channel],tmp,buffer_length);
- arm_add_f32(sumbufL, tmp, sumbufL, buffer_length);
- // right
- arm_scale_f32(in, panorama[channel][1], tmp, buffer_length);
- if(multiplier[channel]!=UNITY_GAIN)
- arm_scale_f32(tmp,multiplier[channel],tmp,buffer_length);
- arm_add_f32(sumbufR, tmp, sumbufR, buffer_length);
- }
-
- void doAddMix(uint8_t channel, float32_t* inL, float32_t* inR)
- {
- float32_t tmp[buffer_length];
-
- assert(inL);
- assert(inR);
-
- // left
- if(multiplier[channel]!=UNITY_GAIN)
- arm_scale_f32(inL,multiplier[channel],tmp,buffer_length);
+ arm_scale_f32(in, panorama[channel][0] * multiplier[channel], tmp, buffer_length);
arm_add_f32(sumbufL, tmp, sumbufL, buffer_length);
// right
- if(multiplier[channel]!=UNITY_GAIN)
- arm_scale_f32(inR,multiplier[channel],tmp,buffer_length);
+ arm_scale_f32(in, panorama[channel][1] * multiplier[channel], tmp, buffer_length);
arm_add_f32(sumbufR, tmp, sumbufR, buffer_length);
}
@@ -168,6 +147,20 @@ public:
arm_fill_f32(0.0f, sumbufR, buffer_length);
}
+ void getBuffers(float32_t (*buffers[2])) // hands out pointers to the mixer's own sum buffers; nothing is copied
+ {
+ buffers[0] = sumbufL; // left sum buffer
+ buffers[1] = sumbufR; // right sum buffer
+ }
+
+ void zeroFill() // overwrites both sum buffers with 0.0f over buffer_length samples
+ {
+ if(sumbufL) // a null buffer pointer is skipped
+ arm_fill_f32(0.0f, sumbufL, buffer_length);
+ if(sumbufR) // a null buffer pointer is skipped
+ arm_fill_f32(0.0f, sumbufR, buffer_length);
+ }
+
protected:
using AudioMixer::sumbufL;
using AudioMixer::multiplier;
diff --git a/src/minidexed.cpp b/src/minidexed.cpp
index 51e6444..257ef55 100644
--- a/src/minidexed.cpp
+++ b/src/minidexed.cpp
@@ -30,6 +30,7 @@
#include
#include
#include "arm_float_to_q23.h"
+#include "arm_scale_zip_f32.h"
const char WLANFirmwarePath[] = "SD:firmware/";
const char WLANConfigFile[] = "SD:wpa_supplicant.conf";
@@ -1354,33 +1355,20 @@ void CMiniDexed::ProcessSound (void)
float32_t tmp_float[nFrames*Channels];
int32_t tmp_int[nFrames*Channels];
- if(nMasterVolume > 0.0)
+ // Interleave the per-TG float buffers into tmp_float, scaled by the master volume (converted to q23 below)
+ for(uint16_t i=0; i0.0 && nMasterVolume <1.0)
- {
- tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i] * nMasterVolume;
- }
- else if(nMasterVolume == 1.0)
- {
- tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i];
- }
- }
+ tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i] * nMasterVolume;
}
- arm_float_to_q23(tmp_float,tmp_int,nFrames*Channels);
- }
- else
- {
- arm_fill_q31(0, tmp_int, nFrames*Channels);
}
+ arm_float_to_q23(tmp_float,tmp_int,nFrames*Channels);
+
// Prevent PCM510x analog mute from kicking in
for (uint8_t tg = 0; tg < Channels; tg++)
{
@@ -1404,77 +1392,60 @@ void CMiniDexed::ProcessSound (void)
float32_t tmp_float[nFrames*2];
int32_t tmp_int[nFrames*2];
- if(nMasterVolume > 0.0)
- {
- for (uint8_t i = 0; i < m_nToneGenerators; i++)
- {
- tg_mixer->doAddMix(i,m_OutputLevel[i]);
- reverb_send_mixer->doAddMix(i,m_OutputLevel[i]);
- }
- // END TG mixing
+ // get the mix buffer of all TGs
+ float32_t *SampleBuffer[2];
+ tg_mixer->getBuffers(SampleBuffer);
- // BEGIN create SampleBuffer for holding audio data
- float32_t SampleBuffer[2][nFrames];
- // END create SampleBuffer for holding audio data
+ tg_mixer->zeroFill();
- // get the mix of all TGs
- tg_mixer->getMix(SampleBuffer[indexL], SampleBuffer[indexR]);
+ for (uint8_t i = 0; i < m_nToneGenerators; i++)
+ {
+ tg_mixer->doAddMix(i,m_OutputLevel[i]);
+ }
+ // END TG mixing
- // BEGIN adding reverb
- if (m_nParameter[ParameterReverbEnable])
- {
- float32_t ReverbBuffer[2][nFrames];
- float32_t ReverbSendBuffer[2][nFrames];
+ // BEGIN adding reverb
+ if (m_nParameter[ParameterReverbEnable])
+ {
+ float32_t ReverbBuffer[2][nFrames];
- arm_fill_f32(0.0f, ReverbBuffer[indexL], nFrames);
- arm_fill_f32(0.0f, ReverbBuffer[indexR], nFrames);
- arm_fill_f32(0.0f, ReverbSendBuffer[indexR], nFrames);
- arm_fill_f32(0.0f, ReverbSendBuffer[indexL], nFrames);
+ float32_t *ReverbSendBuffer[2];
+ reverb_send_mixer->getBuffers(ReverbSendBuffer);
- m_ReverbSpinLock.Acquire ();
+ reverb_send_mixer->zeroFill();
- reverb_send_mixer->getMix(ReverbSendBuffer[indexL], ReverbSendBuffer[indexR]);
- reverb->doReverb(ReverbSendBuffer[indexL],ReverbSendBuffer[indexR],ReverbBuffer[indexL], ReverbBuffer[indexR],nFrames);
+ for (uint8_t i = 0; i < m_nToneGenerators; i++)
+ {
+ reverb_send_mixer->doAddMix(i,m_OutputLevel[i]);
+ }
- // scale down and add left reverb buffer by reverb level
- arm_scale_f32(ReverbBuffer[indexL], reverb->get_level(), ReverbBuffer[indexL], nFrames);
- arm_add_f32(SampleBuffer[indexL], ReverbBuffer[indexL], SampleBuffer[indexL], nFrames);
- // scale down and add right reverb buffer by reverb level
- arm_scale_f32(ReverbBuffer[indexR], reverb->get_level(), ReverbBuffer[indexR], nFrames);
- arm_add_f32(SampleBuffer[indexR], ReverbBuffer[indexR], SampleBuffer[indexR], nFrames);
+ m_ReverbSpinLock.Acquire ();
- m_ReverbSpinLock.Release ();
- }
- // END adding reverb
+ reverb->doReverb(ReverbSendBuffer[indexL],ReverbSendBuffer[indexR],ReverbBuffer[indexL], ReverbBuffer[indexR],nFrames);
- // swap stereo channels if needed prior to writing back out
- if (m_bChannelsSwapped)
- {
- indexL=1;
- indexR=0;
- }
+ // scale down and add left reverb buffer by reverb level
+ arm_scale_f32(ReverbBuffer[indexL], reverb->get_level(), ReverbBuffer[indexL], nFrames);
+ arm_add_f32(SampleBuffer[indexL], ReverbBuffer[indexL], SampleBuffer[indexL], nFrames);
+ // scale down and add right reverb buffer by reverb level
+ arm_scale_f32(ReverbBuffer[indexR], reverb->get_level(), ReverbBuffer[indexR], nFrames);
+ arm_add_f32(SampleBuffer[indexR], ReverbBuffer[indexR], SampleBuffer[indexR], nFrames);
- // Convert dual float array (left, right) to single int16 array (left/right)
- for(uint16_t i=0; i0.0 && nMasterVolume <1.0)
- {
- tmp_float[i*2]=SampleBuffer[indexL][i] * nMasterVolume;
- tmp_float[(i*2)+1]=SampleBuffer[indexR][i] * nMasterVolume;
- }
- else if(nMasterVolume == 1.0)
- {
- tmp_float[i*2]=SampleBuffer[indexL][i];
- tmp_float[(i*2)+1]=SampleBuffer[indexR][i];
- }
- }
- arm_float_to_q23(tmp_float,tmp_int,nFrames*2);
+ m_ReverbSpinLock.Release ();
}
- else
+ // END adding reverb
+
+ // swap stereo channels if needed prior to writing back out
+ if (m_bChannelsSwapped)
{
- arm_fill_q31(0, tmp_int, nFrames * 2);
+ indexL=1;
+ indexR=0;
}
+ // Scale left/right by the master volume and interleave them into tmp_float (converted to q23 below)
+ arm_scale_zip_f32(SampleBuffer[indexL], SampleBuffer[indexR], nMasterVolume, tmp_float, nFrames);
+
+ arm_float_to_q23(tmp_float,tmp_int,nFrames*2);
+
// Prevent PCM510x analog mute from kicking in
if (tmp_int[nFrames * 2 - 1] == 0)
{