Merge pull request #952 from soyersoyer/reverbspeed

Mixing/reverb speed improvements
main continuous
probonopd 3 days ago committed by GitHub
commit 1fa9e56e3b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
  1. 2
      src/Makefile
  2. 85
      src/arm_scale_zip_f32.c
  3. 22
      src/arm_scale_zip_f32.h
  4. 39
      src/effect_mixer.hpp
  5. 67
      src/minidexed.cpp

@ -10,7 +10,7 @@ OBJS = main.o kernel.o minidexed.o config.o userinterface.o uimenu.o \
mididevice.o midikeyboard.o serialmididevice.o pckeyboard.o \
sysexfileloader.o performanceconfig.o perftimer.o \
effect_platervbstereo.o uibuttons.o midipin.o \
arm_float_to_q23.o \
arm_float_to_q23.o arm_scale_zip_f32.o \
net/ftpdaemon.o net/ftpworker.o net/applemidi.o net/udpmidi.o net/mdnspublisher.o udpmididevice.o
OPTIMIZE = -O3

@ -0,0 +1,85 @@
#include "arm_scale_zip_f32.h"
/**
Scale two vectors by the same scalar and interleave (zip) the results. For floating-point data, the algorithm used is:
<pre>
pDst[2*n] = pSrc1[n] * scale, pDst[2*n+1] = pSrc2[n] * scale, 0 <= n < blockSize.
</pre>
*/
/**
* @brief Scale two floating-point vectors by a scalar and interleave (zip) the results.
* @param[in] pSrc1 points to the input vector 1
* @param[in] pSrc2 points to the input vector 2
* @param[in] scale scale scalar
* @param[out] pDst points to the interleaved output vector (2 * blockSize samples)
* @param[in] blockSize number of samples in each input vector
*/
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
void arm_scale_zip_f32(
    const float32_t * pSrc1,
    const float32_t * pSrc2,
    float32_t scale,
    float32_t * pDst,
    uint32_t blockSize)
{
    /* NEON variant: multiplies both inputs by `scale` and writes them
    ** interleaved, i.e. pDst[2n] = pSrc1[n] * scale and
    ** pDst[2n+1] = pSrc2[n] * scale for 0 <= n < blockSize. */
    f32x2x2_t scaled;    /* val[0]: scaled pSrc1 pair, val[1]: scaled pSrc2 pair */
    uint32_t pairsLeft;  /* Remaining two-sample groups */

    /* Vector part: every pass consumes two samples from each input
    ** and emits four interleaved outputs. */
    for (pairsLeft = blockSize >> 1U; pairsLeft != 0U; pairsLeft--)
    {
        scaled.val[0] = vmul_n_f32(vld1_f32(pSrc1), scale);
        scaled.val[1] = vmul_n_f32(vld1_f32(pSrc2), scale);

        /* The two-register structure store writes the lanes interleaved */
        vst2_f32(pDst, scaled);

        pSrc1 += 2;
        pSrc2 += 2;
        pDst += 4;
    }

    /* If the blockSize is not a multiple of 2, one sample per input is
    ** still pending; handle it with plain scalar code. */
    if ((blockSize & 1U) != 0U)
    {
        pDst[0] = *pSrc1 * scale;
        pDst[1] = *pSrc2 * scale;
    }
}
#else
void arm_scale_zip_f32(
    const float32_t * pSrc1,
    const float32_t * pSrc2,
    float32_t scale,
    float32_t * pDst,
    uint32_t blockSize)
{
    /* Portable fallback: one sample from each input per pass, written
    ** to the output as an adjacent (pSrc1, pSrc2) pair after scaling. */
    uint32_t remaining;

    for (remaining = blockSize; remaining != 0U; remaining--)
    {
        /* First slot of the pair comes from pSrc1, second from pSrc2 */
        pDst[0] = *pSrc1++ * scale;
        pDst[1] = *pSrc2++ * scale;
        pDst += 2;
    }
}

@ -0,0 +1,22 @@
#pragma once
#include "arm_math_types.h"
#ifdef __cplusplus
extern "C"
{
#endif
/**
* @brief Scale two floating-point vectors by a scalar and interleave (zip) the results.
*
* For 0 <= n < blockSize the output is laid out as
* pDst[2*n] = pSrc1[n] * scale and pDst[2*n+1] = pSrc2[n] * scale.
*
* @param[in] pSrc1 points to the input vector 1 (blockSize samples)
* @param[in] pSrc2 points to the input vector 2 (blockSize samples)
* @param[in] scale scale factor applied to both inputs
* @param[out] pDst points to the interleaved output vector (2 * blockSize samples)
* @param[in] blockSize number of samples in each input vector
*/
void arm_scale_zip_f32(const float32_t * pSrc1, const float32_t * pSrc2, float32_t scale, float32_t * pDst, uint32_t blockSize);
#ifdef __cplusplus
}
#endif

@ -124,31 +124,10 @@ public:
assert(in);
// left
arm_scale_f32(in, panorama[channel][0], tmp, buffer_length);
if(multiplier[channel]!=UNITY_GAIN)
arm_scale_f32(tmp,multiplier[channel],tmp,buffer_length);
arm_add_f32(sumbufL, tmp, sumbufL, buffer_length);
// right
arm_scale_f32(in, panorama[channel][1], tmp, buffer_length);
if(multiplier[channel]!=UNITY_GAIN)
arm_scale_f32(tmp,multiplier[channel],tmp,buffer_length);
arm_add_f32(sumbufR, tmp, sumbufR, buffer_length);
}
void doAddMix(uint8_t channel, float32_t* inL, float32_t* inR)
{
float32_t tmp[buffer_length];
assert(inL);
assert(inR);
// left
if(multiplier[channel]!=UNITY_GAIN)
arm_scale_f32(inL,multiplier[channel],tmp,buffer_length);
arm_scale_f32(in, panorama[channel][0] * multiplier[channel], tmp, buffer_length);
arm_add_f32(sumbufL, tmp, sumbufL, buffer_length);
// right
if(multiplier[channel]!=UNITY_GAIN)
arm_scale_f32(inR,multiplier[channel],tmp,buffer_length);
arm_scale_f32(in, panorama[channel][1] * multiplier[channel], tmp, buffer_length);
arm_add_f32(sumbufR, tmp, sumbufR, buffer_length);
}
@ -168,6 +147,20 @@ public:
arm_fill_f32(0.0f, sumbufR, buffer_length);
}
void getBuffers(float32_t (*buffers[2]))
{
    // Hand out the mixer's own sum buffers (index 0: sumbufL, index 1: sumbufR);
    // nothing is copied, the caller receives the pointers themselves.
    float32_t* const sums[2] = { sumbufL, sumbufR };
    buffers[0] = sums[0];
    buffers[1] = sums[1];
}
void zeroFill()
{
if(sumbufL)
arm_fill_f32(0.0f, sumbufL, buffer_length);
if(sumbufR)
arm_fill_f32(0.0f, sumbufR, buffer_length);
}
protected:
using AudioMixer<NN>::sumbufL;
using AudioMixer<NN>::multiplier;

@ -30,6 +30,7 @@
#include <stdio.h>
#include <assert.h>
#include "arm_float_to_q23.h"
#include "arm_scale_zip_f32.h"
const char WLANFirmwarePath[] = "SD:firmware/";
const char WLANConfigFile[] = "SD:wpa_supplicant.conf";
@ -1354,8 +1355,6 @@ void CMiniDexed::ProcessSound (void)
float32_t tmp_float[nFrames*Channels];
int32_t tmp_int[nFrames*Channels];
if(nMasterVolume > 0.0)
{
// Convert dual float array (8 chan) to single int16 array (8 chan)
for(uint16_t i=0; i<nFrames;i++)
{
@ -1363,23 +1362,12 @@ void CMiniDexed::ProcessSound (void)
// reading directly from the TG OutputLevel buffer with
// no additional processing.
for (uint8_t tg = 0; tg < Channels; tg++)
{
if(nMasterVolume >0.0 && nMasterVolume <1.0)
{
tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i] * nMasterVolume;
}
else if(nMasterVolume == 1.0)
{
tmp_float[(i*Channels)+tg]=m_OutputLevel[tg][i];
}
}
}
arm_float_to_q23(tmp_float,tmp_int,nFrames*Channels);
}
else
{
arm_fill_q31(0, tmp_int, nFrames*Channels);
}
// Prevent PCM510x analog mute from kicking in
for (uint8_t tg = 0; tg < Channels; tg++)
@ -1404,36 +1392,35 @@ void CMiniDexed::ProcessSound (void)
float32_t tmp_float[nFrames*2];
int32_t tmp_int[nFrames*2];
if(nMasterVolume > 0.0)
{
// get the mix buffer of all TGs
float32_t *SampleBuffer[2];
tg_mixer->getBuffers(SampleBuffer);
tg_mixer->zeroFill();
for (uint8_t i = 0; i < m_nToneGenerators; i++)
{
tg_mixer->doAddMix(i,m_OutputLevel[i]);
reverb_send_mixer->doAddMix(i,m_OutputLevel[i]);
}
// END TG mixing
// BEGIN create SampleBuffer for holding audio data
float32_t SampleBuffer[2][nFrames];
// END create SampleBuffer for holding audio data
// get the mix of all TGs
tg_mixer->getMix(SampleBuffer[indexL], SampleBuffer[indexR]);
// BEGIN adding reverb
if (m_nParameter[ParameterReverbEnable])
{
float32_t ReverbBuffer[2][nFrames];
float32_t ReverbSendBuffer[2][nFrames];
arm_fill_f32(0.0f, ReverbBuffer[indexL], nFrames);
arm_fill_f32(0.0f, ReverbBuffer[indexR], nFrames);
arm_fill_f32(0.0f, ReverbSendBuffer[indexR], nFrames);
arm_fill_f32(0.0f, ReverbSendBuffer[indexL], nFrames);
float32_t *ReverbSendBuffer[2];
reverb_send_mixer->getBuffers(ReverbSendBuffer);
reverb_send_mixer->zeroFill();
for (uint8_t i = 0; i < m_nToneGenerators; i++)
{
reverb_send_mixer->doAddMix(i,m_OutputLevel[i]);
}
m_ReverbSpinLock.Acquire ();
reverb_send_mixer->getMix(ReverbSendBuffer[indexL], ReverbSendBuffer[indexR]);
reverb->doReverb(ReverbSendBuffer[indexL],ReverbSendBuffer[indexR],ReverbBuffer[indexL], ReverbBuffer[indexR],nFrames);
// scale down and add left reverb buffer by reverb level
@ -1455,25 +1442,9 @@ void CMiniDexed::ProcessSound (void)
}
// Convert dual float array (left, right) to single int16 array (left/right)
for(uint16_t i=0; i<nFrames;i++)
{
if(nMasterVolume >0.0 && nMasterVolume <1.0)
{
tmp_float[i*2]=SampleBuffer[indexL][i] * nMasterVolume;
tmp_float[(i*2)+1]=SampleBuffer[indexR][i] * nMasterVolume;
}
else if(nMasterVolume == 1.0)
{
tmp_float[i*2]=SampleBuffer[indexL][i];
tmp_float[(i*2)+1]=SampleBuffer[indexR][i];
}
}
arm_scale_zip_f32(SampleBuffer[indexL], SampleBuffer[indexR], nMasterVolume, tmp_float, nFrames);
arm_float_to_q23(tmp_float,tmp_int,nFrames*2);
}
else
{
arm_fill_q31(0, tmp_int, nFrames * 2);
}
// Prevent PCM510x analog mute from kicking in
if (tmp_int[nFrames * 2 - 1] == 0)

Loading…
Cancel
Save