From 065689a892fbce6b7cdd02d587332e501581f22b Mon Sep 17 00:00:00 2001
From: Graham Whaley
Date: Thu, 12 May 2022 10:23:28 +0100
Subject: [PATCH 1/3] spectral: Add Spectral noise reduction
Add Spectral Noise Reduction module, based on the code from
the UHSDR / Convolution-SDR code bases.
Signed-off-by: Graham Whaley
---
AudioSpectralDenoise_F32.cpp | 342 +++++++++++++++++++++++++++++++++++
AudioSpectralDenoise_F32.h | 198 ++++++++++++++++++++
OpenAudio_ArduinoLibrary.h | 1 +
3 files changed, 541 insertions(+)
create mode 100644 AudioSpectralDenoise_F32.cpp
create mode 100644 AudioSpectralDenoise_F32.h
diff --git a/AudioSpectralDenoise_F32.cpp b/AudioSpectralDenoise_F32.cpp
new file mode 100644
index 0000000..b032864
--- /dev/null
+++ b/AudioSpectralDenoise_F32.cpp
@@ -0,0 +1,342 @@
+/* AudioSpectralDenoise_F2.h
+ * Spectral noise reduction
+ *
+ * Extracted and based on the work found in the:
+ * - Convolution SDR: https://github.com/DD4WH/Teensy-ConvolutionSDR
+ * - UHSDR: https://github.com/df8oe/UHSDR/blob/active-devel/mchf-eclipse/drivers/audio/audio_nr.c
+ *
+ * License: GNU GPLv3
+ * Both the Convolution SDR and UHSDR are licensed under GPLv3.
+ */
+
+#include "AudioSpectralDenoise_F32.h"
+
+#include
+
+// No serial debug by default
+static const bool serial_debug = false;
+
+int AudioSpectralDenoise_F32::setup(const AudioSettings_F32 & settings,
+ const int _N_FFT)
+{
+ enable(false); //Disable us, just incase we are already active...
+ sample_rate_Hz = settings.sample_rate_Hz;
+
+ if (N_FFT == -1) {
+ //setup the FFT and IFFT. If they return a negative FFT, it wasn't an allowed FFT size.
+ N_FFT = myFFT.setup(settings, _N_FFT); //hopefully, we got the same N_FFT that we asked for
+ if (N_FFT < 1)
+ return N_FFT;
+ N_FFT = myIFFT.setup(settings, _N_FFT); //hopefully, we got the same N_FFT that we asked for
+ if (N_FFT < 1)
+ return N_FFT;
+
+ //As we do a complex fft on a real signal, we only use half the returned FFT bins due
+ // to conjugate symmetry. Store the number of bins to make it obvious and handy.
+ N_bins = N_FFT / 2;
+
+ //Spectral uses sqrtHann filtering
+ (myFFT.getFFTObject())->useHanningWindow(); //applied prior to FFT
+
+ //allocate memory to hold frequency domain data - complex r+i, so double the size of the
+ // fft size.
+ complex_2N_buffer = new (std::nothrow) float32_t[2 * N_FFT];
+ if (complex_2N_buffer == NULL) return -1;
+
+ NR_X = new (std::nothrow) float32_t[N_bins];
+ if (NR_X == NULL) return -1;
+ ph1y = new (std::nothrow) float32_t[N_bins];
+ if (ph1y == NULL) return -1;
+ pslp = new (std::nothrow) float32_t[N_bins];
+ if (pslp == NULL) return -1;
+ xt = new (std::nothrow) float32_t[N_bins];
+ if (xt == NULL) return -1;
+ NR_SNR_post = new (std::nothrow) float32_t[N_bins];
+ if (NR_SNR_post == NULL) return -1;
+ NR_SNR_prio = new (std::nothrow) float32_t[N_bins];
+ if (NR_SNR_prio == NULL) return -1;
+ NR_Hk_old = new (std::nothrow) float32_t[N_bins];
+ if (NR_Hk_old == NULL) return -1;
+ NR_G = new (std::nothrow) float32_t[N_bins];
+ if (NR_G == NULL) return -1;
+ NR_Nest = new (std::nothrow) float32_t[N_bins];
+ if (NR_Nest == NULL) return -1;
+ }
+
+ //Clear out and initialise
+ for (int bindx = 0; bindx < N_bins; bindx++) {
+ NR_Hk_old[bindx] = 0.1; // old gain
+ NR_Nest[bindx] = 0.01;
+ NR_X[bindx] = 0.0;
+ NR_SNR_post[bindx] = 2.0;
+ NR_SNR_prio[bindx] = 1.0;
+ NR_G[bindx] = 0.0;
+ }
+
+ //Work out the 'bin' range for our chosen voice frequencies
+ // divide 2 to account for nyquist
+ VAD_low = VAD_low_freq / ((sample_rate_Hz / 2.0) / (float32_t) (N_bins));
+ VAD_high = VAD_high_freq / ((sample_rate_Hz / 2.0) / (float32_t) N_bins);
+
+ xih1 = powf(10, asnr / 10.0);
+ pfac = (1.0 / pspri - 1.0) * (1.0 + xih1);
+ xih1r = 1.0 / (1.0 + xih1) - 1.0;
+
+ //Configure the other things that might rely on the fft size of bitrate
+ tinc = 1.0 / (sample_rate_Hz / AUDIO_BLOCK_SAMPLES); //Frame time
+ tax = -tinc / log(tax_factor); //noise output smoothing constant in seconds = -tinc/ln(0.8)
+ tap = -tinc / log(tap_factor); //speech prob smoothing constant in seconds = -tinc/ln(0.9)
+ ap = expf(-tinc / tap); //noise output smoothing factor
+ ax = expf(-tinc / tax); //noise output smoothing factor
+
+ if (serial_debug) {
+ Serial.println(" Spectral setup with fft:" + String(N_FFT));
+ Serial.println(" FFT nblocks:" + String(myFFT.getNBuffBlocks()));
+ Serial.println(" iFFT nblocks:" + String(myIFFT.getNBuffBlocks()));
+ Serial.println(" Sample rate:" + String(sample_rate_Hz));
+ Serial.println(" bins:" + String(N_bins));
+ Serial.println(" VAD low:" + String(VAD_low));
+ Serial.println(" VAD low freq:" + String(getVADLowFreq()));
+ Serial.println(" VAD high:" + String(VAD_high));
+ Serial.println(" VAD high freq:" + String(getVADHighFreq()));
+ Serial.println(" tinc:" + String(tinc, 5));
+ Serial.println(" tax_factor:" + String(tax_factor, 5));
+ Serial.println(" tap_factor:" + String(tap_factor, 5));
+ Serial.println(" tax:" + String(tax, 5));
+ Serial.println(" tap:" + String(tap, 5));
+ Serial.println(" ax:" + String(ax, 5));
+ Serial.println(" ap:" + String(ap, 5));
+ Serial.println(" xih1:" + String(xih1, 5));
+ Serial.println(" xih1r:" + String(xih1r, 5));
+ Serial.println(" pfac:" + String(pfac, 5));
+ Serial.println(" snr_prio_min:" + String(getSNRPrioMin(), 5));
+ Serial.println(" power_threshold:" + String(getPowerThreshold(), 5));
+ Serial.println(" asnr:" + String(getAsnr(), 5));
+ Serial.println(" NR_alpha:" + String(getNRAlpha(), 5));
+ Serial.println(" NR_width:" + String(getNRWidth(), 5));
+
+ Serial.flush();
+ }
+
+ enable(true);
+ return is_enabled;
+}
+
+void AudioSpectralDenoise_F32::update(void)
+{
+ //get a pointer to the latest data
+ audio_block_f32_t *in_audio_block = AudioStream_F32::receiveReadOnly_f32();
+ if (!in_audio_block)
+ return;
+
+ //simply return the audio if this class hasn't been enabled
+ if (!is_enabled) {
+ AudioStream_F32::transmit(in_audio_block);
+ AudioStream_F32::release(in_audio_block);
+ return;
+ }
+ //******************************************************************************
+ //convert to frequency domain
+ //FFT is in complex_2N_buffer, interleaved real, imaginary, real, imaginary, etc
+ myFFT.execute(in_audio_block, complex_2N_buffer);
+
+ // Preserve the block id, so we can pass it out with our final result
+ unsigned long incoming_id = in_audio_block->id;
+
+ // We just passed ownership of in_audio_block to myFFT, so we can
+ // release it here as we won't use it here again.
+ AudioStream_F32::release(in_audio_block);
+
+ if (init_phase == 1) {
+ if (serial_debug) {
+ Serial.println("One time init");
+ Serial.flush();
+ }
+ init_phase++;
+
+ for (int bindx = 0; bindx < N_bins; bindx++) {
+ NR_G[bindx] = 1.0;
+ NR_Hk_old[bindx] = 1.0; // old gain or xu in development mode
+ NR_Nest[bindx] = 0.0;
+ pslp[bindx] = 0.5;
+ }
+ }
+ //******************************************************************************
+ //***** Calculate magnitude, used later for noise estimates and calculations
+ // AIUI, as we are only passing real values into a complex FFT, the resulting
+ // data contains duplicated mirrored data, thus we only need to evaluate the
+ // magnitude of the first half of the bins, as it will be identical to that
+ // of the second half of the bins. When we finally apply the NR results to the
+ // FFT data we apply it to both the first half and the conjugate, mirror style.
+ // Fundamentally, this saves us half the processing on some parts.
+ for (int bindx = 0; bindx < N_bins; bindx++) {
+ NR_X[bindx] =
+ (complex_2N_buffer[bindx * 2] * complex_2N_buffer[bindx * 2] +
+ complex_2N_buffer[bindx * 2 + 1] * complex_2N_buffer[bindx * 2 + 1]);
+ }
+
+ //Second stage initialisation
+ if (init_phase == 2) {
+ static int NR_init_counter = 0;
+ if (serial_debug) {
+ Serial.println("Two time init (" + String(NR_init_counter) + ")");
+ Serial.flush();
+ }
+ for (int bindx = 0; bindx < N_bins; bindx++) {
+ // we do it 20 times to average over 20 frames for app. 100ms only on
+ // NR_on/bandswitch/modeswitch,...
+ NR_Nest[bindx] = NR_Nest[bindx] + 0.05 * NR_X[bindx];
+ xt[bindx] = psini * NR_Nest[bindx];
+ }
+ NR_init_counter++;
+ if (NR_init_counter > 19) //average over 20 frames for app. 100ms
+ {
+ if (serial_debug) {
+ Serial.println("Two time init done");
+ Serial.flush();
+ }
+ NR_init_counter = 0;
+ init_phase++;
+ }
+ if (serial_debug)
+ Serial.println(" Two time loop done");
+ }
+
+ //Now we are fully initialised, we can actually do the NR processing
+ //******************************************************************************
+ //MMSE (Minimum Mean Square Error) based noise estimate
+ // code/algo inspired by the matlab based voicebox library:
+ // http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
+ // Noise estimate code can be found at:
+ // https://github.com/YouriT/matlab-speech/blob/master/MATLAB_CODE_SOURCE/voicebox/estnoiseg.m
+ for (int bindx = 0; bindx < N_bins; bindx++) {
+ float32_t xtr;
+
+ // a-posteriori speech presence probability
+ ph1y[bindx] = 1.0 / (1.0 + pfac * expf(xih1r * NR_X[bindx] / xt[bindx]));
+ // smoothed speech presence probability
+ pslp[bindx] = ap * pslp[bindx] + (1.0 - ap) * ph1y[bindx];
+
+ // limit ph1y
+ if (pslp[bindx] > psthr) {
+ ph1y[bindx] = 1.0 - pnsaf;
+ } else {
+ ph1y[bindx] = fmin(ph1y[bindx], 1.0);
+ }
+ // estimated raw noise spectrum
+ xtr = (1.0 - ph1y[bindx]) * NR_X[bindx] + ph1y[bindx] * xt[bindx];
+ // smooth the noise estimate
+ xt[bindx] = ax * xt[bindx] + (1.0 - ax) * xtr;
+ }
+
+ // Limit the ratios
+ // I don't have a lot of info on how this works, but SNRpost and SNRprio are related
+ // to both Ephraim&Malah(84) and Romanin(2009) papers
+ for (int bindx = 0; bindx < N_bins; bindx++) {
+ // limited to +30 /-15 dB, might be still too much of reduction, let's try it?
+ NR_SNR_post[bindx] = fmax(fmin(NR_X[bindx] / xt[bindx], 1000.0), snr_prio_min);
+
+ NR_SNR_prio[bindx] =
+ fmax(NR_alpha * NR_Hk_old[bindx] +
+ (1.0 - NR_alpha) * fmax(NR_SNR_post[bindx] - 1.0, 0.0), 0.0);
+ }
+
+ //******************************************************************************
+ // VAD
+ // maybe we should limit this to the signal containing bins (filtering!!)
+ for (int bindx = VAD_low; bindx < VAD_high; bindx++) {
+ float32_t v =
+ NR_SNR_prio[bindx] * NR_SNR_post[bindx] / (1.0 + NR_SNR_prio[bindx]);
+ NR_G[bindx] = 1.0 / NR_SNR_post[bindx] * sqrtf((0.7212 * v + v * v));
+ NR_Hk_old[bindx] = NR_SNR_post[bindx] * NR_G[bindx] * NR_G[bindx];
+ }
+
+ //******************************************************************************
+ // Do the musical noise reduction
+ // musical noise "artefact" reduction by dynamic averaging - depending on SNR ratio
+ pre_power = 0.0;
+ post_power = 0.0;
+ for (int bindx = VAD_low; bindx < VAD_high; bindx++) {
+ pre_power += NR_X[bindx];
+ post_power += NR_G[bindx] * NR_G[bindx] * NR_X[bindx];
+ }
+
+ power_ratio = post_power / pre_power;
+ if (power_ratio > power_threshold) {
+ power_ratio = 1.0;
+ NN = 1;
+ } else {
+ NN = 1 + 2 * (int)(0.5 +
+ NR_width * (1.0 - power_ratio / power_threshold));
+ }
+
+ for (int bindx = VAD_low + NN / 2; bindx < VAD_high - NN / 2; bindx++) {
+ NR_Nest[bindx] = 0.0;
+ for (int m = bindx - NN / 2; m <= bindx + NN / 2; m++) {
+ NR_Nest[bindx] += NR_G[m];
+ }
+ NR_Nest[bindx] /= (float32_t) NN;
+ }
+
+ // and now the edges - only going NN steps forward and taking the average
+ // lower edge
+ for (int bindx = VAD_low; bindx < VAD_low + NN / 2; bindx++) {
+ NR_Nest[bindx] = 0.0;
+ for (int m = bindx; m < (bindx + NN); m++) {
+ NR_Nest[bindx] += NR_G[m];
+ }
+ NR_Nest[bindx] /= (float32_t) NN;
+ }
+
+ // upper edge - only going NN steps backward and taking the average
+ for (int bindx = VAD_high - NN; bindx < VAD_high; bindx++) {
+ NR_Nest[bindx] = 0.0;
+ for (int m = bindx; m > (bindx - NN); m--) {
+ NR_Nest[bindx] += NR_G[m];
+ }
+ NR_Nest[bindx] /= (float32_t) NN;
+ }
+ // end of edge treatment
+
+ for (int bindx = VAD_low + NN / 2; bindx < VAD_high - NN / 2; bindx++) {
+ NR_G[bindx] = NR_Nest[bindx];
+ }
+ // end of musical noise reduction
+
+ //******************************************************************************
+ // And finally actually apply the weightings to the signals...
+ // FINAL SPECTRAL WEIGHTING: Multiply current FFT results with complex_2N_buffer for
+ // bins with the bin-specific gain factors G
+ for (int bindx = 0; bindx < N_bins; bindx++) {
+ // real part
+ complex_2N_buffer[bindx * 2] = complex_2N_buffer[bindx * 2] * NR_G[bindx];
+
+ // imag part
+ complex_2N_buffer[bindx * 2 + 1] =
+ complex_2N_buffer[bindx * 2 + 1] * NR_G[bindx];
+
+ // real part conjugate symmetric
+ //N_bins * 4 == N_FFT * 2 == N_FFT[real, imag]
+ complex_2N_buffer[N_bins * 4 - bindx * 2 - 2] =
+ complex_2N_buffer[N_bins * 4 - bindx * 2 - 2] * NR_G[bindx];
+
+ // imag part conjugate symmetric
+ complex_2N_buffer[N_bins * 4 - bindx * 2 - 1] =
+ complex_2N_buffer[N_bins * 4 - bindx * 2 - 1] * NR_G[bindx];
+ }
+
+ //******************************************************************************
+ //And finally call the IFFT, back to the time domain, and pass the processed block on
+
+ //out_block is pre-allocated in here.
+ audio_block_f32_t *out_audio_block = myIFFT.execute(complex_2N_buffer);
+
+ //update the block number to match the incoming one
+ out_audio_block->id = incoming_id;
+
+ //send the returned audio block. Don't issue the release command here because myIFFT will re-use it
+ //don't release this buffer because myIFFT re-uses it within its own code
+ AudioStream_F32::transmit(out_audio_block); //don't release this buffer because myIFFT re-uses it within its own code
+
+ return;
+}
diff --git a/AudioSpectralDenoise_F32.h b/AudioSpectralDenoise_F32.h
new file mode 100644
index 0000000..ca352e3
--- /dev/null
+++ b/AudioSpectralDenoise_F32.h
@@ -0,0 +1,198 @@
+/*
+ * AudioSpectralDenoise_F32
+ *
+ * Created: Graham Whaley, 2022
+ * Purpose: Spectral noise reduction
+ *
+ * This processes a single stream of audio data (i.e., it is mono)
+ *
+ * License: GNU GPLv3 License
+ * As the code it is derived from is GPLv3
+ *
+ * Based off the work from the UHSDR project, as also used in the mcHF and Convolution-SDR
+ * projects.
+ * Reference documentation can be found at https://github.com/df8oe/UHSDR/wiki/Noise-reduction
+ * Code extracted into isolated files can be found at
+ * https://github.com/grahamwhaley/DSPham/blob/master/spectral.cpp
+ */
+
+#ifndef _AudioSpectralDenoise_F32_h
+#define _AudioSpectralDenoise_F32_h
+
+#include "AudioStream_F32.h"
+#include
+#include "FFT_Overlapped_OA_F32.h"
+#include
+
+class AudioSpectralDenoise_F32:public AudioStream_F32 {
+//GUI: inputs:1, outputs:1 //this line used for automatic generation of GUI node
+//GUI: shortName:spectral
+public:
+ AudioSpectralDenoise_F32(void):AudioStream_F32(1, inputQueueArray_f32) {
+ };
+ AudioSpectralDenoise_F32(const AudioSettings_F32 &
+ settings):AudioStream_F32(1, inputQueueArray_f32) {
+ }
+ AudioSpectralDenoise_F32(const AudioSettings_F32 & settings,
+ const int _N_FFT):AudioStream_F32(1,
+ inputQueueArray_f32)
+ {
+ setup(settings, _N_FFT);
+ }
+
+ //destructor...release all of the memory that has been allocated
+ ~AudioSpectralDenoise_F32(void) {
+ if (complex_2N_buffer) delete complex_2N_buffer;
+ if (NR_X) delete NR_X;
+ if (ph1y) delete ph1y;
+ if (pslp) delete pslp;
+ if (xt) delete xt;
+ if (NR_SNR_post) delete NR_SNR_post;
+ if (NR_SNR_prio) delete NR_SNR_prio;
+ if (NR_Hk_old) delete NR_Hk_old;
+ if (NR_G) delete NR_G;
+ if (NR_Nest) delete NR_Nest;
+ }
+
+ //Our default FFT size is 256. That is time and space efficient, but
+ // if you are running at a 'high' sample rate, the NR 'buckets' might
+ // be quite small. You may want to use a 1024 FFT if running at 44.1KHz
+ // for instance, if you can afford the time and space overheads.
+ int setup(const AudioSettings_F32 & settings, const int _N_FFT = 256);
+
+ virtual void update(void);
+ bool enable(bool state = true) {
+ is_enabled = state;
+ return is_enabled;
+ }
+ bool enabled(void) {
+ return is_enabled;
+ }
+
+ //Getters and Setters
+ float32_t getAsnr(void) {
+ return asnr;
+ }
+ void setAsnr(float32_t v) {
+ asnr = v;
+ }
+ float32_t getVADHighFreq(void) {
+ return VAD_high_freq;
+ }
+ void setVADHighFreq(float32_t f) {
+ VAD_high_freq = f;
+ }
+ float32_t getVADLowFreq(void) {
+ return VAD_low_freq;
+ }
+ void setVADLowFreq(float32_t f) {
+ VAD_low_freq = f;
+ }
+ float32_t getNRAlpha(void) {
+ return NR_alpha;
+ }
+ void setNRAlpha(float32_t v) {
+ NR_alpha = v;
+ if (NR_alpha < 0.9)
+ NR_alpha = 0.9;
+ if (NR_alpha > 0.9999)
+ NR_alpha = 0.9999;
+ }
+ float32_t getSNRPrioMin(void) {
+ return snr_prio_min;
+ }
+ void setSNRPrioMin(float32_t v) {
+ snr_prio_min = v;
+ }
+ int16_t getNRWidth(void) {
+ return NR_width;
+ }
+ void setNRWidth(int16_t v) {
+ NR_width = v;
+ }
+ float32_t getPowerThreshold(void) {
+ return power_threshold;
+ }
+ void setPowerThreshold(float32_t v) {
+ power_threshold = v;
+ }
+ float32_t getTaxFactor(void) {
+ return tax_factor;
+ }
+ void setTaxFactor(float32_t v) {
+ tax_factor = v;
+ }
+ float32_t getTapFactor(void) {
+ return tap_factor;
+ }
+ void setTapFactor(float32_t v) {
+ tap_factor = v;
+ }
+
+private:
+ static const int max_fft = 2048; //The largest FFT FFT_OA handles. Fixed so we can fix the
+ //array sizes - FIXME - a hack, but easier than doing the dynamic allocations for now.
+
+ uint8_t init_phase = 1; //Track our phases of initialisation
+ int is_enabled = 0;
+ float32_t *complex_2N_buffer; //Store our FFT real/imag data
+ audio_block_f32_t *inputQueueArray_f32[1]; //memory pointer for the input to this module
+ FFT_Overlapped_OA_F32 myFFT;
+ IFFT_Overlapped_OA_F32 myIFFT;
+ int N_FFT = -1; //How big an FFT are we using?
+ int N_bins = -1; //How many actual data bins are we processing on
+ float sample_rate_Hz = AUDIO_SAMPLE_RATE;
+
+ //*********** NR vars
+ //Magnitudes (fabs) of power for the last four (three?) audio blocks
+ float32_t *NR_X = NULL;
+
+ float32_t *ph1y = NULL;
+ float32_t *pslp = NULL;
+ float32_t *xt = NULL;
+
+ const float32_t psini = 0.5; //initial speech probability
+ const float32_t pspri = 0.5; //prior speech probability
+ float32_t asnr = 25; //active SNR in dB - seems to make less different than I expected.
+ float32_t xih1;
+ float32_t pfac;
+ float32_t xih1r;
+
+ const float32_t psthr = 0.99; //threshold for smoothed speech probability
+ const float32_t pnsaf = 0.01; //noise probability safety value
+ float32_t tinc; //Frame time in seconds
+ float32_t tax_factor = 0.8; //Noise output smoothing factor
+ float32_t tax; //noise output smoothing constant in seconds = -tinc/ln(0.8)
+ float32_t tap_factor = 0.9; //Speech probability smoothing factor
+ float32_t tap; //speech prob smoothing constant in seconds = -tinc/ln(0.9)
+ float32_t ap; //noise output smoothing factor
+ float32_t ax; //noise output smoothing factor
+ float32_t snr_prio_min = powf(10, -(float32_t) 20 / 20.0); //Lower limit of SNR ratio calculation
+ // Time smoothing of gain weights. Makes quite a difference to the NR performance.
+ float32_t NR_alpha = 0.99; //range 0.98-0.9999. 0.95 acts much too hard: reverb effects.
+
+ float32_t *NR_SNR_post = NULL;
+ float32_t *NR_SNR_prio = NULL;
+ float32_t *NR_Hk_old = NULL;
+
+ // preliminary gain factors (before time smoothing) and after that contains the frequency
+ // smoothed gain factors
+ float32_t *NR_G = NULL;
+
+ //Our Noise estimate array - 'one dimentional' is a hangover from the old version of the
+ // original code that used multiple entries for averaging, which seems to have then been
+ // dropped, but the arrays still left in place.
+ float32_t *NR_Nest = NULL;
+
+ float32_t VAD_low_freq = 100.0;
+ float32_t VAD_high_freq = 3600.0;
+ //if we grow the FFT to 1024, these might need to be bigger than a uint8?
+ uint8_t VAD_low, VAD_high; //lower/upper bounds for 'voice spectrum' slot processing
+ int16_t NN; //used as part of VAD calculations, n-bin averaging?. Also, why an int16 ?
+ int16_t NR_width = 4;
+ float32_t pre_power, post_power; //Used in VAD calculations
+ float32_t power_ratio;
+ float32_t power_threshold = 0.4;
+};
+
+#endif
diff --git a/OpenAudio_ArduinoLibrary.h b/OpenAudio_ArduinoLibrary.h
index 29eceed..0388333 100644
--- a/OpenAudio_ArduinoLibrary.h
+++ b/OpenAudio_ArduinoLibrary.h
@@ -21,6 +21,7 @@
#include "AudioMixer_F32.h"
#include "AudioMultiply_F32.h"
#include "AudioSettings_F32.h"
+#include "AudioSpectralDenoise_F32.h"
#include "input_i2s_f32.h"
#include "output_i2s_f32.h"
#include "play_queue_f32.h"
From 0d481fc332692c1257f8fce2ee11e5eaa97a632f Mon Sep 17 00:00:00 2001
From: Graham Whaley
Date: Thu, 12 May 2022 10:24:23 +0100
Subject: [PATCH 2/3] spectral: Add Spectral NR example
Add Spectral NR example. Example switches between
'passthrough' and 'Spectral NR' processing every 10
seconds to allow comparison.
Input is via I2S and USB. Output is on I2S/headphones
Signed-off-by: Graham Whaley
---
examples/SpectralDenoise/SpectralDenoise.ino | 124 +++++++++++++++++++
1 file changed, 124 insertions(+)
create mode 100644 examples/SpectralDenoise/SpectralDenoise.ino
diff --git a/examples/SpectralDenoise/SpectralDenoise.ino b/examples/SpectralDenoise/SpectralDenoise.ino
new file mode 100644
index 0000000..0581b51
--- /dev/null
+++ b/examples/SpectralDenoise/SpectralDenoise.ino
@@ -0,0 +1,124 @@
+/* Spectral Noise reduction test program.
+ *
+ * The example takes sound in from both the I2S and USB of the Teensy/audio-daughtercard,
+ * processes it, and sends it back out the I2S/headphone ports.
+ * Every 10 seconds it switches from Spectral processing to data-passthrough and back,
+ * to aid comparison.
+ * Some information is printed on the serial monitor.
+ *
+ * This example requires the Teensy Board 'USB Type' in the Arduino Tools menu to be set
+ * to a type that includes 'Audio', and ideally 'Serial' as well. Tested with
+ * 'Serial+MIDI+Audio'.
+ * If you do not set 'Audio', you will get a compliation errors similar to:
+ * "... OpenAudio_ArduinoLibrary/USB_Audio_F32.h: In member function 'virtual void AudioOutputUSB_F32::update()':"
+ * "... OpenAudio_ArduinoLibrary/USB_Audio_F32.h:139:3: error: 'usb_out' was not declared in this scope"
+ *
+ * MIT License. use at your own risk.
+ */
+
+#include "OpenAudio_ArduinoLibrary.h"
+#include "AudioStream_F32.h"
+#include "USB_Audio_F32.h"
+#include
+#include
+#include
+#include
+#include
+
+// GUItool: begin automatically generated code
+AudioInputI2S_F32 audioInI2S1; //xy=117,343
+AudioInputUSB_F32 audioInUSB1; //xy=146,397
+AudioMixer4_F32 input_mixer; //xy=370,321
+AudioSpectralDenoise_F32 Spectral; //xy=852,250
+AudioMixer4_F32 output_mixer; //xy=993,296
+AudioSwitch4_OA_F32 processing_switch;
+AudioOutputI2S_F32 audioOutI2S1; //xy=1257,367
+AudioOutputUSB_F32 audioOutUSB1; //xy=1261,418
+
+//Inputs - mixed into one stream
+AudioConnection_F32 patchCord1(audioInI2S1, 0, input_mixer, 0);
+AudioConnection_F32 patchCord2(audioInUSB1, 0, input_mixer, 1);
+
+//route through a switch, so we can switch Spectral in/out
+AudioConnection_F32 patchCord3(input_mixer, 0, processing_switch, 0);
+
+//First route is direct - direct to the output mixer
+AudioConnection_F32 patchCord4(processing_switch, 0, output_mixer, 0);
+
+//Second route is through Spectral to the output mixer
+AudioConnection_F32 patchCord5(processing_switch, 1, Spectral, 0);
+AudioConnection_F32 patchCord6(Spectral, 0, output_mixer, 1);
+
+//And finally output the mixer to the output channels
+AudioConnection_F32 patchCord7(output_mixer, 0, audioOutI2S1, 0);
+AudioConnection_F32 patchCord8(output_mixer, 0, audioOutI2S1, 1);
+AudioConnection_F32 patchCord9(output_mixer, 0, audioOutUSB1, 0);
+AudioConnection_F32 patchCord10(output_mixer, 0, audioOutUSB1, 1);
+
+AudioControlSGTL5000 sgtl5000_1; //xy=519,146
+// GUItool: end automatically generated code
+
+AudioSettings_F32 audio_settings(AUDIO_SAMPLE_RATE_EXACT, AUDIO_BLOCK_SAMPLES);
+
+int current_cycle = 0; //Choose how we route the audio - to process or not
+
+static void spectralSetup(void){
+ //Use a 1024 FFT in this example
+ if (Spectral.setup(audio_settings, 1024) < 0 ) {
+ Serial.println("Failed to setup Spectral");
+ } else {
+ Serial.println("Spectral setup OK");
+ }
+ Serial.flush();
+}
+
+//The setup function is called once when the system starts up
+void setup(void) {
+ //Start the USB serial link (to aid debugging)
+ Serial.begin(115200); delay(500);
+ Serial.println("Setup starting...");
+
+ //Allocate dynamically shuffled memory for the audio subsystem
+ AudioMemory(30); AudioMemory_F32(30);
+
+ Serial.println("Calling Spectral setup");
+ spectralSetup();
+ Serial.println("Spectral Setup done");
+ sgtl5000_1.enable();
+ sgtl5000_1.unmuteHeadphone();
+ sgtl5000_1.volume(0.5);
+
+ //End of setup
+ Serial.println("Setup complete.");
+};
+
+
+//After setup(), the loop function loops forever.
+//Note that the audio modules are called in the background.
+//They do not need to be serviced by the loop() function.
+void loop(void) {
+ // every 'n' seconds move to the next cycle of processing.
+ if ( ((millis()/1000) % 10) == 0 ) {
+ current_cycle++;
+ if (current_cycle >= 2) current_cycle = 0;
+
+ switch( current_cycle ) {
+ case 0:
+ Serial.println("Passthrough");
+ processing_switch.setChannel(0);
+ break;
+
+ case 1:
+ Serial.println("Run Spectral NR");
+ processing_switch.setChannel(1);
+ break;
+
+ default:
+ current_cycle = 0; //oops - reset to start
+ break;
+ }
+ }
+
+ //Nap - we don't need to hard-spin...
+ delay(1000);
+};
From 762df65cadfadad306910ce1acea7b826ba34dfb Mon Sep 17 00:00:00 2001
From: Graham Whaley
Date: Thu, 12 May 2022 10:35:14 +0100
Subject: [PATCH 3/3] spectral: Add SpectralNoise to the GUI tool
Add the SpectralNoise component to the UI/GUI Audio
Design Tool.
Signed-off-by: Graham Whaley
---
docs/index.html | 109 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 109 insertions(+)
diff --git a/docs/index.html b/docs/index.html
index c2b7919..cc5e6b9 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -386,6 +386,7 @@ span.mainfunction {color: #993300; font-weight: bolder}
{"type":"AudioLMSDenoiseNotch_F32","data":{"defaults":{"name":{"value":"new"}},"shortName":"LMS","inputs":"1","output":"0","category":"filter-function","color":"#E6E0F8","icon":"arrow-in.png","outputs":"1"}},
+ {"type":"AudioSpectralDenoise_F32","data":{"defaults":{"name":{"value":"new"}},"shortName":"Spectral","inputs":"1","output":"0","category":"filter-function","color":"#E6E0F8","icon":"arrow-in.png","outputs":"1"}},
{"type":"AudioFilterFreqWeighting_F32","data":{"defaults":{"name":{"value":"new"}},"shortName":"freqWeight","inputs":"NaN","output":"0","category":"filter-function","color":"#E6E0F8","icon":"arrow-in.png","outputs":"NaN"}},
{"type":"AudioFilterTimeWeighting_F32","data":{"defaults":{"name":{"value":"new"}},"shortName":"timeWeight","inputs":"1","output":"0","category":"filter-function","color":"#E6E0F8","icon":"arrow-in.png","outputs":"1"}},
{"type":"AudioMathAdd_F32","data":{"defaults":{"name":{"value":"new"}},"shortName":"mathAdd","inputs":"2","output":"0","category":"math-function","color":"#E6E0F8","icon":"arrow-in.png","outputs":"1"}},
@@ -899,6 +900,114 @@ See Compressor and Compressor2 for complete, ready to use classes.
+
+
+