From 817b33e2d67ae330d4e74c7ded81b5812de5f02e Mon Sep 17 00:00:00 2001
From: boblark <bob@janbob.com>
Date: Sat, 13 Mar 2021 10:43:44 -0800
Subject: [PATCH] Add analyze_fft4096_iq_F32 and example ino for it

---
 analyze_fft4096_iq_F32.cpp               | 382 +++++++++++++++++++++++
 analyze_fft4096_iq_F32.h                 | 302 ++++++++++++++++++
 examples/TestFFT4096iq/TestFFT4096iq.ino |  73 +++++
 3 files changed, 757 insertions(+)
 create mode 100644 analyze_fft4096_iq_F32.cpp
 create mode 100644 analyze_fft4096_iq_F32.h
 create mode 100644 examples/TestFFT4096iq/TestFFT4096iq.ino

diff --git a/analyze_fft4096_iq_F32.cpp b/analyze_fft4096_iq_F32.cpp
new file mode 100644
index 0000000..38760cb
--- /dev/null
+++ b/analyze_fft4096_iq_F32.cpp
@@ -0,0 +1,382 @@
+/*
+ *   analyze_fft4096_iq_F32.cpp       Assembled by Bob Larkin   9 Mar 2021
+ *
+ *  This class is Teensy 4.x ONLY.
+ *  F32 Blocks are always 128 floats, and any data rate is OK.
+ *
+ * Converted to F32 floating point input and also extended
+ * for complex I and Q inputs
+ *   * Adapted all I/O to be F32 floating point for OpenAudio_ArduinoLibrary
+ *   * Future: Add outputs for I & Q FFT x2 for overlapped FFT
+ *   * Windowing None, Hann, Kaiser and Blackman-Harris.
+ *
+ * Conversion Copyright (c) 2021 Bob Larkin
+ * Same MIT license as PJRC:
+ *
+ *  Audio Library for Teensy 3.X
+ * Copyright (c) 2014, Paul Stoffregen, paul@pjrc.com
+ *
+ * Development of this audio library was funded by PJRC.COM, LLC by sales of
+ * Teensy and Audio Adaptor boards.  Please support PJRC's efforts to develop
+ * open source software by purchasing Teensy or other PJRC products.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice, development funding notice, and this permission
+ * notice shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+// ***************  TEENSY 4.X ONLY   ****************
+#if defined(__IMXRT1062__)
+
+#include <Arduino.h>
+#include "analyze_fft4096_iq_F32.h"
+
+// Note: Supports block size of 128 only.  Very "built in."
+
+// Interleave one block of I and one block of Q samples into the FFT buffer as (real, imag) pairs.
+static void copy_to_fft_buffer1(void *destination, const void *sourceI, const void *sourceQ)  {
+    float *out = (float *)destination;   // slice of the fft_buffer array; 256 floats written per call
+    const float *inI = (const float *)sourceI;
+    const float *inQ = (const float *)sourceQ;
+    for (int k = 0; k < 128; k++) {
+       out[2*k]     = inI[k];     // real part
+       out[2*k + 1] = inQ[k];     // imaginary part
+       }
+    }
+
+static void apply_window_to_fft_buffer1(void *fft_buffer, const void *window) {  // scales all 4096 complex samples
+    const float *coef = (const float *)window;   // 4096 real window coefficients
+    float *cplx = (float *)fft_buffer;           // interleaved: even index real, odd index imag
+    for (int k = 0; k < 4096; k++)  {
+       cplx[2*k]     *= coef[k];     // real part
+       cplx[2*k + 1] *= coef[k];     // imag part gets the same coefficient
+       }
+    }
+// Each call takes one I and one Q block (128 samples each) and advances a 32-state machine that spreads the FFT work.
+void AudioAnalyzeFFT4096_IQ_F32::update(void)  {
+  audio_block_f32_t *block_i,*block_q;
+  int ii;   // index into sumsq[] after the xAxis mapping
+
+  block_i = receiveReadOnly_f32(0);   // input 0 carries I
+  if (!block_i) return;
+  block_q = receiveReadOnly_f32(1);   // input 1 carries Q
+  if (!block_q)  {
+     release(block_i);   // both are needed, so give back the unpaired I block
+     return;
+     }
+  // Here with two new blocks of data
+  // States 0-15 and 19-30 only store the pair; 16-18 also post-process the last FFT; 31 runs the next FFT.
+  switch (state) {
+  case 0:
+      blocklist_i[0] = block_i;  blocklist_q[0] = block_q;
+      state = 1;
+      break;
+  case 1:
+      blocklist_i[1] = block_i;  blocklist_q[1] = block_q;
+      state = 2;
+      break;
+  case 2:
+      blocklist_i[2] = block_i;  blocklist_q[2] = block_q;
+      state = 3;
+      break;
+  case 3:
+      blocklist_i[3] = block_i;  blocklist_q[3] = block_q;
+      state = 4;
+      break;
+  case 4:
+      blocklist_i[4] = block_i;  blocklist_q[4] = block_q;
+      state = 5;
+      break;
+  case 5:
+      blocklist_i[5] = block_i;  blocklist_q[5] = block_q;
+      state = 6;
+      break;
+  case 6:
+      blocklist_i[6] = block_i;  blocklist_q[6] = block_q;
+      state = 7;
+      break;
+  case 7:
+      blocklist_i[7] = block_i;  blocklist_q[7] = block_q;
+      state = 8;
+      break;
+  case 8:
+      blocklist_i[8] = block_i;  blocklist_q[8] = block_q;
+      state = 9;
+      break;
+  case 9:
+      blocklist_i[9] = block_i;  blocklist_q[9] = block_q;
+      state = 10;
+      break;
+  case 10:
+      blocklist_i[10] = block_i;  blocklist_q[10] = block_q;
+      state = 11;
+      break;
+  case 11:
+      blocklist_i[11] = block_i;  blocklist_q[11] = block_q;
+      state = 12;
+      break;
+  case 12:
+      blocklist_i[12] = block_i;  blocklist_q[12] = block_q;
+      state = 13;
+      break;
+  case 13:
+      blocklist_i[13] = block_i;  blocklist_q[13] = block_q;
+      state = 14;
+      break;
+  case 14:
+      blocklist_i[14] = block_i;  blocklist_q[14] = block_q;
+      state = 15;
+      break;
+  case 15:
+      blocklist_i[15] = block_i;  blocklist_q[15] = block_q;
+      state = 16;
+      break;
+  case 16:
+      blocklist_i[16] = block_i;  blocklist_q[16] = block_q;
+     // NOTE(review): the first time state 16 is reached no FFT has run yet, so this pass reads fft_buffer as-is.
+     // This next forming of the sumsq[] takes 48 uSec
+     count++;
+     for (int i = 0; i < 2048; i++)   {
+        // From complex FFT the "negative frequencies" are mirrors of the frequencies above fs/2.  So, we get
+        // frequencies from 0 to fs by re-arranging the coefficients. These are powers (not Volts)
+        // See DD4WH SDR
+        float ss0 = fft_buffer[2 * i] *     fft_buffer[2 * i] +
+                    fft_buffer[2 * i + 1] * fft_buffer[2 * i + 1];
+        float ss1 = fft_buffer[2 * (i + 2048)] *     fft_buffer[2 * (i + 2048)] +
+                    fft_buffer[2 * (i + 2048) + 1] * fft_buffer[2 * (i + 2048) + 1];
+        // ss0 (FFT bin i) goes to the upper half of sumsq[], ss1 (FFT bin i+2048) to the lower half.
+        if(count==1) {       // Starting new average
+           sumsq[i+2048] = ss0;
+           sumsq[i] = ss1;
+           }
+        else if (count <= nAverage) { // Adding on to average
+           sumsq[i+2048] += ss0;
+           sumsq[i] += ss1;
+           }
+        }
+      // sumsq[] is filled.  Wait to state==17 to convert to dBFS, etc
+      state = 17;
+      break;
+  case 17:
+      blocklist_i[17] = block_i;  blocklist_q[17] = block_q;
+     // First half of the post-FFT processing: fills output[0..2047].
+     // This state==17 block takes 710 uSec for DBFS, but
+     // only 65 for POWER.  DB conversions do not need to be under
+     // this interrupt and POWER output should be used if time is short.
+     if (count >= nAverage) {    // Average is finished
+        // count = 0;
+        outputflag = false; // Avoid starting read() during block 17 to 18
+        float inAf = 1.0f/(float)nAverage;   // turns the accumulated sum into a mean
+        for (int i=0; i < 2048; i++) {
+            // xAxis, bit 0 left/right;  bit 1 low to high
+            if(xAxis & 0X02)
+               ii = i;
+            else
+               ii = i^2048;   // exchanges the two halves of sumsq[]
+            if(xAxis & 0X01)
+               ii = (4095 - ii);   // reverses the bin order
+
+            if(outputType==FFT_RMS)
+            output[i] = sqrtf(inAf*sumsq[ii]);
+            else if(outputType==FFT_POWER)
+               output[i] = inAf*sumsq[ii];
+            else if(outputType==FFT_DBFS)
+               output[i] = 10.0f*log10f(inAf*sumsq[ii])-66.23f;  // Scaled to FS sine wave
+            else
+               output[i] = 0.0f;
+            }
+            // outputflag = true;   Wait for next block
+         }  // end of Average is Finished
+      state = 18;
+      break;
+  case 18:
+      blocklist_i[18] = block_i;  blocklist_q[18] = block_q;
+
+     // Second half of post-FFT processing.  dBFS (log10f) is the big user of time.
+     if (count >= nAverage) {    // Average is finished
+        count = 0;   // the next pass through state 16 starts a new average
+        float inAf = 1.0f/(float)nAverage;
+        for (int i=2048; i < 4096; i++) {
+            // xAxis, bit 0 left/right;  bit 1 low to high
+            if(xAxis & 0X02)
+               ii = i;
+            else
+               ii = i^2048;
+            if(xAxis & 0X01)
+               ii = (4095 - ii);
+
+            if(outputType==FFT_RMS)
+            output[i] = sqrtf(inAf*sumsq[ii]);
+            else if(outputType==FFT_POWER)
+               output[i] = inAf*sumsq[ii];
+            else if(outputType==FFT_DBFS)
+               output[i] = 10.0f*log10f(inAf*sumsq[ii])-66.23f;  // Scaled to FS sine wave
+            else
+               output[i] = 0.0f;
+            }
+            outputflag = true;   // all 4096 outputs are now written; available() reports them
+         }  // end of Average is Finished
+      state = 19;
+      break;
+  case 19:
+      blocklist_i[19] = block_i;  blocklist_q[19] = block_q;
+      state = 20;
+      break;
+  case 20:
+      blocklist_i[20] = block_i;  blocklist_q[20] = block_q;
+      state = 21;
+      break;
+  case 21:
+      blocklist_i[21] = block_i;  blocklist_q[21] = block_q;
+      state = 22;
+      break;
+  case 22:
+      blocklist_i[22] = block_i;  blocklist_q[22] = block_q;
+      state = 23;
+      break;
+  case 23:
+      blocklist_i[23] = block_i;  blocklist_q[23] = block_q;
+      state = 24;
+      break;
+  case 24:
+      blocklist_i[24] = block_i;  blocklist_q[24] = block_q;
+      state = 25;
+      break;
+  case 25:
+      blocklist_i[25] = block_i;  blocklist_q[25] = block_q;
+      state = 26;
+      break;
+  case 26:
+      blocklist_i[26] = block_i;  blocklist_q[26] = block_q;
+      state = 27;
+      break;
+  case 27:
+      blocklist_i[27] = block_i;  blocklist_q[27] = block_q;
+      state = 28;
+      break;
+  case 28:
+      blocklist_i[28] = block_i;  blocklist_q[28] = block_q;
+      state = 29;
+      break;
+  case 29:
+      blocklist_i[29] = block_i;  blocklist_q[29] = block_q;
+      state = 30;
+      break;
+  case 30:
+      blocklist_i[30] = block_i;  blocklist_q[30] = block_q;
+      state = 31;
+      break;
+  case 31:
+      blocklist_i[31] = block_i;  blocklist_q[31] = block_q;
+      // All 32 block pairs (4096 samples) are now held: interleave them, window them and transform.
+      // This state==31 takes about 500 uSec, including the FFT.
+      copy_to_fft_buffer1(fft_buffer+0x000, blocklist_i[0]->data, blocklist_q[0]->data);
+      copy_to_fft_buffer1(fft_buffer+0x100, blocklist_i[1]->data, blocklist_q[1]->data);
+      copy_to_fft_buffer1(fft_buffer+0x200, blocklist_i[2]->data, blocklist_q[2]->data);
+      copy_to_fft_buffer1(fft_buffer+0x300, blocklist_i[3]->data, blocklist_q[3]->data);
+      copy_to_fft_buffer1(fft_buffer+0x400, blocklist_i[4]->data, blocklist_q[4]->data);
+      copy_to_fft_buffer1(fft_buffer+0x500, blocklist_i[5]->data, blocklist_q[5]->data);
+      copy_to_fft_buffer1(fft_buffer+0x600, blocklist_i[6]->data, blocklist_q[6]->data);
+      copy_to_fft_buffer1(fft_buffer+0x700, blocklist_i[7]->data, blocklist_q[7]->data);
+      copy_to_fft_buffer1(fft_buffer+0x800, blocklist_i[8]->data, blocklist_q[8]->data);
+      copy_to_fft_buffer1(fft_buffer+0x900, blocklist_i[9]->data, blocklist_q[9]->data);
+      copy_to_fft_buffer1(fft_buffer+0xA00, blocklist_i[10]->data, blocklist_q[10]->data);
+      copy_to_fft_buffer1(fft_buffer+0xB00, blocklist_i[11]->data, blocklist_q[11]->data);
+      copy_to_fft_buffer1(fft_buffer+0xC00, blocklist_i[12]->data, blocklist_q[12]->data);
+      copy_to_fft_buffer1(fft_buffer+0xD00, blocklist_i[13]->data, blocklist_q[13]->data);
+      copy_to_fft_buffer1(fft_buffer+0xE00, blocklist_i[14]->data, blocklist_q[14]->data);
+      copy_to_fft_buffer1(fft_buffer+0xF00, blocklist_i[15]->data, blocklist_q[15]->data);
+      copy_to_fft_buffer1(fft_buffer+0x1000, blocklist_i[16]->data, blocklist_q[16]->data);
+      copy_to_fft_buffer1(fft_buffer+0x1100, blocklist_i[17]->data, blocklist_q[17]->data);
+      copy_to_fft_buffer1(fft_buffer+0x1200, blocklist_i[18]->data, blocklist_q[18]->data);
+      copy_to_fft_buffer1(fft_buffer+0x1300, blocklist_i[19]->data, blocklist_q[19]->data);
+      copy_to_fft_buffer1(fft_buffer+0x1400, blocklist_i[20]->data, blocklist_q[20]->data);
+      copy_to_fft_buffer1(fft_buffer+0x1500, blocklist_i[21]->data, blocklist_q[21]->data);
+      copy_to_fft_buffer1(fft_buffer+0x1600, blocklist_i[22]->data, blocklist_q[22]->data);
+      copy_to_fft_buffer1(fft_buffer+0x1700, blocklist_i[23]->data, blocklist_q[23]->data);
+      copy_to_fft_buffer1(fft_buffer+0x1800, blocklist_i[24]->data, blocklist_q[24]->data);
+      copy_to_fft_buffer1(fft_buffer+0x1900, blocklist_i[25]->data, blocklist_q[25]->data);
+      copy_to_fft_buffer1(fft_buffer+0x1A00, blocklist_i[26]->data, blocklist_q[26]->data);
+      copy_to_fft_buffer1(fft_buffer+0x1B00, blocklist_i[27]->data, blocklist_q[27]->data);
+      copy_to_fft_buffer1(fft_buffer+0x1C00, blocklist_i[28]->data, blocklist_q[28]->data);
+      copy_to_fft_buffer1(fft_buffer+0x1D00, blocklist_i[29]->data, blocklist_q[29]->data);
+      copy_to_fft_buffer1(fft_buffer+0x1E00, blocklist_i[30]->data, blocklist_q[30]->data);
+      copy_to_fft_buffer1(fft_buffer+0x1F00, blocklist_i[31]->data, blocklist_q[31]->data);
+
+      if (pWin)   // NULL pWin means no window is applied
+         apply_window_to_fft_buffer1(fft_buffer, window);
+
+      // Teensyduino core for T4.x supports arm_cfft_f32
+      // arm_cfft_f32 (const arm_cfft_instance_f32 *S, float32_t *p1, uint8_t ifftFlag, uint8_t bitReverseFlag)
+      arm_cfft_f32(&Sfft, fft_buffer, 0, 1);
+       // Release the oldest 16 block pairs; the newest 16 are kept and re-used by the next FFT.
+       release(blocklist_i[0]);  release(blocklist_q[0]);
+       release(blocklist_i[1]);  release(blocklist_q[1]);
+       release(blocklist_i[2]);  release(blocklist_q[2]);
+       release(blocklist_i[3]);  release(blocklist_q[3]);
+       release(blocklist_i[4]);  release(blocklist_q[4]);
+       release(blocklist_i[5]);  release(blocklist_q[5]);
+       release(blocklist_i[6]);  release(blocklist_q[6]);
+       release(blocklist_i[7]);  release(blocklist_q[7]);
+       release(blocklist_i[8]);  release(blocklist_q[8]);
+       release(blocklist_i[9]);  release(blocklist_q[9]);
+       release(blocklist_i[10]);  release(blocklist_q[10]);
+       release(blocklist_i[11]);  release(blocklist_q[11]);
+       release(blocklist_i[12]);  release(blocklist_q[12]);
+       release(blocklist_i[13]);  release(blocklist_q[13]);
+       release(blocklist_i[14]);  release(blocklist_q[14]);
+       release(blocklist_i[15]);  release(blocklist_q[15]);
+       // Slide the newest 16 I blocks down to slots 0-15.
+       blocklist_i[0] = blocklist_i[16];
+       blocklist_i[1] = blocklist_i[17];
+       blocklist_i[2] = blocklist_i[18];
+       blocklist_i[3] = blocklist_i[19];
+       blocklist_i[4] = blocklist_i[20];
+       blocklist_i[5] = blocklist_i[21];
+       blocklist_i[6] = blocklist_i[22];
+       blocklist_i[7] = blocklist_i[23];
+       blocklist_i[8] = blocklist_i[24];
+       blocklist_i[9] = blocklist_i[25];
+       blocklist_i[10] = blocklist_i[26];
+       blocklist_i[11] = blocklist_i[27];
+       blocklist_i[12] = blocklist_i[28];
+       blocklist_i[13] = blocklist_i[29];
+       blocklist_i[14] = blocklist_i[30];
+       blocklist_i[15] = blocklist_i[31];
+       // Same slide for the Q blocks.
+       blocklist_q[0] = blocklist_q[16];
+       blocklist_q[1] = blocklist_q[17];
+       blocklist_q[2] = blocklist_q[18];
+       blocklist_q[3] = blocklist_q[19];
+       blocklist_q[4] = blocklist_q[20];
+       blocklist_q[5] = blocklist_q[21];
+       blocklist_q[6] = blocklist_q[22];
+       blocklist_q[7] = blocklist_q[23];
+       blocklist_q[8] = blocklist_q[24];
+       blocklist_q[9] = blocklist_q[25];
+       blocklist_q[10] = blocklist_q[26];
+       blocklist_q[11] = blocklist_q[27];
+       blocklist_q[12] = blocklist_q[28];
+       blocklist_q[13] = blocklist_q[29];
+       blocklist_q[14] = blocklist_q[30];
+       blocklist_q[15] = blocklist_q[31];
+       // The next pair goes to slot 16, so an FFT is run once every 16 block pairs (2048 samples).
+       state = 16;
+       break;       // From case 31
+    }  // End of switch & case 31
+  }  // End update()
+#endif
diff --git a/analyze_fft4096_iq_F32.h b/analyze_fft4096_iq_F32.h
new file mode 100644
index 0000000..3609541
--- /dev/null
+++ b/analyze_fft4096_iq_F32.h
@@ -0,0 +1,302 @@
+/*
+ *   Analyze_fft4096_iq_F32.h    Assembled by Bob Larkin   9 Mar 2021
+ *
+ *  Note: Teensy 4.x Only, 3.x not supported
+ *
+ * Does Fast Fourier Transform of a 4096 point complex (I-Q) input.
+ * Output is one of three measures of the power in each of the 4096
+ * output bins, Power, RMS level or dB relative to a full scale
+ * sine wave.  Windowing of the input data is provided for to reduce
+ * spreading of the power in the output bins.  All inputs are Teensy
+ * floating point extension (_F32) and all outputs are floating point.
+ *
+ * Features include:
+ *   * I and Q inputs are OpenAudio_Arduino Library F32 compatible.
+ *   * FFT output for every 2048 inputs, using 50% overlapped FFTs to
+ *     compensate for windowing.
+ *   * Windowing None, Hann, Kaiser and Blackman-Harris.
+ *   * Multiple bin-sum output to simulate wider bins.
+ *   * Power averaging of multiple FFT
+ *   * Soon: F32 audio outputs for I & Q
+ *
+ * Conversion Copyright (c) 2021 Bob Larkin
+ * Same MIT license as PJRC:
+ *
+ * From original real FFT:
+ *  Audio Library for Teensy 3.X
+ * Copyright (c) 2014, Paul Stoffregen, paul@pjrc.com
+ *
+ * Development of this audio library was funded by PJRC.COM, LLC by sales of
+ * Teensy and Audio Adaptor boards.  Please support PJRC's efforts to develop
+ * open source software by purchasing Teensy or other PJRC products.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice, development funding notice, and this permission
+ * notice shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* Does complex input FFT of 4096 points.  Multiple non-audio (via functions)
+ * output formats of RMS (same as I16 version, and default),
+ * Power or dBFS (full scale).  Output can be bin by bin or a pointer to
+ * the output array is available.  Several window functions are provided by
+ * in-class design, or a custom window can be provided from the INO.
+ *
+ * Functions (see comments and #defines below):
+ *   bool available()
+ *   float read(unsigned int binNumber)
+ *   float read(unsigned int binFirst, unsigned int binLast)
+ *   int windowFunction(int wNum)
+ *   int windowFunction(int wNum, float _kdb)  // Kaiser only
+ *   float* getData(void)
+ *   float* getWindow(void)
+ *   void putWindow(float *pwin)
+ *   void setNAverage(int NAve)   // >=1
+ *   void setOutputType(int _type)
+ *   void setXAxis(uint8_t _xAxis)  // 0, 1, 2, 3
+ *
+ * x-Axis direction and offset per setXAxis(xAxis) for sine to I
+ * and cosine to Q.
+ *   If xAxis=0  f=fs/2 in middle, f=0 on right edge
+ *   If xAxis=1  f=fs/2 in middle, f=0 on left edge
+ *   If xAxis=2  f=fs/2 on left edge, f=0 in middle
+ *   If xAxis=3  f=fs/2 on right edge, f=0 in middle
+ * If there is 180 degree phase shift to I or Q these all get reversed.
+ *
+ * Timing, max is longest update() time:
+ *   T4.0 Windowed, dBFS Out, 987 uSec
+ *
+ * Scaling:
+ *   Full scale for floating point DSP is a nebulous concept.  Normally the
+ *   full scale is -1.0 to +1.0.  This is an unscaled FFT and for a sine
+ *   wave centered in frequency on a bin and of FS amplitude, the power
+ *   at that center bin will grow by 4096^2/4 = about 4 million without windowing.
+ *   Windowing loss cuts this down.  The RMS level can grow without windowing to
+ *   4096.  The dBFS has been scaled to make this max value 0 dBFS by
+ *   removing 66.2 dB.  With floating point, the dynamic range is maintained
+ *   no matter how it is scaled, but this factor needs to be considered
+ *   when building the INO.
+ */
+
+#ifndef analyze_fft4096iq_h_
+#define analyze_fft4096iq_h_
+
+// ***************  TEENSY 4.X ONLY   ****************
+#if defined(__IMXRT1062__)
+
+#include "Arduino.h"
+#include "AudioStream_F32.h"
+#include "arm_math.h"
+#include "mathDSP_F32.h"
+#include "arm_const_structs.h"
+
+#define FFT_RMS 0      // output is sqrtf() of the averaged bin power
+#define FFT_POWER 1    // output is the averaged bin power
+#define FFT_DBFS 2     // output is 10*log10f(averaged bin power) - 66.23
+
+#define NO_WINDOW 0                       // windowFunction() sets pWin to NULL for this value
+#define AudioWindowNone 0                 // same value as NO_WINDOW
+#define AudioWindowHanning4096 1
+#define AudioWindowKaiser4096 2           // needs the two-argument windowFunction(wNum, _kdb)
+#define AudioWindowBlackmanHarris4096 3
+
+class AudioAnalyzeFFT4096_IQ_F32 : public AudioStream_F32  {
+//GUI: inputs:2, outputs:4  //this line used for automatic generation of GUI node
+//GUI: shortName:FFT4096IQ
+public:
+    AudioAnalyzeFFT4096_IQ_F32() : AudioStream_F32(2, inputQueueArray) {
+        // __MK20DX128__ T_LC;  __MKL26Z64__ T3.0;  __MK20DX256__T3.1 and T3.2
+        // __MK64FX512__) T3.5; __MK66FX1M0__ T3.6; __IMXRT1062__ T4.0 and T4.1
+        // Two inputs (0 = I, 1 = Q); the analyzer starts out with a Hann window.
+        // Teensy4 core library has the right files for new FFT
+        // arm CMSIS library has predefined structures of type arm_cfft_instance_f32
+        Sfft = arm_cfft_sR_f32_len4096;   // This is one of the structures
+        useHanningWindow();
+    }
+    // There is no variant for "settings," as blocks other than 128 are
+    // not supported and nothing depends on sample rate, so we don't need that.
+
+    // Returns true once for each completed output; this call clears the flag.
+    bool available() {
+#if defined(__IMXRT1062__)
+        if (outputflag == true) {
+            outputflag = false;  // No double returns
+            return true;
+        }
+        return false;
+#else
+        // Don't know how you got this far, but....
+        Serial.println("Teensy 3.x NOT SUPPORTED");
+        return false;
+#endif
+    }
+
+    // Returns a single bin output; bin numbers above 4095 return 0.0
+    float read(unsigned int binNumber) {
+        if (binNumber > 4095) return 0.0;   // unsigned, so no test against 0 is needed
+        return output[binNumber];
+    }
+
+    // Return sum of several bins. Normally use with power output.
+    // This produces the equivalent of bigger bins.
+    float read(unsigned int binFirst, unsigned int binLast) {
+        if (binFirst > binLast) {
+            unsigned int tmp = binLast;
+            binLast = binFirst;
+            binFirst = tmp;
+        }
+        if (binFirst > 4095) return 0.0;
+        if (binLast > 4095) binLast = 4095;
+        float sum = 0;
+        do {
+            sum += output[binFirst++];
+        } while (binFirst <= binLast);
+        return sum;
+    }
+
+    // Sets None, Hann, or Blackman-Harris window with no parameter.
+    int windowFunction(int wNum) {
+       if(wNum == AudioWindowKaiser4096)
+          return -1;                 // Kaiser needs the kdb
+       windowFunction(wNum, 0.0f);
+       return 0;
+    }
+    // Selects a window by number; _kdb is used by Kaiser only and is raised to at least 20.
+    int windowFunction(int wNum, float _kdb) {
+      float kd;
+      pWin = window;
+      if(wNum == NO_WINDOW)
+         pWin = NULL;
+      else if (wNum == AudioWindowKaiser4096)  {
+         if(_kdb<20.0f)
+            kd = 20.0f;
+         else
+            kd = _kdb;
+         useKaiserWindow(kd);
+         }
+      else if (wNum == AudioWindowBlackmanHarris4096)
+         useBHWindow();
+     else
+         useHanningWindow();   // Default
+     return 0;
+     }
+
+    // Fast pointer transfer.  Be aware that update() rewrites this array
+    // each time an average completes, so use it soon after available().
+    float* getData(void)  {
+       // available() sets outputflag false
+       return output;
+       }
+
+    // You can use this to design windows
+    float* getWindow(void)  {
+       return window;
+       }
+
+    // Bring custom window from the INO; pwin must point to 4096 floats
+    void putWindow(float *pwin)  {
+       pWin = window;   // make sure the window is applied, even after NO_WINDOW
+       for(int i=0; i<4096; i++)
+          window[i] = pwin[i];   // Copy for the FFT
+       }
+
+    // Number of FFT averaged in the output; values below 1 are treated as 1
+    void setNAverage(int _nAverage)  {
+       nAverage = (_nAverage < 1) ? 1 : _nAverage;   // update() divides by nAverage
+       }
+
+    // Output RMS (default), power or dBFS (FFT_RMS, FFT_POWER, FFT_DBFS)
+    void setOutputType(int _type)  {
+       outputType = _type;
+       }
+
+    // xAxis, bit 0 left/right;  bit 1 low to high;  default 0X03
+    void setXAxis(uint8_t _xAxis)  {
+       xAxis = _xAxis;
+       }
+
+  virtual void update(void);
+
+private:
+  float output[4096];
+  float window[4096];
+  float *pWin = window;      // NULL means update() applies no window
+  float fft_buffer[8192];    // 4096 complex values, interleaved real/imag
+  float sumsq[4096];  // Power sums, one per bin.  Avoid re-use of output[]
+  uint8_t state = 0;
+  bool outputflag = false;
+  audio_block_f32_t *inputQueueArray[2];
+  audio_block_f32_t *blocklist_i[32];
+  audio_block_f32_t *blocklist_q[32];
+  // For T4.x
+  // Local copy of the CMSIS arm_cfft_sR_f32_len4096 structure, set in the constructor
+  arm_cfft_instance_f32 Sfft;
+
+  int outputType = FFT_RMS;  //Same type as I16 version init
+  int count = 0;
+  int nAverage = 1;
+  uint8_t xAxis = 0x03;
+
+    // The Hann window is a good all-around window
+    void useHanningWindow(void) {
+        for (int i=0; i < 4096; i++) {
+           // 2*PI/4095 = 0.00153435538
+           window[i] = 0.5*(1.0 - cosf(0.00153435538f*(float)i));
+        }
+    }
+
+    // Blackman-Harris produces a first sidelobe more than 90 dB down.
+    // The price is a bandwidth of about 2 bins.  Very useful at times.
+    void useBHWindow(void) {
+        for (int i=0; i < 4096; i++) {
+           float kx = 0.00153435538f;  // 2*PI/4095
+           float ix = (float) i;
+           window[i] = 0.35875 -
+                       0.48829*cosf(     kx*ix) +
+                       0.14128*cosf(2.0f*kx*ix) -
+                       0.01168*cosf(3.0f*kx*ix);
+        }
+    }
+
+    /* The windowing function here is that of James Kaiser.  This has a number
+     * of desirable features. The sidelobes drop off as the frequency away from a transition.
+     * Also, the tradeoff of sidelobe level versus cutoff rate is variable.
+     * Here we specify it in terms of kdb, the highest sidelobe, in dB, next to a sharp cutoff. For
+     * calculating the windowing vector, we need a parameter beta, found as follows:
+     */
+    void useKaiserWindow(float kdb)  {
+       float32_t beta, kbes, xn2;
+       mathDSP_F32 mathEqualizer;  // For Bessel function
+
+       if (kdb < 20.0f)
+           beta = 0.0;
+       else
+           beta = -2.17+0.17153*kdb-0.0002841*kdb*kdb; // Within a dB or so
+
+       // Note: i0f is the fp zero'th order modified Bessel function (see mathDSP_F32.h)
+       kbes = 1.0f / mathEqualizer.i0f(beta);      // An additional derived parameter used in loop
+       for (int n=0; n<2048; n++) {   // 2048 mirrored pairs fill all 4096 window entries
+          xn2 = 0.5f+(float32_t)n;
+          // xn2 becomes (2*(n+0.5)/4095)^2, the squared distance from the window center
+          // 4/(4095^2) = 2.3853504E-7
+          xn2 = 2.3853504E-7*xn2*xn2;
+          window[2047 - n]=kbes*(mathEqualizer.i0f(beta*sqrtf(1.0-xn2)));
+          window[2048 + n] = window[2047 - n];
+       }
+    }
+  };
+#endif
+#endif
diff --git a/examples/TestFFT4096iq/TestFFT4096iq.ino b/examples/TestFFT4096iq/TestFFT4096iq.ino
new file mode 100644
index 0000000..c11d615
--- /dev/null
+++ b/examples/TestFFT4096iq/TestFFT4096iq.ino
@@ -0,0 +1,73 @@
+
+// TestFFT4096iq.ino  for Teensy 4.x
+// Bob Larkin 9 March 2021
+
+// Generate Sin and Cosine pair and input to IQ FFT.
+// Serial Print out powers of all 4096 bins in
+// dB relative to Sine Wave Full Scale
+
+// Public Domain
+
+#include "OpenAudio_ArduinoLibrary.h"
+#include "AudioStream_F32.h"
+// Signal chain: sine_cos1 outputs 0 and 1 are patched to FFT4096iq1 inputs 0 and 1.
+// GUItool: begin automatically generated code
+AudioSynthSineCosine_F32   sine_cos1;       //xy=76,532
+AudioAnalyzeFFT4096_IQ_F32 FFT4096iq1;      //xy=243,532
+AudioOutputI2S_F32         audioOutI2S1;    //xy=246,591
+AudioConnection_F32        patchCord1(sine_cos1, 0, FFT4096iq1, 0);
+AudioConnection_F32        patchCord2(sine_cos1, 1, FFT4096iq1, 1);
+// GUItool: end automatically generated code
+
+void setup(void) {
+  // One-time configuration: serial port, audio memory, test tone and FFT options.
+  Serial.begin(9600);
+  delay(1000);
+
+  // The 4096 complex FFT needs 32 F32 memory for real and 32 for imag.
+  // Set memory to more than 64, depending on other usage.
+  AudioMemory_F32(100);
+  Serial.println("FFT4096IQ Test");
+
+  sine_cos1.amplitude(1.0f); // Initialize Waveform Generator
+
+  // Pick T4.x bin center
+  //sine_cos1.frequency(689.0625f);
+
+  // or pick any old frequency
+  sine_cos1.frequency(1000.0f);
+
+  // Select the output format
+  FFT4096iq1.setOutputType(FFT_DBFS);
+
+  // Select the window function
+  //FFT4096iq1.windowFunction(AudioWindowNone);
+  //FFT4096iq1.windowFunction(AudioWindowHanning4096);
+  //FFT4096iq1.windowFunction(AudioWindowKaiser4096, 55.0f);
+  FFT4096iq1.windowFunction(AudioWindowBlackmanHarris4096);
+
+  // Uncomment to Serial print window function
+  // float* pw = FFT4096iq1.getWindow();   // Print window
+  // for (int i=0; i<4096; i++) Serial.println(pw[i], 7);
+
+  // xAxis, bit 0 left/right;  bit 1 low to high;  default 0X03
+  FFT4096iq1.setXAxis(0X03);
+
+  FFT4096iq1.setNAverage(1);   // 1 = every FFT is reported on its own, no averaging
+  delay(100);
+  }
+
+void loop(void)  {
+  static bool printPending = true;    // the spectrum is dumped one time only
+  // available() is polled on every pass, which also clears its flag
+  const bool ready = FFT4096iq1.available();
+  if (ready && printPending)  {
+      const float *spectrum = FFT4096iq1.getData();
+      for (int bin = 0; bin < 4096; bin++)
+        Serial.println(spectrum[bin], 8);
+      printPending = false;
+      }
+  Serial.print(" Audio MEM Float32 Peak: ");
+  Serial.println(AudioMemoryUsageMax_F32());
+  delay(500);
+  }