From 70b8ce6c4449f36a29d7390a0740735a5217e089 Mon Sep 17 00:00:00 2001
From: boblark <bob@janbob.com>
Date: Mon, 21 Feb 2022 21:13:33 -0800
Subject: [PATCH] Initial add of analyze_fft4096_iqem_F32

---
 OpenAudio_ArduinoLibrary.h                   |   1 +
 analyze_fft4096_iqem_F32.cpp                 | 429 +++++++++++++++++++
 analyze_fft4096_iqem_F32.h                   | 348 +++++++++++++++
 examples/TestFFT4096iqEM/TestFFT4096iqEM.ino |  94 ++++
 keywords.txt                                 |   1 +
 5 files changed, 873 insertions(+)
 create mode 100644 analyze_fft4096_iqem_F32.cpp
 create mode 100644 analyze_fft4096_iqem_F32.h
 create mode 100644 examples/TestFFT4096iqEM/TestFFT4096iqEM.ino

diff --git a/OpenAudio_ArduinoLibrary.h b/OpenAudio_ArduinoLibrary.h
index 21dea21..f6a957d 100644
--- a/OpenAudio_ArduinoLibrary.h
+++ b/OpenAudio_ArduinoLibrary.h
@@ -35,6 +35,7 @@
 #include "analyze_fft1024_iq_F32.h"
 #include "analyze_fft2048_iq_F32.h"
 #include "analyze_fft4096_iq_F32.h"
+#include "analyze_fft4096_iqem_F32.h"
 #include "analyze_peak_f32.h"
 #include "analyze_rms_f32.h"
 #include "analyze_tonedetect_F32.h"
diff --git a/analyze_fft4096_iqem_F32.cpp b/analyze_fft4096_iqem_F32.cpp
new file mode 100644
index 0000000..9d600df
--- /dev/null
+++ b/analyze_fft4096_iqem_F32.cpp
@@ -0,0 +1,429 @@
+/*
+ *   analyze_fft4096_iqem_F32.cpp     Assembled by Bob Larkin   9 Mar 2021
+ *
+ * External Memory  ****  BETA TEST VERSION - NOT FULLY TESTED **** <<<<<<<<<<
+ *
+ *  This class is Teensy 4.x ONLY.
+ *  F32 blocks are always 128 floats, and any data rate is OK.
+ *
+ * Converted to F32 floating point input and also extended
+ * for complex I and Q inputs
+ *   * Adapted all I/O to be F32 floating point for OpenAudio_ArduinoLibrary
+ *   * Future: Add outputs for I & Q FFT x2 for overlapped FFT
+ *   * Windowing None, Hann, Kaiser and Blackman-Harris.
+ *
+ * Conversion Copyright (c) 2021 Bob Larkin
+ * Same MIT license as PJRC:
+ *
+ *  Audio Library for Teensy 3.X
+ * Copyright (c) 2014, Paul Stoffregen, paul@pjrc.com
+ *
+ * Development of this audio library was funded by PJRC.COM, LLC by sales of
+ * Teensy and Audio Adaptor boards.  Please support PJRC's efforts to develop
+ * open source software by purchasing Teensy or other PJRC products.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice, development funding notice, and this permission
+ * notice shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+// ***************  TEENSY 4.X ONLY   ****************
+#if defined(__IMXRT1062__)
+
+#include <Arduino.h>
+#include "analyze_fft4096_iqem_F32.h"
+
+// Note: Supports block size of 128 only.  Very "built in."
+
+// Interleave one block of I samples and one block of Q samples into the FFT buffer.
+static void copy_to_fft_buffer1(void *destination, const void *sourceI, const void *sourceQ)  {
+    const float *inI = static_cast<const float *>(sourceI);
+    const float *inQ = static_cast<const float *>(sourceQ);
+    float *out = static_cast<float *>(destination);  // 256 floats are written per call
+    for (int k = 0; k < 128; k++) {
+       out[2*k]     = inI[k];   // real sample goes to the even index
+       out[2*k + 1] = inQ[k];   // imag sample goes to the odd index
+       }
+    }
+
+// Takes one new block from input 0 (I) and input 1 (Q) and stores the pointers.
+// State 31 windows the 32 stored block pairs and runs the 4096-point FFT; states
+// 16 to 18 convert that FFT to pOutput[].  The newest 16 pairs are kept for overlap.
+void AudioAnalyzeFFT4096_IQEM_F32::update(void)  {
+  audio_block_f32_t *block_i,*block_q;
+  int i, ii;
+
+  block_i = receiveReadOnly_f32(0);
+  if (!block_i) return;
+  block_q = receiveReadOnly_f32(1);
+  if (!block_q)  {
+     release(block_i);
+     return;
+     }
+  // Here with two new blocks of data.  These are retained until the FFT
+  // but with new pointers, blocklist_i[] and blocklist_q[].
+  switch (state) {
+  case 0:
+      blocklist_i[0] = block_i;  blocklist_q[0] = block_q;  // Copy 2 ptrs
+      state = 1;
+      break;
+  case 1:
+      blocklist_i[1] = block_i;  blocklist_q[1] = block_q;
+      state = 2;
+      break;
+  case 2:
+      blocklist_i[2] = block_i;  blocklist_q[2] = block_q;
+      state = 3;
+      break;
+  case 3:
+      blocklist_i[3] = block_i;  blocklist_q[3] = block_q;
+      state = 4;
+      break;
+  case 4:
+      blocklist_i[4] = block_i;  blocklist_q[4] = block_q;
+      state = 5;
+      break;
+  case 5:
+      blocklist_i[5] = block_i;  blocklist_q[5] = block_q;
+      state = 6;
+      break;
+  case 6:
+      blocklist_i[6] = block_i;  blocklist_q[6] = block_q;
+      state = 7;
+      break;
+  case 7:
+      blocklist_i[7] = block_i;  blocklist_q[7] = block_q;
+      state = 8;
+      break;
+  case 8:
+      blocklist_i[8] = block_i;  blocklist_q[8] = block_q;
+      state = 9;
+      break;
+  case 9:
+      blocklist_i[9] = block_i;  blocklist_q[9] = block_q;
+      state = 10;
+      break;
+  case 10:
+      blocklist_i[10] = block_i;  blocklist_q[10] = block_q;
+      state = 11;
+      break;
+  case 11:
+      blocklist_i[11] = block_i;  blocklist_q[11] = block_q;
+      state = 12;
+      break;
+  case 12:
+      blocklist_i[12] = block_i;  blocklist_q[12] = block_q;
+      state = 13;
+      break;
+  case 13:
+      blocklist_i[13] = block_i;  blocklist_q[13] = block_q;
+      state = 14;
+      break;
+  case 14:
+      blocklist_i[14] = block_i;  blocklist_q[14] = block_q;
+      state = 15;
+      break;
+  case 15:
+      blocklist_i[15] = block_i;  blocklist_q[15] = block_q;
+      state = 16;
+      break;
+  //  ********************************************************
+  // Once things are running, the loop comes back to this point
+  case 16:
+      blocklist_i[16] = block_i;  blocklist_q[16] = block_q;
+
+     // Now work on the FFT output data.  This was created in case 31.
+     // This next forming of the sumsq[] takes 48 uSec
+     count++;
+     for (int i = 0; i < 2048; i++)   {
+        // Re-arranging the coefficients. These are bin powers (not Volts)
+        // See DD4WH SDR
+        float ss0 = *(pFFT_buffer + 2*i) * *(pFFT_buffer + 2*i) +
+                    *(pFFT_buffer + 2*i+1) * *(pFFT_buffer + 2*i+1);
+        float ss1 = *(pFFT_buffer + 2*(i+2048)) * *(pFFT_buffer + 2*(i+2048)) +
+                    *(pFFT_buffer + 2*(i+2048)+1) * *(pFFT_buffer + 2*(i+2048)+1);
+
+        if(!(pSumsq==NULL)) {            // We have memory to do averages
+           if(count==1) {                // Starting new average
+              *(pSumsq+i+2048) = ss0;
+              *(pSumsq+i) = ss1;
+              }
+           else if (count <= nAverage) { // Adding on to average
+              *(pSumsq+i+2048) += ss0;
+              *(pSumsq+i) += ss1;
+              }
+           }
+        else                             // No averaging is used
+           {
+           // Parts of pFFT_buffer are becoming available for
+           // temporary storage, but not all:
+           *(pFFT_buffer+i) = ss0;
+           *(pFFT_buffer+4096+i) = ss1;
+           // Now in pFFT_buffer 0,2047 and 4096,6143
+           }
+        }
+
+      // sumsq[] is filled.  Wait to state==17 to convert to dBFS, etc
+      state = 17;
+      break;
+  case 17:
+      blocklist_i[17] = block_i;  blocklist_q[17] = block_q;
+
+     // This state==17 block takes 710 uSec for DBFS, but
+     // only 65 for POWER.  DB conversions do not need to be under
+     // this interrupt and POWER output should be used if time is short.
+     if (pSumsq==NULL || count>=nAverage) { // Average is not being done or is finished
+        outputflag = false; // Avoid starting read() during block 17 to 18
+        float inAf = 1.0f/(float)nAverage;
+        for (ii=0; ii < 2048; ii++) {
+            // xAxis, bit 0 left/right;  bit 1 low to high
+            if(xAxis & 0X02)
+               i = ii;
+            else
+               i = ii^2048;
+
+            if(xAxis & 0X01)
+               i = (4095 - i);
+
+            if(!(pSumsq==NULL)) { // We have memory to do averages
+               if(outputType==FFT_RMS)
+                  *(pOutput+i) = sqrtf(inAf* *(pSumsq+ii));
+               else if(outputType==FFT_POWER)
+                  *(pOutput+i) = inAf* *(pSumsq+ii);
+               else if(outputType==FFT_DBFS)
+                  *(pOutput+i) = 10.0f*log10f(inAf* *(pSumsq+ii))-66.23f; // Scaled to FS sine wave
+               else
+                  *(pOutput+i) = 0.0f;
+               }
+            else {               // No averaging
+               if(outputType==FFT_RMS)
+                  *(pOutput+i) = sqrtf(*(pFFT_buffer+ii));
+               else if(outputType==FFT_POWER)
+                  *(pOutput+i) = *(pFFT_buffer+ii);
+               else if(outputType==FFT_DBFS)
+                  *(pOutput+i) = 10.0f*log10f(*(pFFT_buffer+ii))-66.23f;
+               else                // Unknown output type, zero as in state 18
+                  *(pOutput+i) = 0.0f;
+               }  // End, no averaging
+            }  // End of "over all i"
+         }  // end of Average is Finished
+      state = 18;
+      break;
+  case 18:
+      blocklist_i[18] = block_i;  blocklist_q[18] = block_q;
+
+     // Second half of post-FFT processing.  dBFS (log10f) is the big user of time.
+     if (pSumsq==NULL || count>=nAverage) {    // Average is finished
+        count = 0;                  // Restart; count is incremented in state 16
+        float inAf = 1.0f/(float)nAverage;
+        // ii is the index to data source, i is for data output
+        for (int ii=2048; ii < 4096; ii++) {
+            // xAxis, bit 0 left/right;  bit 1 low to high
+            if(xAxis & 0X02)
+               i = ii;
+            else
+               i = ii^2048;
+
+            if(xAxis & 0X01)
+               i = (4095 - i);
+
+            if(!(pSumsq==NULL)) {     // We have memory to do averages
+                if(outputType==FFT_RMS)
+                   *(pOutput+i) = sqrtf(inAf* *(pSumsq+ii));
+                else if(outputType==FFT_POWER)
+                   *(pOutput+i) = inAf* *(pSumsq+ii);
+                else if(outputType==FFT_DBFS)
+                   *(pOutput+i) = 10.0f*log10f(inAf* *(pSumsq+ii))-66.23f;  // Scaled to FS sine wave
+                else
+                   *(pOutput+i) = 0.0f;
+                }
+            else {                     // No averaging being done
+               if(outputType==FFT_RMS)
+                  *(pOutput+i) = sqrtf(*(pFFT_buffer+ii+2048));
+               else if(outputType==FFT_POWER)
+                  *(pOutput+i) = *(pFFT_buffer+ii+2048);
+               else if(outputType==FFT_DBFS)
+                  *(pOutput+i) = 10.0f*log10f(*(pFFT_buffer+ii+2048))-66.23f;
+               else
+                   *(pOutput+i) = 0.0f;
+                }
+            }
+            outputflag = true;
+        }  // end of Average is Finished
+      state = 19;
+      break;
+  case 19:
+      blocklist_i[19] = block_i;  blocklist_q[19] = block_q;
+      state = 20;
+      break;
+  case 20:
+      blocklist_i[20] = block_i;  blocklist_q[20] = block_q;
+      state = 21;
+      break;
+  case 21:
+      blocklist_i[21] = block_i;  blocklist_q[21] = block_q;
+      state = 22;
+      break;
+  case 22:
+      blocklist_i[22] = block_i;  blocklist_q[22] = block_q;
+      state = 23;
+      break;
+  case 23:
+      blocklist_i[23] = block_i;  blocklist_q[23] = block_q;
+      state = 24;
+      break;
+  case 24:
+      blocklist_i[24] = block_i;  blocklist_q[24] = block_q;
+      state = 25;
+      break;
+  case 25:
+      blocklist_i[25] = block_i;  blocklist_q[25] = block_q;
+      state = 26;
+      break;
+  case 26:
+      blocklist_i[26] = block_i;  blocklist_q[26] = block_q;
+      state = 27;
+      break;
+  case 27:
+      blocklist_i[27] = block_i;  blocklist_q[27] = block_q;
+      state = 28;
+      break;
+  case 28:
+      blocklist_i[28] = block_i;  blocklist_q[28] = block_q;
+      state = 29;
+      break;
+  case 29:
+      blocklist_i[29] = block_i;  blocklist_q[29] = block_q;
+      state = 30;
+      break;
+  case 30:
+      blocklist_i[30] = block_i;  blocklist_q[30] = block_q;
+      state = 31;
+      break;
+  case 31:
+      blocklist_i[31] = block_i;  blocklist_q[31] = block_q;
+      // Copy 8192 data to fft_buffer This state==31 takes about 500 uSec, including the FFT.
+      // i & q interleaved data.
+      copy_to_fft_buffer1(pFFT_buffer+0x000, blocklist_i[0]->data, blocklist_q[0]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x100, blocklist_i[1]->data, blocklist_q[1]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x200, blocklist_i[2]->data, blocklist_q[2]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x300, blocklist_i[3]->data, blocklist_q[3]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x400, blocklist_i[4]->data, blocklist_q[4]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x500, blocklist_i[5]->data, blocklist_q[5]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x600, blocklist_i[6]->data, blocklist_q[6]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x700, blocklist_i[7]->data, blocklist_q[7]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x800, blocklist_i[8]->data, blocklist_q[8]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x900, blocklist_i[9]->data, blocklist_q[9]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0xA00, blocklist_i[10]->data, blocklist_q[10]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0xB00, blocklist_i[11]->data, blocklist_q[11]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0xC00, blocklist_i[12]->data, blocklist_q[12]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0xD00, blocklist_i[13]->data, blocklist_q[13]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0xE00, blocklist_i[14]->data, blocklist_q[14]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0xF00, blocklist_i[15]->data, blocklist_q[15]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x1000, blocklist_i[16]->data, blocklist_q[16]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x1100, blocklist_i[17]->data, blocklist_q[17]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x1200, blocklist_i[18]->data, blocklist_q[18]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x1300, blocklist_i[19]->data, blocklist_q[19]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x1400, blocklist_i[20]->data, blocklist_q[20]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x1500, blocklist_i[21]->data, blocklist_q[21]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x1600, blocklist_i[22]->data, blocklist_q[22]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x1700, blocklist_i[23]->data, blocklist_q[23]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x1800, blocklist_i[24]->data, blocklist_q[24]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x1900, blocklist_i[25]->data, blocklist_q[25]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x1A00, blocklist_i[26]->data, blocklist_q[26]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x1B00, blocklist_i[27]->data, blocklist_q[27]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x1C00, blocklist_i[28]->data, blocklist_q[28]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x1D00, blocklist_i[29]->data, blocklist_q[29]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x1E00, blocklist_i[30]->data, blocklist_q[30]->data);
+      copy_to_fft_buffer1(pFFT_buffer+0x1F00, blocklist_i[31]->data, blocklist_q[31]->data);
+
+    // Apply the window function, if any, to the time series.  Half size window buffer.
+    if(wNum!=NO_WINDOW && pWindow)
+      {
+      for (int i=0; i < 2048; i++)  {
+         *(pFFT_buffer + 2*i) *= *(pWindow + i);      // real
+         *(pFFT_buffer + 2*i+1) *= *(pWindow + i);    // imag
+         }
+      for (int i=0; i < 2048; i++)  {                 // Second half, window mirrored
+         *(pFFT_buffer + 8191 - 2*i) *= *(pWindow + i);
+         *(pFFT_buffer + 8190 - 2*i) *= *(pWindow + i);
+         }
+      }
+
+      // Teensyduino core for T4.x supports arm_cfft_f32
+      // arm_cfft_f32 (const arm_cfft_instance_f32 *S, float32_t *p1,
+      //                     uint8_t ifftFlag, uint8_t bitReverseFlag)
+      // I & O are real/imag interleaved in 8192-float point array p1.
+      arm_cfft_f32(&Sfft, pFFT_buffer, 0, 1);
+
+      release(blocklist_i[0]);  release(blocklist_q[0]);
+      release(blocklist_i[1]);  release(blocklist_q[1]);
+      release(blocklist_i[2]);  release(blocklist_q[2]);
+      release(blocklist_i[3]);  release(blocklist_q[3]);
+      release(blocklist_i[4]);  release(blocklist_q[4]);
+      release(blocklist_i[5]);  release(blocklist_q[5]);
+      release(blocklist_i[6]);  release(blocklist_q[6]);
+      release(blocklist_i[7]);  release(blocklist_q[7]);
+      release(blocklist_i[8]);  release(blocklist_q[8]);
+      release(blocklist_i[9]);  release(blocklist_q[9]);
+      release(blocklist_i[10]);  release(blocklist_q[10]);
+      release(blocklist_i[11]);  release(blocklist_q[11]);
+      release(blocklist_i[12]);  release(blocklist_q[12]);
+      release(blocklist_i[13]);  release(blocklist_q[13]);
+      release(blocklist_i[14]);  release(blocklist_q[14]);
+      release(blocklist_i[15]);  release(blocklist_q[15]);
+
+      blocklist_i[0] = blocklist_i[16];
+      blocklist_i[1] = blocklist_i[17];
+      blocklist_i[2] = blocklist_i[18];
+      blocklist_i[3] = blocklist_i[19];
+      blocklist_i[4] = blocklist_i[20];
+      blocklist_i[5] = blocklist_i[21];
+      blocklist_i[6] = blocklist_i[22];
+      blocklist_i[7] = blocklist_i[23];
+      blocklist_i[8] = blocklist_i[24];
+      blocklist_i[9] = blocklist_i[25];
+      blocklist_i[10] = blocklist_i[26];
+      blocklist_i[11] = blocklist_i[27];
+      blocklist_i[12] = blocklist_i[28];
+      blocklist_i[13] = blocklist_i[29];
+      blocklist_i[14] = blocklist_i[30];
+      blocklist_i[15] = blocklist_i[31];
+
+      blocklist_q[0] = blocklist_q[16];
+      blocklist_q[1] = blocklist_q[17];
+      blocklist_q[2] = blocklist_q[18];
+      blocklist_q[3] = blocklist_q[19];
+      blocklist_q[4] = blocklist_q[20];
+      blocklist_q[5] = blocklist_q[21];
+      blocklist_q[6] = blocklist_q[22];
+      blocklist_q[7] = blocklist_q[23];
+      blocklist_q[8] = blocklist_q[24];
+      blocklist_q[9] = blocklist_q[25];
+      blocklist_q[10] = blocklist_q[26];
+      blocklist_q[11] = blocklist_q[27];
+      blocklist_q[12] = blocklist_q[28];
+      blocklist_q[13] = blocklist_q[29];
+      blocklist_q[14] = blocklist_q[30];
+      blocklist_q[15] = blocklist_q[31];
+
+      state = 16;
+      break;       // From case 31
+    } // End of switch & case 31
+  }  // End update()
+  // End, if Teensy 4.x
+#endif
diff --git a/analyze_fft4096_iqem_F32.h b/analyze_fft4096_iqem_F32.h
new file mode 100644
index 0000000..a51a4e1
--- /dev/null
+++ b/analyze_fft4096_iqem_F32.h
@@ -0,0 +1,348 @@
+/*
+ *   analyze_fft4096_iqem_F32.h    Assembled by Bob Larkin   9 Mar 2021
+ *
+ * External Memory  ****  BETA TEST VERSION - NOT FULLY TESTED **** <<<<<<<<<<
+ *
+ *  Note: Teensy 4.x Only, 3.x not supported
+ *
+ * Does Fast Fourier Transform of a 4096 point complex (I-Q) input.
+ * Output is one of three measures of the power in each of the 4096
+ * output bins, Power, RMS level or dB relative to a full scale
+ * sine wave.  Windowing of the input data is provided for to reduce
+ * spreading of the power in the output bins.  All inputs are Teensy
+ * floating point extension (_F32) and all outputs are floating point.
+ *
+ * Features include:
+ *   * I and Q inputs are OpenAudio_Arduino Library F32 compatible.
+ *   * FFT output for every 2048 inputs to overlapped FFTs to
+ *     compensate for windowing.
+ *   * Windowing None, Hann, Kaiser and Blackman-Harris.
+ *   * Multiple bin-sum output to simulate wider bins.
+ *   * Power averaging of multiple FFT
+ *
+ * Conversion Copyright (c) 2021 Bob Larkin
+ * Same MIT license as PJRC:
+ *
+ * From original real FFT:
+ *  Audio Library for Teensy 3.X
+ * Copyright (c) 2014, Paul Stoffregen, paul@pjrc.com
+ *
+ * Development of this audio library was funded by PJRC.COM, LLC by sales of
+ * Teensy and Audio Adaptor boards.  Please support PJRC's efforts to develop
+ * open source software by purchasing Teensy or other PJRC products.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice, development funding notice, and this permission
+ * notice shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* Does complex input FFT of 4096 points.  Multiple non-audio (via functions)
+ * output formats of RMS (same as I16 version, and default),
+ * Power or dBFS (full scale).  Output can be bin by bin or a pointer to
+ * the output array is available.  Several window functions are provided by
+ * in-class design, or a custom window can be provided from the INO.
+ *
+ * Memory for IQem FFT.  The large blocks of memory must be declared in the INO.
+ * This typically looks like:
+ * float32_t  fftOutput[4096];  // Array used for FFT Output to the INO program
+ * float32_t  window[2048];     // Windows reduce sidelobes with FFT's *Half Size*
+ * float32_t  fftBuffer[8192];  // Used by FFT, 4096 real, 4096 imag, interleaved
+ * float32_t  sumsq[4096];      // Required ONLY if power averaging is being done
+ *
+ * These blocks of memory are communicated to the FFT in the object creation, that
+ * might look like:
+ *   AudioAnalyzeFFT4096_IQEM_F32 myFFT(fftOutput, window, fftBuffer);
+ * or, if power averaging is used, the extra parameter is needed as:
+ *   AudioAnalyzeFFT4096_IQEM_F32 myFFT(fftOutput, window, fftBuffer, sumsq);
+ *
+ * The memory arrays must be declared before the FFT object.  About 74 kBytes are
+ * required if power averaging is used and about 58 kBytes without power averaging.
+ *
+ * In addition, this requires 64 AudioMemory_F32 which work out to about an
+ * additional 33 kBytes of memory.
+ *
+ * If several FFT sizes are used, one at a time, the memory can be shared.  Probably
+ * the simplest way to do this with a Teensy is to set up C-language unions.
+ *
+ * Functions (see comments and #defines below):
+ *   bool available()
+ *   float read(unsigned int binNumber)
+ *   float read(unsigned int binFirst, unsigned int binLast)
+ *   int windowFunction(int wNum)
+ *   int windowFunction(int wNum, float _kdb)  // Kaiser only
+ *   void setNAverage(int NAve)   // >=1
+ *   void setOutputType(int _type)
+ *   void setXAxis(uint8_t _xAxis)  // 0, 1, 2, 3
+ *
+ * x-Axis direction and offset per setXAxis(xAxis) for sine to I
+ * and cosine to Q:
+ *   If xAxis=0  f=0 in middle, f=fs/2 on left edge
+ *   If xAxis=1  f=0 in middle, f=fs/2 on right edge
+ *   If xAxis=2  f=0 on right edge, f=fs/2 in middle
+ *   If xAxis=3  f=0 on left edge, f=fs/2 in middle
+ * If there is 180 degree phase shift to I or Q these all get reversed.
+ * xAxis=1 is a mathematically consistent method.  It has positive frequencies
+ * on the right and negative ones on the left.  The center is half the sample
+ * rate, both + and -.  Uniformly sampled data lives in this circular world.
+ *
+ * Timing, max is longest update() time:
+ *   T4.0 Windowed, dBFS Out, 987 uSec  <<<<<<CHECK
+ *
+ * Windows:  The FFT window array memory is provided by the INO.  Three common and
+ * useful window functions, plus no window, can be filled into the array by calling
+ * one of the following:
+ *   windowFunction(AudioWindowNone);
+ *   windowFunction(AudioWindowHanning4096);
+ *   windowFunction(AudioWindowKaiser4096);
+ *   windowFunction(AudioWindowBlackmanHarris4096);
+ * See:  https://en.wikipedia.org/wiki/Window_function
+ *
+ * To use an alternate window function, just fill it into the array, window, above.
+ * It is only half of the window (2048 floats).  It looks like a full window
+ * function with the right half missing.  It should start with small
+ * values on the left (near [0]) and go to 1.0 at the right ([2047]).
+ *
+ * As with all library FFT's this one provides overlapping time series.  This
+ * tends to compensate for the attenuation at the window edges when doing a sequence
+ * of FFT's.  For that reason there can be a new FFT result every 2048 time
+ * series data points.
+ *
+ * Scaling:
+ *   Full scale for floating point DSP is a nebulous concept.  Normally the
+ *   full scale is -1.0 to +1.0.  This is an unscaled FFT and for a sine
+ *   wave centered in frequency on a bin and of FS amplitude, the power
+ *   at that center bin will grow by 4096^2/4 = about 4 million without windowing.
+ *   Windowing loss cuts this down.  The RMS level can grow without windowing to
+ *   4096.  The dBFS has been scaled to make this max value 0 dBFS by
+ *   removing 66.2 dB.  With floating point, the dynamic range is maintained
+ *   no matter how it is scaled, but this factor needs to be considered
+ *   when building the INO.
+ */
+ /*  Info
+  * __MK20DX128__ T_LC;  __MKL26Z64__ T3.0;  __MK20DX256__T3.1 and T3.2
+  * __MK64FX512__) T3.5; __MK66FX1M0__ T3.6; __IMXRT1062__ T4.0 and T4.1 */
+
+#ifndef analyze_fft4096_iqem_h_
+#define analyze_fft4096_iqem_h_
+
+// ***************  TEENSY 4.X ONLY   ****************
+#if defined(__IMXRT1062__)
+
+#include "Arduino.h"
+#include "AudioStream_F32.h"
+#include "arm_math.h"
+#include "mathDSP_F32.h"
+#include "arm_const_structs.h"
+
+#define FFT_RMS 0
+#define FFT_POWER 1
+#define FFT_DBFS 2
+
+#define NO_WINDOW 0
+#define AudioWindowNone 0
+#define AudioWindowHanning4096 1
+#define AudioWindowKaiser4096 2
+#define AudioWindowBlackmanHarris4096 3
+
+// 4096-point complex (I-Q) FFT analyzer.  The large arrays (output, half-size
+// window, FFT work buffer and optional power-sum buffer) are supplied by the caller.
+class AudioAnalyzeFFT4096_IQEM_F32 : public AudioStream_F32  {
+//GUI: inputs:2, outputs:0  //this line used for automatic generation of GUI node
+//GUI: shortName:FFT4096IQem
+
+public:
+    AudioAnalyzeFFT4096_IQEM_F32   // Without sumsq in call for averaging
+      (float32_t* _pOutput, float32_t* _pWindow, float32_t* _pFFT_buffer) :
+      AudioStream_F32(2, inputQueueArray) {
+        pOutput = _pOutput;
+        pWindow = _pWindow;
+        pFFT_buffer = _pFFT_buffer;
+        pSumsq = NULL;
+        // Teensy4 core library has the right files for new FFT
+        // arm CMSIS library has predefined structures of type arm_cfft_instance_f32
+        Sfft = arm_cfft_sR_f32_len4096;   // This is one of the structures
+        useHanningWindow();
+    }
+
+    AudioAnalyzeFFT4096_IQEM_F32  // Constructor to include sumsq power averaging.
+      (float32_t* _pOutput, float32_t* _pWindow, float32_t* _pFFT_buffer,
+       float32_t* _pSumsq) :
+       AudioStream_F32(2, inputQueueArray) {
+        pOutput = _pOutput;
+        pWindow = _pWindow;
+        pFFT_buffer = _pFFT_buffer;
+        pSumsq = _pSumsq;
+        // Teensy4 core library has the right files for new FFT
+        // arm CMSIS library has predefined structures of type arm_cfft_instance_f32
+        Sfft = arm_cfft_sR_f32_len4096;   // This is one of the structures
+        useHanningWindow();
+    }
+
+    // There is no variant for "settings," as blocks other than 128 are
+    // not supported and, nothing depends on sample rate so we don't need that.
+
+    // Returns true once per new output; the flag is cleared by this call.
+    bool available() {
+#if defined(__IMXRT1062__)
+        if (outputflag == true) {
+            outputflag = false;  // No double returns
+            return true;
+        }
+        return false;
+#else
+        // Don't know how you got this far, but....
+        Serial.println("Teensy 3.x NOT SUPPORTED");
+        return false;
+#endif
+    }
+
+    // Returns a single bin output, or 0.0 if binNumber is beyond bin 4095.
+    float read(unsigned int binNumber) {
+        if (binNumber > 4095) return 0.0f;   // unsigned, so no lower check is needed
+        return *(pOutput + binNumber);
+    }
+
+    // Return sum of several bins. Normally use with power output.
+    // This produces the equivalent of bigger bins.
+    float read(unsigned int binFirst, unsigned int binLast) {
+        if (binFirst > binLast) {            // Accept the limits in either order
+            unsigned int tmp = binLast;
+            binLast = binFirst;
+            binFirst = tmp;
+        }
+        if (binFirst > 4095) return 0.0;
+        if (binLast > 4095) binLast = 4095;
+        float sum = 0;
+        do {
+            sum += *(pOutput + binFirst++);
+        } while (binFirst <= binLast);
+        return sum;
+    }
+
+    // Sets None, Hann, or Blackman-Harris window with no parameter.  Returns 0, or -1 for Kaiser.
+    int windowFunction(int _wNum) {
+       if(_wNum == AudioWindowKaiser4096)
+          return -1;                 // Kaiser needs the kdb; wNum is left unchanged
+       windowFunction(_wNum, 0.0f);  // This call also records wNum
+       return 0;
+    }
+
+    int windowFunction(int _wNum, float _kdb) { // Kaiser case
+      float kd;
+      wNum = _wNum;
+      if (wNum == AudioWindowKaiser4096)  {
+         if(_kdb<20.0f)
+            kd = 20.0f;
+         else
+            kd = _kdb;
+         useKaiserWindow(kd);
+         }
+      else if (wNum == AudioWindowBlackmanHarris4096)
+         useBHWindow();
+      else
+         useHanningWindow();   // Default
+     return 0;
+     }
+
+    // Number of FFT averaged in the output
+    void setNAverage(int _nAverage)  {
+       if(pSumsq != NULL)   // We can average because we have memory.
+           nAverage = (_nAverage < 1) ? 1 : _nAverage;  // Keep >= 1, update() uses 1/nAverage
+       }
+
+    // Output RMS (default), power or dBFS (FFT_RMS, FFT_POWER, FFT_DBFS)
+    void setOutputType(int _type)  {
+       outputType = _type;
+       }
+
+    // xAxis, bit 0 left/right;  bit 1 low to high;  default 0X03
+    void setXAxis(uint8_t _xAxis)  {
+       xAxis = _xAxis;
+       }
+
+  virtual void update(void);
+
+private:
+  float32_t  *pOutput, *pWindow, *pFFT_buffer;
+  float32_t  *pSumsq;
+  int wNum = AudioWindowHanning4096;
+  uint8_t state = 0;
+  bool outputflag = false;
+  audio_block_f32_t *inputQueueArray[2];
+  audio_block_f32_t *blocklist_i[32];
+  audio_block_f32_t *blocklist_q[32];
+  // Copy of the predefined CMSIS 4096-point complex FFT instance (T4.x)
+  arm_cfft_instance_f32 Sfft;
+  int outputType = FFT_RMS;  //Same type as I16 version init
+  int count = 0;
+  int nAverage = 1;
+  uint8_t xAxis = 0x03;
+
+    // The Hann window is a good all-around window
+    // This can be used with zero-bias frequency interpolation.
+    // pWindow points to the INO supplied buffer; only the first half (2048 floats) is filled.
+    void useHanningWindow(void) {
+        if(!pWindow) return;   // No place for a window
+        for (int i=0; i < 2048; i++) {
+           // 2*PI/4095 = 0.00153435538
+           *(pWindow + i) = 0.5*(1.0 - cosf(0.00153435538f*(float)i));
+        }
+    }
+
+    // Blackman-Harris produces a first sidelobe more than 90 dB down.
+    // The price is a bandwidth of about 2 bins.  Very useful at times.
+    void useBHWindow(void) {
+        if(!pWindow) return;
+        for (int i=0; i < 2048; i++) {
+           float kx = 0.00153435538f;  // 2*PI/4095
+           float ix = (float) i;
+           *(pWindow + i) = 0.35875 -
+                       0.48829*cosf(     kx*ix) +
+                       0.14128*cosf(2.0f*kx*ix) -
+                       0.01168*cosf(3.0f*kx*ix);
+        }
+    }
+
+    /* The windowing function here is that of James Kaiser.  This has a number
+     * of desirable features. The sidelobes drop off as the frequency away from a transition.
+     * Also, the tradeoff of sidelobe level versus cutoff rate is variable.
+     * Here we specify it in terms of kdb, the highest sidelobe, in dB, next to a sharp cutoff. For
+     * calculating the windowing vector, we need a parameter beta, found as follows:
+     */
+    void useKaiserWindow(float kdb)  {
+       float32_t beta, kbes, xn2;
+       mathDSP_F32 mathEqualizer;  // For Bessel function
+
+       if(!pWindow) return;
+
+       if (kdb < 20.0f)
+           beta = 0.0;
+       else
+           beta = -2.17+0.17153*kdb-0.0002841*kdb*kdb; // Within a dB or so
+
+       // Note: i0f is the fp zero'th order modified Bessel function (see mathDSP_F32.h)
+       kbes = 1.0f / mathEqualizer.i0f(beta); // An additional derived parameter used in loop
+       for (int n=0; n<2048; n++) {
+          xn2 = 0.5f+(float32_t)n;
+          // 4/(4095^2) = 2.3853504E-7
+          xn2 = 2.3853504E-7*xn2*xn2;
+          *(pWindow + 2047 - n) = kbes*(mathEqualizer.i0f(beta*sqrtf(1.0-xn2)));  // filled right to left
+       }
+    }
+  };
+#endif
+#endif
diff --git a/examples/TestFFT4096iqEM/TestFFT4096iqEM.ino b/examples/TestFFT4096iqEM/TestFFT4096iqEM.ino
new file mode 100644
index 0000000..aafff49
--- /dev/null
+++ b/examples/TestFFT4096iqEM/TestFFT4096iqEM.ino
@@ -0,0 +1,94 @@
+// TestFFT4096iqEM.ino  for Teensy 4.x
+// Bob Larkin 9 March 2021
+
+// Generate Sin and Cosine pair and input to IQ FFT.
+// Serial Print out powers of all 4096 bins in
+// dB relative to Sine Wave Full Scale
+//       EXTERNAL MEMORY FFT
+// Public Domain
+
+#include "OpenAudio_ArduinoLibrary.h"
+#include "AudioStream_F32.h"
+
+// Memory for IQ FFT, declared in the sketch and passed to the FFT object below
+float32_t  fftOutput[4096];  // FFT results; separate so fftBuffer[] is free for new input data
+float32_t  window[2048];     // Half size window storage
+float32_t  fftBuffer[8192];  // Used for FFT, 4096 real, 4096 imag, interleaved
+float32_t  sumsq[4096];      // Required if power averaging is being done
+
+int jj;                      // Counts FFT results skipped before printing, see loop()
+
+// GUItool: begin automatically generated code
+AudioSynthSineCosine_F32   sine_cos1;       //xy=76,532
+// FFT constructor arguments (the last one, _pSumsq, is optional):
+// (float32_t* _pOutput, float32_t* _pWindow, float32_t* _pFFT_buffer, float32_t* _pSumsq)
+//AudioAnalyzeFFT4096_IQEM_F32 FFT4096iqEM1(fftOutput, window, fftBuffer);      //xy=243,532
+AudioAnalyzeFFT4096_IQEM_F32 FFT4096iqEM1(fftOutput, window, fftBuffer, sumsq);  // w/ power ave
+AudioOutputI2S_F32         audioOutI2S1;    //xy=246,591
+AudioConnection_F32        patchCord1(sine_cos1, 0, FFT4096iqEM1, 0);
+AudioConnection_F32        patchCord2(sine_cos1, 1, FFT4096iqEM1, 1);
+// GUItool: end automatically generated code
+
+void setup(void) {
+  // Runs once: start serial, set F32 audio memory, configure the test tone and FFT.
+  Serial.begin(9600);
+  delay(1000);
+
+  // The 4096 complex FFT needs 32 F32 memory blocks for real and 32 for imag.
+  // Set memory to more than 64, depending on other usage.
+  AudioMemory_F32(100);
+  Serial.println("FFT4096IQem Test");
+
+  sine_cos1.amplitude(1.0f); // Initialize Waveform Generator
+
+  // Pick T4.x bin center
+  //sine_cos1.frequency(689.0625f);
+
+  // or pick any old frequency
+  sine_cos1.frequency(1000.0f);
+
+  // Select the output format, FFT_RMS, FFT_POWER, or FFT_DBFS
+  FFT4096iqEM1.setOutputType(FFT_DBFS);
+
+  // Select the window function, designed by FFT object
+  //FFT4096iqEM1.windowFunction(AudioWindowNone);
+  //FFT4096iqEM1.windowFunction(AudioWindowHanning4096);
+  //FFT4096iqEM1.windowFunction(AudioWindowKaiser4096, 55.0f);
+  FFT4096iqEM1.windowFunction(AudioWindowBlackmanHarris4096);
+
+  // Uncomment to Serial print window function
+  // for (int i=0; i<2048; i++) Serial.println(*(window+i), 7);
+
+  // xAxis, bit 0 left/right;  bit 1 low to high;  default 0X03
+  FFT4096iqEM1.setXAxis(0X01);
+
+  // In order to average powers, a buffer for sumsq[4096] must be
+  // globally declared and that pointer, sumsq, set as the last
+  // parameter in the object creation.  Then the following will
+  // cause averaging of 20 powers:
+  FFT4096iqEM1.setNAverage(20);
+
+  jj = 0;   // This is to delay data gathering to get steady state
+  }
+
+void loop(void)  {
+  // Prints all 4096 FFT bins once; also prints the peak F32 audio memory use.
+  static bool doPrint=true;
+
+  delay(10);
+  // Print output, once.  The first 3 available results are skipped,
+  // counted by jj, to delay data gathering until steady state.
+  if( FFT4096iqEM1.available() && doPrint )  {
+      if(jj++ < 3)return;
+      for(int i=0; i<4096; i++)
+        {
+        Serial.print((int)((float32_t)i * 44100.0/4096.0));
+        Serial.print(" ");
+        Serial.println(*(fftOutput + i), 8 );
+        }
+      doPrint = false;
+      }
+  Serial.print(" Audio MEM Float32 Peak: ");
+  Serial.println(AudioMemoryUsageMax_F32());
+  delay(500);
+  }
diff --git a/keywords.txt b/keywords.txt
index cf11ee2..1bee52d 100644
--- a/keywords.txt
+++ b/keywords.txt
@@ -19,6 +19,7 @@ setXAxis	KEYWORD2
 AudioAnalyzeFFT1024_F32	KEYWORD1
 AudioAnalyzeFFT2048_F32	KEYWORD1
 AudioAnalyzeFFT4096_F32	KEYWORD1
+AudioAnalyzeFFT4096_IQEM_F32	KEYWORD1
 
 AudioAnalyzePeak_F32 KEYWORD1
 readPeakToPeak	KEYWORD2