From 235dacf5a10cb802437a2f309b4a765304816d06 Mon Sep 17 00:00:00 2001 From: Steve Lascos Date: Sat, 29 Feb 2020 12:46:42 -0500 Subject: [PATCH] Added support to use an intermediate DMA buffer for SPI memory transfers --- src/BASpiMemory.h | 40 +++++++--- src/DmaSpi.h | 37 ++++++++- src/LibBasicFunctions.h | 5 +- src/LibMemoryManagement.h | 2 + src/common/AudioDelay.cpp | 14 ++++ src/peripherals/BASpiMemory.cpp | 130 ++++++++++++++++++++++++++++++-- 6 files changed, 207 insertions(+), 21 deletions(-) diff --git a/src/BASpiMemory.h b/src/BASpiMemory.h index f4f5950..0128750 100644 --- a/src/BASpiMemory.h +++ b/src/BASpiMemory.h @@ -136,11 +136,6 @@ protected: }; -#if defined (__IMXRT1062__) -//#if 0 -using BASpiMemoryDMA = BASpiMemory; -#else - /**************************************************************************//** * This wrapper class uses the Arduino SPI (Wire) library to access the SPI ram * via DMA. @@ -223,22 +218,43 @@ public: /// @param wordOffset, offset from the start of the DMA buffer in words to begin reading void readBufferContents(uint16_t *dest, size_t numWords, size_t wordOffset = 0); + /// Creates and allocates an intermediate copy buffer that is suitable for DMA transfers. It is up to the + /// user to ensure they never request a read/write larger than the size of this buffer when using this + /// feature. + /// @details In some use cases you may want to DMA to/from memory buffers that are in memory regions that + /// are not directly usable for DMA. Specifying a non-zero copy buffer size will create an intermediate + /// DMA-compatible buffer. By default, the size is zero and an intermediate copy is not performed. + /// DMA requires the user data be in a DMA accessible region and that it be aligned to the + /// the size of a cache line, and that the cache line isn't shared with any other data that might + /// be used on a different thread. Best practice is for a DMA buffer to start on a cache-line + /// boundary and be exactly sized to an integer multiple of cache lines. + /// @param numBytes the number of bytes to allocate for the intermediate copy buffer. + /// @returns true on success, false on failure + bool setDmaCopyBufferSize(size_t numBytes); + + /// get the current size of the DMA copy buffer. Zero size means no intermediate copy is performed. + /// @returns the size of the intermediate copy buffer in bytes. + size_t getDmaCopyBufferSize(void) { return m_dmaCopyBufferSize; } + private: - DmaSpiGeneric *m_spiDma = nullptr; - AbstractChipSelect *m_cs = nullptr; + DmaSpiGeneric *m_spiDma = nullptr; + AbstractChipSelect *m_cs = nullptr; - uint8_t *m_txCommandBuffer = nullptr; - DmaSpi::Transfer *m_txTransfer; - uint8_t *m_rxCommandBuffer = nullptr; - DmaSpi::Transfer *m_rxTransfer; + uint8_t *m_txCommandBuffer = nullptr; + DmaSpi::Transfer *m_txTransfer = nullptr; + uint8_t *m_rxCommandBuffer = nullptr; + DmaSpi::Transfer *m_rxTransfer = nullptr; uint16_t m_txXferCount; uint16_t m_rxXferCount; + size_t m_dmaCopyBufferSize = 0; + uint8_t *m_dmaWriteCopyBuffer = nullptr; + volatile uint8_t *m_dmaReadCopyBuffer = nullptr; + void m_setSpiCmdAddr(int command, size_t address, uint8_t *dest); }; -#endif // BASpiMemoryDMA declaration } /* namespace BALibrary */ diff --git a/src/DmaSpi.h b/src/DmaSpi.h index 00edcfa..e7b576e 100644 --- a/src/DmaSpi.h +++ b/src/DmaSpi.h @@ -199,7 +199,9 @@ namespace DmaSpi volatile uint8_t* pDest = nullptr, const uint8_t& fill = 0, AbstractChipSelect* cs = nullptr, - TransferType transferType = TransferType::NORMAL + TransferType transferType = TransferType::NORMAL, + uint8_t *pSourceIntermediate = nullptr, + volatile uint8_t *pDestIntermediate = nullptr ) : m_state(State::idle), m_pSource(pSource), m_transferCount(transferCount), @@ -207,7 +209,9 @@ namespace DmaSpi m_fill(fill), m_pNext(nullptr), m_pSelect(cs), - m_transferType(transferType) + m_transferType(transferType), + m_pSourceIntermediate(pSourceIntermediate), + m_pDestIntermediate(pDestIntermediate) { DMASPI_PRINT(("Transfer @ %p\n", this)); }; @@ -229,6 +233,10 @@ namespace DmaSpi Transfer* m_pNext; AbstractChipSelect* m_pSelect; TransferType m_transferType; + + uint8_t *m_pSourceIntermediate = nullptr; + volatile uint8_t *m_pDestIntermediate = nullptr; + volatile uint8_t *m_pDestOriginal = nullptr; }; } // namespace DmaSpi @@ -553,6 +561,14 @@ class AbstractDmaSpi DMASPI_PRINT(("DmaSpi::rxIsr_()\n")); rxChannel_()->clearInterrupt(); // end current transfer: deselect and mark as done + + // Check if intermediate buffer was used + if (m_pCurrentTransfer->m_pDestIntermediate) { + // copy when using an intermediate buffer + memcpy((void *)m_pCurrentTransfer->m_pDestOriginal, // DMA contents copied to original + (void *)m_pCurrentTransfer->m_pDest, // source is the actual DMA buffer + m_pCurrentTransfer->m_transferCount); + } finishCurrentTransfer(); DMASPI_PRINT((" state = ")); @@ -606,6 +622,14 @@ class AbstractDmaSpi { // real data sink DMASPI_PRINT((" real sink\n")); + + // Check for intermediate buffer + if (m_pCurrentTransfer->m_pDestIntermediate) { + // Modify the DMA so it will fill the intermediate buffer instead + // store the original buffer for memcpy in rx_isr() + m_pCurrentTransfer->m_pDestOriginal = m_pCurrentTransfer->m_pDest; + m_pCurrentTransfer->m_pDest = m_pCurrentTransfer->m_pDestIntermediate; + } arm_dcache_flush_delete((void *)m_pCurrentTransfer->m_pDest, m_pCurrentTransfer->m_transferCount); rxChannel_()->destinationBuffer(m_pCurrentTransfer->m_pDest, m_pCurrentTransfer->m_transferCount); @@ -622,6 +646,15 @@ class AbstractDmaSpi if (m_pCurrentTransfer->m_pSource != nullptr) { // real data source + if (m_pCurrentTransfer->m_pSourceIntermediate) { + // copy and use the intermediate buffer + memcpy((void*)m_pCurrentTransfer->m_pSourceIntermediate, + (void*)m_pCurrentTransfer->m_pSource, + m_pCurrentTransfer->m_transferCount + ); + // DMA will now transfer from intermediate buffer + m_pCurrentTransfer->m_pSource = m_pCurrentTransfer->m_pSourceIntermediate; + } DMASPI_PRINT((" real source\n")); arm_dcache_flush_delete((void *)m_pCurrentTransfer->m_pSource, m_pCurrentTransfer->m_transferCount); txChannel_()->sourceBuffer(m_pCurrentTransfer->m_pSource, diff --git a/src/LibBasicFunctions.h b/src/LibBasicFunctions.h index ee4ac5b..1dba21a 100644 --- a/src/LibBasicFunctions.h +++ b/src/LibBasicFunctions.h @@ -196,7 +196,10 @@ public: /// @returns pointer to the underlying ExtMemSlot. ExtMemSlot *getSlot() const { return m_slot; } - + /// Calling this function will force the underlying SPI DMA to use an extra copy buffer. This is needed if the user + /// audio buffers are not guarenteed to be cache aligned. + /// @returns true if suceess, false if an error occurs + bool setSpiDmaCopyBuffer(void); /// Ween using INTERNAL memory, thsi function can return a pointer to the underlying RingBuffer that contains /// audio_block_t * pointers. diff --git a/src/LibMemoryManagement.h b/src/LibMemoryManagement.h index 50c8c38..4d049b3 100644 --- a/src/LibMemoryManagement.h +++ b/src/LibMemoryManagement.h @@ -146,6 +146,8 @@ public: bool isReadBusy() const; + BASpiMemory *getSpiMemoryHandle() { return m_spi; } + /// DEBUG USE: prints out the slot member variables void printStatus(void) const; diff --git a/src/common/AudioDelay.cpp b/src/common/AudioDelay.cpp index 4153f25..10c420b 100644 --- a/src/common/AudioDelay.cpp +++ b/src/common/AudioDelay.cpp @@ -209,6 +209,20 @@ bool AudioDelay::interpolateDelay(int16_t *extendedSourceBuffer, int16_t *destBu return true; } +bool AudioDelay::setSpiDmaCopyBuffer(void) +{ + bool returnValue = false; + if (m_slot->isUseDma()) { + // For DMA use on T4.0 we need this kluge + BASpiMemoryDMA * spiDma = static_cast(m_slot->getSpiMemoryHandle()); + if (spiDma) { + spiDma->setDmaCopyBufferSize(sizeof(int16_t) * AUDIO_BLOCK_SAMPLES); + returnValue = true; + } + } + return returnValue; +} + } diff --git a/src/peripherals/BASpiMemory.cpp b/src/peripherals/BASpiMemory.cpp index 67e6c4c..90ed46d 100644 --- a/src/peripherals/BASpiMemory.cpp +++ b/src/peripherals/BASpiMemory.cpp @@ -17,6 +17,8 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ +#include "stdlib.h" +#include "assert.h" #include "Arduino.h" #include "BASpiMemory.h" @@ -49,6 +51,10 @@ constexpr int SPI_ADDR_0_MASK = 0x0000FF; constexpr int CMD_ADDRESS_SIZE = 4; constexpr int MAX_DMA_XFER_SIZE = 0x4000; +constexpr size_t MEM_ALIGNED_ALLOC = 32; // number of bytes to align DMA buffer to + +void * dma_aligned_malloc(size_t align, size_t size); +void dma_aligned_free(void * ptr); BASpiMemory::BASpiMemory(SpiDeviceId memDeviceId) { @@ -351,9 +357,7 @@ void BASpiMemory::m_rawRead16(size_t address, uint16_t *dest, size_t numWords) digitalWrite(m_csPin, HIGH); } -#if defined (__IMXRT1062__) -//#if 0 -#else + ///////////////////////////////////////////////////////////////////////////// // BASpiMemoryDMA ///////////////////////////////////////////////////////////////////////////// @@ -472,6 +476,14 @@ void BASpiMemoryDMA::write(size_t address, uint8_t *src, size_t numBytes) size_t bytesRemaining = numBytes; uint8_t *srcPtr = src; size_t nextAddress = address; + uint8_t *intermediateBuffer = nullptr; + + // Check for intermediate buffer use + if (m_dmaCopyBufferSize) { + // copy to the intermediate buffer; + intermediateBuffer = m_dmaWriteCopyBuffer; + memcpy(intermediateBuffer, src, numBytes); + } while (bytesRemaining > 0) { m_txXferCount = m_bytesToXfer(nextAddress, min(bytesRemaining, static_cast(MAX_DMA_XFER_SIZE))); // check for die boundary @@ -482,7 +494,7 @@ void BASpiMemoryDMA::write(size_t address, uint8_t *src, size_t numBytes) m_spiDma->registerTransfer(m_txTransfer[1]); while ( m_txTransfer[0].busy() || m_txTransfer[1].busy()) { yield(); } // wait until not busy - m_txTransfer[0] = DmaSpi::Transfer(srcPtr, m_txXferCount, nullptr, 0, m_cs, TransferType::NO_START_CS); + m_txTransfer[0] = DmaSpi::Transfer(srcPtr, m_txXferCount, nullptr, 0, m_cs, TransferType::NO_START_CS, intermediateBuffer, nullptr); m_spiDma->registerTransfer(m_txTransfer[0]); bytesRemaining -= m_txXferCount; srcPtr += m_txXferCount; @@ -539,6 +551,13 @@ void BASpiMemoryDMA::read(size_t address, uint8_t *dest, size_t numBytes) size_t bytesRemaining = numBytes; uint8_t *destPtr = dest; size_t nextAddress = address; + volatile uint8_t *intermediateBuffer = nullptr; + + // Check for intermediate buffer use + if (m_dmaCopyBufferSize) { + intermediateBuffer = m_dmaReadCopyBuffer; + } + while (bytesRemaining > 0) { m_rxXferCount = m_bytesToXfer(nextAddress, min(bytesRemaining, static_cast(MAX_DMA_XFER_SIZE))); // check for die boundary @@ -549,7 +568,7 @@ void BASpiMemoryDMA::read(size_t address, uint8_t *dest, size_t numBytes) while ( m_rxTransfer[0].busy() || m_rxTransfer[1].busy()) { yield(); } - m_rxTransfer[0] = DmaSpi::Transfer(nullptr, m_rxXferCount, destPtr, 0, m_cs, TransferType::NO_START_CS); + m_rxTransfer[0] = DmaSpi::Transfer(nullptr, m_rxXferCount, destPtr, 0, m_cs, TransferType::NO_START_CS, nullptr, intermediateBuffer); m_spiDma->registerTransfer(m_rxTransfer[0]); bytesRemaining -= m_rxXferCount; @@ -574,6 +593,105 @@ bool BASpiMemoryDMA::isReadBusy(void) const { return (m_rxTransfer[0].busy() or m_rxTransfer[1].busy()); } -#endif // BASpiMemoryDMA definition + +bool BASpiMemoryDMA::setDmaCopyBufferSize(size_t numBytes) +{ + if (m_dmaWriteCopyBuffer) { + dma_aligned_free((void *)m_dmaWriteCopyBuffer); + m_dmaCopyBufferSize = 0; + } + if (m_dmaReadCopyBuffer) { + dma_aligned_free((void *)m_dmaReadCopyBuffer); + m_dmaCopyBufferSize = 0; + } + + if (numBytes > 0) { + m_dmaWriteCopyBuffer = (uint8_t*)dma_aligned_malloc(MEM_ALIGNED_ALLOC, numBytes); + if (!m_dmaWriteCopyBuffer) { + // allocate failed + m_dmaCopyBufferSize = 0; + return false; + } + + m_dmaReadCopyBuffer = (volatile uint8_t*)dma_aligned_malloc(MEM_ALIGNED_ALLOC, numBytes); + if (!m_dmaReadCopyBuffer) { + // allocate failed + m_dmaCopyBufferSize = 0; + return false; + } + m_dmaCopyBufferSize = numBytes; + } + + return true; +} + +// Some convenience functions to malloc and free aligned memory +// Number of bytes we're using for storing +// the aligned pointer offset +typedef uint16_t offset_t; +#define PTR_OFFSET_SZ sizeof(offset_t) + +#ifndef align_up +#define align_up(num, align) \ + (((num) + ((align) - 1)) & ~((align) - 1)) +#endif + +void * dma_aligned_malloc(size_t align, size_t size) +{ + void * ptr = NULL; + + // We want it to be a power of two since + // align_up operates on powers of two + assert((align & (align - 1)) == 0); + + if(align && size) + { + /* + * We know we have to fit an offset value + * We also allocate extra bytes to ensure we + * can meet the alignment + */ + uint32_t hdr_size = PTR_OFFSET_SZ + (align - 1); + void * p = malloc(size + hdr_size); + + if(p) + { + /* + * Add the offset size to malloc's pointer + * (we will always store that) + * Then align the resulting value to the + * target alignment + */ + ptr = (void *) align_up(((uintptr_t)p + PTR_OFFSET_SZ), align); + + // Calculate the offset and store it + // behind our aligned pointer + *((offset_t *)ptr - 1) = + (offset_t)((uintptr_t)ptr - (uintptr_t)p); + + } // else NULL, could not malloc + } //else NULL, invalid arguments + + return ptr; +} + +void dma_aligned_free(void * ptr) +{ + assert(ptr); + + /* + * Walk backwards from the passed-in pointer + * to get the pointer offset. We convert to an offset_t + * pointer and rely on pointer math to get the data + */ + offset_t offset = *((offset_t *)ptr - 1); + + /* + * Once we have the offset, we can get our + * original pointer and call free + */ + void * p = (void *)((uint8_t *)ptr - offset); + free(p); +} } /* namespace BALibrary */