diff --git a/cpp/src/resofilter.cc b/cpp/src/resofilter.cc index b9934d6..431aff1 100644 --- a/cpp/src/resofilter.cc +++ b/cpp/src/resofilter.cc @@ -21,6 +21,9 @@ // The full implementation requires both a tuning table and 2x // oversampling, neither of which are present yet, but we'll get there. +#include +#include + #include "synth.h" #include "freqlut.h" #include "exp2.h" @@ -46,6 +49,147 @@ int32_t compute_alpha(int32_t logf) { return min(1 << 24, Freqlut::lookup(logf)); } +// Some really generic 4x4 matrix multiplication operations, suitable +// for NEON'ing + +static void matmult4(float dst[16], const float a[16], const float b[16]) { + dst[0] = a[0] * b[0] + a[4] * b[1] + a[8] * b[2] + a[12] * b[3]; + dst[1] = a[1] * b[0] + a[5] * b[1] + a[9] * b[2] + a[13] * b[3]; + dst[2] = a[2] * b[0] + a[6] * b[1] + a[10] * b[2] + a[14] * b[3]; + dst[3] = a[3] * b[0] + a[7] * b[1] + a[11] * b[2] + a[15] * b[3]; + dst[4] = a[0] * b[4] + a[4] * b[5] + a[8] * b[6] + a[12] * b[7]; + dst[5] = a[1] * b[4] + a[5] * b[5] + a[9] * b[6] + a[13] * b[7]; + dst[6] = a[2] * b[4] + a[6] * b[5] + a[10] * b[6] + a[14] * b[7]; + dst[7] = a[3] * b[4] + a[7] * b[5] + a[11] * b[6] + a[15] * b[7]; + dst[8] = a[0] * b[8] + a[4] * b[9] + a[8] * b[10] + a[12] * b[11]; + dst[9] = a[1] * b[8] + a[5] * b[9] + a[9] * b[10] + a[13] * b[11]; + dst[10] = a[2] * b[8] + a[6] * b[9] + a[10] * b[10] + a[14] * b[11]; + dst[11] = a[3] * b[8] + a[7] * b[9] + a[11] * b[10] + a[15] * b[11]; + dst[12] = a[0] * b[12] + a[4] * b[13] + a[8] * b[14] + a[12] * b[15]; + dst[13] = a[1] * b[12] + a[5] * b[13] + a[9] * b[14] + a[13] * b[15]; + dst[14] = a[2] * b[12] + a[6] * b[13] + a[10] * b[14] + a[14] * b[15]; + dst[15] = a[3] * b[12] + a[7] * b[13] + a[11] * b[14] + a[15] * b[15]; +} + +static void matvec4(float dst[4], const float a[16], const float b[4]) { + dst[0] = a[0] * b[0] + a[4] * b[1] + a[8] * b[2] + a[12] * b[3]; + dst[1] = a[1] * b[0] + a[5] * b[1] + a[9] * b[2] + a[13] * b[3]; + dst[2] = a[2] * b[0] + a[6] * b[1] + a[10] * b[2] + a[14] * b[3]; + dst[3] = a[3] * b[0] + a[7] * b[1] + a[11] * b[2] + a[15] * b[3]; +} + +static void vecupdate4(float dst[4], float x, const float a[4]) { + for (int i = 0; i < 4; i++) { + dst[i] += x * a[i]; + } +} + +/* compute dst := dst + x * a */ +static void matupdate4(float dst[16], float x, const float a[16]) { + for (int i = 0; i < 16; i++) { + dst[i] += x * a[i]; + } +} + +static void matcopy(float *dst, const float *src, int n) { + memcpy(dst, src, n * sizeof(float)); +} + +void dump_matrix(const float a[20]) { + for (int row = 0; row < 5; row++) { + printf("%s[", row == 0 ? "[" : " "); + for (int col = 0; col < 5; col++) { + float x = row == 0 ? (col == 0 ? 1.0 : 0.0) : a[col * 4 + (row - 1)]; + printf("%6f ", x); + } + printf("]%s\n", row == 4 ? "]" : ""); + } +} + +static void make_state_transition(float result[20], int32_t f0, int32_t k) { + // TODO: these should depend on k, and be just enough to meet error bound + int n1 = 4; + int n2 = 4; + float f = f0 * (1.0 / (1 << (24 + n2))); + float k_f = k * (1.0 / (1 << 24)); + k_f = min(k_f, 3.98f); + + // these are 5x5 matrices of which we store the bottom 5x4 + // Top row of Jacobian is all zeros + float j[20] = {0}; + + // set up initial jacobian + j[0] = f; + j[4] = -f; + j[5] = f; + j[9] = -f; + j[10] = f; + j[14] = -f; + j[15] = f; + j[16] = -k_f * f; + j[19] = -f; + + // Top row of exponential is [1 0 0 0 0] + float a[20] = {0}; + a[4] = 1.0; + a[9] = 1.0; + a[14] = 1.0; + a[19] = 1.0; + + float c[20]; + matcopy(c, j, 20); + + static const float scales[4] = {1.0, 1/2.0, 1/6.0, 1/24.0}; + // taylor's series to n1 + for (int i = 0; i < n1; i++) { + float scale = scales[i]; + vecupdate4(a, scale, c); + matupdate4(a+4, scale, c + 4); + if (i < n1 - 1) { + float tmp[20]; + matvec4(tmp, c + 4, j); + matmult4(tmp + 4, c + 4, j + 4); + matcopy(c, tmp, 20); + } + } + + // repeated squaring + for (int i = 0; i < n2; i++) { + float tmp[20]; + matvec4(tmp, a + 4, a); + matmult4(tmp + 4, a + 4, a + 4); + for (int j = 0; j < 4; j++) { + a[j] += tmp[j]; + } + matcopy(a + 4, tmp + 4, 16); + } + matcopy(result, a, 20); +} + +void test_matrix() { + float a[20]; + make_state_transition(a, 1.0 * (1 << 24), 3.99 * (1 << 24)); + dump_matrix(a); +} + +#if defined(USE_MATRIX) +void ResoFilter::process(const int32_t **inbufs, const int32_t *control_in, + const int32_t *control_last, int32_t **outbufs) { + float a[20]; + make_state_transition(a, compute_alpha(control_in[0]), control_in[1]); + const int32_t *ibuf = inbufs[0]; + int32_t *obuf = outbufs[0]; + for (int i = 0; i < n; i++) { + float signal = ibuf[i]; + float tmp[4]; + matvec4(tmp, a + 4, x); + for (int k = 0; k < 4; k++) { + x[k] = tmp[k] + signal * a[k]; + obuf[i] = x[3]; + } + } +} +#else void ResoFilter::process(const int32_t **inbufs, const int32_t *control_in, const int32_t *control_last, int32_t **outbufs) { int32_t alpha = compute_alpha(control_last[0]); @@ -72,7 +216,7 @@ void ResoFilter::process(const int32_t **inbufs, const int32_t *control_in, int32_t w2 = w[2]; int32_t w3 = w[3]; int32_t yy0 = yy; -#endif; +#endif for (int i = 0; i < n; i++) { alpha += delta_alpha; k += delta_k; @@ -111,3 +255,4 @@ void ResoFilter::process(const int32_t **inbufs, const int32_t *control_in, yy = yy0; #endif } +#endif // USE_MATRIX diff --git a/cpp/src/resofilter.h b/cpp/src/resofilter.h index fa711dd..c0720bb 100644 --- a/cpp/src/resofilter.h +++ b/cpp/src/resofilter.h @@ -19,6 +19,8 @@ #include "module.h" +#define USE_MATRIX + class ResoFilter : Module { public: ResoFilter(); @@ -28,11 +30,18 @@ class ResoFilter : Module { void process(const int32_t **inbufs, const int32_t *control_in, const int32_t *control_last, int32_t **outbufs); private: + #if defined(USE_MATRIX) + float x[4]; +#else int32_t x[4]; #if defined(NONLINEARITY) int32_t w[4]; int32_t yy; #endif +#endif }; +// remove when done +void test_matrix(); + #endif // SYNTH_RESOFILTER_H_