From a02bfe55d0b9fbcac2a35006855b1f5a863c0c4a Mon Sep 17 00:00:00 2001 From: Jared Boone Date: Sat, 2 Jan 2016 10:34:17 -0800 Subject: [PATCH] Decimators for wider IFIR output bandwidth. --- firmware/baseband/dsp_decimate.cpp | 119 +++++++++++++++++++++++++++++ firmware/baseband/dsp_decimate.hpp | 58 ++++++++++++++ 2 files changed, 177 insertions(+) diff --git a/firmware/baseband/dsp_decimate.cpp b/firmware/baseband/dsp_decimate.cpp index 5698b33d..c8a70898 100644 --- a/firmware/baseband/dsp_decimate.cpp +++ b/firmware/baseband/dsp_decimate.cpp @@ -177,6 +177,71 @@ static inline uint32_t scale_round_and_pack( return __PKHBT(saturated_real, saturated_imag, 16); } +// FIRC8xR16x24FS4Decim4 ////////////////////////////////////////////////// + +FIRC8xR16x24FS4Decim4::FIRC8xR16x24FS4Decim4() { + z_.fill({}); +} + +void FIRC8xR16x24FS4Decim4::configure( + const std::array& taps, + const int32_t scale, + const Shift shift +) { + const int negate_factor = (shift == Shift::Up) ? -1 : 1; + for(size_t i=0; i(__builtin_assume_aligned(z_.data(), 4)); + const vec2_s16* const t = static_cast(__builtin_assume_aligned(taps_.data(), 4)); + uint32_t* const d = static_cast(__builtin_assume_aligned(dst.p, 4)); + + const auto k = output_scale; + + const size_t count = src.count / decimation_factor; + for(size_t i=0; i(__builtin_assume_aligned(&src.p[i * decimation_factor], 4)); + + complex32_t accum; + + // Oldest samples are discarded. + accum = mac_fs4_shift(z, t, 0, accum); + accum = mac_fs4_shift(z, t, 1, accum); + + // Middle samples are shifted earlier in the "z" delay buffer. + accum = mac_fs4_shift_and_store(z, t, decimation_factor, 0, accum); + accum = mac_fs4_shift_and_store(z, t, decimation_factor, 1, accum); + accum = mac_fs4_shift_and_store(z, t, decimation_factor, 2, accum); + accum = mac_fs4_shift_and_store(z, t, decimation_factor, 3, accum); + accum = mac_fs4_shift_and_store(z, t, decimation_factor, 4, accum); + accum = mac_fs4_shift_and_store(z, t, decimation_factor, 5, accum); + accum = mac_fs4_shift_and_store(z, t, decimation_factor, 6, accum); + accum = mac_fs4_shift_and_store(z, t, decimation_factor, 7, accum); + + // Newest samples come from "in" buffer, are copied to "z" delay buffer. + accum = mac_fs4_shift_and_store_new_c8_samples(z, t, in, decimation_factor, 0, taps_count, accum); + accum = mac_fs4_shift_and_store_new_c8_samples(z, t, in, decimation_factor, 1, taps_count, accum); + + d[i] = scale_round_and_pack(accum, k); + } + + return { + dst.p, + count, + src.sampling_rate / decimation_factor + }; +} + // FIRC8xR16x24FS4Decim8 ////////////////////////////////////////////////// FIRC8xR16x24FS4Decim8::FIRC8xR16x24FS4Decim8() { @@ -242,6 +307,60 @@ buffer_c16_t FIRC8xR16x24FS4Decim8::execute( }; } +// FIRC16xR16x16Decim2 //////////////////////////////////////////////////// + +FIRC16xR16x16Decim2::FIRC16xR16x16Decim2() { + z_.fill({}); +} + +void FIRC16xR16x16Decim2::configure( + const std::array& taps, + const int32_t scale +) { + std::copy(taps.cbegin(), taps.cend(), taps_.begin()); + output_scale = scale; +} + +buffer_c16_t FIRC16xR16x16Decim2::execute( + buffer_c16_t src, + buffer_c16_t dst +) { + vec2_s16* const z = static_cast(__builtin_assume_aligned(z_.data(), 4)); + const vec2_s16* const t = static_cast(__builtin_assume_aligned(taps_.data(), 4)); + uint32_t* const d = static_cast(__builtin_assume_aligned(dst.p, 4)); + + const auto k = output_scale; + + const size_t count = src.count / decimation_factor; + for(size_t i=0; i(__builtin_assume_aligned(&src.p[i * decimation_factor], 4)); + + complex32_t accum; + + // Oldest samples are discarded. + accum = mac_shift(z, t, 0, accum); + + // Middle samples are shifted earlier in the "z" delay buffer. + accum = mac_shift_and_store(z, t, decimation_factor, 0, accum); + accum = mac_shift_and_store(z, t, decimation_factor, 1, accum); + accum = mac_shift_and_store(z, t, decimation_factor, 2, accum); + accum = mac_shift_and_store(z, t, decimation_factor, 3, accum); + accum = mac_shift_and_store(z, t, decimation_factor, 4, accum); + accum = mac_shift_and_store(z, t, decimation_factor, 5, accum); + + // Newest samples come from "in" buffer, are copied to "z" delay buffer. + accum = mac_shift_and_store_new_c16_samples(z, t, in, decimation_factor, 0, taps_count, accum); + + d[i] = scale_round_and_pack(accum, k); + } + + return { + dst.p, + count, + src.sampling_rate / decimation_factor + }; +} + // FIRC16xR16x32Decim8 //////////////////////////////////////////////////// FIRC16xR16x32Decim8::FIRC16xR16x32Decim8() { diff --git a/firmware/baseband/dsp_decimate.hpp b/firmware/baseband/dsp_decimate.hpp index 5079f259..97f02703 100644 --- a/firmware/baseband/dsp_decimate.hpp +++ b/firmware/baseband/dsp_decimate.hpp @@ -92,6 +92,38 @@ private: const std::array& taps; }; +class FIRC8xR16x24FS4Decim4 { +public: + static constexpr size_t taps_count = 24; + static constexpr size_t decimation_factor = 4; + + using sample_t = complex8_t; + using tap_t = int16_t; + + enum class Shift : bool { + Down = true, + Up = false + }; + + FIRC8xR16x24FS4Decim4(); + + void configure( + const std::array& taps, + const int32_t scale, + const Shift shift = Shift::Down + ); + + buffer_c16_t execute( + buffer_c8_t src, + buffer_c16_t dst + ); + +private: + std::array z_; + std::array taps_; + int32_t output_scale = 0; +}; + class FIRC8xR16x24FS4Decim8 { public: static constexpr size_t taps_count = 24; @@ -124,6 +156,32 @@ private: int32_t output_scale = 0; }; +class FIRC16xR16x16Decim2 { +public: + static constexpr size_t taps_count = 16; + static constexpr size_t decimation_factor = 2; + + using sample_t = complex16_t; + using tap_t = int16_t; + + FIRC16xR16x16Decim2(); + + void configure( + const std::array& taps, + const int32_t scale + ); + + buffer_c16_t execute( + buffer_c16_t src, + buffer_c16_t dst + ); + +private: + std::array z_; + std::array taps_; + int32_t output_scale = 0; +}; + class FIRC16xR16x32Decim8 { public: static constexpr size_t taps_count = 32;