Decimators for wider IFIR output bandwidth.

This commit is contained in:
Jared Boone 2016-01-02 10:34:17 -08:00
parent 096ebe47d8
commit a02bfe55d0
2 changed files with 177 additions and 0 deletions

View File

@ -177,6 +177,71 @@ static inline uint32_t scale_round_and_pack(
return __PKHBT(saturated_real, saturated_imag, 16);
}
// FIRC8xR16x24FS4Decim4 //////////////////////////////////////////////////
FIRC8xR16x24FS4Decim4::FIRC8xR16x24FS4Decim4() {
z_.fill({});
}
void FIRC8xR16x24FS4Decim4::configure(
const std::array<tap_t, taps_count>& taps,
const int32_t scale,
const Shift shift
) {
const int negate_factor = (shift == Shift::Up) ? -1 : 1;
for(size_t i=0; i<taps.size(); i+=4) {
taps_[i+0] = taps[i+0];
taps_[i+1] = taps[i+1] * negate_factor;
taps_[i+2] = -taps[i+2];
taps_[i+3] = taps[i+3] * negate_factor;
}
output_scale = scale;
}
buffer_c16_t FIRC8xR16x24FS4Decim4::execute(
buffer_c8_t src,
buffer_c16_t dst
) {
vec2_s16* const z = static_cast<vec2_s16*>(__builtin_assume_aligned(z_.data(), 4));
const vec2_s16* const t = static_cast<vec2_s16*>(__builtin_assume_aligned(taps_.data(), 4));
uint32_t* const d = static_cast<uint32_t*>(__builtin_assume_aligned(dst.p, 4));
const auto k = output_scale;
const size_t count = src.count / decimation_factor;
for(size_t i=0; i<count; i++) {
const vec4_s8* const in = static_cast<const vec4_s8*>(__builtin_assume_aligned(&src.p[i * decimation_factor], 4));
complex32_t accum;
// Oldest samples are discarded.
accum = mac_fs4_shift(z, t, 0, accum);
accum = mac_fs4_shift(z, t, 1, accum);
// Middle samples are shifted earlier in the "z" delay buffer.
accum = mac_fs4_shift_and_store(z, t, decimation_factor, 0, accum);
accum = mac_fs4_shift_and_store(z, t, decimation_factor, 1, accum);
accum = mac_fs4_shift_and_store(z, t, decimation_factor, 2, accum);
accum = mac_fs4_shift_and_store(z, t, decimation_factor, 3, accum);
accum = mac_fs4_shift_and_store(z, t, decimation_factor, 4, accum);
accum = mac_fs4_shift_and_store(z, t, decimation_factor, 5, accum);
accum = mac_fs4_shift_and_store(z, t, decimation_factor, 6, accum);
accum = mac_fs4_shift_and_store(z, t, decimation_factor, 7, accum);
// Newest samples come from "in" buffer, are copied to "z" delay buffer.
accum = mac_fs4_shift_and_store_new_c8_samples(z, t, in, decimation_factor, 0, taps_count, accum);
accum = mac_fs4_shift_and_store_new_c8_samples(z, t, in, decimation_factor, 1, taps_count, accum);
d[i] = scale_round_and_pack(accum, k);
}
return {
dst.p,
count,
src.sampling_rate / decimation_factor
};
}
// FIRC8xR16x24FS4Decim8 //////////////////////////////////////////////////
FIRC8xR16x24FS4Decim8::FIRC8xR16x24FS4Decim8() {
@ -242,6 +307,60 @@ buffer_c16_t FIRC8xR16x24FS4Decim8::execute(
};
}
// FIRC16xR16x16Decim2 ////////////////////////////////////////////////////
FIRC16xR16x16Decim2::FIRC16xR16x16Decim2() {
z_.fill({});
}
void FIRC16xR16x16Decim2::configure(
const std::array<tap_t, taps_count>& taps,
const int32_t scale
) {
std::copy(taps.cbegin(), taps.cend(), taps_.begin());
output_scale = scale;
}
buffer_c16_t FIRC16xR16x16Decim2::execute(
buffer_c16_t src,
buffer_c16_t dst
) {
vec2_s16* const z = static_cast<vec2_s16*>(__builtin_assume_aligned(z_.data(), 4));
const vec2_s16* const t = static_cast<vec2_s16*>(__builtin_assume_aligned(taps_.data(), 4));
uint32_t* const d = static_cast<uint32_t*>(__builtin_assume_aligned(dst.p, 4));
const auto k = output_scale;
const size_t count = src.count / decimation_factor;
for(size_t i=0; i<count; i++) {
const vec2_s16* const in = static_cast<const vec2_s16*>(__builtin_assume_aligned(&src.p[i * decimation_factor], 4));
complex32_t accum;
// Oldest samples are discarded.
accum = mac_shift(z, t, 0, accum);
// Middle samples are shifted earlier in the "z" delay buffer.
accum = mac_shift_and_store(z, t, decimation_factor, 0, accum);
accum = mac_shift_and_store(z, t, decimation_factor, 1, accum);
accum = mac_shift_and_store(z, t, decimation_factor, 2, accum);
accum = mac_shift_and_store(z, t, decimation_factor, 3, accum);
accum = mac_shift_and_store(z, t, decimation_factor, 4, accum);
accum = mac_shift_and_store(z, t, decimation_factor, 5, accum);
// Newest samples come from "in" buffer, are copied to "z" delay buffer.
accum = mac_shift_and_store_new_c16_samples(z, t, in, decimation_factor, 0, taps_count, accum);
d[i] = scale_round_and_pack(accum, k);
}
return {
dst.p,
count,
src.sampling_rate / decimation_factor
};
}
// FIRC16xR16x32Decim8 ////////////////////////////////////////////////////
FIRC16xR16x32Decim8::FIRC16xR16x32Decim8() {

View File

@ -92,6 +92,38 @@ private:
const std::array<int16_t, taps_count>& taps;
};
class FIRC8xR16x24FS4Decim4 {
public:
static constexpr size_t taps_count = 24;
static constexpr size_t decimation_factor = 4;
using sample_t = complex8_t;
using tap_t = int16_t;
enum class Shift : bool {
Down = true,
Up = false
};
FIRC8xR16x24FS4Decim4();
void configure(
const std::array<tap_t, taps_count>& taps,
const int32_t scale,
const Shift shift = Shift::Down
);
buffer_c16_t execute(
buffer_c8_t src,
buffer_c16_t dst
);
private:
std::array<vec2_s16, taps_count - decimation_factor> z_;
std::array<tap_t, taps_count> taps_;
int32_t output_scale = 0;
};
class FIRC8xR16x24FS4Decim8 {
public:
static constexpr size_t taps_count = 24;
@ -124,6 +156,32 @@ private:
int32_t output_scale = 0;
};
class FIRC16xR16x16Decim2 {
public:
static constexpr size_t taps_count = 16;
static constexpr size_t decimation_factor = 2;
using sample_t = complex16_t;
using tap_t = int16_t;
FIRC16xR16x16Decim2();
void configure(
const std::array<tap_t, taps_count>& taps,
const int32_t scale
);
buffer_c16_t execute(
buffer_c16_t src,
buffer_c16_t dst
);
private:
std::array<vec2_s16, taps_count - decimation_factor> z_;
std::array<tap_t, taps_count> taps_;
int32_t output_scale = 0;
};
class FIRC16xR16x32Decim8 {
public:
static constexpr size_t taps_count = 32;