From dd0048db8d30713d46f0a0add934c768a3c51376 Mon Sep 17 00:00:00 2001 From: Jared Boone Date: Wed, 3 May 2017 09:54:41 +0100 Subject: [PATCH] Remove broken simd32_t type. --- firmware/baseband/channel_stats_collector.hpp | 11 +-- firmware/baseband/dsp_decimate.cpp | 78 +++++++++---------- firmware/baseband/dsp_demodulate.cpp | 27 ++++--- .../os/hal/platforms/LPC43xx_M4/lpc43xx_m4.h | 6 ++ firmware/common/simd.hpp | 24 ------ 5 files changed, 61 insertions(+), 85 deletions(-) diff --git a/firmware/baseband/channel_stats_collector.hpp b/firmware/baseband/channel_stats_collector.hpp index 8e69448e..daab0822 100644 --- a/firmware/baseband/channel_stats_collector.hpp +++ b/firmware/baseband/channel_stats_collector.hpp @@ -23,7 +23,6 @@ #define __CHANNEL_STATS_COLLECTOR_H__ #include "dsp_types.hpp" -#include "simd.hpp" #include "message.hpp" #include "utility.hpp" @@ -36,7 +35,7 @@ class ChannelStatsCollector { public: template void feed(const buffer_c16_t& src, Callback callback) { - max_squared = compute_max_squared(src, max_squared); + max_squared = compute_max_squared(src, max_squared); count += src.count; const size_t samples_per_update = src.sampling_rate * update_interval; @@ -60,11 +59,9 @@ private: const buffer_c16_t& src, uint32_t max_squared ) { - auto p = simd32_ptr(src.p); - const auto end_p = simd32_ptr(&src.p[src.count]); - - while(p < end_p) { - const uint32_t sample = *(p++); + auto src_p = src.p; + while(src_p < &src.p[src.count]) { + const uint32_t sample = *__SIMD32(src_p)++; const uint32_t mag_sq = __SMUAD(sample, sample); if( mag_sq > max_squared ) { max_squared = mag_sq; diff --git a/firmware/baseband/dsp_decimate.cpp b/firmware/baseband/dsp_decimate.cpp index 09aa45df..93dbc602 100644 --- a/firmware/baseband/dsp_decimate.cpp +++ b/firmware/baseband/dsp_decimate.cpp @@ -21,8 +21,6 @@ #include "dsp_decimate.hpp" -#include "simd.hpp" - #include namespace dsp { @@ -568,39 +566,39 @@ buffer_c16_t DecimateBy2CIC3::execute( uint32_t t1 = _iq0; uint32_t t2 = _iq1; const uint32_t taps = 0x00000003; - auto s = simd32_ptr(&src.p[0]); - auto d = simd32_ptr(&dst.p[0]); - const auto d_end = simd32_ptr(&dst.p[src.count / 2]); + auto s = src.p; + auto d = dst.p; + const auto d_end = &dst.p[src.count / 2]; while(d < d_end) { uint32_t i = __SXTH(t1, 0); /* 1: I0 */ uint32_t q = __SXTH(t1, 16); /* 1: Q0 */ i = __SMLABB(t2, taps, i); /* 1: I1*3 + I0 */ q = __SMLATB(t2, taps, q); /* 1: Q1*3 + Q0 */ - const uint32_t t3 = *(s++); /* 3: Q2:I2 */ - const uint32_t t4 = *(s++); /* Q3:I3 */ + const uint32_t t3 = *__SIMD32(s)++; /* 3: Q2:I2 */ + const uint32_t t4 = *__SIMD32(s)++; /* Q3:I3 */ i = __SMLABB(t3, taps, i); /* 1: I2*3 + I1*3 + I0 */ q = __SMLATB(t3, taps, q); /* 1: Q2*3 + Q1*3 + Q0 */ int32_t si0 = __SXTAH(i, t4, 0); /* 1: I3 + Q2*3 + Q1*3 + Q0 */ int32_t sq0 = __SXTAH(q, t4, 16); /* 1: Q3 + Q2*3 + Q1*3 + Q0 */ i = __BFI(si0 / 8, sq0 / 8, 16, 16); /* 1: D2_Q0:D2_I0 */ - *(d++) = i; /* D2_Q0:D2_I0 */ + *__SIMD32(d)++ = i; /* D2_Q0:D2_I0 */ i = __SXTH(t3, 0); /* 1: I2 */ q = __SXTH(t3, 16); /* 1: Q2 */ i = __SMLABB(t4, taps, i); /* 1: I3*3 + I2 */ q = __SMLATB(t4, taps, q); /* 1: Q3*3 + Q2 */ - t1 = *(s++); /* 3: Q4:I4 */ - t2 = *(s++); /* Q5:I5 */ + t1 = *__SIMD32(s)++; /* 3: Q4:I4 */ + t2 = *__SIMD32(s)++; /* Q5:I5 */ i = __SMLABB(t1, taps, i); /* 1: I4*3 + I3*3 + I2 */ q = __SMLATB(t1, taps, q); /* 1: Q4*3 + Q3*3 + Q2 */ int32_t si1 = __SXTAH(i, t2, 0) ; /* 1: I5 + Q4*3 + Q3*3 + Q2 */ int32_t sq1 = __SXTAH(q, t2, 16); /* 1: Q5 + Q4*3 + Q3*3 + Q2 */ i = __BFI(si1 / 8, sq1 / 8, 16, 16); /* 1: D2_Q1:D2_I1 */ - *(d++) = i; /* D2_Q1:D2_I1 */ + *__SIMD32(d)++ = i; /* D2_Q1:D2_I1 */ } _iq0 = t1; _iq1 = t2; @@ -667,57 +665,57 @@ buffer_c16_t FIRAndDecimateComplex::execute( const auto output_sampling_rate = src.sampling_rate / decimation_factor_; const size_t output_samples = src.count / decimation_factor_; - auto dst_p = simd32_ptr(dst.p); + sample_t* dst_p = dst.p; const buffer_c16_t result { dst.p, output_samples, output_sampling_rate }; - auto src_p = simd32_ptr(src.p); + const sample_t* src_p = src.p; size_t outer_count = output_samples; while(outer_count > 0) { /* Put new samples into delay buffer */ - auto z_new_p = simd32_ptr(&samples_[taps_count_ - decimation_factor_]); + auto z_new_p = &samples_[taps_count_ - decimation_factor_]; for(size_t i=0; i 0) { - const auto tap0 = *(t_p++); - const auto sample0 = *(z_p++); - const auto tap1 = *(t_p++); - const auto sample1 = *(z_p++); + const auto tap0 = *__SIMD32(t_p)++; + const auto sample0 = *__SIMD32(z_p)++; + const auto tap1 = *__SIMD32(t_p)++; + const auto sample1 = *__SIMD32(z_p)++; t_real = __SMLSLD(sample0, tap0, t_real); t_imag = __SMLALDX(sample0, tap0, t_imag); t_real = __SMLSLD(sample1, tap1, t_real); t_imag = __SMLALDX(sample1, tap1, t_imag); - const auto tap2 = *(t_p++); - const auto sample2 = *(z_p++); - const auto tap3 = *(t_p++); - const auto sample3 = *(z_p++); + const auto tap2 = *__SIMD32(t_p)++; + const auto sample2 = *__SIMD32(z_p)++; + const auto tap3 = *__SIMD32(t_p)++; + const auto sample3 = *__SIMD32(z_p)++; t_real = __SMLSLD(sample2, tap2, t_real); t_imag = __SMLALDX(sample2, tap2, t_imag); t_real = __SMLSLD(sample3, tap3, t_real); t_imag = __SMLALDX(sample3, tap3, t_imag); - const auto tap4 = *(t_p++); - const auto sample4 = *(z_p++); - const auto tap5 = *(t_p++); - const auto sample5 = *(z_p++); + const auto tap4 = *__SIMD32(t_p)++; + const auto sample4 = *__SIMD32(z_p)++; + const auto tap5 = *__SIMD32(t_p)++; + const auto sample5 = *__SIMD32(z_p)++; t_real = __SMLSLD(sample4, tap4, t_real); t_imag = __SMLALDX(sample4, tap4, t_imag); t_real = __SMLSLD(sample5, tap5, t_real); t_imag = __SMLALDX(sample5, tap5, t_imag); - const auto tap6 = *(t_p++); - const auto sample6 = *(z_p++); - const auto tap7 = *(t_p++); - const auto sample7 = *(z_p++); + const auto tap6 = *__SIMD32(t_p)++; + const auto sample6 = *__SIMD32(z_p)++; + const auto tap7 = *__SIMD32(t_p)++; + const auto sample7 = *__SIMD32(z_p)++; t_real = __SMLSLD(sample6, tap6, t_real); t_imag = __SMLALDX(sample6, tap6, t_imag); t_real = __SMLSLD(sample7, tap7, t_real); @@ -733,7 +731,7 @@ buffer_c16_t FIRAndDecimateComplex::execute( const int32_t i = t_imag >> 16; const int32_t r_sat = __SSAT(r, 16); const int32_t i_sat = __SSAT(i, 16); - *(dst_p++) = __PKHBT( + *__SIMD32(dst_p)++ = __PKHBT( r_sat, i_sat, 16 @@ -743,14 +741,14 @@ buffer_c16_t FIRAndDecimateComplex::execute( const size_t unroll_factor = 4; size_t shift_count = (taps_count_ - decimation_factor_) / unroll_factor; - auto t = simd32_ptr(&samples_[0]); - auto s = simd32_ptr(&samples_[decimation_factor_]); + sample_t* t = &samples_[0]; + const sample_t* s = &samples_[decimation_factor_]; while(shift_count > 0) { - *(t++) = *(s++); - *(t++) = *(s++); - *(t++) = *(s++); - *(t++) = *(s++); + *__SIMD32(t)++ = *__SIMD32(s)++; + *__SIMD32(t)++ = *__SIMD32(s)++; + *__SIMD32(t)++ = *__SIMD32(s)++; + *__SIMD32(t)++ = *__SIMD32(s)++; shift_count--; } diff --git a/firmware/baseband/dsp_demodulate.cpp b/firmware/baseband/dsp_demodulate.cpp index 22348a35..cf79d0d0 100644 --- a/firmware/baseband/dsp_demodulate.cpp +++ b/firmware/baseband/dsp_demodulate.cpp @@ -24,7 +24,6 @@ #include "complex.hpp" #include "fxpt_atan2.hpp" #include "utility_m4.hpp" -#include "simd.hpp" #include @@ -35,12 +34,12 @@ buffer_f32_t AM::execute( const buffer_c16_t& src, const buffer_f32_t& dst ) { - auto src_p = simd32_ptr(src.p); - auto src_end = simd32_ptr(&src.p[src.count]); + const auto src_p = src.p; + const auto src_end = &src.p[src.count]; auto dst_p = dst.p; while(src_p < src_end) { - const uint32_t sample0 = *(src_p++); - const uint32_t sample1 = *(src_p++); + const uint32_t sample0 = *__SIMD32(src_p)++; + const uint32_t sample1 = *__SIMD32(src_p)++; const uint32_t mag_sq0 = __SMUAD(sample0, sample0); const uint32_t mag_sq1 = __SMUAD(sample1, sample1); *(dst_p++) = __builtin_sqrtf(mag_sq0) * k; @@ -91,12 +90,12 @@ buffer_f32_t FM::execute( ) { auto z = z_; - auto src_p = simd32_ptr(src.p); - auto src_end = simd32_ptr(&src.p[src.count]); + const auto src_p = src.p; + const auto src_end = &src.p[src.count]; auto dst_p = dst.p; while(src_p < src_end) { - const auto s0 = *(src_p++); - const auto s1 = *(src_p++); + const auto s0 = *__SIMD32(src_p)++; + const auto s1 = *__SIMD32(src_p)++; const auto t0 = multiply_conjugate_s16_s32(s0, z); const auto t1 = multiply_conjugate_s16_s32(s1, s0); z = s1; @@ -114,12 +113,12 @@ buffer_s16_t FM::execute( ) { auto z = z_; - auto src_p = simd32_ptr(src.p); - auto src_end = simd32_ptr(&src.p[src.count]); + const auto src_p = src.p; + const auto src_end = &src.p[src.count]; auto dst_p = dst.p; while(src_p < src_end) { - const auto s0 = *(src_p++); - const auto s1 = *(src_p++); + const auto s0 = *__SIMD32(src_p)++; + const auto s1 = *__SIMD32(src_p)++; const auto t0 = multiply_conjugate_s16_s32(s0, z); const auto t1 = multiply_conjugate_s16_s32(s1, s0); z = s1; @@ -127,7 +126,7 @@ buffer_s16_t FM::execute( const int32_t theta0_sat = __SSAT(theta0_int, 16); const int32_t theta1_int = angle_approx_0deg27(t1) * ks16; const int32_t theta1_sat = __SSAT(theta1_int, 16); - *(dst_p++) = __PKHBT( + *__SIMD32(dst_p)++ = __PKHBT( theta0_sat, theta1_sat, 16 diff --git a/firmware/chibios-portapack/os/hal/platforms/LPC43xx_M4/lpc43xx_m4.h b/firmware/chibios-portapack/os/hal/platforms/LPC43xx_M4/lpc43xx_m4.h index 4a80e82a..d56457ae 100644 --- a/firmware/chibios-portapack/os/hal/platforms/LPC43xx_M4/lpc43xx_m4.h +++ b/firmware/chibios-portapack/os/hal/platforms/LPC43xx_M4/lpc43xx_m4.h @@ -125,6 +125,12 @@ typedef enum IRQn { #ifdef __cplusplus +/* NOTE: Override old, misbehaving SIMD #defines */ + +#define __SIMD32_TYPE int32_t +#define __SIMD32(addr) (*(__SIMD32_TYPE **) & (addr)) +#define _SIMD32_OFFSET(addr) (*(__SIMD32_TYPE *) (addr)) + /* Overload of __SXTB16() to add ROR argument, since using __ROR() as an * argument to the existing __SXTB16() doesn't produce optimum/sane code. */ diff --git a/firmware/common/simd.hpp b/firmware/common/simd.hpp index 1724066b..c2e9be95 100644 --- a/firmware/common/simd.hpp +++ b/firmware/common/simd.hpp @@ -28,30 +28,6 @@ #include -template -struct simd32_t { - union { - uint32_t raw; - T vec; - }; - - operator uint32_t() const { - return raw; - } - - simd32_t& operator=(uint32_t v) { - raw = v; - return *this; - } - - static_assert(sizeof(raw) == sizeof(vec), "simd32_t types are not the same size."); -}; - -template -simd32_t* simd32_ptr(T* const p) { - return reinterpret_cast*>(p); -} - struct vec4_s8 { union { int8_t v[4];