portapack-mayhem/firmware/baseband/dsp_decimate.hpp

/*
 * Copyright (C) 2014 Jared Boone, ShareBrained Technology, Inc.
 *
 * This file is part of PortaPack.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street,
 * Boston, MA 02110-1301, USA.
 */

#ifndef __DSP_DECIMATE_H__
#define __DSP_DECIMATE_H__

#include <cstdint>
#include <array>
#include <memory>
#include <algorithm>

#include "utility.hpp"

#include "dsp_types.hpp"

namespace dsp {
namespace decimate {

/* Decimation stage taking a buffer_c8_t input and producing a buffer_c16_t
 * output. Going by the class name it combines a frequency translation by
 * fs/4 with a decimate-by-2 third-order CIC filter; only the interface is
 * declared in this header, execute() is defined elsewhere.
 */
class TranslateByFSOver4AndDecimateBy2CIC3 {
public:
	/* Processes `src`, with `dst` as the caller-supplied output buffer,
	 * and returns a buffer_c16_t describing the result.
	 * NOTE(review): presumably the returned buffer refers to dst's storage
	 * with half of src's sample count -- confirm against the definition.
	 */
	buffer_c16_t execute(buffer_c8_t src, buffer_c16_t dst);

private:
	/* Two 32-bit state words, zero at construction.
	 * NOTE(review): the names suggest packed Q/I sample pairs -- confirm.
	 */
	uint32_t _q1_i0 = 0;
	uint32_t _q0_i1 = 0;
};

/* Decimation stage operating on buffer_c16_t for both input and output.
 * Going by the class name it is a decimate-by-2 third-order CIC filter;
 * only the interface is declared in this header, execute() is defined
 * elsewhere.
 */
class DecimateBy2CIC3 {
public:
	/* Processes `src`, with `dst` as the caller-supplied output buffer,
	 * and returns a buffer_c16_t describing the result.
	 * NOTE(review): presumably the returned buffer refers to dst's storage
	 * with half of src's sample count -- confirm against the definition.
	 */
	buffer_c16_t execute(buffer_c16_t src, buffer_c16_t dst);

private:
	/* Two 32-bit state words, zero at construction.
	 * NOTE(review): the names suggest packed I/Q sample pairs -- confirm.
	 */
	uint32_t _iq0 = 0;
	uint32_t _iq1 = 0;
};

/* Real-valued filter stage with a fixed tap count of 64 (taps_count).
 * Going by the class name it is a FIR filter combined with decimation
 * by 2; execute() is defined outside this header.
 *
 * Lifetime: the taps array is NOT copied. The object keeps a reference to
 * the array passed to the constructor, so that array must outlive this
 * object.
 */
class FIR64AndDecimateBy2Real {
public:
	static constexpr size_t taps_count = 64;

	/* Binds this filter to `taps`. Only a reference is stored; the caller
	 * keeps ownership and must keep the array alive.
	 */
	FIR64AndDecimateBy2Real(
		const std::array<int16_t, taps_count>& taps
	) : taps(taps)
	{
	}

	/* Processes `src`, with `dst` as the caller-supplied output buffer,
	 * and returns a buffer_s16_t describing the result.
	 * NOTE(review): presumably the result holds src.count / 2 samples --
	 * confirm against the definition.
	 */
	buffer_s16_t execute(
		buffer_s16_t src,
		buffer_s16_t dst
	);

private:
	/* Sample history, taps_count + 2 entries. Value-initialised so the
	 * first execute() call reads zeros rather than indeterminate values.
	 */
	std::array<int16_t, taps_count + 2> z { };
	/* Reference to the caller-owned coefficient array. */
	const std::array<int16_t, taps_count>& taps;
};

class FIRAndDecimateComplex {
public:
	using sample_t = complex16_t;
	using tap_t = complex16_t;

	using taps_t = tap_t[];

	/* NOTE! Current code makes an assumption that block of samples to be
	 * processed will be a multiple of the taps_count.
	 */
	FIRAndDecimateComplex(
	) : taps_count_ { 0 },
		decimation_factor_ { 1 }
	{
	}

	template<typename T>
	void configure(
		const T& taps,
		const size_t decimation_factor
	) {
		samples_ = std::make_unique<samples_t>(taps.size());
		taps_reversed_ = std::make_unique<taps_t>(taps.size());
		taps_count_ = taps.size();
		decimation_factor_ = decimation_factor;
		std::reverse_copy(taps.cbegin(), taps.cend(), &taps_reversed_[0]);
	}

	buffer_c16_t execute(
		buffer_c16_t src,
		buffer_c16_t dst
	);
	
private:
	using samples_t = sample_t[];

	std::unique_ptr<samples_t> samples_;
	std::unique_ptr<taps_t> taps_reversed_;
	size_t taps_count_;
	size_t decimation_factor_;
};

/* Real-valued decimation stage operating on buffer_s16_t. Going by the
 * class name it is a decimate-by-2 fourth-order CIC filter; execute() is
 * defined outside this header.
 */
class DecimateBy2CIC4Real {
public:
	/* Processes `src`, with `dst` as the caller-supplied output buffer,
	 * and returns a buffer_s16_t describing the result.
	 * NOTE(review): presumably the result holds src.count / 2 samples --
	 * confirm against the definition.
	 */
	buffer_s16_t execute(
		buffer_s16_t src,
		buffer_s16_t dst
	);

private:
	/* Five-entry state array. Value-initialised to zero so a freshly
	 * constructed filter never reads indeterminate values, matching the
	 * zero-initialised state of the other CIC classes in this file.
	 */
	int16_t z[5] { };
};
#if 0
/* Currently compiled out by the surrounding #if 0.
 * Declaration of a complex decimation stage operating on buffer_c16_t.
 * NOTE(review): the name suggests a decimate-by-2 half-band filter --
 * confirm; no definition of execute() is visible in this header.
 */
class DecimateBy2HBF5Complex {
public:
	/* Takes the input buffer `src` and output buffer `dst`, and returns a
	 * buffer_c16_t describing the result.
	 */
	buffer_c16_t execute(
		buffer_c16_t const src,
		buffer_c16_t const dst
	);

private:
	/* 11-entry complex state array; declared without an initialiser. */
	complex16_t z[11];
};

/* Currently compiled out by the surrounding #if 0.
 * Declaration of a complex decimation stage operating on buffer_c16_t.
 * NOTE(review): the name suggests a decimate-by-2 half-band filter --
 * confirm; no definition of execute() is visible in this header.
 */
class DecimateBy2HBF7Complex {
public:
	/* Takes the input buffer `src` and output buffer `dst`, and returns a
	 * buffer_c16_t describing the result.
	 */
	buffer_c16_t execute(
		buffer_c16_t const src,
		buffer_c16_t const dst
	);

private:
	/* 11-entry complex state array; declared without an initialiser.
	 * NOTE(review): same length as DecimateBy2HBF5Complex::z -- confirm
	 * this is intended before re-enabling the class.
	 */
	complex16_t z[11];
};
#endif
/* From http://www.dspguru.com/book/export/html/3

Here are several basic techniques to fake circular buffers:

Split the calculation: You can split any FIR calculation into its "pre-wrap"
and "post-wrap" parts. By splitting the calculation into these two parts, you
essentially can do the circular logic only once, rather than once per tap.
(See fir_double_z in FirAlgs.c above.)

Duplicate the delay line: For a FIR with N taps, use a delay line of size 2N.
Copy each sample to its proper location, as well as at location-plus-N.
Therefore, the FIR calculation's MAC loop can be done on a flat buffer of N
points, starting anywhere within the first set of N points. The second set of
N delayed samples provides the "wrap around" comparable to a true circular
buffer. (See fir_double_z in FirAlgs.c above.)

Duplicate the coefficients: This is similar to the above, except that the
duplication occurs in terms of the coefficients, not the delay line.
Compared to the previous method, this has a calculation advantage of not
having to store each incoming sample twice, and it also has a memory
advantage when the same coefficient set will be used on multiple delay lines.
(See fir_double_h in FirAlgs.c above.)

Use block processing: In block processing, you use a delay line which is a
multiple of the number of taps. You therefore only have to move the data
once per block to implement the delay-line mechanism. When the block size
becomes "large", the overhead of moving the delay line once per block
becomes negligible.
*/

#if 0
/* Sketch of an N-tap complex FIR + decimate-by-2 stage with real-valued
 * taps. Currently compiled out by the surrounding #if 0.
 *
 * Lifetime: the taps array is NOT copied; the object keeps a reference to
 * the array passed to the constructor, so that array must outlive it.
 */
template<size_t N>
class FIRAndDecimateBy2Complex {
public:
	/* Constructor name matches the class so the template is well-formed
	 * when re-enabled. The reference member is initialised with
	 * parentheses, as FIR64AndDecimateBy2Real does.
	 */
	FIRAndDecimateBy2Complex(
		const std::array<int16_t, N>& taps
	) : taps(taps)
	{
	}

	/* NOTE: no filtering is performed here yet. The function only returns
	 * a buffer made of dst.p and half of src.count.
	 */
	buffer_c16_t execute(
		buffer_c16_t const src,
		buffer_c16_t const dst
	) {
		/* int16_t input (sample count "n" must be multiple of 4)
		 * -> int16_t output, decimated by 2.
		 * taps are normalized to 1 << 16 == 1.0.
		 */

		return { dst.p, src.count / 2 };
	}

private:
	/* Sample history, treated as a circular buffer by process_one(). */
	std::array<complex16_t, N> z;
	/* Reference to the caller-owned coefficient array. */
	const std::array<int16_t, N>& taps;

	/* Computes one output sample. Taps are applied in order, first to
	 * z[start_offset .. N-1] and then to z[0 .. start_offset-1], so the
	 * wrap-around of the history buffer is handled once rather than once
	 * per tap. The 32-bit sums are divided by 65536 before being returned.
	 */
	complex<int16_t> process_one(const size_t start_offset) {
		/* data() + size() forms the one-past-the-end pointer without
		 * indexing out of range through operator[].
		 */
		const auto split = z.data() + start_offset;
		const auto end = z.data() + z.size();
		auto tap = taps.data();

		/* Accumulator; loop locals use distinct names so they do not
		 * shadow it.
		 */
		complex<int32_t> acc { 0, 0 };

		/* Pre-wrap part: start_offset up to the end of the buffer. */
		auto p = split;
		while(p < end) {
			const auto coeff = *(tap++);
			const auto c = *(p++);
			acc.real += c.real * coeff;
			acc.imag += c.imag * coeff;
		}

		/* Post-wrap part: start of the buffer up to start_offset. */
		p = z.data();
		while(p < split) {
			const auto coeff = *(tap++);
			const auto c = *(p++);
			acc.real += c.real * coeff;
			acc.imag += c.imag * coeff;
		}

		/* Explicit casts: braced initialisation rejects the implicit
		 * int32_t -> int16_t narrowing.
		 */
		return {
			static_cast<int16_t>(acc.real / 65536),
			static_cast<int16_t>(acc.imag / 65536)
		};
	}
};
#endif
} /* namespace decimate */
} /* namespace dsp */

#endif/*__DSP_DECIMATE_H__*/
Initial firmware commit. 2015-07-08 11:39:24 -04:00			`/*`
			`* Copyright (C) 2014 Jared Boone, ShareBrained Technology, Inc.`
			`*`
			`* This file is part of PortaPack.`
			`*`
			`* This program is free software; you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation; either version 2, or (at your option)`
			`* any later version.`
			`*`
			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License`
			`* along with this program; see the file COPYING. If not, write to`
			`* the Free Software Foundation, Inc., 51 Franklin Street,`
			`* Boston, MA 02110-1301, USA.`
			`*/`

			`#ifndef __DSP_DECIMATE_H__`
			`#define __DSP_DECIMATE_H__`

			`#include <cstdint>`
			`#include <array>`
Remove taps_count template arg for FIRAndDecimateBy2Complex. Use heap to allocate samples and taps buffers, so filters of different lengths can be supported. 2015-11-03 15:11:32 -05:00			`#include <memory>`
			`#include <algorithm>`

			`#include "utility.hpp"`
Initial firmware commit. 2015-07-08 11:39:24 -04:00
			`#include "dsp_types.hpp"`

			`namespace dsp {`
			`namespace decimate {`

			`class TranslateByFSOver4AndDecimateBy2CIC3 {`
			`public:`
			`buffer_c16_t execute(`
			`buffer_c8_t src,`
			`buffer_c16_t dst`
			`);`

			`private:`
			`uint32_t _q1_i0 { 0 };`
			`uint32_t _q0_i1 { 0 };`
			`};`

			`class DecimateBy2CIC3 {`
			`public:`
			`buffer_c16_t execute(`
			`buffer_c16_t src,`
			`buffer_c16_t dst`
			`);`

			`private:`
			`uint32_t _iq0 { 0 };`
			`uint32_t _iq1 { 0 };`
			`};`

			`class FIR64AndDecimateBy2Real {`
			`public:`
			`static constexpr size_t taps_count = 64;`

			`FIR64AndDecimateBy2Real(`
			`const std::array<int16_t, taps_count>& taps`
Change reference initializations errors in GCC 4.8 Resolves issue #12. 2015-07-17 15:07:38 -04:00			`) : taps(taps)`
Initial firmware commit. 2015-07-08 11:39:24 -04:00			`{`
			`}`

			`buffer_s16_t execute(`
			`buffer_s16_t src,`
			`buffer_s16_t dst`
			`);`

			`private:`
			`std::array<int16_t, taps_count + 2> z;`
			`const std::array<int16_t, taps_count>& taps;`
			`};`

FIRAndDecimateBy2Complex: expose decimation_factor, bring work function into class. 2015-11-03 19:52:42 -05:00			`class FIRAndDecimateComplex {`
Initial firmware commit. 2015-07-08 11:39:24 -04:00			`public:`
Remove taps_count template arg for FIRAndDecimateBy2Complex. Use heap to allocate samples and taps buffers, so filters of different lengths can be supported. 2015-11-03 15:11:32 -05:00			`using sample_t = complex16_t;`
			`using tap_t = complex16_t;`

			`using taps_t = tap_t[];`

Initial firmware commit. 2015-07-08 11:39:24 -04:00			`/* NOTE! Current code makes an assumption that block of samples to be`
			`* processed will be a multiple of the taps_count.`
			`*/`
FIRAndDecimateBy2Complex: expose decimation_factor, bring work function into class. 2015-11-03 19:52:42 -05:00			`FIRAndDecimateComplex(`
Default constructors and configure methods for baseband classes. 2015-11-05 13:19:05 -05:00			`) : taps_count_ { 0 },`
			`decimation_factor_ { 1 }`
Remove taps_count template arg for FIRAndDecimateBy2Complex. Use heap to allocate samples and taps buffers, so filters of different lengths can be supported. 2015-11-03 15:11:32 -05:00			`{`
			`}`

			`template<typename T>`
Default constructors and configure methods for baseband classes. 2015-11-05 13:19:05 -05:00			`void configure(`
FIRAndDecimateBy2Complex: expose decimation_factor, bring work function into class. 2015-11-03 19:52:42 -05:00			`const T& taps,`
			`const size_t decimation_factor`
Default constructors and configure methods for baseband classes. 2015-11-05 13:19:05 -05:00			`) {`
			`samples_ = std::make_unique<samples_t>(taps.size());`
			`taps_reversed_ = std::make_unique<taps_t>(taps.size());`
			`taps_count_ = taps.size();`
			`decimation_factor_ = decimation_factor;`
Remove taps_count template arg for FIRAndDecimateBy2Complex. Use heap to allocate samples and taps buffers, so filters of different lengths can be supported. 2015-11-03 15:11:32 -05:00			`std::reverse_copy(taps.cbegin(), taps.cend(), &taps_reversed_[0]);`
Initial firmware commit. 2015-07-08 11:39:24 -04:00			`}`

			`buffer_c16_t execute(`
			`buffer_c16_t src,`
			`buffer_c16_t dst`
FIRAndDecimateBy2Complex: expose decimation_factor, bring work function into class. 2015-11-03 19:52:42 -05:00			`);`

Initial firmware commit. 2015-07-08 11:39:24 -04:00			`private:`
Remove taps_count template arg for FIRAndDecimateBy2Complex. Use heap to allocate samples and taps buffers, so filters of different lengths can be supported. 2015-11-03 15:11:32 -05:00			`using samples_t = sample_t[];`

Default constructors and configure methods for baseband classes. 2015-11-05 13:19:05 -05:00			`std::unique_ptr<samples_t> samples_;`
			`std::unique_ptr<taps_t> taps_reversed_;`
			`size_t taps_count_;`
			`size_t decimation_factor_;`
Initial firmware commit. 2015-07-08 11:39:24 -04:00			`};`

			`class DecimateBy2CIC4Real {`
			`public:`
			`buffer_s16_t execute(`
			`buffer_s16_t src,`
			`buffer_s16_t dst`
			`);`

			`private:`
			`int16_t z[5];`
			`};`
			`#if 0`
			`class DecimateBy2HBF5Complex {`
			`public:`
			`buffer_c16_t execute(`
			`buffer_c16_t const src,`
			`buffer_c16_t const dst`
			`);`

			`private:`
			`complex16_t z[11];`
			`};`

			`class DecimateBy2HBF7Complex {`
			`public:`
			`buffer_c16_t execute(`
			`buffer_c16_t const src,`
			`buffer_c16_t const dst`
			`);`

			`private:`
			`complex16_t z[11];`
			`};`
			`#endif`
			`/* From http://www.dspguru.com/book/export/html/3`

			`Here are several basic techniques to fake circular buffers:`

			`Split the calculation: You can split any FIR calculation into its "pre-wrap"`
			`and "post-wrap" parts. By splitting the calculation into these two parts, you`
			`essentially can do the circular logic only once, rather than once per tap.`
			`(See fir_double_z in FirAlgs.c above.)`

			`Duplicate the delay line: For a FIR with N taps, use a delay line of size 2N.`
			`Copy each sample to its proper location, as well as at location-plus-N.`
			`Therefore, the FIR calculation's MAC loop can be done on a flat buffer of N`
			`points, starting anywhere within the first set of N points. The second set of`
			`N delayed samples provides the "wrap around" comparable to a true circular`
			`buffer. (See fir_double_z in FirAlgs.c above.)`

			`Duplicate the coefficients: This is similar to the above, except that the`
			`duplication occurs in terms of the coefficients, not the delay line.`
			`Compared to the previous method, this has a calculation advantage of not`
			`having to store each incoming sample twice, and it also has a memory`
			`advantage when the same coefficient set will be used on multiple delay lines.`
			`(See fir_double_h in FirAlgs.c above.)`

			`Use block processing: In block processing, you use a delay line which is a`
			`multiple of the number of taps. You therefore only have to move the data`
			`once per block to implement the delay-line mechanism. When the block size`
			`becomes "large", the overhead of a moving the delay line once per block`
			`becomes negligible.`
			`*/`

			`#if 0`
			`template<size_t N>`
			`class FIRAndDecimateBy2Complex {`
			`public:`
			`FIR64AndDecimateBy2Complex(`
			`const std::array<int16_t, N>& taps`
			`) : taps { taps }`
			`{`
			`}`

			`buffer_c16_t execute(`
			`buffer_c16_t const src,`
			`buffer_c16_t const dst`
			`) {`
			`/* int16_t input (sample count "n" must be multiple of 4)`
			`* -> int16_t output, decimated by 2.`
			`* taps are normalized to 1 << 16 == 1.0.`
			`*/`

			`return { dst.p, src.count / 2 };`
			`}`

			`private:`
			`std::array<complex16_t, N> z;`
			`const std::array<int16_t, N>& taps;`

			`complex<int16_t> process_one(const size_t start_offset) {`
			`const auto split = &z[start_offset];`
			`const auto end = &z[z.size()];`
			`auto tap = &taps[0];`

			`complex<int32_t> t { 0, 0 };`

			`auto p = split;`
			`while(p < end) {`
			`const auto t = *(tap++);`
			`const auto c = *(p++);`
			`t.real += c.real * t;`
			`t.imag += c.imag * t;`
			`}`

			`p = &z[0];`
			`while(p < split) {`
			`const auto t = *(tap++);`
			`const auto c = *(p++);`
			`t.real += c.real * t;`
			`t.imag += c.imag * t;`
			`}`

			`return { t.real / 65536, t.imag / 65536 };`
			`}`
			`};`
			`#endif`
			`} /* namespace decimate */`
			`} /* namespace dsp */`

			`#endif/*__DSP_DECIMATE_H__*/`