portapack-mayhem/firmware/baseband/dsp_decimate.hpp

/*
 * Copyright (C) 2014 Jared Boone, ShareBrained Technology, Inc.
 *
 * This file is part of PortaPack.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street,
 * Boston, MA 02110-1301, USA.
 */

#ifndef __DSP_DECIMATE_H__
#define __DSP_DECIMATE_H__

#include <cstdint>
#include <array>
#include <memory>
#include <algorithm>

#include "utility.hpp"

#include "dsp_types.hpp"

namespace dsp {
namespace decimate {

/* Decimation stage that takes a buffer_c8_t and produces a buffer_c16_t.
 * Only the interface is declared in this header; execute() is defined
 * out of line.
 *
 * NOTE(review): the class name suggests a decimate-by-2, third-order CIC
 * filter on 8-bit complex input -- confirm against the implementation.
 */
class Complex8DecimateBy2CIC3 {
public:
	/* Processes `src` using `dst` as the output buffer and returns a
	 * buffer_c16_t (presumably describing the samples written to `dst`
	 * -- confirm against the implementation).
	 */
	buffer_c16_t execute(buffer_c8_t src, buffer_c16_t dst);

private:
	/* Two 32-bit words, zero at construction.
	 * NOTE(review): the names suggest packed pairs of earlier I and Q
	 * samples carried from one execute() call to the next -- confirm.
	 */
	uint32_t _i1_i0 = 0;
	uint32_t _q1_q0 = 0;
};

/* Stage that takes a buffer_c8_t and produces a buffer_c16_t. Only the
 * interface is declared in this header; execute() is defined out of line.
 *
 * NOTE(review): the class name suggests a frequency translation by fs/4
 * combined with a decimate-by-2, third-order CIC filter -- confirm against
 * the implementation.
 */
class TranslateByFSOver4AndDecimateBy2CIC3 {
public:
	/* Processes `src` using `dst` as the output buffer and returns a
	 * buffer_c16_t (presumably describing the samples written to `dst`
	 * -- confirm against the implementation).
	 */
	buffer_c16_t execute(buffer_c8_t src, buffer_c16_t dst);

private:
	/* Two 32-bit words, zero at construction.
	 * NOTE(review): the names suggest each word packs one I and one Q
	 * history sample (mixed because of the translation) -- confirm.
	 */
	uint32_t _q1_i0 = 0;
	uint32_t _q0_i1 = 0;
};

/* Stage that takes and produces buffer_c16_t. Only the interface is
 * declared in this header; execute() is defined out of line.
 *
 * NOTE(review): the class name suggests a decimate-by-2, third-order CIC
 * filter -- confirm against the implementation.
 */
class DecimateBy2CIC3 {
public:
	/* Processes `src` using `dst` as the output buffer and returns a
	 * buffer_c16_t (presumably describing the samples written to `dst`
	 * -- confirm against the implementation).
	 */
	buffer_c16_t execute(buffer_c16_t src, buffer_c16_t dst);

private:
	/* Two 32-bit words, zero at construction.
	 * NOTE(review): the names suggest each word holds one packed I/Q
	 * history sample -- confirm.
	 */
	uint32_t _iq0 = 0;
	uint32_t _iq1 = 0;
};

/* Real-valued filter stage bound to a fixed table of 64 int16_t taps.
 * Only the interface is declared in this header; execute() is defined
 * out of line.
 *
 * NOTE(review): the class name suggests a 64-tap FIR followed by
 * decimation by 2 -- confirm against the implementation.
 */
class FIR64AndDecimateBy2Real {
public:
	static constexpr size_t taps_count = 64;

	/* Binds the filter to a caller-owned tap table.
	 *
	 * Only a reference to `taps` is stored, so the array must outlive
	 * this object; passing a temporary leaves a dangling reference.
	 */
	FIR64AndDecimateBy2Real(
		const std::array<int16_t, taps_count>& taps
	) : taps(taps)
	{
	}

	/* Processes `src` using `dst` as the output buffer and returns a
	 * buffer_s16_t (presumably describing the samples written to `dst`
	 * -- confirm against the implementation).
	 */
	buffer_s16_t execute(
		buffer_s16_t src,
		buffer_s16_t dst
	);

private:
	/* Sample history, two entries longer than the tap table.
	 * Value-initialized so it starts out as all zeros instead of
	 * holding indeterminate values before the first samples arrive.
	 */
	std::array<int16_t, taps_count + 2> z { };
	const std::array<int16_t, taps_count>& taps;
};

class FIRAndDecimateComplex {
public:
	using sample_t = complex16_t;
	using tap_t = complex16_t;

	using taps_t = tap_t[];

	/* NOTE! Current code makes an assumption that block of samples to be
	 * processed will be a multiple of the taps_count.
	 */
	FIRAndDecimateComplex(
	) : taps_count_ { 0 },
		decimation_factor_ { 1 }
	{
	}

	template<typename T>
	void configure(
		const T& taps,
		const size_t decimation_factor
	) {
		samples_ = std::make_unique<samples_t>(taps.size());
		taps_reversed_ = std::make_unique<taps_t>(taps.size());
		taps_count_ = taps.size();
		decimation_factor_ = decimation_factor;
		std::reverse_copy(taps.cbegin(), taps.cend(), &taps_reversed_[0]);
	}

	buffer_c16_t execute(
		buffer_c16_t src,
		buffer_c16_t dst
	);

private:
	using samples_t = sample_t[];

	std::unique_ptr<samples_t> samples_;
	std::unique_ptr<taps_t> taps_reversed_;
	size_t taps_count_;
	size_t decimation_factor_;
};

/* Real-valued stage that takes and produces buffer_s16_t. Only the
 * interface is declared in this header; execute() is defined out of line.
 *
 * NOTE(review): the class name suggests a decimate-by-2, fourth-order CIC
 * filter -- confirm against the implementation.
 */
class DecimateBy2CIC4Real {
public:
	/* Processes `src` using `dst` as the output buffer and returns a
	 * buffer_s16_t (presumably describing the samples written to `dst`
	 * -- confirm against the implementation).
	 */
	buffer_s16_t execute(
		buffer_s16_t src,
		buffer_s16_t dst
	);

private:
	/* Five int16_t state words. Value-initialized so a default-constructed
	 * object starts from zeros rather than indeterminate values.
	 */
	int16_t z[5] { };
};
#if 0
/* Complex stage that takes and produces buffer_c16_t. This declaration
 * sits inside an `#if 0` region of this header, so it is currently not
 * compiled.
 *
 * NOTE(review): the class name suggests a decimate-by-2 half-band filter
 * ("HBF5") -- confirm before re-enabling.
 */
class DecimateBy2HBF5Complex {
public:
	/* Processes `src` using `dst` as the output buffer and returns a
	 * buffer_c16_t (presumably describing the samples written to `dst`).
	 */
	buffer_c16_t execute(
		buffer_c16_t const src,
		buffer_c16_t const dst
	);

private:
	/* Eleven complex16_t state elements.
	 * NOTE(review): no initializer is given here; whether the elements
	 * start out zeroed depends on complex16_t -- confirm before
	 * re-enabling.
	 */
	complex16_t z[11];
};

/* Complex stage that takes and produces buffer_c16_t. This declaration
 * sits inside an `#if 0` region of this header, so it is currently not
 * compiled.
 *
 * NOTE(review): the class name suggests a decimate-by-2 half-band filter
 * ("HBF7") -- confirm before re-enabling.
 */
class DecimateBy2HBF7Complex {
public:
	/* Processes `src` using `dst` as the output buffer and returns a
	 * buffer_c16_t (presumably describing the samples written to `dst`).
	 */
	buffer_c16_t execute(
		buffer_c16_t const src,
		buffer_c16_t const dst
	);

private:
	/* Eleven complex16_t state elements -- the same length as in
	 * DecimateBy2HBF5Complex.
	 * NOTE(review): confirm that 11 is the intended length for this
	 * variant, and that the elements do not need explicit zeroing,
	 * before re-enabling.
	 */
	complex16_t z[11];
};
#endif
/* From http://www.dspguru.com/book/export/html/3

Here are several basic techniques to fake circular buffers:

Split the calculation: You can split any FIR calculation into its "pre-wrap"
and "post-wrap" parts. By splitting the calculation into these two parts, you
essentially can do the circular logic only once, rather than once per tap.
(See fir_double_z in FirAlgs.c above.)

Duplicate the delay line: For a FIR with N taps, use a delay line of size 2N.
Copy each sample to its proper location, as well as at location-plus-N.
Therefore, the FIR calculation's MAC loop can be done on a flat buffer of N
points, starting anywhere within the first set of N points. The second set of
N delayed samples provides the "wrap around" comparable to a true circular
buffer. (See fir_double_z in FirAlgs.c above.)

Duplicate the coefficients: This is similar to the above, except that the
duplication occurs in terms of the coefficients, not the delay line.
Compared to the previous method, this has a calculation advantage of not
having to store each incoming sample twice, and it also has a memory
advantage when the same coefficient set will be used on multiple delay lines.
(See fir_double_h in FirAlgs.c above.)

Use block processing: In block processing, you use a delay line which is a
multiple of the number of taps. You therefore only have to move the data
once per block to implement the delay-line mechanism. When the block size
becomes "large", the overhead of moving the delay line once per block
becomes negligible.
*/

#if 0
/* Draft of an N-tap complex filter with real int16_t taps. This template
 * sits inside an `#if 0` region of this header, so it is currently not
 * compiled.
 *
 * Status: execute() is still a stub -- it neither updates the delay line
 * nor calls process_one(); it only reports half of the input count.
 */
template<size_t N>
class FIRAndDecimateBy2Complex {
public:
	/* Binds the filter to a caller-owned tap table.
	 *
	 * Only a reference to `taps` is stored, so the array must outlive
	 * this object.
	 */
	FIRAndDecimateBy2Complex(
		const std::array<int16_t, N>& taps
	) : taps(taps)
	{
	}

	/* Stub: returns a buffer over dst.p holding src.count / 2 entries
	 * without writing any samples.
	 */
	buffer_c16_t execute(
		buffer_c16_t const src,
		buffer_c16_t const dst
	) {
		/* int16_t input (sample count "n" must be multiple of 4)
		 * -> int16_t output, decimated by 2.
		 * taps are normalized to 1 << 16 == 1.0.
		 */
		/* TODO: feed src through the delay line and process_one(). */

		return { dst.p, src.count / 2 };
	}

private:
	/* Delay line, value-initialized so it starts out zeroed. */
	std::array<complex16_t, N> z { };
	const std::array<int16_t, N>& taps;

	/* Computes one output sample using the "split the calculation"
	 * technique: the delay line is walked from `start_offset` to its
	 * end, then from its beginning up to `start_offset`, while the taps
	 * are consumed in order.
	 *
	 * start_offset: index into the delay line of the element paired with
	 *               the first tap; must be <= N.
	 */
	complex<int16_t> process_one(const size_t start_offset) {
		/* Pointer arithmetic on data() keeps the one-past-the-end
		 * position well-defined (indexing z[N] is not).
		 */
		const auto first = z.data();
		const auto split = first + start_offset;
		const auto last = first + z.size();
		auto tap = taps.data();

		/* 32-bit accumulator; kept distinct from the per-iteration
		 * tap value so the sums are not shadowed.
		 */
		complex<int32_t> acc { 0, 0 };

		/* Pre-wrap part: from the split point to the end. */
		auto p = split;
		while(p < last) {
			const auto tap_value = *(tap++);
			const auto c = *(p++);
			acc.real += c.real * tap_value;
			acc.imag += c.imag * tap_value;
		}

		/* Post-wrap part: from the start up to the split point. */
		p = first;
		while(p < split) {
			const auto tap_value = *(tap++);
			const auto c = *(p++);
			acc.real += c.real * tap_value;
			acc.imag += c.imag * tap_value;
		}

		/* Scale back by 1 << 16; the casts make the narrowing to
		 * 16 bits explicit.
		 */
		return {
			static_cast<int16_t>(acc.real / 65536),
			static_cast<int16_t>(acc.imag / 65536)
		};
	}
};
#endif
} /* namespace decimate */
} /* namespace dsp */

#endif/*__DSP_DECIMATE_H__*/