From 567fee1d9880941488aed6b82b5811d075606fe0 Mon Sep 17 00:00:00 2001
From: heurist1 <git@heurist.co.uk>
Date: Tue, 28 Feb 2023 19:02:05 +0000
Subject: [PATCH] Create optimised integer functions to reduce CPU usage

(cherry picked from commit a47bfe1da7bffe9f752e4c522e11593cce6dffd0)
---
 firmware/common/ui.cpp      |  7 ++++
 firmware/common/ui.hpp      |  2 ++
 firmware/common/utility.cpp | 72 +++++++++++++++++++++++++++++++++++++
 firmware/common/utility.hpp |  4 +++
 4 files changed, 85 insertions(+)
diff --git a/firmware/common/ui.cpp b/firmware/common/ui.cpp
index 406f5e49..c1a44894 100644
--- a/firmware/common/ui.cpp
+++ b/firmware/common/ui.cpp
@@ -21,6 +21,7 @@
 
 #include "ui.hpp"
 #include "sine_table.hpp"
+#include "utility.hpp"
 
 #include <algorithm>
 
@@ -96,4 +97,10 @@ Point polar_to_point(float angle, uint32_t distance) {
 	             sin_f32(DEG_TO_RAD(-angle) - (pi / 2)) * distance); 
 }
 
+Point fast_polar_to_point(int32_t angle, uint32_t distance) {
+	//polar to compass with y negated for screen drawing
+	return Point((int16_sin_s4(((1<<16)*(-angle + 180))/360) * distance)/(1<<16), 
+	             (int16_sin_s4(((1<<16)*(-angle - 90))/360) * distance)/(1<<16)); 
+}
+
 } /* namespace ui */
diff --git a/firmware/common/ui.hpp b/firmware/common/ui.hpp
index ee8610fb..4dcf4fbf 100644
--- a/firmware/common/ui.hpp
+++ b/firmware/common/ui.hpp
@@ -343,6 +343,8 @@ struct TouchEvent {
 
 Point polar_to_point(float angle, uint32_t distance);
 
+Point fast_polar_to_point(int32_t angle, uint32_t distance);
+
 } /* namespace ui */
 
 #endif/*__UI_H__*/
diff --git a/firmware/common/utility.cpp b/firmware/common/utility.cpp
index b29a9b74..7e1a8ace 100644
--- a/firmware/common/utility.cpp
+++ b/firmware/common/utility.cpp
@@ -91,6 +91,78 @@ float mag2_to_dbv_norm(const float mag2) {
 	return (fast_log2(mag2) - mag2_log2_max) * mag2_to_db_factor;
 }
 
+// Integer in and out approximation
+// >40 times faster float sqrt(x*x+y*y) on Cortex M0
+// derived from https://dspguru.com/dsp/tricks/magnitude-estimator/
+int fast_int_magnitude(int y, int x)
+{
+  if(y<0){y=-y;}
+  if(x<0){x=-x;}
+  if (x>y)  {
+    return ((x*61)+(y*26)+32)/64;
+  } else {
+    return ((y*61)+(x*26)+32)/64;
+  }
+}
+
+// Integer x and y returning an integer bearing in degrees
+// Accurate to 0.5 degrees, so output scaled to whole degrees
+// >60 times faster than float atan2 on Cortex M0
+int int_atan2(int y, int x)
+{
+	// Number of bits to shift up before doing the maths.  A larger shift
+	// may beable to gain accuracy, but it would cause the correction
+	// entries to be larger than 1 byte
+	static const int bits = 10; 
+	static const int pi4 = (1 << bits);
+	static const int pi34 = (3 << bits);
+
+	// Special case
+	if (x == 0 && y == 0) { return 0; }
+
+	// Form an approximate angle
+	const int yabs = y >= 0 ? y : -y;
+	int angle;
+	if (x >= 0) {
+		angle = pi4 - pi4 * (x - yabs) / (x + yabs);
+	} else {
+		angle = pi34 - pi4 * (x + yabs) / (yabs - x);
+	}
+	// Correct the result using a lookup table
+	static const int8_t correct[32] = { 0, -23, -42, -59, -72, -83 ,-89 ,-92 ,-92 ,-88 ,-81, -71, -58, -43 ,-27, -9, 9, 27,	43,	58,	71,	81,	88,	92,	92,	89,	83,	72,	59,	42,	23,	0 };
+	static const int rnd = (1 << (bits - 1)) / 45; // Minor correction to round to correction values better (add 0.5)
+	const int idx = ((angle + rnd) >> (bits - 4)) & 0x1F;
+	angle += correct[idx];
+
+	// Scale for output in degrees
+	static const int half = (1 << (bits - 1));
+	angle = ((angle * 45)+half) >> bits; // Add on half before rounding
+	if (y < 0) { angle = -angle; }
+	return angle;
+}
+
+// 16 bit value represents a full cycle in but can handle multiples of this. 
+// Output in range +/- 16 bit value representing +/- 1.0
+// 4th order cosine approximation has very small error
+// >200 times faster tan float sin on Cortex M0
+// see https://www.coranac.com/2009/07/sines/
+int32_t int16_sin_s4(int32_t x)
+{
+	static const int qN = 14, qA = 16, qR=12, B = 19900, C = 3516;
+
+	const int32_t c = x << (30 - qN); // Semi-circle info into carry.
+	x -= 1 << qN;                     // sine -> cosine calc
+
+	x = x << (31 - qN);               // Mask with PI
+	x = x >> (31 - qN);               // Note: SIGNED shift! (to qN)
+	x = x*x >> (2 * qN - 14);         // x=x^2 To Q14
+
+	int32_t y = B - (x*C >> 14);      // B - x^2*C
+	y = (1 << qA) - (x*y >> qR);      // A - x^2*(B-x^2*C)
+
+	return c >= 0 ? y : -y;
+}
+
 /* GCD implementation derived from recursive implementation at
  * http://en.wikipedia.org/wiki/Binary_GCD_algorithm
  */
diff --git a/firmware/common/utility.hpp b/firmware/common/utility.hpp
index 2cd3fcc6..f60097bd 100644
--- a/firmware/common/utility.hpp
+++ b/firmware/common/utility.hpp
@@ -90,6 +90,10 @@ inline float magnitude_squared(const std::complex<float> c) {
 	return r2 + i2;
 }
 
+int fast_int_magnitude(int y, int x);
+int int_atan2(int y, int x);
+int32_t int16_sin_s4(int32_t x);
+
 template<class T>
 struct range_t {
 	const T minimum;