Merge pull request #2078

1dd52415 Fix #1991 asm multiply again (Howard Chu)
This commit is contained in:
Riccardo Spagni 2017-06-18 17:32:03 +02:00
commit f973a2f81a
No known key found for this signature in database
GPG Key ID: 55432DF31CCD4FCD

View File

@@ -979,34 +979,31 @@ STATIC void cn_mul128(const uint64_t *a, const uint64_t *b, uint64_t *r)
r[1] = lo; r[1] = lo;
} }
#else /* ARM32 */ #else /* ARM32 */
/* Can work as inline, but actually runs slower. Keep it separate */
/*
 * NOTE(review): every line in this region appears to hold two copies of the
 * code -- the pre-change text followed by the post-change text of a
 * side-by-side diff. Lines with a single copy look like they belong to one
 * side only. Confirm against the repository before treating this as source.
 *
 * mul(a, b, c)
 *   Casts its three arguments to 32-bit word pointers and forwards them to
 *   cn_mul128().
 *
 * cn_mul128(aa, bb, r)  -- ARM32 inline-assembly multiply
 *   Reads aa[0], aa[1], bb[0], bb[1] and writes r[0]..r[3].
 *   In the asm, a = aa[0], A = aa[1], b = bb[0], B = bb[1]; aa[0] and bb[0]
 *   are treated as the low 32-bit words of each operand.
 *   The four 32x32 partial products are combined with umull/umlal so that:
 *     r[2] = lowest word of the product          (from a*b)
 *     r[3] = next word                           (a*b high + a*B + A*b)
 *     r[0], r[1] = upper two words               (A*B plus carried-in words)
 *   i.e. the high 64 bits land in r[0..1] and the low 64 bits in r[2..3].
 *
 *   Difference between the two texts: the pre-change text zeroes and then
 *   accumulates into %[a] and %[b], which are declared as input-only "r"
 *   operands, and addresses the result through a pointer operand %[r]. The
 *   post-change text accumulates into dedicated temporaries t2/t3
 *   (initialised to 0, declared "+r") and stores through named "=m"
 *   operands hl/hh/ll/lh instead.
 *
 *   The flags are modified by adds/adc, hence the "cc" clobber.
 */
#define mul(a, b, c) cn_mul128((const uint32_t *)a, (const uint32_t *)b, (uint32_t *)c) #define mul(a, b, c) cn_mul128((const uint32_t *)a, (const uint32_t *)b, (uint32_t *)c)
void cn_mul128(const uint32_t *aa, const uint32_t *bb, uint32_t *r) STATIC void cn_mul128(const uint32_t *aa, const uint32_t *bb, uint32_t *r)
{ {
uint32_t t0, t1; uint32_t t0, t1, t2=0, t3=0;
__asm__ __volatile__( __asm__ __volatile__(
/* umull: t1:t0 = a * b (t0 = low word, t1 = high word); the low word is stored to r[2] */
"umull %[t0], %[t1], %[a], %[b]\n\t" "umull %[t0], %[t1], %[a], %[b]\n\t"
"str %[t0], [%[r], #8]\n\t" "str %[t0], %[ll]\n\t"
// accumulating with 0 can never overflow/carry // accumulating with 0 can never overflow/carry
"mov %[t0], #0\n\t" "eor %[t0], %[t0]\n\t"
/* umlal: the 64-bit pair t0:t1 (t0 cleared just above) += a * B */
"umlal %[t1], %[t0], %[a], %[B]\n\t" "umlal %[t1], %[t0], %[a], %[B]\n\t"
"mov %[a], #0\n\t" "umlal %[t1], %[t2], %[A], %[b]\n\t"
"umlal %[t1], %[a], %[A], %[b]\n\t" "str %[t1], %[lh]\n\t"
"str %[t1], [%[r], #12]\n\t"
"mov %[b], #0\n\t" "umlal %[t0], %[t3], %[A], %[B]\n\t"
"umlal %[t0], %[b], %[A], %[B]\n\t"
// final add may have a carry // final add may have a carry
/* add the carry word left over from the A*b step into the A*B sum; adc propagates the carry out into the top word */
"adds %[t0], %[t0], %[a]\n\t" "adds %[t0], %[t0], %[t2]\n\t"
"adc %[t1], %[b], #0\n\t" "adc %[t1], %[t3], #0\n\t"
/* upper 64 bits of the result: low word to r[0], high word to r[1] */
"str %[t0], [%[r]]\n\t" "str %[t0], %[hl]\n\t"
"str %[t1], [%[r], #4]\n\t" "str %[t1], %[hh]\n\t"
: [t0]"=&r"(t0), [t1]"=&r"(t1), "=m"(r[0]), "=m"(r[1]), "=m"(r[2]), "=m"(r[3]) : [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"+r"(t2), [t3]"+r"(t3), [hl]"=m"(r[0]), [hh]"=m"(r[1]), [ll]"=m"(r[2]), [lh]"=m"(r[3])
: [A]"r"(aa[1]), [a]"r"(aa[0]), [B]"r"(bb[1]), [b]"r"(bb[0]), [r]"r"(r) : [A]"r"(aa[1]), [a]"r"(aa[0]), [B]"r"(bb[1]), [b]"r"(bb[0])
: "cc"); : "cc");
} }
#endif /* !aarch64 */ #endif /* !aarch64 */