Merge pull request #1 from monero-project/master

Attempting an online merge in my own repo
This commit is contained in:
Neozaru 2014-05-21 23:51:45 +02:00
commit 7c3783f5b1
7 changed files with 445 additions and 156 deletions

View File

@ -59,8 +59,8 @@ else()
else() else()
set(STATIC_ASSERT_FLAG "-Dstatic_assert=_Static_assert") set(STATIC_ASSERT_FLAG "-Dstatic_assert=_Static_assert")
endif() endif()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11 -D_GNU_SOURCE ${MINGW_FLAG} ${STATIC_ASSERT_FLAG} ${WARNINGS} ${C_WARNINGS} ${ARCH_FLAG}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11 -D_GNU_SOURCE ${MINGW_FLAG} ${STATIC_ASSERT_FLAG} ${WARNINGS} ${C_WARNINGS} ${ARCH_FLAG} -maes")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -D_GNU_SOURCE ${MINGW_FLAG} ${WARNINGS} ${CXX_WARNINGS} ${ARCH_FLAG}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -D_GNU_SOURCE ${MINGW_FLAG} ${WARNINGS} ${CXX_WARNINGS} ${ARCH_FLAG} -maes")
if(APPLE) if(APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_TR1_TUPLE=0") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DGTEST_HAS_TR1_TUPLE=0")
endif() endif()

177
src/crypto/aesb.c Normal file
View File

@ -0,0 +1,177 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved.
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation.
This software is provided 'as is' with no explicit or implied warranties
in respect of its operation, including, but not limited to, correctness
and fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
*/
#include <stdint.h>
#if defined(__cplusplus)
extern "C"
{
#endif
/* Requested alignment, in bytes, for the lookup tables declared with ALIGN. */
#define TABLE_ALIGN 32
/* Reduction polynomial used by the f2/f4/f8 helper macros. */
#define WPOLY 0x011b
/* Number of 32-bit words in one block / one round key. */
#define N_COLS 4
/* Size of one block in bytes. */
#define AES_BLOCK_SIZE 16
/* Evaluates to 10 for a 16-byte block. */
#define RC_LENGTH (5 * (AES_BLOCK_SIZE / 4 - 2))
/*
 * ALIGN: storage alignment qualifier for the lookup tables.
 * Every compiler branch now honours TABLE_ALIGN; previously the GCC branch
 * hard-coded 16, so the table alignment differed between MSVC and GCC builds.
 * On compilers that are neither MSVC nor GCC-compatible ALIGN expands to nothing.
 */
#if defined(_MSC_VER)
#define ALIGN __declspec(align(TABLE_ALIGN))
#elif defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(TABLE_ALIGN)))
#else
#define ALIGN
#endif
/* Row selector handed to four_tables: yields the row number r, which becomes the byte index passed to bval. */
#define rf1(r,c) (r)
/*
 * Read / write the c-th 32-bit word of buffer x through a uint32_t pointer cast.
 * NOTE(review): the cast assumes x is suitably aligned for uint32_t access and
 * drops any const qualifier on x - confirm against the callers.
 */
#define word_in(x,c) (*((uint32_t*)(x)+(c)))
#define word_out(x,c,v) (*((uint32_t*)(x)+(c)) = (v))
/* Element c of state array x. */
#define s(x,c) x[c]
/* si: load word c of byte buffer x into state y; so: store word c of state x into byte buffer y. */
#define si(y,x,c) (s(y,c) = word_in(x, c))
#define so(y,x,c) word_out(y, c, s(x,c))
/*
 * Copy all four state words in (state_in) or out (state_out), and apply the
 * per-column round macro rm to columns 0..3 (round).
 * These expand to four ';'-separated statements without a do { } while (0)
 * wrapper, so they must not be used as the body of an unbraced if/for/while.
 */
#define state_in(y,x) si(y,x,0); si(y,x,1); si(y,x,2); si(y,x,3)
#define state_out(y,x) so(y,x,0); so(y,x,1); so(y,x,2); so(y,x,3)
#define round(rm,y,x,k) rm(y,x,k,0); rm(y,x,k,1); rm(y,x,k,2); rm(y,x,k,3)
/* to_byte keeps the low 8 bits; bval extracts byte n (0 = least significant) of x. */
#define to_byte(x) ((x) & 0xff)
#define bval(x,n) to_byte((x) >> (8 * (n)))
/* Select the state word feeding row r of output column c: word (r + c) mod 4 of x. */
#define fwd_var(x,r,c)\
( r == 0 ? ( c == 0 ? s(x,0) : c == 1 ? s(x,1) : c == 2 ? s(x,2) : s(x,3))\
: r == 1 ? ( c == 0 ? s(x,1) : c == 1 ? s(x,2) : c == 2 ? s(x,3) : s(x,0))\
: r == 2 ? ( c == 0 ? s(x,2) : c == 1 ? s(x,3) : c == 2 ? s(x,0) : s(x,1))\
: ( c == 0 ? s(x,3) : c == 1 ? s(x,0) : c == 2 ? s(x,1) : s(x,2)))
/* One output column of a forward round: y[c] = k[c] XOR the four t_fn table lookups for column c of x. */
#define fwd_rnd(y,x,k,c) (s(y,c) = (k)[c] ^ four_tables(x,t_use(f,n),fwd_var,rf1,c))
/*
 * sb_data(w): brace-enclosed initializer of 256 entries, applying the macro w
 * to each byte value listed below (the AES forward S-box values, in index order).
 */
#define sb_data(w) {\
w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5),\
w(0x30), w(0x01), w(0x67), w(0x2b), w(0xfe), w(0xd7), w(0xab), w(0x76),\
w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0),\
w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0),\
w(0xb7), w(0xfd), w(0x93), w(0x26), w(0x36), w(0x3f), w(0xf7), w(0xcc),\
w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15),\
w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a),\
w(0x07), w(0x12), w(0x80), w(0xe2), w(0xeb), w(0x27), w(0xb2), w(0x75),\
w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0),\
w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84),\
w(0x53), w(0xd1), w(0x00), w(0xed), w(0x20), w(0xfc), w(0xb1), w(0x5b),\
w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf),\
w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85),\
w(0x45), w(0xf9), w(0x02), w(0x7f), w(0x50), w(0x3c), w(0x9f), w(0xa8),\
w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5),\
w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2),\
w(0xcd), w(0x0c), w(0x13), w(0xec), w(0x5f), w(0x97), w(0x44), w(0x17),\
w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73),\
w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88),\
w(0x46), w(0xee), w(0xb8), w(0x14), w(0xde), w(0x5e), w(0x0b), w(0xdb),\
w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c),\
w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79),\
w(0xe7), w(0xc8), w(0x37), w(0x6d), w(0x8d), w(0xd5), w(0x4e), w(0xa9),\
w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08),\
w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6),\
w(0xe8), w(0xdd), w(0x74), w(0x1f), w(0x4b), w(0xbd), w(0x8b), w(0x8a),\
w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e),\
w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e),\
w(0xe1), w(0xf8), w(0x98), w(0x11), w(0x69), w(0xd9), w(0x8e), w(0x94),\
w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf),\
w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68),\
w(0x41), w(0x99), w(0x2d), w(0x0f), w(0xb0), w(0x54), w(0xbb), w(0x16) }
/*
 * rc_data(w): brace-enclosed initializer of ten entries, applying w to
 * 0x01, 0x02, ... 0x36 (the AES round-constant sequence).
 * Not referenced by the code visible in this file.
 */
#define rc_data(w) {\
w(0x01), w(0x02), w(0x04), w(0x08), w(0x10),w(0x20), w(0x40), w(0x80),\
w(0x1b), w(0x36) }
/* Pack four bytes into a 32-bit word; b0 becomes the least significant byte, b3 the most significant. */
#define bytes2word(b0, b1, b2, b3) (((uint32_t)(b3) << 24) | \
((uint32_t)(b2) << 16) | ((uint32_t)(b1) << 8) | (b0))
/* h0 is the identity; w0..w3 place byte p in byte position 0..3 of an otherwise zero word. */
#define h0(x) (x)
#define w0(p) bytes2word(p, 0, 0, 0)
#define w1(p) bytes2word(0, p, 0, 0)
#define w2(p) bytes2word(0, 0, p, 0)
#define w3(p) bytes2word(0, 0, 0, p)
/*
 * u0..u3: words built from p, f2(p) and f3(p); each uN is the previous one
 * with its bytes rotated by one position. These are the per-entry generators
 * used for the forward table instantiated at the end of this section.
 */
#define u0(p) bytes2word(f2(p), p, p, f3(p))
#define u1(p) bytes2word(f3(p), f2(p), p, p)
#define u2(p) bytes2word(p, f3(p), f2(p), p)
#define u3(p) bytes2word(p, p, f3(p), f2(p))
/*
 * v0..v3: the same rotation scheme using fe(p), f9(p), fd(p), fb(p).
 * Not referenced by the code visible in this file.
 */
#define v0(p) bytes2word(fe(p), f9(p), fd(p), fb(p))
#define v1(p) bytes2word(fb(p), fe(p), f9(p), fd(p))
#define v2(p) bytes2word(fd(p), fb(p), fe(p), f9(p))
#define v3(p) bytes2word(f9(p), fd(p), fb(p), fe(p))
/*
 * f2/f4/f8: shift x left by 1/2/3 bits and XOR in the matching multiple of
 * WPOLY for every bit shifted out of the low byte, i.e. multiply a byte by
 * 2/4/8 with reduction by WPOLY. The argument x is used unparenthesized and
 * more than once, so it must be a simple, side-effect-free expression.
 */
#define f2(x) ((x<<1) ^ (((x>>7) & 1) * WPOLY))
#define f4(x) ((x<<2) ^ (((x>>6) & 1) * WPOLY) ^ (((x>>6) & 2) * WPOLY))
#define f8(x) ((x<<3) ^ (((x>>5) & 1) * WPOLY) ^ (((x>>5) & 2) * WPOLY) ^ (((x>>5) & 4) * WPOLY))
/* Remaining multipliers (3, 9, 0xb, 0xd, 0xe) composed by XOR-ing the power-of-two products above. */
#define f3(x) (f2(x) ^ x)
#define f9(x) (f8(x) ^ x)
#define fb(x) (f8(x) ^ f2(x) ^ x)
#define fd(x) (f8(x) ^ f4(x) ^ x)
#define fe(x) (f8(x) ^ f4(x) ^ f2(x))
/* All three paste their arguments into the identifier t_<m><n>; e.g. t_use(f,n) names t_fn. */
#define t_dec(m,n) t_##m##n
#define t_set(m,n) t_##m##n
#define t_use(m,n) t_##m##n
/* Declare an ALIGN-qualified const table n[4][256] of type t whose four rows are b(e), b(f), b(g), b(h). */
#define d_4(t,n,b,e,f,g,h) ALIGN const t n[4][256] = { b(e), b(f), b(g), b(h) }
/*
 * XOR of four lookups, one per row r = 0..3: tab[r] is indexed by byte
 * rf(r,c) of the state word chosen by vf(x,r,c).
 */
#define four_tables(x,tab,vf,rf,c) \
(tab[0][bval(vf(x,0,c),rf(0,c))] \
^ tab[1][bval(vf(x,1,c),rf(1,c))] \
^ tab[2][bval(vf(x,2,c),rf(2,c))] \
^ tab[3][bval(vf(x,3,c),rf(3,c))])
/* Define the forward table t_fn[4][256]: row i holds u<i> applied to every sb_data entry. */
d_4(uint32_t, t_dec(f,n), sb_data, u0, u1, u2, u3);
/*
 * Apply one forward round (fwd_rnd over all four columns) to a 16-byte block.
 *
 * in          - 16-byte input block.
 * out         - 16-byte output block; may be the same buffer as in, because
 *               the input is copied into a local state before out is written.
 * expandedKey - 16 bytes of round-key material, read as four 32-bit words.
 *               Only read, hence const; this also matches the parameter type
 *               of the caller-side extern declaration in this change.
 *
 * All three buffers are accessed as uint32_t words.
 * NOTE(review): that assumes they are suitably aligned for 32-bit access on
 * the target - confirm for callers passing plain uint8_t arrays.
 * NOTE(review): the caller-side extern declaration gives a return type of
 * int, while this definition returns void; that declaration should be
 * changed to void.
 */
void aesb_single_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey)
{
    uint32_t b0[4], b1[4];
    const uint32_t *kp = (const uint32_t *) expandedKey;

    state_in(b0, in);
    round(fwd_rnd, b1, b0, kp);
    state_out(out, b1);
}
/*
 * Apply ten consecutive forward rounds to a 16-byte block. Every round uses
 * the same fwd_rnd step; round r (0..9) takes its key from the four 32-bit
 * words starting at word offset r * N_COLS of expandedKey.
 *
 * in          - 16-byte input block.
 * out         - 16-byte output block; may be the same buffer as in, because
 *               the input is copied into a local state before out is written.
 * expandedKey - at least 10 * 16 = 160 bytes of round-key material.
 *               Only read, hence const; this also matches the parameter type
 *               of the caller-side extern declaration in this change.
 *
 * All three buffers are accessed as uint32_t words.
 * NOTE(review): that assumes they are suitably aligned for 32-bit access on
 * the target - confirm against the callers.
 * NOTE(review): the caller-side extern declaration gives a return type of
 * int, while this definition returns void; that declaration should be
 * changed to void.
 */
void aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey)
{
    uint32_t b0[4], b1[4];
    const uint32_t *kp = (const uint32_t *) expandedKey;

    state_in(b0, in);

    /* The state ping-pongs between b0 and b1; after round 9 it is in b0. */
    round(fwd_rnd, b1, b0, kp);
    round(fwd_rnd, b0, b1, kp + 1 * N_COLS);
    round(fwd_rnd, b1, b0, kp + 2 * N_COLS);
    round(fwd_rnd, b0, b1, kp + 3 * N_COLS);
    round(fwd_rnd, b1, b0, kp + 4 * N_COLS);
    round(fwd_rnd, b0, b1, kp + 5 * N_COLS);
    round(fwd_rnd, b1, b0, kp + 6 * N_COLS);
    round(fwd_rnd, b0, b1, kp + 7 * N_COLS);
    round(fwd_rnd, b1, b0, kp + 8 * N_COLS);
    round(fwd_rnd, b0, b1, kp + 9 * N_COLS);

    state_out(out, b0);
}
#if defined(__cplusplus)
}
#endif

View File

@ -39,9 +39,9 @@ extern "C" {
//#define OAES_HAVE_ISAAC 1 //#define OAES_HAVE_ISAAC 1
//#endif // OAES_HAVE_ISAAC //#endif // OAES_HAVE_ISAAC
#ifndef OAES_DEBUG //#ifndef OAES_DEBUG
#define OAES_DEBUG 0 //#define OAES_DEBUG 0
#endif // OAES_DEBUG //#endif // OAES_DEBUG
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -64,30 +64,6 @@ static const char _NR[] = {
# define min(a,b) (((a)<(b)) ? (a) : (b)) # define min(a,b) (((a)<(b)) ? (a) : (b))
#endif /* min */ #endif /* min */
typedef struct _oaes_key
{
size_t data_len;
uint8_t *data;
size_t exp_data_len;
uint8_t *exp_data;
size_t num_keys;
size_t key_base;
} oaes_key;
typedef struct _oaes_ctx
{
#ifdef OAES_HAVE_ISAAC
randctx * rctx;
#endif // OAES_HAVE_ISAAC
#ifdef OAES_DEBUG
oaes_step_cb step_cb;
#endif // OAES_DEBUG
oaes_key * key;
OAES_OPTION options;
uint8_t iv[OAES_BLOCK_SIZE];
} oaes_ctx;
// "OAES<8-bit header version><8-bit type><16-bit options><8-bit flags><56-bit reserved>" // "OAES<8-bit header version><8-bit type><16-bit options><8-bit flags><56-bit reserved>"
static uint8_t oaes_header[OAES_BLOCK_SIZE] = { static uint8_t oaes_header[OAES_BLOCK_SIZE] = {

View File

@ -101,6 +101,32 @@ typedef int ( * oaes_step_cb ) (
typedef uint16_t OAES_OPTION; typedef uint16_t OAES_OPTION;
typedef struct _oaes_key
{
size_t data_len;
uint8_t *data;
size_t exp_data_len;
uint8_t *exp_data;
size_t num_keys;
size_t key_base;
} oaes_key;
typedef struct _oaes_ctx
{
#ifdef OAES_HAVE_ISAAC
randctx * rctx;
#endif // OAES_HAVE_ISAAC
#ifdef OAES_DEBUG
oaes_step_cb step_cb;
#endif // OAES_DEBUG
oaes_key * key;
OAES_OPTION options;
uint8_t iv[OAES_BLOCK_SIZE];
} oaes_ctx;
/* /*
* // usage: * // usage:
* *

View File

@ -11,141 +11,251 @@
#include "hash-ops.h" #include "hash-ops.h"
#include "oaes_lib.h" #include "oaes_lib.h"
static void (*const extra_hashes[4])(const void *, size_t, char *) = { #include <emmintrin.h>
hash_extra_blake, hash_extra_groestl, hash_extra_jh, hash_extra_skein
};
#define MEMORY (1 << 21) /* 2 MiB */ #if defined(_MSC_VER) || defined(__INTEL_COMPILER)
#include <intrin.h>
#define STATIC
#define INLINE __inline
#if !defined(RDATA_ALIGN16)
#define RDATA_ALIGN16 __declspec(align(16))
#endif
#else
#include <wmmintrin.h>
#define STATIC static
#define INLINE inline
#if !defined(RDATA_ALIGN16)
#define RDATA_ALIGN16 __attribute__ ((aligned(16)))
#endif
#endif
#define MEMORY (1 << 21) // 2MB scratchpad
#define ITER (1 << 20) #define ITER (1 << 20)
#define AES_BLOCK_SIZE 16 #define AES_BLOCK_SIZE 16
#define AES_KEY_SIZE 32 /*16*/ #define AES_KEY_SIZE 32
#define INIT_SIZE_BLK 8 #define INIT_SIZE_BLK 8
#define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE) #define INIT_SIZE_BYTE (INIT_SIZE_BLK * AES_BLOCK_SIZE)
static size_t e2i(const uint8_t* a, size_t count) { return (*((uint64_t*)a) / AES_BLOCK_SIZE) & (count - 1); } #define U64(x) ((uint64_t *) (x))
#define R128(x) ((__m128i *) (x))
static void mul(const uint8_t* a, const uint8_t* b, uint8_t* res) { extern int aesb_single_round(const uint8_t *in, uint8_t*out, const uint8_t *expandedKey);
uint64_t a0, b0; extern int aesb_pseudo_round(const uint8_t *in, uint8_t *out, const uint8_t *expandedKey);
uint64_t hi, lo;
a0 = SWAP64LE(((uint64_t*)a)[0]);
b0 = SWAP64LE(((uint64_t*)b)[0]);
lo = mul128(a0, b0, &hi);
((uint64_t*)res)[0] = SWAP64LE(hi);
((uint64_t*)res)[1] = SWAP64LE(lo);
}
static void sum_half_blocks(uint8_t* a, const uint8_t* b) {
uint64_t a0, a1, b0, b1;
a0 = SWAP64LE(((uint64_t*)a)[0]);
a1 = SWAP64LE(((uint64_t*)a)[1]);
b0 = SWAP64LE(((uint64_t*)b)[0]);
b1 = SWAP64LE(((uint64_t*)b)[1]);
a0 += b0;
a1 += b1;
((uint64_t*)a)[0] = SWAP64LE(a0);
((uint64_t*)a)[1] = SWAP64LE(a1);
}
static void copy_block(uint8_t* dst, const uint8_t* src) {
memcpy(dst, src, AES_BLOCK_SIZE);
}
static void swap_blocks(uint8_t* a, uint8_t* b) {
size_t i;
uint8_t t;
for (i = 0; i < AES_BLOCK_SIZE; i++) {
t = a[i];
a[i] = b[i];
b[i] = t;
}
}
static void xor_blocks(uint8_t* a, const uint8_t* b) {
size_t i;
for (i = 0; i < AES_BLOCK_SIZE; i++) {
a[i] ^= b[i];
}
}
#pragma pack(push, 1) #pragma pack(push, 1)
union cn_slow_hash_state { union cn_slow_hash_state
union hash_state hs; {
struct { union hash_state hs;
uint8_t k[64]; struct
uint8_t init[INIT_SIZE_BYTE]; {
}; uint8_t k[64];
uint8_t init[INIT_SIZE_BYTE];
};
}; };
#pragma pack(pop) #pragma pack(pop)
void cn_slow_hash(const void *data, size_t length, char *hash) { #if defined(_MSC_VER) || defined(__INTEL_COMPILER)
uint8_t long_state[MEMORY]; #define cpuid(info,x) __cpuidex(info,x,0)
union cn_slow_hash_state state; #else
uint8_t text[INIT_SIZE_BYTE]; void cpuid(int CPUInfo[4], int InfoType)
uint8_t a[AES_BLOCK_SIZE]; {
uint8_t b[AES_BLOCK_SIZE]; __asm__ __volatile__
uint8_t c[AES_BLOCK_SIZE]; (
uint8_t d[AES_BLOCK_SIZE]; "cpuid":
size_t i, j; "=a" (CPUInfo[0]),
uint8_t aes_key[AES_KEY_SIZE]; "=b" (CPUInfo[1]),
OAES_CTX* aes_ctx; "=c" (CPUInfo[2]),
"=d" (CPUInfo[3]) :
hash_process(&state.hs, data, length); "a" (InfoType), "c" (0)
memcpy(text, state.init, INIT_SIZE_BYTE); );
memcpy(aes_key, state.hs.b, AES_KEY_SIZE); }
aes_ctx = oaes_alloc(); #endif
oaes_key_import_data(aes_ctx, aes_key, AES_KEY_SIZE); STATIC INLINE void mul(const uint8_t *a, const uint8_t *b, uint8_t *res)
for (i = 0; i < MEMORY / INIT_SIZE_BYTE; i++) { {
for (j = 0; j < INIT_SIZE_BLK; j++) { uint64_t a0, b0;
oaes_pseudo_encrypt_ecb(aes_ctx, &text[AES_BLOCK_SIZE * j]); uint64_t hi, lo;
}
memcpy(&long_state[i * INIT_SIZE_BYTE], text, INIT_SIZE_BYTE); a0 = U64(a)[0];
} b0 = U64(b)[0];
lo = mul128(a0, b0, &hi);
for (i = 0; i < 16; i++) { U64(res)[0] = hi;
a[i] = state.k[ i] ^ state.k[32 + i]; U64(res)[1] = lo;
b[i] = state.k[16 + i] ^ state.k[48 + i]; }
}
STATIC INLINE void sum_half_blocks(uint8_t *a, const uint8_t *b)
for (i = 0; i < ITER / 2; i++) { {
/* Dependency chain: address -> read value ------+ uint64_t a0, a1, b0, b1;
* written value <-+ hard function (AES or MUL) <+ a0 = U64(a)[0];
* next address <-+ a1 = U64(a)[1];
*/ b0 = U64(b)[0];
/* Iteration 1 */ b1 = U64(b)[1];
j = e2i(a, MEMORY / AES_BLOCK_SIZE); a0 += b0;
copy_block(c, &long_state[j * AES_BLOCK_SIZE]); a1 += b1;
oaes_encryption_round(a, c); U64(a)[0] = a0;
xor_blocks(b, c); U64(a)[1] = a1;
swap_blocks(b, c); }
copy_block(&long_state[j * AES_BLOCK_SIZE], c);
assert(j == e2i(a, MEMORY / AES_BLOCK_SIZE)); STATIC INLINE void swap_blocks(uint8_t *a, uint8_t *b)
swap_blocks(a, b); {
/* Iteration 2 */ uint64_t t[2];
j = e2i(a, MEMORY / AES_BLOCK_SIZE); U64(t)[0] = U64(a)[0];
copy_block(c, &long_state[j * AES_BLOCK_SIZE]); U64(t)[1] = U64(a)[1];
mul(a, c, d); U64(a)[0] = U64(b)[0];
sum_half_blocks(b, d); U64(a)[1] = U64(b)[1];
swap_blocks(b, c); U64(b)[0] = U64(t)[0];
xor_blocks(b, c); U64(b)[1] = U64(t)[1];
copy_block(&long_state[j * AES_BLOCK_SIZE], c); }
assert(j == e2i(a, MEMORY / AES_BLOCK_SIZE));
swap_blocks(a, b); STATIC INLINE void xor_blocks(uint8_t *a, const uint8_t *b)
} {
U64(a)[0] ^= U64(b)[0];
memcpy(text, state.init, INIT_SIZE_BYTE); U64(a)[1] ^= U64(b)[1];
oaes_key_import_data(aes_ctx, &state.hs.b[32], AES_KEY_SIZE); }
for (i = 0; i < MEMORY / INIT_SIZE_BYTE; i++) {
for (j = 0; j < INIT_SIZE_BLK; j++) { STATIC INLINE int check_aes_hw(void)
xor_blocks(&text[j * AES_BLOCK_SIZE], &long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]); {
oaes_pseudo_encrypt_ecb(aes_ctx, &text[j * AES_BLOCK_SIZE]); int cpuid_results[4];
} static int supported = -1;
}
memcpy(state.init, text, INIT_SIZE_BYTE); if(supported >= 0)
hash_permutation(&state.hs); return supported;
/*memcpy(hash, &state, 32);*/
extra_hashes[state.hs.b[0] & 3](&state, 200, hash); cpuid(cpuid_results,1);
oaes_free(&aes_ctx); return supported = cpuid_results[2] & (1 << 25);
}
/*
 * Run ten _mm_aesenc_si128 steps over one 16-byte block, using the ten
 * consecutive 16-byte round keys found at expandedKey.
 *
 * in and out are accessed with unaligned load/store intrinsics; the round
 * keys are read by direct __m128i dereference.
 * NOTE(review): that dereference presumably requires expandedKey to be
 * 16-byte aligned - confirm against the callers.
 */
STATIC INLINE void aesni_pseudo_round(const uint8_t *in, uint8_t *out,
                                      const uint8_t *expandedKey)
{
    __m128i *round_keys = R128(expandedKey);
    __m128i block = _mm_loadu_si128(R128(in));
    int r;

    for(r = 0; r < 10; r++)
        block = _mm_aesenc_si128(block, round_keys[r]);

    _mm_storeu_si128(R128(out), block);
}
void cn_slow_hash(const void *data, size_t length, char *hash)
{
uint8_t long_state[MEMORY];
uint8_t text[INIT_SIZE_BYTE];
uint8_t a[AES_BLOCK_SIZE];
uint8_t b[AES_BLOCK_SIZE];
uint8_t d[AES_BLOCK_SIZE];
uint8_t aes_key[AES_KEY_SIZE];
RDATA_ALIGN16 uint8_t expandedKey[256];
union cn_slow_hash_state state;
size_t i, j;
uint8_t *p = NULL;
oaes_ctx *aes_ctx;
int useAes = check_aes_hw();
static void (*const extra_hashes[4])(const void *, size_t, char *) =
{
hash_extra_blake, hash_extra_groestl, hash_extra_jh, hash_extra_skein
};
hash_process(&state.hs, data, length);
memcpy(text, state.init, INIT_SIZE_BYTE);
aes_ctx = (oaes_ctx *) oaes_alloc();
oaes_key_import_data(aes_ctx, state.hs.b, AES_KEY_SIZE);
// use aligned data
memcpy(expandedKey, aes_ctx->key->exp_data, aes_ctx->key->exp_data_len);
if(useAes)
{
for(i = 0; i < MEMORY / INIT_SIZE_BYTE; i++)
{
for(j = 0; j < INIT_SIZE_BLK; j++)
aesni_pseudo_round(&text[AES_BLOCK_SIZE * j], &text[AES_BLOCK_SIZE * j], expandedKey);
memcpy(&long_state[i * INIT_SIZE_BYTE], text, INIT_SIZE_BYTE);
}
}
else
{
for(i = 0; i < MEMORY / INIT_SIZE_BYTE; i++)
{
for(j = 0; j < INIT_SIZE_BLK; j++)
aesb_pseudo_round(&text[AES_BLOCK_SIZE * j], &text[AES_BLOCK_SIZE * j], expandedKey);
memcpy(&long_state[i * INIT_SIZE_BYTE], text, INIT_SIZE_BYTE);
}
}
U64(a)[0] = U64(&state.k[0])[0] ^ U64(&state.k[32])[0];
U64(a)[1] = U64(&state.k[0])[1] ^ U64(&state.k[32])[1];
U64(b)[0] = U64(&state.k[16])[0] ^ U64(&state.k[48])[0];
U64(b)[1] = U64(&state.k[16])[1] ^ U64(&state.k[48])[1];
for(i = 0; i < ITER / 2; i++)
{
#define TOTALBLOCKS (MEMORY / AES_BLOCK_SIZE)
#define state_index(x) (((*((uint64_t *)x) >> 4) & (TOTALBLOCKS - 1)) << 4)
// Iteration 1
p = &long_state[state_index(a)];
if(useAes)
_mm_storeu_si128(R128(p), _mm_aesenc_si128(_mm_loadu_si128(R128(p)), _mm_loadu_si128(R128(a))));
else
aesb_single_round(p, p, a);
xor_blocks(b, p);
swap_blocks(b, p);
swap_blocks(a, b);
// Iteration 2
p = &long_state[state_index(a)];
mul(a, p, d);
sum_half_blocks(b, d);
swap_blocks(b, p);
xor_blocks(b, p);
swap_blocks(a, b);
}
memcpy(text, state.init, INIT_SIZE_BYTE);
oaes_key_import_data(aes_ctx, &state.hs.b[32], AES_KEY_SIZE);
memcpy(expandedKey, aes_ctx->key->exp_data, aes_ctx->key->exp_data_len);
if(useAes)
{
for(i = 0; i < MEMORY / INIT_SIZE_BYTE; i++)
{
for(j = 0; j < INIT_SIZE_BLK; j++)
{
xor_blocks(&text[j * AES_BLOCK_SIZE], &long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]);
aesni_pseudo_round(&text[j * AES_BLOCK_SIZE], &text[j * AES_BLOCK_SIZE], expandedKey);
}
}
}
else
{
for(i = 0; i < MEMORY / INIT_SIZE_BYTE; i++)
{
for(j = 0; j < INIT_SIZE_BLK; j++)
{
xor_blocks(&text[j * AES_BLOCK_SIZE], &long_state[i * INIT_SIZE_BYTE + j * AES_BLOCK_SIZE]);
aesb_pseudo_round(&text[AES_BLOCK_SIZE * j], &text[AES_BLOCK_SIZE * j], expandedKey);
}
}
}
oaes_free((OAES_CTX **) &aes_ctx);
memcpy(state.init, text, INIT_SIZE_BYTE);
hash_permutation(&state.hs);
extra_hashes[state.hs.b[0] & 3](&state, 200, hash);
} }

View File

@ -1,4 +1,4 @@
#define BUILD_COMMIT_ID "@VERSION@" #define BUILD_COMMIT_ID "@VERSION@"
#define PROJECT_VERSION "0.8.6" #define PROJECT_VERSION "0.8.8.2"
#define PROJECT_VERSION_BUILD_NO "295" #define PROJECT_VERSION_BUILD_NO "1"
#define PROJECT_VERSION_LONG PROJECT_VERSION "." PROJECT_VERSION_BUILD_NO "(" BUILD_COMMIT_ID ")" #define PROJECT_VERSION_LONG PROJECT_VERSION "." PROJECT_VERSION_BUILD_NO "(" BUILD_COMMIT_ID ")"