diff options
Diffstat (limited to 'tools/4/source/src/pkg2zip_aes_x86.c')
-rw-r--r-- | tools/4/source/src/pkg2zip_aes_x86.c | 206 |
1 files changed, 206 insertions, 0 deletions
diff --git a/tools/4/source/src/pkg2zip_aes_x86.c b/tools/4/source/src/pkg2zip_aes_x86.c new file mode 100644 index 0000000..de3a05a --- /dev/null +++ b/tools/4/source/src/pkg2zip_aes_x86.c @@ -0,0 +1,206 @@ +#include "pkg2zip_aes.h" + +#include <string.h> +#include <wmmintrin.h> // AESNI +#include <tmmintrin.h> // SSSE3 +#include "pkg2zip_aes_x86.h" + +#define AES128_INIT(ctx, x, rcon) \ +{ \ + __m128i a, b; \ + _mm_store_si128(ctx, x); \ + a = _mm_aeskeygenassist_si128(x, rcon); \ + a = _mm_shuffle_epi32(a, 0xff); \ + b = _mm_slli_si128(x, 4); \ + x = _mm_xor_si128(x, b); \ + b = _mm_slli_si128(b, 4); \ + x = _mm_xor_si128(x, b); \ + b = _mm_slli_si128(b, 4); \ + x = _mm_xor_si128(x, b); \ + x = _mm_xor_si128(x, a); \ +} + +/*void region_xor_sse(unsigned char* dst, unsigned char* src, int block_size) +{ + __m128i* dst_ptr = (__m128i*)dst; + + __m128i xmm1 = _mm_load_si128((__m128i*)src); + __m128i xmm2 = _mm_load_si128(dst_ptr); + + xmm2 = _mm_xor_si128(xmm1, xmm2); + _mm_store_si128(dst_ptr, xmm2); +}*/ + +void region_xor_sse(unsigned char* dst, unsigned char* src, int len) +{ + unsigned char * restrict p1 = __builtin_assume_aligned(src, 16); + unsigned char * restrict p2 = __builtin_assume_aligned(dst, 16); + + unsigned int i; + for (i = 0; i < len; ++i) + p2[i] = p1[i] ^ p2[i]; +} + +void xor1_sse(unsigned char *dest, unsigned char *src1, unsigned char *src2, int size) +{ + int i; + for(i = 0; i < size; i++) + { + dest[i] = src1[i] ^ src2[i]; + } +} + +void aes128_init_x86(aes128_key* ctx, const uint8_t* key) +{ + __m128i* ekey = (__m128i*)ctx->key; + + __m128i x = _mm_loadu_si128((const __m128i*)key); + AES128_INIT(ekey + 0, x, 0x01); + AES128_INIT(ekey + 1, x, 0x02); + AES128_INIT(ekey + 2, x, 0x04); + AES128_INIT(ekey + 3, x, 0x08); + AES128_INIT(ekey + 4, x, 0x10); + AES128_INIT(ekey + 5, x, 0x20); + AES128_INIT(ekey + 6, x, 0x40); + AES128_INIT(ekey + 7, x, 0x80); + AES128_INIT(ekey + 8, x, 0x1b); + AES128_INIT(ekey + 9, x, 0x36); + _mm_store_si128(ekey + 10, x); +} + +void aes128_init_dec_x86(aes128_key* ctx, const uint8_t* key) +{ + aes128_key enc; + aes128_init_x86(&enc, key); + + const __m128i* ekey = (__m128i*)&enc.key; + __m128i* dkey = (__m128i*)&ctx->key; + + _mm_store_si128(dkey + 10, _mm_load_si128(ekey + 0)); + for (size_t i = 1; i < 10; i++) + { + _mm_store_si128(dkey + 10 - i, _mm_aesimc_si128(_mm_load_si128(ekey + i))); + } + _mm_store_si128(dkey + 0, _mm_load_si128(ekey + 10)); +} + +static __m128i aes128_encrypt_x86(__m128i input, const __m128i* key) +{ + __m128i tmp = _mm_xor_si128(input, _mm_load_si128(key + 0)); + tmp = _mm_aesenc_si128(tmp, _mm_load_si128(key + 1)); + tmp = _mm_aesenc_si128(tmp, _mm_load_si128(key + 2)); + tmp = _mm_aesenc_si128(tmp, _mm_load_si128(key + 3)); + tmp = _mm_aesenc_si128(tmp, _mm_load_si128(key + 4)); + tmp = _mm_aesenc_si128(tmp, _mm_load_si128(key + 5)); + tmp = _mm_aesenc_si128(tmp, _mm_load_si128(key + 6)); + tmp = _mm_aesenc_si128(tmp, _mm_load_si128(key + 7)); + tmp = _mm_aesenc_si128(tmp, _mm_load_si128(key + 8)); + tmp = _mm_aesenc_si128(tmp, _mm_load_si128(key + 9)); + return _mm_aesenclast_si128(tmp, _mm_load_si128(key + 10)); +} + +static __m128i aes128_decrypt_x86(__m128i input, const __m128i* key) +{ + __m128i tmp = _mm_xor_si128(input, _mm_load_si128(key + 0)); + tmp = _mm_aesdec_si128(tmp, _mm_load_si128(key + 1)); + tmp = _mm_aesdec_si128(tmp, _mm_load_si128(key + 2)); + tmp = _mm_aesdec_si128(tmp, _mm_load_si128(key + 3)); + tmp = _mm_aesdec_si128(tmp, _mm_load_si128(key + 4)); + tmp = _mm_aesdec_si128(tmp, _mm_load_si128(key + 5)); + tmp = _mm_aesdec_si128(tmp, _mm_load_si128(key + 6)); + tmp = _mm_aesdec_si128(tmp, _mm_load_si128(key + 7)); + tmp = _mm_aesdec_si128(tmp, _mm_load_si128(key + 8)); + tmp = _mm_aesdec_si128(tmp, _mm_load_si128(key + 9)); + return _mm_aesdeclast_si128(tmp, _mm_load_si128(key + 10)); +} + +void aes128_ecb_encrypt_x86(const aes128_key* ctx, const uint8_t* input, uint8_t* output) +{ + const __m128i* key = (__m128i*)ctx->key; + __m128i tmp = aes128_encrypt_x86(_mm_loadu_si128((const __m128i*)input), key); + _mm_storeu_si128((__m128i*)output, tmp); +} + +void aes128_ecb_decrypt_x86(const aes128_key* ctx, const uint8_t* input, uint8_t* output) +{ + const __m128i* key = (__m128i*)ctx->key; + __m128i tmp = aes128_decrypt_x86(_mm_loadu_si128((const __m128i*)input), key); + _mm_storeu_si128((__m128i*)output, tmp); +} + +static __m128i ctr_increment(__m128i counter) +{ + __m128i swap = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + __m128i tmp = _mm_shuffle_epi8(counter, swap); + tmp = _mm_add_epi64(tmp, _mm_set_epi32(0, 0, 0, 1)); + return _mm_shuffle_epi8(tmp, swap); +} + +void aes128_ctr_xor_x86(const aes128_key* ctx, const uint8_t* iv, uint8_t* buffer, size_t size) +{ + const __m128i* key = (__m128i*)ctx->key; + __m128i counter = _mm_loadu_si128((const __m128i*)iv); + + while (size >= 16) + { + __m128i block = aes128_encrypt_x86(counter, key); + __m128i tmp = _mm_xor_si128(_mm_loadu_si128((const __m128i*)buffer), block); + _mm_storeu_si128((__m128i*)buffer, tmp); + + counter = ctr_increment(counter); + + buffer += 16; + size -= 16; + } + + if (size != 0) + { + uint8_t full[16]; + memcpy(full, buffer, size); + memset(full + size, 0, 16 - size); + + __m128i block = aes128_encrypt_x86(counter, key); + __m128i tmp = _mm_xor_si128(_mm_loadu_si128((const __m128i*)full), block); + _mm_storeu_si128((__m128i*)full, tmp); + + memcpy(buffer, full, size); + } +} + +void aes128_cmac_process_x86(const aes128_key* ctx, uint8_t* block, const uint8_t* buffer, uint32_t size) +{ + const __m128i* key = (__m128i*)ctx->key; + __m128i* data = (__m128i*)buffer; + + __m128i tmp = _mm_loadu_si128((__m128i*)block); + for (uint32_t i = 0; i < size; i += 16) + { + __m128i input = _mm_loadu_si128(data++); + tmp = _mm_xor_si128(tmp, input); + tmp = aes128_encrypt_x86(tmp, key); + } + _mm_storeu_si128((__m128i*)block, tmp); +} + +void aes128_psp_decrypt_x86(const aes128_key* ctx, const uint8_t* prev, const uint8_t* block, uint8_t* buffer, uint32_t size) +{ + const __m128i* key = (__m128i*)ctx->key; + __m128i one = _mm_setr_epi32(0, 0, 0, 1); + + __m128i x = _mm_load_si128((__m128i*)prev); + __m128i y = _mm_load_si128((__m128i*)block); + + __m128i* data = (__m128i*)buffer; + + for (uint32_t i = 0; i < size; i += 16) + { + y = _mm_add_epi32(y, one); + + __m128i out = aes128_decrypt_x86(y, key); + + out = _mm_xor_si128(out, _mm_loadu_si128(data)); + out = _mm_xor_si128(out, x); + _mm_storeu_si128(data++, out); + x = y; + } +} |