From dec848f0721be8afd06a8c4c5e1dd97cb8476b2d Mon Sep 17 00:00:00 2001 From: yangfl Date: Wed, 27 Jun 2018 17:09:46 +0800 Subject: [PATCH] use builtin __AVX__ and __AES__ macros and reduce code duplication --- Makefile.homebrew | 2 +- Makefile.linux | 2 +- Makefile.mingw | 2 +- Makefile.osx | 2 +- build/CMakeLists.txt | 7 +- libi2pd/CPU.cpp | 14 +++- libi2pd/Crypto.cpp | 162 ++++++++++++++----------------------------- libi2pd/Crypto.h | 23 +++--- libi2pd/Identity.cpp | 39 ++++++----- 9 files changed, 104 insertions(+), 149 deletions(-) diff --git a/Makefile.homebrew b/Makefile.homebrew index a6e645ee..6830b051 100644 --- a/Makefile.homebrew +++ b/Makefile.homebrew @@ -34,7 +34,7 @@ endif # Seems like all recent Mac's have AES-NI, after firmware upgrade 2.2 # Found no good way to detect it from command line. TODO: Might be some osx sysinfo magic ifeq ($(USE_AESNI),yes) - CXXFLAGS += -maes -DAESNI + CXXFLAGS += -maes endif ifeq ($(USE_AVX),1) CXXFLAGS += -mavx diff --git a/Makefile.linux b/Makefile.linux index cf045eb4..e9609876 100644 --- a/Makefile.linux +++ b/Makefile.linux @@ -64,7 +64,7 @@ ifneq ($(shell $(GREP) -c aes /proc/cpuinfo),0) ifeq ($(machine), aarch64) CXXFLAGS += -DARM64AES else - CPU_FLAGS += -maes -DAESNI + CPU_FLAGS += -maes endif endif endif diff --git a/Makefile.mingw b/Makefile.mingw index b40d0ada..fe897ae0 100644 --- a/Makefile.mingw +++ b/Makefile.mingw @@ -37,7 +37,7 @@ endif # don't change following line to ifeq ($(USE_AESNI),yes) !!! ifeq ($(USE_AESNI),1) - CPU_FLAGS += -maes -DAESNI + CPU_FLAGS += -maes else CPU_FLAGS += -msse endif diff --git a/Makefile.osx b/Makefile.osx index 8bbf37f0..5752b2fe 100644 --- a/Makefile.osx +++ b/Makefile.osx @@ -21,7 +21,7 @@ ifeq ($(USE_UPNP),yes) endif ifeq ($(USE_AESNI),1) - CXXFLAGS += -maes -DAESNI + CXXFLAGS += -maes else CXXFLAGS += -msse endif diff --git a/build/CMakeLists.txt b/build/CMakeLists.txt index 632edc03..bed5ee10 100644 --- a/build/CMakeLists.txt +++ b/build/CMakeLists.txt @@ -39,7 +39,7 @@ include_directories(${LIBI2PD_CLIENT_SRC_DIR}) set (LIBI2PD_SRC "${LIBI2PD_SRC_DIR}/BloomFilter.cpp" "${LIBI2PD_SRC_DIR}/Config.cpp" - "${LIBI2PD_SRC_DIR}/CPU.cpp" + "${LIBI2PD_SRC_DIR}/CPU.cpp" "${LIBI2PD_SRC_DIR}/Crypto.cpp" "${LIBI2PD_SRC_DIR}/CryptoKey.cpp" "${LIBI2PD_SRC_DIR}/Garlic.cpp" @@ -77,10 +77,10 @@ set (LIBI2PD_SRC "${LIBI2PD_SRC_DIR}/api.cpp" "${LIBI2PD_SRC_DIR}/Event.cpp" "${LIBI2PD_SRC_DIR}/Gost.cpp" - "${LIBI2PD_SRC_DIR}/ChaCha20.cpp" + "${LIBI2PD_SRC_DIR}/ChaCha20.cpp" "${LIBI2PD_SRC_DIR}/Poly1305.cpp" "${LIBI2PD_SRC_DIR}/Ed25519.cpp" - "${LIBI2PD_SRC_DIR}/NTCP2.cpp" + "${LIBI2PD_SRC_DIR}/NTCP2.cpp" ) if (WITH_WEBSOCKETS) @@ -234,7 +234,6 @@ endif () if (WITH_AESNI) set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes" ) - add_definitions ( -DAESNI ) endif() if (WITH_AVX) diff --git a/libi2pd/CPU.cpp b/libi2pd/CPU.cpp index d2868a20..a707c3dc 100644 --- a/libi2pd/CPU.cpp +++ b/libi2pd/CPU.cpp @@ -21,23 +21,35 @@ namespace cpu void Detect() { +#if defined(__AES__) || defined(__AVX__) + #if defined(__x86_64__) || defined(__i386__) int info[4]; __cpuid(0, info[0], info[1], info[2], info[3]); if (info[0] >= 0x00000001) { __cpuid(0x00000001, info[0], info[1], info[2], info[3]); +#ifdef __AES__ aesni = info[2] & bit_AES; // AESNI +#endif // __AES__ +#ifdef __AVX__ avx = info[2] & bit_AVX; // AVX +#endif // __AVX__ } -#endif +#endif // defined(__x86_64__) || defined(__i386__) + +#ifdef __AES__ if(aesni) { LogPrint(eLogInfo, "AESNI enabled"); } +#endif // __AES__ +#ifdef __AVX__ if(avx) { LogPrint(eLogInfo, "AVX enabled"); } +#endif // __AVX__ +#endif // defined(__AES__) || defined(__AVX__) } } } diff --git a/libi2pd/Crypto.cpp b/libi2pd/Crypto.cpp index 6d859342..24ce9c72 100644 --- a/libi2pd/Crypto.cpp +++ b/libi2pd/Crypto.cpp @@ -399,7 +399,7 @@ namespace crypto bn2buf (x, encrypted + 1, len); bn2buf (y, encrypted + 1 + len, len); RAND_bytes (encrypted + 1 + 2*len, 256 - 2*len); - } + } else { bn2buf (x, encrypted, len); @@ -468,10 +468,10 @@ namespace crypto CBCDecryption decryption; decryption.SetKey (shared); decryption.SetIV (iv); - if (zeroPadding) + if (zeroPadding) decryption.Decrypt (encrypted + 258, 256, m); else - decryption.Decrypt (encrypted + 256, 256, m); + decryption.Decrypt (encrypted + 256, 256, m); // verify and copy uint8_t hash[32]; SHA256 (m + 33, 222, hash); @@ -522,9 +522,9 @@ namespace crypto { uint64_t buf[256]; uint64_t hash[12]; // 96 bytes +#ifdef __AVX__ if(i2p::cpu::avx) { -#ifdef AVX __asm__ ( "vmovups %[key], %%ymm0 \n" @@ -543,30 +543,9 @@ namespace crypto [buf]"r"(buf), [hash]"r"(hash) : "memory", "%xmm0" // TODO: change to %ymm0 later ); -#else - // ikeypad - buf[0] = key.GetLL ()[0] ^ IPAD; - buf[1] = key.GetLL ()[1] ^ IPAD; - buf[2] = key.GetLL ()[2] ^ IPAD; - buf[3] = key.GetLL ()[3] ^ IPAD; - buf[4] = IPAD; - buf[5] = IPAD; - buf[6] = IPAD; - buf[7] = IPAD; - // okeypad - hash[0] = key.GetLL ()[0] ^ OPAD; - hash[1] = key.GetLL ()[1] ^ OPAD; - hash[2] = key.GetLL ()[2] ^ OPAD; - hash[3] = key.GetLL ()[3] ^ OPAD; - hash[4] = OPAD; - hash[5] = OPAD; - hash[6] = OPAD; - hash[7] = OPAD; - // fill last 16 bytes with zeros (first hash size assumed 32 bytes in I2P) - memset (hash + 10, 0, 16); -#endif } else +#endif { // ikeypad buf[0] = key.GetLL ()[0] ^ IPAD; @@ -600,12 +579,12 @@ namespace crypto } // AES -#ifdef AESNI +#ifdef __AES__ #ifdef ARM64AES void init_aesenc(void){ // TODO: Implementation } - + #endif #define KeyExpansion256(round0,round1) \ @@ -632,7 +611,7 @@ namespace crypto "movaps %%xmm3, "#round1"(%[sched]) \n" #endif -#ifdef AESNI +#ifdef __AES__ void ECBCryptoAESNI::ExpandKey (const AESKey& key) { __asm__ @@ -669,11 +648,11 @@ namespace crypto : [key]"r"((const uint8_t *)key), [sched]"r"(GetKeySchedule ()) // input : "%xmm1", "%xmm2", "%xmm3", "%xmm4", "memory" // clogged ); - } + } #endif -#if AESNI +#ifdef __AES__ #define EncryptAES256(sched) \ "pxor (%["#sched"]), %%xmm0 \n" \ "aesenc 16(%["#sched"]), %%xmm0 \n" \ @@ -691,12 +670,12 @@ namespace crypto "aesenc 208(%["#sched"]), %%xmm0 \n" \ "aesenclast 224(%["#sched"]), %%xmm0 \n" #endif - + void ECBEncryption::Encrypt (const ChipherBlock * in, ChipherBlock * out) { +#ifdef __AES__ if(i2p::cpu::aesni) { -#ifdef AESNI __asm__ ( "movups (%[in]), %%xmm0 \n" @@ -704,17 +683,15 @@ namespace crypto "movups %%xmm0, (%[out]) \n" : : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory" ); -#else - AES_encrypt (in->buf, out->buf, &m_Key); -#endif } else +#endif { AES_encrypt (in->buf, out->buf, &m_Key); - } + } } -#ifdef AESNI +#ifdef __AES__ #define DecryptAES256(sched) \ "pxor 224(%["#sched"]), %%xmm0 \n" \ "aesdec 208(%["#sched"]), %%xmm0 \n" \ @@ -732,12 +709,12 @@ namespace crypto "aesdec 16(%["#sched"]), %%xmm0 \n" \ "aesdeclast (%["#sched"]), %%xmm0 \n" #endif - + void ECBDecryption::Decrypt (const ChipherBlock * in, ChipherBlock * out) { +#ifdef __AES__ if(i2p::cpu::aesni) { -#ifdef AESNI __asm__ ( "movups (%[in]), %%xmm0 \n" @@ -745,17 +722,15 @@ namespace crypto "movups %%xmm0, (%[out]) \n" : : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory" ); -#else - AES_decrypt (in->buf, out->buf, &m_Key); -#endif } else +#endif { AES_decrypt (in->buf, out->buf, &m_Key); } } -#ifdef AESNI +#ifdef __AES__ #define CallAESIMC(offset) \ "movaps "#offset"(%[shed]), %%xmm0 \n" \ "aesimc %%xmm0, %%xmm0 \n" \ @@ -764,25 +739,23 @@ namespace crypto void ECBEncryption::SetKey (const AESKey& key) { +#ifdef __AES__ if(i2p::cpu::aesni) { -#ifdef AESNI - ExpandKey (key); -#else - AES_set_encrypt_key (key, 256, &m_Key); -#endif + ExpandKey (key); } else +#endif { AES_set_encrypt_key (key, 256, &m_Key); } } - + void ECBDecryption::SetKey (const AESKey& key) { +#ifdef __AES__ if(i2p::cpu::aesni) { -#ifdef AESNI ExpandKey (key); // expand encryption key first // then invert it using aesimc __asm__ @@ -802,11 +775,9 @@ namespace crypto CallAESIMC(208) : : [shed]"r"(GetKeySchedule ()) : "%xmm0", "memory" ); -#else - AES_set_decrypt_key (key, 256, &m_Key); -#endif } else +#endif { AES_set_decrypt_key (key, 256, &m_Key); } @@ -815,9 +786,9 @@ namespace crypto void CBCEncryption::Encrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out) { +#ifdef __AES__ if(i2p::cpu::aesni) { -#ifdef AESNI __asm__ ( "movups (%[iv]), %%xmm1 \n" @@ -837,16 +808,9 @@ namespace crypto [in]"r"(in), [out]"r"(out), [num]"r"(numBlocks) : "%xmm0", "%xmm1", "cc", "memory" ); -#else - for (int i = 0; i < numBlocks; i++) - { - *m_LastBlock.GetChipherBlock () ^= in[i]; - m_ECBEncryption.Encrypt (m_LastBlock.GetChipherBlock (), m_LastBlock.GetChipherBlock ()); - out[i] = *m_LastBlock.GetChipherBlock (); - } -#endif } else +#endif { for (int i = 0; i < numBlocks; i++) { @@ -867,9 +831,9 @@ namespace crypto void CBCEncryption::Encrypt (const uint8_t * in, uint8_t * out) { +#ifdef __AES__ if(i2p::cpu::aesni) { -#ifdef AESNI __asm__ ( "movups (%[iv]), %%xmm1 \n" @@ -883,19 +847,17 @@ namespace crypto [in]"r"(in), [out]"r"(out) : "%xmm0", "%xmm1", "memory" ); -#else - Encrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out); -#endif } else +#endif Encrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out); } void CBCDecryption::Decrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out) { +#ifdef __AES__ if(i2p::cpu::aesni) { -#ifdef AESNI __asm__ ( "movups (%[iv]), %%xmm1 \n" @@ -916,17 +878,9 @@ namespace crypto [in]"r"(in), [out]"r"(out), [num]"r"(numBlocks) : "%xmm0", "%xmm1", "%xmm2", "cc", "memory" ); -#else - for (int i = 0; i < numBlocks; i++) - { - ChipherBlock tmp = in[i]; - m_ECBDecryption.Decrypt (in + i, out + i); - out[i] ^= *m_IV.GetChipherBlock (); - *m_IV.GetChipherBlock () = tmp; - } -#endif } else +#endif { for (int i = 0; i < numBlocks; i++) { @@ -947,9 +901,9 @@ namespace crypto void CBCDecryption::Decrypt (const uint8_t * in, uint8_t * out) { +#ifdef __AES__ if(i2p::cpu::aesni) { -#ifdef AESNI __asm__ ( "movups (%[iv]), %%xmm1 \n" @@ -963,19 +917,17 @@ namespace crypto [in]"r"(in), [out]"r"(out) : "%xmm0", "%xmm1", "memory" ); -#else - Decrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out); -#endif } else +#endif Decrypt (1, (const ChipherBlock *)in, (ChipherBlock *)out); } void TunnelEncryption::Encrypt (const uint8_t * in, uint8_t * out) { +#ifdef __AES__ if(i2p::cpu::aesni) { -#ifdef AESNI __asm__ ( // encrypt IV @@ -1001,14 +953,9 @@ namespace crypto [in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes : "%xmm0", "%xmm1", "cc", "memory" ); -#else - m_IVEncryption.Encrypt ((const ChipherBlock *)in, (ChipherBlock *)out); // iv - m_LayerEncryption.SetIV (out); - m_LayerEncryption.Encrypt (in + 16, i2p::tunnel::TUNNEL_DATA_ENCRYPTED_SIZE, out + 16); // data - m_IVEncryption.Encrypt ((ChipherBlock *)out, (ChipherBlock *)out); // double iv -#endif } else +#endif { m_IVEncryption.Encrypt ((const ChipherBlock *)in, (ChipherBlock *)out); // iv m_LayerEncryption.SetIV (out); @@ -1019,9 +966,9 @@ namespace crypto void TunnelDecryption::Decrypt (const uint8_t * in, uint8_t * out) { +#ifdef __AES__ if(i2p::cpu::aesni) { -#ifdef AESNI __asm__ ( // decrypt IV @@ -1048,14 +995,9 @@ namespace crypto [in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes : "%xmm0", "%xmm1", "%xmm2", "cc", "memory" ); -#else - m_IVDecryption.Decrypt ((const ChipherBlock *)in, (ChipherBlock *)out); // iv - m_LayerDecryption.SetIV (out); - m_LayerDecryption.Decrypt (in + 16, i2p::tunnel::TUNNEL_DATA_ENCRYPTED_SIZE, out + 16); // data - m_IVDecryption.Decrypt ((ChipherBlock *)out, (ChipherBlock *)out); // double iv -#endif } else +#endif { m_IVDecryption.Decrypt ((const ChipherBlock *)in, (ChipherBlock *)out); // iv m_LayerDecryption.SetIV (out); @@ -1068,7 +1010,7 @@ namespace crypto bool AEADChaCha20Poly1305 (const uint8_t * msg, size_t msgLen, const uint8_t * ad, size_t adLen, const uint8_t * key, const uint8_t * nonce, uint8_t * buf, size_t len, bool encrypt) { - if (len < msgLen) return false; + if (len < msgLen) return false; if (encrypt && len < msgLen + 16) return false; bool ret = true; #if LEGACY_OPENSSL @@ -1076,40 +1018,40 @@ namespace crypto uint8_t polyKey[64]; memset(polyKey, 0, sizeof(polyKey)); chacha20 (polyKey, 64, nonce, key, 0); - // encrypt data + // encrypt data memcpy (buf, msg, msgLen); chacha20 (buf, msgLen, nonce, key, 1); - + // create Poly1305 message - if (!ad) adLen = 0; + if (!ad) adLen = 0; std::vector polyMsg(adLen + msgLen + 3*16); size_t offset = 0; uint8_t padding[16]; memset (padding, 0, 16); if (ad) - { + { memcpy (polyMsg.data (), ad, adLen); offset += adLen; // additional authenticated data auto rem = adLen & 0x0F; // %16 - if (rem) + if (rem) { // padding1 rem = 16 - rem; - memcpy (polyMsg.data () + offset, padding, rem); offset += rem; + memcpy (polyMsg.data () + offset, padding, rem); offset += rem; } } memcpy (polyMsg.data () + offset, encrypt ? buf : msg, msgLen); offset += msgLen; // encrypted data auto rem = msgLen & 0x0F; // %16 - if (rem) + if (rem) { // padding2 rem = 16 - rem; - memcpy (polyMsg.data () + offset, padding, rem); offset += rem; + memcpy (polyMsg.data () + offset, padding, rem); offset += rem; } - htole64buf (polyMsg.data () + offset, adLen); offset += 8; + htole64buf (polyMsg.data () + offset, adLen); offset += 8; htole64buf (polyMsg.data () + offset, msgLen); offset += 8; if (encrypt) { - // calculate Poly1305 tag and write in after encrypted data + // calculate Poly1305 tag and write in after encrypted data Poly1305HMAC ((uint32_t *)(buf + msgLen), (uint32_t *)polyKey, polyMsg.data (), offset); } else @@ -1118,9 +1060,9 @@ namespace crypto // calculate Poly1305 tag Poly1305HMAC (tag, (uint32_t *)polyKey, polyMsg.data (), offset); if (memcmp (tag, msg + msgLen, 16)) ret = false; // compare with provided - } + } #else - int outlen = 0; + int outlen = 0; EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new (); if (encrypt) { @@ -1141,8 +1083,8 @@ namespace crypto EVP_DecryptUpdate(ctx, NULL, &outlen, ad, adLen); ret = EVP_DecryptUpdate(ctx, buf, &outlen, msg, msgLen) > 0; } - - EVP_CIPHER_CTX_free (ctx); + + EVP_CIPHER_CTX_free (ctx); #endif return ret; } diff --git a/libi2pd/Crypto.h b/libi2pd/Crypto.h index 25646dbb..fe0fcddf 100644 --- a/libi2pd/Crypto.h +++ b/libi2pd/Crypto.h @@ -69,9 +69,9 @@ namespace crypto void operator^=(const ChipherBlock& other) // XOR { +#ifdef __AVX__ if (i2p::cpu::avx) { -#ifdef AVX __asm__ ( "vmovups (%[buf]), %%xmm0 \n" @@ -82,12 +82,9 @@ namespace crypto : [buf]"r"(buf), [other]"r"(other.buf) : "%xmm0", "%xmm1", "memory" ); -#else - for (int i = 0; i < 16; i++) - buf[i] ^= other.buf[i]; -#endif } else +#endif { // TODO: implement it better for (int i = 0; i < 16; i++) @@ -123,7 +120,7 @@ namespace crypto }; -#ifdef AESNI +#ifdef __AES__ #ifdef ARM64AES void init_aesenc(void) __attribute__((constructor)); #endif @@ -143,7 +140,7 @@ namespace crypto }; #endif -#ifdef AESNI +#ifdef __AES__ class ECBEncryption: public ECBCryptoAESNI #else class ECBEncryption @@ -152,14 +149,14 @@ namespace crypto public: void SetKey (const AESKey& key); - + void Encrypt(const ChipherBlock * in, ChipherBlock * out); private: AES_KEY m_Key; }; -#ifdef AESNI +#ifdef __AES__ class ECBDecryption: public ECBCryptoAESNI #else class ECBDecryption @@ -188,7 +185,7 @@ namespace crypto void Encrypt (const uint8_t * in, uint8_t * out); // one block ECBEncryption & ECB() { return m_ECBEncryption; } - + private: AESAlignedBuffer<16> m_LastBlock; @@ -211,7 +208,7 @@ namespace crypto void Decrypt (const uint8_t * in, uint8_t * out); // one block ECBDecryption & ECB() { return m_ECBDecryption; } - + private: AESAlignedBuffer<16> m_IV; @@ -255,8 +252,8 @@ namespace crypto }; // AEAD/ChaCha20/Poly1305 - bool AEADChaCha20Poly1305 (const uint8_t * msg, size_t msgLen, const uint8_t * ad, size_t adLen, const uint8_t * key, const uint8_t * nonce, uint8_t * buf, size_t len, bool encrypt); // msgLen is len without tag - + bool AEADChaCha20Poly1305 (const uint8_t * msg, size_t msgLen, const uint8_t * ad, size_t adLen, const uint8_t * key, const uint8_t * nonce, uint8_t * buf, size_t len, bool encrypt); // msgLen is len without tag + // init and terminate void InitCrypto (bool precomputation); void TerminateCrypto (); diff --git a/libi2pd/Identity.cpp b/libi2pd/Identity.cpp index 3fcd16ad..7f64d931 100644 --- a/libi2pd/Identity.cpp +++ b/libi2pd/Identity.cpp @@ -719,24 +719,29 @@ namespace data XORMetric operator^(const IdentHash& key1, const IdentHash& key2) { XORMetric m; -#if defined(__AVX__) // for AVX - __asm__ - ( - "vmovups %1, %%ymm0 \n" - "vmovups %2, %%ymm1 \n" - "vxorps %%ymm0, %%ymm1, %%ymm1 \n" - "vmovups %%ymm1, %0 \n" - : "=m"(*m.metric) - : "m"(*key1), "m"(*key2) - : "memory", "%xmm0", "%xmm1" // should be replaced by %ymm0/1 once supported by compiler - ); -#else - const uint64_t * hash1 = key1.GetLL (), * hash2 = key2.GetLL (); - m.metric_ll[0] = hash1[0] ^ hash2[0]; - m.metric_ll[1] = hash1[1] ^ hash2[1]; - m.metric_ll[2] = hash1[2] ^ hash2[2]; - m.metric_ll[3] = hash1[3] ^ hash2[3]; +#ifdef __AVX__ + if(i2p::cpu::avx) + { + __asm__ + ( + "vmovups %1, %%ymm0 \n" + "vmovups %2, %%ymm1 \n" + "vxorps %%ymm0, %%ymm1, %%ymm1 \n" + "vmovups %%ymm1, %0 \n" + : "=m"(*m.metric) + : "m"(*key1), "m"(*key2) + : "memory", "%xmm0", "%xmm1" // should be replaced by %ymm0/1 once supported by compiler + ); + } + else #endif + { + const uint64_t * hash1 = key1.GetLL (), * hash2 = key2.GetLL (); + m.metric_ll[0] = hash1[0] ^ hash2[0]; + m.metric_ll[1] = hash1[1] ^ hash2[1]; + m.metric_ll[2] = hash1[2] ^ hash2[2]; + m.metric_ll[3] = hash1[3] ^ hash2[3]; + } return m; }