Skip to content

Commit

Permalink
Replace MD5 with MurmurHash3_128 in SPMI (dotnet#78527)
Browse files Browse the repository at this point in the history
Co-authored-by: Jakob Botsch Nielsen <Jakob.botsch.nielsen@gmail.com>
Co-authored-by: Jan Kotas <jkotas@microsoft.com>
  • Loading branch information
3 people committed Nov 20, 2022
1 parent 37cb86c commit 5c420f1
Show file tree
Hide file tree
Showing 11 changed files with 128 additions and 191 deletions.
9 changes: 9 additions & 0 deletions THIRD-PARTY-NOTICES.TXT
Original file line number Diff line number Diff line change
Expand Up @@ -1206,3 +1206,12 @@ As an exception, if, as a result of your compiling your source code, portions
of this Software are embedded into a machine-executable object form of such
source code, you may redistribute such embedded portions in such object form
without including the above copyright and permission notices.


License for MurmurHash3
--------------------------------------

https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp

MurmurHash3 was written by Austin Appleby, and is placed in the public
domain. The author hereby disclaims copyright to this source code.
10 changes: 5 additions & 5 deletions src/coreclr/tools/superpmi/mcs/removedup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ bool RemoveDup::unique(MethodContext* mc)

// Assume that there are lots of duplicates, so don't allocate a new buffer for the MD5 hash data
// until we know we're going to add it to the map.
char md5Buff[MD5_HASH_BUFFER_SIZE];
mc->dumpMethodMD5HashToBuffer(md5Buff, MD5_HASH_BUFFER_SIZE, /* ignoreMethodName */ true, &newInfo, newFlags);
char md5Buff[MM3_HASH_BUFFER_SIZE];
mc->dumpMethodHashToBuffer(md5Buff, MM3_HASH_BUFFER_SIZE, /* ignoreMethodName */ true, &newInfo, newFlags);

if (m_inFile->GetIndex(newInfo.ILCodeSize) == -1)
m_inFile->Add(newInfo.ILCodeSize, new DenseLightWeightMap<char*>());
Expand All @@ -77,14 +77,14 @@ bool RemoveDup::unique(MethodContext* mc)
for (unsigned i = 0; i < ourRank->GetCount(); i++)
{
char* md5Buff2 = ourRank->Get(i);
if (strncmp(md5Buff, md5Buff2, MD5_HASH_BUFFER_SIZE) == 0)
if (strncmp(md5Buff, md5Buff2, MM3_HASH_BUFFER_SIZE) == 0)
{
return false;
}
}

char* newmd5Buff = new char[MD5_HASH_BUFFER_SIZE];
memcpy(newmd5Buff, md5Buff, MD5_HASH_BUFFER_SIZE);
char* newmd5Buff = new char[MM3_HASH_BUFFER_SIZE];
memcpy(newmd5Buff, md5Buff, MM3_HASH_BUFFER_SIZE);
ourRank->Append(newmd5Buff);
return true;
}
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/tools/superpmi/mcs/verbtoc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ int verbTOC::DoWork(const char* nameOfInput)
MethodContext* mc = mci.Current();

TOCElementNode* nxt = new TOCElementNode(mci.MethodContextNumber(), mci.CurrentPos());
mc->dumpMethodMD5HashToBuffer(nxt->tocElement.Hash, MD5_HASH_BUFFER_SIZE);
mc->dumpMethodHashToBuffer(nxt->tocElement.Hash, MM3_HASH_BUFFER_SIZE);

if (curElem != nullptr)
{
Expand Down
210 changes: 87 additions & 123 deletions src/coreclr/tools/superpmi/superpmi-shared/hash.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,87 +2,110 @@
// The .NET Foundation licenses this file to you under the MIT license.

//----------------------------------------------------------
// hash.cpp - Class for hashing a text stream using MD5 hashing
//
// Note that on Windows, acquiring the Crypto hash provider is expensive, so
// only do that once and cache it.
// hash.cpp - Class for hashing a text stream using MurmurHash3 hashing
//----------------------------------------------------------

#include "standardpch.h"
#include "runtimedetails.h"
#include "errorhandling.h"
#include "md5.h"
#include "hash.h"

Hash::Hash()
#ifndef TARGET_UNIX
: m_Initialized(false)
, m_hCryptProv(NULL)
#endif // !TARGET_UNIX
// MurmurHash3 was written by Austin Appleby, and is placed in the public
// domain. The author hereby disclaims copyright to this source code.
//
// Implementation was copied from https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
// with changes around strict-aliasing/unaligned reads

// ROTL64 - Rotate the 64-bit value 'x' left by 'r' bit positions.
//
// The rotation count is reduced modulo 64 before shifting, so a count of 0 (or any multiple
// of 64) returns 'x' unchanged instead of evaluating 'x >> 64', which is undefined behavior
// for a 64-bit operand. For counts in the range 1..63 the result is identical to the classic
// '(x << r) | (x >> (64 - r))' formulation.
inline uint64_t ROTL64(uint64_t x, int8_t r)
{
    const unsigned left  = static_cast<unsigned>(r) & 63u; // effective left-shift amount, 0..63
    const unsigned right = (64u - left) & 63u;             // complementary right shift, also 0..63
    return (x << left) | (x >> right);
}

Hash::~Hash()
inline uint64_t getblock64(const uint8_t* ptr)
{
Destroy(); // Ignoring return code.
uint64_t val = 0;
memcpy(&val, ptr, sizeof(uint64_t));
return val;
}

// static
bool Hash::Initialize()
inline void setblock64(uint8_t* ptr, uint64_t val)
{
#ifdef TARGET_UNIX

// No initialization necessary.
return true;

#else // !TARGET_UNIX

if (m_Initialized)
{
LogError("Hash class has already been initialized");
return false;
}

// Get handle to the crypto provider
if (!CryptAcquireContextA(&m_hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT))
goto OnError;

m_Initialized = true;
return true;

OnError:
LogError("Failed to create a hash using the Crypto API (Error 0x%X)", GetLastError());

if (m_hCryptProv != NULL)
CryptReleaseContext(m_hCryptProv, 0);

m_Initialized = false;
return false;
memcpy(ptr, &val, sizeof(uint64_t));
}

#endif // !TARGET_UNIX
// fmix64 - MurmurHash3 64-bit finalizer.
//
// Alternates three xor-shift steps (by 33 bits) with two multiplications by fixed odd
// constants and returns the mixed value.
inline uint64_t fmix64(uint64_t k)
{
    const uint64_t firstMultiplier  = 0xff51afd7ed558ccdLLU;
    const uint64_t secondMultiplier = 0xc4ceb9fe1a85ec53LLU;

    k = (k ^ (k >> 33)) * firstMultiplier;
    k = (k ^ (k >> 33)) * secondMultiplier;
    return k ^ (k >> 33);
}

// static
bool Hash::Destroy()
static void MurmurHash3_128(const void* key, const size_t len, const uint32_t seed, void* out)
{
#ifdef TARGET_UNIX
const uint8_t* data = static_cast<const uint8_t*>(key);
const size_t nblocks = len / MM3_HASH_BYTE_SIZE;
uint64_t h1 = seed;
uint64_t h2 = seed;
const uint64_t c1 = 0x87c37b91114253d5LLU;
const uint64_t c2 = 0x4cf5ad432745937fLLU;

// body
for (size_t i = 0; i < nblocks; i++)
{
uint64_t k1 = getblock64(data + (i * 2 + 0) * sizeof(uint64_t));
uint64_t k2 = getblock64(data + (i * 2 + 1) * sizeof(uint64_t));

// No destruction necessary.
return true;
k1 *= c1; k1 = ROTL64(k1, 31); k1 *= c2; h1 ^= k1;
h1 = ROTL64(h1, 27); h1 += h2; h1 = h1 * 5 + 0x52dce729;
k2 *= c2; k2 = ROTL64(k2, 33); k2 *= c1; h2 ^= k2;
h2 = ROTL64(h2, 31); h2 += h1; h2 = h2 * 5 + 0x38495ab5;
}

#else // !TARGET_UNIX
// tail
const uint8_t* tail = data + nblocks * MM3_HASH_BYTE_SIZE;
uint64_t k1 = 0;
uint64_t k2 = 0;

// Should probably check Crypt() function return codes.
if (m_hCryptProv != NULL)
switch (len & 15)
{
CryptReleaseContext(m_hCryptProv, 0);
m_hCryptProv = NULL;
case 15: k2 ^= static_cast<uint64_t>(tail[14]) << 48; FALLTHROUGH;
case 14: k2 ^= static_cast<uint64_t>(tail[13]) << 40; FALLTHROUGH;
case 13: k2 ^= static_cast<uint64_t>(tail[12]) << 32; FALLTHROUGH;
case 12: k2 ^= static_cast<uint64_t>(tail[11]) << 24; FALLTHROUGH;
case 11: k2 ^= static_cast<uint64_t>(tail[10]) << 16; FALLTHROUGH;
case 10: k2 ^= static_cast<uint64_t>(tail[9]) << 8; FALLTHROUGH;
case 9: k2 ^= static_cast<uint64_t>(tail[8]) << 0;
k2 *= c2; k2 = ROTL64(k2, 33); k2 *= c1; h2 ^= k2;
FALLTHROUGH;

case 8: k1 ^= static_cast<uint64_t>(tail[7]) << 56; FALLTHROUGH;
case 7: k1 ^= static_cast<uint64_t>(tail[6]) << 48; FALLTHROUGH;
case 6: k1 ^= static_cast<uint64_t>(tail[5]) << 40; FALLTHROUGH;
case 5: k1 ^= static_cast<uint64_t>(tail[4]) << 32; FALLTHROUGH;
case 4: k1 ^= static_cast<uint64_t>(tail[3]) << 24; FALLTHROUGH;
case 3: k1 ^= static_cast<uint64_t>(tail[2]) << 16; FALLTHROUGH;
case 2: k1 ^= static_cast<uint64_t>(tail[1]) << 8; FALLTHROUGH;
case 1: k1 ^= static_cast<uint64_t>(tail[0]) << 0;
k1 *= c1; k1 = ROTL64(k1, 31); k1 *= c2; h1 ^= k1;
break;
}

m_Initialized = false;
return true;

#endif // !TARGET_UNIX
// finalization
h1 ^= len;
h2 ^= len;
h1 += h2;
h2 += h1;
h1 = fmix64(h1);
h2 = fmix64(h2);
h1 += h2;
h2 += h1;

setblock64(static_cast<uint8_t*>(out), h1);
setblock64(static_cast<uint8_t*>(out) + sizeof(uint64_t), h2);
}

// Hash::WriteHashValueAsText - Take a binary hash value in the array of bytes pointed to by
Expand All @@ -94,7 +117,7 @@ bool Hash::WriteHashValueAsText(const BYTE* pHash, size_t cbHash, char* hashText
{
// This could be:
//
// for (DWORD i = 0; i < MD5_HASH_BYTE_SIZE; i++)
// for (DWORD i = 0; i < MM3_HASH_BYTE_SIZE; i++)
// {
// sprintf_s(hash + i * 2, hashLen - i * 2, "%02X", bHash[i]);
// }
Expand All @@ -121,77 +144,18 @@ bool Hash::WriteHashValueAsText(const BYTE* pHash, size_t cbHash, char* hashText
return true;
}

// Hash::HashBuffer - Compute an MD5 hash of the data pointed to by 'pBuffer', of 'bufLen' bytes,
// Hash::HashBuffer - Compute a MurmurHash3 hash of the data pointed to by 'pBuffer', of 'bufLen' bytes,
// writing the hexadecimal ASCII text representation of the hash to the buffer pointed to by 'hash',
// of 'hashLen' bytes in size, which must be at least MD5_HASH_BUFFER_SIZE bytes.
// of 'hashLen' bytes in size, which must be at least MM3_HASH_BUFFER_SIZE bytes.
//
// Returns the number of bytes written, or -1 on error.
int Hash::HashBuffer(BYTE* pBuffer, size_t bufLen, char* hash, size_t hashLen)
{
#ifdef TARGET_UNIX

MD5HASHDATA md5_hashdata;
MD5 md5_hasher;

if (hashLen < MD5_HASH_BUFFER_SIZE)
return -1;

md5_hasher.Hash(pBuffer, (ULONG)bufLen, &md5_hashdata);
uint8_t murMurHash[MM3_HASH_BYTE_SIZE];
MurmurHash3_128(pBuffer, bufLen, 0, murMurHash);

DWORD md5_hashdata_size = sizeof(md5_hashdata.rgb) / sizeof(BYTE);
Assert(md5_hashdata_size == MD5_HASH_BYTE_SIZE);

if (!WriteHashValueAsText(md5_hashdata.rgb, md5_hashdata_size, hash, hashLen))
if (!WriteHashValueAsText(murMurHash, MM3_HASH_BYTE_SIZE, hash, hashLen))
return -1;

return MD5_HASH_BUFFER_SIZE; // if we had success we wrote MD5_HASH_BUFFER_SIZE bytes to the buffer

#else // !TARGET_UNIX

if (!m_Initialized)
{
LogError("Hash class not initialized");
return -1;
}

HCRYPTHASH hCryptHash;
BYTE bHash[MD5_HASH_BYTE_SIZE];
DWORD cbHash = MD5_HASH_BYTE_SIZE;

if (hashLen < MD5_HASH_BUFFER_SIZE)
return -1;

if (!CryptCreateHash(m_hCryptProv, CALG_MD5, 0, 0, &hCryptHash))
goto OnError;

if (!CryptHashData(hCryptHash, pBuffer, (DWORD)bufLen, 0))
goto OnError;

if (!CryptGetHashParam(hCryptHash, HP_HASHVAL, bHash, &cbHash, 0))
goto OnError;

if (cbHash != MD5_HASH_BYTE_SIZE)
goto OnError;

if (!WriteHashValueAsText(bHash, cbHash, hash, hashLen))
return -1;

// Clean up.
CryptDestroyHash(hCryptHash);
hCryptHash = NULL;

return MD5_HASH_BUFFER_SIZE; // if we had success we wrote MD5_HASH_BUFFER_SIZE bytes to the buffer

OnError:
LogError("Failed to create a hash using the Crypto API (Error 0x%X)", GetLastError());

if (hCryptHash != NULL)
{
CryptDestroyHash(hCryptHash);
hCryptHash = NULL;
}

return -1;

#endif // !TARGET_UNIX
return MM3_HASH_BUFFER_SIZE; // if we had success we wrote MM3_HASH_BUFFER_SIZE bytes to the buffer
}
31 changes: 4 additions & 27 deletions src/coreclr/tools/superpmi/superpmi-shared/hash.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,39 +7,16 @@
#ifndef _hash
#define _hash

#define MD5_HASH_BYTE_SIZE 16 // MD5 is 128-bit, so we need 16 bytes to store it
#define MD5_HASH_BUFFER_SIZE 33 // MD5 is 128-bit, so we need 32 chars + 1 char to store null-terminator
#define MM3_HASH_BYTE_SIZE 16 // MurMurHash3 is 128-bit, so we need 16 bytes to store it
#define MM3_HASH_BUFFER_SIZE 33 // MurMurHash3 is 128-bit, so we need 32 chars + 1 char to store null-terminator

class Hash
{
public:

Hash();
~Hash();

bool Initialize();
bool Destroy();

bool IsInitialized()
{
#ifdef TARGET_UNIX
return true; // No initialization necessary.
#else // TARGET_UNIX
return m_Initialized;
#endif // !TARGET_UNIX

}

int HashBuffer(BYTE* pBuffer, size_t bufLen, char* hash, size_t hashLen);
static int HashBuffer(BYTE* pBuffer, size_t bufLen, char* hash, size_t hashLen);

private:

bool WriteHashValueAsText(const BYTE* pHash, size_t cbHash, char* hashTextBuffer, size_t hashTextBufferLen);

#ifndef TARGET_UNIX
bool m_Initialized;
HCRYPTPROV m_hCryptProv;
#endif // !TARGET_UNIX
static bool WriteHashValueAsText(const BYTE* pHash, size_t cbHash, char* hashTextBuffer, size_t hashTextBufferLen);
};

#endif
Loading

0 comments on commit 5c420f1

Please sign in to comment.