Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Phase 1 of refactoring pgo data pipeline #46638

Merged
merged 22 commits into from
Jan 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
729602b
Add InstrumentationData event to the runtime
davidwrighton Jan 5, 2021
63f7a88
VM builds
davidwrighton Jan 4, 2021
b6e29a0
Fixup superpmi and remove not yet ready recordPgoInstrumentationBySch…
davidwrighton Jan 5, 2021
71d2165
Zapper updated
davidwrighton Jan 4, 2021
2754acd
It all builds
davidwrighton Jan 5, 2021
26208e6
Correct handling for reading pgo data
davidwrighton Jan 5, 2021
5504b9a
- Handle count schema items correctly
davidwrighton Jan 6, 2021
36a76b9
Fix Linux build
davidwrighton Jan 6, 2021
bff1fec
Fix gcc build failure
davidwrighton Jan 6, 2021
e79e1c4
Apply formatting patch
davidwrighton Jan 6, 2021
73e87a6
Initialize m_pgoManager as needed
davidwrighton Jan 7, 2021
8e55edf
Fixup comments as requested
davidwrighton Jan 7, 2021
0c44c37
Remove unneccessary extra PgoInstrumentationKind enum
davidwrighton Jan 7, 2021
6bd7641
Remove unnecessary struct PgoInstrumentationSchema duplication
davidwrighton Jan 7, 2021
3dd567a
Extract pgo format processing logic to an independent header
davidwrighton Jan 7, 2021
9a5068f
Correct !FEATURE_PGO stubs
davidwrighton Jan 7, 2021
249156a
Fix Zap IBC instrumentation path both reading and writing
davidwrighton Jan 8, 2021
d2c9e55
Merge branch 'master' into pgo_prototype
davidwrighton Jan 8, 2021
3de57b1
Fix issues identified by jit experimental run
davidwrighton Jan 11, 2021
19c9e55
Needed to be a custom lock now that it actually does something
davidwrighton Jan 12, 2021
474b821
SuperPMI fix 2
davidwrighton Jan 13, 2021
aac6c8c
Merge branch 'master' of github.com:dotnet/runtime into pgo_prototype
davidwrighton Jan 13, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions src/coreclr/ToolBox/superpmi/superpmi-shared/agnostic.h
Original file line number Diff line number Diff line change
Expand Up @@ -463,18 +463,33 @@ struct Agnostic_IsCompatibleDelegate
DWORDLONG delegateCls;
};

struct Agnostic_AllocMethodBlockCounts
// Fixed-width mirror of one ICorJitInfo::PgoInstrumentationSchema entry.
// The record paths copy each schema item into this form field by field before
// storing the array with AddBuffer; the replay paths read it back with GetBuffer.
struct Agnostic_PgoInstrumentationSchema
{
// Offset of this item's data in the instrumentation data buffer. Kept as a
// 64-bit value here and cast back to size_t on replay; the largest Offset
// (plus 16 bytes of slack) is used to size the data buffer.
DWORDLONG Offset;
// Kind of instrumentation this entry describes, copied as-is from the schema.
ICorJitInfo::PgoInstrumentationKind InstrumentationKind;
// Copied verbatim from the schema item's ILOffset field.
int32_t ILOffset;
// Copied verbatim from the schema item's Count field.
int32_t Count;
// Copied verbatim from the schema item's Other field.
int32_t Other;
};

struct Agnostic_AllocPgoInstrumentationBySchema
{
DWORDLONG address;
DWORD count;
DWORD schema_index;
DWORD schemaCount;
DWORD result;
};

struct Agnostic_GetMethodBlockCounts
struct Agnostic_GetPgoInstrumentationResults
{
DWORD count;
DWORD pBlockCounts_index;
DWORD numRuns;
DWORD schemaCount;
DWORD dataByteCount;
DWORD schema_index;
DWORD data_index;
DWORD result;
};

Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/ToolBox/superpmi/superpmi-shared/lwmlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
#define DENSELWM(map, value) LWM(map, this_is_an_error, value)
#endif

LWM(AllocMethodBlockCounts, DWORD, Agnostic_AllocMethodBlockCounts)
LWM(AllocPgoInstrumentationBySchema, DWORDLONG, Agnostic_AllocPgoInstrumentationBySchema)
LWM(GetPgoInstrumentationResults, DWORDLONG, Agnostic_GetPgoInstrumentationResults)
LWM(AppendClassName, Agnostic_AppendClassName, DWORD)
LWM(AreTypesEquivalent, DLDL, DWORD)
LWM(AsCorInfoType, DWORDLONG, DWORD)
Expand Down Expand Up @@ -54,7 +55,6 @@ LWM(GetArgNext, DWORDLONG, DWORDLONG)
LWM(GetArgType, Agnostic_GetArgType_Key, Agnostic_GetArgType_Value)
LWM(GetArrayInitializationData, DLD, DWORDLONG)
LWM(GetArrayRank, DWORDLONG, DWORD)
LWM(GetMethodBlockCounts, DWORDLONG, Agnostic_GetMethodBlockCounts)
LWM(GetBoundaries, DWORDLONG, Agnostic_GetBoundaries)
LWM(GetBoxHelper, DWORDLONG, DWORD)
LWM(GetBuiltinClass, DWORD, DWORDLONG)
Expand Down
159 changes: 112 additions & 47 deletions src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ void MethodContext::Destroy()
#include "lwmlist.h"

delete cr;
FreeTempAllocations();
}

#define sparseAddLen(target) \
Expand Down Expand Up @@ -288,6 +289,8 @@ void MethodContext::MethodInitHelper(unsigned char* buff2, unsigned int totalLen
unsigned char canary = 0xff;
unsigned char* buff3 = nullptr;

FreeTempAllocations();

while (buffIndex < totalLen)
{
mcPackets packetType = (mcPackets)buff2[buffIndex++];
Expand Down Expand Up @@ -5069,36 +5072,65 @@ DWORD MethodContext::repGetFieldThreadLocalStoreID(CORINFO_FIELD_HANDLE field, v
}


void MethodContext::recAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts, HRESULT result)
void MethodContext::recAllocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, BYTE** pInstrumentationData, HRESULT result)
{
if (AllocMethodBlockCounts == nullptr)
AllocMethodBlockCounts = new LightWeightMap<DWORD, Agnostic_AllocMethodBlockCounts>();
if (AllocPgoInstrumentationBySchema == nullptr)
AllocPgoInstrumentationBySchema = new LightWeightMap<DWORDLONG, Agnostic_AllocPgoInstrumentationBySchema>();

Agnostic_AllocMethodBlockCounts value;
Agnostic_AllocPgoInstrumentationBySchema value;

value.address = CastPointer(*pBlockCounts);
value.count = (DWORD)count;
value.schemaCount = countSchemaItems;
value.address = CastPointer(*pInstrumentationData);
Agnostic_PgoInstrumentationSchema* agnosticSchema = (Agnostic_PgoInstrumentationSchema*)malloc(sizeof(Agnostic_PgoInstrumentationSchema) * countSchemaItems);
for (UINT32 i = 0; i < countSchemaItems; i++)
{
agnosticSchema[i].Offset = pSchema[i].Offset;
agnosticSchema[i].InstrumentationKind = pSchema[i].InstrumentationKind;
agnosticSchema[i].ILOffset = pSchema[i].ILOffset;
agnosticSchema[i].Count = pSchema[i].Count;
agnosticSchema[i].Other = pSchema[i].Other;
}
value.schema_index = AllocPgoInstrumentationBySchema->AddBuffer((unsigned char*)agnosticSchema, sizeof(Agnostic_PgoInstrumentationSchema) * countSchemaItems);
free(agnosticSchema);
value.result = (DWORD)result;

AllocMethodBlockCounts->Add((DWORD)0, value);
AllocPgoInstrumentationBySchema->Add(CastHandle(ftnHnd), value);
}
void MethodContext::dmpAllocMethodBlockCounts(DWORD key, const Agnostic_AllocMethodBlockCounts& value)

void MethodContext::dmpAllocPgoInstrumentationBySchema(DWORDLONG key, const Agnostic_AllocPgoInstrumentationBySchema& value)
{
printf("AllocMethodBlockCounts key %u, value addr-%016llX cnt-%u res-%08X", key, value.address, value.count, value.result);
printf("AllocPgoInstrumentationBySchema key ftn-%016llX, value addr-%016llX cnt-%u res-%08X", key, value.address, value.schemaCount, value.result);
Agnostic_PgoInstrumentationSchema* pBuf =
(Agnostic_PgoInstrumentationSchema*)AllocPgoInstrumentationBySchema->GetBuffer(value.schema_index);

for (UINT32 i = 0; i < value.schemaCount; i++)
{
printf(" Offset %016llX ILOffset %u Kind %u Count %u Other %u\n", pBuf[i].Offset, pBuf[i].ILOffset, pBuf[i].InstrumentationKind, pBuf[i].Count, pBuf[i].Other);
}
}
HRESULT MethodContext::repAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts)

DWORD MethodContext::repAllocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, BYTE** pInstrumentationData)
{
Agnostic_AllocMethodBlockCounts value;
value = AllocMethodBlockCounts->Get((DWORD)0);
Agnostic_AllocPgoInstrumentationBySchema value;
value = AllocPgoInstrumentationBySchema->Get(CastHandle(ftnHnd));

if (count != value.count)
if (countSchemaItems != value.schemaCount)
{
LogWarning("AllocMethodBlockCount mismatch: record %d, replay %d", value.count, count);
LogWarning("AllocPgoInstrumentationBySchema mismatch: record %d, replay %d", value.schemaCount, countSchemaItems);
}

HRESULT result = (HRESULT)value.result;

// Allocate a scratch buffer, linked to method context via AllocMethodBlockCounts, so it gets
Agnostic_PgoInstrumentationSchema* pAgnosticSchema = (Agnostic_PgoInstrumentationSchema*)AllocPgoInstrumentationBySchema->GetBuffer(value.schema_index);
size_t maxOffset = 0;
for (UINT32 iSchema = 0; iSchema < countSchemaItems && iSchema < value.schemaCount; iSchema++)
{
pSchema[iSchema].Offset = (size_t)pAgnosticSchema[iSchema].Offset;
if (pSchema[iSchema].Offset > maxOffset)
maxOffset = pSchema[iSchema].Offset;
}

// Allocate a scratch buffer, linked to method context via AllocPgoInstrumentationBySchema, so it gets
// cleaned up when the method context does.
//
// We won't bother recording this via AddBuffer because currently SPMI will never look at it.
Expand All @@ -5107,54 +5139,87 @@ HRESULT MethodContext::repAllocMethodBlockCounts(ULONG count, ICorJitInfo::Block
// Todo, perhaps: record the buffer as a compile result instead, and defer copying until
// jit completion so we can snapshot the offsets the jit writes.
//
*pBlockCounts = (ICorJitInfo::BlockCounts*)AllocMethodBlockCounts->CreateBuffer(count * sizeof(ICorJitInfo::BlockCounts));
cr->recAddressMap((void*)value.address, (void*)*pBlockCounts, count * (sizeof(ICorJitInfo::BlockCounts)));
// Add 16 bytes to represent writeable space
size_t bufSize = maxOffset + 16;
*pInstrumentationData = (BYTE*)AllocJitTempBuffer((unsigned)bufSize);
cr->recAddressMap((void*)value.address, (void*)*pInstrumentationData, (unsigned)bufSize);
return result;
}

void MethodContext::recGetMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd,
UINT32 * pCount,
ICorJitInfo::BlockCounts** pBlockCounts,
UINT32 * pNumRuns,
HRESULT result)
void MethodContext::recGetPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd,
ICorJitInfo::PgoInstrumentationSchema** pSchema,
UINT32* pCountSchemaItems,
BYTE** pInstrumentationData,
HRESULT result)
{
if (GetMethodBlockCounts == nullptr)
GetMethodBlockCounts = new LightWeightMap<DWORDLONG, Agnostic_GetMethodBlockCounts>();
if (GetPgoInstrumentationResults == nullptr)
GetPgoInstrumentationResults = new LightWeightMap<DWORDLONG, Agnostic_GetPgoInstrumentationResults>();

Agnostic_GetPgoInstrumentationResults value;

Agnostic_GetMethodBlockCounts value;
value.schemaCount = *pCountSchemaItems;

value.count = (DWORD)*pCount;
value.pBlockCounts_index =
GetMethodBlockCounts->AddBuffer((unsigned char*)*pBlockCounts, sizeof(ICorJitInfo::BlockCounts) * (*pCount));
value.numRuns = (DWORD)*pNumRuns;
Agnostic_PgoInstrumentationSchema* agnosticSchema = (Agnostic_PgoInstrumentationSchema*)malloc(sizeof(Agnostic_PgoInstrumentationSchema) * (*pCountSchemaItems));
size_t maxOffset = 0;
for (UINT32 i = 0; i < (*pCountSchemaItems); i++)
{
if ((*pSchema)[i].Offset > maxOffset)
maxOffset = (*pSchema)[i].Offset;
agnosticSchema[i].Offset = (*pSchema)[i].Offset;
agnosticSchema[i].InstrumentationKind = (*pSchema)[i].InstrumentationKind;
agnosticSchema[i].ILOffset = (*pSchema)[i].ILOffset;
agnosticSchema[i].Count = (*pSchema)[i].Count;
agnosticSchema[i].Other = (*pSchema)[i].Other;
}
value.schema_index = GetPgoInstrumentationResults->AddBuffer((unsigned char*)agnosticSchema, sizeof(Agnostic_PgoInstrumentationSchema) * (*pCountSchemaItems));
free(agnosticSchema);

// This isn't strictly accurate, but I think it'll do
size_t bufSize = maxOffset + 16;

value.data_index = GetPgoInstrumentationResults->AddBuffer((unsigned char*)*pInstrumentationData, (unsigned)bufSize);
value.dataByteCount = (unsigned)bufSize;
value.result = (DWORD)result;

GetMethodBlockCounts->Add(CastHandle(ftnHnd), value);
GetPgoInstrumentationResults->Add(CastHandle(ftnHnd), value);
}
void MethodContext::dmpGetMethodBlockCounts(DWORDLONG key, const Agnostic_GetMethodBlockCounts& value)
void MethodContext::dmpGetPgoInstrumentationResults(DWORDLONG key, const Agnostic_GetPgoInstrumentationResults& value)
{
printf("GetMethodBlockCounts key ftn-%016llX, value cnt-%u profileBuf-", key, value.count);
ICorJitInfo::BlockCounts* pBuf =
(ICorJitInfo::BlockCounts*)GetMethodBlockCounts->GetBuffer(value.pBlockCounts_index);
for (DWORD i = 0; i < value.count; i++, pBuf++)
printf("GetMethodBlockCounts key ftn-%016llX, value schemaCnt-%u profileBufSize-%u", key, value.schemaCount, value.dataByteCount);
Agnostic_PgoInstrumentationSchema* pBuf =
(Agnostic_PgoInstrumentationSchema*)GetPgoInstrumentationResults->GetBuffer(value.schema_index);

for (UINT32 i = 0; i < value.schemaCount; i++)
{
printf("{il-%u,cnt-%u}", pBuf->ILOffset, pBuf->ExecutionCount);
printf(" Offset %016llX ILOffset %u Kind %u Count %u Other %u\n", pBuf[i].Offset, pBuf[i].ILOffset, pBuf[i].InstrumentationKind, pBuf[i].Count, pBuf[i].Other);
}
GetMethodBlockCounts->Unlock();
printf(" numRuns-%u result-%u", value.numRuns, value.result);

// TODO, dump actual count data
}
HRESULT MethodContext::repGetMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd,
UINT32 * pCount,
ICorJitInfo::BlockCounts** pBlockCounts,
UINT32 * pNumRuns)
DWORD MethodContext::repGetPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd,
ICorJitInfo::PgoInstrumentationSchema** pSchema,
UINT32* pCountSchemaItems,
BYTE** pInstrumentationData)
{
Agnostic_GetMethodBlockCounts tempValue;
Agnostic_GetPgoInstrumentationResults tempValue;

tempValue = GetMethodBlockCounts->Get(CastHandle(ftnHnd));
tempValue = GetPgoInstrumentationResults->Get(CastHandle(ftnHnd));

*pCountSchemaItems = (UINT32)tempValue.schemaCount;
*pInstrumentationData = (BYTE*)GetPgoInstrumentationResults->GetBuffer(tempValue.data_index);

*pSchema = (ICorJitInfo::PgoInstrumentationSchema*)AllocJitTempBuffer(tempValue.schemaCount * sizeof(ICorJitInfo::PgoInstrumentationSchema));

Agnostic_PgoInstrumentationSchema* pAgnosticSchema = (Agnostic_PgoInstrumentationSchema*)GetPgoInstrumentationResults->GetBuffer(tempValue.schema_index);
for (UINT32 iSchema = 0; iSchema < tempValue.schemaCount; iSchema++)
{
(*pSchema)[iSchema].Offset = (size_t)pAgnosticSchema[iSchema].Offset;
(*pSchema)[iSchema].ILOffset = pAgnosticSchema[iSchema].ILOffset;
(*pSchema)[iSchema].InstrumentationKind = pAgnosticSchema[iSchema].InstrumentationKind;
(*pSchema)[iSchema].Count = pAgnosticSchema[iSchema].Count;
(*pSchema)[iSchema].Other = pAgnosticSchema[iSchema].Other;
}

*pCount = (UINT32)tempValue.count;
*pBlockCounts = (ICorJitInfo::BlockCounts*)GetMethodBlockCounts->GetBuffer(tempValue.pBlockCounts_index);
*pNumRuns = (UINT32)tempValue.numRuns;
HRESULT result = (HRESULT)tempValue.result;
return result;
}
Expand Down
52 changes: 36 additions & 16 deletions src/coreclr/ToolBox/superpmi/superpmi-shared/methodcontext.h
Original file line number Diff line number Diff line change
Expand Up @@ -635,20 +635,13 @@ class MethodContext
void dmpGetFieldThreadLocalStoreID(DWORDLONG key, DLD value);
DWORD repGetFieldThreadLocalStoreID(CORINFO_FIELD_HANDLE field, void** ppIndirection);

void recAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts, HRESULT result);
void dmpAllocMethodBlockCounts(DWORD key, const Agnostic_AllocMethodBlockCounts& value);
HRESULT repAllocMethodBlockCounts(ULONG count, ICorJitInfo::BlockCounts** pBlockCounts);

void recGetMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd,
UINT32 * pCount,
ICorJitInfo::BlockCounts** pBlockCounts,
UINT32 * pNumRuns,
HRESULT result);
void dmpGetMethodBlockCounts(DWORDLONG key, const Agnostic_GetMethodBlockCounts& value);
HRESULT repGetMethodBlockCounts(CORINFO_METHOD_HANDLE ftnHnd,
UINT32 * pCount,
ICorJitInfo::BlockCounts** pBlockCounts,
UINT32 * pNumRuns);
void recAllocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, BYTE** pInstrumentationData, HRESULT result);
void dmpAllocPgoInstrumentationBySchema(DWORDLONG key, const Agnostic_AllocPgoInstrumentationBySchema& value);
DWORD repAllocPgoInstrumentationBySchema(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema* pSchema, UINT32 countSchemaItems, BYTE** pInstrumentationData);

void recGetPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema** pSchema, UINT32* pCountSchemaItems, BYTE** pInstrumentationData, HRESULT result);
void dmpGetPgoInstrumentationResults(DWORDLONG key, const Agnostic_GetPgoInstrumentationResults& value);
DWORD repGetPgoInstrumentationResults(CORINFO_METHOD_HANDLE ftnHnd, ICorJitInfo::PgoInstrumentationSchema** pSchema, UINT32* pCountSchemaItems, BYTE** pInstrumentationData);

void recGetLikelyClass(CORINFO_METHOD_HANDLE ftnHnd, CORINFO_CLASS_HANDLE baseHnd, UINT32 ilOffset, CORINFO_CLASS_HANDLE classHnd, UINT32* pLikelihood, UINT32* pNumberOfClasses);
void dmpGetLikelyClass(const Agnostic_GetLikelyClass& key, const Agnostic_GetLikelyClassResult& value);
Expand Down Expand Up @@ -823,14 +816,39 @@ class MethodContext

// MD5 hasher
static Hash m_hash;

// Scheme for jit time temporary allocations.
//
// Every buffer handed out by AllocJitTempBuffer is prefixed with a DeletionNode
// header and pushed onto the singly linked list headed by nodesToDelete.
// FreeTempAllocations walks that list and frees every outstanding buffer.

// Header stored immediately in front of each temporary buffer.
struct DeletionNode
{
    DeletionNode* pNext; // previously allocated node, or nullptr at the end of the list
};

// Most recently allocated node; nullptr when no temporary buffers are outstanding.
DeletionNode* nodesToDelete = nullptr;

// Allocates a temporary buffer of `size` bytes and records it for later release.
//
// Returns a pointer to the usable bytes (just past the DeletionNode header), or
// nullptr if the underlying malloc fails. The buffer must not be freed by the
// caller; it is released by FreeTempAllocations.
void* AllocJitTempBuffer(size_t size)
{
    DeletionNode* pDeletionNode = (DeletionNode*)malloc(sizeof(DeletionNode) + size);
    if (pDeletionNode == nullptr)
    {
        return nullptr;
    }

    // Link the new node in front of the current head. The node's pNext must be
    // set here; assigning the head to pDeletionNode itself would discard the
    // fresh allocation and leave the list unchanged.
    pDeletionNode->pNext = nodesToDelete;
    nodesToDelete        = pDeletionNode;

    // The caller's bytes start right after the header.
    return pDeletionNode + 1;
}

// Frees every buffer obtained from AllocJitTempBuffer and resets the list to empty.
// Safe to call when nothing has been allocated.
void FreeTempAllocations()
{
    while (nodesToDelete != nullptr)
    {
        // Read the link before freeing the node that holds it.
        DeletionNode* next = nodesToDelete->pNext;
        free(nodesToDelete);
        nodesToDelete = next;
    }
}
};

// ********************* Please keep this up-to-date to ease adding more ***************
// Highest packet number: 185
// Highest packet number: 187
// *************************************************************************************
enum mcPackets
{
Packet_AllocMethodBlockCounts = 131,
Packet_AllocMethodBlockCounts = 131, // retired 1/4/2021
Packet_AppendClassName = 149, // Added 8/6/2014 - needed for SIMD
Packet_AreTypesEquivalent = 1,
Packet_AsCorInfoType = 2,
Expand Down Expand Up @@ -988,6 +1006,8 @@ enum mcPackets
Packet_SatisfiesMethodConstraints = 111,
Packet_ShouldEnforceCallvirtRestriction = 112, // Retired 2/18/2020
Packet_SigInstHandleMap = 184,
Packet_AllocPgoInstrumentationBySchema = 186, // Added 1/4/2021
Packet_GetPgoInstrumentationResults = 187, // Added 1/4/2021

PacketCR_AddressMap = 113,
PacketCR_AllocGCInfo = 114,
Expand Down
Loading