Reduce file size in non-native mode by specializing native allocations
We are currently spending a lot of space in the result file in
non-native mode by writing the native frame id, which is always 0. To
improve the situation, specialize the native allocation record so the
regular one doesn't need to have this field.

Signed-off-by: Pablo Galindo <pablogsal@gmail.com>
pablogsal authored and godlygeek committed Apr 25, 2022
1 parent 8c2f23b commit 4c460ae
Showing 7 changed files with 127 additions and 52 deletions.
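For orientation before the per-file diffs: the commit splits the single on-disk allocation record into two layouts, so non-native captures stop paying for a native frame id that is always 0. Below is a small standalone C++ sketch (not the memray sources: hooks::Allocator is modelled as a plain int, so exact sizes and padding may differ) of the per-record saving. Because RecordReader pulls each record with a single read(..., sizeof(record)), the struct size is roughly the per-record cost on disk.

// Standalone sketch of the two record layouts introduced by this commit.
// Simplified stand-ins only: the real definitions are in
// src/memray/_memray/records.h, and hooks::Allocator is modelled as an int,
// so exact sizes and padding may differ from a real memray build.
#include <cstdint>
#include <cstdio>

using thread_id_t = unsigned long;
using frame_id_t = std::size_t;

struct AllocationRecord  // non-native mode (RecordType::ALLOCATION)
{
    thread_id_t tid;
    std::uintptr_t address;
    std::size_t size;
    int allocator;  // stand-in for hooks::Allocator
};

struct NativeAllocationRecord  // native mode (RecordType::ALLOCATION_WITH_NATIVE)
{
    thread_id_t tid;
    std::uintptr_t address;
    std::size_t size;
    int allocator;  // stand-in for hooks::Allocator
    frame_id_t native_frame_id;
};

int main()
{
    std::printf("non-native record: %zu bytes\n", sizeof(AllocationRecord));
    std::printf("native record:     %zu bytes\n", sizeof(NativeAllocationRecord));
    std::printf("saved per non-native allocation: %zu bytes\n",
                sizeof(NativeAllocationRecord) - sizeof(AllocationRecord));
    return 0;
}

With these stand-in types on an LP64 build the sketch reports an 8-byte saving per record; the exact figure for real capture files depends on the width of hooks::Allocator and on struct padding, but the saving applies to every allocation and deallocation record written in non-native mode.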
73 changes: 64 additions & 9 deletions src/memray/_memray/record_reader.cpp
@@ -169,7 +169,7 @@ bool
RecordReader::parseNativeFrameIndex()
{
UnresolvedNativeFrame frame{};
if (!d_input->read(reinterpret_cast<char*>(&frame), sizeof(UnresolvedNativeFrame))) {
if (!d_input->read(reinterpret_cast<char*>(&frame), sizeof(frame))) {
return false;
}
std::lock_guard<std::mutex> lock(d_mutex);
@@ -180,14 +180,37 @@ RecordReader::parseNativeFrameIndex()
bool
RecordReader::parseAllocationRecord()
{
if (!d_input->read(
reinterpret_cast<char*>(&d_latest_allocation.record),
sizeof(d_latest_allocation.record)))
{
AllocationRecord record;
if (!d_input->read(reinterpret_cast<char*>(&record), sizeof(record))) {
return false;
}

auto& stack = d_stack_traces[d_latest_allocation.record.tid];
auto& stack = d_stack_traces[record.tid];
d_latest_allocation.tid = record.tid;
d_latest_allocation.address = record.address;
d_latest_allocation.size = record.size;
d_latest_allocation.allocator = record.allocator;
d_latest_allocation.native_frame_id = 0;
d_latest_allocation.frame_index = stack.empty() ? 0 : stack.back();
d_latest_allocation.native_segment_generation = 0;
d_latest_allocation.n_allocations = 1;
return true;
}

bool
RecordReader::parseNativeAllocationRecord()
{
NativeAllocationRecord record;
if (!d_input->read(reinterpret_cast<char*>(&record), sizeof(record))) {
return false;
}

auto& stack = d_stack_traces[record.tid];
d_latest_allocation.tid = record.tid;
d_latest_allocation.address = record.address;
d_latest_allocation.size = record.size;
d_latest_allocation.allocator = record.allocator;
d_latest_allocation.native_frame_id = record.native_frame_id;
d_latest_allocation.frame_index = stack.empty() ? 0 : stack.back();
d_latest_allocation.native_segment_generation = d_symbol_resolver.currentSegmentGeneration();
d_latest_allocation.n_allocations = 1;
@@ -277,6 +300,15 @@ RecordReader::nextRecord()
}
return RecordResult::ALLOCATION_RECORD;
}
case RecordType::ALLOCATION_WITH_NATIVE: {
if (!parseNativeAllocationRecord()) {
if (d_input->is_open()) {
LOG(ERROR) << "Failed to parse allocation record with native info";
}
return RecordResult::ERROR;
}
return RecordResult::ALLOCATION_RECORD;
}
case RecordType::MEMORY_RECORD: {
if (!parseMemoryRecord()) {
if (d_input->is_open()) LOG(ERROR) << "Failed to parse memory record";
@@ -479,10 +511,10 @@ RecordReader::dumpAllRecords()
case RecordType::UNINITIALIZED: {
// Skip it. All remaining bytes should be 0.
} break;
case RecordType::ALLOCATION: {
printf("ALLOCATION ");
case RecordType::ALLOCATION_WITH_NATIVE: {
printf("ALLOCATION_WITH_NATIVE ");

AllocationRecord record;
NativeAllocationRecord record;
if (!d_input->read(reinterpret_cast<char*>(&record), sizeof(record))) {
Py_RETURN_NONE;
}
@@ -503,6 +535,29 @@ RecordReader::dumpAllRecords()
allocator,
record.native_frame_id);
} break;

case RecordType::ALLOCATION: {
printf("ALLOCATION ");

AllocationRecord record;
if (!d_input->read(reinterpret_cast<char*>(&record), sizeof(record))) {
Py_RETURN_NONE;
}

const char* allocator = allocatorName(record.allocator);

std::string unknownAllocator;
if (!allocator) {
unknownAllocator =
"<unknown allocator " + std::to_string((int)record.allocator) + ">";
allocator = unknownAllocator.c_str();
}
printf("tid=%lu address=%p size=%zd allocator=%s\n",
record.tid,
(void*)record.address,
record.size,
allocator);
} break;
case RecordType::FRAME_PUSH: {
printf("FRAME_PUSH ");

1 change: 1 addition & 0 deletions src/memray/_memray/record_reader.h
@@ -80,6 +80,7 @@ class RecordReader
[[nodiscard]] bool parseFrameIndex();
[[nodiscard]] bool parseNativeFrameIndex();
[[nodiscard]] bool parseAllocationRecord();
[[nodiscard]] bool parseNativeAllocationRecord();
[[nodiscard]] bool parseSegmentHeader();
[[nodiscard]] bool parseSegment(Segment& segment);
[[nodiscard]] bool parseThreadRecord();
10 changes: 5 additions & 5 deletions src/memray/_memray/records.cpp
@@ -23,16 +23,16 @@ Allocation::toPythonObject() const
return nullptr; \
} \
} while (0)
PyObject* elem = PyLong_FromLong(record.tid);
PyObject* elem = PyLong_FromLong(tid);
__CHECK_ERROR(elem);
PyTuple_SET_ITEM(tuple, 0, elem);
elem = PyLong_FromUnsignedLong(record.address);
elem = PyLong_FromUnsignedLong(address);
__CHECK_ERROR(elem);
PyTuple_SET_ITEM(tuple, 1, elem);
elem = PyLong_FromSize_t(record.size);
elem = PyLong_FromSize_t(size);
__CHECK_ERROR(elem);
PyTuple_SET_ITEM(tuple, 2, elem);
elem = PyLong_FromLong(static_cast<int>(record.allocator));
elem = PyLong_FromLong(static_cast<int>(allocator));
__CHECK_ERROR(elem);
PyTuple_SET_ITEM(tuple, 3, elem);
elem = PyLong_FromSize_t(frame_index);
@@ -41,7 +41,7 @@ Allocation::toPythonObject() const
elem = PyLong_FromSize_t(n_allocations);
__CHECK_ERROR(elem);
PyTuple_SET_ITEM(tuple, 5, elem);
elem = PyLong_FromSize_t(record.native_frame_id);
elem = PyLong_FromSize_t(native_frame_id);
__CHECK_ERROR(elem);
PyTuple_SET_ITEM(tuple, 6, elem);
elem = PyLong_FromSize_t(native_segment_generation);
35 changes: 24 additions & 11 deletions src/memray/_memray/records.h
@@ -14,7 +14,7 @@
namespace memray::tracking_api {

const char MAGIC[] = "memray";
const int CURRENT_HEADER_VERSION = 6;
const int CURRENT_HEADER_VERSION = 7;

using frame_id_t = size_t;
using thread_id_t = unsigned long;
@@ -23,15 +23,16 @@ using millis_t = long long;
enum class RecordType {
UNINITIALIZED = 0,
ALLOCATION = 1,
FRAME_INDEX = 2,
FRAME_PUSH = 3,
NATIVE_TRACE_INDEX = 4,
MEMORY_MAP_START = 5,
SEGMENT_HEADER = 6,
SEGMENT = 7,
FRAME_POP = 8,
THREAD_RECORD = 9,
MEMORY_RECORD = 10,
ALLOCATION_WITH_NATIVE = 2,
FRAME_INDEX = 3,
FRAME_PUSH = 4,
NATIVE_TRACE_INDEX = 5,
MEMORY_MAP_START = 6,
SEGMENT_HEADER = 7,
SEGMENT = 8,
FRAME_POP = 9,
THREAD_RECORD = 10,
MEMORY_RECORD = 11,
};

struct TrackerStats
@@ -67,6 +68,14 @@ struct MemoryRecord
};

struct AllocationRecord
{
thread_id_t tid;
uintptr_t address;
size_t size;
hooks::Allocator allocator;
};

struct NativeAllocationRecord
{
thread_id_t tid;
uintptr_t address;
@@ -77,7 +86,11 @@ struct AllocationRecord

struct Allocation
{
tracking_api::AllocationRecord record;
thread_id_t tid;
uintptr_t address;
size_t size;
hooks::Allocator allocator;
frame_id_t native_frame_id{0};
size_t frame_index{0};
size_t native_segment_generation{0};
size_t n_allocations{1};
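One consequence of the new record type worth noting: inserting ALLOCATION_WITH_NATIVE = 2 renumbers every later RecordType value, so files written with this layout cannot be decoded by readers that expect the old numbering, which is why CURRENT_HEADER_VERSION moves from 6 to 7 above. A hypothetical sketch of the kind of up-front check a reader needs (the name checkHeaderVersion is illustrative; memray's real check lives in its reader and may be shaped differently):

// Hypothetical sketch: refuse capture files whose header version does not
// match the version this reader was built against, since RecordType values
// changed meaning between versions 6 and 7.
#include <stdexcept>
#include <string>

constexpr int CURRENT_HEADER_VERSION = 7;

void checkHeaderVersion(int file_version)
{
    if (file_version != CURRENT_HEADER_VERSION) {
        throw std::runtime_error(
                "Unsupported capture file: version " + std::to_string(file_version)
                + ", but this reader understands version "
                + std::to_string(CURRENT_HEADER_VERSION));
    }
}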
38 changes: 18 additions & 20 deletions src/memray/_memray/snapshot.cpp
@@ -53,26 +53,24 @@ Interval::rightIntersects(const Interval& other) const
void
SnapshotAllocationAggregator::addAllocation(const Allocation& allocation)
{
switch (hooks::allocatorKind(allocation.record.allocator)) {
switch (hooks::allocatorKind(allocation.allocator)) {
case hooks::AllocatorKind::SIMPLE_ALLOCATOR: {
d_ptr_to_allocation[allocation.record.address] = allocation;
d_ptr_to_allocation[allocation.address] = allocation;
break;
}
case hooks::AllocatorKind::SIMPLE_DEALLOCATOR: {
auto it = d_ptr_to_allocation.find(allocation.record.address);
auto it = d_ptr_to_allocation.find(allocation.address);
if (it != d_ptr_to_allocation.end()) {
d_ptr_to_allocation.erase(it);
}
break;
}
case hooks::AllocatorKind::RANGED_ALLOCATOR: {
auto& record = allocation.record;
d_interval_tree.addInterval(record.address, record.size, allocation);
d_interval_tree.addInterval(allocation.address, allocation.size, allocation);
break;
}
case hooks::AllocatorKind::RANGED_DEALLOCATOR: {
auto& record = allocation.record;
d_interval_tree.removeInterval(record.address, record.size);
d_interval_tree.removeInterval(allocation.address, allocation.size);
break;
}
}
@@ -86,14 +84,14 @@ SnapshotAllocationAggregator::getSnapshotAllocations(bool merge_threads)

for (const auto& it : d_ptr_to_allocation) {
const Allocation& record = it.second;
const thread_id_t thread_id = merge_threads ? NO_THREAD_INFO : record.record.tid;
const thread_id_t thread_id = merge_threads ? NO_THREAD_INFO : record.tid;
auto alloc_it = stack_to_allocation.find(std::pair(record.frame_index, thread_id));
if (alloc_it == stack_to_allocation.end()) {
stack_to_allocation.insert(
alloc_it,
std::pair(std::pair(record.frame_index, thread_id), record));
} else {
alloc_it->second.record.size += record.record.size;
alloc_it->second.size += record.size;
alloc_it->second.n_allocations += 1;
}
}
@@ -102,16 +100,16 @@ SnapshotAllocationAggregator::getSnapshotAllocations(bool merge_threads)
// we update the allocation to reflect the actual size at the peak, based on the lengths
// of the ranges in the interval tree.
for (const auto& [range, allocation] : d_interval_tree) {
const thread_id_t thread_id = merge_threads ? NO_THREAD_INFO : allocation.record.tid;
const thread_id_t thread_id = merge_threads ? NO_THREAD_INFO : allocation.tid;
auto alloc_it = stack_to_allocation.find(std::pair(allocation.frame_index, thread_id));
if (alloc_it == stack_to_allocation.end()) {
Allocation new_alloc = allocation;
new_alloc.record.size = range.size();
new_alloc.size = range.size();
stack_to_allocation.insert(
alloc_it,
std::pair(std::pair(allocation.frame_index, thread_id), new_alloc));
} else {
alloc_it->second.record.size += range.size();
alloc_it->second.size += range.size();
alloc_it->second.n_allocations += 1;
}
}
@@ -156,30 +154,30 @@ void
HighWatermarkFinder::processAllocation(const Allocation& allocation)
{
size_t index = d_allocations_seen++;
switch (hooks::allocatorKind(allocation.record.allocator)) {
switch (hooks::allocatorKind(allocation.allocator)) {
case hooks::AllocatorKind::SIMPLE_ALLOCATOR: {
d_current_memory += allocation.record.size;
d_current_memory += allocation.size;
updatePeak(index);
d_ptr_to_allocation_size[allocation.record.address] = allocation.record.size;
d_ptr_to_allocation_size[allocation.address] = allocation.size;
break;
}
case hooks::AllocatorKind::SIMPLE_DEALLOCATOR: {
auto it = d_ptr_to_allocation_size.find(allocation.record.address);
auto it = d_ptr_to_allocation_size.find(allocation.address);
if (it != d_ptr_to_allocation_size.end()) {
d_current_memory -= it->second;
d_ptr_to_allocation_size.erase(it);
}
break;
}
case hooks::AllocatorKind::RANGED_ALLOCATOR: {
d_mmap_intervals.addInterval(allocation.record.address, allocation.record.size, allocation);
d_current_memory += allocation.record.size;
d_mmap_intervals.addInterval(allocation.address, allocation.size, allocation);
d_current_memory += allocation.size;
updatePeak(index);
break;
}
case hooks::AllocatorKind::RANGED_DEALLOCATOR: {
const auto address = allocation.record.address;
const auto size = allocation.record.size;
const auto address = allocation.address;
const auto size = allocation.size;
const auto removed = d_mmap_intervals.removeInterval(address, size);

if (!removed.has_value()) {
21 changes: 14 additions & 7 deletions src/memray/_memray/tracking_api.cpp
@@ -475,9 +475,9 @@ Tracker::trackAllocationImpl(void* ptr, size_t size, hooks::Allocator func)
python_stack_tracker.emitPendingPops();
python_stack_tracker.emitPendingPushes();

size_t native_index = 0;
if (d_unwind_native_frames) {
NativeTrace trace;
frame_id_t native_index = 0;
// Skip the internal frames so we don't need to filter them later.
if (trace.fill(2)) {
native_index = d_native_trace_tree.getTraceIndex(trace, [&](frame_id_t ip, uint32_t index) {
@@ -486,12 +486,19 @@ Tracker::trackAllocationImpl(void* ptr, size_t size, hooks::Allocator func)
UnresolvedNativeFrame{ip, index});
});
}
}
NativeAllocationRecord
record{thread_id(), reinterpret_cast<uintptr_t>(ptr), size, func, native_index};
if (!d_writer->writeRecord(RecordType::ALLOCATION_WITH_NATIVE, record)) {
std::cerr << "Failed to write output, deactivating tracking" << std::endl;
deactivate();
}

AllocationRecord record{thread_id(), reinterpret_cast<uintptr_t>(ptr), size, func, native_index};
if (!d_writer->writeRecord(RecordType::ALLOCATION, record)) {
std::cerr << "Failed to write output, deactivating tracking" << std::endl;
deactivate();
} else {
AllocationRecord record{thread_id(), reinterpret_cast<uintptr_t>(ptr), size, func};
if (!d_writer->writeRecord(RecordType::ALLOCATION, record)) {
std::cerr << "Failed to write output, deactivating tracking" << std::endl;
deactivate();
}
}
}

@@ -511,7 +518,7 @@ Tracker::trackDeallocationImpl(void* ptr, size_t size, hooks::Allocator func)
python_stack_tracker.emitPendingPops();
python_stack_tracker.emitPendingPushes();

AllocationRecord record{thread_id(), reinterpret_cast<uintptr_t>(ptr), size, func, 0};
AllocationRecord record{thread_id(), reinterpret_cast<uintptr_t>(ptr), size, func};
if (!d_writer->writeRecord(RecordType::ALLOCATION, record)) {
std::cerr << "Failed to write output, deactivating tracking" << std::endl;
deactivate();
1 change: 1 addition & 0 deletions tests/integration/test_main.py
@@ -342,6 +342,7 @@ def test_successful_parse(self, tmp_path):
# GIVEN
record_types = [
"ALLOCATION",
"ALLOCATION_WITH_NATIVE",
"FRAME_PUSH",
"FRAME_POP",
"FRAME_ID",