From 4c460ae2a0ec4bc250cba860173c12fa7ded480a Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 25 Apr 2022 00:03:13 +0100 Subject: [PATCH] Reduce file size in non-native mode by specializing native allocations We are currently spending a lot of space in the result file in non-native mode by writing the native frame id, which is always 0. To improve the situation, specialize the native allocation record so the regular one doesn't need to have this field. Signed-off-by: Pablo Galindo --- src/memray/_memray/record_reader.cpp | 73 ++++++++++++++++++++++++---- src/memray/_memray/record_reader.h | 1 + src/memray/_memray/records.cpp | 10 ++-- src/memray/_memray/records.h | 35 ++++++++----- src/memray/_memray/snapshot.cpp | 38 +++++++-------- src/memray/_memray/tracking_api.cpp | 21 +++++--- tests/integration/test_main.py | 1 + 7 files changed, 127 insertions(+), 52 deletions(-) diff --git a/src/memray/_memray/record_reader.cpp b/src/memray/_memray/record_reader.cpp index 2fc5cf483e..a724db0791 100644 --- a/src/memray/_memray/record_reader.cpp +++ b/src/memray/_memray/record_reader.cpp @@ -169,7 +169,7 @@ bool RecordReader::parseNativeFrameIndex() { UnresolvedNativeFrame frame{}; - if (!d_input->read(reinterpret_cast(&frame), sizeof(UnresolvedNativeFrame))) { + if (!d_input->read(reinterpret_cast(&frame), sizeof(frame))) { return false; } std::lock_guard lock(d_mutex); @@ -180,14 +180,37 @@ RecordReader::parseNativeFrameIndex() bool RecordReader::parseAllocationRecord() { - if (!d_input->read( - reinterpret_cast(&d_latest_allocation.record), - sizeof(d_latest_allocation.record))) - { + AllocationRecord record; + if (!d_input->read(reinterpret_cast(&record), sizeof(record))) { return false; } - auto& stack = d_stack_traces[d_latest_allocation.record.tid]; + auto& stack = d_stack_traces[record.tid]; + d_latest_allocation.tid = record.tid; + d_latest_allocation.address = record.address; + d_latest_allocation.size = record.size; + d_latest_allocation.allocator = record.allocator; + d_latest_allocation.native_frame_id = 0; + d_latest_allocation.frame_index = stack.empty() ? 0 : stack.back(); + d_latest_allocation.native_segment_generation = 0; + d_latest_allocation.n_allocations = 1; + return true; +} + +bool +RecordReader::parseNativeAllocationRecord() +{ + NativeAllocationRecord record; + if (!d_input->read(reinterpret_cast(&record), sizeof(record))) { + return false; + } + + auto& stack = d_stack_traces[record.tid]; + d_latest_allocation.tid = record.tid; + d_latest_allocation.address = record.address; + d_latest_allocation.size = record.size; + d_latest_allocation.allocator = record.allocator; + d_latest_allocation.native_frame_id = record.native_frame_id; d_latest_allocation.frame_index = stack.empty() ? 0 : stack.back(); d_latest_allocation.native_segment_generation = d_symbol_resolver.currentSegmentGeneration(); d_latest_allocation.n_allocations = 1; @@ -277,6 +300,15 @@ RecordReader::nextRecord() } return RecordResult::ALLOCATION_RECORD; } + case RecordType::ALLOCATION_WITH_NATIVE: { + if (!parseNativeAllocationRecord()) { + if (d_input->is_open()) { + LOG(ERROR) << "Failed to parse allocation record with native info"; + } + return RecordResult::ERROR; + } + return RecordResult::ALLOCATION_RECORD; + } case RecordType::MEMORY_RECORD: { if (!parseMemoryRecord()) { if (d_input->is_open()) LOG(ERROR) << "Failed to parse memory record"; @@ -479,10 +511,10 @@ RecordReader::dumpAllRecords() case RecordType::UNINITIALIZED: { // Skip it. All remaining bytes should be 0. } break; - case RecordType::ALLOCATION: { - printf("ALLOCATION "); + case RecordType::ALLOCATION_WITH_NATIVE: { + printf("ALLOCATION_WITH_NATIVE "); - AllocationRecord record; + NativeAllocationRecord record; if (!d_input->read(reinterpret_cast(&record), sizeof(record))) { Py_RETURN_NONE; } @@ -503,6 +535,29 @@ RecordReader::dumpAllRecords() allocator, record.native_frame_id); } break; + + case RecordType::ALLOCATION: { + printf("ALLOCATION "); + + AllocationRecord record; + if (!d_input->read(reinterpret_cast(&record), sizeof(record))) { + Py_RETURN_NONE; + } + + const char* allocator = allocatorName(record.allocator); + + std::string unknownAllocator; + if (!allocator) { + unknownAllocator = + ""; + allocator = unknownAllocator.c_str(); + } + printf("tid=%lu address=%p size=%zd allocator=%s\n", + record.tid, + (void*)record.address, + record.size, + allocator); + } break; case RecordType::FRAME_PUSH: { printf("FRAME_PUSH "); diff --git a/src/memray/_memray/record_reader.h b/src/memray/_memray/record_reader.h index 3fa7162f63..411ded751c 100644 --- a/src/memray/_memray/record_reader.h +++ b/src/memray/_memray/record_reader.h @@ -80,6 +80,7 @@ class RecordReader [[nodiscard]] bool parseFrameIndex(); [[nodiscard]] bool parseNativeFrameIndex(); [[nodiscard]] bool parseAllocationRecord(); + [[nodiscard]] bool parseNativeAllocationRecord(); [[nodiscard]] bool parseSegmentHeader(); [[nodiscard]] bool parseSegment(Segment& segment); [[nodiscard]] bool parseThreadRecord(); diff --git a/src/memray/_memray/records.cpp b/src/memray/_memray/records.cpp index 413291a524..7f33687d22 100644 --- a/src/memray/_memray/records.cpp +++ b/src/memray/_memray/records.cpp @@ -23,16 +23,16 @@ Allocation::toPythonObject() const return nullptr; \ } \ } while (0) - PyObject* elem = PyLong_FromLong(record.tid); + PyObject* elem = PyLong_FromLong(tid); __CHECK_ERROR(elem); PyTuple_SET_ITEM(tuple, 0, elem); - elem = PyLong_FromUnsignedLong(record.address); + elem = PyLong_FromUnsignedLong(address); __CHECK_ERROR(elem); PyTuple_SET_ITEM(tuple, 1, elem); - elem = PyLong_FromSize_t(record.size); + elem = PyLong_FromSize_t(size); __CHECK_ERROR(elem); PyTuple_SET_ITEM(tuple, 2, elem); - elem = PyLong_FromLong(static_cast(record.allocator)); + elem = PyLong_FromLong(static_cast(allocator)); __CHECK_ERROR(elem); PyTuple_SET_ITEM(tuple, 3, elem); elem = PyLong_FromSize_t(frame_index); @@ -41,7 +41,7 @@ Allocation::toPythonObject() const elem = PyLong_FromSize_t(n_allocations); __CHECK_ERROR(elem); PyTuple_SET_ITEM(tuple, 5, elem); - elem = PyLong_FromSize_t(record.native_frame_id); + elem = PyLong_FromSize_t(native_frame_id); __CHECK_ERROR(elem); PyTuple_SET_ITEM(tuple, 6, elem); elem = PyLong_FromSize_t(native_segment_generation); diff --git a/src/memray/_memray/records.h b/src/memray/_memray/records.h index d98e5cfc09..40af275b4f 100644 --- a/src/memray/_memray/records.h +++ b/src/memray/_memray/records.h @@ -14,7 +14,7 @@ namespace memray::tracking_api { const char MAGIC[] = "memray"; -const int CURRENT_HEADER_VERSION = 6; +const int CURRENT_HEADER_VERSION = 7; using frame_id_t = size_t; using thread_id_t = unsigned long; @@ -23,15 +23,16 @@ using millis_t = long long; enum class RecordType { UNINITIALIZED = 0, ALLOCATION = 1, - FRAME_INDEX = 2, - FRAME_PUSH = 3, - NATIVE_TRACE_INDEX = 4, - MEMORY_MAP_START = 5, - SEGMENT_HEADER = 6, - SEGMENT = 7, - FRAME_POP = 8, - THREAD_RECORD = 9, - MEMORY_RECORD = 10, + ALLOCATION_WITH_NATIVE = 2, + FRAME_INDEX = 3, + FRAME_PUSH = 4, + NATIVE_TRACE_INDEX = 5, + MEMORY_MAP_START = 6, + SEGMENT_HEADER = 7, + SEGMENT = 8, + FRAME_POP = 9, + THREAD_RECORD = 10, + MEMORY_RECORD = 11, }; struct TrackerStats @@ -67,6 +68,14 @@ struct MemoryRecord }; struct AllocationRecord +{ + thread_id_t tid; + uintptr_t address; + size_t size; + hooks::Allocator allocator; +}; + +struct NativeAllocationRecord { thread_id_t tid; uintptr_t address; @@ -77,7 +86,11 @@ struct AllocationRecord struct Allocation { - tracking_api::AllocationRecord record; + thread_id_t tid; + uintptr_t address; + size_t size; + hooks::Allocator allocator; + frame_id_t native_frame_id{0}; size_t frame_index{0}; size_t native_segment_generation{0}; size_t n_allocations{1}; diff --git a/src/memray/_memray/snapshot.cpp b/src/memray/_memray/snapshot.cpp index e24726a1e5..3e536de608 100644 --- a/src/memray/_memray/snapshot.cpp +++ b/src/memray/_memray/snapshot.cpp @@ -53,26 +53,24 @@ Interval::rightIntersects(const Interval& other) const void SnapshotAllocationAggregator::addAllocation(const Allocation& allocation) { - switch (hooks::allocatorKind(allocation.record.allocator)) { + switch (hooks::allocatorKind(allocation.allocator)) { case hooks::AllocatorKind::SIMPLE_ALLOCATOR: { - d_ptr_to_allocation[allocation.record.address] = allocation; + d_ptr_to_allocation[allocation.address] = allocation; break; } case hooks::AllocatorKind::SIMPLE_DEALLOCATOR: { - auto it = d_ptr_to_allocation.find(allocation.record.address); + auto it = d_ptr_to_allocation.find(allocation.address); if (it != d_ptr_to_allocation.end()) { d_ptr_to_allocation.erase(it); } break; } case hooks::AllocatorKind::RANGED_ALLOCATOR: { - auto& record = allocation.record; - d_interval_tree.addInterval(record.address, record.size, allocation); + d_interval_tree.addInterval(allocation.address, allocation.size, allocation); break; } case hooks::AllocatorKind::RANGED_DEALLOCATOR: { - auto& record = allocation.record; - d_interval_tree.removeInterval(record.address, record.size); + d_interval_tree.removeInterval(allocation.address, allocation.size); break; } } @@ -86,14 +84,14 @@ SnapshotAllocationAggregator::getSnapshotAllocations(bool merge_threads) for (const auto& it : d_ptr_to_allocation) { const Allocation& record = it.second; - const thread_id_t thread_id = merge_threads ? NO_THREAD_INFO : record.record.tid; + const thread_id_t thread_id = merge_threads ? NO_THREAD_INFO : record.tid; auto alloc_it = stack_to_allocation.find(std::pair(record.frame_index, thread_id)); if (alloc_it == stack_to_allocation.end()) { stack_to_allocation.insert( alloc_it, std::pair(std::pair(record.frame_index, thread_id), record)); } else { - alloc_it->second.record.size += record.record.size; + alloc_it->second.size += record.size; alloc_it->second.n_allocations += 1; } } @@ -102,16 +100,16 @@ SnapshotAllocationAggregator::getSnapshotAllocations(bool merge_threads) // we update the allocation to reflect the actual size at the peak, based on the lengths // of the ranges in the interval tree. for (const auto& [range, allocation] : d_interval_tree) { - const thread_id_t thread_id = merge_threads ? NO_THREAD_INFO : allocation.record.tid; + const thread_id_t thread_id = merge_threads ? NO_THREAD_INFO : allocation.tid; auto alloc_it = stack_to_allocation.find(std::pair(allocation.frame_index, thread_id)); if (alloc_it == stack_to_allocation.end()) { Allocation new_alloc = allocation; - new_alloc.record.size = range.size(); + new_alloc.size = range.size(); stack_to_allocation.insert( alloc_it, std::pair(std::pair(allocation.frame_index, thread_id), new_alloc)); } else { - alloc_it->second.record.size += range.size(); + alloc_it->second.size += range.size(); alloc_it->second.n_allocations += 1; } } @@ -156,15 +154,15 @@ void HighWatermarkFinder::processAllocation(const Allocation& allocation) { size_t index = d_allocations_seen++; - switch (hooks::allocatorKind(allocation.record.allocator)) { + switch (hooks::allocatorKind(allocation.allocator)) { case hooks::AllocatorKind::SIMPLE_ALLOCATOR: { - d_current_memory += allocation.record.size; + d_current_memory += allocation.size; updatePeak(index); - d_ptr_to_allocation_size[allocation.record.address] = allocation.record.size; + d_ptr_to_allocation_size[allocation.address] = allocation.size; break; } case hooks::AllocatorKind::SIMPLE_DEALLOCATOR: { - auto it = d_ptr_to_allocation_size.find(allocation.record.address); + auto it = d_ptr_to_allocation_size.find(allocation.address); if (it != d_ptr_to_allocation_size.end()) { d_current_memory -= it->second; d_ptr_to_allocation_size.erase(it); @@ -172,14 +170,14 @@ HighWatermarkFinder::processAllocation(const Allocation& allocation) break; } case hooks::AllocatorKind::RANGED_ALLOCATOR: { - d_mmap_intervals.addInterval(allocation.record.address, allocation.record.size, allocation); - d_current_memory += allocation.record.size; + d_mmap_intervals.addInterval(allocation.address, allocation.size, allocation); + d_current_memory += allocation.size; updatePeak(index); break; } case hooks::AllocatorKind::RANGED_DEALLOCATOR: { - const auto address = allocation.record.address; - const auto size = allocation.record.size; + const auto address = allocation.address; + const auto size = allocation.size; const auto removed = d_mmap_intervals.removeInterval(address, size); if (!removed.has_value()) { diff --git a/src/memray/_memray/tracking_api.cpp b/src/memray/_memray/tracking_api.cpp index fc5e1d0dc2..afcc10ea4d 100644 --- a/src/memray/_memray/tracking_api.cpp +++ b/src/memray/_memray/tracking_api.cpp @@ -475,9 +475,9 @@ Tracker::trackAllocationImpl(void* ptr, size_t size, hooks::Allocator func) python_stack_tracker.emitPendingPops(); python_stack_tracker.emitPendingPushes(); - size_t native_index = 0; if (d_unwind_native_frames) { NativeTrace trace; + frame_id_t native_index = 0; // Skip the internal frames so we don't need to filter them later. if (trace.fill(2)) { native_index = d_native_trace_tree.getTraceIndex(trace, [&](frame_id_t ip, uint32_t index) { @@ -486,12 +486,19 @@ Tracker::trackAllocationImpl(void* ptr, size_t size, hooks::Allocator func) UnresolvedNativeFrame{ip, index}); }); } - } + NativeAllocationRecord + record{thread_id(), reinterpret_cast(ptr), size, func, native_index}; + if (!d_writer->writeRecord(RecordType::ALLOCATION_WITH_NATIVE, record)) { + std::cerr << "Failed to write output, deactivating tracking" << std::endl; + deactivate(); + } - AllocationRecord record{thread_id(), reinterpret_cast(ptr), size, func, native_index}; - if (!d_writer->writeRecord(RecordType::ALLOCATION, record)) { - std::cerr << "Failed to write output, deactivating tracking" << std::endl; - deactivate(); + } else { + AllocationRecord record{thread_id(), reinterpret_cast(ptr), size, func}; + if (!d_writer->writeRecord(RecordType::ALLOCATION, record)) { + std::cerr << "Failed to write output, deactivating tracking" << std::endl; + deactivate(); + } } } @@ -511,7 +518,7 @@ Tracker::trackDeallocationImpl(void* ptr, size_t size, hooks::Allocator func) python_stack_tracker.emitPendingPops(); python_stack_tracker.emitPendingPushes(); - AllocationRecord record{thread_id(), reinterpret_cast(ptr), size, func, 0}; + AllocationRecord record{thread_id(), reinterpret_cast(ptr), size, func}; if (!d_writer->writeRecord(RecordType::ALLOCATION, record)) { std::cerr << "Failed to write output, deactivating tracking" << std::endl; deactivate(); diff --git a/tests/integration/test_main.py b/tests/integration/test_main.py index 9f272cd802..209a604c19 100644 --- a/tests/integration/test_main.py +++ b/tests/integration/test_main.py @@ -342,6 +342,7 @@ def test_successful_parse(self, tmp_path): # GIVEN record_types = [ "ALLOCATION", + "ALLOCATION_WITH_NATIVE", "FRAME_PUSH", "FRAME_POP", "FRAME_ID",