Skip to content

Commit

Permalink
[Courgette] Add and use AddressTranslator::PointerToTargetRVA(); Upda…
Browse files Browse the repository at this point in the history
…te comments.

Addresses in Courgette (abs32 and rel32) are represented in these forms:
 (1) Location RVA.
 (2) Location FileOffset.
 (3) Pointer in image.
 (4) Target VA.
 (5) Target RVA.
We already have (1) -> (2), (2) -> (1), (2) -> (3), (1) -> (3) for
existing usage. Now we add (3) -> (5) and refactor accordingly (with
helpers to do (4) -> (5) for PE files). PointerToTargetRVA() will
be used again when we apply LabelManager to save 25% peak RAM.

Review URL: https://codereview.chromium.org/1807293003

Cr-Commit-Position: refs/heads/master@{#382920}
  • Loading branch information
samuelhuang authored and Commit bot committed Mar 23, 2016
1 parent 880fae5 commit f940a8c
Show file tree
Hide file tree
Showing 8 changed files with 103 additions and 56 deletions.
1 change: 1 addition & 0 deletions courgette/disassembler.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class Disassembler : public AddressTranslator {
virtual FileOffset RVAToFileOffset(RVA rva) const override = 0;
const uint8_t* FileOffsetToPointer(FileOffset file_offset) const override;
const uint8_t* RVAToPointer(RVA rva) const override;
// Still pure here: each concrete disassembler supplies its own implementation.
// Marked override so the compiler checks it against AddressTranslator.
RVA PointerToTargetRVA(const uint8_t* p) const override = 0;

virtual ExecutableType kind() const = 0;

Expand Down
9 changes: 7 additions & 2 deletions courgette/disassembler_elf_32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ FileOffset DisassemblerElf32::RVAToFileOffset(RVA rva) const {
return kNoFileOffset;
}

RVA DisassemblerElf32::PointerToTargetRVA(const uint8_t* p) const {
  // The value read from |p| via Read32LittleEndian() is returned as the target
  // RVA as-is; no range validation is performed here yet.
  // TODO(huangs): Add check (e.g., IsValidTargetRVA(), but more efficient).
  const RVA target_rva = Read32LittleEndian(p);
  return target_rva;
}

bool DisassemblerElf32::ParseHeader() {
if (length() < sizeof(Elf32_Ehdr))
return Bad("Too small");
Expand Down Expand Up @@ -350,8 +355,8 @@ CheckBool DisassemblerElf32::ParseProgbitsSection(

if (*current_abs_offset != end_abs_offset &&
file_offset == **current_abs_offset) {
const uint8_t* p = FileOffsetToPointer(file_offset);
RVA target_rva = Read32LittleEndian(p);
RVA target_rva = PointerToTargetRVA(FileOffsetToPointer(file_offset));
DCHECK_NE(kNoRVA, target_rva);

if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)))
return false;
Expand Down
2 changes: 1 addition & 1 deletion courgette/disassembler_elf_32.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ class DisassemblerElf32 : public Disassembler {
// Disassembler interfaces.
RVA FileOffsetToRVA(FileOffset file_offset) const override;
FileOffset RVAToFileOffset(RVA rva) const override;
RVA PointerToTargetRVA(const uint8_t* p) const override;
virtual ExecutableType kind() const override = 0;
bool ParseHeader() override;
bool Disassemble(AssemblyProgram* target) override;
Expand All @@ -90,7 +91,6 @@ class DisassemblerElf32 : public Disassembler {
ScopedVector<TypedRVA> &Rel32Locations() { return rel32_locations_; }

protected:

bool UpdateLength();

// Misc Section Helpers
Expand Down
45 changes: 27 additions & 18 deletions courgette/disassembler_win32_x64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,21 @@ DisassemblerWin32X64::DisassemblerWin32X64(const void* start, size_t length)
number_of_data_directories_(0) {
}

RVA DisassemblerWin32X64::FileOffsetToRVA(FileOffset file_offset) const {
  // Walk the sections in order and return the RVA for the first one whose raw
  // data range contains |file_offset|.
  for (int index = 0; index < number_of_sections_; ++index) {
    const Section& candidate = sections_[index];

    // |file_offset| lies before this section's raw data.
    if (file_offset < candidate.file_offset_of_raw_data)
      continue;

    // |file_offset| lies at or past the end of this section's raw data.
    const FileOffset delta = file_offset - candidate.file_offset_of_raw_data;
    if (delta >= candidate.size_of_raw_data)
      continue;

    return static_cast<RVA>(candidate.virtual_address + delta);
  }

  // No section contains |file_offset|; callers are not expected to get here.
  NOTREACHED();
  return kNoRVA;
}

FileOffset DisassemblerWin32X64::RVAToFileOffset(RVA rva) const {
const Section* section = RVAToSection(rva);
if (section != nullptr) {
Expand All @@ -65,19 +80,8 @@ FileOffset DisassemblerWin32X64::RVAToFileOffset(RVA rva) const {
return kNoFileOffset;
}

RVA DisassemblerWin32X64::FileOffsetToRVA(FileOffset file_offset) const {
for (int i = 0; i < number_of_sections_; ++i) {
const Section* section = &sections_[i];
if (file_offset >= section->file_offset_of_raw_data) {
FileOffset offset_in_section =
file_offset - section->file_offset_of_raw_data;
if (offset_in_section < section->size_of_raw_data)
return static_cast<RVA>(section->virtual_address + offset_in_section);
}
}

NOTREACHED();
return kNoRVA;
RVA DisassemblerWin32X64::PointerToTargetRVA(const uint8_t* p) const {
  // Read the stored 64-bit address at |p|, then map it to an RVA.
  // Address64ToRVA() yields kNoRVA when the address is outside the image.
  const uint64_t target_address = Read64LittleEndian(p);
  return Address64ToRVA(target_address);
}

// ParseHeader attempts to match up the buffer with the Windows data
Expand Down Expand Up @@ -338,6 +342,12 @@ const Section* DisassemblerWin32X64::RVAToSection(RVA rva) const {
return nullptr;
}

RVA DisassemblerWin32X64::Address64ToRVA(uint64_t address) const {
  // Returns the RVA of |address|, or kNoRVA if |address| is outside the image,
  // i.e. outside [image_base(), image_base() + size_of_image_).
  if (address < image_base())
    return kNoRVA;

  // Compare the offset against the image size instead of computing
  // image_base() + size_of_image_: that sum can wrap around when the header
  // values put the image end past the top of the address space, which would
  // make the upper-bound test meaningless.
  const uint64_t offset = address - image_base();
  if (offset >= size_of_image_)
    return kNoRVA;
  return base::checked_cast<RVA>(offset);
}

std::string DisassemblerWin32X64::SectionName(const Section* section) {
if (section == nullptr)
return "<none>";
Expand Down Expand Up @@ -389,9 +399,8 @@ bool DisassemblerWin32X64::ParseAbs32Relocs() {
#if COURGETTE_HISTOGRAM_TARGETS
for (size_t i = 0; i < abs32_locations_.size(); ++i) {
RVA rva = abs32_locations_[i];
// The 4 bytes at the relocation are a reference to some address.
uint32_t target_address = Read32LittleEndian(RVAToPointer(rva));
++abs32_target_rvas_[target_address - image_base()];
// The 8 bytes at the relocation are a reference to some address.
++abs32_target_rvas_[PointerToTargetRVA(RVAToPointer(rva))];
}
#endif
return true;
Expand Down Expand Up @@ -601,8 +610,8 @@ CheckBool DisassemblerWin32X64::ParseFileRegion(const Section* section,
++abs32_pos;

if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) {
uint64_t target_address = Read64LittleEndian(p);
RVA target_rva = base::checked_cast<RVA>(target_address - image_base());
RVA target_rva = PointerToTargetRVA(p);
DCHECK_NE(kNoRVA, target_rva);
// TODO(sra): target could be Label+offset. It is not clear how to guess
// which it might be. We assume offset==0.
if (!program->EmitAbs64(program->FindOrMakeAbs32Label(target_rva)))
Expand Down
5 changes: 5 additions & 0 deletions courgette/disassembler_win32_x64.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class DisassemblerWin32X64 : public Disassembler {
// Disassembler interfaces.
RVA FileOffsetToRVA(FileOffset file_offset) const override;
FileOffset RVAToFileOffset(RVA rva) const override;
RVA PointerToTargetRVA(const uint8_t* p) const override;
ExecutableType kind() const override { return EXE_WIN_32_X64; }
bool ParseHeader() override;
bool Disassemble(AssemblyProgram* target) override;
Expand All @@ -46,6 +47,10 @@ class DisassemblerWin32X64 : public Disassembler {
// Returns Section containing the relative virtual address, or null if none.
const Section* RVAToSection(RVA rva) const;

// (4) -> (5) (see AddressTranslator comment): Returns the RVA of the VA
// specified by |address|, or kNoRVA if |address| lies outside of the image.
RVA Address64ToRVA(uint64_t address) const;

static std::string SectionName(const Section* section);

protected:
Expand Down
48 changes: 28 additions & 20 deletions courgette/disassembler_win32_x86.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,21 @@ DisassemblerWin32X86::DisassemblerWin32X86(const void* start, size_t length)
number_of_data_directories_(0) {
}

RVA DisassemblerWin32X86::FileOffsetToRVA(FileOffset file_offset) const {
  // Walk the sections in order and return the RVA for the first one whose raw
  // data range contains |file_offset|.
  for (int index = 0; index < number_of_sections_; ++index) {
    const Section& candidate = sections_[index];

    // |file_offset| lies before this section's raw data.
    if (file_offset < candidate.file_offset_of_raw_data)
      continue;

    // |file_offset| lies at or past the end of this section's raw data.
    const FileOffset delta = file_offset - candidate.file_offset_of_raw_data;
    if (delta >= candidate.size_of_raw_data)
      continue;

    return static_cast<RVA>(candidate.virtual_address + delta);
  }

  // No section contains |file_offset|; callers are not expected to get here.
  NOTREACHED();
  return kNoRVA;
}

FileOffset DisassemblerWin32X86::RVAToFileOffset(RVA rva) const {
const Section* section = RVAToSection(rva);
if (section != nullptr) {
Expand All @@ -65,19 +80,8 @@ FileOffset DisassemblerWin32X86::RVAToFileOffset(RVA rva) const {
return kNoFileOffset;
}

RVA DisassemblerWin32X86::FileOffsetToRVA(FileOffset file_offset) const {
for (int i = 0; i < number_of_sections_; ++i) {
const Section* section = &sections_[i];
if (file_offset >= section->file_offset_of_raw_data) {
FileOffset offset_in_section =
file_offset - section->file_offset_of_raw_data;
if (offset_in_section < section->size_of_raw_data)
return static_cast<RVA>(section->virtual_address + offset_in_section);
}
}

NOTREACHED();
return kNoRVA;
RVA DisassemblerWin32X86::PointerToTargetRVA(const uint8_t* p) const {
  // Read the stored 32-bit address at |p|, then map it to an RVA.
  // Address32ToRVA() yields kNoRVA when the address is outside the image.
  const uint32_t target_address = Read32LittleEndian(p);
  return Address32ToRVA(target_address);
}

// ParseHeader attempts to match up the buffer with the Windows data
Expand Down Expand Up @@ -308,9 +312,8 @@ bool DisassemblerWin32X86::ParseRelocs(std::vector<RVA> *relocs) {
// Skip the relocs that live outside of the image. It might be the case
// if a reloc is relative to a register, e.g.:
// mov ecx,dword ptr [eax+044D5888h]
uint32_t target_address = Read32LittleEndian(RVAToPointer(rva));
if (target_address < image_base_ ||
target_address > (image_base_ + size_of_image_)) {
RVA target_rva = PointerToTargetRVA(RVAToPointer(rva));
if (target_rva == kNoRVA) {
continue;
}
if (type == 3) { // IMAGE_REL_BASED_HIGHLOW
Expand Down Expand Up @@ -344,6 +347,12 @@ const Section* DisassemblerWin32X86::RVAToSection(RVA rva) const {
return nullptr;
}

RVA DisassemblerWin32X86::Address32ToRVA(uint32_t address) const {
  // Returns the RVA of |address|, or kNoRVA if |address| is outside the image,
  // i.e. outside [image_base(), image_base() + size_of_image_).
  if (address < image_base())
    return kNoRVA;

  // Compare the offset against the image size instead of computing
  // image_base() + size_of_image_: with 32-bit arithmetic that sum can wrap
  // around for an image placed near the top of the address space, which would
  // make the upper-bound test meaningless.
  const auto offset = address - image_base();
  if (offset >= size_of_image_)
    return kNoRVA;
  return static_cast<RVA>(offset);
}

std::string DisassemblerWin32X86::SectionName(const Section* section) {
if (section == nullptr)
return "<none>";
Expand Down Expand Up @@ -396,8 +405,7 @@ bool DisassemblerWin32X86::ParseAbs32Relocs() {
for (size_t i = 0; i < abs32_locations_.size(); ++i) {
RVA rva = abs32_locations_[i];
// The 4 bytes at the relocation are a reference to some address.
uint32_t target_address = Read32LittleEndian(RVAToPointer(rva));
++abs32_target_rvas_[target_address - image_base()];
++abs32_target_rvas_[PointerToTargetRVA(RVAToPointer(rva))];
}
#endif
return true;
Expand Down Expand Up @@ -531,8 +539,8 @@ CheckBool DisassemblerWin32X86::ParseFileRegion(const Section* section,
++abs32_pos;

if (abs32_pos != abs32_locations_.end() && *abs32_pos == current_rva) {
uint32_t target_address = Read32LittleEndian(p);
RVA target_rva = target_address - image_base();
RVA target_rva = PointerToTargetRVA(p);
DCHECK_NE(kNoRVA, target_rva);
// TODO(sra): target could be Label+offset. It is not clear how to guess
// which it might be. We assume offset==0.
if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)))
Expand Down
5 changes: 5 additions & 0 deletions courgette/disassembler_win32_x86.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class DisassemblerWin32X86 : public Disassembler {
// Disassembler interfaces.
RVA FileOffsetToRVA(FileOffset file_offset) const override;
FileOffset RVAToFileOffset(RVA rva) const override;
RVA PointerToTargetRVA(const uint8_t* p) const override;
ExecutableType kind() const override { return EXE_WIN_32_X86; }
bool ParseHeader() override;
bool Disassemble(AssemblyProgram* target) override;
Expand All @@ -46,6 +47,10 @@ class DisassemblerWin32X86 : public Disassembler {
// Returns Section containing the relative virtual address, or null if none.
const Section* RVAToSection(RVA rva) const;

// (4) -> (5) (see AddressTranslator comment): Returns the RVA of the VA
// specified by |address|, or kNoRVA if |address| lies outside of the image.
RVA Address32ToRVA(uint32_t address) const;

static std::string SectionName(const Section* section);

protected:
Expand Down
44 changes: 29 additions & 15 deletions courgette/image_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@ namespace courgette {
// - VA (Virtual Address): Virtual memory address of a loaded image. This is
// subject to relocation by the OS.
// - RVA (Relative Virtual Address): VA relative to some base address. This is
// the preferred way to specify pointers in an image. Two ways to encode RVA
// are:
// - abs32: RVA value is encoded directly.
// - rel32: RVA is encoded as offset from an instruction address. This is
// commonly used for relative branch/call opcodes.
// Courgette operates on File Offsets and RVAs only.
// the preferred way to specify pointers in an image.
//
// In Courgette we consider two types of addresses:
// - abs32: In an image these are directly stored as VA whose locations are
// stored in the relocation table.
// - rel32: In an image these appear in branch/call opcodes, and are represented
// as offsets from an instruction address.

using RVA = uint32_t;
const RVA kUnassignedRVA = 0xFFFFFFFFU;
Expand All @@ -33,24 +34,37 @@ const RVA kNoRVA = 0xFFFFFFFFU;
using FileOffset = size_t;
const FileOffset kNoFileOffset = UINTPTR_MAX;

// An interface for {File Offset, RVA, pointer to image data} translation.
// An interface to translate and read addresses. The main conversion path is:
// (1) Location RVA.
// (2) Location FileOffset.
// (3) Pointer in image.
// (4) Target VA (32-bit or 64-bit).
// (5) Target RVA (32-bit).
// For abs32, we get (1) from relocation table, and convert to (5).
// For rel32, we get (2) from scanning opcode, and convert to (1).
class AddressTranslator {
public:
// (2) -> (1): Returns the RVA corresponding to |file_offset|, or kNoRVA if
// nonexistent.
virtual RVA FileOffsetToRVA(FileOffset file_offset) const = 0;

// (1) -> (2): Returns the file offset corresponding to |rva|, or
// kNoFileOffset if nonexistent.
virtual FileOffset RVAToFileOffset(RVA rva) const = 0;

// (2) -> (3): Returns image data pointer corresponding to |file_offset|.
// Assumes 0 <= |file_offset| <= image size.
// If |file_offset| == image size, then the resulting pointer is an end bound
// for iteration, and should not be dereferenced.
virtual const uint8_t* FileOffsetToPointer(FileOffset file_offset) const = 0;

// (1) -> (3): Returns the pointer to the image data for |rva|, or null if
// |rva| is invalid.
virtual const uint8_t* RVAToPointer(RVA rva) const = 0;

// (3) -> (5): Returns the target RVA located at |p|, where |p| is a pointer
// to image data. Implementations may return kNoRVA when the stored target is
// not a valid address (the PE disassemblers do so for targets that lie
// outside the image).
virtual RVA PointerToTargetRVA(const uint8_t* p) const = 0;
};

// A Label is a symbolic reference to an address. Unlike a conventional
Expand Down

0 comments on commit f940a8c

Please sign in to comment.