Skip to content

Commit

Permalink
Add stacktrace printing using llvm-dwarfdump
Browse files Browse the repository at this point in the history
We call llvm-dwarfdump at build time from CMake, parse the output, post-process
it, and save it in a binary file which contains a list of addresses and the
corresponding line numbers and filename indexes. LFortran then reads this
binary file when printing a stacktrace. On macOS this speeds up stacktrace printing by over 100x.

There is no other dependency besides llvm-dwarfdump, which is part of LLVM.
  • Loading branch information
certik committed Mar 15, 2021
1 parent fa0417a commit 6ac0af0
Show file tree
Hide file tree
Showing 6 changed files with 273 additions and 17 deletions.
11 changes: 10 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ set(WITH_UNWIND no
CACHE BOOL "Build with unwind support")
set(WITH_BFD no
CACHE BOOL "Build with BFD support")
set(WITH_DWARFDUMP no
CACHE BOOL "Build with dwarfdump support")
set(WITH_LINKH no
CACHE BOOL "Build with link.h support")
set(WITH_MACHO no
Expand All @@ -162,18 +164,24 @@ set(WITH_STACKTRACE no
CACHE BOOL "Build with stacktrace support (requires binutils-dev)")
if (WITH_STACKTRACE)
set(WITH_UNWIND yes)
set(WITH_BFD yes)
if (APPLE)
set(WITH_MACHO yes)
if (NOT WITH_BFD)
set(WITH_DWARFDUMP yes)
endif()
else()
set(WITH_LINKH yes)
set(WITH_BFD yes)
endif()
set(HAVE_LFORTRAN_STACKTRACE yes)
endif()
if (WITH_BFD)
find_package(BFD REQUIRED)
set(HAVE_LFORTRAN_BFD yes)
endif()
if (WITH_DWARFDUMP)
set(HAVE_LFORTRAN_DWARFDUMP yes)
endif()
if (WITH_LINKH)
find_package(LINKH REQUIRED)
set(HAVE_LFORTRAN_LINK yes)
Expand Down Expand Up @@ -209,6 +217,7 @@ message("LFORTRAN_STATIC_BIN: ${LFORTRAN_STATIC_BIN}")
message("WITH_STACKTRACE: ${WITH_STACKTRACE}")
message("WITH_UNWIND: ${WITH_UNWIND}")
message("WITH_BFD: ${WITH_BFD}")
message("WITH_DWARFDUMP: ${WITH_DWARFDUMP}")
message("WITH_LINKH: ${WITH_LINKH}")
message("WITH_MACHO: ${WITH_MACHO}")
message("HAVE_LFORTRAN_DEMANGLE: ${HAVE_LFORTRAN_DEMANGLE}")
Expand Down
12 changes: 12 additions & 0 deletions src/bin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,18 @@ if (WITH_STACKTRACE AND APPLE AND CMAKE_CXX_COMPILER_ID MATCHES Clang)
POST_BUILD
COMMAND dsymutil lfortran
)
if (WITH_DWARFDUMP)
    # Dump the DWARF line tables of the dSYM bundle into raw.txt.
    add_custom_command(
        TARGET lfortran
        POST_BUILD
        COMMAND llvm-dwarfdump --debug-line lfortran.dSYM > lfortran.dSYM/raw.txt
    )
    # Convert raw.txt into lines.txt and lines.dat. The converter lives next
    # to this CMakeLists.txt, so it is referenced through
    # CMAKE_CURRENT_SOURCE_DIR: a bare "./dwarf_convert.py" is resolved
    # against the build directory and is not found in out-of-source builds.
    add_custom_command(
        TARGET lfortran
        POST_BUILD
        COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/dwarf_convert.py lfortran.dSYM/raw.txt lfortran.dSYM/lines.txt lfortran.dSYM/lines.dat
    )
endif()
endif()

# Ensure "Release" is not appended to the path on Windows:
Expand Down
162 changes: 162 additions & 0 deletions src/bin/dwarf_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
#!/usr/bin/env python

"""
This script is used to convert the output of dwarfdump into a file that is easy
to load into lfortran to lookup filenames and line numbers for a given address.
Here is how to use it:
cd src/bin
llvm-dwarfdump --debug-line lfortran.dSYM > lfortran.dSYM/symbols.txt
./dwarf_convert.py lfortran.dSYM/symbols.txt lfortran.dSYM/lines.txt lfortran.dSYM/lines.dat
This is meant to be executed at build time.
A better solution would be to use the `dwarf` library directly from C++ and
generate the same output directly. Here is the source code of llvm-dwarfdump:
https://github.com/llvm/llvm-project/blob/91a6ad5ad887a16e361338303d4ff3d29dba5e10/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
We just have to do exactly what it does, but generate the output in the format
of lines.txt and lines.dat
"""

from collections import namedtuple
from glob import glob
import os
import re
from struct import pack
import sys

# Records produced by Parser.
# DebugLines.lines is a list of DebugLine, one per parsed "debug_line" section.
DebugLines = namedtuple("DebugLines", "lines")
# One section: lists of IncludeDirectory and FileName records, plus address
# rows stored as [address, line, column, file_id].
DebugLine = namedtuple("DebugLine", "include_directories file_names addresses")
IncludeDirectory = namedtuple("IncludeDirectory", "id path")
FileName = namedtuple("FileName", "id filename dir_idx")

# Flattened result of ast_to_asr(): one list of filenames shared by all
# sections and address rows stored as [address, line, index into filenames].
ASRDebugLines = namedtuple("ASRDebugLines", "filenames addresses")

class Parser:
    """
    Parser for the output generated by dwarfdump.

    On macOS (both Intel and ARM based):

        dwarfdump --debug-line src/bin/lfortran.dSYM > symbols.txt

    Then parse it using:

        p = Parser()
        ast = p.parse_file("symbols.txt")

    While parsing, `self.file` is the open input file and `self.line` is the
    most recently read line (an empty string at end of file).
    """

    # Compiled once here instead of once per input line.
    _INCLUDE_DIR_RE = re.compile(r"include_directories\[[ ]*(\d+)\] = \"([^\"]+)\"")
    _FILE_INDEX_RE = re.compile(r"file_names\[[ ]*(\d+)\]:")
    _FILE_NAME_RE = re.compile(r"name: \"([^\"]+)\"")
    _DIR_INDEX_RE = re.compile(r"dir_index: (\d+)")

    def _skip_until(self, prefix):
        """
        Advance `self.line` until it starts with `prefix`.

        Raises ValueError if the end of the file is reached first; readline()
        returns "" forever at EOF, so without this check the loop never ends.
        """
        while not self.line.startswith(prefix):
            if self.line == "":
                raise ValueError(
                    "unexpected end of file while looking for '%s'" % prefix)
            self.line = self.file.readline()

    def parse_file(self, filename):
        """
        Parse the dump stored in `filename` and return a DebugLines record
        holding one DebugLine per "debug_line" section.

        Raises ValueError if the ".debug_line contents:" header is missing.
        """
        # The `with` block closes the file even when parsing fails.
        with open(filename) as f:
            self.file = f
            self.line = f.readline()
            self._skip_until(".debug_line contents:")

            self.line = f.readline()
            lines = []
            while self.line.startswith("debug_line"):
                lines.append(self.parse_debug_line())
        return DebugLines(lines)

    def parse_debug_line(self):
        """
        Parse one section, starting with `self.line` on its "debug_line" line.

        Returns a DebugLine. On return `self.line` holds the line that follows
        the separator line after the address rows.
        """
        self.line = self.file.readline()
        self._skip_until("include_directories")

        include_directories = []
        while self.line.startswith("include_directories"):
            n, path = self._INCLUDE_DIR_RE.findall(self.line)[0]
            include_directories.append(IncludeDirectory(int(n), path))
            self.line = self.file.readline()

        file_names = []
        while self.line.startswith("file_names"):
            n = int(self._FILE_INDEX_RE.findall(self.line)[0])

            self.line = self.file.readline()
            filename = self._FILE_NAME_RE.findall(self.line)[0]

            self.line = self.file.readline()
            dir_idx = int(self._DIR_INDEX_RE.findall(self.line)[0])

            # Skip the two remaining lines of this file entry.
            self.line = self.file.readline()
            self.line = self.file.readline()

            file_names.append(FileName(n, filename, dir_idx))

            self.line = self.file.readline()

        # Skip three lines to reach the first address row.
        self.line = self.file.readline()
        self.line = self.file.readline()
        self.line = self.file.readline()

        addresses = []
        while self.line.startswith("0x"):
            address, line, column, file_id = self.line.split()[:4]
            addresses.append(
                [int(address, base=16), int(line), int(column), int(file_id)])
            self.line = self.file.readline()

        # Step past the line that terminated the address rows.
        self.line = self.file.readline()

        return DebugLine(include_directories, file_names, addresses)

def ast_to_asr(ast):
    """
    Flatten the parsed dump `ast` (a DebugLines-like object) into a single
    ASRDebugLines record.

    Every file name of every section gets an index into one shared filename
    list. Relative names are replaced by the absolute path of a matching
    *.cpp / *.h file found under ../../ (relative to the current working
    directory) when there is one. Address rows with line number 0 are dropped;
    the remaining rows are returned as [address, line, filename index].

    Raises KeyError if a row or file entry refers to an unknown id, and
    AssertionError if the addresses are not in non-decreasing order.
    """
    local_files = [
        os.path.abspath(path)
        for path in glob("../../**/*.cpp", recursive=True) +
                    glob("../../**/*.h", recursive=True)
    ]

    def make_abs(end_path):
        # Absolute paths are kept as they are.
        if not end_path.startswith("/"):
            # Match on a path-component boundary, so that "foo.cpp" is not
            # resolved to ".../xfoo.cpp".
            suffix = "/" + end_path
            for f in local_files:
                if f.endswith(suffix):
                    return f
        return end_path

    last_address = -1
    global_filenames = []
    global_addresses = []
    for line in ast.lines:
        include_dirs = {}
        for inc in line.include_directories:
            include_dirs[inc.id] = inc.path
        # Maps the section-local file id to the index in global_filenames.
        filenames = {}
        for filename in line.file_names:
            prefix = ""
            if filename.dir_idx != 0:
                prefix = include_dirs[filename.dir_idx] + "/"
            filenames[filename.id] = len(global_filenames)
            global_filenames.append(make_abs(prefix + filename.filename))
        for address, line_num, column, file_id in line.addresses:
            # Looked up for every row, so an unknown file id is reported even
            # when the row itself is dropped below.
            global_file_id = filenames[file_id]
            assert last_address <= address, \
                "addresses are not sorted: 0x%x follows 0x%x" % (address, last_address)
            last_address = address
            if line_num != 0:
                global_addresses.append([address, line_num, global_file_id])
    return ASRDebugLines(global_filenames, global_addresses)


# Script driver: argv[1] is the dump to parse, argv[2] the text index to write
# and argv[3] the binary address table to write.
p = Parser()
ast = p.parse_file(sys.argv[1])
asr = ast_to_asr(ast)

# Text index: the number of filenames, one filename per line, then the number
# of address records stored in the binary table.
with open(sys.argv[2], "w") as f:
    f.write("%d\n" % len(asr.filenames))
    for filename in asr.filenames:
        f.write("%s\n" % filename)
    f.write("%d\n" % len(asr.addresses))

# Binary table: each record is packed with struct format "3Q"
# (address, line, filename index).
with open(sys.argv[3], "wb") as f:
    for addr, line, fileid in asr.addresses:
        record = pack("3Q", addr, line, fileid)
        f.write(record)
1 change: 1 addition & 0 deletions src/lfortran/config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
/* Define if stacktrace is enabled */
#cmakedefine HAVE_LFORTRAN_STACKTRACE
#cmakedefine HAVE_LFORTRAN_BFD
#cmakedefine HAVE_LFORTRAN_DWARFDUMP
#cmakedefine HAVE_LFORTRAN_LINK
#cmakedefine HAVE_LFORTRAN_MACHO
#cmakedefine HAVE_LFORTRAN_UNWIND
Expand Down
102 changes: 87 additions & 15 deletions src/lfortran/stacktrace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ int load_symbol_table(bfd *abfd, line_data *data)
return 0;
}

void get_symbol_info(std::string binary_filename, uintptr_t addr,
void get_symbol_info_bfd(std::string binary_filename, uintptr_t addr,
std::string &source_filename, std::string &function_name,
int &line_number)
{
Expand Down Expand Up @@ -438,15 +438,29 @@ std::string addr2str(const StacktraceItem &i)
s << " File unknown, absolute address: " << (void*) i.pc;
s << color(style::reset);
} else {
s << color(style::dim);
s << " Binary file \"";
s << color(style::reset);
s << color(style::bold) << color(fg::magenta);
s << i.binary_filename;
s << color(fg::reset) << color(style::reset);
s << color(style::dim);
s << "\", local address: " << (void*) i.local_pc;
s << color(style::reset);
if (i.source_filename == "") {
s << color(style::dim);
s << " Binary file \"";
s << color(style::reset);
s << color(style::bold) << color(fg::magenta);
s << i.binary_filename;
s << color(fg::reset) << color(style::reset);
s << color(style::dim);
s << "\", local address: " << (void*) i.local_pc;
s << color(style::reset);
} else {
// Nicely format the filename + line
s << color(style::dim) << " File \"" << color(style::reset)
<< color(style::bold) << color(fg::magenta) << i.source_filename
<< color(fg::reset) << color(style::reset)
<< color(style::dim) << "\", line " << i.line_number
<< color(style::reset);
const std::string line_text = remove_leading_whitespace(
read_line_from_file(i.source_filename, i.line_number));
if (line_text != "") {
s << "\n " << line_text;
}
}
}
} else if (i.source_filename == "") {
// The file is unknown (and data.line == 0 in this case), so the
Expand Down Expand Up @@ -542,18 +556,76 @@ void get_local_addresses(std::vector<StacktraceItem> &d)
}
}

// Looks up the source file and line recorded for `address`.
//
// `addresses` is a flat table of (address, line, file index) triples and
// `filenames` is the list the file indexes refer to. The table must be sorted
// by address in non-decreasing order (dwarf_convert.py asserts this when it
// generates the table), which is what makes the bisection below valid.
//
// The entry selected is the first one whose address is greater than
// `address - 8`. On success `filename` and `line_number` are set from that
// entry. If no entry qualifies, or the entry refers to a file index outside
// `filenames`, `filename` is set to "" and `line_number` to -1.
void address_to_line_number(const std::vector<std::string> &filenames,
        const std::vector<uint64_t> &addresses,
        uintptr_t address,
        std::string &filename,
        int &line_number) {
    filename = "";
    line_number = -1;

    // `address - 8` would wrap around for such addresses; treat them as
    // "not found" explicitly instead of relying on the unsigned overflow.
    if (address < 8) {
        return;
    }
    const uint64_t threshold = static_cast<uint64_t>(address) - 8;

    // Bisection for the first record with addr > threshold: O(log(n)).
    const size_t n = addresses.size() / 3;
    size_t lo = 0;
    size_t hi = n;
    while (lo < hi) {
        const size_t mid = lo + (hi - lo) / 2;
        if (addresses[3*mid] > threshold) {
            hi = mid;
        } else {
            lo = mid + 1;
        }
    }
    if (lo == n) {
        return;
    }

    const uint64_t line = addresses[3*lo+1];
    const uint64_t fileid = addresses[3*lo+2];
    // A file index outside `filenames` means the two tables do not belong
    // together; report "not found" rather than reading out of bounds.
    if (fileid >= filenames.size()) {
        return;
    }
    filename = filenames[fileid];
    line_number = static_cast<int>(line);
}

// Fills in `source_filename` and `line_number` of every item in `d` from the
// two tables stored next to the executable:
//
//   <binary>.dSYM/lines.txt  the number of filenames, the filenames (one per
//                            line), then the number of address records
//   <binary>.dSYM/lines.dat  that many (address, line, file index) triples of
//                            uint64_t, read as raw bytes
//
// If lines.txt cannot be opened, `d` is left untouched. If either file is
// malformed, truncated or missing, the lookup runs on whatever part of the
// tables could be loaded safely (possibly nothing), so the affected items end
// up with an empty filename and line number -1. No exception is thrown for
// bad input.
void get_local_info_dwarfdump(std::vector<StacktraceItem> &d)
{
    std::vector<std::string> filenames;
    std::vector<uint64_t> addresses;
    {
        std::string filename = binary_executable_path + ".dSYM/lines.txt";
        std::ifstream in(filename);
        if (!in.is_open()) {
            return;
        }
        std::string s;
        // Stream extraction sets failbit on a malformed count instead of
        // throwing the way std::stoi does.
        long long n_files = 0;
        if (in >> n_files) {
            std::getline(in, s); // consume the rest of the count line
            for (long long i=0; i < n_files && std::getline(in, s); i++) {
                filenames.push_back(s);
            }
        }
        long long n_records = 0;
        if (!(in >> n_records) || n_records < 0) {
            n_records = 0;
        }

        filename = binary_executable_path + ".dSYM/lines.dat";
        // Opened at the end so that tellg() gives the file size.
        std::ifstream in2(filename, std::ios::binary | std::ios::ate);
        if (in2.is_open() && n_records > 0) {
            const long long record_bytes = 3*sizeof(uint64_t);
            const long long file_bytes = static_cast<long long>(
                static_cast<std::streamoff>(in2.tellg()));
            // Never trust the count more than the data actually present:
            // this bounds the allocation and handles a truncated file.
            const long long available =
                (file_bytes > 0) ? file_bytes / record_bytes : 0;
            if (n_records > available) {
                n_records = available;
            }
            if (n_records > 0) {
                addresses.resize(3*n_records);
                in2.seekg(0, std::ios::beg);
                in2.read(reinterpret_cast<char*>(addresses.data()),
                    static_cast<std::streamsize>(n_records*record_bytes));
                if (!in2) {
                    // A partially filled table is not usable for lookups.
                    addresses.clear();
                }
            }
        }
    }
    for (size_t i=0; i < d.size(); i++) {
        address_to_line_number(filenames, addresses, d[i].local_pc,
            d[i].source_filename, d[i].line_number);
    }
}

void get_local_info(std::vector<StacktraceItem> &d)
{
#ifdef HAVE_LFORTRAN_BFD
#ifdef HAVE_LFORTRAN_DWARFDUMP
get_local_info_dwarfdump(d);
#else
# ifdef HAVE_LFORTRAN_BFD
bfd_init();
#endif
# endif
for (size_t i=0; i < d.size(); i++) {
#ifdef HAVE_LFORTRAN_BFD
get_symbol_info(d[i].binary_filename, d[i].local_pc,
# ifdef HAVE_LFORTRAN_BFD
get_symbol_info_bfd(d[i].binary_filename, d[i].local_pc,
d[i].source_filename, d[i].function_name, d[i].line_number);
#endif
# endif
}
#endif
}

} // namespace LFortran
2 changes: 1 addition & 1 deletion src/lfortran/stacktrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ struct StacktraceItem
uintptr_t local_pc=0; // 0 if not found
std::string binary_filename; // "" if not found

// Sometimes this is found, but the next two are not
// This can be found or not
std::string function_name; // "" if not found

// The following two are either both found, or not found
Expand Down

0 comments on commit 6ac0af0

Please sign in to comment.