forked from lcompilers/lpython
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dwarf_convert.py
executable file
·162 lines (130 loc) · 5.48 KB
/
dwarf_convert.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#!/usr/bin/env python3
"""
This script is used to convert the output of dwarfdump into a file that is easy
to load into lfortran to lookup filenames and line numbers for a given address.
Here is how to use it:
cd src/bin
llvm-dwarfdump --debug-line lfortran.dSYM > lfortran.dSYM/symbols.txt
./dwarf_convert.py lfortran.dSYM/symbols.txt lfortran.dSYM/lines.txt lfortran.dSYM/lines.dat
This is meant to be executed at build time.
A better solution would be to use the `dwarf` library directly from C++ and
generate the same output directly. Here is the source code of llvm-dwarfdump:
https://github.com/llvm/llvm-project/blob/91a6ad5ad887a16e361338303d4ff3d29dba5e10/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
We just have to do exactly what it does, but generate the output in the format
of lines.txt and lines.dat
"""
from collections import namedtuple
from glob import glob
import os
import re
from struct import pack
import sys
# Parse tree built by Parser: parse_file() returns a DebugLines whose `lines`
# holds one DebugLine per table returned by parse_debug_line().
DebugLines = namedtuple("DebugLines", "lines")
DebugLine = namedtuple("DebugLine", "include_directories file_names addresses")
IncludeDirectory = namedtuple("IncludeDirectory", "id path")
FileName = namedtuple("FileName", "id filename dir_idx")

# Flattened result returned by ast_to_asr().
ASRDebugLines = namedtuple("ASRDebugLines", "filenames addresses")
class Parser:
    """
    Parser for the output generated by dwarfdump.

    On macOS (both Intel and ARM based):

        dwarfdump --debug-line src/bin/lfortran.dSYM > symbols.txt

    Then parse it using:

        p = Parser()
        ast = p.parse_file("symbols.txt")

    The parser is strictly line oriented: `self.file` is the dump being read
    and `self.line` always holds the most recently read line. `readline()`
    returns "" only at end of file (blank lines are "\\n"), which is how the
    end of the input is detected.
    """

    # Compiled once here rather than on every loop iteration.
    _INCLUDE_DIR_RE = re.compile(r"include_directories\[[ ]*(\d+)\] = \"([^\"]+)\"")
    _FILE_INDEX_RE = re.compile(r"file_names\[[ ]*(\d+)\]:")
    _FILE_NAME_RE = re.compile(r"name: \"([^\"]+)\"")
    _DIR_INDEX_RE = re.compile(r"dir_index: (\d+)")

    def _advance(self):
        """Read the next line of the dump into `self.line` and return it."""
        self.line = self.file.readline()
        return self.line

    def _skip_until(self, prefix):
        """
        Advance until `self.line` starts with `prefix`.

        Raises ValueError if the end of the file is reached first. Without
        this check the search would never terminate, because readline()
        keeps returning "" once the file is exhausted.
        """
        while not self.line.startswith(prefix):
            if not self.line:
                raise ValueError(
                    "%r not found before the end of %s" % (prefix, self.file.name)
                )
            self._advance()

    def parse_file(self, filename):
        """
        Parse the dwarfdump text file `filename`.

        Returns a DebugLines holding one DebugLine per "debug_line" table
        that follows the ".debug_line contents:" header.

        Raises ValueError if an expected section marker is missing. The file
        is closed before returning (or raising); `self.file` then refers to
        the closed file object.
        """
        with open(filename) as stream:
            self.file = stream
            self._advance()
            self._skip_until(".debug_line contents:")
            self._advance()
            tables = []
            while self.line.startswith("debug_line"):
                tables.append(self.parse_debug_line())
        return DebugLines(tables)

    def parse_debug_line(self):
        """
        Parse one "debug_line" table starting at the current line.

        Expects `self.file` to be open and `self.line` to be the table's
        first line. On return `self.line` is the line that follows the
        table's address rows and one further line (the next table header,
        or "" at end of file).

        Returns a DebugLine(include_directories, file_names, addresses) where
        each address is a list [address, line, column, file_id].

        Raises ValueError if no "include_directories" line is found, and
        IndexError if a line does not match its expected pattern.
        """
        # Everything before the include directory list is ignored.
        self._advance()
        self._skip_until("include_directories")

        include_directories = []
        while self.line.startswith("include_directories"):
            n, path = self._INCLUDE_DIR_RE.findall(self.line)[0]
            include_directories.append(IncludeDirectory(int(n), path))
            self._advance()

        file_names = []
        while self.line.startswith("file_names"):
            n = int(self._FILE_INDEX_RE.findall(self.line)[0])

            self._advance()
            filename = self._FILE_NAME_RE.findall(self.line)[0]

            self._advance()
            dir_idx = int(self._DIR_INDEX_RE.findall(self.line)[0])

            # Two more lines of the entry are read but not used
            # (presumably mod_time and length -- TODO confirm against the
            # dwarfdump output format).
            self._advance()
            self._advance()

            file_names.append(FileName(n, filename, dir_idx))

            self._advance()

        # Move three lines forward to reach the first address row
        # (presumably past the table heading and its separator -- TODO
        # confirm against the dwarfdump output format).
        self._advance()
        self._advance()
        self._advance()

        addresses = []
        while self.line.startswith("0x"):
            # Only the first four columns of a row are used.
            address, line, column, file_id = self.line.split()[:4]
            addresses.append(
                [int(address, base=16), int(line), int(column), int(file_id)]
            )
            self._advance()

        # Step over the line that ended the address rows so the caller sees
        # what follows it.
        self._advance()

        return DebugLine(include_directories, file_names, addresses)
def ast_to_asr(ast):
    """
    Flatten the parsed debug-line tables into two global lists.

    `ast` is the DebugLines returned by Parser.parse_file(). Every table's
    file entries are appended to one global filename list, and each address
    row is rewritten to refer to a file by its index in that list.

    Relative file names are resolved against the *.cpp and *.h files found
    (recursively) under the parent of the current working directory; names
    that cannot be resolved are kept as they are.

    Returns ASRDebugLines(filenames, addresses) where each address is a list
    [address, line_num, filename_index]. Rows whose line number is 0 are
    dropped.

    Raises KeyError if a row refers to a file id, or a file entry to a
    directory index, that its table does not define. Asserts that addresses
    never decrease across all tables.
    """
    local_files = [
        os.path.abspath(path)
        for path in glob("../**/*.cpp", recursive=True)
        + glob("../**/*.h", recursive=True)
    ]

    def make_abs(end_path):
        """Return the local file that `end_path` names, else `end_path` itself."""
        if end_path[0] != "/":
            # Require a separator in front of the suffix so that "parser.h"
            # cannot resolve to ".../asr_parser.h".
            suffixes = ("/" + end_path, os.sep + end_path)
            for candidate in local_files:
                if candidate.endswith(suffixes):
                    return candidate
        return end_path

    last_address = -1
    global_filenames = []
    global_addresses = []
    for table in ast.lines:
        include_dirs = {inc.id: inc.path for inc in table.include_directories}

        # Maps the table-local file id to the index in global_filenames.
        local_to_global = {}
        for entry in table.file_names:
            prefix = ""
            # Directory index 0 gets no prefix.
            if entry.dir_idx != 0:
                prefix = include_dirs[entry.dir_idx] + "/"
            local_to_global[entry.id] = len(global_filenames)
            global_filenames.append(make_abs(prefix + entry.filename))

        for address, line_num, _column, file_id in table.addresses:
            # Looked up for every row (even dropped ones) so an unknown file
            # id is always reported.
            global_id = local_to_global[file_id]
            assert last_address <= address, "addresses must not decrease"
            last_address = address
            if line_num != 0:
                global_addresses.append([address, line_num, global_id])
    return ASRDebugLines(global_filenames, global_addresses)
# Command line arguments, as in the usage example in the module docstring:
#   argv[1]  dwarfdump text output to read (symbols.txt)
#   argv[2]  text file to write (lines.txt)
#   argv[3]  binary file to write (lines.dat)
p = Parser()
ast = p.parse_file(sys.argv[1])
asr = ast_to_asr(ast)

# Text file: the number of filenames, one filename per line, then the number
# of address records stored in the binary file.
with open(sys.argv[2], "w") as text_out:
    text_out.write("{}\n".format(len(asr.filenames)))
    text_out.writelines(name + "\n" for name in asr.filenames)
    text_out.write("{}\n".format(len(asr.addresses)))

# Binary file: each record is packed with struct format "3Q" as
# (address, line number, filename index).
with open(sys.argv[3], "wb") as binary_out:
    for address, line_number, file_index in asr.addresses:
        record = pack("3Q", address, line_number, file_index)
        binary_out.write(record)