Skip to content

Commit

Permalink
Update orderfile pipeline to better deal with aliased methods.
Browse files Browse the repository at this point in the history
This addresses two outstanding issues:
1. Methods which appear in multiple sections due to
   aliasing (e.g. constructors).
2. Partial methods and clones generated by the compiler
   for various optimizations.

Most of these changes were previously reviewed in
https://codereview.chromium.org/1155713003/ and
https://codereview.chromium.org/1165603003/
but then reverted in commits
cfbd6c2,
1d5e730,
a57cac2.

BUG=460906,497247

Review URL: https://codereview.chromium.org/1177473002

Cr-Commit-Position: refs/heads/master@{#336777}
  • Loading branch information
azarchs authored and Commit bot committed Jun 30, 2015
1 parent aff93a2 commit cc60851
Show file tree
Hide file tree
Showing 8 changed files with 515 additions and 157 deletions.
20 changes: 17 additions & 3 deletions tools/cygprofile/check_orderfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import optparse
import sys

import cyglog_to_orderfile
import cygprofile_utils
import patch_orderfile
import symbol_extractor
Expand All @@ -18,6 +19,12 @@
_MAX_WARNINGS_TO_PRINT = 200


def _IsSameMethod(name1, name2):
  """Tells whether two symbol names are split forms of the same method.

  Both names are normalized with patch_orderfile.RemoveSuffixes and the
  normalized results are compared for equality.

  Args:
    name1: First symbol name.
    name2: Second symbol name.

  Returns:
    True if both names normalize to the same string, False otherwise.
  """
  base_name1 = patch_orderfile.RemoveSuffixes(name1)
  base_name2 = patch_orderfile.RemoveSuffixes(name2)
  return base_name1 == base_name2


def _CountMisorderedSymbols(symbols, symbol_infos):
"""Count the number of misordered symbols, and log them.
Expand Down Expand Up @@ -52,8 +59,9 @@ def _CountMisorderedSymbols(symbols, symbol_infos):
previous_symbol_info = symbol_extractor.SymbolInfo(
name='', offset=-1, size=0, section='')
for symbol_info in matched_symbol_infos:
if symbol_info.offset < previous_symbol_info.offset:
logging.warning("Misordered pair: %s - %s" % (
if symbol_info.offset < previous_symbol_info.offset and not (
_IsSameMethod(symbol_info.name, previous_symbol_info.name)):
logging.warning('Misordered pair: %s - %s' % (
str(previous_symbol_info), str(symbol_info)))
misordered_count += 1
previous_symbol_info = symbol_info
Expand All @@ -77,7 +85,13 @@ def main():
(binary_filename, orderfile_filename) = argv[1:]

symbol_extractor.SetArchitecture(options.arch)
symbols = patch_orderfile.GetSymbolsFromOrderfile(orderfile_filename)
obj_dir = cygprofile_utils.GetObjDir(binary_filename)
symbol_to_sections_map = \
cyglog_to_orderfile.GetSymbolToSectionsMapFromObjectFiles(obj_dir)
section_to_symbols_map = cygprofile_utils.InvertMapping(
symbol_to_sections_map)
symbols = patch_orderfile.GetSymbolsFromOrderfile(orderfile_filename,
section_to_symbols_map)
symbol_infos = symbol_extractor.SymbolInfosFromBinary(binary_filename)
# Missing symbols are not an error since some of them can be eliminated through
# inlining.
Expand Down
85 changes: 60 additions & 25 deletions tools/cygprofile/cyglog_to_orderfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
import multiprocessing
import optparse
import os
import tempfile
import re
import string
import sys
import tempfile

import cygprofile_utils
import symbol_extractor
Expand All @@ -25,7 +26,6 @@ def _ParseLogLines(log_file_lines):
Args:
log_file_lines: array of lines in log file produced by profiled run
lib_name: library or executable containing symbols
Below is an example of a small log file:
5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so
Expand Down Expand Up @@ -125,31 +125,63 @@ def _AllSymbolInfos(object_filenames):
return result


def _GetSymbolToSectionMapFromObjectFiles(obj_dir):
""" Creates a mapping from symbol to linker section name by scanning all
the object files.
def _SameCtorOrDtorNames(symbol1, symbol2):
"""Returns True if two symbols refer to the same constructor or destructor.
The Itanium C++ ABI specifies dual constructor and destructor
emmission (section 5.1.4.3):
https://refspecs.linuxbase.org/cxxabi-1.83.html#mangling-special
To avoid fully parsing all mangled symbols, a heuristic is used with c++filt.
Note: some compilers may name generated copies differently. If this becomes
an issue this heuristic will need to be updated.
"""
# Check if this is the understood case of constructor/destructor
# signatures. GCC emits up to three types of constructor/destructors:
# complete, base, and allocating. If they're all the same they'll
# get folded together.
return (re.search('(C[123]|D[012])E', symbol1) and
symbol_extractor.DemangleSymbol(symbol1) ==
symbol_extractor.DemangleSymbol(symbol2))


def GetSymbolToSectionsMapFromObjectFiles(obj_dir):
  """Scans object files to create a {symbol: linker section(s)} map.

  Symbols whose name starts with '.LTHUNK' are ignored. A symbol found in
  more than one section gets every distinct section recorded; a warning is
  collected unless the duplication looks like a folded constructor/destructor
  (see _SameCtorOrDtorNames). A first-seen symbol whose section does not start
  with '.text.' is reported and not recorded.

  Args:
    obj_dir: The root of the output object file directory, which will be
        scanned for .o files to form the mapping.

  Returns:
    A map {symbol_name: [section_name1, section_name2...]}
  """
  text_prefix = '.text.'
  object_files = _GetObjectFileNames(obj_dir)
  symbol_to_sections_map = {}
  symbol_warnings = cygprofile_utils.WarningCollector(300)
  symbol_infos = _AllSymbolInfos(object_files)
  for symbol_info in symbol_infos:
    symbol = symbol_info.name
    if symbol.startswith('.LTHUNK'):
      continue
    section = symbol_info.section
    known_sections = symbol_to_sections_map.get(symbol)
    if known_sections is not None and section not in known_sections:
      known_sections.append(section)

      # Recover the symbol name embedded in the first recorded section.
      # str.lstrip() takes a *set of characters*, not a prefix, so
      # lstrip('.text.') could also eat leading '.', 't', 'e' or 'x'
      # characters of the symbol itself; slice the prefix off instead.
      first_section_symbol = known_sections[0]
      if first_section_symbol.startswith(text_prefix):
        first_section_symbol = first_section_symbol[len(text_prefix):]

      if not _SameCtorOrDtorNames(symbol, first_section_symbol):
        symbol_warnings.Write('Symbol ' + symbol +
                              ' unexpectedly in more than one section: ' +
                              ', '.join(known_sections))
    elif not section.startswith(text_prefix):
      symbol_warnings.Write('Symbol ' + symbol +
                            ' in incorrect section ' + section)
    elif known_sections is None:
      # In most cases we expect just one item in this list, and maybe 4 or so in
      # the worst case.
      symbol_to_sections_map[symbol] = [section]
    # Otherwise this (symbol, section) pair is already recorded. Leave the
    # list alone so previously collected sections are not discarded.
  symbol_warnings.WriteEnd('bad sections')
  return symbol_to_sections_map


def _WarnAboutDuplicates(offsets):
Expand All @@ -172,15 +204,18 @@ def _WarnAboutDuplicates(offsets):
return ok


def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_section_map,
def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_sections_map,
output_file):
"""Outputs the orderfile to output_file.
Args:
offsets: Iterable of offsets to match to section names
offset_to_symbol_infos: {offset: [SymbolInfo]}
symbol_to_section_map: {name: section}
symbol_to_sections_map: {name: [section1, section2]}
output_file: file-like object to write the results to
Returns:
True if all symbols were found in the library.
"""
success = True
unknown_symbol_warnings = cygprofile_utils.WarningCollector(300)
Expand All @@ -190,11 +225,12 @@ def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_section_map,
try:
symbol_infos = _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset)
for symbol_info in symbol_infos:
if symbol_info.name in symbol_to_section_map:
section = symbol_to_section_map[symbol_info.name]
if not section in output_sections:
output_file.write(section + '\n')
output_sections.add(section)
if symbol_info.name in symbol_to_sections_map:
sections = symbol_to_sections_map[symbol_info.name]
for section in sections:
if not section in output_sections:
output_file.write(section + '\n')
output_sections.add(section)
else:
unknown_symbol_warnings.Write(
'No known section for symbol ' + symbol_info.name)
Expand Down Expand Up @@ -222,15 +258,14 @@ def main():
(log_filename, lib_filename, output_filename) = argv[1:]
symbol_extractor.SetArchitecture(options.arch)

obj_dir = os.path.abspath(os.path.join(
os.path.dirname(lib_filename), '../obj'))
obj_dir = cygprofile_utils.GetObjDir(lib_filename)

log_file_lines = map(string.rstrip, open(log_filename).readlines())
offsets = _ParseLogLines(log_file_lines)
_WarnAboutDuplicates(offsets)

offset_to_symbol_infos = _GroupLibrarySymbolInfosByOffset(lib_filename)
symbol_to_section_map = _GetSymbolToSectionMapFromObjectFiles(obj_dir)
symbol_to_sections_map = GetSymbolToSectionsMapFromObjectFiles(obj_dir)

success = False
temp_filename = None
Expand All @@ -239,7 +274,7 @@ def main():
(fd, temp_filename) = tempfile.mkstemp(dir=os.path.dirname(output_filename))
output_file = os.fdopen(fd, 'w')
ok = _OutputOrderfile(
offsets, offset_to_symbol_infos, symbol_to_section_map, output_file)
offsets, offset_to_symbol_infos, symbol_to_sections_map, output_file)
output_file.close()
os.rename(temp_filename, output_filename)
temp_filename = None
Expand Down
57 changes: 42 additions & 15 deletions tools/cygprofile/cyglog_to_orderfile_unittest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,15 @@
import unittest

import cyglog_to_orderfile
import os
import symbol_extractor
import sys

sys.path.insert(
0, os.path.join(os.path.dirname(__file__), os.pardir, os.pardir,
'third_party', 'android_platform', 'development',
'scripts'))
import symbol


class TestCyglogToOrderfile(unittest.TestCase):
Expand Down Expand Up @@ -48,38 +56,57 @@ def testWarnAboutDuplicates(self):
offsets.append(0x1)
self.assertFalse(cyglog_to_orderfile._WarnAboutDuplicates(offsets))

def testSameCtorOrDtorNames(self):
  """Checks the constructor/destructor aliasing heuristic.

  Reported as skipped (rather than silently passing) when the c++filt
  binary is not available.
  """
  if not os.path.exists(symbol.ToolPath('c++filt')):
    self.skipTest('Skipping test dependent on missing c++filt binary.')
  # C1 and C2 variants of the same constructor.
  self.assertTrue(cyglog_to_orderfile._SameCtorOrDtorNames(
      '_ZNSt3__119istreambuf_iteratorIcNS_11char_traitsIcEEEC1Ev',
      '_ZNSt3__119istreambuf_iteratorIcNS_11char_traitsIcEEEC2Ev'))
  # D1 and D2 variants of the same destructor.
  self.assertTrue(cyglog_to_orderfile._SameCtorOrDtorNames(
      '_ZNSt3__119istreambuf_iteratorIcNS_11char_traitsIcEEED1Ev',
      '_ZNSt3__119istreambuf_iteratorIcNS_11char_traitsIcEEED2Ev'))
  # Constructors of two differently named classes.
  self.assertFalse(cyglog_to_orderfile._SameCtorOrDtorNames(
      '_ZNSt3__119istreambuf_iteratorIcNS_11char_traitsIcEEEC1Ev',
      '_ZNSt3__119foo_iteratorIcNS_11char_traitsIcEEEC1Ev'))
  # Identical names that carry no constructor/destructor marker.
  self.assertFalse(cyglog_to_orderfile._SameCtorOrDtorNames(
      '_ZNSt3__119istreambuf_iteratorIcNS_11char_traitsIcEEE',
      '_ZNSt3__119istreambuf_iteratorIcNS_11char_traitsIcEEE'))

def testOutputOrderfile(self):
  """Checks the section names _OutputOrderfile writes for given offsets."""
  class FakeOutputFile(object):
    """Minimal file-like object that records every write() payload."""
    def __init__(self):
      self.writes = []

    def write(self, data):
      self.writes.append(data)

  # One symbol not matched, one with an odd address, one regularly matched
  # And two symbols aliased to the same address
  offsets = [0x12, 0x17]
  offset_to_symbol_infos = {
      0x10: [symbol_extractor.SymbolInfo(
                 name='Symbol', offset=0x10, size=0x13, section='dummy')],
      0x12: [symbol_extractor.SymbolInfo(
                 name='Symbol2', offset=0x12, size=0x13, section='dummy')],
      0x16: [symbol_extractor.SymbolInfo(
                 name='Symbol3', offset=0x16, size=0x13, section='dummy'),
             symbol_extractor.SymbolInfo(
                 name='Symbol32', offset=0x16, size=0x13, section='dummy'),]}
  # Symbol2 lives in two sections; both are expected in the output.
  symbol_to_sections_map = {
      'Symbol': ['.text.Symbol'],
      'Symbol2': ['.text.Symbol2', '.text.hot.Symbol2'],
      'Symbol3': ['.text.Symbol3'],
      'Symbol32': ['.text.Symbol32']}
  fake_output = FakeOutputFile()
  cyglog_to_orderfile._OutputOrderfile(
      offsets, offset_to_symbol_infos, symbol_to_sections_map, fake_output)
  expected = """.text.Symbol2
.text.hot.Symbol2
.text.Symbol3
.text.Symbol32
"""
  # assertEqual rather than the deprecated assertEquals alias.
  self.assertEqual(expected, ''.join(fake_output.writes))


if __name__ == '__main__':
Expand Down
21 changes: 21 additions & 0 deletions tools/cygprofile/cygprofile_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,3 +40,24 @@ def DetectArchitecture(default='arm'):
return match.group(1)
else:
return default


def InvertMapping(x_to_ys):
  """Builds the inverse of a one-to-many mapping.

  Args:
    x_to_ys: A map {x: [y1, y2...]}.

  Returns:
    A map {y: [x1, x2...]} listing, for every y, each x whose list contained
    it (once per occurrence), in the iteration order of x_to_ys.
  """
  inverse = {}
  for source, targets in x_to_ys.items():
    for target in targets:
      if target not in inverse:
        inverse[target] = []
      inverse[target].append(source)
  return inverse


def GetObjDir(libchrome):
  """Returns the absolute path of the obj directory for the given libchrome.

  The result is the 'obj' directory that is a sibling of the directory
  containing libchrome, e.g. .../Release/lib/libchrome.so maps to
  .../Release/obj.

  Args:
    libchrome: Path to the library file.

  Returns:
    Absolute, normalized path to the corresponding obj directory.
  """
  # TODO(azarchs): Pass obj path in explicitly where needed rather than relying
  # on the above assumption.
  lib_dir = os.path.dirname(libchrome)
  obj_relative_to_lib = os.path.join(lib_dir, '../obj')
  return os.path.abspath(obj_relative_to_lib)
22 changes: 22 additions & 0 deletions tools/cygprofile/cygprofile_utils_unittest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/python
# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import unittest

import cygprofile_utils


class TestCygprofileUtils(unittest.TestCase):
  """Unit tests for the helpers in cygprofile_utils."""

  def testInvertMapping(self):
    """InvertMapping maps each value back to every key that listed it."""
    forward = {'1': ['2', '3'],
               '4': ['2', '5']}
    inverted = cygprofile_utils.InvertMapping(forward)
    # The order of the keys collected for each value follows dict iteration
    # order, which is not guaranteed here, so compare sorted lists.
    normalized = dict(
        (value, sorted(keys)) for value, keys in inverted.items())
    self.assertEqual(normalized,
                     {'2': ['1', '4'],
                      '3': ['1'],
                      '5': ['4']})


if __name__ == '__main__':
unittest.main()
Loading

0 comments on commit cc60851

Please sign in to comment.