diff --git a/tools/cygprofile/check_orderfile.py b/tools/cygprofile/check_orderfile.py index 4202f7fdacf9d8..0c34d8807b7fdc 100755 --- a/tools/cygprofile/check_orderfile.py +++ b/tools/cygprofile/check_orderfile.py @@ -10,6 +10,7 @@ import optparse import sys +import cyglog_to_orderfile import cygprofile_utils import patch_orderfile import symbol_extractor @@ -18,6 +19,12 @@ _MAX_WARNINGS_TO_PRINT = 200 +def _IsSameMethod(name1, name2): + """Returns true if name1 or name2 are split method forms of the other.""" + return patch_orderfile.RemoveSuffixes(name1) == \ + patch_orderfile.RemoveSuffixes(name2) + + def _CountMisorderedSymbols(symbols, symbol_infos): """Count the number of misordered symbols, and log them. @@ -52,8 +59,9 @@ def _CountMisorderedSymbols(symbols, symbol_infos): previous_symbol_info = symbol_extractor.SymbolInfo( name='', offset=-1, size=0, section='') for symbol_info in matched_symbol_infos: - if symbol_info.offset < previous_symbol_info.offset: - logging.warning("Misordered pair: %s - %s" % ( + if symbol_info.offset < previous_symbol_info.offset and not ( + _IsSameMethod(symbol_info.name, previous_symbol_info.name)): + logging.warning('Misordered pair: %s - %s' % ( str(previous_symbol_info), str(symbol_info))) misordered_count += 1 previous_symbol_info = symbol_info @@ -77,7 +85,13 @@ def main(): (binary_filename, orderfile_filename) = argv[1:] symbol_extractor.SetArchitecture(options.arch) - symbols = patch_orderfile.GetSymbolsFromOrderfile(orderfile_filename) + obj_dir = cygprofile_utils.GetObjDir(binary_filename) + symbol_to_sections_map = \ + cyglog_to_orderfile.GetSymbolToSectionsMapFromObjectFiles(obj_dir) + section_to_symbols_map = cygprofile_utils.InvertMapping( + symbol_to_sections_map) + symbols = patch_orderfile.GetSymbolsFromOrderfile(orderfile_filename, + section_to_symbols_map) symbol_infos = symbol_extractor.SymbolInfosFromBinary(binary_filename) # Missing symbols is not an error since some of them can be eliminated through # 
inlining. diff --git a/tools/cygprofile/cyglog_to_orderfile.py b/tools/cygprofile/cyglog_to_orderfile.py index f64c077edc0635..6225574cb41e56 100755 --- a/tools/cygprofile/cyglog_to_orderfile.py +++ b/tools/cygprofile/cyglog_to_orderfile.py @@ -12,9 +12,10 @@ import multiprocessing import optparse import os -import tempfile +import re import string import sys +import tempfile import cygprofile_utils import symbol_extractor @@ -25,7 +26,6 @@ def _ParseLogLines(log_file_lines): Args: log_file_lines: array of lines in log file produced by profiled run - lib_name: library or executable containing symbols Below is an example of a small log file: 5086e000-52e92000 r-xp 00000000 b3:02 51276 libchromeview.so @@ -125,12 +125,38 @@ def _AllSymbolInfos(object_filenames): return result -def _GetSymbolToSectionMapFromObjectFiles(obj_dir): - """ Creates a mapping from symbol to linker section name by scanning all - the object files. +def _SameCtorOrDtorNames(symbol1, symbol2): + """Returns True if two symbols refer to the same constructor or destructor. + + The Itanium C++ ABI specifies dual constructor and destructor + emission (section 5.1.4.3): + https://refspecs.linuxbase.org/cxxabi-1.83.html#mangling-special + To avoid fully parsing all mangled symbols, a heuristic is used with c++filt. + + Note: some compilers may name generated copies differently. If this becomes + an issue this heuristic will need to be updated. + """ + # Check if this is the understood case of constructor/destructor + # signatures. GCC emits up to three types of constructor/destructors: + # complete, base, and allocating. If they're all the same they'll + # get folded together. + return (re.search('(C[123]|D[012])E', symbol1) and + symbol_extractor.DemangleSymbol(symbol1) == + symbol_extractor.DemangleSymbol(symbol2)) + + +def GetSymbolToSectionsMapFromObjectFiles(obj_dir): + """Scans object files to create a {symbol: linker section(s)} map. 
+ + Args: + obj_dir: The root of the output object file directory, which will be + scanned for .o files to form the mapping. + + Returns: + A map {symbol_name: [section_name1, section_name2...]} """ object_files = _GetObjectFileNames(obj_dir) - symbol_to_section_map = {} + symbol_to_sections_map = {} symbol_warnings = cygprofile_utils.WarningCollector(300) symbol_infos = _AllSymbolInfos(object_files) for symbol_info in symbol_infos: @@ -138,18 +164,24 @@ def _GetSymbolToSectionMapFromObjectFiles(obj_dir): if symbol.startswith('.LTHUNK'): continue section = symbol_info.section - if ((symbol in symbol_to_section_map) and - (symbol_to_section_map[symbol] != symbol_info.section)): - symbol_warnings.Write('Symbol ' + symbol + - ' in conflicting sections ' + section + - ' and ' + symbol_to_section_map[symbol]) - elif not section.startswith('.text'): + if ((symbol in symbol_to_sections_map) and + (symbol_info.section not in symbol_to_sections_map[symbol])): + symbol_to_sections_map[symbol].append(section) + + if not _SameCtorOrDtorNames( + symbol, symbol_to_sections_map[symbol][0].lstrip('.text.')): + symbol_warnings.Write('Symbol ' + symbol + + ' unexpectedly in more than one section: ' + + ', '.join(symbol_to_sections_map[symbol])) + elif not section.startswith('.text.'): symbol_warnings.Write('Symbol ' + symbol + ' in incorrect section ' + section) else: - symbol_to_section_map[symbol] = section + # In most cases we expect just one item in this list, and maybe 4 or so in + # the worst case. + symbol_to_sections_map[symbol] = [section] symbol_warnings.WriteEnd('bad sections') - return symbol_to_section_map + return symbol_to_sections_map def _WarnAboutDuplicates(offsets): @@ -172,15 +204,18 @@ def _WarnAboutDuplicates(offsets): return ok -def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_section_map, +def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_sections_map, output_file): """Outputs the orderfile to output_file. 
Args: offsets: Iterable of offsets to match to section names offset_to_symbol_infos: {offset: [SymbolInfo]} - symbol_to_section_map: {name: section} + symbol_to_sections_map: {name: [section1, section2]} output_file: file-like object to write the results to + + Returns: + True if all symbols were found in the library. """ success = True unknown_symbol_warnings = cygprofile_utils.WarningCollector(300) @@ -190,11 +225,12 @@ def _OutputOrderfile(offsets, offset_to_symbol_infos, symbol_to_section_map, try: symbol_infos = _FindSymbolInfosAtOffset(offset_to_symbol_infos, offset) for symbol_info in symbol_infos: - if symbol_info.name in symbol_to_section_map: - section = symbol_to_section_map[symbol_info.name] - if not section in output_sections: - output_file.write(section + '\n') - output_sections.add(section) + if symbol_info.name in symbol_to_sections_map: + sections = symbol_to_sections_map[symbol_info.name] + for section in sections: + if not section in output_sections: + output_file.write(section + '\n') + output_sections.add(section) else: unknown_symbol_warnings.Write( 'No known section for symbol ' + symbol_info.name) @@ -222,15 +258,14 @@ def main(): (log_filename, lib_filename, output_filename) = argv[1:] symbol_extractor.SetArchitecture(options.arch) - obj_dir = os.path.abspath(os.path.join( - os.path.dirname(lib_filename), '../obj')) + obj_dir = cygprofile_utils.GetObjDir(lib_filename) log_file_lines = map(string.rstrip, open(log_filename).readlines()) offsets = _ParseLogLines(log_file_lines) _WarnAboutDuplicates(offsets) offset_to_symbol_infos = _GroupLibrarySymbolInfosByOffset(lib_filename) - symbol_to_section_map = _GetSymbolToSectionMapFromObjectFiles(obj_dir) + symbol_to_sections_map = GetSymbolToSectionsMapFromObjectFiles(obj_dir) success = False temp_filename = None @@ -239,7 +274,7 @@ def main(): (fd, temp_filename) = tempfile.mkstemp(dir=os.path.dirname(output_filename)) output_file = os.fdopen(fd, 'w') ok = _OutputOrderfile( - offsets, 
offset_to_symbol_infos, symbol_to_section_map, output_file) + offsets, offset_to_symbol_infos, symbol_to_sections_map, output_file) output_file.close() os.rename(temp_filename, output_filename) temp_filename = None diff --git a/tools/cygprofile/cyglog_to_orderfile_unittest.py b/tools/cygprofile/cyglog_to_orderfile_unittest.py index 714d102ff20e20..9ea0d182107d5b 100755 --- a/tools/cygprofile/cyglog_to_orderfile_unittest.py +++ b/tools/cygprofile/cyglog_to_orderfile_unittest.py @@ -6,7 +6,15 @@ import unittest import cyglog_to_orderfile +import os import symbol_extractor +import sys + +sys.path.insert( + 0, os.path.join(os.path.dirname(__file__), os.pardir, os.pardir, + 'third_party', 'android_platform', 'development', + 'scripts')) +import symbol class TestCyglogToOrderfile(unittest.TestCase): @@ -48,10 +56,28 @@ def testWarnAboutDuplicates(self): offsets.append(0x1) self.assertFalse(cyglog_to_orderfile._WarnAboutDuplicates(offsets)) + def testSameCtorOrDtorNames(self): + if not os.path.exists(symbol.ToolPath('c++filt')): + print 'Skipping test dependent on missing c++filt binary.' 
+ return + self.assertTrue(cyglog_to_orderfile._SameCtorOrDtorNames( + '_ZNSt3__119istreambuf_iteratorIcNS_11char_traitsIcEEEC1Ev', + '_ZNSt3__119istreambuf_iteratorIcNS_11char_traitsIcEEEC2Ev')) + self.assertTrue(cyglog_to_orderfile._SameCtorOrDtorNames( + '_ZNSt3__119istreambuf_iteratorIcNS_11char_traitsIcEEED1Ev', + '_ZNSt3__119istreambuf_iteratorIcNS_11char_traitsIcEEED2Ev')) + self.assertFalse(cyglog_to_orderfile._SameCtorOrDtorNames( + '_ZNSt3__119istreambuf_iteratorIcNS_11char_traitsIcEEEC1Ev', + '_ZNSt3__119foo_iteratorIcNS_11char_traitsIcEEEC1Ev')) + self.assertFalse(cyglog_to_orderfile._SameCtorOrDtorNames( + '_ZNSt3__119istreambuf_iteratorIcNS_11char_traitsIcEEE', + '_ZNSt3__119istreambuf_iteratorIcNS_11char_traitsIcEEE')) + def testOutputOrderfile(self): class FakeOutputFile(object): def __init__(self): self.writes = [] + def write(self, data): self.writes.append(data) @@ -59,27 +85,28 @@ def write(self, data): # And two symbols aliased to the same address offsets = [0x12, 0x17] offset_to_symbol_infos = { - 0x10:[symbol_extractor.SymbolInfo( - name='Symbol', offset=0x10, size=0x13, section='dummy')], - 0x12:[symbol_extractor.SymbolInfo( - name='Symbol2', offset=0x12, size=0x13, section='dummy')], - 0x16:[symbol_extractor.SymbolInfo( - name='Symbol3', offset=0x16, size=0x13, section='dummy'), - symbol_extractor.SymbolInfo( - name='Symbol32', offset=0x16, size=0x13, section='dummy'),]} - symbol_to_section_map = { - 'Symbol': '.text.Symbol', - 'Symbol2': '.text.Symbol2', - 'Symbol3': '.text.Symbol3', - 'Symbol32': '.text.Symbol32'} + 0x10: [symbol_extractor.SymbolInfo( + name='Symbol', offset=0x10, size=0x13, section='dummy')], + 0x12: [symbol_extractor.SymbolInfo( + name='Symbol2', offset=0x12, size=0x13, section='dummy')], + 0x16: [symbol_extractor.SymbolInfo( + name='Symbol3', offset=0x16, size=0x13, section='dummy'), + symbol_extractor.SymbolInfo( + name='Symbol32', offset=0x16, size=0x13, section='dummy'),]} + symbol_to_sections_map = { + 'Symbol': 
['.text.Symbol'], + 'Symbol2': ['.text.Symbol2', '.text.hot.Symbol2'], + 'Symbol3': ['.text.Symbol3'], + 'Symbol32': ['.text.Symbol32']} fake_output = FakeOutputFile() cyglog_to_orderfile._OutputOrderfile( - offsets, offset_to_symbol_infos, symbol_to_section_map, fake_output) + offsets, offset_to_symbol_infos, symbol_to_sections_map, fake_output) expected = """.text.Symbol2 +.text.hot.Symbol2 .text.Symbol3 .text.Symbol32 """ - self.assertEquals(expected, "".join(fake_output.writes)) + self.assertEquals(expected, ''.join(fake_output.writes)) if __name__ == '__main__': diff --git a/tools/cygprofile/cygprofile_utils.py b/tools/cygprofile/cygprofile_utils.py index 866b352706ca2c..c1c1d8790546f3 100755 --- a/tools/cygprofile/cygprofile_utils.py +++ b/tools/cygprofile/cygprofile_utils.py @@ -40,3 +40,24 @@ def DetectArchitecture(default='arm'): return match.group(1) else: return default + + +def InvertMapping(x_to_ys): + """Given a map x -> [y1, y2...] returns inverse mapping y->[x1, x2...].""" + y_to_xs = {} + for x, ys in x_to_ys.items(): + for y in ys: + y_to_xs.setdefault(y, []).append(x) + return y_to_xs + + +def GetObjDir(libchrome): + """Get the path to the obj directory corresponding to the given libchrome. + + Assumes libchrome is in for example .../Release/lib/libchrome.so and object + files are in .../Release/obj. + """ + # TODO(azarchs): Pass obj path in explicitly where needed rather than relying + # on the above assumption. + return os.path.abspath(os.path.join( + os.path.dirname(libchrome), '../obj')) diff --git a/tools/cygprofile/cygprofile_utils_unittest.py b/tools/cygprofile/cygprofile_utils_unittest.py new file mode 100755 index 00000000000000..84e0a61a65a616 --- /dev/null +++ b/tools/cygprofile/cygprofile_utils_unittest.py @@ -0,0 +1,22 @@ +#!/usr/bin/python +# Copyright 2015 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ +import unittest + +import cygprofile_utils + + +class TestCygprofileUtils(unittest.TestCase): + def testInvertMapping(self): + inputMap = {'1': ['2', '3'], + '4': ['2', '5']} + self.assertEqual(cygprofile_utils.InvertMapping(inputMap), + {'2': ['1', '4'], + '3': ['1'], + '5': ['4']}) + + +if __name__ == '__main__': + unittest.main() diff --git a/tools/cygprofile/patch_orderfile.py b/tools/cygprofile/patch_orderfile.py index 7906804406841c..8f1f4fb44021f3 100755 --- a/tools/cygprofile/patch_orderfile.py +++ b/tools/cygprofile/patch_orderfile.py @@ -6,15 +6,16 @@ """Patch an orderfile. Starting with a list of symbols in a binary and an orderfile (ordered list of -symbols), matches the symbols in the orderfile and augments each symbol with the -symbols residing at the same address (due to having identical code). +sections), matches the symbols in the orderfile and augments each symbol with +the symbols residing at the same address (due to having identical code). The +output is a list of section matching rules appropriate for the linker option +-section-ordering-file. These section matching rules include both actual +section names and names with wildcard (*) suffixes. Note: It is possible to have. - Several symbols mapping to the same offset in the binary. -- Several offsets for a given symbol (because we strip the ".clone." suffix) - -TODO(lizeb): Since the suffix ".clone." is only used with -O3 that we don't -currently use, simplify the logic by removing the suffix handling. +- Several offsets for a given symbol (because we strip the ".clone." and other + suffixes) The general pipeline is: 1. Get the symbol infos (name, offset, size, section) from the binary @@ -22,6 +23,8 @@ 3. Find the orderfile symbol names in the symbols coming from the binary 4. For each symbol found, get all the symbols at the same address 5. Output them to an updated orderfile, with several different prefixes + and suffixes +6. Output catch-all section matching rules for unprofiled methods. 
""" import collections @@ -29,6 +32,7 @@ import optparse import sys +import cyglog_to_orderfile import cygprofile_utils import symbol_extractor @@ -36,17 +40,51 @@ # them back in the output file. _PREFIXES = ('.text.startup.', '.text.hot.', '.text.unlikely.', '.text.') +# Suffixes for the symbols. These are due to method splitting for inlining and +# method cloning for various reasons including constant propagation and +# inter-procedural optimization. +_SUFFIXES = ('.clone.', '.part.', '.isra.', '.constprop.') + -def _RemoveClone(name): - """Return name up to the ".clone." marker.""" - clone_index = name.find('.clone.') - if clone_index != -1: - return name[:clone_index] +def RemoveSuffixes(name): + """Strips method name suffixes from cloning and splitting. + + .clone. comes from cloning in -O3. + .part. comes from partial method splitting for inlining. + .isra. comes from inter-procedural optimizations. + .constprop. is cloning for constant propagation. + """ + for suffix in _SUFFIXES: + name = name.split(suffix)[0] return name +def _UniqueGenerator(generator): + """Converts a generator to skip yielding elements already seen. + + Example: + @_UniqueGenerator + def Foo(): + yield 1 + yield 2 + yield 1 + yield 3 + + Foo() yields 1,2,3. + """ + def _FilteringFunction(*args, **kwargs): + returned = set() + for item in generator(*args, **kwargs): + if item in returned: + continue + returned.add(item) + yield item + + return _FilteringFunction + + def _GroupSymbolInfos(symbol_infos): - """Group the symbol infos by name and offset. + """Groups the symbol infos by name and offset. 
Args: symbol_infos: an iterable of SymbolInfo @@ -58,7 +96,7 @@ def _GroupSymbolInfos(symbol_infos): offset_to_symbol_infos = collections.defaultdict(list) name_to_symbol_infos = collections.defaultdict(list) for symbol in symbol_infos: - symbol = symbol_extractor.SymbolInfo(name=_RemoveClone(symbol.name), + symbol = symbol_extractor.SymbolInfo(name=RemoveSuffixes(symbol.name), offset=symbol.offset, size=symbol.size, section=symbol.section) @@ -84,7 +122,7 @@ def _GroupSymbolInfosFromBinary(binary_filename): def _StripPrefix(line): - """Get the symbol from a line with a linker section name. + """Strips the linker section name prefix from a symbol line. Args: line: a line from an orderfile, usually in the form: @@ -93,53 +131,73 @@ def _StripPrefix(line): Returns: The symbol, SymbolName in the example above. """ - line = line.rstrip('\n') for prefix in _PREFIXES: if line.startswith(prefix): return line[len(prefix):] return line # Unprefixed case -def _GetSymbolsFromStream(lines): - """Get the symbols from an iterable of lines. - Filters out wildcards and lines which do not correspond to symbols. +def _SectionNameToSymbols(section_name, section_to_symbols_map): + """Yields all symbols which could be referred to by section_name. + + If the section name is present in the map, the names in the map are returned. + Otherwise, any clone annotations and prefixes are stripped from the section + name and the remainder is returned. + """ + if (not section_name or + section_name == '.text' or + section_name.endswith('*')): + return # Don't return anything for catch-all sections + if section_name in section_to_symbols_map: + for symbol in section_to_symbols_map[section_name]: + yield symbol + else: + name = _StripPrefix(section_name) + if name: + yield name + + +def GetSectionsFromOrderfile(filename): + """Yields the sections from an orderfile. Args: - lines: iterable of lines from an orderfile. + filename: The name of the orderfile. 
- Returns: - Same as GetSymbolsFromOrderfile + Yields: + The section names, one per non-empty line of the orderfile. """ - # TODO(lizeb): Retain the prefixes later in the processing stages. - symbols = [] - unique_symbols = set() - for line in lines: - line = _StripPrefix(line) - name = _RemoveClone(line) - if name == '' or name == '*' or name == '.text': - continue - if not line in unique_symbols: - symbols.append(line) - unique_symbols.add(line) - return symbols - - -def GetSymbolsFromOrderfile(filename): - """Return the symbols from an orderfile. + with open(filename, 'r') as f: + for line in f.xreadlines(): + line = line.rstrip('\n') + if line: + yield line + + +@_UniqueGenerator +def GetSymbolsFromOrderfile(filename, section_to_symbols_map): + """Yields the symbols from an orderfile. Output elements do not repeat. Args: filename: The name of the orderfile. + section_to_symbols_map: The mapping from section to symbol names. If a + section name is missing from the mapping, the + symbol name is assumed to be the section name with + prefixes and suffixes stripped. - Returns: + Yields: A list of symbol names. """ - with open(filename, 'r') as f: - return _GetSymbolsFromStream(f.xreadlines()) + # TODO(azarchs): Move this method to symbol_extractor.py + for section in GetSectionsFromOrderfile(filename): + for symbol in _SectionNameToSymbols(RemoveSuffixes(section), + section_to_symbols_map): + yield symbol + def _SymbolsWithSameOffset(profiled_symbol, name_to_symbol_info, offset_to_symbol_info): - """Expand a profiled symbol to include all symbols which share an offset - with that symbol. + """Expands a symbol to include all symbols with the same offset. + Args: profiled_symbol: the string symbol name to be expanded. name_to_symbol_info: {name: [symbol_info1], ...}, as returned by @@ -150,7 +208,7 @@ def _SymbolsWithSameOffset(profiled_symbol, name_to_symbol_info, A list of symbol names, or an empty list if profiled_symbol was not in name_to_symbol_info. 
""" - if not profiled_symbol in name_to_symbol_info: + if profiled_symbol not in name_to_symbol_info: return [] symbol_infos = name_to_symbol_info[profiled_symbol] expanded = [] @@ -158,51 +216,153 @@ def _SymbolsWithSameOffset(profiled_symbol, name_to_symbol_info, expanded += (s.name for s in offset_to_symbol_info[symbol_info.offset]) return expanded -def _ExpandSymbols(profiled_symbols, name_to_symbol_infos, - offset_to_symbol_infos): - """Expand all of the symbols in profiled_symbols to include any symbols which - share the same address. + +@_UniqueGenerator +def _SectionMatchingRules(section_name, name_to_symbol_infos, + offset_to_symbol_infos, section_to_symbols_map, + symbol_to_sections_map, suffixed_sections): + """Gets the set of section matching rules for section_name. + + These rules will include section_name, but also any sections which may + contain the same code due to cloning, splitting, or identical code folding. Args: - profiled_symbols: Symbols to match + section_name: The section to expand. name_to_symbol_infos: {name: [symbol_info1], ...}, as returned by - GetSymbolInfosFromBinary + GetSymbolInfosFromBinary. + offset_to_symbol_infos: {offset: [symbol_info1, ...], ...} + section_to_symbols_map: The mapping from section to symbol name. Missing + section names are treated as per _SectionNameToSymbols. + symbol_to_sections_map: The mapping from symbol name to names of linker + sections containing the symbol. If a symbol isn't in the mapping, the + section names are generated from the set of _PREFIXES with the symbol + name. + suffixed_sections: A set of sections which can have suffixes. + + Yields: + Section names including at least section_name. 
+ """ + for name in _ExpandSection(section_name, name_to_symbol_infos, + offset_to_symbol_infos, section_to_symbols_map, + symbol_to_sections_map): + yield name + # Since only a subset of methods (mostly those compiled with O2) ever get + # suffixes, don't emit the wildcards for ones where it won't be helpful. + # Otherwise linking takes too long. + if name in suffixed_sections: + # TODO(azarchs): instead of just appending .*, append .suffix.* for + # _SUFFIXES. We can't do this right now because that many wildcards + # seems to kill the linker (linking libchrome takes 3 hours). This gets + # almost all the benefit at a much lower link-time cost, but could cause + # problems with unexpected suffixes. + yield name + '.*' + +def _ExpandSection(section_name, name_to_symbol_infos, offset_to_symbol_infos, + section_to_symbols_map, symbol_to_sections_map): + """Yields the set of section names for section_name. + + This set will include section_name, but also any sections which may contain + the same code due to identical code folding. + + Args: + section_name: The section to expand. + name_to_symbol_infos: {name: [symbol_info1], ...}, as returned by + GetSymbolInfosFromBinary. offset_to_symbol_infos: {offset: [symbol_info1, ...], ...} + section_to_symbols_map: The mapping from section to symbol name. Missing + section names are treated as per _SectionNameToSymbols. + symbol_to_sections_map: The mapping from symbol name to names of linker + sections containing the symbol. If a symbol isn't in the mapping, the + section names are generated from the set of _PREFIXES with the symbol + name. + + Yields: + Section names including at least section_name. 
+ """ + yield section_name + for first_sym in _SectionNameToSymbols(section_name, + section_to_symbols_map): + for symbol in _SymbolsWithSameOffset(first_sym, name_to_symbol_infos, + offset_to_symbol_infos): + if symbol in symbol_to_sections_map: + for section in symbol_to_sections_map[symbol]: + yield section + else: + for prefix in _PREFIXES: + yield prefix + symbol + + +@_UniqueGenerator +def _ExpandSections(section_names, name_to_symbol_infos, + offset_to_symbol_infos, section_to_symbols_map, + symbol_to_sections_map, suffixed_sections): + """Gets an ordered set of section matching rules for a list of sections. + + Rules will not be repeated. + + Args: + section_names: The sections to expand. + name_to_symbol_infos: {name: [symbol_info1], ...}, as returned by + _GroupSymbolInfosFromBinary. + offset_to_symbol_infos: {offset: [symbol_info1, ...], ...} + section_to_symbols_map: The mapping from section to symbol names. + symbol_to_sections_map: The mapping from symbol name to names of linker + sections containing the symbol. + suffixed_sections: A set of sections which can have suffixes. + + Yields: + Section matching rules including at least section_names. + """ + for profiled_section in section_names: + for section in _SectionMatchingRules( + profiled_section, name_to_symbol_infos, offset_to_symbol_infos, + section_to_symbols_map, symbol_to_sections_map, suffixed_sections): + yield section + + +def _CombineSectionListsByPrimaryName(symbol_to_sections_map): + """Combines values of the symbol_to_sections_map by stripping suffixes. + + Example: + {foo: [.text.foo, .text.bar.part.1], + foo.constprop.4: [.text.baz.constprop.3]} -> + {foo: [.text.foo, .text.bar, .text.baz]} + + Args: + symbol_to_sections_map: Mapping from symbol name to list of section names Returns: - A list of the symbol names. + The same mapping, but with symbol and section names suffix-stripped. 
""" - found_symbols = 0 - missing_symbols = [] - all_symbols = [] - for name in profiled_symbols: - expansion = _SymbolsWithSameOffset(name, - name_to_symbol_infos, offset_to_symbol_infos) - if expansion: - found_symbols += 1 - all_symbols += expansion - else: - all_symbols.append(name) - missing_symbols.append(name) - logging.info('symbols found: %d\n' % found_symbols) - if missing_symbols > 0: - logging.warning('%d missing symbols.' % len(missing_symbols)) - missing_symbols_to_show = min(100, len(missing_symbols)) - logging.warning('First %d missing symbols:\n%s' % ( - missing_symbols_to_show, - '\n'.join(missing_symbols[:missing_symbols_to_show]))) - return all_symbols - - -def _PrintSymbolsWithPrefixes(symbol_names, output_file): - """For each symbol, outputs it to output_file with the prefixes.""" - unique_outputs = set() - for name in symbol_names: - for prefix in _PREFIXES: - linker_section = prefix + name - if not linker_section in unique_outputs: - output_file.write(linker_section + '\n') - unique_outputs.add(linker_section) + simplified = {} + for suffixed_symbol, suffixed_sections in symbol_to_sections_map.iteritems(): + symbol = RemoveSuffixes(suffixed_symbol) + sections = [RemoveSuffixes(section) for section in suffixed_sections] + simplified.setdefault(symbol, []).extend(sections) + return simplified + + +def _SectionsWithSuffixes(symbol_to_sections_map): + """Finds sections which have suffixes applied. + + Args: + symbol_to_sections_map: a map where the values are lists of section names. + + Returns: + A set containing all section names which were seen with suffixes applied. 
+ """ + sections_with_suffixes = set() + for suffixed_sections in symbol_to_sections_map.itervalues(): + for suffixed_section in suffixed_sections: + section = RemoveSuffixes(suffixed_section) + if section != suffixed_section: + sections_with_suffixes.add(section) + return sections_with_suffixes + + +def _StripSuffixes(section_list): + """Remove all suffixes on items in a list of sections or symbols.""" + return [RemoveSuffixes(section) for section in section_list] def main(argv): @@ -222,13 +382,24 @@ def main(argv): symbol_extractor.SetArchitecture(options.arch) (offset_to_symbol_infos, name_to_symbol_infos) = _GroupSymbolInfosFromBinary( binary_filename) - profiled_symbols = GetSymbolsFromOrderfile(orderfile_filename) - expanded_symbols = _ExpandSymbols( - profiled_symbols, name_to_symbol_infos, offset_to_symbol_infos) - _PrintSymbolsWithPrefixes(expanded_symbols, sys.stdout) + obj_dir = cygprofile_utils.GetObjDir(binary_filename) + raw_symbol_map = cyglog_to_orderfile.GetSymbolToSectionsMapFromObjectFiles( + obj_dir) + suffixed = _SectionsWithSuffixes(raw_symbol_map) + symbol_to_sections_map = _CombineSectionListsByPrimaryName(raw_symbol_map) + section_to_symbols_map = cygprofile_utils.InvertMapping( + symbol_to_sections_map) + profiled_sections = _StripSuffixes( + GetSectionsFromOrderfile(orderfile_filename)) + expanded_sections = _ExpandSections( + profiled_sections, name_to_symbol_infos, offset_to_symbol_infos, + section_to_symbols_map, symbol_to_sections_map, suffixed) + for section in expanded_sections: + print section # The following is needed otherwise Gold only applies a partial sort. 
- print '.text' # gets methods not in a section, such as assembly - print '.text.*' # gets everything else + print '.text' # gets methods not in a section, such as assembly + for prefix in _PREFIXES: + print prefix + '*' # gets everything else return 0 diff --git a/tools/cygprofile/patch_orderfile_unittest.py b/tools/cygprofile/patch_orderfile_unittest.py index 5d6c1d28c2af2c..5a58b109f37af2 100755 --- a/tools/cygprofile/patch_orderfile_unittest.py +++ b/tools/cygprofile/patch_orderfile_unittest.py @@ -10,12 +10,15 @@ class TestPatchOrderFile(unittest.TestCase): - def testRemoveClone(self): - no_clone = "this.does.not.contain.clone" - self.assertEquals(no_clone, patch_orderfile._RemoveClone(no_clone)) - with_clone = "this.does.contain.clone." + def testRemoveSuffixes(self): + no_clone = 'this.does.not.contain.clone' + self.assertEquals(no_clone, patch_orderfile.RemoveSuffixes(no_clone)) + with_clone = 'this.does.contain.clone.' self.assertEquals( - "this.does.contain", patch_orderfile._RemoveClone(with_clone)) + 'this.does.contain', patch_orderfile.RemoveSuffixes(with_clone)) + with_part = 'this.is.a.part.42' + self.assertEquals( + 'this.is.a', patch_orderfile.RemoveSuffixes(with_part)) def testAliasClonedSymbols(self): symbol_infos = [ @@ -45,10 +48,9 @@ def testGroupSymbolsByOffset(self): self.assertEquals(len(offset_to_symbol_infos), 1) self.assertEquals(tuple(offset_to_symbol_infos[0x42]), symbol_infos) - def testExpandSymbols(self): + def testSymbolsWithSameOffset(self): symbol_name = "dummySymbol" symbol_name2 = "other" - profiled_symbol_names = [symbol_name, "symbolThatShouldntMatch"] name_to_symbol_infos = {symbol_name: [ symbol_extractor.SymbolInfo(symbol_name, 0x42, 0x12, section='.text')]} @@ -57,29 +59,90 @@ def testExpandSymbols(self): section='.text'), symbol_extractor.SymbolInfo(symbol_name2, 0x42, 0x12, section='.text')]} - symbol_names = patch_orderfile._ExpandSymbols( - profiled_symbol_names, name_to_symbol_infos, offset_to_symbol_infos) - 
self.assertEquals(len(symbol_names), 3) + symbol_names = patch_orderfile._SymbolsWithSameOffset( + symbol_name, name_to_symbol_infos, offset_to_symbol_infos) + self.assertEquals(len(symbol_names), 2) self.assertEquals(symbol_names[0], symbol_name) self.assertEquals(symbol_names[1], symbol_name2) - self.assertEquals(symbol_names[2], "symbolThatShouldntMatch") + self.assertEquals([], patch_orderfile._SymbolsWithSameOffset( + "symbolThatShouldntMatch", + name_to_symbol_infos, offset_to_symbol_infos)) + + def testSectionNameToSymbols(self): + mapping = {'.text.foo': ['foo'], + '.text.startup.bar': ['bar', 'bar1']} + self.assertEquals(list(patch_orderfile._SectionNameToSymbols( + '.text.foo', mapping)), + ['foo']) + self.assertEquals(list(patch_orderfile._SectionNameToSymbols( + '.text.startup.bar', mapping)), + ['bar', 'bar1']) + self.assertEquals(list(patch_orderfile._SectionNameToSymbols( + '.text.startup.bar', mapping)), + ['bar', 'bar1']) + self.assertEquals(list(patch_orderfile._SectionNameToSymbols( + '.text.hot.foobar', mapping)), + ['foobar']) + self.assertEquals(list(patch_orderfile._SectionNameToSymbols( + '.text.startup.*', mapping)), + []) + + def testSectionMatchingRules(self): + symbol_name1 = 'symbol1' + symbol_name2 = 'symbol2' + symbol_name3 = 'symbol3' + section_name1 = '.text.' 
+ symbol_name1 + section_name3 = '.text.foo' + suffixed = set([section_name3]) + name_to_symbol_infos = {symbol_name1: [ + symbol_extractor.SymbolInfo(symbol_name1, 0x42, 0x12, + section='.text')]} + offset_to_symbol_infos = { + 0x42: [symbol_extractor.SymbolInfo(symbol_name1, 0x42, 0x12, + section='.text'), + symbol_extractor.SymbolInfo(symbol_name2, 0x42, 0x12, + section='.text')]} + section_to_symbols_map = {section_name1: [symbol_name1], + section_name3: [symbol_name1, symbol_name3]} + symbol_to_sections_map = {symbol_name1: + [section_name1, section_name3], + symbol_name3: [section_name3]} + expected = [ + section_name1, + section_name3, + section_name3 + '.*', + '.text.startup.symbol2', + '.text.hot.symbol2', + '.text.unlikely.symbol2', + '.text.symbol2'] + self.assertEqual(expected, list(patch_orderfile._SectionMatchingRules( + section_name1, name_to_symbol_infos, offset_to_symbol_infos, + section_to_symbols_map, symbol_to_sections_map, suffixed))) + + def testUniqueGenerator(self): + @patch_orderfile._UniqueGenerator + def TestIterator(): + yield 1 + yield 2 + yield 1 + yield 3 + + self.assertEqual(list(TestIterator()), [1,2,3]) + + def testCombineSectionListsByPrimaryName(self): + self.assertEqual(patch_orderfile._CombineSectionListsByPrimaryName( + {'foo': ['.text.foo', '.text.bar.constprop.1'], + 'foo.part.1': ['.text.baz'], + 'foobar': ['.text.foobar']}), + {'foo': ['.text.foo', '.text.bar', '.text.baz'], + 'foobar': ['.text.foobar']}) - def testPrintSymbolWithPrefixes(self): - class FakeOutputFile(object): - def __init__(self): - self.output = '' - def write(self, s): - self.output = self.output + s - test_symbol = "dummySymbol" - symbol_names = [test_symbol] - fake_output = FakeOutputFile() - patch_orderfile._PrintSymbolsWithPrefixes(symbol_names, fake_output) - expected_output = """.text.startup.dummySymbol -.text.hot.dummySymbol -.text.unlikely.dummySymbol -.text.dummySymbol -""" - self.assertEquals(fake_output.output, expected_output) + def 
testSectionsWithSuffixes(self): + self.assertEqual(patch_orderfile._SectionsWithSuffixes( + {'foo': ['.text.foo', '.text.bar.constprop.1'], + 'foo.part.1': ['.text.baz'], + 'foobar': ['.text.foobar']}), + set(['.text.bar'])) if __name__ == "__main__": diff --git a/tools/cygprofile/symbol_extractor.py b/tools/cygprofile/symbol_extractor.py index f28efd00ad562d..a30c05ee342884 100755 --- a/tools/cygprofile/symbol_extractor.py +++ b/tools/cygprofile/symbol_extractor.py @@ -150,3 +150,8 @@ def CreateNameToSymbolInfo(symbol_infos): ','.join([hex(x.offset) for x in infos]))) warnings.WriteEnd('symbols at multiple offsets.') return symbol_infos_by_name + + +def DemangleSymbol(mangled_symbol): + """Return the demangled form of mangled_symbol.""" + return symbol.CallCppFilt(mangled_symbol)