Skip to content

Commit

Permalink
Associate cli arguments with executables and refactor llvm/gcc c/c++ …
Browse files Browse the repository at this point in the history
…toolchain selection (#6217)

### Problem

#5951 explains the problem addressed by moving CLI arguments to individual `Executable` objects -- this reduces greatly the difficulty in generating appropriate command lines for the executables invoked. In this PR, it can be seen to remove a significant amount of repeated boilerplate.

Additionally, we weren't distinguishing between a `Linker` to link the compiled object files of `gcc` or `g++` vs `clang` or `clang++`. We were attempting to generate a linker object which would work with *any of* `gcc`, `g++`, `clang`, or `clang++`, and this wasn't really feasible. Along with the above, this made it extremely difficult and error-prone to generate correct command lines / environments for executing the linker, which led to e.g. not being able to find `crti.o` (as one symptom addressed by this problem).

### Solution

- Introduce `CToolchain` and `CppToolchain` in `environment.py`, which can be generated from `LLVMCToolchain`, `LLVMCppToolchain`, `GCCCToolchain`, or `GCCCppToolchain`. These toolchain datatypes are created in `native_toolchain.py`, where a single `@rule` for each ensures that no C or C++ compiler that someone can request was made without an accompanying linker, which will be configured to work with the compiler.
- Introduce the `extra_args` property to the `Executable` mixin in `environment.py`, which `Executable` subclasses can just declare a datatype field named `extra_args` in order to override. This is used in `native_toolchain.py` to ensure platform-specific arguments and environment variables are set in the same `@rule` which produces a paired compiler and linker -- there is a single place to look at to see where all the process invocation environment variables and command-line arguments are set for a given toolchain.
- Introduce the `ArchiveFileMapper` subsystem and use it to declare sets of directories to resolve within our BinaryTool archives `GCC` and `LLVM`. This subsystem allows globbing (and checks that there is a unique expansion), which makes it robust to e.g. platform-specific paths to things like include or lib directories.

### Result

Removes several FIXMEs, including heavily-commented parts of `test_native_toolchain.py`. Partially addresses #5951 -- `setup_py.py` still generates its own execution environment from scratch, and this could be made more hygienic in the future. As noted in #6179 and #6205, this PR seems to immediately fix the CI failures in those PRs.
  • Loading branch information
cosmicexplorer committed Jul 28, 2018
1 parent 6c444ce commit 7216ff2
Show file tree
Hide file tree
Showing 20 changed files with 782 additions and 629 deletions.
104 changes: 58 additions & 46 deletions src/python/pants/backend/native/config/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from pants.engine.rules import SingletonRule
from pants.util.objects import datatype
from pants.util.osutil import all_normalized_os_names, get_normalized_os_name
from pants.util.strutil import create_path_env_var, safe_shlex_join
from pants.util.strutil import create_path_env_var


class Platform(datatype(['normalized_os_name'])):
Expand Down Expand Up @@ -53,14 +53,24 @@ def path_entries(self):

@abstractproperty
def library_dirs(self):
"""Directories containing shared libraries required for a subprocess to run."""
"""Directories containing shared libraries that must be on the runtime library search path.
Note: this is for libraries needed for the current Executable to run -- see LinkerMixin below
for libraries that are needed at link time."""

@abstractproperty
def exe_filename(self):
"""The "entry point" -- which file to invoke when PATH is set to `path_entries()`."""

def get_invocation_environment_dict(self, platform):
lib_env_var = platform.resolve_platform_specific({
@property
def extra_args(self):
return []

_platform = Platform.create()

@property
def as_invocation_environment_dict(self):
lib_env_var = self._platform.resolve_platform_specific({
'darwin': lambda: 'DYLD_LIBRARY_PATH',
'linux': lambda: 'LD_LIBRARY_PATH',
})
Expand All @@ -78,57 +88,46 @@ class Assembler(datatype([
pass


class Linker(datatype([
'path_entries',
'exe_filename',
'library_dirs',
]), Executable):
class LinkerMixin(Executable):

@abstractproperty
def linking_library_dirs(self):
"""Directories to search for libraries needed at link time."""

@property
def as_invocation_environment_dict(self):
ret = super(LinkerMixin, self).as_invocation_environment_dict.copy()

# FIXME(#5951): We need a way to compose executables more hygienically. This could be done
# declaratively -- something like: { 'LIBRARY_PATH': DelimitedPathDirectoryEnvVar(...) }. We
# could also just use safe_shlex_join() and create_path_env_var() and keep all the state in the
# environment -- but then we have to remember to use those each time we specialize.
def get_invocation_environment_dict(self, platform):
ret = super(Linker, self).get_invocation_environment_dict(platform).copy()

# TODO: set all LDFLAGS in here or in further specializations of Linker instead of in individual
# tasks.
all_ldflags_for_platform = platform.resolve_platform_specific({
'darwin': lambda: ['-mmacosx-version-min=10.11'],
'linux': lambda: [],
})
ret.update({
'LDSHARED': self.exe_filename,
# FIXME: this overloads the meaning of 'library_dirs' to also mean "directories containing
# static libraries required for creating an executable" (currently, libc). These concepts
# should be distinct.
'LIBRARY_PATH': create_path_env_var(self.library_dirs),
'LDFLAGS': safe_shlex_join(all_ldflags_for_platform),
'LIBRARY_PATH': create_path_env_var(self.linking_library_dirs),
})

return ret


class Linker(datatype([
'path_entries',
'exe_filename',
'library_dirs',
'linking_library_dirs',
'extra_args',
]), LinkerMixin): pass


class CompilerMixin(Executable):

@abstractproperty
def include_dirs(self):
"""Directories to search for header files to #include during compilation."""

# FIXME: LIBRARY_PATH and (DY)?LD_LIBRARY_PATH are used for entirely different purposes, but are
# both sourced from the same `self.library_dirs`!
def get_invocation_environment_dict(self, platform):
ret = super(CompilerMixin, self).get_invocation_environment_dict(platform).copy()
@property
def as_invocation_environment_dict(self):
ret = super(CompilerMixin, self).as_invocation_environment_dict.copy()

if self.include_dirs:
ret['CPATH'] = create_path_env_var(self.include_dirs)

all_cflags_for_platform = platform.resolve_platform_specific({
'darwin': lambda: ['-mmacosx-version-min=10.11'],
'linux': lambda: [],
})
ret['CFLAGS'] = safe_shlex_join(all_cflags_for_platform)

return ret


Expand All @@ -137,10 +136,12 @@ class CCompiler(datatype([
'exe_filename',
'library_dirs',
'include_dirs',
'extra_args',
]), CompilerMixin):

def get_invocation_environment_dict(self, platform):
ret = super(CCompiler, self).get_invocation_environment_dict(platform).copy()
@property
def as_invocation_environment_dict(self):
ret = super(CCompiler, self).as_invocation_environment_dict.copy()

ret['CC'] = self.exe_filename

Expand All @@ -152,27 +153,38 @@ class CppCompiler(datatype([
'exe_filename',
'library_dirs',
'include_dirs',
'extra_args',
]), CompilerMixin):

def get_invocation_environment_dict(self, platform):
ret = super(CppCompiler, self).get_invocation_environment_dict(platform).copy()
@property
def as_invocation_environment_dict(self):
ret = super(CppCompiler, self).as_invocation_environment_dict.copy()

ret['CXX'] = self.exe_filename

return ret


# TODO(#4020): These classes are performing the work of variants.
class GCCCCompiler(datatype([('c_compiler', CCompiler)])): pass
# NB: These wrapper classes for LLVM and GCC toolchains are performing the work of variants. A
# CToolchain cannot be requested directly, but native_toolchain.py provides an LLVMCToolchain,
# which contains a CToolchain representing the clang compiler and a linker paired to work with
# objects compiled by that compiler.
class CToolchain(datatype([('c_compiler', CCompiler), ('c_linker', Linker)])): pass


class LLVMCToolchain(datatype([('c_toolchain', CToolchain)])): pass


class GCCCToolchain(datatype([('c_toolchain', CToolchain)])): pass


class LLVMCCompiler(datatype([('c_compiler', CCompiler)])): pass
class CppToolchain(datatype([('cpp_compiler', CppCompiler), ('cpp_linker', Linker)])): pass


class GCCCppCompiler(datatype([('cpp_compiler', CppCompiler)])): pass
class LLVMCppToolchain(datatype([('cpp_toolchain', CppToolchain)])): pass


class LLVMCppCompiler(datatype([('cpp_compiler', CppCompiler)])): pass
class GCCCppToolchain(datatype([('cpp_toolchain', CppToolchain)])): pass


# FIXME: make this an @rule, after we can automatically produce LibcDev and other subsystems in the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ def linker(self):
return Linker(
path_entries=self.path_entries(),
exe_filename='ld',
library_dirs=[])
library_dirs=[],
linking_library_dirs=[],
extra_args=[])


@rule(Assembler, [Select(Binutils)])
Expand Down
105 changes: 71 additions & 34 deletions src/python/pants/backend/native/subsystems/binaries/gcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,65 +6,102 @@

import os

from pants.backend.native.config.environment import (CCompiler, CppCompiler, GCCCCompiler,
GCCCppCompiler)
from pants.backend.native.subsystems.utils.parse_search_dirs import ParseSearchDirs
from pants.backend.native.config.environment import CCompiler, CppCompiler, Platform
from pants.backend.native.subsystems.utils.archive_file_mapper import ArchiveFileMapper
from pants.binaries.binary_tool import NativeTool
from pants.engine.rules import RootRule, rule
from pants.engine.selectors import Select
from pants.util.memo import memoized_method, memoized_property
from pants.util.strutil import create_path_env_var


class GCC(NativeTool):
"""Subsystem wrapping an archive providing a GCC distribution.
This subsystem provides the gcc and g++ compilers.
NB: The lib and include dirs provided by this distribution are produced by using known relative
paths into the distribution of GCC provided on Pantsbuild S3. If we change how we distribute GCC,
these methods may have to change. They should be stable to version upgrades, however.
"""
options_scope = 'gcc'
default_version = '7.3.0'
archive_type = 'tgz'

@classmethod
def subsystem_dependencies(cls):
return super(GCC, cls).subsystem_dependencies() + (ParseSearchDirs.scoped(cls),)
return super(GCC, cls).subsystem_dependencies() + (ArchiveFileMapper.scoped(cls),)

@memoized_property
def _parse_search_dirs(self):
return ParseSearchDirs.scoped_instance(self)
def _file_mapper(self):
return ArchiveFileMapper.scoped_instance(self)

def _lib_search_dirs(self, compiler_exe, path_entries):
return self._parse_search_dirs.get_compiler_library_dirs(
compiler_exe,
env={'PATH': create_path_env_var(path_entries)})
def _filemap(self, all_components_list):
return self._file_mapper.map_files(self.select(), all_components_list)

@memoized_method
@memoized_property
def path_entries(self):
return [os.path.join(self.select(), 'bin')]
return self._filemap([('bin',)])

@memoized_method
def _common_lib_dirs(self, platform):
lib64_tuples = platform.resolve_platform_specific({
'darwin': lambda: [],
'linux': lambda: [('lib64',)],
})
return self._filemap(lib64_tuples + [
('lib',),
('lib/gcc',),
('lib/gcc/*', self.version()),
])

@memoized_property
def _common_include_dirs(self):
return self._filemap([
('include',),
('lib/gcc/*', self.version(), 'include'),
])

def c_compiler(self):
exe_filename = 'gcc'
path_entries = self.path_entries()
def c_compiler(self, platform):
return CCompiler(
path_entries=path_entries,
exe_filename=exe_filename,
library_dirs=self._lib_search_dirs(exe_filename, path_entries),
include_dirs=[])

def cpp_compiler(self):
exe_filename = 'g++'
path_entries = self.path_entries()
path_entries=self.path_entries,
exe_filename='gcc',
library_dirs=self._common_lib_dirs(platform),
include_dirs=self._common_include_dirs,
extra_args=[])

@memoized_property
def _cpp_include_dirs(self):
most_cpp_include_dirs = self._filemap([
('include/c++', self.version()),
])

# This file is needed for C++ compilation.
cpp_config_header_path = self._file_mapper.assert_single_path_by_glob(
# NB: There are multiple paths matching this glob unless we provide the full path to
# c++config.h, which is why we bypass self._filemap() here.
[self.select(), 'include/c++', self.version(), '*/bits/c++config.h'])
# Get the directory that makes `#include <bits/c++config.h>` work.
plat_cpp_header_dir = os.path.dirname(os.path.dirname(cpp_config_header_path))

return most_cpp_include_dirs + [plat_cpp_header_dir]

def cpp_compiler(self, platform):
return CppCompiler(
path_entries=self.path_entries(),
exe_filename=exe_filename,
library_dirs=self._lib_search_dirs(exe_filename, path_entries),
include_dirs=[])
path_entries=self.path_entries,
exe_filename='g++',
library_dirs=self._common_lib_dirs(platform),
include_dirs=(self._common_include_dirs + self._cpp_include_dirs),
extra_args=[])


@rule(GCCCCompiler, [Select(GCC)])
def get_gcc(gcc):
yield GCCCCompiler(gcc.c_compiler())
@rule(CCompiler, [Select(GCC), Select(Platform)])
def get_gcc(gcc, platform):
return gcc.c_compiler(platform)


@rule(GCCCppCompiler, [Select(GCC)])
def get_gplusplus(gcc):
yield GCCCppCompiler(gcc.cpp_compiler())
@rule(CppCompiler, [Select(GCC), Select(Platform)])
def get_gplusplus(gcc, platform):
return gcc.cpp_compiler(platform)


def create_gcc_rules():
Expand Down
Loading

0 comments on commit 7216ff2

Please sign in to comment.