Skip to content

Commit

Permalink
Handle from foo import * wildcard imports in Rust dep inference par…
Browse files Browse the repository at this point in the history
…ser (#19249)

This fixes #19248 by implementing explicit handling for `*` wildcard
imports in the Rust-based Python dependency inference parser.

The commits are somewhat individually reviewable:

1. switching how the `insert_import` arguments are consumed from the
syntax tree, for `from ... import ...` statements in particular:
   - before: `name` and `module_name` correspond to `import $name`,
     `"$name"`, `from $module_name import $name`
   - after: `base` and `imported` correspond to `import $base`, `"$base"`,
     `from $base import $imported`
   - This is in particular flipping the last one, and changes whether the
     `from ...` part is the optional arg (before) or not (after).
   - The motivation is that there's more functional similarities between
     the `from ...` part and a plain `import ...` statement, than between the
     `import ...` part and a plain `import ...` statement, despite the
     superficial similarities of it! (`from foo import bar` is a little like
     `import foo as __secret; bar = __secret.bar`.)
2. Add some tests that fail
3. Fix the bug
4. (and others) some tweaks, including defence-in-depth against similar
    problems happening in future
  • Loading branch information
huonw authored and WorkerPants committed Jun 6, 2023
1 parent ed3ddac commit 8c95015
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 28 deletions.
106 changes: 79 additions & 27 deletions src/rust/engine/dep_inference/src/python/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,11 @@ pub fn get_dependencies(
}

let mut new_key_parts = path_parts[0..((path_parts.len() - level) + 1)].to_vec();
new_key_parts.push(nonrelative);
if !nonrelative.is_empty() {
// an import like `from .. import *` can end up with key == '..', and hence nonrelative == "";
// the result should just be the raw parent traversal, without a suffix part
new_key_parts.push(nonrelative);
}

let old_value = import_map.remove(&key).unwrap();
import_map.insert(new_key_parts.join("."), old_value);
Expand Down Expand Up @@ -149,48 +153,68 @@ impl ImportCollector<'_> {
false
}

fn unnest_alias(node: tree_sitter::Node) -> tree_sitter::Node {
match node.kind_id() {
KindID::ALIASED_IMPORT => node
.named_child(0)
.expect("aliased imports must have a child"),
_ => node,
}
}

/// Handle different styles of references to modules/imports
///
/// ```python
/// import $base
/// "$base" # string import
/// from $base import * # (the * node is passed as `specific` too)
/// from $base import $specific
/// ```
fn insert_import(
&mut self,
name: tree_sitter::Node,
module_name: Option<tree_sitter::Node>,
base: tree_sitter::Node,
specific: Option<tree_sitter::Node>,
is_string: bool,
) {
let dotted_name = match name.kind_id() {
KindID::ALIASED_IMPORT => name
.named_child(0)
.expect("Expected named child of aliased_import while parsing Python file."),
KindID::ERROR => {
return;
}
_ => name,
};
let name_range = dotted_name.range();
// the specifically-imported item takes precedence over the base name for ignoring and lines
// etc.
let most_specific = specific.unwrap_or(base);

if self.is_pragma_ignored(name) {
if self.is_pragma_ignored(most_specific) {
return;
}

let name_ref = if is_string {
self.string_at(name_range)
let base = ImportCollector::unnest_alias(base);
// * and errors are the same as not having an specific import
let specific = specific
.map(ImportCollector::unnest_alias)
.filter(|n| !matches!(n.kind_id(), KindID::WILDCARD_IMPORT | KindID::ERROR));

let base_range = base.range();
let base_ref = if is_string {
self.string_at(base_range)
} else {
self.code_at(name_range)
self.code_at(base_range)
};
let full_name = match module_name {
Some(module_name) => {
let mod_text = self.code_at(module_name.range());
// `from ... import a` => `...a` mod_text alone, but `from x import a` => `x.a` needs to
// insert a .
let joiner = if mod_text.ends_with('.') { "" } else { "." };
[mod_text, name_ref].join(joiner)

let full_name = match specific {
Some(specific) => {
let specific_ref = self.code_at(specific.range());
// `from ... import a` => `...a` should concat base_ref and specific_ref directly, but `from
// x import a` => `x.a` needs to insert a . between them
let joiner = if base_ref.ends_with('.') { "" } else { "." };
[base_ref, specific_ref].join(joiner)
}
None => name_ref.to_string(),
None => base_ref.to_string(),
};

let line0 = most_specific.range().start_point.row;

self
.import_map
.entry(full_name)
.and_modify(|v| *v = (v.0, v.1 && self.weaken_imports))
.or_insert(((name_range.start_point.row as u64) + 1, self.weaken_imports));
.or_insert(((line0 as u64) + 1, self.weaken_imports));
}
}

Expand All @@ -205,8 +229,36 @@ impl Visitor for ImportCollector<'_> {

fn visit_import_from_statement(&mut self, node: tree_sitter::Node) -> ChildBehavior {
if !self.is_pragma_ignored(node) {
// the grammar is something like `from $module_name import $($name),* | '*'`, where $... is a field
// name.
let module_name = node
.child_by_field_name("module_name")
.expect("`from ... import ...` must have module_name");

let mut any_inserted = false;
for child in node.children_by_field_name("name", &mut node.walk()) {
self.insert_import(child, Some(node.named_child(0).unwrap()), false);
self.insert_import(module_name, Some(child), false);
any_inserted = true;
}

if !any_inserted {
// There's no names (i.e. it's probably not `from ... import some, names`), let's look for
// the * in a wildcard import. (It doesn't have a field name, so we have to search for it
// manually.)
for child in node.children(&mut node.walk()) {
if child.kind_id() == KindID::WILDCARD_IMPORT {
self.insert_import(module_name, Some(child), false);
any_inserted = true
}
}
}

if !any_inserted {
// Still nothing inserted, which means something has probably gone wrong and/or we haven't
// understood the syntax tree! We're working on a definite import statement, so silently
// doing nothing with it is likely to be wrong. Let's insert the import node itself and let
// that be surfaced as an dep-inference failure.
self.insert_import(node, None, false)
}
}
ChildBehavior::Ignore
Expand Down
26 changes: 25 additions & 1 deletion src/rust/engine/dep_inference/src/python/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ fn simple_imports() {
assert_imports("import a.b", &["a.b"]);
assert_imports("import a as x", &["a"]);
assert_imports("from a import b", &["a.b"]);
assert_imports("from a import *", &["a"]);
assert_imports("from a.b import c", &["a.b.c"]);
assert_imports("from a.b import c.d", &["a.b.c.d"]);
assert_imports("from a.b import c, d, e", &["a.b.c", "a.b.d", "a.b.e"]);
Expand Down Expand Up @@ -87,6 +88,11 @@ from a.b import (
assert_imports("from ....a import b, c", &["....a.b", "....a.c"]);
assert_imports("from ....a import b as d, c", &["....a.b", "....a.c"]);

assert_imports("from .a import *", &[".a"]);
assert_imports("from . import *", &["."]);
assert_imports("from ..a import *", &["..a"]);
assert_imports("from .. import *", &[".."]);

assert_imports(
"class X: def method(): if True: while True: class Y: def f(): import a",
&["a"],
Expand All @@ -103,6 +109,7 @@ fn pragma_ignore() {
assert_imports("import a.b # pants: no-infer-dep", &[]);
assert_imports("import a.b as d # pants: no-infer-dep", &[]);
assert_imports("from a import b # pants: no-infer-dep", &[]);
assert_imports("from a import * # pants: no-infer-dep", &[]);
assert_imports("from a import b, c # pants: no-infer-dep", &[]);
assert_imports("from a import b, c as d # pants: no-infer-dep", &[]);
assert_imports(
Expand Down Expand Up @@ -182,6 +189,12 @@ fn pragma_ignore() {
)",
&[],
);
assert_imports(
r"
from a.b import \
* # pants: no-infer-dep",
&[],
);
}

#[test]
Expand Down Expand Up @@ -489,8 +502,11 @@ fn assert_relative_imports(filename: &str, code: &str, resolved_imports: &[&str]
fn relative_imports_resolution() {
let filename = "foo/bar/baz.py";
assert_relative_imports(filename, "from . import b", &["foo.bar.b"]);
assert_relative_imports(filename, "from . import *", &["foo.bar"]);
assert_relative_imports(filename, "from .a import b", &["foo.bar.a.b"]);
assert_relative_imports(filename, "from .a import *", &["foo.bar.a"]);
assert_relative_imports(filename, "from .. import b", &["foo.b"]);
assert_relative_imports(filename, "from .. import *", &["foo"]);
assert_relative_imports(filename, "from ..a import b", &["foo.a.b"]);
assert_relative_imports(filename, "from .. import b.c", &["foo.b.c"]);
assert_relative_imports(filename, "from ..a import b.c", &["foo.a.b.c"]);
Expand Down Expand Up @@ -520,14 +536,22 @@ fn syntax_errors_and_other_fun() {

assert_imports("imprt a", &[]);
assert_imports("form a import b", &["b"]);
assert_imports("import .b", &[]);
assert_imports("import .b", &["."]);
assert_imports("import a....b", &["a....b"]);
assert_imports("import a.", &[]);
assert_imports("import *", &[]);
assert_imports("from a import", &[]);
assert_imports("from a import;", &["a."]);
assert_imports("from a import ()", &["a."]);
assert_imports("from a imp x", &[]);
assert_imports("from from import a as .as", &[]);
assert_imports("from a import ......g", &["a.g"]);
assert_imports("from a. import b", &[]);
assert_imports("from a as c import b as d", &["a.b"]);
assert_imports("from a import *, b", &["a"]);
assert_imports("from a import b, *", &["a.b"]);
assert_imports("from a import (*)", &[]);
assert_imports("from * import b", &["b"]);
assert_imports("try:...\nexcept:import a", &["a"]);
assert_imports("try:...\nexcept 1:import a", &["a"]);
assert_imports("try:...\nexcept x=1:import a", &["a"]);
Expand Down

0 comments on commit 8c95015

Please sign in to comment.