Skip to content

Commit

Permalink
Add RUF016: Detection of invalid index types (#5602)
Browse files Browse the repository at this point in the history
Detects invalid types for tuple, list, bytes, string indices.

For example, the following will raise a `TypeError` at runtime and when
imported Python will display a `SyntaxWarning`

```python
var = [1, 2, 3]["x"]
```

```
example.py:1: SyntaxWarning: list indices must be integers or slices, not str; perhaps you missed a comma?
  var = [1, 2, 3]["x"]
Traceback (most recent call last):
  File "example.py", line 1, in <module>
    var = [1, 2, 3]["x"]
          ~~~~~~~~~^^^^^
TypeError: list indices must be integers or slices, not str
```

Previously, Ruff would not report the invalid syntax but now a violation
will be reported. This does not apply to cases where a variable, call,
or complex expression is used in the index — detection is roughly
limited to static definitions, which matches Python's warnings.

```
❯ ./target/debug/ruff example.py --select RUF015 --show-source --no-cache
example.py:1:17: RUF015 Indexed access to type `list` uses type `str` instead of an integer or slice.
  |
1 | var = [1, 2, 3]["x"]
  |                 ^^^ RUF015
  |
```

Closes #5082
xref
python/cpython@ffff144
  • Loading branch information
zanieb authored Jul 12, 2023
1 parent 7566ca8 commit 0666add
Show file tree
Hide file tree
Showing 9 changed files with 722 additions and 8 deletions.
115 changes: 115 additions & 0 deletions crates/ruff/resources/test/fixtures/ruff/RUF016.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# Should not emit for valid access with index
var = "abc"[0]
var = f"abc"[0]
var = [1, 2, 3][0]
var = (1, 2, 3)[0]
var = b"abc"[0]

# Should not emit for valid access with slice
var = "abc"[0:2]
var = f"abc"[0:2]
var = b"abc"[0:2]
var = [1, 2, 3][0:2]
var = (1, 2, 3)[0:2]
var = [1, 2, 3][None:2]
var = [1, 2, 3][0:None]
var = [1, 2, 3][:2]
var = [1, 2, 3][0:]

# Should emit for invalid access on strings
var = "abc"["x"]
var = f"abc"["x"]

# Should emit for invalid access on bytes
var = b"abc"["x"]

# Should emit for invalid access on lists and tuples
var = [1, 2, 3]["x"]
var = (1, 2, 3)["x"]

# Should emit for invalid access on list comprehensions
var = [x for x in range(10)]["x"]

# Should emit for invalid access using tuple
var = "abc"[1, 2]

# Should emit for invalid access using string
var = [1, 2]["x"]

# Should emit for invalid access using float
var = [1, 2][0.25]

# Should emit for invalid access using dict
var = [1, 2][{"x": "y"}]

# Should emit for invalid access using dict comp
var = [1, 2][{x: "y" for x in range(2)}]

# Should emit for invalid access using list
var = [1, 2][2, 3]

# Should emit for invalid access using list comp
var = [1, 2][[x for x in range(2)]]

# Should emit on invalid access using set
var = [1, 2][{"x", "y"}]

# Should emit on invalid access using set comp
var = [1, 2][{x for x in range(2)}]

# Should emit on invalid access using bytes
var = [1, 2][b"x"]

# Should emit for non-integer slice start
var = [1, 2, 3]["x":2]
var = [1, 2, 3][f"x":2]
var = [1, 2, 3][1.2:2]
var = [1, 2, 3][{"x"}:2]
var = [1, 2, 3][{x for x in range(2)}:2]
var = [1, 2, 3][{"x": x for x in range(2)}:2]
var = [1, 2, 3][[x for x in range(2)]:2]

# Should emit for non-integer slice end
var = [1, 2, 3][0:"x"]
var = [1, 2, 3][0:f"x"]
var = [1, 2, 3][0:1.2]
var = [1, 2, 3][0:{"x"}]
var = [1, 2, 3][0:{x for x in range(2)}]
var = [1, 2, 3][0:{"x": x for x in range(2)}]
var = [1, 2, 3][0:[x for x in range(2)]]

# Should emit for non-integer slice step
var = [1, 2, 3][0:1:"x"]
var = [1, 2, 3][0:1:f"x"]
var = [1, 2, 3][0:1:1.2]
var = [1, 2, 3][0:1:{"x"}]
var = [1, 2, 3][0:1:{x for x in range(2)}]
var = [1, 2, 3][0:1:{"x": x for x in range(2)}]
var = [1, 2, 3][0:1:[x for x in range(2)]]

# Should emit for non-integer slice start and end; should emit twice with specific ranges
var = [1, 2, 3]["x":"y"]

# Should emit once for repeated invalid access
var = [1, 2, 3]["x"]["y"]["z"]

# Cannot emit on invalid access using variable in index
x = "x"
var = "abc"[x]

# Cannot emit on invalid access using call
def func():
return 1
var = "abc"[func()]

# Cannot emit on invalid access using a variable in parent
x = [1, 2, 3]
var = x["y"]

# Cannot emit for invalid access on byte array
var = bytearray(b"abc")["x"]

# Cannot emit for slice bound using variable
x = "x"
var = [1, 2, 3][0:x]
var = [1, 2, 3][x:1]
6 changes: 5 additions & 1 deletion crates/ruff/src/checkers/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2142,7 +2142,7 @@ where

// Pre-visit.
match expr {
subscript @ Expr::Subscript(ast::ExprSubscript { value, slice, .. }) => {
Expr::Subscript(subscript @ ast::ExprSubscript { value, slice, .. }) => {
// Ex) Optional[...], Union[...]
if self.any_enabled(&[
Rule::FutureRewritableTypeAnnotation,
Expand Down Expand Up @@ -2235,6 +2235,10 @@ where
ruff::rules::unnecessary_iterable_allocation_for_first_element(self, subscript);
}

if self.enabled(Rule::InvalidIndexType) {
ruff::rules::invalid_index_type(self, subscript);
}

pandas_vet::rules::subscript(self, value, expr);
}
Expr::Tuple(ast::ExprTuple {
Expand Down
1 change: 1 addition & 0 deletions crates/ruff/src/codes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,7 @@ pub fn code_to_rule(linter: Linter, code: &str) -> Option<(RuleGroup, Rule)> {
#[cfg(feature = "unreachable-code")]
(Ruff, "014") => (RuleGroup::Nursery, rules::ruff::rules::UnreachableCode),
(Ruff, "015") => (RuleGroup::Unspecified, rules::ruff::rules::UnnecessaryIterableAllocationForFirstElement),
(Ruff, "016") => (RuleGroup::Unspecified, rules::ruff::rules::InvalidIndexType),
(Ruff, "100") => (RuleGroup::Unspecified, rules::ruff::rules::UnusedNOQA),
(Ruff, "200") => (RuleGroup::Unspecified, rules::ruff::rules::InvalidPyprojectToml),

Expand Down
1 change: 1 addition & 0 deletions crates/ruff/src/rules/ruff/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ mod tests {
Rule::UnnecessaryIterableAllocationForFirstElement,
Path::new("RUF015.py")
)]
#[test_case(Rule::InvalidIndexType, Path::new("RUF016.py"))]
#[cfg_attr(
feature = "unreachable-code",
test_case(Rule::UnreachableCode, Path::new("RUF014.py"))
Expand Down
214 changes: 214 additions & 0 deletions crates/ruff/src/rules/ruff/rules/invalid_index_type.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
use rustpython_parser::ast::{Constant, Expr, ExprConstant, ExprSlice, ExprSubscript, Ranged};

use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use std::fmt;

use crate::checkers::ast::Checker;

/// ## What it does
/// Checks for indexed access to lists, strings, tuples, bytes, and comprehensions
/// using a type other than an integer or slice.
///
/// ## Why is this bad?
/// Only integers or slices can be used as indices to these types. Using
/// other types will result in a `TypeError` at runtime and a `SyntaxWarning` at
/// import time.
///
/// ## Example
/// ```python
/// var = [1, 2, 3]["x"]
/// ```
///
/// Use instead:
/// ```python
/// var = [1, 2, 3][0]
/// ```
#[violation]
pub struct InvalidIndexType {
value_type: String,
index_type: String,
is_slice: bool,
}

impl Violation for InvalidIndexType {
#[derive_message_formats]
fn message(&self) -> String {
let InvalidIndexType {
value_type,
index_type,
is_slice,
} = self;
if *is_slice {
format!("Slice in indexed access to type `{value_type}` uses type `{index_type}` instead of an integer.")
} else {
format!(
"Indexed access to type `{value_type}` uses type `{index_type}` instead of an integer or slice."
)
}
}
}

/// RUF015
pub(crate) fn invalid_index_type(checker: &mut Checker, expr: &ExprSubscript) {
let ExprSubscript {
value,
slice: index,
..
} = expr;

// Check the value being indexed is a list, tuple, string, f-string, bytes, or comprehension
if !matches!(
value.as_ref(),
Expr::List(_)
| Expr::ListComp(_)
| Expr::Tuple(_)
| Expr::JoinedStr(_)
| Expr::Constant(ExprConstant {
value: Constant::Str(_) | Constant::Bytes(_),
..
})
) {
return;
}

// The value types supported by this rule should always be checkable
let Some(value_type) = CheckableExprType::try_from(value) else {
debug_assert!(false, "Index value must be a checkable type to generate a violation message.");
return;
};

// If the index is not a checkable type then we can't easily determine if there is a violation
let Some(index_type) = CheckableExprType::try_from(index) else {
return;
};

// Then check the contents of the index
match index.as_ref() {
Expr::Constant(ExprConstant {
value: index_value, ..
}) => {
// If the index is a constant, require an integer
if !index_value.is_int() {
checker.diagnostics.push(Diagnostic::new(
InvalidIndexType {
value_type: value_type.to_string(),
index_type: constant_type_name(index_value).to_string(),
is_slice: false,
},
index.range(),
));
}
}
Expr::Slice(ExprSlice {
lower, upper, step, ..
}) => {
// If the index is a slice, require integer or null bounds
for is_slice in [lower, upper, step].into_iter().flatten() {
if let Expr::Constant(ExprConstant {
value: index_value, ..
}) = is_slice.as_ref()
{
if !(index_value.is_int() || index_value.is_none()) {
checker.diagnostics.push(Diagnostic::new(
InvalidIndexType {
value_type: value_type.to_string(),
index_type: constant_type_name(index_value).to_string(),
is_slice: true,
},
is_slice.range(),
));
}
} else if let Some(is_slice_type) = CheckableExprType::try_from(is_slice.as_ref()) {
checker.diagnostics.push(Diagnostic::new(
InvalidIndexType {
value_type: value_type.to_string(),
index_type: is_slice_type.to_string(),
is_slice: true,
},
is_slice.range(),
));
}
}
}
_ => {
// If it's some other checkable data type, it's a violation
checker.diagnostics.push(Diagnostic::new(
InvalidIndexType {
value_type: value_type.to_string(),
index_type: index_type.to_string(),
is_slice: false,
},
index.range(),
));
}
}
}

/// An expression that can be checked for type compatibility.
///
/// These are generally "literal" type expressions in that we know their concrete type
/// without additional analysis; opposed to expressions like a function call where we
/// cannot determine what type it may return.
#[derive(Debug)]
enum CheckableExprType<'a> {
Constant(&'a Constant),
JoinedStr,
List,
ListComp,
SetComp,
DictComp,
Set,
Dict,
Tuple,
Slice,
}

impl fmt::Display for CheckableExprType<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::Constant(constant) => f.write_str(constant_type_name(constant)),
Self::JoinedStr => f.write_str("str"),
Self::List => f.write_str("list"),
Self::SetComp => f.write_str("set comprehension"),
Self::ListComp => f.write_str("list comprehension"),
Self::DictComp => f.write_str("dict comprehension"),
Self::Set => f.write_str("set"),
Self::Slice => f.write_str("slice"),
Self::Dict => f.write_str("dict"),
Self::Tuple => f.write_str("tuple"),
}
}
}

impl<'a> CheckableExprType<'a> {
fn try_from(expr: &'a Expr) -> Option<Self> {
match expr {
Expr::Constant(ExprConstant { value, .. }) => Some(Self::Constant(value)),
Expr::JoinedStr(_) => Some(Self::JoinedStr),
Expr::List(_) => Some(Self::List),
Expr::ListComp(_) => Some(Self::ListComp),
Expr::SetComp(_) => Some(Self::SetComp),
Expr::DictComp(_) => Some(Self::DictComp),
Expr::Set(_) => Some(Self::Set),
Expr::Dict(_) => Some(Self::Dict),
Expr::Tuple(_) => Some(Self::Tuple),
Expr::Slice(_) => Some(Self::Slice),
_ => None,
}
}
}

fn constant_type_name(constant: &Constant) -> &'static str {
match constant {
Constant::None => "None",
Constant::Bool(_) => "bool",
Constant::Str(_) => "str",
Constant::Bytes(_) => "bytes",
Constant::Int(_) => "int",
Constant::Tuple(_) => "tuple",
Constant::Float(_) => "float",
Constant::Complex { .. } => "complex",
Constant::Ellipsis => "ellipsis",
}
}
2 changes: 2 additions & 0 deletions crates/ruff/src/rules/ruff/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ pub(crate) use collection_literal_concatenation::*;
pub(crate) use explicit_f_string_type_conversion::*;
pub(crate) use function_call_in_dataclass_default::*;
pub(crate) use implicit_optional::*;
pub(crate) use invalid_index_type::*;
pub(crate) use invalid_pyproject_toml::*;
pub(crate) use mutable_class_default::*;
pub(crate) use mutable_dataclass_default::*;
Expand All @@ -22,6 +23,7 @@ mod explicit_f_string_type_conversion;
mod function_call_in_dataclass_default;
mod helpers;
mod implicit_optional;
mod invalid_index_type;
mod invalid_pyproject_toml;
mod mutable_class_default;
mod mutable_dataclass_default;
Expand Down
Loading

0 comments on commit 0666add

Please sign in to comment.