-
Notifications
You must be signed in to change notification settings - Fork 936
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement bandit's 'hardcoded-sql-expressions' S608 #2698
Changes from all commits
fa1e667
0784d16
98b4d8f
b7b161e
57193ef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,95 @@ | ||
# single-line failures | ||
query1 = "SELECT %s FROM table" % (var,) # bad | ||
query2 = "SELECT var FROM " + table | ||
query3 = "SELECT " + val + " FROM " + table | ||
query4 = "SELECT {} FROM table;".format(var) | ||
query5 = f"SELECT * FROM table WHERE var = {var}" | ||
|
||
query6 = "DELETE FROM table WHERE var = %s" % (var,) | ||
query7 = "DELETE FROM table WHERE VAR = " + var | ||
query8 = "DELETE FROM " + table + "WHERE var = " + var | ||
query9 = "DELETE FROM table WHERE var = {}".format(var) | ||
query10 = f"DELETE FROM table WHERE var = {var}" | ||
|
||
query11 = "INSERT INTO table VALUES (%s)" % (var,) | ||
query12 = "INSERT INTO TABLE VALUES (" + var + ")" | ||
query13 = "INSERT INTO {} VALUES ({})".format(table, var) | ||
query14 = f"INSERT INTO {table} VALUES var = {var}" | ||
|
||
query15 = "UPDATE %s SET var = %s" % (table, var) | ||
query16 = "UPDATE " + table + " SET var = " + var | ||
query17 = "UPDATE {} SET var = {}".format(table, var) | ||
query18 = f"UPDATE {table} SET var = {var}" | ||
|
||
query19 = "select %s from table" % (var,) | ||
query20 = "select var from " + table | ||
query21 = "select " + val + " from " + table | ||
query22 = "select {} from table;".format(var) | ||
query23 = f"select * from table where var = {var}" | ||
|
||
query24 = "delete from table where var = %s" % (var,) | ||
query25 = "delete from table where var = " + var | ||
query26 = "delete from " + table + "where var = " + var | ||
query27 = "delete from table where var = {}".format(var) | ||
query28 = f"delete from table where var = {var}" | ||
|
||
query29 = "insert into table values (%s)" % (var,) | ||
query30 = "insert into table values (" + var + ")" | ||
query31 = "insert into {} values ({})".format(table, var) | ||
query32 = f"insert into {table} values var = {var}" | ||
|
||
query33 = "update %s set var = %s" % (table, var) | ||
query34 = "update " + table + " set var = " + var | ||
query35 = "update {} set var = {}".format(table, var) | ||
query36 = f"update {table} set var = {var}" | ||
|
||
# multi-line failures | ||
def query37(): | ||
return """ | ||
SELECT * | ||
FROM table | ||
WHERE var = %s | ||
""" % var | ||
|
||
def query38(): | ||
return """ | ||
SELECT * | ||
FROM TABLE | ||
WHERE var = | ||
""" + var | ||
|
||
def query39(): | ||
return """ | ||
SELECT * | ||
FROM table | ||
WHERE var = {} | ||
""".format(var) | ||
|
||
def query40(): | ||
return f""" | ||
SELECT * | ||
FROM table | ||
WHERE var = {var} | ||
""" | ||
|
||
def query41(): | ||
return ( | ||
"SELECT *" | ||
"FROM table" | ||
f"WHERE var = {var}" | ||
) | ||
|
||
# # cursor-wrapped failures | ||
query42 = cursor.execute("SELECT * FROM table WHERE var = %s" % var) | ||
query43 = cursor.execute(f"SELECT * FROM table WHERE var = {var}") | ||
query44 = cursor.execute("SELECT * FROM table WHERE var = {}".format(var)) | ||
query45 = cursor.executemany("SELECT * FROM table WHERE var = %s" % var, []) | ||
|
||
# # pass | ||
query = "SELECT * FROM table WHERE id = 1" | ||
query = "DELETE FROM table WHERE id = 1" | ||
query = "INSERT INTO table VALUES (1)" | ||
query = "UPDATE table SET id = 1" | ||
cursor.execute('SELECT * FROM table WHERE id = %s', var) | ||
cursor.execute('SELECT * FROM table WHERE id = 1') | ||
cursor.executemany('SELECT * FROM table WHERE id = %s', [var, var2]) |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
use once_cell::sync::Lazy; | ||
use regex::Regex; | ||
use ruff_macros::{define_violation, derive_message_formats}; | ||
use rustpython_parser::ast::{Expr, ExprKind, Operator}; | ||
|
||
use super::super::helpers::string_literal; | ||
use crate::ast::helpers::{any_over_expr, unparse_expr}; | ||
use crate::ast::types::Range; | ||
use crate::checkers::ast::Checker; | ||
use crate::registry::Diagnostic; | ||
use crate::violation::Violation; | ||
|
||
static SQL_REGEX: Lazy<Regex> = Lazy::new(|| { | ||
Regex::new(r"(?i)(select\s.*from\s|delete\s+from\s|insert\s+into\s.*values\s|update\s.*set\s)") | ||
.unwrap() | ||
}); | ||
|
||
define_violation!( | ||
/// ### What it does | ||
/// Checks for strings that resemble SQL statements involved in some form | ||
/// string building operation. | ||
/// | ||
/// ### Why is this bad? | ||
/// SQL injection is a common attack vector for web applications. Unless care | ||
/// is taken to sanitize and control the input data when building such | ||
/// SQL statement strings, an injection attack becomes possible. | ||
/// | ||
/// ### Example | ||
/// ```python | ||
/// query = "DELETE FROM foo WHERE id = '%s'" % identifier | ||
/// ``` | ||
pub struct HardcodedSQLExpression { | ||
pub string: String, | ||
} | ||
); | ||
impl Violation for HardcodedSQLExpression { | ||
#[derive_message_formats] | ||
fn message(&self) -> String { | ||
let HardcodedSQLExpression { string } = self; | ||
format!( | ||
"Possible SQL injection vector through string-based query construction: \"{}\"", | ||
string.escape_debug() | ||
) | ||
} | ||
} | ||
|
||
fn has_string_literal(expr: &Expr) -> bool { | ||
string_literal(expr).is_some() | ||
} | ||
|
||
fn matches_sql_statement(string: &str) -> bool { | ||
SQL_REGEX.is_match(string) | ||
} | ||
|
||
fn unparse_string_format_expression(checker: &mut Checker, expr: &Expr) -> Option<String> { | ||
match &expr.node { | ||
// "select * from table where val = " + "str" + ... | ||
// "select * from table where val = %s" % ... | ||
ExprKind::BinOp { | ||
op: Operator::Add | Operator::Mod, | ||
.. | ||
} => { | ||
let Some(parent) = checker.current_expr_parent() else { | ||
if any_over_expr(expr, &has_string_literal) { | ||
return Some(unparse_expr(expr, checker.stylist)); | ||
} | ||
return None; | ||
}; | ||
// Only evaluate the full BinOp, not the nested components. | ||
let ExprKind::BinOp { .. } = &parent.node else { | ||
if any_over_expr(expr, &has_string_literal) { | ||
return Some(unparse_expr(expr, checker.stylist)); | ||
} | ||
return None; | ||
}; | ||
None | ||
} | ||
ExprKind::Call { func, .. } => { | ||
let ExprKind::Attribute{ attr, value, .. } = &func.node else { | ||
return None; | ||
}; | ||
// "select * from table where val = {}".format(...) | ||
if attr == "format" && string_literal(value).is_some() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Confused why the Bandit source has this There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it has to do with bandit's concept of |
||
return Some(unparse_expr(expr, checker.stylist)); | ||
}; | ||
None | ||
} | ||
// f"select * from table where val = {val}" | ||
ExprKind::JoinedStr { .. } => Some(unparse_expr(expr, checker.stylist)), | ||
_ => None, | ||
} | ||
} | ||
|
||
/// S608 | ||
pub fn hardcoded_sql_expression(checker: &mut Checker, expr: &Expr) { | ||
match unparse_string_format_expression(checker, expr) { | ||
Some(string) if matches_sql_statement(&string) => { | ||
checker.diagnostics.push(Diagnostic::new( | ||
HardcodedSQLExpression { string }, | ||
Range::from_located(expr), | ||
)); | ||
} | ||
_ => (), | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This section implies that sanitizing inputs is alright ... when actually directly interpolating user input into SQL queries should always be avoided (no matter if sanitized or not).
You should instead use "parameter binding" ... also called "server-side binding", where the parameters are sent separately from the query to the server, see e.g. psycopg2 server-side binding.
So I think it would make sense to update this section accordingly.