Merge pull request PyCQA#676 from jdufresne/invalid-escape

Add W605 warning for invalid escape sequences in string literals
teojgo · Sep 27, 2017 · c7448b4 · c7448b4
2 parents 769ea41 + 13d2bd2
commit c7448b4
Show file tree

Hide file tree

Showing 5 changed files with 80 additions and 4 deletions.
diff --git a/CHANGES.txt b/CHANGES.txt
@@ -1,6 +1,13 @@
 Changelog
 =========
 
+UNRELEASED
+----------
+
+New checks:
+
+* Add W605 warning for invalid escape sequences in string literals
+
 2.3.1 (2017-01-31)
 ------------------
 

diff --git a/docs/intro.rst b/docs/intro.rst
@@ -413,6 +413,8 @@ This is the current list of error and warning codes:
 +------------+----------------------------------------------------------------------+
 | W604       | backticks are deprecated, use 'repr()'                               |
 +------------+----------------------------------------------------------------------+
+| W605       | invalid escape sequence '\x'                                         |
++------------+----------------------------------------------------------------------+
 
 
 **(*)** In the default configuration, the checks **E121**, **E123**, **E126**,

diff --git a/pycodestyle.py b/pycodestyle.py
@@ -1388,6 +1388,57 @@ def python_3000_backticks(logical_line):
         yield pos, "W604 backticks are deprecated, use 'repr()'"
 
 
+@register_check
+def python_3000_invalid_escape_sequence(logical_line, tokens):
+    r"""Invalid escape sequences are deprecated in Python 3.6.
+
+    Every STRING token without an ``r`` prefix is scanned for a backslash
+    followed by a character that starts no recognized escape sequence.
+    Literals with a ``b`` prefix additionally reject the escapes that
+    only exist for text strings.
+
+    Yields ``(offset, message)`` pairs, where ``offset`` is the index of
+    the offending character inside the literal's body (prefix and quotes
+    stripped).  NOTE(review): confirm how the caller maps that offset.
+
+    Okay: regex = r'\.png$'
+    W605: regex = '\.png$'
+    """
+    # https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
+    # Characters accepted after a backslash in any non-raw literal; '\n'
+    # is the backslash-newline continuation inside a literal.
+    valid = frozenset('\n\\\'"abfnrtv01234567x')
+    # Escape sequences only recognized in string literals (\N, \u, \U);
+    # in a bytes literal they are invalid like any other unknown escape.
+    valid_in_text = valid | frozenset('NuU')
+
+    # The token's start/end/line are not needed, so they are left unbound
+    # instead of being shadowed by local bookkeeping.
+    for token_type, text, _, _, _ in tokens:
+        if token_type != tokenize.STRING:
+            continue
+        quote = text[-3:] if text[-3:] in ('"""', "'''") else text[-1]
+        # Extract string modifiers (e.g. u or r)
+        quote_pos = text.index(quote)
+        prefix = text[:quote_pos].lower()
+        if 'r' in prefix:
+            # Raw literals keep every backslash verbatim.
+            continue
+        body = text[quote_pos + len(quote):-len(quote)]
+        allowed = valid if 'b' in prefix else valid_in_text
+
+        pos = body.find('\\')
+        while pos >= 0:
+            pos += 1  # step onto the escaped character
+            if body[pos] not in allowed:
+                yield (
+                    pos,
+                    "W605 invalid escape sequence '\\%s'" % body[pos],
+                )
+            # Resume after the escaped character so the second backslash
+            # of '\\' is never mistaken for the start of a new escape.
+            pos = body.find('\\', pos + 1)
+
 ##############################################################################
 # Helper functions
 ##############################################################################

diff --git a/testsuite/E12not.py b/testsuite/E12not.py
@@ -358,10 +358,10 @@ def qualify_by_address(self, cr, uid, ids, context=None,
     """ This gets called by the web server """
 
 
-_ipv4_re = re.compile('^(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
-                      '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
-                      '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
-                      '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$')
+_ipv4_re = re.compile(r'^(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
+                      r'(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
+                      r'(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.'
+                      r'(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$')
 
 
 fct("""

diff --git a/testsuite/W60.py b/testsuite/W60.py
@@ -13,3 +13,19 @@
     x = 0
 #: W604
 val = `1 + 2`
+#: W605
+regex = '\.png$'
+#: W605
+regex = '''
+\.png$
+'''
+#: Okay
+regex = r'\.png$'
+regex = '\\.png$'
+regex = r'''
+\.png$
+'''
+regex = r'''
+\\.png$
+'''
+s = '\\'