linting

jsvine · jsvine · Nov 9, 2023 · Nov 1, 2023 · Nov 1, 2023 · Nov 2, 2023
commit c2564653e86e00479d23ae95baec4718c60565a6
diff --git a/pdfplumber/container.py b/pdfplumber/container.py
@@ -112,7 +112,6 @@ def to_json(
         precision: Optional[int] = None,
         indent: Optional[int] = None,
     ) -> Optional[str]:
-
         data = self.to_dict(object_types)
 
         serialized = Serializer(

diff --git a/pdfplumber/convert.py b/pdfplumber/convert.py
@@ -66,7 +66,6 @@ def __init__(
         include_attrs: Optional[List[str]] = None,
         exclude_attrs: Optional[List[str]] = None,
     ):
-
         self.precision = precision
         self.attr_filter = get_attr_filter(
             include_attrs=include_attrs, exclude_attrs=exclude_attrs

diff --git a/pdfplumber/display.py b/pdfplumber/display.py
@@ -41,7 +41,6 @@ def get_page_image(
     password: Optional[str],
     antialias: bool = False,
 ) -> PIL.Image.Image:
-
     src: Union[pathlib.Path, BufferedReader, BytesIO]
 
     # If we are working with a file object saved to disk
@@ -357,7 +356,6 @@ def outline_words(
         x_tolerance: T_num = utils.DEFAULT_X_TOLERANCE,
         y_tolerance: T_num = utils.DEFAULT_Y_TOLERANCE,
     ) -> "PageImage":
-
         words = self.page.extract_words(
             x_tolerance=x_tolerance, y_tolerance=y_tolerance
         )
@@ -370,7 +368,6 @@ def outline_chars(
         fill: T_color = (255, 0, 0, int(255 / 4)),
         stroke_width: int = DEFAULT_STROKE_WIDTH,
     ) -> "PageImage":
-
         self.draw_rects(
             self.page.chars, stroke=stroke, fill=fill, stroke_width=stroke_width
         )

diff --git a/pdfplumber/pdf.py b/pdfplumber/pdf.py
@@ -72,7 +72,6 @@ def open(
         repair: bool = False,
         gs_path: Optional[Union[str, pathlib.Path]] = None,
     ) -> "PDF":
-
         stream: Union[BufferedReader, BytesIO]
 
         if repair:

diff --git a/pdfplumber/repair.py b/pdfplumber/repair.py
@@ -10,7 +10,6 @@ def _repair(
     password: Optional[str] = None,
     gs_path: Optional[Union[str, pathlib.Path]] = None,
 ) -> BytesIO:
-
     executable = gs_path or shutil.which("gs") or shutil.which("gswin32c")
     if executable is None:  # pragma: nocover
         raise Exception(

diff --git a/pdfplumber/table.py b/pdfplumber/table.py
@@ -286,7 +286,6 @@ def find_smallest_cell(points: List[T_point], i: int) -> Optional[T_bbox]:
                     and edge_connects(bottom_right, right_pt)
                     and edge_connects(bottom_right, below_pt)
                 ):
-
                     return (pt[0], pt[1], bottom_right[0], bottom_right[1])
         return None
 
@@ -397,7 +396,6 @@ def rows(self) -> List[Row]:
         return rows
 
     def extract(self, **kwargs: Any) -> List[List[Optional[str]]]:
-
         chars = self.page.chars
         table_arr = []
 

diff --git a/pdfplumber/utils/clustering.py b/pdfplumber/utils/clustering.py
@@ -45,7 +45,6 @@ def cluster_objects(
     tolerance: T_num,
     preserve_order: bool = False,
 ) -> List[List[R]]:
-
     if not callable(key_fn):
         key_fn = itemgetter(key_fn)
 

diff --git a/pdfplumber/utils/text.py b/pdfplumber/utils/text.py
@@ -72,7 +72,6 @@ def search(
         return_chars: bool = True,
         main_group: int = 0,
     ) -> List[Dict[str, Any]]:
-
         if isinstance(pattern, Pattern):
             if regex is False:
                 raise ValueError(
@@ -347,8 +346,8 @@ def merge_chars(self, ordered_chars: T_obj_list) -> T_obj:
             word[key] = ordered_chars[0][key]
 
         return word
-    
-    def set_tolerances_from_ratio(self, t: T_obj, axis_range: Iterable='x'):
+
+    def set_tolerances_from_ratio(self, t: T_obj, axis_range: Iterable = "x"):
         """
         If there is a `tolerance_ratio` for any axis, overrides the tolerance with ratio * size of `t`. Allows for dynamic tolerances to react to different text sizes within a single call.
 
@@ -359,11 +358,9 @@ def set_tolerances_from_ratio(self, t: T_obj, axis_range: Iterable='x'):
         for i in axis_range:
             if self.__getattribute__(f"{i}_tolerance_ratio") is not None:
                 self.__setattr__(
-                    f"{i}_tolerance", 
-                    set_tolerance(
-                        t, self.__getattribute__(f"{i}_tolerance_ratio")
-                        )
-                    )
+                    f"{i}_tolerance",
+                    set_tolerance(t, self.__getattribute__(f"{i}_tolerance_ratio")),
+                )
 
     def char_begins_new_word(
         self,
@@ -607,5 +604,6 @@ def yield_unique_chars(chars: T_obj_list) -> Generator[T_obj, None, None]:
     deduped = yield_unique_chars(chars)
     return sorted(deduped, key=chars.index)
 
+
 def set_tolerance(t, tolerance_ratio):
-    return tolerance_ratio*(t['bottom'] - t['top'])
+    return tolerance_ratio * (t["bottom"] - t["top"])
diff --git a/tests/test_ca_warn_report.py b/tests/test_ca_warn_report.py
@@ -40,7 +40,6 @@ def test_objects(self):
         assert len(p.images)
 
     def test_parse(self):
-
         rect_x0_clusters = utils.cluster_list(
             [r["x0"] for r in self.pdf.pages[1].rects], tolerance=3
         )

diff --git a/tests/test_issues.py b/tests/test_issues.py
@@ -51,14 +51,12 @@ def determine_if_checked(checkbox, checklines):
             """
 
             for cl in checklines:
-
                 if (
                     checkbox["height"] > (RECT_HEIGHT - RECT_TOLERANCE)
                     and (checkbox["height"] < RECT_HEIGHT + RECT_TOLERANCE)
                     and (checkbox["width"] < RECT_WIDTH + RECT_TOLERANCE)
                     and (checkbox["width"] < RECT_WIDTH + RECT_TOLERANCE)
                 ):
-
                     xmatch = False
                     ymatch = False
 

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -66,9 +66,11 @@ def test_decode_psl_list(self):
 
     def test_x_tolerance_ratio(self):
         pdf = pdfplumber.open(os.path.join(HERE, "pdfs/issue-987-test.pdf"))
-        assert pdf.pages[0].extract_text() == 'Big Te xt\nSmall Text'
-        assert pdf.pages[0].extract_text(x_tolerance=4) == 'Big Te xt\nSmallText'
-        assert pdf.pages[0].extract_text(x_tolerance_ratio=0.15) == 'Big Text\nSmall Text'
+        assert pdf.pages[0].extract_text() == "Big Te xt\nSmall Text"
+        assert pdf.pages[0].extract_text(x_tolerance=4) == "Big Te xt\nSmallText"
+        assert (
+            pdf.pages[0].extract_text(x_tolerance_ratio=0.15) == "Big Text\nSmall Text"
+        )
 
     def test_extract_words(self):
         path = os.path.join(HERE, "pdfs/issue-192-example.pdf")
@@ -97,7 +99,6 @@ def test_extract_words(self):
     def test_extract_words_punctuation(self):
         path = os.path.join(HERE, "pdfs/test-punkt.pdf")
         with pdfplumber.open(path) as pdf:
-
             wordsA = pdf.pages[0].extract_words(split_at_punctuation=True)
             wordsB = pdf.pages[0].extract_words(split_at_punctuation=False)
             wordsC = pdf.pages[0].extract_words(