Add x_tolerance_ratio test for extract_words(...)

jsvine · jsvine · Nov 9, 2023 · Nov 1, 2023 · Nov 1, 2023 · Nov 2, 2023
commit 0145647c58cfedf96d8a333f99606cd3ccf4aade
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -66,11 +66,14 @@ def test_decode_psl_list(self):
 
     def test_x_tolerance_ratio(self):
         pdf = pdfplumber.open(os.path.join(HERE, "pdfs/issue-987-test.pdf"))
-        assert pdf.pages[0].extract_text() == "Big Te xt\nSmall Text"
-        assert pdf.pages[0].extract_text(x_tolerance=4) == "Big Te xt\nSmallText"
-        assert (
-            pdf.pages[0].extract_text(x_tolerance_ratio=0.15) == "Big Text\nSmall Text"
-        )
+        page = pdf.pages[0]
+
+        assert page.extract_text() == "Big Te xt\nSmall Text"
+        assert page.extract_text(x_tolerance=4) == "Big Te xt\nSmallText"
+        assert page.extract_text(x_tolerance_ratio=0.15) == "Big Text\nSmall Text"
+
+        words = page.extract_words(x_tolerance_ratio=0.15)
+        assert "|".join(w["text"] for w in words) == "Big|Text|Small|Text"
 
     def test_extract_words(self):
         path = os.path.join(HERE, "pdfs/issue-192-example.pdf")