Drop support for Python 3.5

kp-forks · Dec 30, 2018 · 72b920e · 72b920e
1 parent b4a5190
commit 72b920e
Show file tree

Hide file tree

Showing 17 changed files with 34 additions and 86 deletions.
diff --git a/.gitignore b/.gitignore
@@ -39,3 +39,5 @@ log/
 tests/output/
 tests/resources/private/
 tmp/
+/debug_tests.py
+*.traineddata
diff --git a/.travis.yml b/.travis.yml
@@ -9,7 +9,7 @@ matrix:
       dist: trusty
       sudo: required
       language: python
-      python: "3.5"
+      python: "3.6"
       env:
         - DIST=trusty
       addons: &trusty_apt
@@ -33,14 +33,6 @@ matrix:
           - tesseract-ocr-deu
           - tesseract-ocr-eng
           - tesseract-ocr-fra
-    - os: linux
-      dist: trusty
-      sudo: required
-      language: python
-      python: "3.6"
-      env:
-        - DIST=trusty
-      addons: *trusty_apt
     - os: linux
       dist: xenial
       sudo: required

diff --git a/docs/installation.rst b/docs/installation.rst
@@ -493,14 +493,12 @@ Requirements for pip and HEAD install
 
 OCRmyPDF currently requires these external programs and libraries to be installed. These requirements must be satisfied using the operating system package manager; ``pip`` cannot provide them.
 
-- Python 3.5 or newer
+- Python 3.6 or newer
 - Ghostscript 9.15 or newer
 - libexempi3 2.2.0 or newer
 - qpdf 8.1.0 or newer
 - Tesseract 3.04 or newer
 
-Using Python 3.5 in production is discouraged. Python 3.6 and 3.7 give much better performance.
-
 As of ocrmypdf 7.2.1, the following versions are recommended:
 
 - Python 3.7
@@ -526,7 +524,7 @@ These are in addition to the Python packaging dependencies, meaning that unfortu
 Installing HEAD revision from sources
 -------------------------------------
 
-If you have ``git`` and Python 3.5 or newer installed, you can install from source. When the ``pip`` installer runs, it will alert you if dependencies are missing.
+If you have ``git`` and Python 3.6 or newer installed, you can install from source. When the ``pip`` installer runs, it will alert you if dependencies are missing.
 
 If you prefer to build everything from source, you will need to `build pikepdf from source <https://pikepdf.readthedocs.io/en/latest/installation.html#building-from-source>`_. First ensure you can build and install pikepdf.
 

diff --git a/setup.py b/setup.py
@@ -20,8 +20,8 @@
 from __future__ import print_function, unicode_literals
 
 import sys
-if sys.version_info < (3, 5):
-    print("Python 3.5 or newer is required", file=sys.stderr)
+if sys.version_info < (3, 6):
+    print("Python 3.6 or newer is required", file=sys.stderr)
     sys.exit(1)
 
 from setuptools import setup, find_packages  # nopep8
@@ -219,7 +219,6 @@ def readme():
     package_dir={'': 'src'},
     keywords=['PDF', 'OCR', 'optical character recognition', 'PDF/A', 'scanning'],
     classifiers=[
-        "Programming Language :: Python :: 3.5",
         "Programming Language :: Python :: 3.6",
         "Programming Language :: Python :: 3.7",
         "Development Status :: 5 - Production/Stable",
@@ -236,7 +235,7 @@ def readme():
         "Topic :: Text Processing :: Indexing",
         "Topic :: Text Processing :: Linguistic",
         ],
-    python_requires=' >= 3.5',
+    python_requires=' >= 3.6',
     setup_requires=[
         'cffi >= 1.9.1',        # to build the leptonica module
         'pytest-runner',        # to enable python setup.py test
@@ -252,7 +251,7 @@ def readme():
         'cffi >= 1.9.1',          # must be a setup and install requirement
         'img2pdf >= 0.3.0, < 0.4',       # pure Python, so track HEAD closely
         'pdfminer.six == 20181108',
-        'pikepdf >= 0.9.1',
+        'pikepdf >= 0.10.0, < 0.11.0',
         'Pillow >= 4.0.0, != 5.1.0 ; sys_platform == "darwin"',
                                   # Pillow < 4 has BytesIO/TIFF bug w/img2pdf 0.2.3
                                   # block 5.1.0, broken wheels

diff --git a/src/ocrmypdf/_pipeline.py b/src/ocrmypdf/_pipeline.py
@@ -193,12 +193,6 @@ def repair_and_parse_pdf(
         )
         raise InputFileError()
 
-    if len(pdfinfo.pages) > 2000 and sys.version_info[0:2] <= (3, 5):
-        log.warning(
-            "Performance regressions are known occur with Python 3.5 for "
-            "high page count files.  Python 3.6 or newer is recommended."
-        )
-
     if pdfinfo.has_acroform:
         if options.redo_ocr:
             log.error(

diff --git a/src/ocrmypdf/exec/ghostscript.py b/src/ocrmypdf/exec/ghostscript.py
@@ -23,7 +23,7 @@
 from PIL import Image
 from . import get_version
 from ..exceptions import SubprocessOutputError
-from ..helpers import fspath
+from os import fspath
 
 
 @lru_cache(maxsize=1)

diff --git a/src/ocrmypdf/exec/qpdf.py b/src/ocrmypdf/exec/qpdf.py
@@ -19,7 +19,7 @@
 from functools import lru_cache
 
 from . import  get_version
-from ..helpers import fspath
+from os import fspath
 
 
 @lru_cache(maxsize=1)

diff --git a/src/ocrmypdf/exec/tesseract.py b/src/ocrmypdf/exec/tesseract.py
@@ -23,9 +23,10 @@
 from textwrap import dedent
 from subprocess import CalledProcessError, TimeoutExpired, check_output, STDOUT, run, PIPE
 from contextlib import suppress
+from os import fspath
 
 from ..exceptions import MissingDependencyError, TesseractConfigError
-from ..helpers import page_number, fspath
+from ..helpers import page_number
 from . import get_version
 
 OrientationConfidence = namedtuple(

diff --git a/src/ocrmypdf/helpers.py b/src/ocrmypdf/helpers.py
@@ -29,8 +29,8 @@ def re_symlink(input_file, soft_link_name, log=None):
     """
     Helper function: relinks soft symbolic link if necessary
     """
-    input_file = fspath(input_file)  # For Py3.5
-    soft_link_name = fspath(soft_link_name)
+    input_file = os.fspath(input_file)
+    soft_link_name = os.fspath(soft_link_name)
     if log is None:
         prdebug = partial(print, file=sys.stderr)
     else:
@@ -72,7 +72,7 @@ def is_iterable_notstr(thing):
 
 def page_number(input_file):
     """Get one-based page number implied by filename (000002.pdf -> 2)"""
-    return int(os.path.basename(fspath(input_file))[0:6])
+    return int(os.path.basename(os.fspath(input_file))[0:6])
 
 
 def available_cpu_count():
@@ -103,19 +103,12 @@ def is_file_writable(test_file):
     p = Path(test_file)
 
     if p.is_symlink():
-        # Python 3.5 does not accept parameters for Path.resolve() and behaves
-        # as if strict=True (throws an exception on failure). Python 3.6
-        # defaults to strict=False. This implements strict=False like behavior
-        # for Python 3.5.
-        if sys.version_info[0:2] <= (3, 5):
-            p = Path(os.path.realpath(fspath(p)))
-        else:
-            p = p.resolve(strict=False)
+        p = p.resolve(strict=False)
 
     # p.is_file() throws an exception in some cases
     if p.exists() and p.is_file():
         return os.access(
-            fspath(p), os.W_OK,
+            os.fspath(p), os.W_OK,
             effective_ids=(os.access in os.supports_effective_ids))
     else:
         try:
@@ -129,39 +122,6 @@ def is_file_writable(test_file):
         return True
 
 
-if sys.version_info[0:2] <= (3, 5):
-    def fspath(path):
-        """https://www.python.org/dev/peps/pep-0519/#os"""
-        import pathlib
-        if isinstance(path, (str, bytes)):
-            return path
-
-        # Work from the object's type to match method resolution of other magic
-        # methods.
-        path_type = type(path)
-        try:
-            path = path_type.__fspath__(path)
-        except AttributeError:
-            # Added for Python 3.5 support.
-            if isinstance(path, pathlib.Path):
-                return str(path)
-            elif hasattr(path_type, '__fspath__'):
-                raise
-        else:
-            if isinstance(path, (str, bytes)):
-                return path
-            else:
-                raise TypeError("expected __fspath__() to return str or bytes, "
-                                "not " + type(path).__name__)
-
-        raise TypeError(
-            "expected str, bytes, pathlib.Path or os.PathLike object, not "
-            + path_type.__name__)
-
-else:
-    fspath = os.fspath
-
-
 def flatten_groups(groups):
     for obj in groups:
         if is_iterable_notstr(obj):

diff --git a/src/ocrmypdf/leptonica.py b/src/ocrmypdf/leptonica.py
@@ -33,7 +33,7 @@
 import warnings
 
 from .lib._leptonica import ffi
-from .helpers import fspath
+from os import fspath
 
 # pylint: disable=protected-access
 

diff --git a/src/ocrmypdf/optimize.py b/src/ocrmypdf/optimize.py
@@ -20,14 +20,15 @@
 from collections import defaultdict
 import logging
 import sys
+from os import fspath
 
 from PIL import Image
 
 import pikepdf
 
 from ._jobcontext import JobContext
 from . import leptonica
-from .helpers import re_symlink, fspath
+from .helpers import re_symlink
 from .exec import pngquant, jbig2enc
 
 DEFAULT_JPEG_QUALITY = 75

diff --git a/src/ocrmypdf/pdfinfo/__init__.py b/src/ocrmypdf/pdfinfo/__init__.py
@@ -20,6 +20,7 @@
 from decimal import Decimal
 from enum import Enum
 from math import hypot, isclose
+from os import fspath
 from pathlib import Path
 from unittest.mock import Mock
 from warnings import warn
@@ -32,7 +33,6 @@
 from .layout import get_page_analysis, get_text_boxes
 
 from ..exceptions import EncryptedPdfError
-from ..helpers import fspath
 
 
 Colorspace = Enum('Colorspace',

diff --git a/tests/test_lept.py b/tests/test_lept.py
@@ -19,13 +19,13 @@
 import os
 import shutil
 import sys
+from os import fspath
 from pickle import dumps, loads
 
 import pytest
 from PIL import Image, ImageChops
 
 import ocrmypdf.leptonica as lept
-from ocrmypdf.helpers import fspath
 
 
 def test_colormap_backgroundnorm(resources):

diff --git a/tests/test_metadata.py b/tests/test_metadata.py
@@ -23,13 +23,12 @@
 from shutil import copyfile
 from unittest.mock import patch, MagicMock
 import datetime
-from shutil import copyfile
+from os import fspath
 
 import pikepdf
 from pikepdf.models.metadata import decode_pdf_date
 
 from ocrmypdf.exceptions import ExitCode
-from ocrmypdf.helpers import fspath
 from ocrmypdf.pdfa import (
     file_claims_pdfa,
     generate_pdfa_ps,

diff --git a/tests/test_optimize.py b/tests/test_optimize.py
@@ -16,6 +16,7 @@
 # along with OCRmyPDF.  If not, see <http://www.gnu.org/licenses/>.
 
 from pathlib import Path
+from os import fspath
 
 import pytest
 import logging
@@ -27,7 +28,6 @@
 from ocrmypdf import optimize as opt
 from ocrmypdf.exec.ghostscript import rasterize_pdf
 from ocrmypdf.exec import jbig2enc, pngquant
-from ocrmypdf.helpers import fspath
 
 
 check_ocrmypdf = pytest.helpers.check_ocrmypdf

diff --git a/tests/test_rotation.py b/tests/test_rotation.py
@@ -18,6 +18,7 @@
 import logging
 from io import BytesIO
 from unittest.mock import Mock
+from os import fspath
 
 from PIL import Image
 import pytest
@@ -27,7 +28,6 @@
 from ocrmypdf import leptonica
 from ocrmypdf.pdfinfo import PdfInfo
 from ocrmypdf.exec import ghostscript, tesseract
-from ocrmypdf.helpers import fspath
 
 
 # pytest.helpers is dynamic

diff --git a/tests/test_tess4.py b/tests/test_tess4.py
@@ -15,17 +15,19 @@
 # You should have received a copy of the GNU General Public License
 # along with OCRmyPDF.  If not, see <http://www.gnu.org/licenses/>.
 
-import pytest
-from ocrmypdf.exceptions import ExitCode, MissingDependencyError
-from ocrmypdf.exec import tesseract
-from ocrmypdf.helpers import fspath
-from ocrmypdf import pdfinfo
+from os import fspath
 import sys
 import os
-import PyPDF2 as pypdf
 from contextlib import contextmanager
 from pathlib import Path
 
+import PyPDF2 as pypdf
+import pytest
+
+from ocrmypdf import pdfinfo
+from ocrmypdf.exceptions import ExitCode, MissingDependencyError
+from ocrmypdf.exec import tesseract
+
 # pylint: disable=no-member
 spoof = pytest.helpers.spoof