add linting (#6)

safevideo · Dec 11, 2023 · e40a2fa · e40a2fa
1 parent 93aefc6
commit e40a2fa
Show file tree

Hide file tree

Showing 10 changed files with 132 additions and 15 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,70 @@
+# Settings for the pre-commit.ci bot; only used if it is installed via https://github.com/marketplace/pre-commit-ci
+ci:
+  autofix_prs: true
+  autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
+  autoupdate_schedule: monthly
+  # submodules: true
+
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: end-of-file-fixer
+      - id: trailing-whitespace
+      - id: check-case-conflict
+      # - id: no-commit-to-branch
+      # - id: check-yaml
+      - id: detect-private-key
+      - id: detect-aws-credentials
+        args:
+          - --allow-missing-credentials
+
+  - repo: https://github.com/asottile/pyupgrade
+    rev: v3.15.0
+    hooks:
+      - id: pyupgrade
+        name: Upgrade code
+
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.12.0
+    hooks:
+      - id: isort
+        name: Sort imports
+
+  - repo: https://github.com/google/yapf
+    rev: v0.40.2
+    hooks:
+      - id: yapf
+        name: YAPF formatting
+        args:
+          - -i
+
+  - repo: https://github.com/executablebooks/mdformat
+    rev: 0.7.17
+    hooks:
+      - id: mdformat
+        name: MD formatting
+        additional_dependencies:
+          - mdformat-gfm
+          - mdformat-black
+        exclude: 'docs/.*\.md'
+
+  - repo: https://github.com/PyCQA/flake8
+    rev: 6.1.0
+    hooks:
+      - id: flake8
+        name: Flake8 linting
+
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.2.6
+    hooks:
+      - id: codespell
+        args:
+          - --ignore-words-list=crate,nd,strack,dota,ane,segway,fo,gool,winn
+          # skip inline comments
+          - --skip="*.py:.*#.*,*ipynb"
+
+  - repo: https://github.com/PyCQA/docformatter
+    rev: v1.7.5
+    hooks:
+      - id: docformatter
diff --git a/README.md b/README.md
@@ -1,4 +1,5 @@
 # safetext
+
 Rule-based profanity checking tool for English and Turkish.
 
 ### installation
@@ -51,4 +52,4 @@ st.set_language_from_srt(turkish_srt_file_path)
 
 st.language
 >> 'tr'
-```
+```
diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,2 @@
 lingua-language-detector==1.3.1
-pysrt
+pysrt
diff --git a/safetext/__init__.py b/safetext/__init__.py
@@ -1,11 +1,13 @@
 from safetext.utils import detect_language_from_srt, detect_language_from_text
+
 from .languages.en import EnglishProfanityChecker
 from .languages.tr import TurkishProfanityChecker
 
 __version__ = "0.0.3"
 
 
 class SafeText:
+
     def __init__(self, language="en"):
         self.language = language
         self.checker = None

diff --git a/safetext/languages/base.py b/safetext/languages/base.py
@@ -29,9 +29,7 @@ def _check(self, text):
 
         for i, word in enumerate(words):
             if word.lower() in self.profanity_words:
-                start_index = sum(
-                    len(w) + 1 for w in words[:i]
-                )  # +1 to account for space between words
+                start_index = sum(len(w) + 1 for w in words[:i])  # +1 to account for space between words
                 end_index = start_index + len(word)
                 profanity_info = {
                     "word": word,
@@ -45,7 +43,7 @@ def _check(self, text):
 
     def _read_words(self, filepath):
         """Read the profanity words from the given file."""
-        with open(filepath, "r", encoding="utf8") as f:
+        with open(filepath, encoding="utf8") as f:
             profanity_words = f.read().splitlines()
 
         return profanity_words

diff --git a/safetext/languages/en/words.txt b/safetext/languages/en/words.txt
@@ -71,4 +71,4 @@ son of a bitch
 suck
 tits
 viagra
-whore
+whore
diff --git a/safetext/languages/tr/words.txt b/safetext/languages/tr/words.txt
@@ -360,4 +360,4 @@ zikeyim
 zikiiim
 zikiim
 zikik
-zikim
+zikim
diff --git a/safetext/utils.py b/safetext/utils.py
@@ -38,8 +38,6 @@ def detect_language_from_srt(srt_file: str, use_first_n_subs: 10) -> str:
     import pysrt
 
     subs = pysrt.open(srt_file, encoding="utf-8")
-    text = " ".join(
-        [sub.text_without_tags.replace("\n", " ") for sub in subs[:use_first_n_subs]]
-    )
+    text = " ".join([sub.text_without_tags.replace("\n", " ") for sub in subs[:use_first_n_subs]])
 
     return detect_language_from_text(text)
diff --git a/setup.cfg b/setup.cfg
@@ -0,0 +1,49 @@
+[metadata]
+license_files = LICENSE
+description_file = README.md
+
+[flake8]
+max_line_length = 110
+exclude = .tox,*.egg,build,temp,.git,.vscode,__pycache__
+recursive = true
+doctests = True
+verbose = 2
+inline-quotes = double
+# https://pep8.readthedocs.io/en/latest/intro.html#error-codes
+format = pylint
+# see: https://www.flake8rules.com/
+ignore = F401,E731,E402,W504,E501
+    # F401: module imported but unused
+    # E731: Do not assign a lambda expression, use a def
+    # E402: module level import not at top of file
+    # W504: line break after binary operator
+    # E501: line too long
+    # removed from the ignore list (these are reported again):
+    # F405: name may be undefined, or defined from star imports: module
+    # E231: missing whitespace after ‘,’, ‘;’, or ‘:’
+    # E127: continuation line over-indented for visual indent
+    # F403: ‘from module import *’ used; unable to detect undefined names
+
+
+[isort]
+# https://pycqa.github.io/isort/docs/configuration/options.html
+line_length = 110
+multi_line_output = 3
+include_trailing_comma = true
+
+[yapf]
+based_on_style = pep8
+spaces_before_comment = 2
+COLUMN_LIMIT = 110
+SPLIT_BEFORE_FIRST_ARGUMENT = true
+COALESCE_BRACKETS = true
+SPLIT_BEFORE_LOGICAL_OPERATOR = false
+
+[docformatter]
+wrap-summaries = 110
+wrap-descriptions = 110
+in-place = true
+make-summary-multi-line = false
+pre-summary-newline = true
+force-wrap = false
+close-quotes-on-newline = true
diff --git a/setup.py b/setup.py
@@ -7,7 +7,7 @@
 
 def get_long_description():
     base_dir = os.path.abspath(os.path.dirname(__file__))
-    with io.open(os.path.join(base_dir, "README.md"), encoding="utf-8") as f:
+    with open(os.path.join(base_dir, "README.md"), encoding="utf-8") as f:
         return f.read()
 
 
@@ -19,7 +19,7 @@ def get_requirements():
 def get_version():
     current_dir = os.path.abspath(os.path.dirname(__file__))
     version_file = os.path.join(current_dir, "safetext", "__init__.py")
-    with io.open(version_file, encoding="utf-8") as f:
+    with open(version_file, encoding="utf-8") as f:
         return re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', f.read(), re.M).group(1)
 
 
@@ -33,7 +33,6 @@ def get_version():
 
 extras = {"tests": _DEV_REQUIREMENTS, "dev": _DEV_REQUIREMENTS}
 
-
 setuptools.setup(
     name="safetext",
     version=get_version(),
@@ -66,4 +65,4 @@ def get_version():
         ],
     },
     keywords="text, profanity, filtering, turkish, english",
-)
+)