Skip to content

Commit

Permalink
add linting (#6)
Browse files Browse the repository at this point in the history
  • Loading branch information
SeeknnDestroy committed Dec 11, 2023
1 parent 93aefc6 commit e40a2fa
Show file tree
Hide file tree
Showing 10 changed files with 132 additions and 15 deletions.
70 changes: 70 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Hooks executed by pre-commit: file hygiene, code upgrade/formatting, linting and spell checking.
# Define bot property if installed via https://github.com/marketplace/pre-commit-ci
ci:
  autofix_prs: true
  autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
  autoupdate_schedule: monthly
  # submodules: true

repos:
  # Generic file hygiene and accidental-secret detection.
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.5.0
    hooks:
      - id: end-of-file-fixer
      - id: trailing-whitespace
      - id: check-case-conflict
      # - id: no-commit-to-branch
      # - id: check-yaml
      - id: detect-private-key
      - id: detect-aws-credentials
        args:
          - --allow-missing-credentials

  # Rewrite outdated Python syntax.
  - repo: https://github.com/asottile/pyupgrade
    rev: v3.15.0
    hooks:
      - id: pyupgrade
        name: Upgrade code

  # Import ordering (settings live in the [isort] section of setup.cfg).
  - repo: https://github.com/PyCQA/isort
    rev: 5.12.0
    hooks:
      - id: isort
        name: Sort imports

  # Code formatting (settings live in the [yapf] section of setup.cfg).
  - repo: https://github.com/google/yapf
    rev: v0.40.2
    hooks:
      - id: yapf
        name: YAPF formatting
        args:
          - -i

  # Markdown formatting; files under docs/ are excluded.
  - repo: https://github.com/executablebooks/mdformat
    rev: 0.7.17
    hooks:
      - id: mdformat
        name: MD formatting
        additional_dependencies:
          - mdformat-gfm
          - mdformat-black
        exclude: 'docs/.*\.md'

  # Linting (settings live in the [flake8] section of setup.cfg).
  - repo: https://github.com/PyCQA/flake8
    rev: 6.1.0
    hooks:
      - id: flake8
        name: Flake8 linting

  # Spell checking with a project-specific allow list.
  - repo: https://github.com/codespell-project/codespell
    rev: v2.2.6
    hooks:
      - id: codespell
        args:
          - --ignore-words-list=crate,nd,strack,dota,ane,segway,fo,gool,winn
          # skip inline comments
          # NOTE(review): pre-commit passes args without a shell, so the double quotes below
          # presumably reach codespell literally and may keep the globs from matching - confirm.
          - --skip="*.py:.*#.*,*ipynb"

  # Docstring formatting (settings live in the [docformatter] section of setup.cfg).
  - repo: https://github.com/PyCQA/docformatter
    rev: v1.7.5
    hooks:
      - id: docformatter
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# safetext

Rule-based profanity checking tool for English and Turkish.

### installation
Expand Down Expand Up @@ -51,4 +52,4 @@ st.set_language_from_srt(turkish_srt_file_path)

st.language
>> 'tr'
```
```
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
lingua-language-detector==1.3.1
pysrt
pysrt
2 changes: 2 additions & 0 deletions safetext/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from safetext.utils import detect_language_from_srt, detect_language_from_text

from .languages.en import EnglishProfanityChecker
from .languages.tr import TurkishProfanityChecker

__version__ = "0.0.3"


class SafeText:

def __init__(self, language="en"):
self.language = language
self.checker = None
Expand Down
6 changes: 2 additions & 4 deletions safetext/languages/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,7 @@ def _check(self, text):

for i, word in enumerate(words):
if word.lower() in self.profanity_words:
start_index = sum(
len(w) + 1 for w in words[:i]
) # +1 to account for space between words
start_index = sum(len(w) + 1 for w in words[:i]) # +1 to account for space between words
end_index = start_index + len(word)
profanity_info = {
"word": word,
Expand All @@ -45,7 +43,7 @@ def _check(self, text):

def _read_words(self, filepath):
"""Read the profanity words from the given file."""
with open(filepath, "r", encoding="utf8") as f:
with open(filepath, encoding="utf8") as f:
profanity_words = f.read().splitlines()

return profanity_words
Expand Down
2 changes: 1 addition & 1 deletion safetext/languages/en/words.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,4 +71,4 @@ son of a bitch
suck
tits
viagra
whore
whore
2 changes: 1 addition & 1 deletion safetext/languages/tr/words.txt
Original file line number Diff line number Diff line change
Expand Up @@ -360,4 +360,4 @@ zikeyim
zikiiim
zikiim
zikik
zikim
zikim
4 changes: 1 addition & 3 deletions safetext/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ def detect_language_from_srt(srt_file: str, use_first_n_subs: 10) -> str:
import pysrt

subs = pysrt.open(srt_file, encoding="utf-8")
text = " ".join(
[sub.text_without_tags.replace("\n", " ") for sub in subs[:use_first_n_subs]]
)
text = " ".join([sub.text_without_tags.replace("\n", " ") for sub in subs[:use_first_n_subs]])

return detect_language_from_text(text)
49 changes: 49 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
[metadata]
# Packaging metadata: names the license file(s) and the file used as the package description.
license_files = LICENSE
description_file = README.md

[flake8]
# Lint settings. E501 is in the ignore list below, so over-long lines are not reported
# even though max_line_length is set.
max_line_length = 110
exclude = .tox,*.egg,build,temp,.git,.vscode,__pycache__
recursive = true
doctests = True
verbose = 2
# NOTE(review): inline-quotes is presumably read by a quotes plugin rather than core flake8 -
# confirm that plugin is installed wherever flake8 runs.
inline-quotes = double
# https://pep8.readthedocs.io/en/latest/intro.html#error-codes
format = pylint
# see: https://www.flake8rules.com/
ignore = F401,E731,E402,W504,E501
# Legend for the codes ignored above:
# F401: module imported but unused
# E731: Do not assign a lambda expression, use a def
# E402: module level import not at top of file
# W504: line break after binary operator
# E501: line too long
# removed (no longer ignored):
# F405: name may be undefined, or defined from star imports: module
# E231: missing whitespace after ‘,’, ‘;’, or ‘:’
# E127: continuation line over-indented for visual indent
# F403: ‘from module import *’ used; unable to detect undefined names


[isort]
# Import sorting. line_length uses the same 110-column limit as the other tool sections in this file.
# https://pycqa.github.io/isort/docs/configuration/options.html
line_length = 110
multi_line_output = 3
include_trailing_comma = true

[yapf]
# Formatter style: starts from the pep8 preset and overrides the options listed below.
# COLUMN_LIMIT uses the same 110-column limit as the other tool sections in this file.
based_on_style = pep8
spaces_before_comment = 2
COLUMN_LIMIT = 110
SPLIT_BEFORE_FIRST_ARGUMENT = true
COALESCE_BRACKETS = true
SPLIT_BEFORE_LOGICAL_OPERATOR = false

[docformatter]
# Docstring formatting. Both wrap widths use the same 110-column limit as the other tool
# sections in this file; in-place = true means files are rewritten rather than only reported.
wrap-summaries = 110
wrap-descriptions = 110
in-place = true
make-summary-multi-line = false
pre-summary-newline = true
force-wrap = false
close-quotes-on-newline = true
7 changes: 3 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

def get_long_description():
base_dir = os.path.abspath(os.path.dirname(__file__))
with io.open(os.path.join(base_dir, "README.md"), encoding="utf-8") as f:
with open(os.path.join(base_dir, "README.md"), encoding="utf-8") as f:
return f.read()


Expand All @@ -19,7 +19,7 @@ def get_requirements():
def get_version():
current_dir = os.path.abspath(os.path.dirname(__file__))
version_file = os.path.join(current_dir, "safetext", "__init__.py")
with io.open(version_file, encoding="utf-8") as f:
with open(version_file, encoding="utf-8") as f:
return re.search(r'^__version__ = [\'"]([^\'"]*)[\'"]', f.read(), re.M).group(1)


Expand All @@ -33,7 +33,6 @@ def get_version():

extras = {"tests": _DEV_REQUIREMENTS, "dev": _DEV_REQUIREMENTS}


setuptools.setup(
name="safetext",
version=get_version(),
Expand Down Expand Up @@ -66,4 +65,4 @@ def get_version():
],
},
keywords="text, profanity, filtering, turkish, english",
)
)

0 comments on commit e40a2fa

Please sign in to comment.