From d945b12f1910fff031da78055161ead4420edec9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lipovsk=C3=BD?= Date: Sat, 22 Oct 2022 19:32:08 +0200 Subject: [PATCH 1/6] Update CHANGELOG.rst --- CHANGELOG.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 673744c..f18a097 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,12 @@ Changelog ~~~~~~~~~ - N/A +- 1.7.0 (2022-10-22) + - correct handling when authority starts with @ symbol + - remove unreserved characters from the beginning of found URL + - added typing and mypy checks - by mimi89999 + - updated list of TLDs + - 1.6.0 (2022-05-17) - Add a list of URLs allowed to extract (issue #125) - by khoben - correct order of actual and expected in tests From ed3a8ce9f56e57fe0c2efea89ed370540ba4e867 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lipovsk=C3=BD?= Date: Sat, 22 Oct 2022 19:34:43 +0200 Subject: [PATCH 2/6] Update list of TLDs --- urlextract/data/tlds-alpha-by-domain.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/urlextract/data/tlds-alpha-by-domain.txt b/urlextract/data/tlds-alpha-by-domain.txt index 62d511f..7c2497a 100644 --- a/urlextract/data/tlds-alpha-by-domain.txt +++ b/urlextract/data/tlds-alpha-by-domain.txt @@ -1,4 +1,4 @@ -# Version 2022051700, Last Updated Tue May 17 07:07:01 2022 UTC +# Version 2022102200, Last Updated Sat Oct 22 07:07:01 2022 UTC AAA AARP ABARTH @@ -176,7 +176,6 @@ BROTHER BRUSSELS BS BT -BUGATTI BUILD BUILDERS BUSINESS @@ -196,7 +195,6 @@ CALVINKLEIN CAM CAMERA CAMP -CANCERRESEARCH CANON CAPETOWN CAPITAL From 1f872e8126b66f369ba3be8f4933890b3193c688 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lipovsk=C3=BD?= Date: Sat, 22 Oct 2022 19:41:56 +0200 Subject: [PATCH 3/6] Version 1.7.0 --- .bumpversion.cfg | 2 +- setup.py | 2 +- urlextract/urlextract_core.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 4d31763..31377d3 100644 --- 
a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.6.0 +current_version = 1.7.0 commit = True tag = True message = Version {new_version} diff --git a/setup.py b/setup.py index b24ad97..ad0388e 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ # version of URLExtract # (do not forget to change it in urlextract_core.py as well) -__version__ = "1.6.0" +__version__ = "1.7.0" def read(readme): diff --git a/urlextract/urlextract_core.py b/urlextract/urlextract_core.py index dec043c..5fb3ee7 100644 --- a/urlextract/urlextract_core.py +++ b/urlextract/urlextract_core.py @@ -25,7 +25,7 @@ from urlextract.cachefile import CacheFile, CacheFileError # version of URLExtract (do not forget to change it in setup.py as well) -__version__ = "1.6.0" +__version__ = "1.7.0" # default value for maximum count of processed URLs by find_url DEFAULT_LIMIT = 10000 From 5a7dad826aa3553b2dcd161fc91c26ebbdb7e863 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lipovsk=C3=BD?= Date: Tue, 25 Oct 2022 20:49:51 +0200 Subject: [PATCH 4/6] Check if url_parts.authority is not NoneType fixes #137 --- tests/unit/test_find_urls.py | 1 + urlextract/urlextract_core.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_find_urls.py b/tests/unit/test_find_urls.py index 783ca0c..90250e6 100644 --- a/tests/unit/test_find_urls.py +++ b/tests/unit/test_find_urls.py @@ -58,6 +58,7 @@ ["www.example.com/somejsfile.js"], ), ("bad.email @address.net>", ['bad.email']), + ('[[ "$(giturl)" =~ ^https://gitlab.com ]] echo "found" || echo "didnt', []), ], ) def test_find_urls(urlextract, text, expected): diff --git a/urlextract/urlextract_core.py b/urlextract/urlextract_core.py index 5fb3ee7..6d70d2c 100644 --- a/urlextract/urlextract_core.py +++ b/urlextract/urlextract_core.py @@ -578,7 +578,7 @@ def _is_domain_valid( # :///?# # authority can't start with @ - if url_parts.authority.startswith('@'): + if url_parts.authority and 
url_parts.authority.startswith('@'): return False # if URI contains user info and schema was automatically added From ad0f16780cd85a379994341da32da153dc55c8d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lipovsk=C3=BD?= Date: Tue, 25 Oct 2022 20:52:43 +0200 Subject: [PATCH 5/6] update changelog --- CHANGELOG.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f18a097..c4e4608 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,9 @@ Changelog ~~~~~~~~~ - N/A +- 1.7.1 (2022-10-25) + - fix AttributeError raised when a URL has no authority + - 1.7.0 (2022-10-22) - correct handling when authority starts with @ symbol - remove unreserved characters from the beginning of found URL From d2071f4a0f497898ae4d763d43de4c380897470f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lipovsk=C3=BD?= Date: Tue, 25 Oct 2022 20:52:45 +0200 Subject: [PATCH 6/6] Version 1.7.1 --- .bumpversion.cfg | 2 +- setup.py | 2 +- urlextract/urlextract_core.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 31377d3..d728d4a 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.7.0 +current_version = 1.7.1 commit = True tag = True message = Version {new_version} diff --git a/setup.py b/setup.py index ad0388e..6d83d43 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ # version of URLExtract # (do not forget to change it in urlextract_core.py as well) -__version__ = "1.7.0" +__version__ = "1.7.1" def read(readme): diff --git a/urlextract/urlextract_core.py b/urlextract/urlextract_core.py index 6d70d2c..b6b3767 100644 --- a/urlextract/urlextract_core.py +++ b/urlextract/urlextract_core.py @@ -25,7 +25,7 @@ from urlextract.cachefile import CacheFile, CacheFileError # version of URLExtract (do not forget to change it in setup.py as well) -__version__ = "1.7.0" +__version__ = "1.7.1" # default value for maximum count of processed URLs by 
find_url DEFAULT_LIMIT = 10000