# Patch summary (leading text before the first "diff --git" header; the patch payload below is unchanged).
# - texthero/nlp.py: replaces the unconditional `import en_core_web_sm` with a try/except that, on
#   ModuleNotFoundError, calls spacy.cli.download's `download("en_core_web_sm")` and then retries the import.
# - texthero/preprocessing.py: removes the module-level `from texthero import stopwords as _stopwords` and
#   performs that import inside `replace_stopwords`, only on the `stopwords is None` branch.
# - texthero/stopwords.py: removes the try/except that called `spacy.load("en_core_web_sm")` and ran the
#   spaCy download on OSError.
# NOTE(review): the line breaks of this patch appear to have been collapsed onto a single line; it presumably
#   will not apply until they are restored -- confirm against the original commit.
# NOTE(review): the retry `import en_core_web_sm` right after an in-process download may need
#   `importlib.invalidate_caches()` to see the freshly installed package -- TODO confirm.
diff --git a/texthero/nlp.py b/texthero/nlp.py index 748f0cd..9e8cfbe 100644 --- a/texthero/nlp.py +++ b/texthero/nlp.py @@ -4,10 +4,18 @@ import spacy import pandas as pd -import en_core_web_sm from nltk.stem import PorterStemmer, SnowballStemmer from texthero._types import TextSeries, InputSeries +try: + # If not present, download 'en_core_web_sm' + import en_core_web_sm +except ModuleNotFoundError: + from spacy.cli.download import download as spacy_download + + spacy_download("en_core_web_sm") + import en_core_web_sm + @InputSeries(TextSeries) def named_entities(s: TextSeries, package="spacy") -> pd.Series: diff --git a/texthero/preprocessing.py b/texthero/preprocessing.py index f603705..f2c1984 100644 --- a/texthero/preprocessing.py +++ b/texthero/preprocessing.py @@ -12,7 +12,6 @@ import pandas as pd import unidecode -from texthero import stopwords as _stopwords from texthero._types import TokenSeries, TextSeries, InputSeries from typing import List, Callable, Union @@ -329,6 +328,8 @@ def replace_stopwords( """ if stopwords is None: + from texthero import stopwords as _stopwords + stopwords = _stopwords.DEFAULT return s.apply(_replace_stopwords, args=(stopwords, symbol)) diff --git a/texthero/stopwords.py b/texthero/stopwords.py index 379e222..d85a95e 100644 --- a/texthero/stopwords.py +++ b/texthero/stopwords.py @@ -8,15 +8,6 @@ nltk.download("stopwords") from nltk.corpus import stopwords as nltk_en_stopwords - -try: - # If not present, download 'en_core_web_sm' - spacy_model = spacy.load("en_core_web_sm") -except OSError: - from spacy.cli.download import download as spacy_download - - spacy_download("en_core_web_sm") - from spacy.lang.en import stop_words as spacy_en_stopwords DEFAULT = set(nltk_en_stopwords.words("english"))