Skip to content

Commit

Permalink
Lazy load stopwords module to prevent downloading when it's not needed (jbesomi#194)
Browse files Browse the repository at this point in the history

* Lazy load stopwords module to prevent downloading when it's not needed

* Download spaCy English model by default since it's required by many functions
  • Loading branch information
hugoabonizio committed Oct 20, 2020
1 parent 1c1123f commit a1c03b3
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 11 deletions.
10 changes: 9 additions & 1 deletion texthero/nlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,18 @@

import spacy
import pandas as pd
import en_core_web_sm
from nltk.stem import PorterStemmer, SnowballStemmer
from texthero._types import TextSeries, InputSeries

try:
    # Prefer the model package that is already installed.
    import en_core_web_sm
except ModuleNotFoundError:
    # The English model is missing: download it once at import time
    # instead of failing later in the functions that need it.
    import importlib

    from spacy.cli.download import download as spacy_download

    spacy_download("en_core_web_sm")
    # The package was installed after the interpreter started, so the
    # import system's finder caches may not list it yet; clear them
    # before retrying the import.
    importlib.invalidate_caches()
    import en_core_web_sm


@InputSeries(TextSeries)
def named_entities(s: TextSeries, package="spacy") -> pd.Series:
Expand Down
3 changes: 2 additions & 1 deletion texthero/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
import pandas as pd
import unidecode

from texthero import stopwords as _stopwords
from texthero._types import TokenSeries, TextSeries, InputSeries

from typing import List, Callable, Union
Expand Down Expand Up @@ -329,6 +328,8 @@ def replace_stopwords(
"""

if stopwords is None:
from texthero import stopwords as _stopwords

stopwords = _stopwords.DEFAULT
return s.apply(_replace_stopwords, args=(stopwords, symbol))

Expand Down
9 changes: 0 additions & 9 deletions texthero/stopwords.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,6 @@
nltk.download("stopwords")

from nltk.corpus import stopwords as nltk_en_stopwords

try:
# If not present, download 'en_core_web_sm'
spacy_model = spacy.load("en_core_web_sm")
except OSError:
from spacy.cli.download import download as spacy_download

spacy_download("en_core_web_sm")

from spacy.lang.en import stop_words as spacy_en_stopwords

DEFAULT = set(nltk_en_stopwords.words("english"))
Expand Down

0 comments on commit a1c03b3

Please sign in to comment.