backuputils.pybackup (forked from gunthercox/ChatterBot)
222 lines (168 loc) · 6.15 KB
"""
ChatterBot utility functions
"""
from chatterbot.storage.neo4j_storage import Neo4jStorageAdapter
from nltk.corpus import wordnet


def import_module(dotted_path):
    """
    Imports the specified module based on the
    dot notated import path for the module.
    """
    import importlib

    module_parts = dotted_path.split('.')
    module_path = '.'.join(module_parts[:-1])
    module = importlib.import_module(module_path)

    return getattr(module, module_parts[-1])
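
# Example (sketch): resolve an attribute from its dotted import path.
# ``chatterbot.storage.StorageAdapter`` is assumed to exist as in upstream ChatterBot.
#
#   StorageAdapter = import_module('chatterbot.storage.StorageAdapter')
#   print(StorageAdapter.__name__)  # 'StorageAdapter'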


def get_initialization_functions(obj, attribute):
    """
    Return all initialization methods for the comparison algorithm.
    Initialization methods must start with 'initialize_' and take no parameters.
    """
    attribute_parts = attribute.split('.')
    outermost_attribute = getattr(obj, attribute_parts.pop(0))

    for next_attribute in attribute_parts:
        outermost_attribute = getattr(outermost_attribute, next_attribute)

    return getattr(outermost_attribute, 'initialization_functions', [])
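
# Example (sketch with hypothetical names): collect and run the callables that a
# nested attribute exposes through ``initialization_functions``; ``bot`` and
# ``comparison.algorithm`` are illustrative only.
#
#   for initialize in get_initialization_functions(bot, 'comparison.algorithm'):
#       initialize()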


def initialize_class(data, *args, **kwargs):
    """
    :param data: A string or dictionary containing an import_path attribute.
    """
    if isinstance(data, dict):
        import_path = data.get('import_path')
        data.update(kwargs)
        Class = import_module(import_path)

        return Class(*args, **data)
    else:
        Class = import_module(data)

        return Class(*args, **kwargs)
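
# Example (sketch): both accepted call forms. The dictionary form forwards its
# remaining keys to the constructor as keyword arguments;
# ``chatterbot.storage.SQLStorageAdapter`` and its ``database_uri`` parameter are
# assumed from upstream ChatterBot.
#
#   storage = initialize_class('chatterbot.storage.SQLStorageAdapter')
#   storage = initialize_class({
#       'import_path': 'chatterbot.storage.SQLStorageAdapter',
#       'database_uri': 'sqlite:///database.sqlite3'
#   })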


def validate_adapter_class(validate_class, adapter_class):
    """
    Raises an exception if validate_class is not a
    subclass of adapter_class.

    :param validate_class: The class to be validated.
    :type validate_class: class

    :param adapter_class: The class type to check against.
    :type adapter_class: class

    :raises: Adapter.InvalidAdapterTypeException
    """
    from chatterbot.adapters import Adapter

    # If a dictionary was passed in, check if it has an import_path attribute
    if isinstance(validate_class, dict):

        if 'import_path' not in validate_class:
            raise Adapter.InvalidAdapterTypeException(
                'The dictionary {} must contain a value for "import_path"'.format(
                    str(validate_class)
                )
            )

        # Set the class to the import path for the next check
        validate_class = validate_class.get('import_path')

    # This fork widens the upstream subclass check to also accept
    # subclasses of the Neo4j storage adapter.
    if not issubclass(import_module(validate_class), (adapter_class, Neo4jStorageAdapter)):
        raise Adapter.InvalidAdapterTypeException(
            '{} must be a subclass of {}'.format(
                validate_class,
                adapter_class.__name__
            )
        )
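
# Example (sketch): validate a configured storage adapter path.
# ``chatterbot.storage.StorageAdapter`` and ``chatterbot.storage.SQLStorageAdapter``
# are assumed from upstream ChatterBot; an invalid class raises
# Adapter.InvalidAdapterTypeException.
#
#   from chatterbot.storage import StorageAdapter
#   validate_adapter_class('chatterbot.storage.SQLStorageAdapter', StorageAdapter)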


def nltk_download_corpus(resource_path):
    """
    Download the specified NLTK corpus file
    unless it has already been downloaded.

    Returns True if the corpus needed to be downloaded.
    """
    from nltk.data import find
    from nltk import download
    from os.path import split, sep
    from zipfile import BadZipfile

    # Download the NLTK data only if it is not already downloaded
    _, corpus_name = split(resource_path)

    if not resource_path.endswith(sep):
        resource_path = resource_path + sep

    downloaded = False

    try:
        find(resource_path)
    except LookupError:
        download(corpus_name)
        downloaded = True
    except BadZipfile:
        raise BadZipfile(
            'The NLTK corpus file being opened is not a zipfile, '
            'or it has been corrupted and needs to be manually deleted.'
        )

    return downloaded
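
# Example (sketch): fetch the Punkt tokenizer models only when they are missing.
#
#   if nltk_download_corpus('tokenizers/punkt'):
#       print('punkt was downloaded')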


def treebank_to_wordnet(pos):
    """
    Convert Treebank part-of-speech tags to Wordnet part-of-speech tags.

    * https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html
    * http://www.nltk.org/_modules/nltk/corpus/reader/wordnet.html
    """
    data_map = {
        'N': wordnet.NOUN,
        'J': wordnet.ADJ,
        'V': wordnet.VERB,
        'R': wordnet.ADV
    }

    return data_map.get(pos[0])
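
# Example (sketch): map Treebank tags such as those produced by nltk.pos_tag to
# WordNet tags, e.g. for lemmatization. Tags outside N/J/V/R (such as 'DT')
# map to None.
#
#   treebank_to_wordnet('NNS')  # wordnet.NOUN
#   treebank_to_wordnet('VBD')  # wordnet.VERB
#   treebank_to_wordnet('DT')   # None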


def get_response_time(chatbot, statement='Hello'):
    """
    Returns the amount of time taken for a given
    chat bot to return a response.

    :param chatbot: A chat bot instance.
    :type chatbot: ChatBot

    :returns: The response time in seconds.
    :rtype: float
    """
    import time

    start_time = time.time()

    chatbot.get_response(statement)

    return time.time() - start_time
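
# Example (sketch): time a single response from a bot created with default
# settings; ``chatterbot.ChatBot`` is assumed from upstream ChatterBot.
#
#   from chatterbot import ChatBot
#   bot = ChatBot('Timer')
#   print('Response time: {:.3f} seconds'.format(get_response_time(bot)))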


def print_progress_bar(description, iteration_counter, total_items, progress_bar_length=20):
    """
    Print a progress bar to stdout.

    :param description: Training description
    :type description: str

    :param iteration_counter: Incremental counter
    :type iteration_counter: int

    :param total_items: Total number of items
    :type total_items: int

    :param progress_bar_length: Progress bar length
    :type progress_bar_length: int

    :returns: None
    :rtype: None
    """
    import sys

    percent = float(iteration_counter) / total_items
    hashes = '#' * int(round(percent * progress_bar_length))
    spaces = ' ' * (progress_bar_length - len(hashes))
    sys.stdout.write('\r{0}: [{1}] {2}%'.format(description, hashes + spaces, int(round(percent * 100))))
    sys.stdout.flush()

    if total_items == iteration_counter:
        print('\r')
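
# Example (sketch): render a 20-character progress bar while looping over a
# list of training items.
#
#   items = list(range(50))
#   for index, item in enumerate(items, start=1):
#       print_progress_bar('Training', index, len(items))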


def download_nltk_stopwords():
    """
    Download the required NLTK stopwords corpus if it has not already been downloaded.
    """
    nltk_download_corpus('stopwords')


def download_nltk_wordnet():
    """
    Download the required NLTK WordNet corpus if it has not already been downloaded.
    """
    nltk_download_corpus('corpora/wordnet')


def download_nltk_averaged_perceptron_tagger():
    """
    Download the NLTK averaged perceptron tagger, required for part-of-speech
    tagging, if it has not already been downloaded.
    """
    nltk_download_corpus('averaged_perceptron_tagger')


def download_nltk_vader_lexicon():
    """
    Download the NLTK VADER lexicon, required for sentiment analysis,
    if it has not already been downloaded.
    """
    nltk_download_corpus('vader_lexicon')
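
# Example (sketch): fetch all corpora used elsewhere in this module in one pass,
# for example before training or tagging.
#
#   download_nltk_stopwords()
#   download_nltk_wordnet()
#   download_nltk_averaged_perceptron_tagger()
#   download_nltk_vader_lexicon()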