Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Checklist fixes #5239

Merged
merged 7 commits into from
Jun 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
312 changes: 154 additions & 158 deletions allennlp/confidence_checks/task_checklists/sentiment_analysis_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from overrides import overrides
from checklist.test_suite import TestSuite
from checklist.test_types import MFT, INV, DIR, Expect
from checklist.editor import Editor
from checklist.perturb import Perturb
from allennlp.confidence_checks.task_checklists.task_suite import TaskSuite
from allennlp.confidence_checks.task_checklists import utils
Expand Down Expand Up @@ -60,7 +59,7 @@ def preds_and_confs_fn(data):
for pred in predictions:
label = pred["probs"].index(max(pred["probs"]))
labels.append(label)
confs.append([pred["probs"][self._positive], pred["probs"][self._negative]])
confs.append(pred["probs"])
return np.array(labels), np.array(confs)

return preds_and_confs_fn
Expand Down Expand Up @@ -97,156 +96,153 @@ def _default_tests(self, data: Optional[Iterable[str]], num_test_cases=100):
self._default_negation_tests(data, num_test_cases)

def _setup_editor(self):
if not hasattr(self, "editor"):
self.editor = Editor()

pos_adj = [
"good",
"great",
"excellent",
"amazing",
"extraordinary",
"beautiful",
"fantastic",
"nice",
"incredible",
"exceptional",
"awesome",
"perfect",
"fun",
"adorable",
"brilliant",
"exciting",
"sweet",
"wonderful",
]
neg_adj = [
"awful",
"bad",
"horrible",
"weird",
"rough",
"lousy",
"unhappy",
"average",
"difficult",
"poor",
"sad",
"frustrating",
"hard",
"lame",
"nasty",
"annoying",
"boring",
"creepy",
"dreadful",
"ridiculous",
"terrible",
"ugly",
"unpleasant",
]
self.editor.add_lexicon("pos_adj", pos_adj, overwrite=True)
self.editor.add_lexicon("neg_adj", neg_adj, overwrite=True)

pos_verb_present = [
"like",
"enjoy",
"appreciate",
"love",
"recommend",
"admire",
"value",
"welcome",
]
neg_verb_present = ["hate", "dislike", "regret", "abhor", "dread", "despise"]
pos_verb_past = [
"liked",
"enjoyed",
"appreciated",
"loved",
"admired",
"valued",
"welcomed",
]
neg_verb_past = ["hated", "disliked", "regretted", "abhorred", "dreaded", "despised"]
self.editor.add_lexicon("pos_verb_present", pos_verb_present, overwrite=True)
self.editor.add_lexicon("neg_verb_present", neg_verb_present, overwrite=True)
self.editor.add_lexicon("pos_verb_past", pos_verb_past, overwrite=True)
self.editor.add_lexicon("neg_verb_past", neg_verb_past, overwrite=True)
self.editor.add_lexicon("pos_verb", pos_verb_present + pos_verb_past, overwrite=True)
self.editor.add_lexicon("neg_verb", neg_verb_present + neg_verb_past, overwrite=True)

noun = [
"airline",
"movie",
"product",
"customer service",
"restaurant",
"hotel",
"food",
"staff",
"company",
"crew",
"service",
]
self.editor.add_lexicon("noun", noun, overwrite=True)

intens_adj = [
"very",
"really",
"absolutely",
"truly",
"extremely",
"quite",
"incredibly",
"amazingly",
"especially",
"exceptionally",
"unbelievably",
"utterly",
"exceedingly",
"rather",
"totally",
"particularly",
]
intens_verb = [
"really",
"absolutely",
"truly",
"extremely",
"especially",
"utterly",
"totally",
"particularly",
"highly",
"definitely",
"certainly",
"genuinely",
"honestly",
"strongly",
"sure",
"sincerely",
]

self.editor.add_lexicon("intens_adj", intens_adj, overwrite=True)
self.editor.add_lexicon("intens_verb", intens_verb, overwrite=True)

reducer_adj = [
"somewhat",
"kinda",
"mostly",
"probably",
"generally",
"reasonably",
"a little",
"a bit",
"slightly",
]

self.editor.add_lexicon("reducer_adj", reducer_adj, overwrite=True)

self.monotonic_label = Expect.monotonic(increasing=True, tolerance=0.1)
self.monotonic_label_down = Expect.monotonic(increasing=False, tolerance=0.1)
super()._setup_editor()

pos_adj = [
"good",
"great",
"excellent",
"amazing",
"extraordinary",
"beautiful",
"fantastic",
"nice",
"incredible",
"exceptional",
"awesome",
"perfect",
"fun",
"adorable",
"brilliant",
"exciting",
"sweet",
"wonderful",
]
neg_adj = [
"awful",
"bad",
"horrible",
"weird",
"rough",
"lousy",
"average",
"difficult",
"poor",
"sad",
"frustrating",
"lame",
"nasty",
"annoying",
"boring",
"creepy",
"dreadful",
"ridiculous",
"terrible",
"ugly",
"unpleasant",
]
self.editor.add_lexicon("pos_adj", pos_adj, overwrite=True)
self.editor.add_lexicon("neg_adj", neg_adj, overwrite=True)

pos_verb_present = [
"like",
"enjoy",
"appreciate",
"love",
"recommend",
"admire",
"value",
"welcome",
]
neg_verb_present = ["hate", "dislike", "regret", "abhor", "dread", "despise"]
pos_verb_past = [
"liked",
"enjoyed",
"appreciated",
"loved",
"admired",
"valued",
"welcomed",
]
neg_verb_past = ["hated", "disliked", "regretted", "abhorred", "dreaded", "despised"]
self.editor.add_lexicon("pos_verb_present", pos_verb_present, overwrite=True)
self.editor.add_lexicon("neg_verb_present", neg_verb_present, overwrite=True)
self.editor.add_lexicon("pos_verb_past", pos_verb_past, overwrite=True)
self.editor.add_lexicon("neg_verb_past", neg_verb_past, overwrite=True)
self.editor.add_lexicon("pos_verb", pos_verb_present + pos_verb_past, overwrite=True)
self.editor.add_lexicon("neg_verb", neg_verb_present + neg_verb_past, overwrite=True)

noun = [
"airline",
"movie",
"product",
"customer service",
"restaurant",
"hotel",
"food",
"staff",
"company",
"crew",
"service",
]
self.editor.add_lexicon("noun", noun, overwrite=True)

intens_adj = [
"very",
"really",
"absolutely",
"truly",
"extremely",
"quite",
"incredibly",
"amazingly",
"especially",
"exceptionally",
"unbelievably",
"utterly",
"exceedingly",
"rather",
"totally",
"particularly",
]
intens_verb = [
"really",
"absolutely",
"truly",
"extremely",
"especially",
"utterly",
"totally",
"particularly",
"highly",
"definitely",
"certainly",
"genuinely",
"honestly",
"strongly",
"sure",
"sincerely",
]

self.editor.add_lexicon("intens_adj", intens_adj, overwrite=True)
self.editor.add_lexicon("intens_verb", intens_verb, overwrite=True)

reducer_adj = [
"somewhat",
"kinda",
"mostly",
"probably",
"generally",
"reasonably",
"a little",
"a bit",
"slightly",
]

self.editor.add_lexicon("reducer_adj", reducer_adj, overwrite=True)

self.monotonic_label = Expect.monotonic(increasing=True, tolerance=0.1)
self.monotonic_label_down = Expect.monotonic(increasing=False, tolerance=0.1)

def _default_vocabulary_tests(self, data: Optional[Iterable[str]], num_test_cases=100):

Expand Down Expand Up @@ -371,7 +367,7 @@ def _default_vocabulary_tests(self, data: Optional[Iterable[str]], num_test_case
templates=template.templates,
name="Intensifiers",
capability="Vocabulary",
description="Test is composed of pairs of sentences (x1, x2), where we add an intensifier"
description="Test is composed of pairs of sentences (x1, x2), where we add an intensifier "
"such as 'really',or 'very' to x2 and expect the confidence to NOT go down "
"(with tolerance=0.1). e.g.:"
"x1 = 'That was a good movie'"
Expand Down Expand Up @@ -400,7 +396,7 @@ def _default_vocabulary_tests(self, data: Optional[Iterable[str]], num_test_case
templates=template.templates,
name="Reducers",
capability="Vocabulary",
description="Test is composed of pairs of sentences (x1, x2), where we add a reducer"
description="Test is composed of pairs of sentences (x1, x2), where we add a reducer "
"such as 'somewhat', or 'kinda' to x2 and expect the confidence to NOT go up "
" (with tolerance=0.1). e.g.:"
"x1 = 'The staff was good.'"
Expand Down Expand Up @@ -555,8 +551,8 @@ def _default_temporal_tests(self, data: Optional[Iterable[str]], num_test_cases=
capability="Temporal",
description="Have two conflicing statements, one about the past and "
"one about the present."
"Expect the present to carry the sentiment. Examples:"
"I used to love this airline, now I hate it -> should be negative"
"Expect the present to carry the sentiment. Examples:\n"
"I used to love this airline, now I hate it -> should be negative\n"
"I love this airline, although I used to hate it -> should be positive",
)

Expand Down Expand Up @@ -604,13 +600,13 @@ def _default_fairness_tests(self, data: Optional[Iterable[str]], num_test_cases=

for p, vals in protected.items():
template = self.editor.template(
["{male} is %s {mask}." % r for r in vals],
["{male} is %s {profession}." % r for r in vals],
return_maps=False,
nsamples=num_test_cases,
save=True,
)
template += self.editor.template(
["{female} is %s {mask}." % r for r in vals],
["{female} is %s {profession}." % r for r in vals],
return_maps=False,
nsamples=num_test_cases,
save=True,
Expand Down
1 change: 1 addition & 0 deletions allennlp/confidence_checks/task_checklists/task_suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,7 @@ def _setup_editor(self):
"""
if not hasattr(self, "editor"):
self.editor = Editor()
utils.add_common_lexicons(self.editor)

def add_test(self, test: Union[MFT, INV, DIR]):
"""
Expand Down
Loading