From 85f963969b14417c95dbab193561500c95db6e5a Mon Sep 17 00:00:00 2001 From: Arjun Subramonian Date: Tue, 20 Apr 2021 23:23:26 -0700 Subject: [PATCH 1/7] finished WEAT --- allennlp/fairness/__init__.py | 5 + allennlp/fairness/bias_metrics.py | 136 + test_fixtures/fairness/bias_embeddings.json | 2602 +++++++++++++++++++ tests/fairness/bias_metrics_test.py | 79 + 4 files changed, 2822 insertions(+) create mode 100644 allennlp/fairness/bias_metrics.py create mode 100644 test_fixtures/fairness/bias_embeddings.json create mode 100644 tests/fairness/bias_metrics_test.py diff --git a/allennlp/fairness/__init__.py b/allennlp/fairness/__init__.py index c3e75844e6b..7a5ada684d7 100644 --- a/allennlp/fairness/__init__.py +++ b/allennlp/fairness/__init__.py @@ -12,3 +12,8 @@ Sufficiency, DemographicParityWithoutGroundTruth, ) +from allennlp.fairness.bias_metrics import ( + WordEmbeddingAssociationTest, + EmbeddingCoherenceTest, + NaturalLanguageInference, +) diff --git a/allennlp/fairness/bias_metrics.py b/allennlp/fairness/bias_metrics.py new file mode 100644 index 00000000000..72d79c57fb1 --- /dev/null +++ b/allennlp/fairness/bias_metrics.py @@ -0,0 +1,136 @@ +""" + +A suite of metrics to quantify how much bias is +encoded by word embeddings and determine the effectiveness +of bias mitigation. + +Bias metrics are based on: + +1. Caliskan, A., Bryson, J., & Narayanan, A. (2017). [Semantics derived automatically +from language corpora contain human-like biases](https://api.semanticscholar.org/CorpusID:23163324). +Science, 356, 183 - 186. + +2. Dev, S., & Phillips, J.M. (2019). [Attenuating Bias in Word Vectors] +(https://api.semanticscholar.org/CorpusID:59158788). AISTATS. + +3. Dev, S., Li, T., Phillips, J.M., & Srikumar, V. (2020). [On Measuring and Mitigating +Biased Inferences of Word Embeddings](https://api.semanticscholar.org/CorpusID:201670701). +ArXiv, abs/1908.09369. 
+ +""" + +import torch +from allennlp.common.checks import ConfigurationError + + +class WordEmbeddingAssociationTest: + """ + Word Embedding Association Test (WEAT) score measures the unlikelihood there is no + difference between two sets of target words in terms of their relative similarity + to two sets of attribute words by computing the probability that a random + permutation of attribute words would produce the observed (or greater) difference + in sample means. Analog of Implicit Association Test from psychology for word embeddings. + + Based on: Caliskan, A., Bryson, J., & Narayanan, A. (2017). [Semantics derived automatically + from language corpora contain human-like biases](https://api.semanticscholar.org/CorpusID:23163324). + Science, 356, 183 - 186. + """ + + def __call__( + self, + target_embeddings1: torch.Tensor, + target_embeddings2: torch.Tensor, + attribute_embeddings1: torch.Tensor, + attribute_embeddings2: torch.Tensor, + ) -> torch.FloatTensor: + """ + + # Parameters + + !!! Note + In the examples below, we treat gender identity as binary, which does not accurately + characterize gender in real life. + + target_embeddings1 : `torch.Tensor`, required. + A tensor of size (target_embeddings_batch_size, ..., dim) containing target word + embeddings related to a concept group. For example, if the concept is gender, + target_embeddings1 could contain embeddings for linguistically masculine words, e.g. + "man", "king", "brother", etc. Represented as X. + + target_embeddings2 : `torch.Tensor`, required. + A tensor of the same size as target_embeddings1 containing target word + embeddings related to a different group for the same concept. For example, + target_embeddings2 could contain embeddings for linguistically feminine words, e.g. + "woman", "queen", "sister", etc. Represented as Y. + + attribute_embeddings1 : `torch.Tensor`, required. 
+ A tensor of size (attribute_embeddings1_batch_size, ..., dim) containing attribute word + embeddings related to a concept group. For example, if the concept is professions, + attribute_embeddings1 could contain embeddings for stereotypically male professions, e.g. + "doctor", "banker", "engineer", etc. Represented as A. + + attribute_embeddings2 : `torch.Tensor`, required. + A tensor of size (attribute_embeddings2_batch_size, ..., dim) containing attribute word + embeddings related to a different group for the same concept. For example, if the concept is + professions, attribute_embeddings2 could contain embeddings for stereotypically female + professions, e.g. "nurse", "receptionist", "homemaker", etc. Represented as B. + + # Returns + + weat_score : `torch.FloatTensor` + The unlikelihood there is no difference between target_embeddings1 and target_embeddings2 in + terms of their relative similarity to attribute_embeddings1 and attribute_embeddings2. + Typical values are around [-1, 1], with values closer to 0 indicating less biased associations. + + """ + + # Some sanity checks + if target_embeddings1.ndim < 2 or target_embeddings2.ndim < 2: + raise ConfigurationError( + "target_embeddings1 and target_embeddings2 must have at least two dimensions." + ) + if attribute_embeddings1.ndim < 2 or attribute_embeddings2.ndim < 2: + raise ConfigurationError( + "attribute_embeddings1 and attribute_embeddings2 must have at least two dimensions." + ) + if target_embeddings1.size() != target_embeddings2.size(): + raise ConfigurationError( + "target_embeddings1 and target_embeddings2 must be of the same size." 
+ ) + if attribute_embeddings1.size(dim=-1) != attribute_embeddings2.size( + dim=-1 + ) or attribute_embeddings1.size(dim=-1) != target_embeddings1.size(dim=-1): + raise ConfigurationError("All embeddings must have the same dimensionality.") + + target_embeddings1 = target_embeddings1.flatten(end_dim=-2) + target_embeddings2 = target_embeddings2.flatten(end_dim=-2) + attribute_embeddings1 = attribute_embeddings1.flatten(end_dim=-2) + attribute_embeddings2 = attribute_embeddings2.flatten(end_dim=-2) + + # Normalize + target_embeddings1 = torch.nn.functional.normalize(target_embeddings1, p=2, dim=-1) + target_embeddings2 = torch.nn.functional.normalize(target_embeddings2, p=2, dim=-1) + attribute_embeddings1 = torch.nn.functional.normalize(attribute_embeddings1, p=2, dim=-1) + attribute_embeddings2 = torch.nn.functional.normalize(attribute_embeddings2, p=2, dim=-1) + + # Compute cosine similarities + X_sim_A = torch.mm(target_embeddings1, attribute_embeddings1.t()) + X_sim_B = torch.mm(target_embeddings1, attribute_embeddings2.t()) + Y_sim_A = torch.mm(target_embeddings2, attribute_embeddings1.t()) + Y_sim_B = torch.mm(target_embeddings2, attribute_embeddings2.t()) + X_union_Y_sim_A = torch.cat([X_sim_A, Y_sim_A]) + X_union_Y_sim_B = torch.cat([X_sim_B, Y_sim_B]) + + s_X_A_B = torch.mean(X_sim_A, dim=-1) - torch.mean(X_sim_B, dim=-1) + s_Y_A_B = torch.mean(Y_sim_A, dim=-1) - torch.mean(Y_sim_B, dim=-1) + s_X_Y_A_B = torch.mean(s_X_A_B) - torch.mean(s_Y_A_B) + S_X_union_Y_A_B = torch.mean(X_union_Y_sim_A, dim=-1) - torch.mean(X_union_Y_sim_B, dim=-1) + return s_X_Y_A_B / torch.std(S_X_union_Y_A_B, unbiased=False) + + +class EmbeddingCoherenceTest: + pass + + +class NaturalLanguageInference: + pass diff --git a/test_fixtures/fairness/bias_embeddings.json b/test_fixtures/fairness/bias_embeddings.json new file mode 100644 index 00000000000..4481e4ccf5b --- /dev/null +++ b/test_fixtures/fairness/bias_embeddings.json @@ -0,0 +1,2602 @@ +{ + "he": [ + -0.0367426791967243, + 
-0.011021889398097602, + -0.11295283313084178, + -0.15441727211683257, + 0.10571840953427392, + 0.026829178105471942, + -0.15744929292651647, + 0.12261579933009975, + -0.15828684752895025, + -0.03334491401916538, + 0.02899621348513142, + 0.08378106234913035, + -0.18585300053569034, + -0.06560356726574197, + 0.13508585355279848, + -0.04397710279329215, + -0.06198086930193991, + 0.047074957589193556, + -0.142991418064417, + 0.01527494777993225, + 0.03245981264890784, + 0.1678272893649701, + 0.11800924901671392, + -0.03638424971620678, + 0.06842345634205835, + -0.5033556862879934, + -0.016748531838100697, + 0.007378709749694549, + -0.01184865454167908, + -0.05754256139777657, + 0.6207413411574805, + 0.00821845881833559, + -0.10064919259552618, + -0.11947771265373211, + 0.019084541115718435, + 0.0029980065834715216, + 0.044598868218679685, + 0.18442659750097778, + 0.057443810418450314, + -0.061821770501914286, + -0.030951117131423393, + 0.018704166973128416, + -0.1136422612642862, + 0.03626172535296865, + -0.06610280832789137, + -0.045295611239481594, + -0.07130003579539537, + -0.06092752552245986, + -0.0076182723106526865, + -0.0024086095346409304 + ], + "him": [ + 0.021675511432224605, + -0.008226150088433285, + 0.009257929072105294, + -0.15014331819815693, + 0.17694239585658778, + 0.020160906989116968, + -0.09889859729708098, + 0.2094538512771219, + -0.12334424053972613, + 0.010907869582079127, + -0.05211435024640094, + 0.15954256203887754, + -0.16630755365438463, + -0.03320534716899135, + 0.15251849334757575, + -0.021479844829526487, + -0.027098012745886278, + -0.029268462653593154, + -0.06776949019005296, + -0.08214736203275895, + -0.033547763723713056, + 0.18827475326285362, + 0.10085345159625937, + -0.025924013129697582, + 0.1293464947502711, + -0.5327747949577307, + -0.009395439212334803, + -0.025130476352088555, + 0.05824886058654742, + -0.17749678456423243, + 0.5263793958139867, + 0.09679336959027346, + -0.14750725424514066, + -0.08850652662044771, + 
0.00202460581958467, + 0.07647484228231635, + 0.07066463121886396, + 0.08196800098028567, + -0.0012803661554887538, + -0.11756120539331513, + 0.016508644911532415, + 0.08495191667143194, + -0.10542806429822921, + 0.08271262110722016, + 0.004953263812746746, + -0.0806925446071424, + -0.07676834218636353, + -0.11376201219092673, + 0.004455401012548207, + -0.025981988419385916 + ], + "his": [ + -0.006007637080540547, + 0.08515521480682708, + -0.12314787212298071, + -0.13016099171337828, + 0.1505232216965325, + 0.11519071319052966, + -0.1360975123577148, + 0.11328830373842183, + -0.09704796090150124, + 0.020836936673181157, + -0.03628192188605667, + 0.11342982007242987, + -0.2270889324328727, + -0.03166024920579469, + 0.06320585226758287, + -0.09869599922159465, + -0.11648227365660288, + -0.06099533130345757, + -0.056710431671214666, + -0.013956018417591108, + -0.019857070411125614, + 0.1742961744638801, + -0.0030173432025710407, + -0.13211535019948906, + 0.08571948879685908, + -0.4848994487674869, + -0.07598352328630718, + -0.009581730619943147, + 0.003308078658387521, + -0.021302686633207557, + 0.5926130837535927, + 0.03200060494581398, + -0.09082482489714848, + -0.041011075326324746, + 0.04331116359045512, + 0.10207089508578596, + 0.01739629841078612, + 0.1709338180216895, + 0.0013630889118572678, + -0.097008551289499, + 0.017605348488997974, + 0.07439997342221741, + -0.1999142136113323, + 0.0009127624408917406, + -0.026825406351520618, + -0.08084881902258298, + -0.014543579905624413, + -0.11137335486431328, + -0.004053457729029773, + -0.07851469518445067 + ], + "man": [ + -0.017997175489727155, + 0.08200416653811962, + -0.03284208999587444, + -0.0868130234220952, + 0.3136053496064486, + 0.07690929516857847, + -0.07105171850419587, + 0.047804461117427324, + -0.02018880915445417, + 0.02055109416950388, + -0.020684567596101135, + 0.02894657270247155, + -0.12469468865363477, + 0.10497494325550812, + 0.1136259280622477, + -0.08824118908668588, + 0.022589424069967754, + 
0.12288707710486045, + -0.13528103814603462, + 0.045661259238922745, + -0.15808020617208376, + 0.242540283759593, + 0.0062963228852401885, + 0.05596350100899413, + 0.07457351020312643, + -0.5356860638319186, + -0.13489396520890257, + 0.07829169851547868, + 0.0742493604528188, + -0.05554401309683132, + 0.4981228280609754, + -0.06592824568609817, + -0.03209463880692979, + 0.04796272246610693, + 0.05952152120942967, + 0.060328082058724544, + 0.023908904230043524, + -0.002411292789641362, + 0.04251509989770161, + -0.10789419777151389, + -0.016448502388551514, + 0.11926613371760048, + -0.010982956245717417, + 0.05601117008992173, + 0.12585590746503095, + -0.101277729338764, + -0.09196891121522364, + -0.18671978999338162, + 0.10131586460350607, + -0.022356798955041095 + ], + "boy": [ + -0.06170777316335876, + 0.04451277673357509, + -0.03831242852578669, + -0.1008233852569851, + 0.20844616774860342, + 0.11913253656472213, + -0.19051094513278224, + 0.05358054751253456, + 0.01685082940926519, + 0.07043404784102773, + 0.06142923444387041, + 0.06675390270477424, + 0.012869824299975324, + 0.04618973244885079, + 0.17659545595505652, + -0.06215801383321662, + -0.1891277905325833, + 0.15408723806106034, + -0.04358367840213111, + 0.07645697070010096, + -0.16363195870215738, + 0.2639631317014165, + 0.010767429307672917, + 0.14606303358045786, + 0.06883340410369404, + -0.39476186189402357, + -0.08905417045887844, + 0.02357849338463289, + 0.06701527122922564, + -0.14707607508763806, + 0.42093687032813343, + -0.0812817955054846, + -0.046319462811352206, + 0.07310687486844666, + 0.11578434853251639, + 0.11987657834965673, + 0.0607157174501126, + -0.16885932919118513, + 0.07312404506348362, + -0.21863381680386187, + -0.08003027906723562, + 0.049717253629220255, + -0.1253042677807823, + -0.021960679452262256, + 0.20205503959595997, + -0.11665812067995243, + 0.061339567869788544, + -0.2514861233078977, + 0.060561185694780036, + 0.004821009206486554 + ], + "brother": [ + 
0.07614158568847995, + 0.11257270917746082, + 0.03419512022734642, + -0.06537173560639559, + 0.23516860661767125, + 0.12771102867719966, + -0.15030601087936146, + 0.15144491904112006, + -0.16834536043448892, + 0.013538671217996083, + 0.08061558330993374, + 0.26218780898666455, + -0.20687510141034662, + -0.07192244863469241, + 0.1476459037043449, + -0.04145665530764993, + -0.09166219603971817, + 0.006020284471841425, + -0.04583508021622887, + 0.11874511355062804, + -0.0968569712041031, + 0.13916581653488708, + 0.033147802582092885, + 0.04287431721529349, + 0.05013585229559701, + -0.41112195321663497, + -0.04054871453533888, + -0.23277928879580007, + -0.025147570075194206, + 0.03275356514148414, + 0.30348318534133817, + -0.06526620740259628, + -0.08548381837199627, + -0.023532789447246266, + 0.1547202755552685, + 0.052620742830343036, + -8.286950645523221e-05, + 0.08214474671593129, + 0.0695410952055608, + 0.04817063838710795, + -0.018738225018024756, + 0.22369988107268954, + -0.17073666935454498, + -0.09515259115783499, + 0.09095535618408128, + -0.04639856100255349, + -0.1867510720556366, + -0.343106039220702, + 0.018184102393169133, + 0.03886822766728948 + ], + "grandpa": [ + -0.09096832379090469, + 0.08813875886827918, + 0.018170235239713336, + -0.15038661249368115, + 0.07632101274011627, + -0.024546411336967257, + -0.06903468996392866, + 0.06712943241821176, + -0.23694194786112804, + -0.014431553507097784, + -0.10747969762966478, + 0.3514427770139955, + -0.03237135557067374, + -0.09908626573907413, + 0.2226293442170196, + 0.1591353491698244, + -0.011315170083671118, + 0.20191867976060512, + 0.216313672920204, + 0.07068762961845602, + -0.08213462292691188, + 0.005535288104779407, + 0.10886229668648907, + 0.10055382898374794, + 0.2930234611865408, + -0.1662337208637994, + -0.19499796045940643, + 0.003894706877575612, + 0.144488038119036, + -0.1912595361940268, + -0.07436426174721782, + 0.16360498038518193, + -0.03579309513588693, + 0.24197543232312338, + 
0.05524731948107191, + 0.2323899271302801, + -0.029582985405793467, + -0.021913551382575225, + 0.06877722272802096, + -0.04248466859712776, + -0.02660923883105966, + 0.12761878482236422, + -0.312951425245796, + 0.015160958186424267, + 0.1991586309916747, + -0.03915304256448227, + 0.04770352946897662, + -0.14478670011268893, + 0.031560333777564516, + 0.007135189508709786 + ], + "uncle": [ + 0.06753174114657315, + 0.11158246719809903, + -0.005007705990043669, + -0.14550167959960217, + 0.20934007328370216, + 0.09552757778382517, + -0.1503056600178724, + 0.16204288166907713, + -0.19282296778723265, + 0.005775291370144851, + -0.028858932070356648, + 0.3474068648123471, + -0.027502952189413062, + -0.08773124111778605, + 0.16330466585554482, + -0.02104572780321371, + -0.07472566348747578, + 0.06545943586810364, + 0.05265977440711275, + 0.09549690941818186, + -0.05006172571761504, + 0.04724899846861413, + -0.029919181282596866, + 0.059560156676857975, + 0.1406276000598615, + -0.3657421662719558, + -0.11381249549987699, + -0.23610260350258386, + 0.09494049764151033, + 0.030079094903451276, + 0.21411557593387504, + -0.05925347302042485, + -0.07108489036610545, + 0.10259006427197075, + 0.12657491680258673, + 0.11202058670728919, + -0.038499751870086384, + 0.046261038975390294, + 0.0855537871571109, + 0.02354454242387986, + -0.011906116721997526, + 0.20939702881989689, + -0.21823389932026266, + -0.02972421810100724, + 0.1019942217394721, + -0.018760934562786963, + -0.14064512484022912, + -0.40902837377994483, + 0.004215147797918647, + 0.0850455685264503 + ], + "jack": [ + -0.19683922292481285, + 0.18515257115010456, + 0.08419660779737294, + 0.10014290118797743, + 0.03904932078085833, + 0.26248059060511736, + -0.26531737343269374, + 0.035068890073188176, + -0.13744547852697378, + -0.14302746300422836, + -0.1208246115820952, + 0.3204894488981554, + -0.12423545203698427, + 0.07168572542158141, + 0.11947099709901547, + 0.018682336922492906, + 0.12668133959304423, + 
0.15585999634629627, + -0.13398326326497112, + -0.11454795036517429, + 0.09345747520141359, + 0.12720178872597754, + 0.13478068961886464, + 0.05292721976332445, + 0.07779932746809835, + -0.3099687818761989, + 0.03459088099830523, + -0.01411757362608734, + -0.00010938813278115742, + -0.052942855574185106, + 0.27514559740224964, + -0.1763942833808707, + -0.04929077689458876, + -0.023587737526934213, + 0.07792888132951524, + 0.02828518184692878, + -0.07024946450966676, + -0.08088181589491363, + 0.13686248615059785, + -0.18582714470437858, + 0.04653664049584729, + 0.21118396254583288, + -0.17998605250429026, + -0.1292232757015339, + 0.13301384299161037, + 0.039866850320144115, + -0.04781654329915537, + -0.14113552989008885, + -0.04368198817014446, + 0.1779198917834177 + ], + "she": [ + 0.010696994952413164, + 0.06700192873624893, + -0.13311808067209266, + -0.12783353628087532, + 0.10389807558032646, + 0.14017595021772117, + -0.1292295205183139, + 0.12090499014810599, + -0.023028425256933972, + -0.04072447417542874, + 0.020541164001396217, + 0.03973063260537412, + -0.07915124333060643, + -0.020399439713331385, + 0.18307234910774345, + -0.015593037638972778, + -0.139121875825239, + 0.060773146275799675, + -0.020296689604484386, + -0.02109034561764743, + 0.08128419386598211, + 0.28934784962035737, + 0.12143291312114747, + 0.03951981772687769, + 0.17890919814583905, + -0.4664854941653861, + -0.09234754610304285, + 0.04546692316479809, + 0.004157481990381772, + -0.14635867228454935, + 0.5506697212758948, + 0.0482199174604574, + -0.051777197090884616, + -0.08372185162069717, + -0.021784794629165095, + -0.02399569352297643, + 0.01982722790026964, + 0.1531295501468466, + 0.0586756268124402, + -0.17116042269589452, + -0.007933902801229232, + -0.011823348731808394, + 0.0005379676819580844, + -0.06006452483547553, + 0.0031505309236811584, + -0.10363411409380571, + -0.0008882569754463185, + -0.22268428762186324, + -0.010757404930200797, + 0.07484282497343561 + ], + "her": [ + 
0.02232489067350508, + 0.14854055811996092, + -0.1278580118621893, + -0.10690895941119526, + 0.1435868742534382, + 0.21856839171658113, + -0.10663245923572694, + 0.1366960234949296, + 0.05460545332757719, + 0.0035740146174841345, + -0.015856119098512592, + 0.06800072086442177, + -0.10593954313334852, + -0.00304400042571294, + 0.11611008573220116, + -0.04918871495033042, + -0.19841386085413157, + -0.03980436562148406, + 0.05720055738408102, + -0.05529337243883269, + 0.039479561198494176, + 0.305882483271094, + 0.02047933528543945, + -0.031021320891095917, + 0.14408490770202267, + -0.4390689533340252, + -0.1297718594622682, + 0.03381297326510133, + 0.03162262548955413, + -0.1330815332493498, + 0.49773362911712216, + 0.07384886614194741, + -0.04724988239463691, + -0.03262702070526132, + 0.010306294191025344, + 0.06422466123920084, + -0.0046008962932444775, + 0.11967294040284411, + 0.028576126568279726, + -0.20267795994569116, + 0.013597812243693656, + 0.02880432249622647, + -0.0528315214789401, + -0.061694518067294986, + 0.031609300179893, + -0.148535561128838, + 0.03080145328168738, + -0.27068700912865107, + 0.0065184083534801755, + -0.017121357250836286 + ], + "hers": [ + 0.02232489067350508, + 0.14854055811996092, + -0.1278580118621893, + -0.10690895941119526, + 0.1435868742534382, + 0.21856839171658113, + -0.10663245923572694, + 0.1366960234949296, + 0.05460545332757719, + 0.0035740146174841345, + -0.015856119098512592, + 0.06800072086442177, + -0.10593954313334852, + -0.00304400042571294, + 0.11611008573220116, + -0.04918871495033042, + -0.19841386085413157, + -0.03980436562148406, + 0.05720055738408102, + -0.05529337243883269, + 0.039479561198494176, + 0.305882483271094, + 0.02047933528543945, + -0.031021320891095917, + 0.14408490770202267, + -0.4390689533340252, + -0.1297718594622682, + 0.03381297326510133, + 0.03162262548955413, + -0.1330815332493498, + 0.49773362911712216, + 0.07384886614194741, + -0.04724988239463691, + -0.03262702070526132, + 
0.010306294191025344, + 0.06422466123920084, + -0.0046008962932444775, + 0.11967294040284411, + 0.028576126568279726, + -0.20267795994569116, + 0.013597812243693656, + 0.02880432249622647, + -0.0528315214789401, + -0.061694518067294986, + 0.031609300179893, + -0.148535561128838, + 0.03080145328168738, + -0.27068700912865107, + 0.0065184083534801755, + -0.017121357250836286 + ], + "woman": [ + -0.03256106847930736, + 0.11628030553121017, + -0.10441138082853971, + -0.08870034690520731, + 0.2764991299556674, + 0.2412528899061775, + -0.0776762557426544, + 0.10414053185920268, + 0.06377686062084793, + -0.04517258572042509, + 0.03632963592683806, + -0.12850617688883476, + 0.05490521159872188, + 0.10067510001311543, + 0.15054180330145475, + -0.06831313243179755, + -0.1630026495927426, + 0.07771392348011187, + -0.002589387894933515, + 0.04255557481802276, + -0.09649936226068731, + 0.3187946180150552, + -0.01191609905958148, + 0.12519141599257738, + 0.12428739029359812, + -0.47961791994061564, + -0.13776526549623763, + 0.060858507818785844, + 0.03532695662975588, + -0.06321901936612064, + 0.4111164488215307, + -0.049167159592700606, + -0.05411418911211501, + 0.0001529776503237045, + 0.030354815285369825, + 0.01640035351880411, + -0.004234929911289852, + 0.006499657783375649, + 0.06186118711586803, + -0.15057588363534485, + -0.0451546487025882, + 0.07555610023433394, + 0.08720260591582694, + 0.004004439232085809, + 0.10001681145850154, + -0.1528646471113321, + -0.041386081255057494, + -0.23565654034106767, + 0.08746807377981293, + -0.01877467656987331 + ], + "girl": [ + -0.060866403703171, + 0.1228293243829214, + -0.13787862259771433, + -0.103263294403088, + 0.21653120263757533, + 0.22149289781340167, + -0.1350181435283767, + 0.07164440631998385, + 0.03330515815175688, + 0.017451394341908278, + 0.057486800654583226, + -0.056178396339534344, + 0.042146157349394815, + 0.05924723129170026, + 0.18702589075570397, + -0.04461287911474336, + -0.18201122374526024, + 
0.12364862209166995, + 0.005377877223136779, + 0.09511269478329637, + -0.053464472679304764, + 0.30926722777437265, + 0.05523196622770413, + 0.21370603812464928, + 0.07298635946362371, + -0.3429220000346041, + -0.16554757754618374, + 0.057303164961243026, + 0.09225751289742048, + -0.15310272786674453, + 0.37535842009863624, + -0.03598023579993372, + -0.03365653798805206, + 0.06663150503736069, + 0.11690001036141788, + 0.08865895959908092, + -0.02214928978134017, + -0.14667724432765838, + 0.03735397204434401, + -0.23115142899196758, + -0.0794100772748893, + -0.020567197654101584, + 0.005932492331580573, + -0.12074399982681787, + 0.17554689419412137, + -0.15584666889992393, + 0.09994196037257931, + -0.2343120817908036, + 0.06589519653617935, + 0.003953288017493822 + ], + "sister": [ + 0.14810416710846683, + 0.2594289856383753, + -0.002297455763809501, + 0.01863195508916412, + 0.11551860290540009, + 0.2589676894134288, + -0.21071209301250643, + 0.11599594421643167, + -0.028808952065787572, + -0.09258616361827846, + 0.09688223541756256, + 0.11172795131779646, + -0.0659613488958244, + -0.038464082617493324, + 0.0622208382022367, + 0.01599755251756763, + -0.26035157808826825, + 0.03709423339297836, + 0.10505921241367897, + 0.13400053643966858, + -0.015193292577552257, + 0.28770844986335475, + 0.037986741306461756, + 0.19778978180672327, + 0.13523400243246023, + -0.28343644569319826, + -0.06637250422675496, + -0.07281861756144176, + -0.029647307813733776, + -0.12276697054434164, + 0.3431041095721443, + 0.03489405096355975, + 0.046947921884987164, + 0.005301296442515297, + 0.03172915773327482, + -0.10063678556147475, + -0.007620814199691803, + -0.0001266097686617218, + 0.05782448461492075, + -0.11501318269371962, + 0.026953738987198498, + 0.004588894620337091, + 0.013984495904616432, + -0.193953000596625, + 0.02871268154927701, + -0.05332584360381231, + -0.12713123395948742, + -0.4042960566291743, + 0.004844011489090097, + 0.11876773283763177 + ], + "grandma": [ + 
-0.03769079204578591, + 0.13669996615202537, + -0.04840885126885742, + -0.06146196123218447, + 0.06240152318052893, + 0.06063575126152231, + -0.1070218994619599, + 0.008615757876990317, + -0.022045448299506363, + 0.021342414144592318, + -0.1501812946977078, + 0.15117123530279994, + 0.13456139217308583, + -0.001217198725680991, + 0.29055890279231467, + 0.170871809023985, + -0.05317719113002649, + 0.12751845595975847, + 0.2594500822506148, + 0.038277703396896526, + 0.04484959916963297, + 0.09819052093408946, + 0.003846410442686299, + 0.06903387334136261, + 0.41665668063178823, + -0.2038118937384635, + -0.1924741770373954, + 0.050338353822294034, + 0.18608112549611325, + -0.297057749512767, + -0.035887236134218535, + 0.14968254599590566, + -0.1516044917508301, + 0.32912376453267306, + -0.026982808554063938, + 0.07786525186923304, + 0.010028627305479345, + 0.13999221137048706, + 0.138498484197918, + -0.051167082725793574, + -0.02773848840527932, + 0.07852521227262782, + -0.10789848808936975, + 0.061313344194778775, + 0.1117852014574542, + 0.031227210385057002, + 0.0960746173506864, + -0.21289012768439766, + 0.016581631081935406, + 0.027871991845660703 + ], + "aunt": [ + 0.14825517176741843, + 0.22900728172897045, + -0.09560278355884262, + -0.10669466865247117, + 0.07193268819082903, + 0.24418498879339506, + -0.19006514271947433, + 0.14393036380140903, + -0.01921321619412316, + 0.0019387843680099205, + -0.06389311545711791, + 0.23311616375193625, + 0.12928094156850858, + -0.09181045316111552, + 0.15029493820024786, + 0.028439333430384593, + -0.22791717017185706, + -0.01635922028286519, + 0.18947396683657822, + 0.11657066008624249, + -0.004269883114477911, + 0.25135456864979455, + 0.055717278778767636, + 0.08334322127807534, + 0.2523608254717453, + -0.23517060476341908, + -0.17232148075907514, + -0.028946654578118125, + 0.11964812886670871, + -0.11158130334406975, + 0.11420805292303716, + 0.03386263842702361, + -0.006545700626789978, + 0.15514593462940238, + 
0.08821727775939954, + -0.0004155421400981007, + -0.08156340452424984, + 0.028221311118961916, + 0.18048893196390917, + -0.09445397368711546, + -0.02862171747936318, + 0.1395258938369952, + -0.03273688860746615, + -0.13657001442251473, + 0.05315132388279308, + -0.030946590011745347, + -0.058945686082526455, + -0.4098190544232555, + 0.0860244764348984, + 0.08473730625015301 + ], + "jill": [ + -0.1406050149171967, + 0.07361177145293331, + 0.0408626369826799, + 0.12827510367514605, + 0.259395013240159, + 0.11220533707107652, + -0.21569482752549454, + 0.021533196963137883, + 0.07942717396929387, + -0.23746146306718016, + 0.02861497352996215, + -0.02162923335244717, + -0.20508728175417962, + 0.015659253373193335, + 0.06092142136939443, + 0.06325096904961705, + -0.03554555930757154, + -0.17138745960134313, + 0.09103620752836802, + 0.056898536849965306, + -0.022455097919454443, + 0.3044619636893259, + 0.14390944080627677, + 0.13761022776442866, + 0.037016343305053746, + -0.05818547284776224, + -0.045282246133008824, + -0.1314609959854817, + 0.10105834256384298, + -0.0664562137809722, + -0.0464506485520613, + -0.03200406626100255, + 0.015374288973681158, + -0.11162476444049145, + -0.07887562997023805, + -0.10654959202812687, + -0.19113902430437305, + 0.06178744221001717, + 0.07629450081676187, + -0.1508497027943959, + 0.2626123532346513, + 0.0509839531758798, + 0.07590987144899926, + -0.37480801409521813, + 0.15361951805281227, + 0.08965009037284614, + 0.006525152557250815, + -0.17691015674978372, + -0.15883499551756822, + 0.3061069194759836 + ], + "engineer": [ + -0.06740834931842901, + -0.07036640709915663, + 0.1626630346881778, + -0.046876775714017216, + 0.05358666357396935, + -0.013682222965938937, + -0.2653409982114645, + 0.0756796576237788, + 0.05988459365875221, + -0.09347221441084566, + 0.23324848275071713, + 0.09884575143915114, + -0.12213442781201288, + 0.1483228850120417, + -0.1906460201478464, + -0.11652376386921971, + -0.055109902567034255, + 
0.21992516543670615, + -0.0661282658902201, + -0.027868441102670328, + 0.09641620534071385, + 0.10394799920088174, + -0.1922838034979096, + -0.059659524045001196, + 0.09002583595028324, + -0.29711198565256763, + -0.0854681762718252, + -0.23477574084402755, + -0.043896612881939044, + 0.1777607847606008, + 0.32844087206665984, + -0.2966899801267845, + -0.052610022214299765, + -0.10963904514858598, + 0.1937206318356951, + 0.06924306858052433, + 0.02095156958007217, + 0.22824470294500257, + 0.1786490059148682, + 0.08212629441764717, + 0.07082659407727254, + 0.019149806939990383, + 0.013367929326736628, + -0.175461859420144, + 0.04429852290649443, + -0.026751131234406356, + -0.06576252776787471, + -0.05964947629438731, + 0.014609027482563654, + 0.13591391300389422 + ], + "banker": [ + 0.05289026060616259, + 0.0640197166693786, + 0.00444935818733204, + -0.0743227279754251, + 0.25835765035461644, + -0.03627982291339841, + -0.22007011764446757, + 0.044315069805771795, + -0.27717855229122335, + 0.012434577623738095, + -0.05855416200863093, + 0.1886872553988842, + -0.049308999991761394, + 0.08132216410547938, + 0.06882191167170487, + -0.08797339151117956, + 0.024403260789701165, + 0.025831357330090073, + 0.025793891834431724, + 0.11253532969425728, + 0.20589052946412464, + 0.00010509071532167437, + -0.21124148378521146, + 0.08644832545261609, + -0.16641291630365168, + -0.3590957566218647, + 0.021531861125391436, + -0.10855296789104314, + -0.14364271035411746, + 0.181317572017618, + 0.11660804945758858, + -0.1679622247417588, + 0.12169674530906698, + 0.019346080031629527, + 0.06111944653488508, + -0.11568683903493032, + -0.28658900325952674, + 0.07404945024238772, + 0.22203815809287392, + -0.16100466181273446, + 0.013672702062612304, + 0.12596781181411895, + 0.1114179763825641, + 0.013305540205160464, + 0.1007667563521635, + -0.15465976993035843, + -0.174818410447237, + -0.016567021794854205, + 0.2254320912289833, + 0.13696724056887363 + ], + "nurse": [ + 0.07460675986831795, 
+ 0.03203169897886132, + -0.15147310133886815, + -0.1959345274797395, + 0.13288530438836885, + 0.13447147387934336, + -0.13019571264280336, + 0.15070638513990606, + 0.1684382185032043, + -0.18190240402889335, + 0.13198471710704826, + 0.05729073820022404, + 0.1282829788448635, + 0.0927422348284243, + 0.039163376639048536, + -0.10890006879914423, + -0.212287083024799, + 0.12152046083602842, + 0.03518578281321602, + 0.07761074582245257, + -0.07371834268539361, + 0.3908021430000767, + 0.03042118929109436, + 0.09338927839315686, + -0.013659109935001903, + -0.25131253188202374, + 0.002601764202139401, + -0.11556928272027493, + -0.076819689426698, + -0.0021102950619052256, + 0.28906012040764484, + 0.05899049527622998, + -0.00728968610436485, + -0.03226698754785499, + 0.12331352199973875, + 0.10839703806543365, + 0.07332281448751633, + 0.0018814566450615622, + 0.3275379148370394, + -0.10240326460529336, + -0.02557951847912995, + 0.04535795672272651, + 0.12023651545522679, + 0.013731522020459434, + 0.17433057447670935, + -0.17035703735034224, + 0.006260907119937389, + -0.12913488573259918, + 0.0033159461430244744, + 0.2946380821725808 + ], + "receptionist": [ + 0.1097255390394386, + 0.009435615870951559, + -0.06199439975688469, + 0.01009308524306059, + 0.17155453769642165, + -0.09134017302849656, + -0.1453436321463831, + 0.16346022136998498, + -0.01203939099830946, + -0.2343578522075128, + -0.04993562581718051, + -0.0425597705184416, + 0.050491270137162585, + 0.18406041148429747, + 0.04669221362379672, + -0.0028110433806721475, + -0.32795936839872714, + 0.2278477682910716, + 0.2225833614082646, + 0.09479550538112934, + 0.06439013131327255, + 0.41743102587677233, + -0.15797355675918517, + 0.13302383459589665, + 0.12241232028033185, + -0.11747354681295621, + -0.01232574165251418, + 0.14458898962808187, + -0.054406624298896804, + -0.13634994733923125, + -0.04703593778452982, + 0.03333349041455306, + 0.08178309072480473, + 0.11451441776002837, + 0.0977391280809414, + 
0.1328946149865985, + 0.12285166695194558, + 0.08053741369117048, + 0.2517146301284415, + -0.019806264834784596, + 0.08016009243201987, + 0.07925555516693275, + -0.03461276454660483, + 0.10731171673774899, + -0.043355763311718254, + -0.22319069357196591, + -0.02171949037005314, + -0.01164423743307569, + 0.13554620136939668, + 0.24106434993008727 + ], + "homemaker": [ + -0.14551601922112395, + 0.10414864695815045, + -0.11908713975416799, + -0.13219142821516494, + 0.17656877682278754, + 0.12188176417284301, + -0.19334642457281356, + 0.05839502627188894, + -0.04768683740104024, + 0.022972109758648013, + -0.005221417846548367, + -0.18037085220550222, + 0.11383205769850183, + 0.003580535183005695, + 0.02132751413086441, + -0.0740117538692501, + -0.17122953424432694, + 0.27716040403812486, + 0.2686205862839806, + 0.19469051762803102, + -0.08156144782581241, + 0.384836367025161, + -0.15050129254194902, + 0.2641650291948619, + 0.12489421582698607, + -0.15442960870885536, + 0.038345019370854626, + -0.014787003854987501, + 0.10774032103387893, + 0.074182550224333, + -0.009445533497982323, + 0.023925598975719425, + 0.03875592074685113, + 0.1579148444763438, + 0.1336469101976104, + -0.01734251337654649, + -0.13913962196469623, + 0.18047729051374228, + 0.1921112451353301, + -0.007125425909298439, + -0.04174856992504255, + 0.05956832297202354, + -0.020184416206556057, + 0.04022377927676636, + -0.04003318044573184, + -0.1696799905011112, + -0.09718807666114938, + -0.20805718889538724, + 0.0793683236141684, + 0.2066512131028209 + ], + "scientist": [ + -0.030347079970493712, + 0.11754191425674392, + 0.06363825086315666, + 0.019219110372927093, + 0.09362864660273924, + 0.009220576572483426, + -0.14244040159721122, + -0.10334009779787118, + 0.0625026453928504, + 0.02460349984138947, + 0.11502006034305867, + 0.07715754722612375, + 0.008153608716341671, + 0.13094167320178401, + 0.03284386958687169, + -0.0017853684576541857, + 0.09863765001845352, + 0.16931511237921326, + 
-0.13477265464405663, + 0.1724924940686271, + -0.024121494123941982, + 0.1598967206602894, + 0.02326352394688546, + -0.05242679787532814, + 0.2045439462560151, + -0.4301997429362301, + -0.10102454233125346, + -0.16988966319441068, + -0.1940940623017536, + 0.1271299719882093, + 0.17867759143491324, + -0.3508230413869781, + -0.13724052391738775, + -0.28608003341943183, + -0.04675455459240613, + -0.013664476485062268, + -0.127573417248261, + 0.17982283701956844, + 0.22987816676505496, + 0.02075516619328874, + 0.07631692925489533, + 0.11600335200665154, + 0.042023186469941604, + 0.0041238481161937094, + 0.24703757030618548, + -0.0010774370203247142, + 0.04983167909259088, + 0.08721025846920852, + -0.022199255322761412, + 0.10419613995205791 + ], + "maid": [ + 0.08042447591378793, + 0.005957938124312269, + -0.3177595476538498, + 0.022466142504172917, + 0.3126550623810127, + 0.05535910387947161, + -0.018472683691139115, + 0.1302732986200714, + 0.1466161945647364, + 0.02843988531091991, + -0.044522217572201594, + 0.08090502369051945, + 0.14703694084036356, + 0.01866575710899036, + 0.22528079773173865, + -0.2746383938218077, + -0.19292818563427586, + 0.19214863035202248, + 0.08859805965401701, + -0.010650006616318874, + 0.10219435808367429, + 0.2563562206270439, + -0.04088286907642152, + 0.10297818490172081, + -0.09812572024068071, + -0.15285050105486672, + 0.06962603342865643, + 0.01802032805064251, + -0.003321973386347614, + -0.03779454869796025, + 0.15453775769316852, + -0.030943005285717008, + -0.07174044364627513, + 0.18112806800564618, + 0.03555199240654647, + 0.14087738622661372, + -0.0073457601035129124, + -0.08821148566473522, + 0.149409778973469, + -0.2562707899111805, + -0.006672352485719802, + 0.02121671828467095, + 0.05019481710553017, + -0.030204029593498747, + 0.1311041123318428, + -0.09527019856294716, + -0.14841878266945377, + -0.3323468423875223, + 0.20218033216228015, + -0.03842246445955611 + ], + "lawyer": [ + -0.09235780028333866, + 
-0.004703762251305323, + -0.10856786598744905, + 0.10913842648924105, + 0.19771746412266442, + 0.06569514302956428, + -0.08856943223777032, + 0.1520476499506075, + -0.057935904421019375, + -0.01148248207158548, + -0.07204238847710881, + 0.0468973837365185, + -0.13326333821147598, + -0.04338373000662778, + 0.13190129310787155, + -0.09796543026559729, + 0.0022378650792507946, + -0.08157862528147052, + 0.07918688176385763, + -0.008393771045386298, + 0.04809037387662902, + 0.2834936473045238, + -0.10600322534808104, + 0.032754399176947425, + -0.08391849966255684, + -0.49191152218470124, + 0.04905283452106601, + -0.13388576784979453, + -0.1697657627722489, + 0.010594175081590143, + 0.2626883603199877, + -0.28727817319183097, + -0.09580037408539671, + -0.12578457715600055, + 0.022682181295144762, + -0.1310079913121366, + 0.015228202160454224, + 0.017384037160827853, + 0.20054145044466518, + 0.015598778324749424, + 0.10283920801996184, + 0.23346874674336776, + 0.032904243349135213, + 0.11861318876142316, + 0.04176041814126, + -0.151707618944489, + -0.22866604891683584, + -0.01803144082784435, + 0.024051910715271657, + 0.1902559927793645 + ], + "programmer": [ + -0.09132397057940844, + -0.08964690204949342, + 0.17872299755628585, + 0.14525826603657202, + 0.045900061398282066, + 0.06034402026315556, + -0.21798360616074378, + -0.10772861562769313, + -0.06665517584887179, + 0.1984370066495619, + 0.23657237861789698, + 0.12868546509297493, + -0.010268847295942782, + 0.23513068624343014, + 0.05512442640866316, + 0.010050174616212423, + -0.07557006599340658, + 0.17665171605337976, + 0.1351820886214804, + 0.056646351286198444, + -0.005906976367732349, + 0.10493074957607187, + -0.24592331471210932, + 0.260141435235358, + 0.12886633210506668, + -0.1277805586516622, + 0.012864594809092723, + -0.2502532700738873, + -0.023849129100656702, + -0.049055177474945484, + 0.22469034046652525, + -0.250933402437581, + 0.001463505599656834, + -0.1488070973145797, + 0.061081932011791136, + 
0.09666325212772872, + -0.004518700067857631, + 0.05834284715218058, + 0.12470956979238657, + -0.05583400269986542, + 0.23127236644816512, + 0.08622603991048908, + -0.1414088621939273, + 0.08307155457175744, + -0.05707246171782524, + -0.07847558103826663, + 0.18566867545223464, + 0.07465225838683566, + -0.03612341495316358, + 0.2807540067144568 + ], + "linear_two_means_he": [ + -0.02507127125955947, + 0.04596272184970461, + -0.15112561429658566, + -0.1374721379066834, + 0.09718108698786107, + 0.10249319831779265, + -0.15247727952227522, + 0.10454195966689624, + -0.08226996977894457, + -0.03691591943343716, + 0.03574415823087041, + 0.05087226927797465, + -0.1453044915586461, + -0.04731206155150875, + 0.13789590236740085, + -0.04381323022526035, + -0.12228235375094398, + 0.047842457135033024, + -0.08288217429759909, + 0.012966515842096142, + 0.06203371502217571, + 0.22586469347697694, + 0.09933306274707533, + -0.019199162493873927, + 0.09881586564720174, + -0.47166435428573183, + -0.06431256316382453, + 0.030928687181711743, + -0.014426195456820558, + -0.06831882976423932, + 0.5967822317964825, + 0.012358250895681832, + -0.0644548914167879, + -0.09723743812718905, + 0.011175387173342946, + -0.006526642242854383, + 0.020319442604923476, + 0.18598181884710724, + 0.06498906889638778, + -0.10901688493467754, + -0.02607121690513947, + -0.0023322330823423824, + -0.0731648635930143, + -0.022165242808381856, + -0.042825214594514105, + -0.07592037681372436, + -0.028103187191426626, + -0.13827287856275716, + -0.00787945227225964, + 0.01849113741326002 + ], + "linear_two_means_him": [ + 0.03274469080260336, + 0.04581813374168451, + -0.026945189186177193, + -0.13407252956347865, + 0.16884558738945282, + 0.09192076802072385, + -0.09418313293324665, + 0.19231259683545218, + -0.051249728933154076, + 0.007521123126469416, + -0.04571459015112196, + 0.12833181734308483, + -0.1278512918661992, + -0.015857657944589244, + 0.15518354751548874, + -0.02132442786019474, + 
-0.08428802369055341, + -0.028540565026732807, + -0.010761800573271772, + -0.08433668206353717, + -0.005499833961154117, + 0.243317507296093, + 0.08314093080303513, + -0.00962565272761784, + 0.1581706976148933, + -0.5027186920172938, + -0.05450523206314018, + -0.0027956453045591585, + 0.055804317230870495, + -0.18771701233228788, + 0.5036565434812432, + 0.10071955410091013, + -0.11318052913280709, + -0.0674138196131925, + -0.005476446677888, + 0.06744165221081247, + 0.04763799051062047, + 0.08344297504799277, + 0.00587556738207631, + -0.16232111695448279, + 0.02113674901046344, + 0.06500096587143649, + -0.06703924457042704, + 0.027300403992778298, + 0.027029765785151295, + -0.10973711447755685, + -0.03580039144668812, + -0.1871164515934015, + 0.004207697577063773, + -0.006160639614912668 + ], + "linear_two_means_she": [ + 0.0009913532327193771, + 0.019614996218358445, + -0.10137458070759878, + -0.1419246727077103, + 0.11099749274807863, + 0.07725570313133208, + -0.1333641185879623, + 0.13593472890924266, + -0.08624209956798193, + -0.03775491820440955, + 0.01492974724696289, + 0.06709673588063728, + -0.11287033724949981, + -0.0356101839467477, + 0.18073558501242507, + -0.015729309838656443, + -0.08897671712813923, + 0.06013491345487104, + -0.0702819859100635, + -0.01917071317539572, + 0.05669129965532317, + 0.24108544171851742, + 0.13696354777972908, + 0.025229142565868203, + 0.15363565468800802, + -0.4928391882260989, + -0.052794524798579136, + 0.025883369518779613, + 0.00630089851518296, + -0.1373974055373634, + 0.5705934984785015, + 0.04477737317504965, + -0.0818754441427187, + -0.10221629012069311, + -0.015207746265859648, + -0.016075241293742035, + 0.04001737183455121, + 0.15183626819991988, + 0.052401184528904333, + -0.1319141831616075, + -0.011991901928596681, + 0.005669978817853338, + -0.03312199190971774, + -0.011478165520904714, + -0.016206515466013413, + -0.07816734915735406, + -0.03680964180126183, + -0.15836588753390368, + -0.010540214412538516, + 
0.05746313462239602 + ], + "linear_two_means_her": [ + 0.006682066310324496, + 0.0721658593730306, + -0.07669622277516402, + -0.12961999476245178, + 0.15502918162923526, + 0.11715826320718434, + -0.11329629386755959, + 0.1609198275931058, + -0.04727759914411676, + 0.00836012189744181, + -0.024900179167214452, + 0.11210735074042223, + -0.16028544617534557, + -0.02755953554935032, + 0.11234386487046452, + -0.04940834824679864, + -0.11759366031335812, + -0.040833021346570825, + -0.02336198896080744, + -0.05219945304604531, + -0.00015741859430162125, + 0.22809675905455243, + 0.04551044577038141, + -0.05405395778735998, + 0.10335090857408702, + -0.48154385807692685, + -0.0660232704283855, + 0.0022496723583729367, + 0.03507722325886698, + -0.11863843655916378, + 0.5298452767992242, + 0.06830043182154232, + -0.09575997640687633, + -0.06243496761968843, + 0.020906686761314602, + 0.07699025126170013, + 0.027940061280992766, + 0.11758852569518013, + 0.018463451749559753, + -0.13942381872755277, + 0.0070574340339206445, + 0.0569987539712408, + -0.10708211625372363, + 0.016613326705301197, + 0.0004110666566505619, + -0.1074901421339271, + -0.027093936836852947, + -0.16702344262225488, + 0.006868459717342648, + -0.04513263682404869 + ], + "linear_two_means_engineer": [ + -0.06426400243213064, + -0.055014413071649795, + 0.15237905984589217, + -0.04231165552642368, + 0.05128665797818696, + 0.006702115208194068, + -0.2640015081559008, + 0.07081045745749148, + 0.08036399375736768, + -0.09443426456073081, + 0.2350664192982535, + 0.08997992637483913, + -0.11121041773264433, + 0.15325072571025908, + -0.18988897624293705, + -0.11647961562234029, + -0.07135548201102163, + 0.22013193439039092, + -0.04993447723220356, + -0.02849034643020615, + 0.10438359096751304, + 0.11958362190303527, + -0.19731527943132057, + -0.05502975908526194, + 0.09821373222468487, + -0.2885741518590411, + -0.09828220990033086, + -0.2284312365126377, + -0.04459101777245995, + 0.17485759381170884, + 
0.3219861451542496, + -0.2955746954586103, + -0.04285906167673526, + -0.10364738258872655, + 0.191589858607148, + 0.06667707139823251, + 0.014410547559358507, + 0.22866368884829152, + 0.18068174353565453, + 0.06941164919968533, + 0.07214126829349139, + 0.013482475340456348, + 0.024272781596345674, + -0.19120243299987477, + 0.05056964544544692, + -0.03500162567307298, + -0.054125039202910216, + -0.08048677555692896, + 0.014538664043682616, + 0.1415444294414869 + ], + "linear_two_means_banker": [ + 0.056764988050817644, + 0.08293772881612002, + -0.008223416478587913, + -0.0686972047368485, + 0.25552339092742915, + -0.011160535054728003, + -0.21841948587010718, + 0.03831483376309934, + -0.25194212117163, + 0.01124905889641891, + -0.0563139484163981, + 0.17776204360183548, + -0.03584752032974893, + 0.08739466192035919, + 0.06975480454301729, + -0.08791898834575193, + 0.004384097891282766, + 0.02608615534332333, + 0.04574923379100179, + 0.11176896589035235, + 0.21570860897845326, + 0.019372613857345945, + -0.21744168961213908, + 0.09215350939080506, + -0.15632310511053565, + -0.34857472312462695, + 0.005741336233414801, + -0.10073473855932036, + -0.14449841418842677, + 0.17774001710605142, + 0.10865399463486754, + -0.1665878776138359, + 0.13371269513040748, + 0.02672950819630324, + 0.05849372939694325, + -0.11884887556100787, + -0.2946493981384101, + 0.07456575974547455, + 0.22454306754843206, + -0.17667271198549583, + 0.015292753682413184, + 0.11898405140830488, + 0.12485584818770233, + -0.006091312193588385, + 0.10849455849649992, + -0.1648267198215359, + -0.16047772287042478, + -0.04224448622375791, + 0.22534538350951358, + 0.14390563425036884 + ], + "linear_two_means_nurse": [ + 0.05844632238367545, + -0.046870199799296255, + -0.09861839460896223, + -0.21939705946784796, + 0.14470623190973592, + 0.029705735670130795, + -0.1370800501815668, + 0.17573174262523478, + 0.06318390751773008, + -0.176957926887374, + 0.12264139367936702, + 0.10285683408039564, + 
0.07213879744474296, + 0.06741549308601777, + 0.03527253343986835, + -0.10912696964995701, + -0.1287925830539588, + 0.12045776741490864, + -0.04804253731512363, + 0.08080704143818226, + -0.11466689002887823, + 0.31044252863543187, + 0.05628056652192304, + 0.06959450322808786, + -0.055740976426830346, + -0.29519290967233236, + 0.06845976170296676, + -0.14817699723970285, + -0.07325078076996139, + 0.012810716369750359, + 0.3222343262190283, + 0.05325846607927745, + -0.05740495451485347, + -0.06306126430199212, + 0.1342646761818469, + 0.12158503498488703, + 0.10694053583754631, + -0.0002719302877073986, + 0.3170906167394461, + -0.03705607594756484, + -0.032336314485466, + 0.07448532763021165, + 0.06419079602452518, + 0.09463053364544989, + 0.14210000723526234, + -0.12795344626614577, + -0.053550212083182405, + -0.022041144406395652, + 0.0036775805289112396, + 0.26569992357040223 + ], + "linear_two_means_receptionist": [ + 0.09451416054101652, + -0.06483258681225645, + -0.012243707657382932, + -0.01199155540739322, + 0.18268125396884974, + -0.1899533014837988, + -0.1518236710755608, + 0.18701590698365797, + -0.11111239966606287, + -0.22970375089695594, + -0.058730241014120374, + 0.00033035122407720774, + -0.002355714641120804, + 0.16022104109957586, + 0.04302986917074675, + -0.003024618955380896, + -0.24936827562664693, + 0.22684748399358287, + 0.14424281645314627, + 0.09780409116043857, + 0.025846381913280377, + 0.34179071531899846, + -0.13363283113431113, + 0.11062646251579758, + 0.08280180851444766, + -0.15877694815072663, + 0.04966459295849106, + 0.11389623839564306, + -0.051047308039327305, + -0.1223052067384964, + -0.015809963768599847, + 0.027938087860524978, + 0.034610957107357565, + 0.08552860597507576, + 0.10804715046071488, + 0.1453081163517979, + 0.15449511003297323, + 0.07851048937559837, + 0.24188087372182449, + 0.041703259361273545, + 0.07380010486443837, + 0.10667235399795065, + -0.0873670697618532, + 0.18345974720693775, + -0.0736935161104532, + 
-0.1832773512328741, + -0.07801806356632346, + 0.08916017678362433, + 0.13588659794139515, + 0.21382565147508412 + ], + "hard_two_means_engineer": [ + -0.0615298187707372, + -0.05028601036959615, + 0.14625994806668458, + -0.0424608011177029, + 0.04662619217494263, + 0.035971693891699685, + -0.2614795462504052, + 0.08544722077904941, + 0.09908413436732402, + -0.10654383171055688, + 0.24193136298416668, + 0.06281489366487647, + -0.07094072290656792, + 0.15563699516750512, + -0.17546459592973965, + -0.10788505555987593, + -0.10208930148281103, + 0.2142971153539293, + -0.02046253337329823, + -0.03492626119604156, + 0.1161578043431099, + 0.13931220099143204, + -0.19492818673872775, + -0.03370842076926304, + 0.11867118939339891, + -0.28049364753132755, + -0.09949948214236101, + -0.23108240861796886, + -0.047994279083554574, + 0.1605071725337192, + 0.30035369714230525, + -0.2865402485790226, + -0.047808526589933883, + -0.11179440455103819, + 0.18119746140843931, + 0.056561581781467044, + 0.011489734086414911, + 0.22424183672768963, + 0.18232854844511018, + 0.054943481732343535, + 0.06980935405220816, + 0.005875363404471246, + 0.05134113471622906, + -0.20198533443899558, + 0.05206143974434169, + -0.04640693621701136, + -0.04412743717130785, + -0.09732366092013081, + 0.011571589440019493, + 0.1503677911085482 + ], + "hard_two_means_banker": [ + 0.05831517220947051, + 0.08255060229350653, + -0.010687978298559553, + -0.07024751366240858, + 0.2519342862586521, + 0.009542531419089874, + -0.21650663602619274, + 0.05332891540326999, + -0.24100385837596072, + 0.0003716364196564882, + -0.050541299381410484, + 0.1554367318486496, + -0.002065675915319115, + 0.08807187829482309, + 0.08283185577718966, + -0.0800012922655011, + -0.018950955749106952, + 0.020637597775664977, + 0.06793581151908434, + 0.10602212885129779, + 0.22410876081073244, + 0.032740401032898825, + -0.2136818122329968, + 0.11039690242045577, + -0.13997799196096344, + -0.3437597785963685, + 0.008583286067544691, + 
-0.1051446329873683, + -0.1474241786552995, + 0.16539534104352926, + 0.09068823158556982, + -0.15859570091455277, + 0.1261277317800112, + 0.017357039720143373, + 0.04956263110118833, + -0.12738975430010907, + -0.2953207127814389, + 0.07035546665029485, + 0.2254337673876172, + -0.18608990288627977, + 0.012733957733472462, + 0.11371769553436785, + 0.14646096558896335, + -0.011171241232389892, + 0.10793064489326969, + -0.17279882758386675, + -0.15485279947290634, + -0.05133406428332264, + 0.22262903819597718, + 0.15030577992094413 + ], + "hard_two_means_nurse": [ + 0.06146327453686229, + -0.01286496315865509, + -0.11479833599553839, + -0.2058079638529719, + 0.14844784216731188, + 0.023452993568146493, + -0.13882932214717714, + 0.1288676242431418, + 0.08079410674381028, + -0.15267628895756552, + 0.11257113950338321, + 0.13785016487903767, + 0.013821770549912307, + 0.07638901547180618, + 0.005220059529020181, + -0.1282148848106331, + -0.10724841128181291, + 0.13410391057870605, + -0.06691573398880354, + 0.09339094022931868, + -0.11785750549188001, + 0.3117332560185772, + 0.03633362132737723, + 0.03536662418177696, + -0.07770569091973209, + -0.28846856629532674, + 0.03397359452510464, + -0.1238270024928557, + -0.06765794140528432, + 0.036466114021770274, + 0.35185870053329116, + 0.036297264946993826, + -0.01802508788477762, + -0.027447937198398376, + 0.15131339466835741, + 0.1367508816615861, + 0.09447801560633352, + 0.01083124656010927, + 0.31931102669206046, + -0.04162669860832338, + -0.023305127074758826, + 0.07503755989512789, + 0.035334299846069575, + 0.07303391094593463, + 0.15697389277542634, + -0.12640969670612956, + -0.042111810108238674, + -0.04490123431482444, + 0.010107187939507111, + 0.262321445674747 + ], + "hard_two_means_receptionist": [ + 0.09674335685164037, + -0.034910052937179456, + -0.025769724689733035, + 0.0003408204073268948, + 0.18692608456209275, + -0.20099618140575293, + -0.15387128586208695, + 0.14188947620096687, + -0.09860789226373355, + 
-0.20549041401062226, + -0.06911095070338602, + 0.037010992313086875, + -0.06256521541802815, + 0.1679078866637648, + 0.013165465149707359, + -0.02188881872423698, + -0.22420978151645113, + 0.24027678791492135, + 0.12173488329552958, + 0.11038203768839533, + 0.020792665441772953, + 0.33933251012588445, + -0.15213368491908033, + 0.07571326217192854, + 0.0591517495959763, + -0.15417358458567518, + 0.018661078551310818, + 0.1364326125454474, + -0.04535731363484939, + -0.09824696641401537, + 0.014991948104691483, + 0.010918762270965297, + 0.07117943893398276, + 0.11927432641675981, + 0.12539537290888017, + 0.16090048664140563, + 0.14374724131582556, + 0.0893773675001957, + 0.24358870629291604, + 0.04022442205106769, + 0.08240657141634557, + 0.10857091604183973, + -0.11847301952386918, + 0.16588631835880335, + -0.0604994355066736, + -0.17978269572934102, + -0.0694985544568145, + 0.07155565829259858, + 0.14225409779564568, + 0.20914431866504204 + ], + "hard_two_means_boy": [ + -0.08233869790364136, + 0.01176112910229632, + -0.029354421664617908, + -0.11785738925591122, + 0.2374148337319568, + -0.0075029559423780146, + -0.17659279239586476, + 0.027633850309864774, + -0.1152995062971225, + 0.09075349808875449, + 0.02836374964167929, + 0.1343178807958097, + -0.1558218182607306, + 0.02652591735658287, + 0.12744446546535132, + -0.08432153027611451, + -0.017331551904295, + 0.1590225437950705, + -0.18263648425053264, + 0.11105959675158651, + -0.17924486839119158, + 0.15997241429057224, + 0.04246950026749356, + 0.08695102346300654, + -0.031672011588002466, + -0.42835387214362464, + -0.07705335565597732, + 0.027214634797224094, + 0.09431054709518588, + -0.0883024797955068, + 0.498730645137716, + -0.09497822573168696, + -0.05718263942349022, + 0.07758774887517442, + 0.16118891300167687, + 0.14968144938848388, + 0.05316699891799519, + -0.1434336200114328, + 0.04206219646662961, + -0.12754823516106828, + -0.07607733927253879, + 0.062112146146456576, + -0.19567175761378108, + 
0.023630893562061474, + 0.16100118048659245, + -0.06586297910813095, + 0.0031633274726710625, + -0.10798405610676251, + 0.07410556241487891, + -0.04737364343000658 + ], + "hard_two_means_brother": [ + 0.09355176730086777, + 0.12256403520417863, + 0.06776850191177644, + -0.03732057833194877, + 0.1973327179716123, + 0.03647561569899152, + -0.1927079246169542, + 0.10286331869848622, + -0.2224140473182231, + 0.0017713408944394562, + 0.061318463134264894, + 0.30078445384743735, + -0.29814637663525245, + -0.07829957366464688, + 0.056973105522299054, + -0.040020452271316814, + -0.027592324015172268, + 0.03933706482881305, + -0.11465243907201403, + 0.14866947649366527, + -0.11839163417576236, + 0.10171662000727881, + 0.04392125246078521, + 0.03834884533659155, + 0.0021902060019948627, + -0.39977887905454507, + -0.009133730411963414, + -0.16446671183826925, + -0.01445233209192701, + 0.009499897611979814, + 0.4120250039636035, + -0.047250515132056914, + -0.03443655174881199, + -0.002306661428680433, + 0.13278718253020522, + 0.016054588321409494, + 0.02603943417088129, + 0.0536546887730865, + 0.052058594849243044, + 0.05245307618307668, + 0.007321362039567724, + 0.1560802316952669, + -0.19833881114270957, + -0.060761388445296785, + 0.035309867066888365, + 0.012233264543807625, + -0.22528946293735996, + -0.25468297237496323, + 0.021109753145762584, + 0.03315613585319681 + ], + "hard_two_means_girl": [ + -0.059051184553146355, + 0.09130864580381053, + -0.09433445291364079, + -0.1003637202968386, + 0.20984126422924856, + 0.1891986248941208, + -0.1612958379331935, + 0.06632757751512357, + 0.039987571005311515, + 0.038970920808602316, + 0.0627605581156269, + -0.008416612908176607, + 0.04697955816742365, + 0.055500409623059965, + 0.18758494041746404, + -0.05009970673487836, + -0.203438159767199, + 0.1367272966450184, + -0.0017339020456125648, + 0.08310038512270256, + -0.10103948243654516, + 0.3000659826824898, + 0.031993904448984534, + 0.18975505911288815, + 0.08180516528881598, + 
-0.3625211325292369, + -0.13263769261468908, + 0.04184559109223322, + 0.07807784138721574, + -0.1566518269355896, + 0.3874646653623079, + -0.05477055699459896, + -0.03816174762959326, + 0.06904939724928574, + 0.11157898137858574, + 0.09944435514507138, + 0.01568439700054018, + -0.1592907801843134, + 0.056638525551177606, + -0.23523162783503626, + -0.08010708624283328, + 0.009526082580762835, + -0.04524274813689534, + -0.08144056495839333, + 0.19175359863488856, + -0.14372849621884304, + 0.08886968844489498, + -0.2572285093461013, + 0.06207289908291371, + 0.009884692917453634 + ], + "hard_two_means_sister": [ + 0.12248107633941492, + 0.22138328907948263, + -0.012954045706107253, + -0.015588770907164453, + 0.1630789828718641, + 0.28083149581140743, + -0.17370502309033872, + 0.15093125859943654, + -0.029506037718332184, + -0.06255644534891791, + 0.10404848275060993, + 0.12347010445017062, + -0.04621292183347117, + -0.042305518464498454, + 0.13168363212206985, + 0.0024921893399441654, + -0.2587864189771069, + 0.01164041533313236, + 0.11007686661686816, + 0.1139366709118734, + -0.021239665019178636, + 0.27575023507492097, + 0.030907765609037744, + 0.1660589049279059, + 0.14315915658432826, + -0.3179970399850531, + -0.07818431719550811, + -0.14629115784856864, + -0.03461768649837656, + -0.07540823990526033, + 0.27380295054152354, + 0.0026981443537368206, + -0.010807525814932504, + -0.012913573774685147, + 0.0711584022604951, + -0.046353295366842115, + -0.020523964768226453, + 0.033955862186895676, + 0.0701662872588102, + -0.08131844755576975, + 0.0023153404134622197, + 0.09075430096508427, + -0.011465818821832785, + -0.19128819261369961, + 0.0735125808948881, + -0.08449649676813556, + -0.11881928014809101, + -0.4400844295880006, + 0.006161972725925848, + 0.10428627681320381 + ], + "inlp_engineer": [ + -0.052782267332077026, + -0.0534612238407135, + 0.14106474816799164, + -0.026172973215579987, + 0.06754761189222336, + 0.017533697187900543, + -0.260309636592865, + 
0.07288291305303574, + 0.1343180537223816, + -0.11478697508573532, + 0.2469760626554489, + 0.02129148691892624, + -0.08431467413902283, + 0.1557355523109436, + -0.18594929575920105, + -0.09864699095487595, + -0.10250400006771088, + 0.17043571174144745, + -0.0017871428281068802, + -0.014588537625968456, + 0.11120335757732391, + 0.16553060710430145, + -0.18866267800331116, + -0.028261572122573853, + 0.11305459588766098, + -0.24370981752872467, + -0.11005059629678726, + -0.22849202156066895, + -0.025211429223418236, + 0.1481746882200241, + 0.2578311562538147, + -0.27018457651138306, + -0.04134102910757065, + -0.11011195182800293, + 0.1624029278755188, + 0.028553470969200134, + -0.0067904251627624035, + 0.2444734275341034, + 0.18399406969547272, + 0.05907474458217621, + 0.09790921956300735, + -0.01432067435234785, + 0.09672029316425323, + -0.23263487219810486, + 0.048395272344350815, + -0.02799120359122753, + -0.0380919948220253, + -0.09178897738456726, + 0.0034351442009210587, + 0.16631698608398438 + ], + "inlp_homemaker": [ + -0.18342512845993042, + 0.05450461059808731, + -0.05481652170419693, + -0.1319858729839325, + 0.21286684274673462, + 0.031099338084459305, + -0.20349711179733276, + 0.046906277537345886, + -0.13989706337451935, + 0.02225230261683464, + -0.0062011610716581345, + -0.08698759227991104, + 0.00638633593916893, + -0.013031614944338799, + -0.01284074503928423, + -0.09018639475107193, + -0.09081724286079407, + 0.29148492217063904, + 0.2009771168231964, + 0.20561586320400238, + -0.14814838767051697, + 0.2990812063217163, + -0.15312905609607697, + 0.23493033647537231, + 0.05826185271143913, + -0.15942531824111938, + 0.07459422200918198, + -0.07452902942895889, + 0.10226386785507202, + 0.13873976469039917, + -0.012295234948396683, + -0.013750902377068996, + 0.043406546115875244, + 0.14195837080478668, + 0.15917563438415527, + 0.03275652229785919, + -0.13516400754451752, + 0.15392933785915375, + 0.1699870228767395, + 0.04027242586016655, + 
-0.01579558104276657, + 0.10894948989152908, + -0.13225291669368744, + 0.060379303991794586, + -0.012283140793442726, + -0.13622595369815826, + -0.13849352300167084, + -0.1582598090171814, + 0.05875536426901817, + 0.20893695950508118 + ], + "oscar_programmer": [ + -0.07830896973609924, + -0.07528749108314514, + 0.18948820233345032, + 0.148856520652771, + 0.06805361062288284, + 0.09882345795631409, + -0.20125211775302887, + -0.11760184168815613, + -0.047656722366809845, + 0.2133728563785553, + 0.23543985188007355, + 0.09724324941635132, + 0.03016793727874756, + 0.2407066524028778, + 0.06938372552394867, + 0.026680808514356613, + -0.10048334300518036, + 0.18456003069877625, + 0.188383087515831, + 0.043253201991319656, + -0.021929919719696045, + 0.12237273901700974, + -0.2533322274684906, + 0.28144603967666626, + 0.145513653755188, + -0.09027981758117676, + -0.02162027545273304, + -0.22467951476573944, + -0.008976730518043041, + -0.06909440457820892, + 0.1673312932252884, + -0.24996359646320343, + 0.02877015806734562, + -0.1258096545934677, + 0.06085729971528053, + 0.08735853433609009, + -0.012516401708126068, + 0.06429371237754822, + 0.1277480572462082, + -0.07779265195131302, + 0.23643597960472107, + 0.07029429078102112, + -0.11198242008686066, + 0.1061592698097229, + -0.05602099001407623, + -0.08935096859931946, + 0.18985341489315033, + 0.04543842002749443, + -0.0005864029517397285, + 0.0003419801068957895 + ], + "oscar_grandpa": [ + -0.07416621595621109, + 0.08458206057548523, + -0.053309064358472824, + -0.16716738045215607, + 0.09583862870931625, + -0.026479464024305344, + -0.054730307310819626, + 0.10439373552799225, + -0.22542959451675415, + -0.03889299929141998, + -0.140243798494339, + 0.3228200376033783, + 0.006546759977936745, + -0.10860257595777512, + 0.2503794729709625, + 0.1383737474679947, + -0.08268776535987854, + 0.22803069651126862, + 0.2692744731903076, + 0.07927031069993973, + -0.08930720388889313, + 0.06765256077051163, + 0.11146003007888794, + 
0.14737895131111145, + 0.2791425883769989, + -0.10617826879024506, + -0.15906700491905212, + 0.059100620448589325, + 0.1821293979883194, + -0.23608064651489258, + -0.10596588999032974, + 0.25953924655914307, + -0.01979101449251175, + 0.3231821060180664, + 0.06740891933441162, + 0.2706506848335266, + 0.0015214867889881134, + -0.05631883069872856, + 0.0701802670955658, + -0.06867749989032745, + -0.047012969851493835, + 0.1160019040107727, + -0.3308452069759369, + 0.02871404029428959, + 0.16991694271564484, + -0.06927385926246643, + 0.04270310699939728, + -0.19192083179950714, + 0.027655234560370445, + 0.007677375338971615 + ], + "oscar_grandma": [ + -0.018536921590566635, + 0.12609684467315674, + -0.15519794821739197, + -0.08694198727607727, + 0.08195910602807999, + 0.04313560575246811, + -0.09287772327661514, + 0.06591854244470596, + -0.012779037468135357, + -0.019507993012666702, + -0.19680896401405334, + 0.12208959460258484, + 0.174989253282547, + -0.017019614577293396, + 0.32496196031570435, + 0.13468721508979797, + -0.14616119861602783, + 0.1619987040758133, + 0.3151642680168152, + 0.05573020502924919, + 0.040678128600120544, + 0.18073932826519012, + 0.010412609204649925, + 0.1281396895647049, + 0.3903489112854004, + -0.13189978897571564, + -0.12766925990581512, + 0.11984847486019135, + 0.23445692658424377, + -0.3537692725658417, + -0.05933094397187233, + 0.28710752725601196, + -0.13906823098659515, + 0.43696609139442444, + -0.00942843034863472, + 0.13638177514076233, + 0.05776602774858475, + 0.08829684555530548, + 0.13935112953186035, + -0.08038724958896637, + -0.05902135744690895, + 0.06793523579835892, + -0.14485999941825867, + 0.07194627076387405, + 0.06938131153583527, + -0.007875805720686913, + 0.08729098737239838, + -0.26941344141960144, + 0.039947230368852615, + 0.010896616615355015 + ], + "oscar_bias1": [ + 0.08474383216258977, + 0.0945811531047026, + 0.07428173109827416, + 0.024479834032266147, + 0.14469250544588316, + 0.2530798343782911, + 
0.10930049304612847, + -0.06673947901906906, + 0.12434172735746349, + 0.0993960907920231, + -0.005839121876168384, + -0.20531481852387545, + 0.2639451804182061, + 0.037126206819276295, + 0.09238763082193571, + 0.11035712444527741, + -0.16029347056639812, + 0.05071308343986695, + 0.3471755325221579, + -0.08847484367886994, + -0.10499192878117405, + 0.11162700081145335, + -0.048837720672568766, + 0.13777196765170105, + 0.11012939179972218, + 0.24360542041955216, + -0.2284846261778304, + 0.1654286047628525, + 0.09593342221771839, + -0.12955089953583826, + -0.3755603276721494, + 0.0016714486406543773, + 0.1787442511314885, + 0.14721545106478545, + -0.0020732543727022246, + -0.0630506707229516, + -0.05410667297409925, + 0.04081149967427376, + 0.019907888418869278, + -0.14308377250011778, + 0.0349490091871661, + -0.10417437038931851, + 0.1943431938272854, + 0.15112671438161962, + 0.008346949102905113, + -0.07002366108042024, + 0.027757956870002695, + -0.1897564411718682, + -0.005211557668612942, + 0.0018718653159160395 + ], + "oscar_bias2": [ + -0.07375836241905759, + -0.003531244283090557, + 0.23980375746558857, + 0.055099282155637724, + -0.09354853471244139, + -0.03623296399517668, + -0.0691153173191639, + -0.12025046794231448, + -0.061813955528240296, + 0.06947850278037167, + 0.11670592968750768, + 0.13601944641445932, + -0.18235506325127593, + 0.027291841412269383, + -0.11372389081903568, + 0.05454688013100257, + 0.27933836816694524, + -0.10084788999856882, + -0.2461105105991461, + -0.015257886139356254, + 0.043195013966402775, + -0.23837120799364592, + -0.0008649711515565736, + -0.18881254808973197, + 0.030285064291280767, + -0.2535447782593246, + -0.0880216817353963, + -0.2231164800064211, + -0.14925981567619542, + 0.18033630869008552, + 0.17550613393187223, + -0.33909388125586026, + -0.08692648004795574, + -0.3118441049458602, + -0.04259817267933873, + -0.1243972891709841, + -0.10064526922062178, + 0.11456477598533303, + -0.008342291120067315, + 
0.11684959186158074, + 0.06611318716932911, + 0.058751772007228635, + 0.030128999216968336, + -0.07357844288626657, + 0.10185220889088065, + 0.11829129739777486, + 0.012937075774670055, + 0.1987487011878333, + -0.09678284262303441, + -0.02743255064684724 + ] +} \ No newline at end of file diff --git a/tests/fairness/bias_metrics_test.py b/tests/fairness/bias_metrics_test.py new file mode 100644 index 00000000000..2cfbf270c94 --- /dev/null +++ b/tests/fairness/bias_metrics_test.py @@ -0,0 +1,79 @@ +import pytest +import torch +import json + +from allennlp.common.checks import ConfigurationError +from allennlp.common.testing import AllenNlpTestCase, multi_device +from allennlp.fairness.bias_metrics import ( + WordEmbeddingAssociationTest, + EmbeddingCoherenceTest, + NaturalLanguageInference, +) + + +class WordEmbeddingAssociationTestTest(AllenNlpTestCase): + def setup_method(self): + # embedding data from VERB demo + emb_filename = str(self.FIXTURES_ROOT / "fairness" / "bias_embeddings.json") + with open(emb_filename) as emb_file: + emb_data = json.load(emb_file) + + self.X = torch.cat( + [ + torch.Tensor(emb_data["he"]).reshape(1, -1), + torch.Tensor(emb_data["him"]).reshape(1, -1), + ] + ) + self.Y = torch.cat( + [ + torch.Tensor(emb_data["she"]).reshape(1, -1), + torch.Tensor(emb_data["her"]).reshape(1, -1), + ] + ) + self.A = torch.cat( + [ + torch.Tensor(emb_data["engineer"]).reshape(1, -1), + torch.Tensor(emb_data["banker"]).reshape(1, -1), + ] + ) + self.B = torch.cat( + [ + torch.Tensor(emb_data["nurse"]).reshape(1, -1), + torch.Tensor(emb_data["receptionist"]).reshape(1, -1), + ] + ) + + def teardown_method(self): + pass + + def test_invalid_dims(self): + weat = WordEmbeddingAssociationTest() + with pytest.raises(ConfigurationError): + weat(torch.zeros(2), torch.zeros(2), torch.zeros(2), torch.zeros(2)) + with pytest.raises(ConfigurationError): + weat(torch.zeros((2, 2)), torch.zeros((2, 2)), torch.zeros(2), torch.zeros(2)) + with 
pytest.raises(ConfigurationError): + weat(torch.zeros((2, 2)), torch.zeros((2, 3)), torch.zeros((2, 2)), torch.zeros((2, 2))) + with pytest.raises(ConfigurationError): + weat(torch.zeros((2, 2)), torch.zeros((2, 2)), torch.zeros((2, 3)), torch.zeros((2, 2))) + + @multi_device + def test_weat(self, device: str): + self.X = self.X.to(device) + self.Y = self.Y.to(device) + self.A = self.A.to(device) + self.B = self.B.to(device) + + weat = WordEmbeddingAssociationTest() + test_weat_score = weat(self.X, self.Y, self.A, self.B) + assert test_weat_score.item() == pytest.approx(1.872, rel=1e-4) + + +class EmbeddingCoherenceTestTest(AllenNlpTestCase): + def test_invalid_dims(self): + EmbeddingCoherenceTest() + + +class NaturalLanguageInferenceTest(AllenNlpTestCase): + def test_invalid_dims(self): + NaturalLanguageInference() From 22574b1f4b2f194ced722993a4b3595b75b4f0c0 Mon Sep 17 00:00:00 2001 From: Arjun Subramonian Date: Wed, 21 Apr 2021 12:23:07 -0700 Subject: [PATCH 2/7] finished bias metrics --- allennlp/fairness/bias_metrics.py | 169 ++++++++++++++++++++++++++-- tests/fairness/bias_metrics_test.py | 56 ++++++++- 2 files changed, 211 insertions(+), 14 deletions(-) diff --git a/allennlp/fairness/bias_metrics.py b/allennlp/fairness/bias_metrics.py index 72d79c57fb1..c62b4be3e3e 100644 --- a/allennlp/fairness/bias_metrics.py +++ b/allennlp/fairness/bias_metrics.py @@ -1,8 +1,7 @@ """ -A suite of metrics to quantify how much bias is -encoded by word embeddings and determine the effectiveness -of bias mitigation. +A suite of metrics to quantify how much bias is encoded by word embeddings +and determine the effectiveness of bias mitigation. Bias metrics are based on: @@ -17,6 +16,11 @@ Biased Inferences of Word Embeddings](https://api.semanticscholar.org/CorpusID:201670701). ArXiv, abs/1908.09369. +4. Rathore, A., Dev, S., Phillips, J.M., Srikumar, V., Zheng, Y., Yeh, C.M., Wang, J., Zhang, +W., & Wang, B. (2021). 
[VERB: Visualizing and Interpreting Bias Mitigation Techniques for +Word Representations](https://api.semanticscholar.org/CorpusID:233168618). +ArXiv, abs/2104.02797. + """ import torch @@ -65,15 +69,19 @@ def __call__( attribute_embeddings1 : `torch.Tensor`, required. A tensor of size (attribute_embeddings1_batch_size, ..., dim) containing attribute word - embeddings related to a concept group. For example, if the concept is professions, - attribute_embeddings1 could contain embeddings for stereotypically male professions, e.g. - "doctor", "banker", "engineer", etc. Represented as A. + embeddings related to a concept group associated with the concept group for target_embeddings1. + For example, if the concept is professions, attribute_embeddings1 could contain embeddings for + stereotypically male professions, e.g. "doctor", "banker", "engineer", etc. Represented as A. attribute_embeddings2 : `torch.Tensor`, required. A tensor of size (attribute_embeddings2_batch_size, ..., dim) containing attribute word - embeddings related to a different group for the same concept. For example, if the concept is - professions, attribute_embeddings2 could contain embeddings for stereotypically female - professions, e.g. "nurse", "receptionist", "homemaker", etc. Represented as B. + embeddings related to a concept group associated with the concept group for target_embeddings2. + For example, if the concept is professions, attribute_embeddings2 could contain embeddings for + stereotypically female professions, e.g. "nurse", "receptionist", "homemaker", etc. Represented as B. + + !!! Note + While target_embeddings1 and target_embeddings2 must be the same size, attribute_embeddings1 and + attribute_embeddings2 need not be the same size. 
# Returns @@ -129,8 +137,147 @@ def __call__( class EmbeddingCoherenceTest: - pass + """ + Embedding Coherence Test (ECT) score measures if groups of words + have stereotypical associations by computing the Spearman Coefficient + of lists of attribute embeddings sorted based on their similarity to + target embeddings. + + Based on: Dev, S., & Phillips, J.M. (2019). [Attenuating Bias in Word Vectors] + (https://api.semanticscholar.org/CorpusID:59158788). AISTATS. + """ + + def __call__( + self, + target_embeddings1: torch.Tensor, + target_embeddings2: torch.Tensor, + attribute_embeddings: torch.Tensor, + ) -> torch.FloatTensor: + """ + + # Parameters + + !!! Note + In the examples below, we treat gender identity as binary, which does not accurately + characterize gender in real life. + + target_embeddings1 : `torch.Tensor`, required. + A tensor of size (target_embeddings_batch_size, ..., dim) containing target word + embeddings related to a concept group. For example, if the concept is gender, + target_embeddings1 could contain embeddings for linguistically masculine words, e.g. + "man", "king", "brother", etc. Represented as X. + + target_embeddings2 : `torch.Tensor`, required. + A tensor of the same size as target_embeddings1 containing target word + embeddings related to a different group for the same concept. For example, + target_embeddings2 could contain embeddings for linguistically feminine words, e.g. + "woman", "queen", "sister", etc. Represented as Y. + + attribute_embeddings : `torch.Tensor`, required. + A tensor of size (attribute_embeddings_batch_size, ..., dim) containing attribute word + embeddings related to a concept associated with target_embeddings1 and target_embeddings2. + For example, if the concept is professions, attribute_embeddings could contain embeddings for + "doctor", "banker", "engineer", etc. Represented as AB. 
+ + # Returns + + ect_score : `torch.FloatTensor` + The Spearman Coefficient measuring the similarity of lists of attribute embeddings sorted + based on their similarity to the target embeddings. Ranges from [-1, 1], with values closer + to 1 indicating less biased associations. + + """ + # Some sanity checks + if target_embeddings1.ndim < 2 or target_embeddings2.ndim < 2: + raise ConfigurationError( + "target_embeddings1 and target_embeddings2 must have at least two dimensions." + ) + if attribute_embeddings.ndim < 2: + raise ConfigurationError("attribute_embeddings must have at least two dimensions.") + if target_embeddings1.size() != target_embeddings2.size(): + raise ConfigurationError( + "target_embeddings1 and target_embeddings2 must be of the same size." + ) + if attribute_embeddings.size(dim=-1) != target_embeddings1.size(dim=-1): + raise ConfigurationError("All embeddings must have the same dimensionality.") + + mean_target_embedding1 = target_embeddings1.flatten(end_dim=-2).mean(dim=0) + mean_target_embedding2 = target_embeddings2.flatten(end_dim=-2).mean(dim=0) + attribute_embeddings = attribute_embeddings.flatten(end_dim=-2) + + # Normalize + mean_target_embedding1 = torch.nn.functional.normalize(mean_target_embedding1, p=2, dim=-1) + mean_target_embedding2 = torch.nn.functional.normalize(mean_target_embedding2, p=2, dim=-1) + attribute_embeddings = torch.nn.functional.normalize(attribute_embeddings, p=2, dim=-1) + + # Compute cosine similarities + AB_sim_m = torch.matmul(attribute_embeddings, mean_target_embedding1) + AB_sim_f = torch.matmul(attribute_embeddings, mean_target_embedding2) + + return self.spearman_correlation(AB_sim_m, AB_sim_f) + + def _get_ranks(self, x: torch.Tensor) -> torch.Tensor: + tmp = x.argsort() + ranks = torch.zeros_like(tmp) + ranks[tmp] = torch.arange(len(x)) + return ranks + + def spearman_correlation(self, x: torch.Tensor, y: torch.Tensor): + x_rank = self._get_ranks(x) + y_rank = self._get_ranks(y) + + n = x.size(0) + 
upper = 6 * torch.sum((x_rank - y_rank).pow(2)) + down = n * (n ** 2 - 1.0) + return 1.0 - (upper / down) class NaturalLanguageInference: - pass + """ + Natural Language Inference (NLI) score measures the effect biased + associations have on decisions made in downstream tasks by predicting, + given neutrally-constructed pairs of sentences differing only in + the subject, if the second sentence is entailed by, contradicted by, or + neutral with respect to the first sentence. + + Based on: Dev, S., Li, T., Phillips, J.M., & Srikumar, V. (2020). [On Measuring and Mitigating + Biased Inferences of Word Embeddings](https://api.semanticscholar.org/CorpusID:201670701). + ArXiv, abs/1908.09369. + """ + + def __call__( + self, entailment_predictions: torch.Tensor, neutral_label: int = 0 + ) -> torch.FloatTensor: + """ + + # Parameters + + !!! Note + In the examples below, we treat gender identity as binary, which does not accurately + characterize gender in real life. + + entailment_predictions : `torch.Tensor`, required. + A tensor of size (batch_size, ..., dim) containing discrete integer entailment predictions for + neutrally-constructed pairs of sentences differing only in the subject. For example, + if the concept is gender, entailment_predictions could contain the entailment predictions + of: + + - "The driver owns a cabinet." -> "The man owns a cabinet." + + - "The driver owns a cabinet." -> "The woman owns a cabinet." + + - "The doctor eats an apple." -> "The man eats an apple." + + - "The doctor eats an apple." -> "The woman eats an apple." + + neutral_label : `int`, optional (default=`0`) + The discrete integer label corresponding to a neutral entailment prediction. + + # Returns + + nli_score : `torch.FloatTensor` + The percentage of sentence pairs predicted as neutral. A percentage + closer to 1 suggests lower bias, as bias will result in a higher + probability of entailment or contradiction. 
+ """ + return (entailment_predictions == neutral_label).float().mean() diff --git a/tests/fairness/bias_metrics_test.py b/tests/fairness/bias_metrics_test.py index 2cfbf270c94..16de51b3b6b 100644 --- a/tests/fairness/bias_metrics_test.py +++ b/tests/fairness/bias_metrics_test.py @@ -70,10 +70,60 @@ def test_weat(self, device: str): class EmbeddingCoherenceTestTest(AllenNlpTestCase): + def setup_method(self): + # embedding data from VERB demo + emb_filename = str(self.FIXTURES_ROOT / "fairness" / "bias_embeddings.json") + with open(emb_filename) as emb_file: + emb_data = json.load(emb_file) + + self.X = torch.cat( + [ + torch.Tensor(emb_data["he"]).reshape(1, -1), + torch.Tensor(emb_data["him"]).reshape(1, -1), + ] + ) + self.Y = torch.cat( + [ + torch.Tensor(emb_data["she"]).reshape(1, -1), + torch.Tensor(emb_data["her"]).reshape(1, -1), + ] + ) + self.AB = torch.cat( + [ + torch.Tensor(emb_data["engineer"]).reshape(1, -1), + torch.Tensor(emb_data["banker"]).reshape(1, -1), + torch.Tensor(emb_data["nurse"]).reshape(1, -1), + torch.Tensor(emb_data["receptionist"]).reshape(1, -1), + ] + ) + + def teardown_method(self): + pass + def test_invalid_dims(self): - EmbeddingCoherenceTest() + ect = EmbeddingCoherenceTest() + with pytest.raises(ConfigurationError): + ect(torch.zeros(2), torch.zeros(2), torch.zeros(2)) + with pytest.raises(ConfigurationError): + ect(torch.zeros((2, 2)), torch.zeros((2, 2)), torch.zeros(2)) + with pytest.raises(ConfigurationError): + ect(torch.zeros((2, 2)), torch.zeros((2, 3)), torch.zeros((2, 2))) + with pytest.raises(ConfigurationError): + ect(torch.zeros((2, 2)), torch.zeros((2, 2)), torch.zeros((2, 3))) + + @multi_device + def test_ect(self, device: str): + self.X = self.X.to(device) + self.Y = self.Y.to(device) + self.AB = self.AB.to(device) + + ect = EmbeddingCoherenceTest() + test_ect_score = ect(self.X, self.Y, self.AB) + assert test_ect_score.item() == pytest.approx(0.800, rel=1e-4) class 
NaturalLanguageInferenceTest(AllenNlpTestCase): - def test_invalid_dims(self): - NaturalLanguageInference() + @multi_device + def test_nli(self, device: str): + entailment_predictions = torch.eye(3, device=device).long() + assert NaturalLanguageInference()(entailment_predictions, neutral_label=1) == 1 / 3 From 596cba0d56d9aefa17771f24b2ab91b3670e6118 Mon Sep 17 00:00:00 2001 From: Arjun Subramonian Date: Wed, 21 Apr 2021 12:24:38 -0700 Subject: [PATCH 3/7] updated CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c1b3a6d6dcf..2c98fb79500 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added a `quiet` parameter to the `MultiProcessDataLoading` that disables `Tqdm` progress bars. - The test for distributed metrics now takes a parameter specifying how often you want to run it. - Created the fairness module and added four fairness metrics: `Independence`, `Separation`, `Sufficiency`, and `DemographicParityWithoutGroundTruth`. +- Added three bias metrics to the fairness module: `WordEmbeddingAssociationTest`, `EmbeddingCoherenceTest`, and `NaturalLanguageInference`. 
### Changed From 8d57b56457d1131d08242f4957c591db0f3cc051 Mon Sep 17 00:00:00 2001 From: Arjun Subramonian Date: Wed, 21 Apr 2021 12:47:26 -0700 Subject: [PATCH 4/7] fixed gpu issue --- allennlp/fairness/bias_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/allennlp/fairness/bias_metrics.py b/allennlp/fairness/bias_metrics.py index c62b4be3e3e..9a70ea864b1 100644 --- a/allennlp/fairness/bias_metrics.py +++ b/allennlp/fairness/bias_metrics.py @@ -219,7 +219,7 @@ def __call__( def _get_ranks(self, x: torch.Tensor) -> torch.Tensor: tmp = x.argsort() ranks = torch.zeros_like(tmp) - ranks[tmp] = torch.arange(len(x)) + ranks[tmp] = torch.arange(x.size(0), device=ranks) return ranks def spearman_correlation(self, x: torch.Tensor, y: torch.Tensor): From 95a2409f68426d3eaad2830c4b2585a7067e597c Mon Sep 17 00:00:00 2001 From: Arjun Subramonian Date: Wed, 21 Apr 2021 13:05:32 -0700 Subject: [PATCH 5/7] fixed gpu issue --- allennlp/fairness/bias_metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/allennlp/fairness/bias_metrics.py b/allennlp/fairness/bias_metrics.py index 9a70ea864b1..d9e937300fe 100644 --- a/allennlp/fairness/bias_metrics.py +++ b/allennlp/fairness/bias_metrics.py @@ -219,7 +219,7 @@ def __call__( def _get_ranks(self, x: torch.Tensor) -> torch.Tensor: tmp = x.argsort() ranks = torch.zeros_like(tmp) - ranks[tmp] = torch.arange(x.size(0), device=ranks) + ranks[tmp] = torch.arange(x.size(0), device=ranks.device) return ranks def spearman_correlation(self, x: torch.Tensor, y: torch.Tensor): From d49ab68b421eb87d0620f3e6355ee2a3f3ef21d1 Mon Sep 17 00:00:00 2001 From: Arjun Subramonian Date: Wed, 12 May 2021 17:32:49 -0700 Subject: [PATCH 6/7] expanded NLI to include more NLI scores and work in batched and distributed settings --- allennlp/fairness/bias_metrics.py | 123 ++++++-- allennlp/fairness/evaluate_bias_mitigation.py | 298 ++++++++++++++++++ tests/fairness/bias_metrics_test.py | 41 ++- 3 files changed, 439 
insertions(+), 23 deletions(-) create mode 100644 allennlp/fairness/evaluate_bias_mitigation.py diff --git a/allennlp/fairness/bias_metrics.py b/allennlp/fairness/bias_metrics.py index e2cc3e9efd4..e7be2763c1c 100644 --- a/allennlp/fairness/bias_metrics.py +++ b/allennlp/fairness/bias_metrics.py @@ -27,7 +27,7 @@ """ -from typing import Optional, Dict, Union +from typing import Optional, Dict, Union, List from overrides import overrides import torch @@ -35,6 +35,7 @@ from allennlp.common.util import is_distributed from allennlp.common.checks import ConfigurationError +from allennlp.nn.util import dist_reduce_sum from allennlp.training.metrics.metric import Metric @@ -243,22 +244,41 @@ def spearman_correlation(self, x: torch.Tensor, y: torch.Tensor): return 1.0 - (upper / down) -class NaturalLanguageInference: +@Metric.register("nli") +class NaturalLanguageInference(Metric): """ - Natural Language Inference (NLI) score measures the effect biased - associations have on decisions made in downstream tasks by predicting, - given neutrally-constructed pairs of sentences differing only in - the subject, if the second sentence is entailed by, contradicted by, or - neutral with respect to the first sentence. + Natural language inference scores measure the effect biased associations have on decisions + made downstream, given neutrally-constructed pairs of sentences differing only in the subject. + + 1. Net Neutral (NN): The average probability of the neutral label + across all sentence pairs. + + 2. Fraction Neutral (FN): The fraction of sentence pairs predicted neutral. + + 3. Threshold:tau (T:tau): A parameterized measure that reports the fraction + of examples whose probability of neutral is above tau. + + neutral_label : `int`, optional (default=`2`) + The discrete integer label corresponding to a neutral entailment prediction. + taus : `List[float]`, optional (default=`[0.5, 0.7]`) + All the taus for which to compute Threshold:tau. 
Based on: Dev, S., Li, T., Phillips, J.M., & Srikumar, V. (2020). [On Measuring and Mitigating Biased Inferences of Word Embeddings](https://api.semanticscholar.org/CorpusID:201670701). ArXiv, abs/1908.09369. """ - def __call__( - self, entailment_predictions: torch.Tensor, neutral_label: int = 2 - ) -> torch.FloatTensor: + def __init__(self, neutral_label: int = 2, taus: List[float] = [0.5, 0.7]): + self.neutral_label = neutral_label + self.taus = taus + + self._nli_probs_sum = 0.0 + self._num_neutral_predictions = 0.0 + self._num_neutral_above_taus = {tau: 0.0 for tau in taus} + self._total_predictions = 0 + + @overrides + def __call__(self, nli_probabilities: torch.Tensor) -> None: """ # Parameters @@ -267,11 +287,11 @@ def __call__( In the examples below, we treat gender identity as binary, which does not accurately characterize gender in real life. - entailment_predictions : `torch.Tensor`, required. - A tensor of size (batch_size, ..., dim) containing discrete integer entailment predictions for - neutrally-constructed pairs of sentences differing only in the subject. For example, - if the concept is gender, entailment_predictions could contain the entailment predictions - of: + nli_probabilities : `torch.Tensor`, required. + A tensor of size (batch_size, ..., 3) containing natural language inference + (i.e. entailment, contradiction, and neutral) probabilities for neutrally-constructed + pairs of sentences differing only in the subject. For example, if the concept is gender, + nli_probabilities could contain the natural language inference probabilities of: - "The driver owns a cabinet." -> "The man owns a cabinet." @@ -280,18 +300,79 @@ def __call__( - "The doctor eats an apple." -> "The man eats an apple." - "The doctor eats an apple." -> "The woman eats an apple." 
+ """ + nli_probabilities = nli_probabilities.detach() + + # Some sanity checks + if nli_probabilities.dim() < 2: + raise ConfigurationError( + "nli_probabilities must have at least two dimensions but " + "found tensor of shape: {}".format(nli_probabilities.size()) + ) + if nli_probabilities.size(-1) != 3: + raise ConfigurationError( + "Last dimension of nli_probabilities must have dimensionality of 3 but " + "found tensor of shape: {}".format(nli_probabilities.size()) + ) - neutral_label : `int`, optional (default=`2`) - The discrete integer label corresponding to a neutral entailment prediction. + _nli_neutral_probs = nli_probabilities[..., self.neutral_label] + self._nli_probs_sum += dist_reduce_sum(_nli_neutral_probs.sum().item()) + self._num_neutral_predictions += dist_reduce_sum( + (nli_probabilities.argmax(dim=-1) == self.neutral_label).float().sum().item() + ) + for tau in self.taus: + self._num_neutral_above_taus[tau] += dist_reduce_sum( + (_nli_neutral_probs > tau).float().sum().item() + ) + self._total_predictions += dist_reduce_sum(_nli_neutral_probs.numel()) + + def get_metric(self, reset: bool = False): + """ # Returns - nli_score : `torch.FloatTensor` - The percentage of sentence pairs predicted as neutral. A percentage - closer to 1 suggests lower bias, as bias will result in a higher + nli_scores : `Dict[str, float]` + Contains the following keys: + + 1. "`net_neutral`" : The average probability of the neutral label across + all sentence pairs. A value closer to 1 suggests lower bias, as bias will result in a higher + probability of entailment or contradiction. + + 2. "`fraction_neutral`" : The fraction of sentence pairs predicted neutral. + A value closer to 1 suggests lower bias, as bias will result in a higher probability of entailment or contradiction. + + 3. "`threshold_{taus}`" : For each tau, the fraction of examples whose probability of + neutral is above tau. 
For each tau, a value closer to 1 suggests lower bias, as bias + will result in a higher probability of entailment or contradiction. + """ - return (entailment_predictions == neutral_label).float().mean() + if self._total_predictions == 0: + nli_scores = { + "net_neutral": 0.0, + "fraction_neutral": 0.0, + **{"threshold_{}".format(tau): 0.0 for tau in self.taus}, + } + else: + nli_scores = { + "net_neutral": self._nli_probs_sum / self._total_predictions, + "fraction_neutral": self._num_neutral_predictions / self._total_predictions, + **{ + "threshold_{}".format(tau): self._num_neutral_above_taus[tau] + / self._total_predictions + for tau in self.taus + }, + } + if reset: + self.reset() + return nli_scores + + @overrides + def reset(self): + self._nli_probs_sum = 0.0 + self._num_neutral_predictions = 0.0 + self._num_neutral_above_taus = {tau: 0.0 for tau in self.taus} + self._total_predictions = 0 @Metric.register("association_without_ground_truth") diff --git a/allennlp/fairness/evaluate_bias_mitigation.py b/allennlp/fairness/evaluate_bias_mitigation.py new file mode 100644 index 00000000000..e58f7345766 --- /dev/null +++ b/allennlp/fairness/evaluate_bias_mitigation.py @@ -0,0 +1,298 @@ +""" +The `evaluate_bias_mitigation` subcommand can be used to +compare a bias-mitigated trained model with a baseline +against an SNLI dataset following the format in [On Measuring +and Mitigating Biased Inferences of Word Embeddings] +(https://arxiv.org/pdf/1908.09369.pdf) and reports the +Net Neutral, Fraction Neutral, and Threshold:tau metrics. 
+""" + +import argparse +import json +import logging +from typing import Any, Dict, Tuple +from overrides import overrides +import tempfile +import torch + +from allennlp.commands.subcommand import Subcommand +from allennlp.common import logging as common_logging +from allennlp.common.util import prepare_environment +from allennlp.data import DataLoader +from allennlp.models.archival import load_archive +from allennlp.training.util import evaluate + +logger = logging.getLogger(__name__) + + +@Subcommand.register("evaluate-bias-mitigation") +class EvaluateBiasMitigation(Subcommand): + @overrides + def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.ArgumentParser: + description = """Evaluate bias mitigation""" + subparser = parser.add_parser( + self.name, description=description, help="Evaluate bias mitigation." + ) + + subparser.add_argument( + "bias_mitigated_archive_file", + type=str, + help="path to a bias-mitigated archived trained model", + ) + + subparser.add_argument( + "baseline_archive_file", type=str, help="path to a baseline archived trained model" + ) + + subparser.add_argument( + "input_file", type=str, help="path to the file containing the SNLI evaluation data" + ) + + subparser.add_argument( + "--bias-mitigated-output-file", + type=str, + help="optional path to write the metrics to as JSON", + ) + + subparser.add_argument( + "--baseline-output-file", + type=str, + help="optional path to write the metrics to as JSON", + ) + + subparser.add_argument( + "--predictions-diff-output-file", + type=str, + help="optional path to write diff of bias-mitigated and baseline predictions to as JSON lines", + ) + + subparser.add_argument( + "--taus", + type=float, + nargs="+", + default=[0.5, 0.7], + help="tau parameters for Threshold metric", + ) + + cuda_device = subparser.add_mutually_exclusive_group(required=False) + cuda_device.add_argument( + "--cuda-device", type=int, default=-1, help="id of GPU to use (if any)" + ) + + 
subparser.add_argument( + "--bias-mitigation-overrides", + type=str, + default="", + help=( + "a json(net) structure used to override the bias mitigation experiment configuration, e.g., " + "'{\"iterator.batch_size\": 16}'. Nested parameters can be specified either" + " with nested dictionaries or with dot syntax." + ), + ) + + subparser.add_argument( + "--baseline-overrides", + type=str, + default="", + help=( + "a json(net) structure used to override the baseline experiment configuration, e.g., " + "'{\"iterator.batch_size\": 16}'. Nested parameters can be specified either" + " with nested dictionaries or with dot syntax." + ), + ) + + subparser.add_argument( + "--batch-size", type=int, help="If non-empty, the batch size to use during evaluation." + ) + + subparser.add_argument( + "--file-friendly-logging", + action="store_true", + default=False, + help="outputs tqdm status on separate lines and slows tqdm refresh rate", + ) + + subparser.set_defaults(func=evaluate_from_args) + + return subparser + + +def _add(accumulator_dict, append_dict): + """ + Adds list pairs in append_dict to accumulator_dict, + concatenating list values for duplicate keys. + """ + for k, v in append_dict.items(): + if isinstance(v, list): + if k not in accumulator_dict: + accumulator_dict[k] = [] + accumulator_dict[k] += v + + +def compute_metrics(probs, model, taus): + """ + Computes the following metrics: + + 1. Net Neutral (NN): The average probability of the neutral label + across all sentence pairs. + + 2. Fraction Neutral (FN): The fraction of sentence pairs predicted neutral. + + 3. Threshold:tau (T:tau): A parameterized measure that reports the fraction + of examples whose probability of neutral is above tau. 
+ + """ + metrics = {} + neutral_label = model.vocab.get_token_index("neutral", "labels") + + metrics["net_neutral"] = probs[..., neutral_label].mean().item() + metrics["fraction_neutral"] = (probs.argmax(dim=-1) == neutral_label).float().mean().item() + for tau in taus: + metrics["threshold_{}".format(tau)] = ( + (probs[..., neutral_label] > tau).float().mean().item() + ) + + return metrics + + +def compute_predictions_diff(bias_mitigated_labels, baseline_labels, tokens, baseline_tokenizer): + """ + Returns label changes induced by bias mitigation and the corresponding sentence pairs. + """ + diff = [] + for idx, label in enumerate(bias_mitigated_labels): + if label != baseline_labels[idx]: + diff.append( + { + "sentence_pair": baseline_tokenizer.convert_tokens_to_string(tokens[idx]), + "bias_mitigated_label": label, + "baseline_label": baseline_labels[idx], + } + ) + return diff + + +# TODO: allow bias mitigation and baseline evaluations to run simultaneously on +# two different GPUs +def evaluate_from_args(args: argparse.Namespace) -> Tuple[Dict[str, Any], Dict[str, Any]]: + common_logging.FILE_FRIENDLY_LOGGING = args.file_friendly_logging + + # Disable some of the more verbose logging statements + logging.getLogger("allennlp.common.params").disabled = True + logging.getLogger("allennlp.nn.initializers").disabled = True + logging.getLogger("allennlp.modules.token_embedders.embedding").setLevel(logging.INFO) + + # Load from bias-mitigated archive + bias_mitigated_archive = load_archive( + args.bias_mitigated_archive_file, + cuda_device=args.cuda_device, + overrides=args.bias_mitigation_overrides, + ) + bias_mitigated_config = bias_mitigated_archive.config + prepare_environment(bias_mitigated_config) + bias_mitigated_model = bias_mitigated_archive.model + bias_mitigated_model.eval() + + # Load from baseline archive + baseline_archive = load_archive( + args.baseline_archive_file, cuda_device=args.cuda_device, overrides=args.baseline_overrides + ) + baseline_config 
= baseline_archive.config + prepare_environment(baseline_config) + baseline_model = baseline_archive.model + baseline_model.eval() + + # Load the evaluation data + + bias_mitigated_dataset_reader = bias_mitigated_archive.validation_dataset_reader + baseline_dataset_reader = baseline_archive.validation_dataset_reader + + evaluation_data_path = args.input_file + logger.info("Reading evaluation data from %s", evaluation_data_path) + + bias_mitigated_data_loader_params = bias_mitigated_config.pop("validation_data_loader", None) + if bias_mitigated_data_loader_params is None: + bias_mitigated_data_loader_params = bias_mitigated_config.pop("data_loader") + if args.batch_size: + bias_mitigated_data_loader_params["batch_size"] = args.batch_size + bias_mitigated_data_loader = DataLoader.from_params( + params=bias_mitigated_data_loader_params, + reader=bias_mitigated_dataset_reader, + data_path=evaluation_data_path, + ) + bias_mitigated_data_loader.index_with(bias_mitigated_model.vocab) + + baseline_data_loader_params = baseline_config.pop("validation_data_loader", None) + if baseline_data_loader_params is None: + baseline_data_loader_params = baseline_config.pop("data_loader") + if args.batch_size: + baseline_data_loader_params["batch_size"] = args.batch_size + baseline_data_loader = DataLoader.from_params( + params=baseline_data_loader_params, + reader=baseline_dataset_reader, + data_path=evaluation_data_path, + ) + baseline_data_loader.index_with(baseline_model.vocab) + + bias_mitigated_file, bias_mitigated_filename = tempfile.mkstemp() + bias_mitigated_output_metrics = evaluate( + bias_mitigated_model, + bias_mitigated_data_loader, + args.cuda_device, + predictions_output_file=bias_mitigated_filename, + ) + + bias_mitigated_predictions: Dict[str, Any] = {} + with open(bias_mitigated_file, "r") as fd: + for line in fd: + _add(bias_mitigated_predictions, json.loads(line)) + + probs = torch.tensor(bias_mitigated_predictions["probs"], device=args.cuda_device) + 
bias_mitigated_metrics = compute_metrics(probs, bias_mitigated_model, args.taus) + metrics_json = json.dumps({**bias_mitigated_output_metrics, **bias_mitigated_metrics}, indent=2) + if args.bias_mitigated_output_file: + # write all metrics to output file + # don't use dump_metrics() because want to log regardless + with open(args.bias_mitigated_output_file, "w") as fd: + fd.write(metrics_json) + logger.info("Metrics: %s", metrics_json) + + baseline_file, baseline_filename = tempfile.mkstemp() + baseline_output_metrics = evaluate( + baseline_model, + baseline_data_loader, + args.cuda_device, + predictions_output_file=baseline_filename, + ) + + baseline_predictions: Dict[str, Any] = {} + with open(baseline_file, "r") as fd: + for line in fd: + _add(baseline_predictions, json.loads(line)) + + probs = torch.tensor(baseline_predictions["probs"], device=args.cuda_device) + baseline_metrics = compute_metrics(probs, baseline_model, args.taus) + metrics_json = json.dumps({**baseline_output_metrics, **baseline_metrics}, indent=2) + if args.baseline_output_file: + # write all metrics to output file + # don't use dump_metrics() because want to log regardless + with open(args.baseline_output_file, "w") as fd: + fd.write(metrics_json) + logger.info("Metrics: %s", metrics_json) + + if hasattr(baseline_dataset_reader, "_tokenizer"): + diff = compute_predictions_diff( + bias_mitigated_predictions["label"], + baseline_predictions["label"], + baseline_predictions["tokens"], + baseline_dataset_reader._tokenizer.tokenizer, # type: ignore + ) + diff_json = json.dumps(diff, indent=2) + if args.predictions_diff_output_file: + with open(args.predictions_diff_output_file, "w") as fd: + fd.write(diff_json) + logger.info("Predictions diff: %s", diff_json) + + logger.info("Finished evaluating.") + + return bias_mitigated_metrics, baseline_metrics diff --git a/tests/fairness/bias_metrics_test.py b/tests/fairness/bias_metrics_test.py index 11f1e5ce454..ba9675946e8 100644 --- 
a/tests/fairness/bias_metrics_test.py +++ b/tests/fairness/bias_metrics_test.py @@ -131,10 +131,47 @@ def test_ect(self, device: str): class NaturalLanguageInferenceTest(AllenNlpTestCase): + def test_invalid_dimensions(self): + nli_probabilities = torch.ones(3) + with pytest.raises(ConfigurationError): + NaturalLanguageInference(0)(nli_probabilities) + + nli_probabilities = torch.eye(4) + with pytest.raises(ConfigurationError): + NaturalLanguageInference(0)(nli_probabilities) + @multi_device def test_nli(self, device: str): - entailment_predictions = torch.eye(3, device=device).long() - assert NaturalLanguageInference()(entailment_predictions, neutral_label=1) == 1 / 3 + nli_probabilities = 0.6 * torch.eye(3, device=device) + nli = NaturalLanguageInference(0) + nli(nli_probabilities) + + expected_scores = { + "net_neutral": 0.6 / 3, + "fraction_neutral": 1 / 3, + "threshold_0.5": 1 / 3, + "threshold_0.7": 0.0, + } + assert nli.get_metric(reset=True) == pytest.approx(expected_scores) + assert all([v == 0.0 for k, v in nli.get_metric().items()]) + + def test_distributed_nli(self): + nli_probabilities = 0.6 * torch.eye(3) + expected_scores = { + "net_neutral": 0.6 / 3, + "fraction_neutral": 1 / 3, + "threshold_0.5": 1 / 3, + "threshold_0.7": 0.0, + } + metric_kwargs = {"nli_probabilities": [nli_probabilities, nli_probabilities]} + run_distributed_test( + [-1, -1], + global_distributed_metric, + NaturalLanguageInference(0), + metric_kwargs, + expected_scores, + exact=False, + ) class AssociationWithoutGroundTruthTest(AllenNlpTestCase): From 2d8392778e3a7ac96fdd3f51d4792683e16282d0 Mon Sep 17 00:00:00 2001 From: Arjun Subramonian Date: Wed, 12 May 2021 17:52:05 -0700 Subject: [PATCH 7/7] removed evaluate bias mitigation command from this PR --- allennlp/fairness/evaluate_bias_mitigation.py | 298 ------------------ 1 file changed, 298 deletions(-) delete mode 100644 allennlp/fairness/evaluate_bias_mitigation.py diff --git a/allennlp/fairness/evaluate_bias_mitigation.py 
b/allennlp/fairness/evaluate_bias_mitigation.py deleted file mode 100644 index e58f7345766..00000000000 --- a/allennlp/fairness/evaluate_bias_mitigation.py +++ /dev/null @@ -1,298 +0,0 @@ -""" -The `evaluate_bias_mitigation` subcommand can be used to -compare a bias-mitigated trained model with a baseline -against an SNLI dataset following the format in [On Measuring -and Mitigating Biased Inferences of Word Embeddings] -(https://arxiv.org/pdf/1908.09369.pdf) and reports the -Net Neutral, Fraction Neutral, and Threshold:tau metrics. -""" - -import argparse -import json -import logging -from typing import Any, Dict, Tuple -from overrides import overrides -import tempfile -import torch - -from allennlp.commands.subcommand import Subcommand -from allennlp.common import logging as common_logging -from allennlp.common.util import prepare_environment -from allennlp.data import DataLoader -from allennlp.models.archival import load_archive -from allennlp.training.util import evaluate - -logger = logging.getLogger(__name__) - - -@Subcommand.register("evaluate-bias-mitigation") -class EvaluateBiasMitigation(Subcommand): - @overrides - def add_subparser(self, parser: argparse._SubParsersAction) -> argparse.ArgumentParser: - description = """Evaluate bias mitigation""" - subparser = parser.add_parser( - self.name, description=description, help="Evaluate bias mitigation." 
- ) - - subparser.add_argument( - "bias_mitigated_archive_file", - type=str, - help="path to a bias-mitigated archived trained model", - ) - - subparser.add_argument( - "baseline_archive_file", type=str, help="path to a baseline archived trained model" - ) - - subparser.add_argument( - "input_file", type=str, help="path to the file containing the SNLI evaluation data" - ) - - subparser.add_argument( - "--bias-mitigated-output-file", - type=str, - help="optional path to write the metrics to as JSON", - ) - - subparser.add_argument( - "--baseline-output-file", - type=str, - help="optional path to write the metrics to as JSON", - ) - - subparser.add_argument( - "--predictions-diff-output-file", - type=str, - help="optional path to write diff of bias-mitigated and baseline predictions to as JSON lines", - ) - - subparser.add_argument( - "--taus", - type=float, - nargs="+", - default=[0.5, 0.7], - help="tau parameters for Threshold metric", - ) - - cuda_device = subparser.add_mutually_exclusive_group(required=False) - cuda_device.add_argument( - "--cuda-device", type=int, default=-1, help="id of GPU to use (if any)" - ) - - subparser.add_argument( - "--bias-mitigation-overrides", - type=str, - default="", - help=( - "a json(net) structure used to override the bias mitigation experiment configuration, e.g., " - "'{\"iterator.batch_size\": 16}'. Nested parameters can be specified either" - " with nested dictionaries or with dot syntax." - ), - ) - - subparser.add_argument( - "--baseline-overrides", - type=str, - default="", - help=( - "a json(net) structure used to override the baseline experiment configuration, e.g., " - "'{\"iterator.batch_size\": 16}'. Nested parameters can be specified either" - " with nested dictionaries or with dot syntax." - ), - ) - - subparser.add_argument( - "--batch-size", type=int, help="If non-empty, the batch size to use during evaluation." 
- ) - - subparser.add_argument( - "--file-friendly-logging", - action="store_true", - default=False, - help="outputs tqdm status on separate lines and slows tqdm refresh rate", - ) - - subparser.set_defaults(func=evaluate_from_args) - - return subparser - - -def _add(accumulator_dict, append_dict): - """ - Adds list pairs in append_dict to accumulator_dict, - concatenating list values for duplicate keys. - """ - for k, v in append_dict.items(): - if isinstance(v, list): - if k not in accumulator_dict: - accumulator_dict[k] = [] - accumulator_dict[k] += v - - -def compute_metrics(probs, model, taus): - """ - Computes the following metrics: - - 1. Net Neutral (NN): The average probability of the neutral label - across all sentence pairs. - - 2. Fraction Neutral (FN): The fraction of sentence pairs predicted neutral. - - 3. Threshold:tau (T:tau): A parameterized measure that reports the fraction - of examples whose probability of neutral is above tau. - - """ - metrics = {} - neutral_label = model.vocab.get_token_index("neutral", "labels") - - metrics["net_neutral"] = probs[..., neutral_label].mean().item() - metrics["fraction_neutral"] = (probs.argmax(dim=-1) == neutral_label).float().mean().item() - for tau in taus: - metrics["threshold_{}".format(tau)] = ( - (probs[..., neutral_label] > tau).float().mean().item() - ) - - return metrics - - -def compute_predictions_diff(bias_mitigated_labels, baseline_labels, tokens, baseline_tokenizer): - """ - Returns label changes induced by bias mitigation and the corresponding sentence pairs. 
- """ - diff = [] - for idx, label in enumerate(bias_mitigated_labels): - if label != baseline_labels[idx]: - diff.append( - { - "sentence_pair": baseline_tokenizer.convert_tokens_to_string(tokens[idx]), - "bias_mitigated_label": label, - "baseline_label": baseline_labels[idx], - } - ) - return diff - - -# TODO: allow bias mitigation and baseline evaluations to run simultaneously on -# two different GPUs -def evaluate_from_args(args: argparse.Namespace) -> Tuple[Dict[str, Any], Dict[str, Any]]: - common_logging.FILE_FRIENDLY_LOGGING = args.file_friendly_logging - - # Disable some of the more verbose logging statements - logging.getLogger("allennlp.common.params").disabled = True - logging.getLogger("allennlp.nn.initializers").disabled = True - logging.getLogger("allennlp.modules.token_embedders.embedding").setLevel(logging.INFO) - - # Load from bias-mitigated archive - bias_mitigated_archive = load_archive( - args.bias_mitigated_archive_file, - cuda_device=args.cuda_device, - overrides=args.bias_mitigation_overrides, - ) - bias_mitigated_config = bias_mitigated_archive.config - prepare_environment(bias_mitigated_config) - bias_mitigated_model = bias_mitigated_archive.model - bias_mitigated_model.eval() - - # Load from baseline archive - baseline_archive = load_archive( - args.baseline_archive_file, cuda_device=args.cuda_device, overrides=args.baseline_overrides - ) - baseline_config = baseline_archive.config - prepare_environment(baseline_config) - baseline_model = baseline_archive.model - baseline_model.eval() - - # Load the evaluation data - - bias_mitigated_dataset_reader = bias_mitigated_archive.validation_dataset_reader - baseline_dataset_reader = baseline_archive.validation_dataset_reader - - evaluation_data_path = args.input_file - logger.info("Reading evaluation data from %s", evaluation_data_path) - - bias_mitigated_data_loader_params = bias_mitigated_config.pop("validation_data_loader", None) - if bias_mitigated_data_loader_params is None: - 
bias_mitigated_data_loader_params = bias_mitigated_config.pop("data_loader") - if args.batch_size: - bias_mitigated_data_loader_params["batch_size"] = args.batch_size - bias_mitigated_data_loader = DataLoader.from_params( - params=bias_mitigated_data_loader_params, - reader=bias_mitigated_dataset_reader, - data_path=evaluation_data_path, - ) - bias_mitigated_data_loader.index_with(bias_mitigated_model.vocab) - - baseline_data_loader_params = baseline_config.pop("validation_data_loader", None) - if baseline_data_loader_params is None: - baseline_data_loader_params = baseline_config.pop("data_loader") - if args.batch_size: - baseline_data_loader_params["batch_size"] = args.batch_size - baseline_data_loader = DataLoader.from_params( - params=baseline_data_loader_params, - reader=baseline_dataset_reader, - data_path=evaluation_data_path, - ) - baseline_data_loader.index_with(baseline_model.vocab) - - bias_mitigated_file, bias_mitigated_filename = tempfile.mkstemp() - bias_mitigated_output_metrics = evaluate( - bias_mitigated_model, - bias_mitigated_data_loader, - args.cuda_device, - predictions_output_file=bias_mitigated_filename, - ) - - bias_mitigated_predictions: Dict[str, Any] = {} - with open(bias_mitigated_file, "r") as fd: - for line in fd: - _add(bias_mitigated_predictions, json.loads(line)) - - probs = torch.tensor(bias_mitigated_predictions["probs"], device=args.cuda_device) - bias_mitigated_metrics = compute_metrics(probs, bias_mitigated_model, args.taus) - metrics_json = json.dumps({**bias_mitigated_output_metrics, **bias_mitigated_metrics}, indent=2) - if args.bias_mitigated_output_file: - # write all metrics to output file - # don't use dump_metrics() because want to log regardless - with open(args.bias_mitigated_output_file, "w") as fd: - fd.write(metrics_json) - logger.info("Metrics: %s", metrics_json) - - baseline_file, baseline_filename = tempfile.mkstemp() - baseline_output_metrics = evaluate( - baseline_model, - baseline_data_loader, - 
args.cuda_device, - predictions_output_file=baseline_filename, - ) - - baseline_predictions: Dict[str, Any] = {} - with open(baseline_file, "r") as fd: - for line in fd: - _add(baseline_predictions, json.loads(line)) - - probs = torch.tensor(baseline_predictions["probs"], device=args.cuda_device) - baseline_metrics = compute_metrics(probs, baseline_model, args.taus) - metrics_json = json.dumps({**baseline_output_metrics, **baseline_metrics}, indent=2) - if args.baseline_output_file: - # write all metrics to output file - # don't use dump_metrics() because want to log regardless - with open(args.baseline_output_file, "w") as fd: - fd.write(metrics_json) - logger.info("Metrics: %s", metrics_json) - - if hasattr(baseline_dataset_reader, "_tokenizer"): - diff = compute_predictions_diff( - bias_mitigated_predictions["label"], - baseline_predictions["label"], - baseline_predictions["tokens"], - baseline_dataset_reader._tokenizer.tokenizer, # type: ignore - ) - diff_json = json.dumps(diff, indent=2) - if args.predictions_diff_output_file: - with open(args.predictions_diff_output_file, "w") as fd: - fd.write(diff_json) - logger.info("Predictions diff: %s", diff_json) - - logger.info("Finished evaluating.") - - return bias_mitigated_metrics, baseline_metrics