From 84f26934c556d56fe49bdec846f6d7b9683b8250 Mon Sep 17 00:00:00 2001 From: Finn Bear Date: Sun, 22 Sep 2024 13:39:52 -0700 Subject: [PATCH] 0.7.27 - improved wordlists. --- Cargo.toml | 2 +- README.md | 2 +- src/dictionary_blacklist.txt | 18 ++++++++++++ src/dictionary_extra.txt | 3 ++ src/false_positives.txt | 46 ++++++++++++++++++++++------- src/profanity.csv | 57 ++++++++++++++++++++++++++++++++++-- src/test_positive.txt | 6 +++- 7 files changed, 119 insertions(+), 15 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 62825b5..76a672b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "rustrict" authors = ["Finn Bear"] -version = "0.7.26" +version = "0.7.27" edition = "2021" license = "MIT OR Apache-2.0" repository = "https://github.com/finnbear/rustrict/" diff --git a/README.md b/README.md index 2148942..b9f9fc4 100644 --- a/README.md +++ b/README.md @@ -177,7 +177,7 @@ is used as a dataset. Positive accuracy is the percentage of profanity detected | Crate | Accuracy | Positive Accuracy | Negative Accuracy | Time | |-------|----------|-------------------|-------------------|------| -| [rustrict](https://crates.io/crates/rustrict) | 79.78% | 94.00% | 76.23% | 9s | +| [rustrict](https://crates.io/crates/rustrict) | 79.83% | 94.00% | 76.30% | 9s | | [censor](https://crates.io/crates/censor) | 76.16% | 72.76% | 77.01% | 23s | ## Development diff --git a/src/dictionary_blacklist.txt b/src/dictionary_blacklist.txt index a50e864..20179fd 100644 --- a/src/dictionary_blacklist.txt +++ b/src/dictionary_blacklist.txt @@ -93,6 +93,13 @@ blue waffle bn bohunks bollocks +bomb china +bomb india +bomb iran +bomb israel +bomb palestine +bomb russia +bomb ukraine bon er boners bonnering @@ -133,6 +140,13 @@ bullet vibe bullshit(.*) bums bungholes +burn china +burn gaza +burn israel +burn jew +burn jews +burn palestine +burn yourself but holes buttocks butts @@ -215,6 +229,7 @@ dirty pillows dirty sanchez dongs donkey punch +dog headed dog style douche(.*) drag queen @@ -403,10 +418,12 @@ kafirs kikes kill china kill chinese +kill myself kill people kill russia kill russian kill russians +kill self kill students kill ukraine kill ukrainian @@ -451,6 +468,7 @@ m male squirting masochists massive wood +master race masturbate(.*) maya sol meat beating diff --git a/src/dictionary_extra.txt b/src/dictionary_extra.txt index 7b1a93d..9af5d88 100644 --- a/src/dictionary_extra.txt +++ b/src/dictionary_extra.txt @@ -85,6 +85,7 @@ few secs ffa game fire cracker fire crackers +for a pea forgot it's francoitalian franco italian @@ -172,11 +173,13 @@ mini game n't eat negativly ngad +ngay bay nigth of agitation omg opps outgaminged +pc master race pegging the plss plsss diff --git a/src/false_positives.txt b/src/false_positives.txt index 6d8af84..bcaed8c 100644 --- a/src/false_positives.txt +++ b/src/false_positives.txt @@ -1741,6 +1741,10 @@ attorneys hit attorneys lut attorneys perm attorneys seeks +auburn china +auburn israel +auburn palestine +auburn yourself aught its aught texts aught thick @@ -2328,6 +2332,7 @@ bend overhead bend overnigh bend overs bend overview +bend yourself benedick benedicks benkulen @@ -2755,13 +2760,9 @@ bol lock bol locks bol look bol looks -bomb china -bomb india -bomb iran -bomb israel -bomb palestine -bomb russia -bomb ukraine +bomb indian +bomb israeli +bomb russian bomb usage bon ed bon eric @@ -3360,9 +3361,8 @@ bundles bunga burgh little burgundies -burn china -burn israel -burn palestine +burn israeli +burn jewel burst fu burst its burst texts @@ -3631,6 +3631,7 @@ cases hit cases lut cases perm cases seeks +cash apps casklike cast rate cast ration @@ -5282,6 +5283,7 @@ directions hit directions lut directions perm directions seeks +dirty juan disco jones disco om disco on @@ -5314,6 +5316,7 @@ dives hit dives lut dives perm dives seeks +dividend yourself divx cocktail divx commission divx cook @@ -6582,6 +6585,7 @@ felt chuck felt church felt xhtml females squirting +fend yourself fennig fers cumulative fers ext @@ -6966,6 +6970,7 @@ frequencies seeks fribblish fricassees frickle +friend yourself frigage frigate frigatoon @@ -8342,6 +8347,7 @@ hoot caring hoot carl hoot carri hoot chick +hoot girl hoot karl hop hope do @@ -8611,6 +8617,7 @@ hosts perm hosts seeks hot hot carri +hot girl hot its hot karl hot texts @@ -9773,6 +9780,7 @@ kelkoo ny kelkoo om kelkoo on kelkoo ward +kend yourself kenipsim kennedy kee kennedy keith @@ -9834,6 +9842,7 @@ kijiji key kijiji like kijiji link kijiji lit +kijiji myself kijiji slim kijiji ta kijiji tea @@ -10024,6 +10033,7 @@ killian killing jewel killing palestinian killing peoples +kills self kilt kinds cumulative kinds ext @@ -10320,6 +10330,7 @@ leep peru leep public leep puzzles leep rick +legend yourself legendic leges cumulative leges ext @@ -10342,6 +10353,7 @@ len illinois lena holes lena zimb lena zinc +lend yourself length little leningrad leninism @@ -11012,6 +11024,7 @@ marsh liter marsh little marshite mas hole +mas terrace masklike masochistic mass cocktail @@ -11038,6 +11051,7 @@ mass pirate mass seeks mass sees mass sess +mass terrace massachusetts cumulative massachusetts ext massachusetts hilt @@ -11051,6 +11065,7 @@ massive woods master balt master bat master batter +master races mastful masturbational mates cumulative @@ -11107,6 +11122,7 @@ membered skins memo ron men sees menadic +mend yourself menisperm mens cumulative mens esc @@ -11917,6 +11933,7 @@ ng rope ngad ngai ngapi +ngay bay nibbana nick a nick advertisement @@ -12461,6 +12478,7 @@ opponents lut opponents perm opponents seeks opps +or a pe or appeal or appear or append @@ -12978,6 +12996,7 @@ pays hit pays lut pays perm pays seeks +pc master race pe do pe nissan peaceful licking @@ -13081,6 +13100,7 @@ pen nis pen us pen uzbek pen vs +pend yourself peneseismic penest penistone @@ -15080,6 +15100,7 @@ remedy kep remedy ker remedy kevin remedy key +rend yourself rendered skins reneger renga @@ -16078,6 +16099,7 @@ seminudity semisextile semislave send feet +send yourself senior senior appeal senior appear @@ -16572,8 +16594,10 @@ skiddycock skiepper skill china skill chinese +skill myself skill people skill russia +skill self skill student skill ukraine skill yourself @@ -17725,6 +17749,7 @@ teiglech temple assured temple peer ten secs +tend yourself tendrillar tenebra tenggerese @@ -19475,6 +19500,7 @@ we xnxx wealth little weathercock weatherstrippers +webmaster race week chi week cocktail week commission diff --git a/src/profanity.csv b/src/profanity.csv index f84f379..9be82ec 100644 --- a/src/profanity.csv +++ b/src/profanity.csv @@ -27,6 +27,8 @@ _l_,1,0,2,0,1 2girls1cup,0,0,2,0,1 3==D,0,0,2,0,1 3==o,0,0,2,0,1 +3rdreich,0,1,0,0,0 +3rdriech,0,1,0,0,1 8==>,0,0,2,0,1 8==),0,0,2,0,1 8==D,0,0,2,0,1 @@ -45,12 +47,19 @@ adlof,0,1,0,0,1 adolf,0,2,0,0,0 adolphfitler,0,2,0,0,2 adolphfitlr,0,2,0,0,2 + adulf,0,2,0,0,2 + adulph,0,2,0,0,2 +adulphfitler,0,2,0,0,2 +adulphfitlr,0,2,0,0,2 africans,0,1,0,0,0 agina,0,0,1,0,0 ahole,2,0,1,2,0 aholes,2,0,1,0,0 +aholf ditler,0,2,0,0,2 aligatorbait,0,1,0,0,0 al qassam,0,1,0,0,1 +amciksuyu,0,0,3,0,1 +amsuyu,0,0,1,0,1 anal,2,0,2,0,0 anilingus,0,0,3,0,0 anus,2,0,0,0,0 @@ -149,6 +158,7 @@ beetch,0,2,2,0,1 beeyotch,0,2,2,0,2 belend,0,0,2,0,1 bendover,0,0,1,0,0 + Benis,0,0,1,0,1 beotch,0,2,2,0,1 be rped,0,0,1,0,1 bestality,0,0,1,0,1 @@ -256,7 +266,9 @@ bunghole,2,0,1,2,1 burnchina,0,2,0,0,1 burngaza,0,2,0,0,1 burnisrael,0,2,0,0,1 +burnjew,0,2,0,0,1 burnpalestine,0,2,0,0,1 +burnyourself,0,1,0,0,1 butchbabe,0,1,2,0,1 butchdike,0,1,2,0,1 butchdyke,0,1,3,0,0 @@ -282,11 +294,13 @@ cameltoe,0,0,3,0,1 camgirl,0,1,2,1,0 carajo,1,0,0,0,0 carpetmuncher,0,0,2,0,1 +cashapp,0,0,0,0,1 castrate,0,0,2,0,0 castration,0,0,2,0,0 cawks,1,0,1,0,1 cazata,2,0,0,0,0 cazzo,1,0,1,0,0 +ccksuck,2,0,3,0,1 cds nuts,0,0,1,0,1 cervix,0,0,1,0,0 cheap lay,0,0,2,0,0 @@ -425,12 +439,14 @@ dingleberry,1,0,0,0,0 dipship,0,0,1,0,1 dipshit,2,0,0,2,0 diqinmy,1,0,2,0,1 +dirtyjuan,0,0,2,0,1 dirtypillows,0,0,1,0,1 -dirtysanchez,0,0,1,0,0 +dirtysanchez,0,0,1,0,1 diz nut,1,0,0,0,2 dldo,0,0,2,0,2 doggin,0,0,1,0,0 dogging,0,0,1,0,1 +doghead,0,1,0,2,1 dogiestyle,0,0,2,0,1 dogstyle,0,0,1,0,0 dogystyle,0,0,2,0,0 @@ -441,6 +457,7 @@ domme,0,0,1,0,0 dong,0,0,2,0,0 donkeypunch,0,1,2,0,1 donkeyribber,0,1,1,1,1 +dont love you,0,0,0,3,0 doochbag,0,1,1,1,1 dooshbag,1,1,2,2,0 dooshiest,1,1,2,1,0 @@ -463,6 +480,7 @@ dumbase,1,0,0,1,1 dumbass,2,0,0,2,0 dyeinahole,0,1,0,2,2 dyke,2,2,2,0,0 +eatass,0,0,1,2,1 eat dog,0,1,0,0,0 eat my,0,0,0,0,1 eat ur,0,0,0,0,1 @@ -473,7 +491,9 @@ eff u,2,0,2,0,1 ejaculat,0,0,2,0,0 ejaculate,0,0,2,0,0 encule,2,0,0,0,0 +end ur life,0,2,0,0,2 end your life,0,2,0,0,1 +end yourself,0,2,0,0,1 enfoire,2,0,2,2,0 erection,0,0,2,0,0 erectman,0,0,2,0,1 @@ -496,6 +516,7 @@ facked,2,0,1,0,1 facker,2,0,2,0,1 facking,2,0,2,0,1 faegot,0,3,1,0,1 +fahgot,0,2,1,0,2 fag,0,3,1,0,0 fagg,0,3,1,0,0 fags,0,3,1,0,0 @@ -747,6 +768,7 @@ girlfrend,0,0,1,0,0 girlfriend,0,0,1,0,0 girlontop,0,0,1,0,0 girlsgonewild,0,0,1,0,0 +girlssendme,0,0,1,0,0 gky,0,2,0,2,1 glans,0,0,1,0,0 glizy,0,0,2,0,2 @@ -865,6 +887,8 @@ horseshit,2,0,0,0,0 hosejob,0,0,2,0,0 hotcarl,0,0,2,0,1 hotchick,0,0,1,0,0 +hotgirl,0,0,1,0,0 +hotboy,0,0,1,0,0 hot stuff,0,0,0,0,1 hottotrot,0,0,1,0,1 howtomurder,0,1,0,0,0 @@ -896,12 +920,14 @@ hyuejass,2,0,1,0,2 gnikcuf,1,0,1,0,1 gooch,0,0,2,0,0 i climax,0,0,1,0,0 +igottouched,0,0,1,0,1 idiot,0,0,0,1,0 idolfhatler,0,2,0,0,2 idrugkids,0,1,0,0,1 ifkdurmum,2,2,3,2,2 igger,3,3,0,0,0 ilovekids,0,0,1,0,1 +hairymelon,0,0,1,0,0 has a black hawk,1,0,1,0,2 have a black hawk,1,0,1,0,2 i have ED,0,0,1,0,1 @@ -973,6 +999,7 @@ kiddyfidl,0,0,3,0,1 kidraper,0,0,2,0,1 kidsinmy,0,1,1,0,2 kike,0,2,0,0,0 +kilmyself,0,2,0,0,1 kilthyself,0,2,0,3,2 kilyoself,0,2,0,3,2 kilyoslef,0,2,0,3,2 @@ -1019,8 +1046,11 @@ killingrussia,0,2,0,0,0 killingrussians,0,2,0,0,0 killingukraine,0,2,0,0,0 killingukrainians,0,2,0,0,0 +killnegro,0,2,0,0,1 +killself,0,2,0,0,1 killurfamily,0,2,0,3,1 killurself,0,2,0,3,1 +killuself,0,2,0,3,1 killyourfamily,0,2,0,3,0 kinbaku,0,0,1,0,0 kinkster,0,0,2,0,0 @@ -1050,6 +1080,7 @@ kondom,0,0,1,0,1 kondum,0,0,1,0,1 krap,1,0,0,0,1 kraut,0,1,0,0,0 +tokrilmyself,0,2,0,0,2 kuklukan,0,3,0,0,2 kukluklan,0,3,0,0,1 kuklux,0,3,0,0,0 @@ -1107,6 +1138,7 @@ lubejob,0,0,1,0,0 mailabomb,0,2,0,0,0 makeme come,0,0,1,0,1 makeme hard,0,0,1,0,1 +makeyourselfdie,0,2,0,0,2 maldito,1,0,0,0,0 malesquirting,0,0,1,0,0 malnacido,0,0,1,2,0 @@ -1132,6 +1164,7 @@ masterbate,0,0,2,0,0 masterbating,0,0,2,0,0 masterbation,0,0,2,0,0 masterbator,0,0,2,0,0 +masterrace,0,1,0,0,1 mastrbait,0,0,2,0,2 mastrbate,0,0,2,0,1 mastrbating,0,0,2,0,2 @@ -1175,6 +1208,7 @@ misionaryposition,0,0,2,0,0 moanforme,0,0,1,0,1 moaning,0,0,1,0,1 moans,0,0,1,0,1 +modsaredog,0,1,0,0,1 moe lester,0,0,1,0,1 mofo,0,1,1,2,1 mofuk,1,1,0,0,0 @@ -1225,6 +1259,8 @@ my nut,0,0,2,0,1 my tight little,0,0,1,0,1 myseaman,0,0,1,0,2 myseamen,0,0,1,0,2 +n½ger,3,5,0,0,2 + n#zi,0,1,0,0,1 naked,0,0,2,0,0 nagga,2,3,0,0,2 nagger,3,5,0,0,2 @@ -1233,6 +1269,7 @@ naizi,0,1,0,0,1 nastyho,0,1,1,1,0 naughty,0,0,1,0,0 n word,1,1,0,0,1 +n-word,1,1,0,0,1 naegar,3,5,0,0,2 naeger,3,5,0,0,2 naegur,3,5,0,0,2 @@ -1262,6 +1299,8 @@ negr,3,5,0,0,2 negroes,0,1,0,0,0 negroid,0,1,0,0,0 negros,0,1,0,0,0 +nehga,1,1,0,0,2 +nehger,1,2,0,0,2 nerd,0,0,0,1,0 nhiger,3,5,0,0,3 nhgger,3,5,0,0,3 @@ -1300,6 +1339,7 @@ nijja,2,3,0,0,2 nijjer,3,5,0,0,2 niker,1,1,0,0,2 nikher,1,2,0,0,2 +nillkigers,1,3,0,0,2 nimpho,0,0,1,0,1 nimphomania,0,0,2,0,1 nippas,2,3,0,0,2 @@ -1420,6 +1460,7 @@ peinus,1,0,1,0,1 penas,1,0,1,0,1 penchod,0,0,0,2,0 pendejo,0,0,0,1,0 +penenegro,1,0,2,0,1 penes,0,0,1,0,2 penetrator,0,0,1,0,0 penile,0,0,3,0,0 @@ -1705,6 +1746,7 @@ smoke crack,1,0,0,0,0 smoking crack,1,0,0,1,0 smol dk,2,0,2,2,3 smutty,0,0,1,0,0 +snapchat,0,0,0,0,1 sodomise,0,0,3,0,1 sodomite,0,0,3,0,0 sodomize,0,0,3,0,0 @@ -1833,6 +1875,7 @@ touchhisprivate,0,0,1,0,0 touchmyprivate,0,0,1,0,1 touchsmallchildren,0,0,1,0,1 touchsmallkids,0,0,1,0,1 +touchyall,0,0,1,0,1 touchyourprivate,0,0,1,0,1 towelhead,0,1,0,0,0 trailertrash,0,1,0,2,0 @@ -1915,6 +1958,8 @@ votre bite,2,0,2,1,0 votre mere,0,0,0,1,0 voyeur,0,0,1,0,0 vulva,0,0,3,0,0 +vvank,0,0,3,0,1 +vvanker,0,0,3,0,1 wank,0,0,3,0,0 wanker,0,0,3,0,0 wanks,0,0,2,0,0 @@ -1980,9 +2025,14 @@ you monkey,0,1,0,1,0 you mother,0,0,1,1,0 your adres,0,0,0,0,2 yourass,2,0,1,0,0 +your a trans,0,2,2,0,1 +your all trans,0,2,2,0,1 yourchit,1,0,0,2,2 yourdadleft,0,0,0,2,0 your gash,0,0,1,0,0 +youre a mistake,0,2,0,3,1 +youre a trans,0,2,2,0,1 +youre all trans,0,2,2,0,1 yourechit,1,0,0,2,2 youre hole,0,0,2,0,3 youremother,0,0,1,1,1 @@ -2041,6 +2091,8 @@ xyenneT,1,0,2,0,0 хуиня ебанная,1,0,0,0,0 xyNno,2,0,1,2,0 ybnюдok,1,0,1,0,0 +Заебался,2,0,0,0,0 +Хорни,0,0,2,0,0 鸡巴,1,0,3,0,0 天安門,0,0,1,0,0 天安门,0,0,1,0,0 @@ -2111,4 +2163,5 @@ ybnюдok,1,0,1,0,0 香港独立,0,1,0,0,0 澳门独立,0,1,0,0,0 他妈ㄉ王八蛋,2,0,0,2,0 -塞你老母,0,0,2,0,1 \ No newline at end of file +塞你老母,0,0,2,0,1 +弟妹做爱,0,0,3,0,0 \ No newline at end of file diff --git a/src/test_positive.txt b/src/test_positive.txt index 0dc04c0..c20b98a 100644 --- a/src/test_positive.txt +++ b/src/test_positive.txt @@ -2722,4 +2722,8 @@ nutt on you kid f # g g o t s p§§y kill all muslims -9/!! \ No newline at end of file +9/!! +ieatass +dog headed +kill myself +the n-word \ No newline at end of file