From a856995fde8607dc1d52b8bf2b68945987a46ba7 Mon Sep 17 00:00:00 2001 From: Chris O'Hara Date: Wed, 2 Dec 2015 07:02:39 +1000 Subject: [PATCH] Make dot/extension removal optional when normalizing gmail addresses --- README.md | 2 +- test/sanitizers.js | 20 ++++++++++++++++++-- validator.js | 15 +++++++++++---- 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 90f9a71f6..627c14f15 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ $ bower install validator-js - **blacklist(input, chars)** - remove characters that appear in the blacklist. The characters are used in a RegExp and so you will need to escape some chars, e.g. `blacklist(input, '\\[\\]')`. - **escape(input)** - replace `<`, `>`, `&`, `'`, `"` and `/` with HTML entities. - **ltrim(input [, chars])** - trim characters from the left-side of the input. -- **normalizeEmail(email [, options])** - canonicalize an email address. `options` is an object which defaults to `{ lowercase: true }`. With `lowercase` set to `true`, the local part of the email address is lowercased for all domains; the hostname is always lowercased and the local part of the email address is always lowercased for hosts that are known to be case-insensitive (currently only GMail). Normalization follows special rules for known providers: currently, GMail addresses have dots removed in the local part and are stripped of tags (e.g. `some.one+tag@gmail.com` becomes `someone@gmail.com`) and all `@googlemail.com` addresses are normalized to `@gmail.com`. +- **normalizeEmail(email [, options])** - canonicalize an email address. `options` is an object which defaults to `{ lowercase: true, remove_dots: true, remove_extension: true }`. With `lowercase` set to `true`, the local part of the email address is lowercased for all domains; the hostname is always lowercased and the local part of the email address is always lowercased for hosts that are known to be case-insensitive (currently only GMail). 
Normalization follows special rules for known providers: currently, GMail addresses have dots removed in the local part when `remove_dots` is `true` and are stripped of tags when `remove_extension` is `true` (e.g. with both enabled `some.one+tag@gmail.com` becomes `someone@gmail.com`), and all `@googlemail.com` addresses are normalized to `@gmail.com`. - **rtrim(input [, chars])** - trim characters from the right-side of the input. - **stripLow(input [, keep_new_lines])** - remove characters with a numerical value < 32 and 127, mostly control characters. If `keep_new_lines` is `true`, newline characters are preserved (`\n` and `\r`, hex `0xA` and `0xD`). Unicode-safe in JavaScript. - **toBoolean(input [, strict])** - convert the input to a boolean. Everything except for `'0'`, `'false'` and `''` returns `true`. In strict mode only `'1'` and `'true'` return `true`. diff --git a/test/sanitizers.js b/test/sanitizers.js index e24c9f815..04d7020d2 100644 --- a/test/sanitizers.js +++ b/test/sanitizers.js @@ -198,7 +198,9 @@ describe('Sanitizers', function () { , 'an invalid email address': false , '': false , '+extension@gmail.com': false - // some.name.midd..leNa...me...+extension@GoogleMail.com was removed from test cases because of a bug with validator.isEmail. 
See issue #258 + , '...@gmail.com': false + , '.+extension@googlemail.com': false + , 'some.name.midd..leNa...me...+extension@GoogleMail.com': 'somenamemiddlename@gmail.com' } }); test({ @@ -211,13 +213,27 @@ describe('Sanitizers', function () { , 'TEST@me.com': 'TEST@me.com' , 'TEST@ME.COM': 'TEST@me.com' , 'blAH@x.com': 'blAH@x.com' - + // Domains that are known for being case-insensitive are always lowercased , 'SOME.name@GMAIL.com': 'somename@gmail.com' , 'SOME.name.middleName+extension@GoogleMail.com': 'somenamemiddlename@gmail.com' , 'SOME.name.midd.leNa.me.+extension@gmail.com': 'somenamemiddlename@gmail.com' } }); + test({ + sanitizer: 'normalizeEmail' + , args: [{remove_dots: false}] + , expect: { + 'SOME.name@GMAIL.com': 'some.name@gmail.com' + } + }); + test({ + sanitizer: 'normalizeEmail' + , args: [{remove_extension: false}] + , expect: { + 'foo+bar@gmail.com': 'foo+bar@gmail.com' + } + }); }); }); diff --git a/validator.js b/validator.js index fbfc7356c..71c2de36c 100644 --- a/validator.js +++ b/validator.js @@ -777,7 +777,9 @@ }; var default_normalize_email_options = { - lowercase: true + lowercase: true, + remove_dots: true, + remove_extension: true }; validator.normalizeEmail = function (email, options) { @@ -788,11 +790,16 @@ var parts = email.split('@', 2); parts[1] = parts[1].toLowerCase(); if (parts[1] === 'gmail.com' || parts[1] === 'googlemail.com') { - parts[0] = parts[0].toLowerCase().replace(/\./g, ''); - if (parts[0][0] === '+') { + if (options.remove_extension) { + parts[0] = parts[0].split('+')[0]; + } + if (options.remove_dots) { + parts[0] = parts[0].replace(/\./g, ''); + } + if (!parts[0].length) { return false; } - parts[0] = parts[0].split('+')[0]; + parts[0] = parts[0].toLowerCase(); parts[1] = 'gmail.com'; } else if (options.lowercase) { parts[0] = parts[0].toLowerCase();