Skip to content

Commit

Permalink
Scrubbing sensitive information (microsoft#56)
Browse files Browse the repository at this point in the history
* First draft of scrubbing sensitive information

* Fixing bugs in handling sensitive data

* Porting a previous PR (from master) to address an edge case with internal element ordering

* Bumping version to b20

* Bug fixes to improve visualization

* Toggling sensitive content scrubbing by default

* Mangling user input in sensitive mode

* Handle non-standard markup in visualization
  • Loading branch information
sarveshnagpal committed Oct 29, 2020
1 parent ad6cc01 commit 86d126d
Show file tree
Hide file tree
Showing 23 changed files with 203 additions and 89 deletions.
2 changes: 1 addition & 1 deletion lerna.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"packages": [
"packages/*"
],
"version": "0.6.0-beta.19",
"version": "0.6.0-beta.20",
"npmClient": "yarn",
"useWorkspaces": true
}
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "clarity",
"private": true,
"version": "0.6.0-beta.19",
"version": "0.6.0-beta.20",
"repository": "https://github.com/microsoft/clarity.git",
"author": "Sarvesh Nagpal <sarveshn@microsoft.com>",
"license": "MIT",
Expand Down
4 changes: 2 additions & 2 deletions packages/clarity-decode/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "clarity-decode",
"version": "0.6.0-beta.19",
"version": "0.6.0-beta.20",
"description": "An analytics library that uses web page interactions to generate aggregated insights",
"author": "Microsoft Corp.",
"license": "MIT",
Expand All @@ -26,7 +26,7 @@
"url": "https://github.com/Microsoft/clarity/issues"
},
"dependencies": {
"clarity-js": "^0.6.0-beta.19"
"clarity-js": "^0.6.0-beta.20"
},
"devDependencies": {
"@rollup/plugin-commonjs": "^11.1.0",
Expand Down
2 changes: 1 addition & 1 deletion packages/clarity-decode/src/layout.ts
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ function process(node: any[] | number[], tagIndex: number): DomData {
let lastChar = token[token.length - 1];
if (i === (node.length - 1) && output.tag === "STYLE") {
value = token;
} else if (lastChar === ">" && keyIndex === -1) {
} else if (output.tag !== Layout.Constant.TextTag && lastChar === ">" && keyIndex === -1) {
prefix = token;
} else if (output.tag !== Layout.Constant.TextTag && firstChar === Layout.Constant.Box && keyIndex === -1) {
let parts = token.substr(1).split(Layout.Constant.Period);
Expand Down
8 changes: 4 additions & 4 deletions packages/clarity-devtools/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "clarity-devtools",
"version": "0.6.0-beta.19",
"version": "0.6.0-beta.20",
"private": true,
"description": "Adds Clarity debugging support to browser devtools",
"author": "Microsoft Corp.",
Expand All @@ -24,9 +24,9 @@
"url": "https://github.com/Microsoft/clarity/issues"
},
"dependencies": {
"clarity-decode": "^0.6.0-beta.19",
"clarity-js": "^0.6.0-beta.19",
"clarity-visualize": "^0.6.0-beta.19"
"clarity-decode": "^0.6.0-beta.20",
"clarity-js": "^0.6.0-beta.20",
"clarity-visualize": "^0.6.0-beta.20"
},
"devDependencies": {
"@rollup/plugin-node-resolve": "^7.1.3",
Expand Down
2 changes: 1 addition & 1 deletion packages/clarity-devtools/static/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"name": "Clarity Developer Tools",
"description": "Get insights about how customers use your website.",
"version": "0.6.0",
"version_name": "0.6.0-beta.19",
"version_name": "0.6.0-beta.20",
"minimum_chrome_version": "50",
"devtools_page": "devtools.html",
"icons": {
Expand Down
2 changes: 1 addition & 1 deletion packages/clarity-js/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "clarity-js",
"version": "0.6.0-beta.19",
"version": "0.6.0-beta.20",
"description": "An analytics library that uses web page interactions to generate aggregated insights",
"author": "Microsoft Corp.",
"license": "MIT",
Expand Down
2 changes: 1 addition & 1 deletion packages/clarity-js/src/core/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ let config: Config = {
cssRules: false,
lean: true,
track: true,
content: false,
content: true,
mask: [],
unmask: [],
regions: {},
Expand Down
17 changes: 0 additions & 17 deletions packages/clarity-js/src/core/mask.ts

This file was deleted.

101 changes: 101 additions & 0 deletions packages/clarity-js/src/core/scrub.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
import { Privacy } from "@clarity-types/core";
import * as Data from "@clarity-types/data";
import * as Layout from "@clarity-types/layout";

/**
 * Scrubs a captured string according to the privacy level of the node it came from.
 *
 * @param value   Raw string captured from the page. Falsy values are returned untouched.
 * @param hint    Where the string came from: the text-node tag, an attribute name such as
 *                "value" / "placeholder" / "src", or an interaction kind such as "input" / "click".
 * @param privacy Privacy level that applies to the originating node.
 * @param mangle  Only consulted for text nodes under Text / TextImage privacy: when true the text
 *                is collapsed with mangleText, otherwise it is masked character by character.
 * @returns The scrubbed string, or the original value when no rule applies to the hint.
 */
export default function(value: string, hint: string, privacy: Privacy, mangle: boolean = false): string {
    if (value) {
        switch (privacy) {
            case Privacy.None:
                return value;
            case Privacy.Sensitive:
                switch (hint) {
                    case Layout.Constant.TextTag:
                    case "value":
                    case "placeholder":
                    // Click text is copied from page content, so it receives the same redaction as text nodes.
                    // Without this case the "click" hint matched nothing and the text was returned in the clear.
                    case "click":
                        return redact(value);
                    case "input":
                        return mangleToken(value);
                }
                return value;
            case Privacy.Text:
            case Privacy.TextImage:
                switch (hint) {
                    case Layout.Constant.TextTag:
                        return mangle ? mangleText(value) : mask(value);
                    case "src":
                    case "srcset":
                    case "title":
                    case "alt":
                        // These attributes are only dropped when images are masked as well
                        return privacy === Privacy.TextImage ? Data.Constant.Empty : value;
                    case "value":
                    case "input":
                    // Interaction payloads ("input", "click") must never leave a masked node unscrubbed
                    case "click":
                        return mangleToken(value);
                    case "placeholder":
                        return mask(value);
                }
                break;
        }
    }
    return value;
}

/**
 * Collapses the visible part of a text value into its length, written in base 36.
 * Leading and trailing whitespace is kept as-is; a value that is empty or
 * whitespace-only is returned unchanged.
 */
function mangleText(value: string): string {
    const core = value.trim();
    if (core.length === 0) { return value; }

    // Everything before the trimmed content is whitespace, so the first match of the content is where it starts
    const start = value.indexOf(core);
    const leading = value.slice(0, start);
    const trailing = value.slice(start + core.length);
    return leading + core.length.toString(36) + trailing;
}

/**
 * Replaces every non-whitespace character with the mask constant.
 * Whitespace is left in place, so word boundaries and overall length are preserved.
 */
function mask(value: string): string {
    const nonWhitespace = /\S/g;
    return value.replace(nonWhitespace, Data.Constant.Mask);
}

/**
 * Builds a placeholder for user-entered text that carries none of the original characters.
 * The output length is the smallest multiple of Data.Setting.WordLength that is strictly greater
 * than the input length, so inputs within the same length bucket produce identical output.
 * Every position is a mask character, except non-zero multiples of the word length, which are spaces.
 */
function mangleToken(value: string): string {
    const size = Data.Setting.WordLength;
    const total = (Math.floor(value.length / size) + 1) * size;
    let result: string = Layout.Constant.Empty;
    let position = 0;
    while (position < total) {
        const atWordBreak = position > 0 && position % size === 0;
        result += atWordBreak ? Data.Constant.Space : Data.Constant.Mask;
        position++;
    }
    return result;
}

/**
 * Masks every word that contains a digit or an "@" sign and leaves all other words intact.
 * Words are delimited by space, tab, line feed or carriage return; the delimiters themselves are kept.
 * Returns the original string instance when nothing had to be masked.
 */
function redact(value: string): string {
    let chars: string[] | null = null; // Created lazily, only once something actually needs masking
    let lastSpace = -1; // Index of the most recent whitespace character; -1 until one is seen
    let sawDigit = false;
    let sawAt = false;
    const last = value.length - 1;

    for (let pos = 0; pos <= last; pos++) {
        const code = value.charCodeAt(pos);
        if (code >= 48 && code <= 57) { sawDigit = true; } // 0-9 within the current word
        if (code === 64) { sawAt = true; } // @ within the current word
        const isSpace = code === 32 || code === 9 || code === 10 || code === 13; // blank | \t | \n | \r

        // Only the first character, the last character, and whitespace trigger an evaluation of the current word
        if (!isSpace && pos !== 0 && pos !== last) { continue; }

        if (sawDigit || sawAt) {
            if (chars === null) { chars = value.split(Data.Constant.Empty); }
            // At whitespace the word ends just before pos; otherwise the character at pos is part of the word
            mutate(chars, lastSpace, isSpace ? pos : pos + 1);
        }

        // A whitespace character starts a fresh word: clear the flags and remember where it was
        if (isSpace) {
            sawDigit = false;
            sawAt = false;
            lastSpace = pos;
        }
    }

    return chars ? chars.join(Data.Constant.Empty) : value;
}

/**
 * Overwrites array entries in place with the mask constant.
 * The range is exclusive on both sides: indices start + 1 through end - 1 are replaced.
 */
function mutate(array: string[], start: number, end: number): void {
    let cursor = start + 1;
    while (cursor < end) {
        array[cursor] = Data.Constant.Mask;
        cursor++;
    }
}
2 changes: 1 addition & 1 deletion packages/clarity-js/src/core/version.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
let version = "0.6.0-b19";
let version = "0.6.0-b20";
export default version;
5 changes: 2 additions & 3 deletions packages/clarity-js/src/interaction/encode.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import { Event, Token } from "@clarity-types/data";
import { Privacy } from "@clarity-types/layout";
import mask from "@src/core/mask";
import scrub from "@src/core/scrub";
import { time } from "@src/core/time";
import * as baseline from "@src/data/baseline";
import { queue } from "@src/data/upload";
Expand Down Expand Up @@ -55,7 +54,7 @@ export default async function (type: Event): Promise<void> {
tokens.push(entry.data.button);
tokens.push(entry.data.reaction);
tokens.push(entry.data.context);
tokens.push(cTarget.privacy !== Privacy.None ? mask(entry.data.text) : entry.data.text);
tokens.push(scrub(entry.data.text, "click", cTarget.privacy));
tokens.push(entry.data.link);
tokens.push(cTarget.hash);
if (cTarget.region) { tokens.push(cTarget.region); }
Expand Down
5 changes: 2 additions & 3 deletions packages/clarity-js/src/interaction/input.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import { Event } from "@clarity-types/data";
import { InputData, InputState, Setting } from "@clarity-types/interaction";
import { Privacy } from "@clarity-types/layout";
import { bind } from "@src/core/event";
import mask from "@src/core/mask";
import scrub from "@src/core/scrub";
import { schedule } from "@src/core/task";
import { time } from "@src/core/time";
import { clearTimeout, setTimeout } from "@src/core/timeout";
Expand Down Expand Up @@ -35,7 +34,7 @@ function recompute(evt: UIEvent): void {
v = input.value;
break;
default:
v = value.metadata.privacy !== Privacy.None ? mask(input.value) : input.value;
v = scrub(input.value, "input", value.metadata.privacy);
break;
}

Expand Down
59 changes: 38 additions & 21 deletions packages/clarity-js/src/layout/dom.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { Privacy } from "@clarity-types/core";
import { Setting } from "@clarity-types/data";
import { Constant, NodeChange, NodeInfo, NodeValue, Privacy, Source } from "@clarity-types/layout";
import { Constant, NodeChange, NodeInfo, NodeValue, Source } from "@clarity-types/layout";
import config from "@src/core/config";
import { time } from "@src/core/time";
import selector from "@src/layout/selector";
Expand All @@ -8,7 +9,8 @@ let index: number = 1;

// Reference: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/Input#%3Cinput%3E_types
const DISALLOWED_TYPES = ["password", "hidden", "email", "tel"];
const DISALLOWED_NAMES = ["address", "cell", "code", "dob", "email", "mobile", "name", "phone", "secret", "social", "ssn", "tel", "zip"];
const DISALLOWED_NAMES = ["addr", "cell", "code", "dob", "email", "mob", "name", "phone", "secret", "social", "ssn", "tel", "zip", "pass"];
const DISALLOWED_MATCH = ["address", "password", "contact"];

let nodes: Node[] = [];
let values: NodeValue[] = [];
Expand Down Expand Up @@ -64,7 +66,7 @@ export function parse(root: ParentNode): void {
for (const entry of config.mask) {
let elements = root.querySelectorAll(entry);
for (let i = 0; i < elements.length; i++) {
privacyMap.set(elements[i], Privacy.MaskTextImage);
privacyMap.set(elements[i], Privacy.TextImage);
}
}

Expand Down Expand Up @@ -93,7 +95,7 @@ export function add(node: Node, parent: Node, data: NodeInfo, source: Source): v
let id = getId(node, true);
let parentId = parent ? getId(parent) : null;
let previousId = getPreviousId(node);
let privacy = config.content ? Privacy.None : Privacy.MaskText;
let privacy = config.content ? Privacy.Sensitive : Privacy.Text;
let parentValue = null;
let parentTag = Constant.Empty;
let regionId = regionMap.has(node) ? getId(node) : null;
Expand Down Expand Up @@ -136,6 +138,7 @@ export function update(node: Node, parent: Node, data: NodeInfo, source: Source)
let parentId = parent ? getId(parent) : null;
let previousId = getPreviousId(node);
let changed = false;
let parentChanged = false;

if (id in values) {
let value = values[id];
Expand All @@ -154,7 +157,7 @@ export function update(node: Node, parent: Node, data: NodeInfo, source: Source)
value.parent = parentId;
// Move this node to the right location under new parent
if (parentId !== null && parentId >= 0) {
let childIndex = previousId === null ? values[parentId].children.length : values[parentId].children.indexOf(previousId) + 1;
let childIndex = previousId === null ? 0 : values[parentId].children.indexOf(previousId) + 1;
values[parentId].children.splice(childIndex, 0, id);
// Update region after the move
value.region = regionMap.has(node) ? getId(node) : values[parentId].region;
Expand All @@ -170,6 +173,7 @@ export function update(node: Node, parent: Node, data: NodeInfo, source: Source)
values[oldParentId].children.splice(nodeIndex, 1);
}
}
parentChanged = true;
}

// Update data
Expand All @@ -183,7 +187,7 @@ export function update(node: Node, parent: Node, data: NodeInfo, source: Source)
// Update selector
updateSelector(value);
metadata(data.tag, id, parentId);
track(id, source, changed);
track(id, source, changed, parentChanged);
}
}

Expand Down Expand Up @@ -220,24 +224,36 @@ function getPrivacy(node: Node, data: NodeInfo, parentTag: string, privacy: Priv
// Do not proceed if attributes are missing for the node
if (attributes === null || attributes === undefined) { return privacy; }

// Check for disallowed list of fields (e.g. address, phone, etc.) only if the input node is not already masked
if (privacy === Privacy.None && tag === Constant.InputTag) {
let field: string = Constant.Empty;
// Be aggressive in looking up any attribute (id, class, name, etc.) for disallowed names
for (const attribute of Object.keys(attributes)) { field += attributes[attribute].toLowerCase(); }
for (let name of DISALLOWED_NAMES) {
if (field.indexOf(name) >= 0) {
privacy = Privacy.MaskText;
continue;
// Look up for sensitive fields
if (Constant.Class in attributes && privacy === Privacy.Sensitive) {
for (let match of DISALLOWED_MATCH) {
if (attributes[Constant.Class].indexOf(match) >= 0) {
privacy = Privacy.Text;
break;
}
}
}

// Check for disallowed list of fields (e.g. address, phone, etc.) only if the input node is not already masked
if (tag === Constant.InputTag) {
if (privacy === Privacy.None) {
let field: string = Constant.Empty;
// Be aggressive in looking up any attribute (id, class, name, etc.) for disallowed names
for (const attribute of Object.keys(attributes)) { field += attributes[attribute].toLowerCase(); }
for (let name of DISALLOWED_NAMES) {
if (field.indexOf(name) >= 0) {
privacy = Privacy.Text;
break;
}
}
} else if (privacy === Privacy.Sensitive) { privacy = Privacy.Text; }
}

// Check for disallowed list of types (e.g. password, email, etc.) and set the masked property appropriately
if (Constant.Type in attributes && DISALLOWED_TYPES.indexOf(attributes[Constant.Type]) >= 0) { privacy = Privacy.MaskText; }
if (Constant.Type in attributes && DISALLOWED_TYPES.indexOf(attributes[Constant.Type]) >= 0) { privacy = Privacy.Text; }

// Following two conditions supersede any of the above. If there are explicit instructions to mask / unmask a field, we honor that.
if (Constant.MaskData in attributes) { privacy = Privacy.MaskTextImage; }
if (Constant.MaskData in attributes) { privacy = Privacy.TextImage; }
if (Constant.UnmaskData in attributes) { privacy = Privacy.None; }

// If it's a text node belonging to a STYLE or TITLE tag; then reset the privacy setting to ensure we capture the content
Expand Down Expand Up @@ -360,10 +376,11 @@ function size(value: NodeValue, parent: NodeValue): void {

// If this element is a text node, is masked, and longer than configured length, then track box model for the parent element
let isLongText = tag === Constant.TextTag && data.value && data.value.length > Setting.ResizeObserverThreshold;
if (isLongText && value.metadata.privacy !== Privacy.None && parent && parent.metadata.size === null) { parent.metadata.size = []; }
let isMasked = value.metadata.privacy === Privacy.Text || value.metadata.privacy === Privacy.TextImage;
if (isLongText && isMasked && parent && parent.metadata.size === null) { parent.metadata.size = []; }

// If this element is a image node, and is masked, then track box model for the current element
if (data.tag === Constant.ImageTag && value.metadata.privacy === Privacy.MaskTextImage) { value.metadata.size = []; }
if (data.tag === Constant.ImageTag && value.metadata.privacy === Privacy.TextImage) { value.metadata.size = []; }
}

function metadata(tag: string, id: number, parentId: number): void {
Expand Down Expand Up @@ -426,12 +443,12 @@ function copy(input: NodeValue[]): NodeValue[] {
return JSON.parse(JSON.stringify(input));
}

function track(id: number, source: Source, changed: boolean = true): void {
function track(id: number, source: Source, changed: boolean = true, parentChanged: boolean = false): void {
// Keep track of the order in which mutations happened, they may not be sequential
// Edge case: If an element is added later on, and pre-discovered element is moved as a child.
// In that case, we need to reorder the pre-discovered element in the update list to keep visualization consistent.
let uIndex = updateMap.indexOf(id);
if (uIndex >= 0 && source === Source.ChildListAdd) {
if (uIndex >= 0 && source === Source.ChildListAdd && parentChanged) {
updateMap.splice(uIndex, 1);
updateMap.push(id);
} else if (uIndex === -1 && changed) { updateMap.push(id); }
Expand Down
Loading

0 comments on commit 86d126d

Please sign in to comment.