Skip to content
This repository has been archived by the owner on May 6, 2024. It is now read-only.

adding an sql script for sanitizing the prod mysql db #1052

Merged
merged 11 commits into from
May 2, 2014
86 changes: 86 additions & 0 deletions util/vpc-tools/sanitize-db.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
-- Turn off foreign-key enforcement for this session while the tables below are rewritten.
SET SESSION foreign_key_checks = 0;

/*
courseware_studentmodulehistory is only needed for analytics, so the
whole table is emptied.
NOTE(review): the table name is not schema-qualified, so it resolves
against the connection's default database -- confirm that is intended.
*/

TRUNCATE TABLE courseware_studentmodulehistory;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should probably put some conditional logic around this in case we handle leaving this out upstream, say by using manual export commands.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since it's not part of the sanitation I'll remove it and run the truncate separately.


/*
Rewrite all emails to use the SES simulator, simulating success.
Anonymize other user information.
Rows whose email ends in @edx.org are left untouched.

MySQL's UPDATE accepts exactly one SET list (comma-separated assignments)
followed by one WHERE clause, so every column is assigned in a single
pass under a single filter.

NOTE(review): rows with a NULL email do not satisfy NOT LIKE and are
therefore skipped -- confirm email can never be NULL here.
NOTE(review): password, last_login and date_joined are set to NULL; this
assumes those columns are nullable -- confirm against the schema.
Review discussion on `password = null`:
  - "I'm a little surprised this is nullable."
  - "That's the default, maybe set it to an empty string instead?"
  - "If it's nullable, I'm fine with this; I'm just surprised."
*/

UPDATE wwc.auth_user
SET
    email = CONCAT('success+', CAST(id AS CHAR), '@simulator.amazonses.com'),
    username = CONCAT('user-', CAST(id AS CHAR)),
    first_name = CONCAT('user-', CAST(id AS CHAR)),
    last_name = CONCAT('user-', CAST(id AS CHAR)),
    password = NULL,
    last_login = NULL,
    date_joined = NULL
WHERE email NOT LIKE '%@edx.org';

/*
A handful of email change requests were captured in flight; point each
pending address at the SES success simulator, keyed by user_id.
This applies to every row in the table (no filter).
*/

UPDATE wwc.student_pendingemailchange
SET
    new_email = CONCAT(
        'success+',
        CAST(user_id AS CHAR),
        '@simulator.amazonses.com'
    );

/*
The address format differs slightly from the other tables to prevent
creating duplicate email records. The user id isn't stored here, so the
row's own id is used instead. This email is probably not used for
sending mail, but rewriting it cannot hurt.
*/

UPDATE wwc.student_courseenrollmentallowed
SET
    email = CONCAT(
        'success+',
        'courseenrollmentallowed_',
        CAST(id AS CHAR),
        '@simulator.amazonses.com'
    );

/*
Set the name to a per-row placeholder and empty the other fields.
There is deliberately no WHERE clause: this also empties user profile
data for edx employees.

NOTE(review): the placeholder is built from this table's id column, not
from a user id column -- confirm that is the intended key.

String values use single quotes so they are always parsed as string
literals (double quotes become identifiers under ANSI_QUOTES).
*/

UPDATE wwc.auth_userprofile
SET
    name = CONCAT('user-', CAST(id AS CHAR)),
    language = '',
    location = '',
    meta = '',
    gender = NULL,
    mailing_address = NULL,
    year_of_birth = NULL,
    level_of_education = NULL,
    goals = NULL,
    country = '',
    city = NULL;

/*
Grader has its own django core tables, so its auth_user gets the same
treatment: emails point at the SES success simulator and the other
identifying columns are replaced or cleared.
Rows whose email ends in @edx.org are left untouched.

MySQL's UPDATE accepts exactly one SET list (comma-separated assignments)
followed by one WHERE clause, so every column is assigned in a single
pass under a single filter.

NOTE(review): rows with a NULL email do not satisfy NOT LIKE and are
therefore skipped -- confirm email can never be NULL here.
NOTE(review): password, last_login and date_joined are set to NULL; this
assumes those columns are nullable -- confirm against the schema.
*/

UPDATE prod_grader.auth_user
SET
    email = CONCAT('success+', CAST(id AS CHAR), '@simulator.amazonses.com'),
    username = CONCAT('user-', CAST(id AS CHAR)),
    first_name = CONCAT('user-', CAST(id AS CHAR)),
    last_name = CONCAT('user-', CAST(id AS CHAR)),
    password = NULL,
    last_login = NULL,
    date_joined = NULL
WHERE email NOT LIKE '%@edx.org';


-- Turn foreign-key enforcement back on for this session.
SET SESSION foreign_key_checks = 1;