Skip to content

Commit

Permalink
Merge pull request openedx-unsupported#1052 from edx/jarv/add-clone-db
Browse files Browse the repository at this point in the history
adding an sql script for sanitizing the prod mysql db
  • Loading branch information
jarv committed May 2, 2014
2 parents d9b9bdb + 360e571 commit 0dafb4f
Show file tree
Hide file tree
Showing 4 changed files with 286 additions and 2 deletions.
122 changes: 122 additions & 0 deletions playbooks/edx-east/update_edxapp_db_users.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# This is a utility play to setup the db users on the edxapp db
#
# The mysql root user MUST be passed in as an extra var
#
# the environment and deployment must be passed in as COMMON_ENVIRONMENT
# and COMMON_DEPLOYMENT. These two vars should be set in the secret
# var file for the corresponding vpc stack
#
# Example invocation:
#
# Update the db users on the edxapp db:
#
# ansible-playbook -i localhost, update_edxapp_db_users.yml -e@/path/to/secrets.yml -e "edxapp_db_root_user=root edxapp_db_root_pass=password"
#

- name: Update db users on the edxapp db
  hosts: all
  gather_facts: false
  vars:
    # The string 'None' stands for "root credentials not supplied".
    # Every mysql_user task below is skipped while db_user is 'None'.
    edxapp_db_root_user: 'None'
    edxapp_db_root_pass: 'None'

  tasks:
    # Stop early when the environment/deployment names are missing.
    - fail:
        msg: "COMMON_ENVIRONMENT and COMMON_DEPLOYMENT need to be defined to use this play"
      when: COMMON_ENVIRONMENT is not defined or COMMON_DEPLOYMENT is not defined

    # Read-only user: SELECT on every database.
    - name: assign mysql user permissions for read_only user
      mysql_user:
        login_host: "{{ item.db_host }}"
        login_user: "{{ item.db_user }}"
        login_password: "{{ item.db_pass }}"
        name: "{{ COMMON_MYSQL_READ_ONLY_USER }}"
        password: "{{ COMMON_MYSQL_READ_ONLY_PASS }}"
        host: '%'
        priv: "*.*:SELECT"
        append_privs: true
      when: item.db_user != 'None'
      with_items:
        - db_host: "{{ EDXAPP_MYSQL_HOST|default('None') }}"
          db_name: "{{ EDXAPP_MYSQL_DB_NAME|default('None') }}"
          db_user: "{{ edxapp_db_root_user }}"
          db_pass: "{{ edxapp_db_root_pass }}"

    # Migrate user: data and schema privileges on the edxapp database only.
    - name: assign mysql user permissions for migrate user
      mysql_user:
        login_host: "{{ item.db_host }}"
        login_user: "{{ item.db_user }}"
        login_password: "{{ item.db_pass }}"
        name: "{{ COMMON_MYSQL_MIGRATE_USER }}"
        password: "{{ COMMON_MYSQL_MIGRATE_PASS }}"
        host: '%'
        priv: "{{ item.db_name }}.*:SELECT,INSERT,UPDATE,DELETE,ALTER,CREATE,DROP,INDEX"
        append_privs: true
      when: item.db_user != 'None'
      with_items:
        - db_host: "{{ EDXAPP_MYSQL_HOST|default('None') }}"
          db_name: "{{ EDXAPP_MYSQL_DB_NAME|default('None') }}"
          db_user: "{{ edxapp_db_root_user }}"
          db_pass: "{{ edxapp_db_root_pass }}"

    # Admin user: global CREATE USER privilege.
    - name: assign mysql user permissions for admin user
      mysql_user:
        login_host: "{{ item.db_host }}"
        login_user: "{{ item.db_user }}"
        login_password: "{{ item.db_pass }}"
        name: "{{ COMMON_MYSQL_ADMIN_USER }}"
        password: "{{ COMMON_MYSQL_ADMIN_PASS }}"
        host: '%'
        priv: "*.*:CREATE USER"
        append_privs: true
      when: item.db_user != 'None'
      with_items:
        - db_host: "{{ EDXAPP_MYSQL_HOST|default('None') }}"
          db_user: "{{ edxapp_db_root_user }}"
          db_pass: "{{ edxapp_db_root_pass }}"

    # Application user: read/write on the edxapp database.
    # Note that append_privs is not set on this task.
    - name: assign mysql user permissions for db users
      mysql_user:
        login_host: "{{ item.db_host }}"
        login_user: "{{ item.db_user }}"
        login_password: "{{ item.db_pass }}"
        name: "{{ item.db_user_to_modify }}"
        password: "{{ item.db_user_to_modify_pass }}"
        host: '%'
        priv: "{{ item.db_name }}.*:SELECT,INSERT,UPDATE,DELETE"
      when: item.db_user != 'None'
      with_items:
        # The default() filters keep ansible from raising
        # "variable undefined" errors when these values are
        # absent from the secret vars.
        - db_name: "{{ EDXAPP_MYSQL_DB_NAME|default('None') }}"
          db_host: "{{ EDXAPP_MYSQL_HOST|default('None') }}"
          db_user: "{{ edxapp_db_root_user|default('None') }}"
          db_pass: "{{ edxapp_db_root_pass|default('None') }}"
          db_user_to_modify: "{{ EDXAPP_MYSQL_USER }}"
          db_user_to_modify_pass: "{{ EDXAPP_MYSQL_PASSWORD }}"

    # This task targets the same user as the previous one, so it
    # needs append_privs enabled; otherwise it would overwrite the
    # privileges granted by the previous task.
    # As a consequence both tasks report changed on every ansible run.
    - name: assign mysql user permissions for db test user
      mysql_user:
        login_host: "{{ item.db_host }}"
        login_user: "{{ item.db_user }}"
        login_password: "{{ item.db_pass }}"
        name: "{{ item.db_user_to_modify }}"
        password: "{{ item.db_user_to_modify_pass }}"
        host: '%'
        priv: "{{ COMMON_ENVIRONMENT }}_{{ COMMON_DEPLOYMENT }}_test_{{ item.db_name }}.*:ALL"
        append_privs: true
      when: item.db_user != 'None'
      with_items:
        # The default() filters keep ansible from raising
        # "variable undefined" errors when these values are
        # absent from the secret vars.
        - db_name: "{{ EDXAPP_MYSQL_DB_NAME|default('None') }}"
          db_host: "{{ EDXAPP_MYSQL_HOST|default('None') }}"
          db_user: "{{ edxapp_db_root_user|default('None') }}"
          db_pass: "{{ edxapp_db_root_pass|default('None') }}"
          db_user_to_modify: "{{ EDXAPP_MYSQL_USER }}"
          db_user_to_modify_pass: "{{ EDXAPP_MYSQL_PASSWORD }}"
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ PyYAML==3.11
Jinja2==2.7.2
MarkupSafe==0.21
argparse==1.2.1
boto==2.20.1
boto==2.27.0
ecdsa==0.11
paramiko==1.13.0
pycrypto==2.6.1
Expand Down
83 changes: 82 additions & 1 deletion util/vpc-tools/db-clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import datetime
import sys
from vpcutil import rds_subnet_group_name_for_stack_name, all_stack_names
import os

description = """
Expand All @@ -33,6 +34,23 @@
'db.m2.4xlarg',
]

# Security group to assign to a db once it has been cloned,
# keyed by stack name.
SG_GROUPS = {'stage-edx': 'sg-d2f623b7'}

# Security group allowing full access to port 3306, keyed by
# stack name. The group must already exist; it is assigned only
# temporarily, while the db is being cleaned.
SG_GROUPS_FULL = {'stage-edx': 'sg-0abf396f'}


def parse_args(args=sys.argv[1:]):

Expand All @@ -49,10 +67,24 @@ def parse_args(args=sys.argv[1:]):
default='db.m1.small', help='RDS size to create instances of')
parser.add_argument('-d', '--db-source', choices=dbs,
default=u'stage-edx', help="source db to clone")
parser.add_argument('-p', '--password', required=True,
parser.add_argument('-p', '--password',
help="password for the new database", metavar="NEW PASSWORD")
parser.add_argument('-r', '--region', default='us-east-1',
help="region to connect to")
parser.add_argument('--dns',
help="dns entry for the new rds instance")
parser.add_argument('--security-group', action="store_true",
default=False,
help="add sg group from SG_GROUPS")
parser.add_argument('--clean', action="store_true",
default=False,
help="clean the db after launching it into the vpc, removing sensitive data")
parser.add_argument('--dump', action="store_true",
default=False,
help="create a sql dump after launching it into the vpc")
parser.add_argument('--secret-var-file',
help="using a secret var file run ansible against the host to update db users")

return parser.parse_args(args)


Expand All @@ -71,6 +103,8 @@ def wait_on_db_status(db_name, region='us-east-1', wait_on='available', aws_id=N

if __name__ == '__main__':
args = parse_args()
sanitize_sql_file = os.path.join(os.path.dirname(os.path.realpath(__file__)), "sanitize-db.sql")
play_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../playbooks/edx-east")

rds = boto.rds2.connect_to_region(args.region)
subnet_name = rds_subnet_group_name_for_stack_name(args.stack_name)
Expand All @@ -82,3 +116,50 @@ def wait_on_db_status(db_name, region='us-east-1', wait_on='available', aws_id=N
db_instance_class=args.type,
db_subnet_group_name=subnet_name)
wait_on_db_status(restore_dbid)

# Post-restore steps: look up the new instance's endpoint, optionally set the
# master password / security groups, then optionally sanitize the data,
# update the db users via ansible and create a DNS entry.
import subprocess  # local import: only this part of the script runs external commands

describe_result = rds.describe_db_instances(restore_dbid)
instance = describe_result['DescribeDBInstancesResponse']['DescribeDBInstancesResult']['DBInstances'][0]
db_host = instance['Endpoint']['Address']

if args.password or args.security_group:
    modify_args = dict(
        apply_immediately=True
    )
    if args.password:
        modify_args['master_user_password'] = args.password
    if args.security_group:
        # The full-access group is attached only for the steps below and is
        # removed again in the ``finally`` clause.
        modify_args['vpc_security_group_ids'] = [SG_GROUPS[args.stack_name], SG_GROUPS_FULL[args.stack_name]]

    # Update the db immediately
    # NOTE(review): nothing here waits for the modification to finish before
    # the steps below connect to the instance -- confirm this is sufficient.
    rds.modify_db_instance(restore_dbid, **modify_args)

# Labels of steps that did not succeed; reported (non-zero exit) at the end.
failed_steps = []

# --clean and --secret-var-file both log in as root with --password, which is
# optional. Without it they would run with the literal password "None", so
# skip them and report the problem instead.
have_root_pass = bool(args.password)
if (args.clean or args.secret_var_file) and not have_root_pass:
    sys.stderr.write("--password is required for --clean and --secret-var-file; skipping those steps\n")
    failed_steps.append("missing --password")

try:
    if args.clean and have_root_pass:
        # Run the mysql clean sql file. Arguments are passed as a list (no
        # shell), so special characters in the password are not interpreted,
        # and the echoed command never contains the password.
        print("Running mysql -u root -p<redacted> -h{db_host} < {sanitize_sql_file}".format(
            db_host=db_host,
            sanitize_sql_file=sanitize_sql_file))
        with open(sanitize_sql_file) as sql_input:
            clean_status = subprocess.call(
                ['mysql', '-u', 'root', '-p' + args.password, '-h' + db_host],
                stdin=sql_input)
        if clean_status != 0:
            failed_steps.append("clean")

    if args.secret_var_file and have_root_pass:
        extra_vars = "edxapp_db_root_user=root edxapp_db_root_pass={root_pass} EDXAPP_MYSQL_HOST={db_host}"
        db_cmd = ['ansible-playbook', '-c', 'local', '-i', '127.0.0.1,',
                  'update_edxapp_db_users.yml',
                  '-e', '@' + args.secret_var_file,
                  '-e', extra_vars.format(root_pass=args.password, db_host=db_host)]
        # Echo the command with the password masked.
        print("Running cd {play_path} && {cmd}".format(
            play_path=play_path,
            cmd=" ".join(db_cmd[:-1] + [extra_vars.format(root_pass='<redacted>', db_host=db_host)])))
        # cwd replaces the original "cd {play_path} &&" prefix.
        if subprocess.call(db_cmd, cwd=play_path) != 0:
            failed_steps.append("update db users")

    if args.dns:
        dns_cmd = ['ansible-playbook', '-c', 'local', '-i', '127.0.0.1,',
                   'create_cname.yml',
                   '-e', "dns_zone=edx.org dns_name={dns} sandbox={db_host}".format(
                       dns=args.dns,
                       db_host=db_host)]
        print("Running cd {play_path} && {cmd}".format(
            play_path=play_path,
            cmd=" ".join(dns_cmd)))
        if subprocess.call(dns_cmd, cwd=play_path) != 0:
            failed_steps.append("dns")
finally:
    if args.security_group:
        # remove full mysql access from within the vpc, even when one of the
        # steps above raised
        rds.modify_db_instance(restore_dbid, vpc_security_group_ids=[SG_GROUPS[args.stack_name]])

if failed_steps:
    # sys.exit with a string prints it to stderr and exits with status 1.
    sys.exit("The following steps failed: {}".format(", ".join(failed_steps)))
81 changes: 81 additions & 0 deletions util/vpc-tools/sanitize-db.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
SET FOREIGN_KEY_CHECKS = 0;

/*
  Clear every stored password hash. There is no WHERE clause,
  so edx employee accounts are included.
*/

UPDATE wwc.auth_user
   SET password = NULL;

UPDATE wwc.student_passwordhistory
   SET password = NULL;

/*
  Point all emails at the SES simulator (simulating success)
  and anonymize the remaining user fields.
  Accounts with an @edx.org email are left untouched.
*/

UPDATE wwc.auth_user
   SET email       = CONCAT('success+', CAST(id AS CHAR), '@simulator.amazonses.com'),
       username    = CONCAT('user-', CAST(id AS CHAR)),
       first_name  = CONCAT('user-', CAST(id AS CHAR)),
       last_name   = CONCAT('user-', CAST(id AS CHAR)),
       last_login  = NULL,
       date_joined = NULL
 WHERE email NOT LIKE ('%@edx.org');

/*
  A handful of email change requests are captured in flight;
  rewrite their target addresses too.
*/

UPDATE wwc.student_pendingemailchange
   SET new_email = CONCAT('success+', CAST(user_id AS CHAR), '@simulator.amazonses.com');

/*
  The address format differs slightly here to prevent creating
  duplicate email records. The user id isn't stored in this table
  and this email is probably not used for sending email, but
  rewriting it cannot hurt.
*/

UPDATE wwc.student_courseenrollmentallowed
   SET email = CONCAT('success+', 'courseenrollmentallowed_', CAST(id AS CHAR), '@simulator.amazonses.com');

/*
  Replace the profile name with 'user-<id>' and empty the other
  fields. There is no WHERE clause, so profile data for edx
  employees is emptied as well.
  NOTE(review): the name is built from this table's id column,
  not user_id -- confirm that is the intended value.
*/

UPDATE wwc.auth_userprofile
   SET name               = CONCAT('user-', CAST(id AS CHAR)),
       language           = "",
       location           = "",
       meta               = "",
       gender             = NULL,
       mailing_address    = NULL,
       year_of_birth      = NULL,
       level_of_education = NULL,
       goals              = NULL,
       country            = "",
       city               = NULL;

/*
  The grader has its own django core tables, so its auth_user
  table is anonymized separately (again skipping @edx.org).
*/

UPDATE prod_grader.auth_user
   SET email       = CONCAT('success+', CAST(id AS CHAR), '@simulator.amazonses.com'),
       username    = CONCAT('user-', CAST(id AS CHAR)),
       first_name  = CONCAT('user-', CAST(id AS CHAR)),
       last_name   = CONCAT('user-', CAST(id AS CHAR)),
       password    = NULL,
       last_login  = NULL,
       date_joined = NULL
 WHERE email NOT LIKE ('%@edx.org');


SET FOREIGN_KEY_CHECKS = 1;

0 comments on commit 0dafb4f

Please sign in to comment.