Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Generate separate snapshots for logical databases #13792

Merged
merged 11 commits into from
Sep 20, 2022
118 changes: 82 additions & 36 deletions scripts-dev/make_full_schema.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,8 @@
# synapse_port_db to convert it to Postgres. It then dumps the contents of both.

export PGHOST="localhost"
POSTGRES_DB_NAME="synapse_full_schema.$$"

SQLITE_SCHEMA_FILE="schema.sql.sqlite"
SQLITE_ROWS_FILE="rows.sql.sqlite"
POSTGRES_SCHEMA_FILE="full.sql.postgres"
POSTGRES_ROWS_FILE="rows.sql.postgres"

POSTGRES_MAIN_DB_NAME="synapse_full_schema_main.$$"
POSTGRES_STATE_DB_NAME="synapse_full_schema_state.$$"
REQUIRED_DEPS=("matrix-synapse" "psycopg2")

usage() {
Expand Down Expand Up @@ -95,12 +90,19 @@ cd "$(dirname "$0")/.."
TMPDIR=$(mktemp -d)
KEY_FILE=$TMPDIR/test.signing.key # default Synapse signing key path
SQLITE_CONFIG=$TMPDIR/sqlite.conf
SQLITE_DB=$TMPDIR/homeserver.db
SQLITE_MAIN_DB=$TMPDIR/main.db
SQLITE_STATE_DB=$TMPDIR/state.db
POSTGRES_CONFIG=$TMPDIR/postgres.conf

# Ensure these files are delete on script exit
# TODO: the trap should also drop the temp postgres DB
trap 'rm -rf $TMPDIR' EXIT
cleanup() {
echo "Cleaning up temporary sqlite database and config files..."
rm -r "$TMPDIR"
echo "Cleaning up temporary Postgres database..."
dropdb --if-exists "$POSTGRES_MAIN_DB_NAME"
dropdb --if-exists "$POSTGRES_STATE_DB_NAME"
}
trap 'cleanup' EXIT

cat > "$SQLITE_CONFIG" <<EOF
server_name: "test"
Expand All @@ -110,10 +112,17 @@ macaroon_secret_key: "abcde"

report_stats: false

database:
name: "sqlite3"
args:
database: "$SQLITE_DB"
databases:
main:
name: "sqlite3"
data_stores: ["main"]
args:
database: "$SQLITE_MAIN_DB"
state:
name: "sqlite3"
data_stores: ["state"]
args:
database: "$SQLITE_STATE_DB"

# Suppress the key server warning.
trusted_key_servers: []
Expand All @@ -127,13 +136,24 @@ macaroon_secret_key: "abcde"

report_stats: false

database:
name: "psycopg2"
args:
user: "$PGUSER"
host: "$PGHOST"
password: "$PGPASSWORD"
database: "$POSTGRES_DB_NAME"
databases:
main:
name: "psycopg2"
data_stores: ["main"]
args:
user: "$PGUSER"
host: "$PGHOST"
password: "$PGPASSWORD"
database: "$POSTGRES_MAIN_DB_NAME"
state:
name: "psycopg2"
data_stores: ["state"]
args:
user: "$PGUSER"
host: "$PGHOST"
password: "$PGPASSWORD"
database: "$POSTGRES_STATE_DB_NAME"


# Suppress the key server warning.
trusted_key_servers: []
Expand All @@ -148,33 +168,59 @@ echo "Running db background jobs..."
synapse/_scripts/update_synapse_database.py --database-config "$SQLITE_CONFIG" --run-background-updates

# Create the PostgreSQL database.
echo "Creating postgres database..."
createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_DB_NAME"
echo "Creating postgres databases..."
createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_MAIN_DB_NAME"
createdb --lc-collate=C --lc-ctype=C --template=template0 "$POSTGRES_STATE_DB_NAME"

echo "Running db background jobs..."
synapse/_scripts/update_synapse_database.py --database-config "$POSTGRES_CONFIG" --run-background-updates


# Delete schema_version, applied_schema_deltas and applied_module_schemas tables
# Also delete any shadow tables from fts4
# Don't include the `common` tables in the dumps.
echo "Dropping unwanted db tables..."
SQL="
DROP_COMMON_TABLES="
DROP TABLE schema_version;
DROP TABLE schema_compat_version;
DROP TABLE background_updates;
DROP TABLE applied_schema_deltas;
DROP TABLE applied_module_schemas;
"
sqlite3 "$SQLITE_DB" <<< "$SQL"
psql "$POSTGRES_DB_NAME" -w <<< "$SQL"

echo "Dumping SQLite3 schema to '$OUTPUT_DIR/$SQLITE_SCHEMA_FILE' and '$OUTPUT_DIR/$SQLITE_ROWS_FILE'..."
sqlite3 "$SQLITE_DB" ".schema --indent" > "$OUTPUT_DIR/$SQLITE_SCHEMA_FILE"
sqlite3 "$SQLITE_DB" ".dump --data-only --nosys" > "$OUTPUT_DIR/$SQLITE_ROWS_FILE"
sqlite3 "$SQLITE_MAIN_DB" <<< "$DROP_COMMON_TABLES"
sqlite3 "$SQLITE_STATE_DB" <<< "$DROP_COMMON_TABLES"
psql "$POSTGRES_MAIN_DB_NAME" -w <<< "$DROP_COMMON_TABLES"
psql "$POSTGRES_STATE_DB_NAME" -w <<< "$DROP_COMMON_TABLES"

# For Reasons(TM), SQLite's `.schema` also dumps out "shadow tables", the implementation
# details behind full text search tables. Omit these from the dumps.

sqlite3 "$SQLITE_MAIN_DB" <<< "
DROP TABLE event_search_content;
DROP TABLE event_search_segments;
DROP TABLE event_search_segdir;
DROP TABLE event_search_docsize;
DROP TABLE event_search_stat;
DROP TABLE user_directory_search_content;
DROP TABLE user_directory_search_segments;
DROP TABLE user_directory_search_segdir;
DROP TABLE user_directory_search_docsize;
DROP TABLE user_directory_search_stat;
"
Comment on lines +228 to +239
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought I could omit this in #13770, but I was mistaken. Sorry.


echo "Dumping SQLite3 schema to $OUTPUT_DIR"
sqlite3 "$SQLITE_MAIN_DB" ".schema --indent" > "$OUTPUT_DIR/schema_main.sql.sqlite"
sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" > "$OUTPUT_DIR/rows_main.sql.sqlite"
sqlite3 "$SQLITE_STATE_DB" ".schema --indent" > "$OUTPUT_DIR/schema_state.sql.sqlite"
sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" > "$OUTPUT_DIR/rows_state.sql.sqlite"

echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE' and '$OUTPUT_DIR/$POSTGRES_ROWS_FILE'..."
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_ROWS_FILE"
cleanup_pg_schema() {
sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d'
}

echo "Cleaning up temporary Postgres database..."
dropdb $POSTGRES_DB_NAME
echo "Dumping Postgres schema to $OUTPUT_DIR"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/schema_main.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/rows_main.sql.postgres"
pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/schema_state.sql.postgres"
pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/rows_state.sql.postgres"

echo "Done! Files dumped to: $OUTPUT_DIR"
7 changes: 5 additions & 2 deletions synapse/_scripts/update_synapse_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,11 @@ def main() -> None:
# Load, process and sanity-check the config.
hs_config = yaml.safe_load(args.database_config)

if "database" not in hs_config:
sys.stderr.write("The configuration file must have a 'database' section.\n")
if "database" not in hs_config and "databases" not in hs_config:
sys.stderr.write(
"The configuration file must have a 'database' or 'databases' section. "
"See https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#database"
)
sys.exit(4)

config = HomeServerConfig()
Expand Down