From 3e9b8e580d061465a542986d7dca1bc5d343af2c Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Sun, 29 Dec 2019 16:58:44 -0700 Subject: [PATCH] Update synapse import documentation The script appears to work, just needed some touchups. Fixes https://github.com/turt2live/matrix-media-repo/issues/93 --- README.md | 18 ++++--- cmd/import_synapse/main.go | 75 +++++------------------------ cmd/media_repo/inits.go | 80 ------------------------------ cmd/media_repo/main.go | 22 ++------- cmd/media_repo/reloads.go | 5 +- common/runtime/init.go | 99 ++++++++++++++++++++++++++++++++++++++ common/version/version.go | 18 +++++++ 7 files changed, 146 insertions(+), 171 deletions(-) create mode 100644 common/runtime/init.go diff --git a/README.md b/README.md index 7da34ef4..bcb51b45 100644 --- a/README.md +++ b/README.md @@ -113,21 +113,27 @@ release though if you want to avoid building it yourself. 2. Edit/setup `media-repo.yaml` per the install instructions above 3. Run `bin/import_synapse`. The usage is below. ``` - Usage of ./bin/import_synapse: + Usage of import_synapse.exe: -baseUrl string The base URL to access your homeserver with (default "http://localhost:8008") + -config string + The path to the media repo configuration (with the database section completed) (default "media-repo.yaml") -dbHost string - The IP or hostname of the postgresql server with the synapse database (default "localhost") + The PostgreSQL hostname for your Synapse database (default "localhost") -dbName string - The name of the synapse database (default "synapse") + The name of your Synapse database (default "synapse") -dbPassword string - The password to authorize the postgres user. Can be omitted to be prompted when run + The password for your Synapse's PostgreSQL database. 
Can be omitted to be prompted when run -dbPort int - The port to access postgres on (default 5432) + The port for your Synapse's PostgreSQL database (default 5432) -dbUsername string - The username to access postgres with (default "synapse") + The username for your Synapse's PostgreSQL database (default "synapse") + -migrations string + The absolute path to the media repo's migrations folder (default "./migrations") -serverName string The name of your homeserver (eg: matrix.org) (default "localhost") + -workers int + The number of workers to use when downloading media. Using multiple workers risks deduplication not working as efficiently. (default 1) ``` Assuming the media repository, postgres database, and synapse are all on the same host, the command to run would look something like: `bin/import_synapse -serverName myserver.com -dbUsername my_database_user -dbName synapse` 4. Wait for the import to complete. The script will automatically deduplicate media. diff --git a/cmd/import_synapse/main.go b/cmd/import_synapse/main.go index d38bb33b..651640cc 100644 --- a/cmd/import_synapse/main.go +++ b/cmd/import_synapse/main.go @@ -17,10 +17,9 @@ import ( "github.com/turt2live/matrix-media-repo/common/config" "github.com/turt2live/matrix-media-repo/common/logging" "github.com/turt2live/matrix-media-repo/common/rcontext" + "github.com/turt2live/matrix-media-repo/common/runtime" "github.com/turt2live/matrix-media-repo/controllers/upload_controller" "github.com/turt2live/matrix-media-repo/storage" - "github.com/turt2live/matrix-media-repo/storage/datastore" - "github.com/turt2live/matrix-media-repo/storage/datastore/ds_s3" "github.com/turt2live/matrix-media-repo/synapse" ) @@ -31,15 +30,15 @@ type fetchRequest struct { } func main() { - postgresHost := flag.String("dbHost", "localhost", "The IP or hostname of the postgresql server with the synapse database") - postgresPort := flag.Int("dbPort", 5432, "The port to access postgres on") - postgresUsername := 
flag.String("dbUsername", "synapse", "The username to access postgres with") - postgresPassword := flag.String("dbPassword", "", "The password to authorize the postgres user. Can be omitted to be prompted when run") - postgresDatabase := flag.String("dbName", "synapse", "The name of the synapse database") + postgresHost := flag.String("dbHost", "localhost", "The PostgresSQL hostname for your Synapse database") + postgresPort := flag.Int("dbPort", 5432, "The port for your Synapse's PostgreSQL database") + postgresUsername := flag.String("dbUsername", "synapse", "The username for your Synapse's PostgreSQL database") + postgresPassword := flag.String("dbPassword", "", "The password for your Synapse's PostgreSQL database. Can be omitted to be prompted when run") + postgresDatabase := flag.String("dbName", "synapse", "The name of your Synapse database") baseUrl := flag.String("baseUrl", "http://localhost:8008", "The base URL to access your homeserver with") serverName := flag.String("serverName", "localhost", "The name of your homeserver (eg: matrix.org)") - configPath := flag.String("config", "media-repo.yaml", "The path to the configuration") - migrationsPath := flag.String("migrations", "./migrations", "The absolute path the migrations folder") + configPath := flag.String("config", "media-repo.yaml", "The path to the media repo configuration (with the database section completed)") + migrationsPath := flag.String("migrations", "./migrations", "The absolute path the media repo's migrations folder") numWorkers := flag.Int("workers", 1, "The number of workers to use when downloading media. 
Using multiple workers risks deduplication not working as efficiently.") flag.Parse() @@ -63,60 +62,8 @@ func main() { panic(err) } - logrus.Info("Preparing database...") - mediaStore := storage.GetDatabase().GetMediaStore(rcontext.Initial()) - - logrus.Info("Initializing datastores...") - enabledDatastores := 0 - for _, ds := range config.Get().DataStores { - if !ds.Enabled { - continue - } - - enabledDatastores++ - uri := datastore.GetUriForDatastore(ds) - - _, err := storage.GetOrCreateDatastoreOfType(rcontext.Initial(), ds.Type, uri) - if err != nil { - logrus.Fatal(err) - } - } - - // Print all the known datastores at startup. Doubles as a way to initialize the database. - datastores, err := mediaStore.GetAllDatastores() - if err != nil { - logrus.Fatal(err) - } - logrus.Info("Datastores:") - for _, ds := range datastores { - logrus.Info(fmt.Sprintf("\t%s (%s): %s", ds.Type, ds.DatastoreId, ds.Uri)) - - if ds.Type == "s3" { - conf, err := datastore.GetDatastoreConfig(ds) - if err != nil { - continue - } - - s3, err := ds_s3.GetOrCreateS3Datastore(ds.DatastoreId, conf) - if err != nil { - continue - } - - err = s3.EnsureBucketExists() - if err != nil { - logrus.Warn("\t\tBucket does not exist!") - } - - err = s3.EnsureTempPathExists() - if err != nil { - logrus.Warn("\t\tTemporary path does not exist!") - } - } - } - - if len(config.Get().Uploads.StoragePaths) > 0 { - logrus.Warn("You are using `storagePaths` in your configuration - in a future update, this will be removed. 
Please use datastores instead (see sample config).") - } + logrus.Info("Starting up...") + runtime.RunStartupSequence() logrus.Info("Setting up for importing...") @@ -189,6 +136,7 @@ func fetchMedia(req interface{}) interface{} { logrus.Error(err.Error()) return nil } + defer body.Close() _, err = upload_controller.StoreDirect(body, -1, record.ContentType, record.UploadName, record.UserId, payload.serverName, record.MediaId, common.KindLocalMedia, ctx) if err != nil { @@ -196,7 +144,6 @@ func fetchMedia(req interface{}) interface{} { return nil } - body.Close() return nil } diff --git a/cmd/media_repo/inits.go b/cmd/media_repo/inits.go index fc763a56..aa03798e 100644 --- a/cmd/media_repo/inits.go +++ b/cmd/media_repo/inits.go @@ -4,13 +4,10 @@ import ( "fmt" "github.com/sirupsen/logrus" - "github.com/turt2live/matrix-media-repo/common" - "github.com/turt2live/matrix-media-repo/common/config" "github.com/turt2live/matrix-media-repo/common/rcontext" "github.com/turt2live/matrix-media-repo/controllers/maintenance_controller" "github.com/turt2live/matrix-media-repo/storage" "github.com/turt2live/matrix-media-repo/storage/datastore" - "github.com/turt2live/matrix-media-repo/storage/datastore/ds_s3" ) func scanAndStartUnfinishedTasks() error { @@ -63,80 +60,3 @@ func scanAndStartUnfinishedTasks() error { return nil } -func loadDatabase() { - logrus.Info("Preparing database...") - storage.GetDatabase() -} - -func loadDatastores() { - if len(config.Get().Uploads.StoragePaths) > 0 { - logrus.Warn("storagePaths usage is deprecated - please use datastores instead") - for _, p := range config.Get().Uploads.StoragePaths { - ctx := rcontext.Initial().LogWithFields(logrus.Fields{"path": p}) - ds, err := storage.GetOrCreateDatastoreOfType(ctx, "file", p) - if err != nil { - logrus.Fatal(err) - } - - fakeConfig := config.DatastoreConfig{ - Type: "file", - Enabled: true, - MediaKinds: common.AllKinds, - Options: map[string]string{"path": ds.Uri}, - } - config.Get().DataStores = 
append(config.Get().DataStores, fakeConfig) - } - } - - mediaStore := storage.GetDatabase().GetMediaStore(rcontext.Initial()) - - logrus.Info("Initializing datastores...") - for _, ds := range config.UniqueDatastores() { - if !ds.Enabled { - continue - } - - uri := datastore.GetUriForDatastore(ds) - - _, err := storage.GetOrCreateDatastoreOfType(rcontext.Initial(), ds.Type, uri) - if err != nil { - logrus.Fatal(err) - } - } - - // Print all the known datastores at startup. Doubles as a way to initialize the database. - datastores, err := mediaStore.GetAllDatastores() - if err != nil { - logrus.Fatal(err) - } - logrus.Info("Datastores:") - for _, ds := range datastores { - logrus.Info(fmt.Sprintf("\t%s (%s): %s", ds.Type, ds.DatastoreId, ds.Uri)) - - if ds.Type == "s3" { - conf, err := datastore.GetDatastoreConfig(ds) - if err != nil { - continue - } - - s3, err := ds_s3.GetOrCreateS3Datastore(ds.DatastoreId, conf) - if err != nil { - continue - } - - err = s3.EnsureBucketExists() - if err != nil { - logrus.Warn("\t\tBucket does not exist!") - } - - err = s3.EnsureTempPathExists() - if err != nil { - logrus.Warn("\t\tTemporary path does not exist!") - } - } - } - - if len(config.Get().Uploads.StoragePaths) > 0 { - logrus.Warn("You are using `storagePaths` in your configuration - in a future update, this will be removed. 
Please use datastores instead (see sample config).") - } -} diff --git a/cmd/media_repo/main.go b/cmd/media_repo/main.go index 2db4bc18..43207c35 100644 --- a/cmd/media_repo/main.go +++ b/cmd/media_repo/main.go @@ -2,7 +2,6 @@ package main import ( "flag" - "fmt" "os" "os/signal" @@ -10,23 +9,12 @@ import ( "github.com/turt2live/matrix-media-repo/api/webserver" "github.com/turt2live/matrix-media-repo/common/config" "github.com/turt2live/matrix-media-repo/common/logging" + "github.com/turt2live/matrix-media-repo/common/runtime" "github.com/turt2live/matrix-media-repo/common/version" "github.com/turt2live/matrix-media-repo/metrics" "github.com/turt2live/matrix-media-repo/tasks" ) -func printVersion(usingLogger bool) { - version.SetDefaults() - - if usingLogger { - logrus.Info("Version: " + version.Version) - logrus.Info("Commit: " + version.GitCommit) - } else { - fmt.Println("Version: " + version.Version) - fmt.Println("Commit: " + version.GitCommit) - } -} - func main() { configPath := flag.String("config", "media-repo.yaml", "The path to the configuration") migrationsPath := flag.String("migrations", "./migrations", "The absolute path for the migrations folder") @@ -35,7 +23,7 @@ func main() { flag.Parse() if *versionFlag { - printVersion(false) + version.Print(false) return // exit 0 } @@ -55,11 +43,7 @@ func main() { } logrus.Info("Starting up...") - printVersion(true) - - config.PrintDomainInfo() - loadDatabase() - loadDatastores() + runtime.RunStartupSequence() logrus.Info("Checking background tasks...") err = scanAndStartUnfinishedTasks() diff --git a/cmd/media_repo/reloads.go b/cmd/media_repo/reloads.go index 93679a8d..7879faa6 100644 --- a/cmd/media_repo/reloads.go +++ b/cmd/media_repo/reloads.go @@ -3,6 +3,7 @@ package main import ( "github.com/turt2live/matrix-media-repo/api/webserver" "github.com/turt2live/matrix-media-repo/common/globals" + "github.com/turt2live/matrix-media-repo/common/runtime" "github.com/turt2live/matrix-media-repo/metrics" 
"github.com/turt2live/matrix-media-repo/storage" "github.com/turt2live/matrix-media-repo/tasks" @@ -57,7 +58,7 @@ func reloadDatabaseOnChan(reloadChan chan bool) { shouldReload := <-reloadChan if shouldReload { storage.ReloadDatabase() - loadDatabase() + runtime.LoadDatabase() globals.DatastoresReloadChan <- true } else { return // received stop @@ -71,7 +72,7 @@ func reloadDatastoresOnChan(reloadChan chan bool) { for { shouldReload := <-reloadChan if shouldReload { - loadDatastores() + runtime.LoadDatastores() } else { return // received stop } diff --git a/common/runtime/init.go b/common/runtime/init.go new file mode 100644 index 00000000..622d7de5 --- /dev/null +++ b/common/runtime/init.go @@ -0,0 +1,99 @@ +package runtime + +import ( + "fmt" + + "github.com/sirupsen/logrus" + "github.com/turt2live/matrix-media-repo/common" + "github.com/turt2live/matrix-media-repo/common/config" + "github.com/turt2live/matrix-media-repo/common/rcontext" + "github.com/turt2live/matrix-media-repo/common/version" + "github.com/turt2live/matrix-media-repo/storage" + "github.com/turt2live/matrix-media-repo/storage/datastore" + "github.com/turt2live/matrix-media-repo/storage/datastore/ds_s3" +) + +func RunStartupSequence() { + version.Print(true) + config.PrintDomainInfo() + LoadDatabase() + LoadDatastores() +} + +func LoadDatabase() { + logrus.Info("Preparing database...") + storage.GetDatabase() +} + +func LoadDatastores() { + if len(config.Get().Uploads.StoragePaths) > 0 { + logrus.Warn("storagePaths usage is deprecated - please use datastores instead") + for _, p := range config.Get().Uploads.StoragePaths { + ctx := rcontext.Initial().LogWithFields(logrus.Fields{"path": p}) + ds, err := storage.GetOrCreateDatastoreOfType(ctx, "file", p) + if err != nil { + logrus.Fatal(err) + } + + fakeConfig := config.DatastoreConfig{ + Type: "file", + Enabled: true, + MediaKinds: common.AllKinds, + Options: map[string]string{"path": ds.Uri}, + } + config.Get().DataStores = 
append(config.Get().DataStores, fakeConfig) + } + } + + mediaStore := storage.GetDatabase().GetMediaStore(rcontext.Initial()) + + logrus.Info("Initializing datastores...") + for _, ds := range config.UniqueDatastores() { + if !ds.Enabled { + continue + } + + uri := datastore.GetUriForDatastore(ds) + + _, err := storage.GetOrCreateDatastoreOfType(rcontext.Initial(), ds.Type, uri) + if err != nil { + logrus.Fatal(err) + } + } + + // Print all the known datastores at startup. Doubles as a way to initialize the database. + datastores, err := mediaStore.GetAllDatastores() + if err != nil { + logrus.Fatal(err) + } + logrus.Info("Datastores:") + for _, ds := range datastores { + logrus.Info(fmt.Sprintf("\t%s (%s): %s", ds.Type, ds.DatastoreId, ds.Uri)) + + if ds.Type == "s3" { + conf, err := datastore.GetDatastoreConfig(ds) + if err != nil { + continue + } + + s3, err := ds_s3.GetOrCreateS3Datastore(ds.DatastoreId, conf) + if err != nil { + continue + } + + err = s3.EnsureBucketExists() + if err != nil { + logrus.Warn("\t\tBucket does not exist!") + } + + err = s3.EnsureTempPathExists() + if err != nil { + logrus.Warn("\t\tTemporary path does not exist!") + } + } + } + + if len(config.Get().Uploads.StoragePaths) > 0 { + logrus.Warn("You are using `storagePaths` in your configuration - in a future update, this will be removed. Please use datastores instead (see sample config).") + } +} diff --git a/common/version/version.go b/common/version/version.go index 7cdfbd1a..ed571d28 100644 --- a/common/version/version.go +++ b/common/version/version.go @@ -1,5 +1,11 @@ package version +import ( + "fmt" + + "github.com/sirupsen/logrus" +) + var GitCommit string var Version string @@ -11,3 +17,15 @@ func SetDefaults() { Version = "unknown" } } + +func Print(usingLogger bool) { + SetDefaults() + + if usingLogger { + logrus.Info("Version: " + Version) + logrus.Info("Commit: " + GitCommit) + } else { + fmt.Println("Version: " + Version) + fmt.Println("Commit: " + GitCommit) + } +}