diff --git a/.dockerignore b/.dockerignore index 1c6905b1bbb3..0b51345cbdb0 100644 --- a/.dockerignore +++ b/.dockerignore @@ -9,6 +9,7 @@ !pyproject.toml !poetry.lock !Cargo.lock +!Cargo.toml !build_rust.py rust/target diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000000..9c7db1fc866f --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,17 @@ +version: 2 +updates: + - # "pip" is the correct setting for poetry, per https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem + package-ecosystem: "pip" + directory: "/" + schedule: + interval: "weekly" + + - package-ecosystem: "docker" + directory: "/docker" + schedule: + interval: "weekly" + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/dependabot_changelog.yml b/.github/workflows/dependabot_changelog.yml new file mode 100644 index 000000000000..b6a29a572298 --- /dev/null +++ b/.github/workflows/dependabot_changelog.yml @@ -0,0 +1,46 @@ +name: Write changelog for dependabot PR +on: + pull_request: + types: + - opened + - reopened # For debugging! + +permissions: + # Needed to be able to push the commit. See + # https://docs.github.com/en/code-security/dependabot/working-with-dependabot/automating-dependabot-with-github-actions#enable-auto-merge-on-a-pull-request + # for a similar example + contents: write + +jobs: + add-changelog: + runs-on: 'ubuntu-latest' + if: ${{ github.actor == 'dependabot[bot]' }} + steps: + - uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.ref }} + - name: Write, commit and push changelog + run: | + echo "${{ github.event.pull_request.title }}." > "changelog.d/${{ github.event.pull_request.number }}".misc + git add changelog.d + git config user.email "github-actions[bot]@users.noreply.github.com" + git config user.name "GitHub Actions" + git commit -m "Changelog" + git push + shell: bash + # The `git push` above does not trigger CI on the dependabot PR. + # + # By default, workflows can't trigger other workflows when they're just using the + # default `GITHUB_TOKEN` access token. (This is intended to stop you from writing + # recursive workflow loops by accident, because that'll get very expensive very + # quickly.) Instead, you have to manually call out to another workflow, or else + # make your changes (i.e. the `git push` above) using a personal access token. + # See + # https://docs.github.com/en/actions/using-workflows/triggering-a-workflow#triggering-a-workflow-from-a-workflow + # + # I have tried and failed to find a way to trigger CI on the "merge ref" of the PR. + # See git commit history for previous attempts. If anyone desperately wants to try + # again in the future, make a matrix-bot account and use its access token to git push. + + # THIS WORKFLOW HAS WRITE PERMISSIONS---do not add other jobs here unless they + # are sufficiently locked down to dependabot only as above. diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index d20d30c0353c..49427ab50d0f 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -17,19 +17,19 @@ jobs: steps: - name: Set up QEMU id: qemu - uses: docker/setup-qemu-action@v1 + uses: docker/setup-qemu-action@v2 with: platforms: arm64 - name: Set up Docker Buildx id: buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v2 - name: Inspect builder run: docker buildx inspect - + - name: Log in to DockerHub - uses: docker/login-action@v1 + uses: docker/login-action@v2 with: username: ${{ secrets.DOCKERHUB_USERNAME }} password: ${{ secrets.DOCKERHUB_TOKEN }} @@ -48,10 +48,15 @@ jobs: type=pep440,pattern={{raw}} - name: Build and push all platforms - uses: docker/build-push-action@v2 + uses: docker/build-push-action@v3 with: push: true labels: "gitsha1=${{ github.sha }}" tags: "${{ steps.set-tag.outputs.tags }}" file: "docker/Dockerfile" platforms: linux/amd64,linux/arm64 + + # arm64 builds OOM without the git fetch setting. c.f. + # https://github.com/rust-lang/cargo/issues/10583 + build-args: | + CARGO_NET_GIT_FETCH_WITH_CLI=true diff --git a/.github/workflows/docs.yaml b/.github/workflows/docs.yaml index b366eb866705..17b9df601c59 100644 --- a/.github/workflows/docs.yaml +++ b/.github/workflows/docs.yaml @@ -17,7 +17,7 @@ jobs: name: GitHub Pages runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Setup mdbook uses: peaceiris/actions-mdbook@4b5ef36b314c2599664ca107bb8c02412548d79d # v1.1.14 diff --git a/.github/workflows/latest_deps.yml b/.github/workflows/latest_deps.yml index 8366ac9393e3..e240bf4e4f45 100644 --- a/.github/workflows/latest_deps.yml +++ b/.github/workflows/latest_deps.yml @@ -25,7 +25,7 @@ jobs: mypy: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install Rust uses: actions-rs/toolchain@v1 with: @@ -59,7 +59,7 @@ jobs: postgres-version: "14" steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install Rust uses: actions-rs/toolchain@v1 @@ -133,7 +133,7 @@ jobs: BLACKLIST: ${{ matrix.workers && 'synapse-blacklist-with-workers' }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install Rust uses: actions-rs/toolchain@v1 @@ -155,7 +155,7 @@ jobs: if: ${{ always() }} run: /sytest/scripts/tap_to_gha.pl /logs/results.tap - name: Upload SyTest logs - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: ${{ always() }} with: name: Sytest Logs - ${{ job.status }} - (${{ join(matrix.*, ', ') }}) @@ -182,8 +182,8 @@ jobs: database: Postgres steps: - - name: Run actions/checkout@v2 for synapse - uses: actions/checkout@v2 + - name: Run actions/checkout@v3 for synapse + uses: actions/checkout@v3 with: path: synapse @@ -201,15 +201,16 @@ jobs: open-issue: if: "failure() && github.event_name != 'push' && github.event_name != 'pull_request'" needs: - # TODO: should mypy be included here? It feels more brittle than the other two. + # TODO: should mypy be included here? It feels more brittle than the others. - mypy - trial - sytest + - complement runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: JasonEtco/create-an-issue@5d9504915f79f9cc6d791934b8ef34f2353dd74d # v2.5.0, 2020-12-06 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/release-artifacts.yml b/.github/workflows/release-artifacts.yml index 0708d631cd36..eb12d88fbc00 100644 --- a/.github/workflows/release-artifacts.yml +++ b/.github/workflows/release-artifacts.yml @@ -11,6 +11,7 @@ on: # we do the full build on tags. tags: ["v*"] + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -24,7 +25,7 @@ jobs: name: "Calculate list of debian distros" runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/setup-python@v2 - id: set-distros run: | @@ -49,18 +50,18 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: path: src - name: Set up Docker Buildx id: buildx - uses: docker/setup-buildx-action@v1 + uses: docker/setup-buildx-action@v2 with: install: true - name: Set up docker layer caching - uses: actions/cache@v2 + uses: actions/cache@v3 with: path: /tmp/.buildx-cache key: ${{ runner.os }}-buildx-${{ github.sha }} @@ -84,7 +85,7 @@ jobs: mv /tmp/.buildx-cache-new /tmp/.buildx-cache - name: Upload debs as artifacts - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: debs path: debs/* @@ -145,7 +146,7 @@ jobs: - name: Build sdist run: python -m build --sdist - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: Sdist path: dist/*.tar.gz @@ -162,7 +163,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Download all workflow run artifacts - uses: actions/download-artifact@v2 + uses: actions/download-artifact@v3 - name: Build a tarball for the debs run: tar -cvJf debs.tar.xz debs - name: Attach to release diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9fe61930a55a..94eb58b59dea 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -4,6 +4,7 @@ on: push: branches: ["develop", "release-*"] pull_request: + workflow_dispatch: concurrency: group: ${{ github.workflow }}-${{ github.ref }} @@ -30,7 +31,7 @@ jobs: check-sampleconfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/setup-python@v2 - uses: matrix-org/setup-python-poetry@v1 with: @@ -41,7 +42,7 @@ jobs: check-schema-delta: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/setup-python@v2 - run: "pip install 'click==8.1.1' 'GitPython>=3.1.20'" - run: scripts-dev/check_schema_delta.py --force-colors @@ -54,15 +55,15 @@ jobs: lint-crlf: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Check line endings run: scripts-dev/check_line_terminators.sh lint-newsfile: - if: ${{ github.base_ref == 'develop' || contains(github.base_ref, 'release-') }} + if: ${{ (github.base_ref == 'develop' || contains(github.base_ref, 'release-')) && github.actor != 'dependabot[bot]' }} runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: ref: ${{ github.event.pull_request.head.sha }} fetch-depth: 0 @@ -75,7 +76,7 @@ jobs: lint-pydantic: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: ref: ${{ github.event.pull_request.head.sha }} - uses: matrix-org/setup-python-poetry@v1 @@ -89,7 +90,7 @@ jobs: if: ${{ needs.changes.outputs.rust == 'true' }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install Rust uses: actions-rs/toolchain@v1 @@ -107,7 +108,7 @@ jobs: if: ${{ needs.changes.outputs.rust == 'true' }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install Rust uses: actions-rs/toolchain@v1 @@ -140,7 +141,7 @@ jobs: needs: linting-done runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: actions/setup-python@v2 - id: get-matrix run: .ci/scripts/calculate_jobs.py @@ -157,7 +158,7 @@ jobs: job: ${{ fromJson(needs.calculate-test-jobs.outputs.trial_test_matrix) }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - run: sudo apt-get -qq install xmlsec1 - name: Set up PostgreSQL ${{ matrix.job.postgres-version }} if: ${{ matrix.job.postgres-version }} @@ -199,7 +200,7 @@ jobs: needs: linting-done runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install Rust uses: actions-rs/toolchain@v1 @@ -270,7 +271,7 @@ jobs: extras: ["all"] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 # Install libs necessary for PyPy to build binary wheels for dependencies - run: sudo apt-get -qq install xmlsec1 libxml2-dev libxslt-dev - uses: matrix-org/setup-python-poetry@v1 @@ -313,7 +314,7 @@ jobs: job: ${{ fromJson(needs.calculate-test-jobs.outputs.sytest_test_matrix) }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Prepare test blacklist run: cat sytest-blacklist .ci/worker-blacklist > synapse-blacklist-with-workers @@ -331,7 +332,7 @@ jobs: if: ${{ always() }} run: /sytest/scripts/tap_to_gha.pl /logs/results.tap - name: Upload SyTest logs - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: ${{ always() }} with: name: Sytest Logs - ${{ job.status }} - (${{ join(matrix.job.*, ', ') }}) @@ -361,7 +362,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - run: sudo apt-get -qq install xmlsec1 postgresql-client - uses: matrix-org/setup-python-poetry@v1 with: @@ -402,7 +403,7 @@ jobs: --health-retries 5 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - run: sudo apt-get -qq install xmlsec1 postgresql-client - uses: matrix-org/setup-python-poetry@v1 with: @@ -444,8 +445,8 @@ jobs: database: Postgres steps: - - name: Run actions/checkout@v2 for synapse - uses: actions/checkout@v2 + - name: Run actions/checkout@v3 for synapse + uses: actions/checkout@v3 with: path: synapse @@ -473,7 +474,7 @@ jobs: - changes steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install Rust uses: actions-rs/toolchain@v1 diff --git a/.github/workflows/twisted_trunk.yml b/.github/workflows/twisted_trunk.yml index 8fa2fbdea0e8..b4e26118c075 100644 --- a/.github/workflows/twisted_trunk.yml +++ b/.github/workflows/twisted_trunk.yml @@ -15,7 +15,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install Rust uses: actions-rs/toolchain@v1 @@ -40,7 +40,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - run: sudo apt-get -qq install xmlsec1 - name: Install Rust @@ -81,7 +81,7 @@ jobs: - ${{ github.workspace }}:/src steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Install Rust uses: actions-rs/toolchain@v1 @@ -112,7 +112,7 @@ jobs: if: ${{ always() }} run: /sytest/scripts/tap_to_gha.pl /logs/results.tap - name: Upload SyTest logs - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 if: ${{ always() }} with: name: Sytest Logs - ${{ job.status }} - (${{ join(matrix.*, ', ') }}) @@ -138,8 +138,8 @@ jobs: database: Postgres steps: - - name: Run actions/checkout@v2 for synapse - uses: actions/checkout@v2 + - name: Run actions/checkout@v3 for synapse + uses: actions/checkout@v3 with: path: synapse @@ -177,7 +177,7 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - uses: JasonEtco/create-an-issue@5d9504915f79f9cc6d791934b8ef34f2353dd74d # v2.5.0, 2020-12-06 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 000000000000..bf96e7743de0 --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1 @@ +group_imports = "StdExternalCrate" diff --git a/CHANGES.md b/CHANGES.md index fbb57f0e04e1..c373b2a8468e 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,162 @@ +Synapse 1.69.0 (2022-10-17) +=========================== + +Please note that legacy Prometheus metric names are now deprecated and will be removed in Synapse 1.73.0. +Server administrators should update their dashboards and alerting rules to avoid using the deprecated metric names. +See the [upgrade notes](https://matrix-org.github.io/synapse/v1.69/upgrade.html#upgrading-to-v1690) for more details. + + +No significant changes since 1.69.0rc4. + + +Synapse 1.69.0rc4 (2022-10-14) +============================== + +Bugfixes +-------- + +- Fix poor performance of the `event_push_backfill_thread_id` background update, which was introduced in Synapse 1.68.0rc1. ([\#14172](https://github.com/matrix-org/synapse/issues/14172), [\#14181](https://github.com/matrix-org/synapse/issues/14181)) + + +Updates to the Docker image +--------------------------- + +- Fix docker build OOMing in CI for arm64 builds. ([\#14173](https://github.com/matrix-org/synapse/issues/14173)) + + +Synapse 1.69.0rc3 (2022-10-12) +============================== + +Bugfixes +-------- + +- Fix an issue with Docker images causing the Rust dependencies to not be pinned correctly. Introduced in v1.68.0 ([\#14129](https://github.com/matrix-org/synapse/issues/14129)) +- Fix a bug introduced in Synapse 1.69.0rc1 which would cause registration replication requests to fail if the worker sending the request is not running Synapse 1.69. ([\#14135](https://github.com/matrix-org/synapse/issues/14135)) +- Fix error in background update when rotating existing notifications. Introduced in v1.69.0rc2. ([\#14138](https://github.com/matrix-org/synapse/issues/14138)) + + +Internal Changes +---------------- + +- Rename the `url_preview` extra to `url-preview`, for compatability with poetry-core 1.3.0 and [PEP 685](https://peps.python.org/pep-0685/). From-source installations using this extra will need to install using the new name. ([\#14085](https://github.com/matrix-org/synapse/issues/14085)) + + +Synapse 1.69.0rc2 (2022-10-06) +============================== + +Deprecations and Removals +------------------------- + +- Deprecate the `generate_short_term_login_token` method in favor of an async `create_login_token` method in the Module API. ([\#13842](https://github.com/matrix-org/synapse/issues/13842)) + + +Internal Changes +---------------- + +- Ensure Synapse v1.69 works with upcoming database changes in v1.70. ([\#14045](https://github.com/matrix-org/synapse/issues/14045)) +- Fix a bug introduced in Synapse v1.68.0 where messages could not be sent in rooms with non-integer `notifications` power level. ([\#14073](https://github.com/matrix-org/synapse/issues/14073)) +- Temporarily pin build-system requirements to workaround an incompatibility with poetry-core 1.3.0. This will be reverted before the v1.69.0 release proper, see [\#14079](https://github.com/matrix-org/synapse/issues/14079). ([\#14080](https://github.com/matrix-org/synapse/issues/14080)) + + +Synapse 1.69.0rc1 (2022-10-04) +============================== + +Features +-------- + +- Allow application services to set the `origin_server_ts` of a state event by providing the query parameter `ts` in [`PUT /_matrix/client/r0/rooms/{roomId}/state/{eventType}/{stateKey}`](https://spec.matrix.org/v1.4/client-server-api/#put_matrixclientv3roomsroomidstateeventtypestatekey), per [MSC3316](https://github.com/matrix-org/matrix-doc/pull/3316). Contributed by @lukasdenk. ([\#11866](https://github.com/matrix-org/synapse/issues/11866)) +- Allow server admins to require a manual approval process before new accounts can be used (using [MSC3866](https://github.com/matrix-org/matrix-spec-proposals/pull/3866)). ([\#13556](https://github.com/matrix-org/synapse/issues/13556)) +- Exponentially backoff from backfilling the same event over and over. ([\#13635](https://github.com/matrix-org/synapse/issues/13635), [\#13936](https://github.com/matrix-org/synapse/issues/13936)) +- Add cache invalidation across workers to module API. ([\#13667](https://github.com/matrix-org/synapse/issues/13667), [\#13947](https://github.com/matrix-org/synapse/issues/13947)) +- Experimental implementation of [MSC3882](https://github.com/matrix-org/matrix-spec-proposals/pull/3882) to allow an existing device/session to generate a login token for use on a new device/session. ([\#13722](https://github.com/matrix-org/synapse/issues/13722), [\#13868](https://github.com/matrix-org/synapse/issues/13868)) +- Experimental support for thread-specific receipts ([MSC3771](https://github.com/matrix-org/matrix-spec-proposals/pull/3771)). ([\#13782](https://github.com/matrix-org/synapse/issues/13782), [\#13893](https://github.com/matrix-org/synapse/issues/13893), [\#13932](https://github.com/matrix-org/synapse/issues/13932), [\#13937](https://github.com/matrix-org/synapse/issues/13937), [\#13939](https://github.com/matrix-org/synapse/issues/13939)) +- Add experimental support for [MSC3881: Remotely toggle push notifications for another client](https://github.com/matrix-org/matrix-spec-proposals/pull/3881). ([\#13799](https://github.com/matrix-org/synapse/issues/13799), [\#13831](https://github.com/matrix-org/synapse/issues/13831), [\#13860](https://github.com/matrix-org/synapse/issues/13860)) +- Keep track when an event pulled over federation fails its signature check so we can intelligently back-off in the future. ([\#13815](https://github.com/matrix-org/synapse/issues/13815)) +- Improve validation for the unspecced, internal-only `_matrix/client/unstable/add_threepid/msisdn/submit_token` endpoint. ([\#13832](https://github.com/matrix-org/synapse/issues/13832)) +- Faster remote room joins: record _when_ we first partial-join to a room. ([\#13892](https://github.com/matrix-org/synapse/issues/13892)) +- Support a `dir` parameter on the `/relations` endpoint per [MSC3715](https://github.com/matrix-org/matrix-doc/pull/3715). ([\#13920](https://github.com/matrix-org/synapse/issues/13920)) +- Ask mail servers receiving emails from Synapse to not send automatic replies (e.g. out-of-office responses). ([\#13957](https://github.com/matrix-org/synapse/issues/13957)) + + +Bugfixes +-------- + +- Send push notifications for invites received over federation. ([\#13719](https://github.com/matrix-org/synapse/issues/13719), [\#14014](https://github.com/matrix-org/synapse/issues/14014)) +- Fix a long-standing bug where typing events would be accepted from remote servers not present in a room. Also fix a bug where incoming typing events would cause other incoming events to get stuck during a fast join. ([\#13830](https://github.com/matrix-org/synapse/issues/13830)) +- Fix a bug introduced in Synapse v1.53.0 where the experimental implementation of [MSC3715](https://github.com/matrix-org/matrix-spec-proposals/pull/3715) would give incorrect results when paginating forward. ([\#13840](https://github.com/matrix-org/synapse/issues/13840)) +- Fix access token leak to logs from proxy agent. ([\#13855](https://github.com/matrix-org/synapse/issues/13855)) +- Fix `have_seen_event` cache not being invalidated after we persist an event which causes inefficiency effects like extra `/state` federation calls. ([\#13863](https://github.com/matrix-org/synapse/issues/13863)) +- Faster room joins: Fix a bug introduced in 1.66.0 where an error would be logged when syncing after joining a room. ([\#13872](https://github.com/matrix-org/synapse/issues/13872)) +- Fix a bug introduced in 1.66.0 where some required fields in the pushrules sent to clients were not present anymore. Contributed by Nico. ([\#13904](https://github.com/matrix-org/synapse/issues/13904)) +- Fix packaging to include `Cargo.lock` in `sdist`. ([\#13909](https://github.com/matrix-org/synapse/issues/13909)) +- Fix a long-standing bug where device updates could cause delays sending out to-device messages over federation. ([\#13922](https://github.com/matrix-org/synapse/issues/13922)) +- Fix a bug introduced in v1.68.0 where Synapse would require `setuptools_rust` at runtime, even though the package is only required at build time. ([\#13952](https://github.com/matrix-org/synapse/issues/13952)) +- Fix a long-standing bug where `POST /_matrix/client/v3/keys/query` requests could result in excessively large SQL queries. ([\#13956](https://github.com/matrix-org/synapse/issues/13956)) +- Fix a performance regression in the `get_users_in_room` database query. Introduced in v1.67.0. ([\#13972](https://github.com/matrix-org/synapse/issues/13972)) +- Fix a bug introduced in v1.68.0 bug where Rust extension wasn't built in `release` mode when using `poetry install`. ([\#14009](https://github.com/matrix-org/synapse/issues/14009)) +- Do not return an unspecified `original_event` field when using the stable `/relations` endpoint. Introduced in Synapse v1.57.0. ([\#14025](https://github.com/matrix-org/synapse/issues/14025)) +- Correctly handle a race with device lists when a remote user leaves during a partial join. ([\#13885](https://github.com/matrix-org/synapse/issues/13885)) +- Correctly handle sending local device list updates to remote servers during a partial join. ([\#13934](https://github.com/matrix-org/synapse/issues/13934)) + + +Improved Documentation +---------------------- + +- Add `worker_main_http_uri` for the worker generator bash script. ([\#13772](https://github.com/matrix-org/synapse/issues/13772)) +- Update URL for the NixOS module for Synapse. ([\#13818](https://github.com/matrix-org/synapse/issues/13818)) +- Fix a mistake in sso_mapping_providers.md: `map_user_attributes` is expected to return `display_name`, not `displayname`. ([\#13836](https://github.com/matrix-org/synapse/issues/13836)) +- Fix a cross-link from the registration admin API to the `registration_shared_secret` configuration documentation. ([\#13870](https://github.com/matrix-org/synapse/issues/13870)) +- Update the man page for the `hash_password` script to correct the default number of bcrypt rounds performed. ([\#13911](https://github.com/matrix-org/synapse/issues/13911), [\#13930](https://github.com/matrix-org/synapse/issues/13930)) +- Emphasize the right reasons when to use `(room_id, event_id)` in a database schema. ([\#13915](https://github.com/matrix-org/synapse/issues/13915)) +- Add instruction to contributing guide for running unit tests in parallel. Contributed by @ashfame. ([\#13928](https://github.com/matrix-org/synapse/issues/13928)) +- Clarify that the `auto_join_rooms` config option can also be used with Space aliases. ([\#13931](https://github.com/matrix-org/synapse/issues/13931)) +- Add some cross references to worker documentation. ([\#13974](https://github.com/matrix-org/synapse/issues/13974)) +- Linkify urls in config documentation. ([\#14003](https://github.com/matrix-org/synapse/issues/14003)) + + +Deprecations and Removals +------------------------- + +- Remove the `complete_sso_login` method from the Module API which was deprecated in Synapse 1.13.0. ([\#13843](https://github.com/matrix-org/synapse/issues/13843)) +- Announce that legacy metric names are deprecated, will be turned off by default in Synapse v1.71.0 and removed altogether in Synapse v1.73.0. See the upgrade notes for more information. ([\#14024](https://github.com/matrix-org/synapse/issues/14024)) + + +Internal Changes +---------------- + +- Speed up creation of DM rooms. ([\#13487](https://github.com/matrix-org/synapse/issues/13487), [\#13800](https://github.com/matrix-org/synapse/issues/13800)) +- Port push rules to using Rust. ([\#13768](https://github.com/matrix-org/synapse/issues/13768), [\#13838](https://github.com/matrix-org/synapse/issues/13838), [\#13889](https://github.com/matrix-org/synapse/issues/13889)) +- Optimise get rooms for user calls. Contributed by Nick @ Beeper (@fizzadar). ([\#13787](https://github.com/matrix-org/synapse/issues/13787)) +- Update the script which makes full schema dumps. ([\#13792](https://github.com/matrix-org/synapse/issues/13792)) +- Use shared methods for cache invalidation when persisting events, remove duplicate codepaths. Contributed by Nick @ Beeper (@fizzadar). ([\#13796](https://github.com/matrix-org/synapse/issues/13796)) +- Improve the `synapse.api.auth.Auth` mock used in unit tests. ([\#13809](https://github.com/matrix-org/synapse/issues/13809)) +- Faster Remote Room Joins: tell remote homeservers that we are unable to authorise them if they query a room which has partial state on our server. ([\#13823](https://github.com/matrix-org/synapse/issues/13823)) +- Carry IdP Session IDs through user-mapping sessions. ([\#13839](https://github.com/matrix-org/synapse/issues/13839)) +- Fix the release script not publishing binary wheels. ([\#13850](https://github.com/matrix-org/synapse/issues/13850)) +- Raise issue if complement fails with latest deps. ([\#13859](https://github.com/matrix-org/synapse/issues/13859)) +- Correct the comments in the complement dockerfile. ([\#13867](https://github.com/matrix-org/synapse/issues/13867)) +- Create a new snapshot of the database schema. ([\#13873](https://github.com/matrix-org/synapse/issues/13873)) +- Faster room joins: Send device list updates to most servers in rooms with partial state. ([\#13874](https://github.com/matrix-org/synapse/issues/13874), [\#14013](https://github.com/matrix-org/synapse/issues/14013)) +- Add comments to the Prometheus recording rules to make it clear which set of rules you need for Grafana or Prometheus Console. ([\#13876](https://github.com/matrix-org/synapse/issues/13876)) +- Only pull relevant backfill points from the database based on the current depth and limit (instead of all) every time we want to `/backfill`. ([\#13879](https://github.com/matrix-org/synapse/issues/13879)) +- Faster room joins: Avoid waiting for full state when processing `/keys/changes` requests. ([\#13888](https://github.com/matrix-org/synapse/issues/13888)) +- Improve backfill robustness by trying more servers when we get a `4xx` error back. ([\#13890](https://github.com/matrix-org/synapse/issues/13890)) +- Fix mypy errors with canonicaljson 1.6.3. ([\#13905](https://github.com/matrix-org/synapse/issues/13905)) +- Faster remote room joins: correctly handle remote device list updates during a partial join. ([\#13913](https://github.com/matrix-org/synapse/issues/13913)) +- Complement image: propagate SIGTERM to all workers. ([\#13914](https://github.com/matrix-org/synapse/issues/13914)) +- Update an innaccurate comment in Synapse's upsert database helper. ([\#13924](https://github.com/matrix-org/synapse/issues/13924)) +- Update mypy (0.950 -> 0.981) and mypy-zope (0.3.7 -> 0.3.11). ([\#13925](https://github.com/matrix-org/synapse/issues/13925), [\#13993](https://github.com/matrix-org/synapse/issues/13993)) +- Use dedicated `get_local_users_in_room(room_id)` function to find local users when calculating users to copy over during a room upgrade. ([\#13960](https://github.com/matrix-org/synapse/issues/13960)) +- Refactor language in user directory `_track_user_joined_room` code to make it more clear that we use both local and remote users. ([\#13966](https://github.com/matrix-org/synapse/issues/13966)) +- Revert catch-all exceptions being recorded as event pull attempt failures (only handle what we know about). ([\#13969](https://github.com/matrix-org/synapse/issues/13969)) +- Speed up calculating push actions in large rooms. ([\#13973](https://github.com/matrix-org/synapse/issues/13973), [\#13992](https://github.com/matrix-org/synapse/issues/13992)) +- Enable update notifications from Github's dependabot. ([\#13976](https://github.com/matrix-org/synapse/issues/13976)) +- Prototype a workflow to automatically add changelogs to dependabot PRs. ([\#13998](https://github.com/matrix-org/synapse/issues/13998), [\#14011](https://github.com/matrix-org/synapse/issues/14011), [\#14017](https://github.com/matrix-org/synapse/issues/14017), [\#14021](https://github.com/matrix-org/synapse/issues/14021), [\#14027](https://github.com/matrix-org/synapse/issues/14027)) +- Fix type annotations to be compatible with new annotations in development versions of twisted. ([\#14012](https://github.com/matrix-org/synapse/issues/14012)) +- Clear out stale entries in `event_push_actions_staging` table. ([\#14020](https://github.com/matrix-org/synapse/issues/14020)) +- Bump versions of GitHub actions. ([\#13978](https://github.com/matrix-org/synapse/issues/13978), [\#13979](https://github.com/matrix-org/synapse/issues/13979), [\#13980](https://github.com/matrix-org/synapse/issues/13980), [\#13982](https://github.com/matrix-org/synapse/issues/13982), [\#14015](https://github.com/matrix-org/synapse/issues/14015), [\#14019](https://github.com/matrix-org/synapse/issues/14019), [\#14022](https://github.com/matrix-org/synapse/issues/14022), [\#14023](https://github.com/matrix-org/synapse/issues/14023)) + + Synapse 1.68.0 (2022-09-27) =========================== diff --git a/Cargo.lock b/Cargo.lock index 3ae36161b3cc..b952b6b4c01d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,27 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "aho-corasick" +version = "0.7.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4f55bd91a0978cbfd91c457a164bab8b4001c833b7f323132c0a4e1922dd44e" +dependencies = [ + "memchr", +] + +[[package]] +name = "anyhow" +version = "1.0.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98161a4e3e2184da77bb14f02184cdd111e83bbbcc9979dfee3c44b9a85f5602" + +[[package]] +name = "arc-swap" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "983cd8b9d4b02a6dc6ffa557262eb5858a27a0038ffffe21a0f133eaa819a164" + [[package]] name = "autocfg" version = "1.1.0" @@ -81,6 +102,18 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "adab1eaa3408fb7f0c777a73e7465fd5656136fc93b670eb6df3c88c2c1344e3" +[[package]] +name = "itoa" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8af84674fe1f223a982c933a0ee1086ac4d4052aa0fb8060c12c6ad838e754" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + [[package]] name = "libc" version = "0.2.132" @@ -97,6 +130,30 @@ dependencies = [ "scopeguard", ] +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + [[package]] name = "once_cell" version = "1.13.1" @@ -137,13 +194,15 @@ dependencies = [ [[package]] name = "pyo3" -version = "0.16.6" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0220c44442c9b239dd4357aa856ac468a4f5e1f0df19ddb89b2522952eb4c6ca" +checksum = "12f72538a0230791398a0986a6518ebd88abc3fded89007b506ed072acc831e1" dependencies = [ + "anyhow", "cfg-if", "indoc", "libc", + "memoffset", "parking_lot", "pyo3-build-config", "pyo3-ffi", @@ -153,9 +212,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.16.6" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c819d397859445928609d0ec5afc2da5204e0d0f73d6bf9e153b04e83c9cdc2" +checksum = "fc4cf18c20f4f09995f3554e6bcf9b09bd5e4d6b67c562fdfaafa644526ba479" dependencies = [ "once_cell", "target-lexicon", @@ -163,19 +222,30 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.16.6" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca882703ab55f54702d7bfe1189b41b0af10272389f04cae38fe4cd56c65f75f" +checksum = "a41877f28d8ebd600b6aa21a17b40c3b0fc4dfe73a27b6e81ab3d895e401b0e9" dependencies = [ "libc", "pyo3-build-config", ] +[[package]] +name = "pyo3-log" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5695ccff5060c13ca1751cf8c857a12da9b0bf0378cb071c5e0326f7c7e4c1b" +dependencies = [ + "arc-swap", + "log", + "pyo3", +] + [[package]] name = "pyo3-macros" -version = "0.16.6" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "568749402955ad7be7bad9a09b8593851cd36e549ac90bfd44079cea500f3f21" +checksum = "2e81c8d4bcc2f216dc1b665412df35e46d12ee8d3d046b381aad05f1fcf30547" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -185,15 +255,25 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.16.6" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "611f64e82d98f447787e82b8e7b0ebc681e1eb78fc1252668b2c605ffb4e1eb8" +checksum = "85752a767ee19399a78272cc2ab625cd7d373b2e112b4b13db28de71fa892784" dependencies = [ "proc-macro2", "quote", "syn", ] +[[package]] +name = "pythonize" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f7f0c136f5fbc01868185eef462800e49659eb23acca83b9e884367a006acb6" +dependencies = [ + "pyo3", + "serde", +] + [[package]] name = "quote" version = "1.0.21" @@ -212,12 +292,66 @@ dependencies = [ "bitflags", ] +[[package]] +name = "regex" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244" + +[[package]] +name = "ryu" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" + [[package]] name = "scopeguard" version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "serde" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "728eb6351430bccb993660dfffc5a72f91ccc1295abaa8ce19b27ebe4f75568b" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81fa1584d3d1bcacd84c277a0dfe21f5b0f6accf4a23d04d4c6d61f1af522b4c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.85" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e55a28e3aaef9d5ce0506d0a14dbba8054ddc7e499ef522dd8b26859ec9d4a44" +dependencies = [ + "itoa", + "ryu", + "serde", +] + [[package]] name = "smallvec" version = "1.9.0" @@ -245,9 +379,17 @@ dependencies = [ name = "synapse" version = "0.1.0" dependencies = [ + "anyhow", "blake2", "hex", + "lazy_static", + "log", "pyo3", + "pyo3-log", + "pythonize", + "regex", + "serde", + "serde_json", ] [[package]] diff --git a/build_rust.py b/build_rust.py index 5c5e557ee847..662474dcb47c 100644 --- a/build_rust.py +++ b/build_rust.py @@ -15,6 +15,9 @@ def build(setup_kwargs: Dict[str, Any]) -> None: path=cargo_toml_path, binding=Binding.PyO3, py_limited_api=True, + # We force always building in release mode, as we can't tell the + # difference between using `poetry` in development vs production. + debug=False, ) setup_kwargs.setdefault("rust_extensions", []).append(extension) setup_kwargs["zip_safe"] = False diff --git a/contrib/prometheus/synapse-v2.rules b/contrib/prometheus/synapse-v2.rules index cbe6f7bebaa4..dde311322f52 100644 --- a/contrib/prometheus/synapse-v2.rules +++ b/contrib/prometheus/synapse-v2.rules @@ -1,7 +1,12 @@ groups: - name: synapse rules: - # These 3 rules are used in the included Prometheus console + + ### + ### Prometheus Console Only + ### The following rules are only needed if you use the Prometheus Console + ### in contrib/prometheus/consoles/synapse.html + ### - record: 'synapse_federation_client_sent' labels: type: "EDU" @@ -15,7 +20,6 @@ groups: type: "Query" expr: 'sum(synapse_federation_client_sent_queries) by (job)' - # These 3 rules are used in the included Prometheus console - record: 'synapse_federation_server_received' labels: type: "EDU" @@ -29,7 +33,6 @@ groups: type: "Query" expr: 'sum(synapse_federation_server_received_queries) by (job)' - # These 2 rules are used in the included Prometheus console - record: 'synapse_federation_transaction_queue_pending' labels: type: "EDU" @@ -38,8 +41,16 @@ groups: labels: type: "PDU" expr: 'synapse_federation_transaction_queue_pending_pdus + 0' + ### + ### End of 'Prometheus Console Only' rules block + ### + - # These 3 rules are used in the included Grafana dashboard + ### + ### Grafana Only + ### The following rules are only needed if you use the Grafana dashboard + ### in contrib/grafana/synapse.json + ### - record: synapse_storage_events_persisted_by_source_type expr: sum without(type, origin_type, origin_entity) (synapse_storage_events_persisted_events_sep_total{origin_type="remote"}) labels: @@ -53,11 +64,11 @@ groups: labels: type: bridges - # This rule is used in the included Grafana dashboard - record: synapse_storage_events_persisted_by_event_type expr: sum without(origin_entity, origin_type) (synapse_storage_events_persisted_events_sep_total) - # This rule is used in the included Grafana dashboard - record: synapse_storage_events_persisted_by_origin expr: sum without(type) (synapse_storage_events_persisted_events_sep_total) - + ### + ### End of 'Grafana Only' rules block + ### diff --git a/contrib/workers-bash-scripts/create-multiple-generic-workers.md b/contrib/workers-bash-scripts/create-multiple-generic-workers.md index d30310142946..c9be707b3c6e 100644 --- a/contrib/workers-bash-scripts/create-multiple-generic-workers.md +++ b/contrib/workers-bash-scripts/create-multiple-generic-workers.md @@ -7,7 +7,7 @@ You can alternatively create multiple worker configuration files with a simple ` #!/bin/bash for i in {1..5} do -cat << EOF >> generic_worker$i.yaml +cat << EOF > generic_worker$i.yaml worker_app: synapse.app.generic_worker worker_name: generic_worker$i @@ -15,6 +15,8 @@ worker_name: generic_worker$i worker_replication_host: 127.0.0.1 worker_replication_http_port: 9093 +worker_main_http_uri: http://localhost:8008/ + worker_listeners: - type: http port: 808$i diff --git a/debian/changelog b/debian/changelog index 1c1545469fb0..f1de7b15d597 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,35 @@ +matrix-synapse-py3 (1.69.0) stable; urgency=medium + + * New Synapse release 1.69.0. + + -- Synapse Packaging team Mon, 17 Oct 2022 11:31:03 +0100 + +matrix-synapse-py3 (1.69.0~rc4) stable; urgency=medium + + * New Synapse release 1.69.0rc4. + + -- Synapse Packaging team Fri, 14 Oct 2022 15:04:47 +0100 + +matrix-synapse-py3 (1.69.0~rc3) stable; urgency=medium + + * New Synapse release 1.69.0rc3. + + -- Synapse Packaging team Wed, 12 Oct 2022 13:24:04 +0100 + +matrix-synapse-py3 (1.69.0~rc2) stable; urgency=medium + + * New Synapse release 1.69.0rc2. + + -- Synapse Packaging team Thu, 06 Oct 2022 14:45:00 +0100 + +matrix-synapse-py3 (1.69.0~rc1) stable; urgency=medium + + * The man page for the hash_password script has been updated to reflect + the correct default value of 'bcrypt_rounds'. + * New Synapse release 1.69.0rc1. + + -- Synapse Packaging team Tue, 04 Oct 2022 11:17:16 +0100 + matrix-synapse-py3 (1.68.0) stable; urgency=medium * New Synapse release 1.68.0. diff --git a/debian/hash_password.1 b/debian/hash_password.1 index d64b91e7c828..39fa3ffcbfef 100644 --- a/debian/hash_password.1 +++ b/debian/hash_password.1 @@ -10,7 +10,7 @@ .P \fBhash_password\fR takes a password as an parameter either on the command line or the \fBSTDIN\fR if not supplied\. .P -It accepts an YAML file which can be used to specify parameters like the number of rounds for bcrypt and password_config section having the pepper value used for the hashing\. By default \fBbcrypt_rounds\fR is set to \fB10\fR\. +It accepts an YAML file which can be used to specify parameters like the number of rounds for bcrypt and password_config section having the pepper value used for the hashing\. By default \fBbcrypt_rounds\fR is set to \fB12\fR\. .P The hashed password is written on the \fBSTDOUT\fR\. .SH "FILES" diff --git a/debian/hash_password.ronn b/debian/hash_password.ronn index eeb354602da2..5d0df538020e 100644 --- a/debian/hash_password.ronn +++ b/debian/hash_password.ronn @@ -14,7 +14,7 @@ or the `STDIN` if not supplied. It accepts an YAML file which can be used to specify parameters like the number of rounds for bcrypt and password_config section having the pepper -value used for the hashing. By default `bcrypt_rounds` is set to **10**. +value used for the hashing. By default `bcrypt_rounds` is set to **12**. The hashed password is written on the `STDOUT`. diff --git a/docker/Dockerfile b/docker/Dockerfile index 9d227200ba2d..f6de919a6f5b 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -109,6 +109,11 @@ RUN mkdir /rust /cargo RUN curl -sSf https://sh.rustup.rs | sh -s -- -y --no-modify-path --default-toolchain stable +# arm64 builds consume a lot of memory if `CARGO_NET_GIT_FETCH_WITH_CLI` is not +# set to true, so we expose it as a build-arg. +ARG CARGO_NET_GIT_FETCH_WITH_CLI=false +ENV CARGO_NET_GIT_FETCH_WITH_CLI=$CARGO_NET_GIT_FETCH_WITH_CLI + # Beeper: install shared secret authenticator RUN pip install --prefix="/install" --no-deps --no-warn-script-location \ 'git+https://github.com/devture/matrix-synapse-shared-secret-auth@e178353ec87c56e0169dd04466d4769da5ed9c46#egg=shared_secret_authenticator' @@ -135,7 +140,7 @@ RUN --mount=type=cache,target=/root/.cache/pip \ COPY synapse /synapse/synapse/ COPY rust /synapse/rust/ # ... and what we need to `pip install`. -COPY pyproject.toml README.rst build_rust.py /synapse/ +COPY pyproject.toml README.rst build_rust.py Cargo.toml Cargo.lock /synapse/ # Repeat of earlier build argument declaration, as this is a new build stage. ARG TEST_ONLY_IGNORE_POETRY_LOCKFILE diff --git a/docker/complement/Dockerfile b/docker/complement/Dockerfile index 0e13722d1c00..c0935c99a8b7 100644 --- a/docker/complement/Dockerfile +++ b/docker/complement/Dockerfile @@ -8,19 +8,15 @@ ARG SYNAPSE_VERSION=latest -# first of all, we create a base image with a postgres server and database, -# which we can copy into the target image. For repeated rebuilds, this is -# much faster than apt installing postgres each time. -# -# This trick only works because (a) the Synapse image happens to have all the -# shared libraries that postgres wants, (b) we use a postgres image based on -# the same debian version as Synapse's docker image (so the versions of the -# shared libraries match). - -# now build the final image, based on the Synapse image. - FROM matrixdotorg/synapse-workers:$SYNAPSE_VERSION - # copy the postgres installation over from the image we built above + # First of all, we copy postgres server from the official postgres image, + # since for repeated rebuilds, this is much faster than apt installing + # postgres each time. + + # This trick only works because (a) the Synapse image happens to have all the + # shared libraries that postgres wants, (b) we use a postgres image based on + # the same debian version as Synapse's docker image (so the versions of the + # shared libraries match). RUN adduser --system --uid 999 postgres --home /var/lib/postgresql COPY --from=postgres:13-bullseye /usr/lib/postgresql /usr/lib/postgresql COPY --from=postgres:13-bullseye /usr/share/postgresql /usr/share/postgresql @@ -28,7 +24,7 @@ FROM matrixdotorg/synapse-workers:$SYNAPSE_VERSION ENV PATH="${PATH}:/usr/lib/postgresql/13/bin" ENV PGDATA=/var/lib/postgresql/data - # initialise the database cluster in /var/lib/postgresql + # We also initialize the database at build time, rather than runtime, so that it's faster to spin up the image. RUN gosu postgres initdb --locale=C --encoding=UTF-8 --auth-host password # Configure a password and create a database for Synapse diff --git a/docs/admin_api/register_api.md b/docs/admin_api/register_api.md index f6be31b44312..dd2830f3a18a 100644 --- a/docs/admin_api/register_api.md +++ b/docs/admin_api/register_api.md @@ -5,7 +5,7 @@ non-interactive way. This is generally used for bootstrapping a Synapse instance with administrator accounts. To authenticate yourself to the server, you will need both the shared secret -([`registration_shared_secret`](../configuration/config_documentation.md#registration_shared_secret) +([`registration_shared_secret`](../usage/configuration/config_documentation.md#registration_shared_secret) in the homeserver configuration), and a one-time nonce. If the registration shared secret is not configured, this API is not enabled. diff --git a/docs/development/contributing_guide.md b/docs/development/contributing_guide.md index cb0d727efa63..5c3722516843 100644 --- a/docs/development/contributing_guide.md +++ b/docs/development/contributing_guide.md @@ -167,6 +167,12 @@ was broken. They are slower than the linters but will typically catch more error poetry run trial tests ``` +You can run unit tests in parallel by specifying `-jX` argument to `trial` where `X` is the number of parallel runners you want. To use 4 cpu cores, you would run them like: + +```sh +poetry run trial -j4 tests +``` + If you wish to only run *some* unit tests, you may specify another module instead of `tests` - or a test class or a method: diff --git a/docs/development/database_schema.md b/docs/development/database_schema.md index e9b925ddd835..29945c264ee8 100644 --- a/docs/development/database_schema.md +++ b/docs/development/database_schema.md @@ -195,23 +195,24 @@ There are three separate aspects to this: ## `event_id` global uniqueness -In room versions `1` and `2` it's possible to end up with two events with the -same `event_id` (in the same or different rooms). After room version `3`, that -can only happen with a hash collision, which we basically hope will never -happen. - -There are several places in Synapse and even Matrix APIs like [`GET +`event_id`'s can be considered globally unique although there has been a lot of +debate on this topic in places like +[MSC2779](https://github.com/matrix-org/matrix-spec-proposals/issues/2779) and +[MSC2848](https://github.com/matrix-org/matrix-spec-proposals/pull/2848) which +has no resolution yet (as of 2022-09-01). There are several places in Synapse +and even in the Matrix APIs like [`GET /_matrix/federation/v1/event/{eventId}`](https://spec.matrix.org/v1.1/server-server-api/#get_matrixfederationv1eventeventid) where we assume that event IDs are globally unique. -But hash collisions are still possible, and by treating event IDs as room -scoped, we can reduce the possibility of a hash collision. When scoping -`event_id` in the database schema, it should be also accompanied by `room_id` -(`PRIMARY KEY (room_id, event_id)`) and lookups should be done through the pair -`(room_id, event_id)`. +When scoping `event_id` in a database schema, it is often nice to accompany it +with `room_id` (`PRIMARY KEY (room_id, event_id)` and a `FOREIGN KEY(room_id) +REFERENCES rooms(room_id)`) which makes flexible lookups easy. For example it +makes it very easy to find and clean up everything in a room when it needs to be +purged (no need to use sub-`select` query or join from the `events` table). + +A note on collisions: In room versions `1` and `2` it's possible to end up with +two events with the same `event_id` (in the same or different rooms). After room +version `3`, that can only happen with a hash collision, which we basically hope +will never happen (SHA256 has a massive big key space). -There has been a lot of debate on this in places like -https://github.com/matrix-org/matrix-spec-proposals/issues/2779 and -[MSC2848](https://github.com/matrix-org/matrix-spec-proposals/pull/2848) which -has no resolution yet (as of 2022-09-01). diff --git a/docs/metrics-howto.md b/docs/metrics-howto.md index 279303a7988e..d8416b5a5f7b 100644 --- a/docs/metrics-howto.md +++ b/docs/metrics-howto.md @@ -135,6 +135,8 @@ Synapse 1.2 updates the Prometheus metrics to match the naming convention of the upstream `prometheus_client`. The old names are considered deprecated and will be removed in a future version of Synapse. +**The old names will be disabled by default in Synapse v1.71.0 and removed +altogether in Synapse v1.73.0.** | New Name | Old Name | | ---------------------------------------------------------------------------- | ---------------------------------------------------------------------- | @@ -146,6 +148,13 @@ Synapse. | synapse_federation_client_events_processed_total | synapse_federation_client_events_processed | | synapse_event_processing_loop_count_total | synapse_event_processing_loop_count | | synapse_event_processing_loop_room_count_total | synapse_event_processing_loop_room_count | +| synapse_util_caches_cache_hits | synapse_util_caches_cache:hits | +| synapse_util_caches_cache_size | synapse_util_caches_cache:size | +| synapse_util_caches_cache_evicted_size | synapse_util_caches_cache:evicted_size | +| synapse_util_caches_cache | synapse_util_caches_cache:total | +| synapse_util_caches_response_cache_size | synapse_util_caches_response_cache:size | +| synapse_util_caches_response_cache_hits | synapse_util_caches_response_cache:hits | +| synapse_util_caches_response_cache_evicted_size | synapse_util_caches_response_cache:evicted_size | | synapse_util_metrics_block_count_total | synapse_util_metrics_block_count | | synapse_util_metrics_block_time_seconds_total | synapse_util_metrics_block_time_seconds | | synapse_util_metrics_block_ru_utime_seconds_total | synapse_util_metrics_block_ru_utime_seconds | @@ -261,7 +270,7 @@ Standard Metric Names As of synapse version 0.18.2, the format of the process-wide metrics has been changed to fit prometheus standard naming conventions. Additionally -the units have been changed to seconds, from miliseconds. +the units have been changed to seconds, from milliseconds. | New name | Old name | | ---------------------------------------- | --------------------------------- | diff --git a/docs/setup/installation.md b/docs/setup/installation.md index 96833effc6b9..dcd8f17c5e98 100644 --- a/docs/setup/installation.md +++ b/docs/setup/installation.md @@ -181,7 +181,7 @@ doas pkg_add synapse #### NixOS Robin Lambertz has packaged Synapse for NixOS at: - + ### Installing as a Python module from PyPI diff --git a/docs/sso_mapping_providers.md b/docs/sso_mapping_providers.md index 817499149f90..9f5e5fbbe152 100644 --- a/docs/sso_mapping_providers.md +++ b/docs/sso_mapping_providers.md @@ -73,8 +73,8 @@ A custom mapping provider must specify the following methods: * `async def map_user_attributes(self, userinfo, token, failures)` - This method must be async. - Arguments: - - `userinfo` - A `authlib.oidc.core.claims.UserInfo` object to extract user - information from. + - `userinfo` - An [`authlib.oidc.core.claims.UserInfo`](https://docs.authlib.org/en/latest/specs/oidc.html#authlib.oidc.core.UserInfo) + object to extract user information from. - `token` - A dictionary which includes information necessary to make further requests to the OpenID provider. - `failures` - An `int` that represents the amount of times the returned @@ -91,7 +91,13 @@ A custom mapping provider must specify the following methods: `None`, the user is prompted to pick their own username. This is only used during a user's first login. Once a localpart has been associated with a remote user ID (see `get_remote_user_id`) it cannot be updated. - - `displayname`: An optional string, the display name for the user. + - `confirm_localpart`: A boolean. If set to `True`, when a `localpart` + string is returned from this method, Synapse will prompt the user to + either accept this localpart or pick their own username. Otherwise this + option has no effect. If omitted, defaults to `False`. + - `display_name`: An optional string, the display name for the user. + - `emails`: A list of strings, the email address(es) to associate with + this user. If omitted, defaults to an empty list. * `async def get_extra_attributes(self, userinfo, token)` - This method must be async. - Arguments: diff --git a/docs/systemd-with-workers/workers/media_worker.yaml b/docs/systemd-with-workers/workers/media_worker.yaml new file mode 100644 index 000000000000..eb34d1249231 --- /dev/null +++ b/docs/systemd-with-workers/workers/media_worker.yaml @@ -0,0 +1,14 @@ +worker_app: synapse.app.media_repository +worker_name: media_worker + +# The replication listener on the main synapse process. +worker_replication_host: 127.0.0.1 +worker_replication_http_port: 9093 + +worker_listeners: + - type: http + port: 8085 + resources: + - names: [media] + +worker_log_config: /etc/matrix-synapse/media-worker-log.yaml diff --git a/docs/upgrade.md b/docs/upgrade.md index 37f1cb27df95..b81385b19183 100644 --- a/docs/upgrade.md +++ b/docs/upgrade.md @@ -88,13 +88,103 @@ process, for example: dpkg -i matrix-synapse-py3_1.3.0+stretch1_amd64.deb ``` +# Upgrading to v1.69.0 + +## Changes to the receipts replication streams + +Synapse now includes information indicating if a receipt applies to a thread when +replicating it to other workers. This is a forwards- and backwards-incompatible +change: v1.68 and workers cannot process receipts replicated by v1.69 workers, and +vice versa. + +Once all workers are upgraded to v1.69 (or downgraded to v1.68), receipts +replication will resume as normal. + + +## Deprecation of legacy Prometheus metric names + +In current versions of Synapse, some Prometheus metrics are emitted under two different names, +with one of the names being older but non-compliant with OpenMetrics and Prometheus conventions +and one of the names being newer but compliant. + +Synapse v1.71.0 will turn the old metric names off *by default*. +For administrators that still rely on them and have not had chance to update their +uses of the metrics, it's possible to specify `enable_legacy_metrics: true` in +the configuration to re-enable them temporarily. + +Synapse v1.73.0 will **remove legacy metric names altogether** and it will no longer +be possible to re-enable them. + +The Grafana dashboard, Prometheus recording rules and Prometheus Consoles included +in the `contrib` directory in the Synapse repository have been updated to no longer +rely on the legacy names. These can be used on a current version of Synapse +because current versions of Synapse emit both old and new names. + +You may need to update your alerting rules or any other rules that depend on +the names of Prometheus metrics. +If you want to test your changes before legacy names are disabled by default, +you may specify `enable_legacy_metrics: false` in your homeserver configuration. + +A list of affected metrics is available on the [Metrics How-to page](https://matrix-org.github.io/synapse/v1.69/metrics-howto.html?highlight=metrics%20deprecated#renaming-of-metrics--deprecation-of-old-names-in-12). + + +## Deprecation of the `generate_short_term_login_token` module API method + +The following method of the module API has been deprecated, and is scheduled to +be remove in v1.71.0: + +```python +def generate_short_term_login_token( + self, + user_id: str, + duration_in_ms: int = (2 * 60 * 1000), + auth_provider_id: str = "", + auth_provider_session_id: Optional[str] = None, +) -> str: + ... +``` + +It has been replaced by an asynchronous equivalent: + +```python +async def create_login_token( + self, + user_id: str, + duration_in_ms: int = (2 * 60 * 1000), + auth_provider_id: Optional[str] = None, + auth_provider_session_id: Optional[str] = None, +) -> str: + ... +``` + +Synapse will log a warning when a module uses the deprecated method, to help +administrators find modules using it. + + # Upgrading to v1.68.0 -As announced in the upgrade notes for v1.67.0, Synapse now requires a SQLite -version of 3.27.0 or higher if SQLite is in use and source checkouts of Synapse -now require a recent Rust compiler. +Two changes announced in the upgrade notes for v1.67.0 have now landed in v1.68.0. + +## SQLite version requirement + +Synapse now requires a SQLite version of 3.27.0 or higher if SQLite is configured as +Synapse's database. + +Installations using + +- Docker images [from `matrixdotorg`](https://hub.docker.com/r/matrixdotorg/synapse), +- Debian packages [from Matrix.org](https://packages.matrix.org/), or +- a PostgreSQL database + +are not affected. + +## Rust requirement when building from source. + +Building from a source checkout of Synapse now requires a recent Rust compiler +(currently Rust 1.58.1, but see also the +[Platform Dependency Policy](https://matrix-org.github.io/synapse/latest/deprecation_policy.html)). -Installations using +Installations using - Docker images [from `matrixdotorg`](https://hub.docker.com/r/matrixdotorg/synapse), - Debian packages [from Matrix.org](https://packages.matrix.org/), or @@ -134,12 +224,12 @@ The simplest way of installing Rust is via [rustup.rs](https://rustup.rs/) ## SQLite version requirement in the next release -From the next major release (v1.68.0) Synapse will require SQLite 3.27.0 or +From the next major release (v1.68.0) Synapse will require SQLite 3.27.0 or higher. Synapse v1.67.0 will be the last major release supporting SQLite versions 3.22 to 3.26. Those using Docker images or Debian packages from Matrix.org will not be -affected. If you have installed from source, you should check the version of +affected. If you have installed from source, you should check the version of SQLite used by Python with: ```shell diff --git a/docs/usage/configuration/config_documentation.md b/docs/usage/configuration/config_documentation.md index 69d305b62e6d..8a71a934ea2a 100644 --- a/docs/usage/configuration/config_documentation.md +++ b/docs/usage/configuration/config_documentation.md @@ -179,7 +179,7 @@ This will tell other servers to send traffic to port 443 instead. This option currently defaults to false. -See https://matrix-org.github.io/synapse/latest/delegate.html for more +See [Delegation of incoming federation traffic](../../delegate.md) for more information. Example configuration: @@ -2229,6 +2229,9 @@ homeserver. If the room already exists, make certain it is a publicly joinable room, i.e. the join rule of the room must be set to 'public'. You can find more options relating to auto-joining rooms below. +As Spaces are just rooms under the hood, Space aliases may also be +used. + Example configuration: ```yaml auto_join_rooms: @@ -2240,7 +2243,7 @@ auto_join_rooms: Where `auto_join_rooms` are specified, setting this flag ensures that the rooms exist by creating them when the first user on the -homeserver registers. +homeserver registers. This option will not create Spaces. By default the auto-created rooms are publicly joinable from any federated server. Use the `autocreate_auto_join_rooms_federated` and @@ -2258,7 +2261,7 @@ autocreate_auto_join_rooms: false --- ### `autocreate_auto_join_rooms_federated` -Whether the rooms listen in `auto_join_rooms` that are auto-created are available +Whether the rooms listed in `auto_join_rooms` that are auto-created are available via federation. Only has an effect if `autocreate_auto_join_rooms` is true. Note that whether a room is federated cannot be modified after @@ -2433,6 +2436,31 @@ Example configuration: enable_metrics: true ``` --- +### `enable_legacy_metrics` + +Set to `true` to publish both legacy and non-legacy Prometheus metric names, +or to `false` to only publish non-legacy Prometheus metric names. +Defaults to `true`. Has no effect if `enable_metrics` is `false`. +**In Synapse v1.71.0, this will default to `false` before being removed in Synapse v1.73.0.** + +Legacy metric names include: +- metrics containing colons in the name, such as `synapse_util_caches_response_cache:hits`, because colons are supposed to be reserved for user-defined recording rules; +- counters that don't end with the `_total` suffix, such as `synapse_federation_client_sent_edus`, therefore not adhering to the OpenMetrics standard. + +These legacy metric names are unconventional and not compliant with OpenMetrics standards. +They are included for backwards compatibility. + +Example configuration: +```yaml +enable_legacy_metrics: false +``` + +See https://github.com/matrix-org/synapse/issues/11106 for context. + +*Since v1.67.0.* + +**Will be removed in v1.73.0.** +--- ### `sentry` Use this option to enable sentry integration. Provide the DSN assigned to you by sentry @@ -2949,7 +2977,7 @@ Options for each entry include: * `module`: The class name of a custom mapping module. Default is `synapse.handlers.oidc.JinjaOidcMappingProvider`. - See https://matrix-org.github.io/synapse/latest/sso_mapping_providers.html#openid-mapping-providers + See [OpenID Mapping Providers](../../sso_mapping_providers.md#openid-mapping-providers) for information on implementing a custom mapping provider. * `config`: Configuration for the mapping provider module. This section will @@ -3390,13 +3418,15 @@ This option has the following sub-options: the user directory. If false, search results will only contain users visible in public rooms and users sharing a room with the requester. Defaults to false. + NB. If you set this to true, and the last time the user_directory search indexes were (re)built was before Synapse 1.44, you'll have to rebuild the indexes in order to search through all known users. + These indexes are built the first time Synapse starts; admins can - manually trigger a rebuild via API following the instructions at - https://matrix-org.github.io/synapse/latest/usage/administration/admin_api/background_updates.html#run - Set to true to return search results containing all known users, even if that + manually trigger a rebuild via the API following the instructions + [for running background updates](../administration/admin_api/background_updates.md#run), + set to true to return search results containing all known users, even if that user does not share a room with the requester. * `prefer_local_users`: Defines whether to prefer local users in search query results. If set to true, local users are more likely to appear above remote users when searching the diff --git a/docs/workers.md b/docs/workers.md index 40b18523137c..25f2e13237f8 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -93,7 +93,6 @@ listener" for the main process; and secondly, you need to enable redis-based replication. Optionally, a shared secret can be used to authenticate HTTP traffic between workers. For example: - ```yaml # extend the existing `listeners` section. This defines the ports that the # main process will listen on. @@ -129,7 +128,8 @@ In the config file for each worker, you must specify: * The HTTP replication endpoint that it should talk to on the main synapse process (`worker_replication_host` and `worker_replication_http_port`) * If handling HTTP requests, a `worker_listeners` option with an `http` - listener, in the same way as the `listeners` option in the shared config. + listener, in the same way as the [`listeners`](usage/configuration/config_documentation.md#listeners) + option in the shared config. * If handling the `^/_matrix/client/v3/keys/upload` endpoint, the HTTP URI for the main process (`worker_main_http_uri`). @@ -285,8 +285,9 @@ For multiple workers not handling the SSO endpoints properly, see [#7530](https://github.com/matrix-org/synapse/issues/7530) and [#9427](https://github.com/matrix-org/synapse/issues/9427). -Note that a HTTP listener with `client` and `federation` resources must be -configured in the `worker_listeners` option in the worker config. +Note that a [HTTP listener](usage/configuration/config_documentation.md#listeners) +with `client` and `federation` `resources` must be configured in the `worker_listeners` +option in the worker config. #### Load balancing @@ -326,7 +327,8 @@ effects of bursts of events from that bridge on events sent by normal users. Additionally, the writing of specific streams (such as events) can be moved off of the main process to a particular worker. -To enable this, the worker must have a HTTP replication listener configured, +To enable this, the worker must have a +[HTTP `replication` listener](usage/configuration/config_documentation.md#listeners) configured, have a `worker_name` and be listed in the `instance_map` config. The same worker can handle multiple streams, but unless otherwise documented, each stream can only have a single writer. @@ -410,7 +412,7 @@ the stream writer for the `presence` stream: There is also support for moving background tasks to a separate worker. Background tasks are run periodically or started via replication. Exactly which tasks are configured to run depends on your Synapse configuration (e.g. if -stats is enabled). +stats is enabled). This worker doesn't handle any REST endpoints itself. To enable this, the worker must have a `worker_name` and can be configured to run background tasks. For example, to move background tasks to a dedicated worker, @@ -457,8 +459,8 @@ worker application type. #### Notifying Application Services You can designate one generic worker to send output traffic to Application Services. - -Specify its name in the shared configuration as follows: +Doesn't handle any REST endpoints itself, but you should specify its name in the +shared configuration as follows: ```yaml notify_appservices_from_worker: worker_name @@ -536,16 +538,12 @@ file to stop the main synapse running background jobs related to managing the media repository. Note that doing so will prevent the main process from being able to handle the above endpoints. -In the `media_repository` worker configuration file, configure the http listener to +In the `media_repository` worker configuration file, configure the +[HTTP listener](usage/configuration/config_documentation.md#listeners) to expose the `media` resource. For example: ```yaml -worker_listeners: - - type: http - port: 8085 - resources: - - names: - - media +{{#include systemd-with-workers/workers/media_worker.yaml}} ``` Note that if running multiple media repositories they must be on the same server diff --git a/mypy.ini b/mypy.ini index 64f9097206d1..34b4523e007e 100644 --- a/mypy.ini +++ b/mypy.ini @@ -106,6 +106,9 @@ disallow_untyped_defs = False [mypy-tests.handlers.test_user_directory] disallow_untyped_defs = True +[mypy-tests.push.test_bulk_push_rule_evaluator] +disallow_untyped_defs = True + [mypy-tests.test_server] disallow_untyped_defs = True diff --git a/poetry.lock b/poetry.lock index 074147512c83..5b515d668137 100644 --- a/poetry.lock +++ b/poetry.lock @@ -95,14 +95,15 @@ webencodings = "*" [[package]] name = "canonicaljson" -version = "1.6.0" +version = "1.6.3" description = "Canonical JSON" category = "main" optional = false -python-versions = "~=3.7" +python-versions = ">=3.7" [package.dependencies] simplejson = ">=3.14.0" +typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.8\""} [package.extras] frozendict = ["frozendict (>=1.0)"] @@ -580,11 +581,11 @@ python-versions = "*" [[package]] name = "mypy" -version = "0.950" +version = "0.981" description = "Optional static typing for Python" category = "dev" optional = false -python-versions = ">=3.6" +python-versions = ">=3.7" [package.dependencies] mypy-extensions = ">=0.4.3" @@ -607,14 +608,14 @@ python-versions = "*" [[package]] name = "mypy-zope" -version = "0.3.7" +version = "0.3.11" description = "Plugin for mypy to support zope interfaces" category = "dev" optional = false python-versions = "*" [package.dependencies] -mypy = "0.950" +mypy = "0.981" "zope.interface" = "*" "zope.schema" = "*" @@ -1690,8 +1691,8 @@ bleach = [ {file = "bleach-4.1.0.tar.gz", hash = "sha256:0900d8b37eba61a802ee40ac0061f8c2b5dee29c1927dd1d233e075ebf5a71da"}, ] canonicaljson = [ - {file = "canonicaljson-1.6.0-py3-none-any.whl", hash = "sha256:7230c2a2a3db07874f622af84effe41a655e07bf23734830e18a454e65d5b998"}, - {file = "canonicaljson-1.6.0.tar.gz", hash = "sha256:8739d5fd91aca7281d425660ae65af7663808c8177778965f67e90b16a2b2427"}, + {file = "canonicaljson-1.6.3-py3-none-any.whl", hash = "sha256:6ba3cf1702fa3d209b3e915a4e9a3e4ef194f1e8fca189c1f0b7a2a7686a27e6"}, + {file = "canonicaljson-1.6.3.tar.gz", hash = "sha256:ca59760bc274a899a0da75809d6909ae43e5123381fd6ef040a44d1952c0b448"}, ] certifi = [ {file = "certifi-2021.10.8-py2.py3-none-any.whl", hash = "sha256:d62a0163eb4c2344ac042ab2bdf75399a71a2d8c7d47eac2e2ee91b9d6339569"}, @@ -2228,37 +2229,38 @@ msgpack = [ {file = "msgpack-1.0.3.tar.gz", hash = "sha256:51fdc7fb93615286428ee7758cecc2f374d5ff363bdd884c7ea622a7a327a81e"}, ] mypy = [ - {file = "mypy-0.950-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:cf9c261958a769a3bd38c3e133801ebcd284ffb734ea12d01457cb09eacf7d7b"}, - {file = "mypy-0.950-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b5b5bd0ffb11b4aba2bb6d31b8643902c48f990cc92fda4e21afac658044f0c0"}, - {file = "mypy-0.950-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5e7647df0f8fc947388e6251d728189cfadb3b1e558407f93254e35abc026e22"}, - {file = "mypy-0.950-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:eaff8156016487c1af5ffa5304c3e3fd183edcb412f3e9c72db349faf3f6e0eb"}, - {file = "mypy-0.950-cp310-cp310-win_amd64.whl", hash = "sha256:563514c7dc504698fb66bb1cf897657a173a496406f1866afae73ab5b3cdb334"}, - {file = "mypy-0.950-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:dd4d670eee9610bf61c25c940e9ade2d0ed05eb44227275cce88701fee014b1f"}, - {file = "mypy-0.950-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:ca75ecf2783395ca3016a5e455cb322ba26b6d33b4b413fcdedfc632e67941dc"}, - {file = "mypy-0.950-cp36-cp36m-win_amd64.whl", hash = "sha256:6003de687c13196e8a1243a5e4bcce617d79b88f83ee6625437e335d89dfebe2"}, - {file = "mypy-0.950-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:4c653e4846f287051599ed8f4b3c044b80e540e88feec76b11044ddc5612ffed"}, - {file = "mypy-0.950-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:e19736af56947addedce4674c0971e5dceef1b5ec7d667fe86bcd2b07f8f9075"}, - {file = "mypy-0.950-cp37-cp37m-win_amd64.whl", hash = "sha256:ef7beb2a3582eb7a9f37beaf38a28acfd801988cde688760aea9e6cc4832b10b"}, - {file = "mypy-0.950-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0112752a6ff07230f9ec2f71b0d3d4e088a910fdce454fdb6553e83ed0eced7d"}, - {file = "mypy-0.950-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ee0a36edd332ed2c5208565ae6e3a7afc0eabb53f5327e281f2ef03a6bc7687a"}, - {file = "mypy-0.950-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:77423570c04aca807508a492037abbd72b12a1fb25a385847d191cd50b2c9605"}, - {file = "mypy-0.950-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5ce6a09042b6da16d773d2110e44f169683d8cc8687e79ec6d1181a72cb028d2"}, - {file = "mypy-0.950-cp38-cp38-win_amd64.whl", hash = "sha256:5b231afd6a6e951381b9ef09a1223b1feabe13625388db48a8690f8daa9b71ff"}, - {file = "mypy-0.950-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0384d9f3af49837baa92f559d3fa673e6d2652a16550a9ee07fc08c736f5e6f8"}, - {file = "mypy-0.950-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1fdeb0a0f64f2a874a4c1f5271f06e40e1e9779bf55f9567f149466fc7a55038"}, - {file = "mypy-0.950-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:61504b9a5ae166ba5ecfed9e93357fd51aa693d3d434b582a925338a2ff57fd2"}, - {file = "mypy-0.950-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:a952b8bc0ae278fc6316e6384f67bb9a396eb30aced6ad034d3a76120ebcc519"}, - {file = "mypy-0.950-cp39-cp39-win_amd64.whl", hash = "sha256:eaea21d150fb26d7b4856766e7addcf929119dd19fc832b22e71d942835201ef"}, - {file = "mypy-0.950-py3-none-any.whl", hash = "sha256:a4d9898f46446bfb6405383b57b96737dcfd0a7f25b748e78ef3e8c576bba3cb"}, - {file = "mypy-0.950.tar.gz", hash = "sha256:1b333cfbca1762ff15808a0ef4f71b5d3eed8528b23ea1c3fb50543c867d68de"}, + {file = "mypy-0.981-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:4bc460e43b7785f78862dab78674e62ec3cd523485baecfdf81a555ed29ecfa0"}, + {file = "mypy-0.981-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:756fad8b263b3ba39e4e204ee53042671b660c36c9017412b43af210ddee7b08"}, + {file = "mypy-0.981-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a16a0145d6d7d00fbede2da3a3096dcc9ecea091adfa8da48fa6a7b75d35562d"}, + {file = "mypy-0.981-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce65f70b14a21fdac84c294cde75e6dbdabbcff22975335e20827b3b94bdbf49"}, + {file = "mypy-0.981-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6e35d764784b42c3e256848fb8ed1d4292c9fc0098413adb28d84974c095b279"}, + {file = "mypy-0.981-cp310-cp310-win_amd64.whl", hash = "sha256:e53773073c864d5f5cec7f3fc72fbbcef65410cde8cc18d4f7242dea60dac52e"}, + {file = "mypy-0.981-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:6ee196b1d10b8b215e835f438e06965d7a480f6fe016eddbc285f13955cca659"}, + {file = "mypy-0.981-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8ad21d4c9d3673726cf986ea1d0c9fb66905258709550ddf7944c8f885f208be"}, + {file = "mypy-0.981-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:d1debb09043e1f5ee845fa1e96d180e89115b30e47c5d3ce53bc967bab53f62d"}, + {file = "mypy-0.981-cp37-cp37m-win_amd64.whl", hash = "sha256:9f362470a3480165c4c6151786b5379351b790d56952005be18bdbdd4c7ce0ae"}, + {file = "mypy-0.981-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:c9e0efb95ed6ca1654951bd5ec2f3fa91b295d78bf6527e026529d4aaa1e0c30"}, + {file = "mypy-0.981-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:e178eaffc3c5cd211a87965c8c0df6da91ed7d258b5fc72b8e047c3771317ddb"}, + {file = "mypy-0.981-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:06e1eac8d99bd404ed8dd34ca29673c4346e76dd8e612ea507763dccd7e13c7a"}, + {file = "mypy-0.981-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa38f82f53e1e7beb45557ff167c177802ba7b387ad017eab1663d567017c8ee"}, + {file = "mypy-0.981-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:64e1f6af81c003f85f0dfed52db632817dabb51b65c0318ffbf5ff51995bbb08"}, + {file = "mypy-0.981-cp38-cp38-win_amd64.whl", hash = "sha256:e1acf62a8c4f7c092462c738aa2c2489e275ed386320c10b2e9bff31f6f7e8d6"}, + {file = "mypy-0.981-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:b6ede64e52257931315826fdbfc6ea878d89a965580d1a65638ef77cb551f56d"}, + {file = "mypy-0.981-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:eb3978b191b9fa0488524bb4ffedf2c573340e8c2b4206fc191d44c7093abfb7"}, + {file = "mypy-0.981-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:77f8fcf7b4b3cc0c74fb33ae54a4cd00bb854d65645c48beccf65fa10b17882c"}, + {file = "mypy-0.981-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f64d2ce043a209a297df322eb4054dfbaa9de9e8738291706eaafda81ab2b362"}, + {file = "mypy-0.981-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:2ee3dbc53d4df7e6e3b1c68ac6a971d3a4fb2852bf10a05fda228721dd44fae1"}, + {file = "mypy-0.981-cp39-cp39-win_amd64.whl", hash = "sha256:8e8e49aa9cc23aa4c926dc200ce32959d3501c4905147a66ce032f05cb5ecb92"}, + {file = "mypy-0.981-py3-none-any.whl", hash = "sha256:794f385653e2b749387a42afb1e14c2135e18daeb027e0d97162e4b7031210f8"}, + {file = "mypy-0.981.tar.gz", hash = "sha256:ad77c13037d3402fbeffda07d51e3f228ba078d1c7096a73759c9419ea031bf4"}, ] mypy-extensions = [ {file = "mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, {file = "mypy_extensions-0.4.3.tar.gz", hash = "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8"}, ] mypy-zope = [ - {file = "mypy-zope-0.3.7.tar.gz", hash = "sha256:9da171e78e8ef7ac8922c86af1a62f1b7f3244f121020bd94a2246bc3f33c605"}, - {file = "mypy_zope-0.3.7-py3-none-any.whl", hash = "sha256:9c7637d066e4d1bafa0651abc091c752009769098043b236446e6725be2bc9c2"}, + {file = "mypy-zope-0.3.11.tar.gz", hash = "sha256:d4255f9f04d48c79083bbd4e2fea06513a6ac7b8de06f8c4ce563fd85142ca05"}, + {file = "mypy_zope-0.3.11-py3-none-any.whl", hash = "sha256:ec080a6508d1f7805c8d2054f9fdd13c849742ce96803519e1fdfa3d3cab7140"}, ] netaddr = [ {file = "netaddr-0.8.0-py2.py3-none-any.whl", hash = "sha256:9666d0232c32d2656e5e5f8d735f58fd6c7457ce52fc21c98d45f2af78f990ac"}, diff --git a/pyproject.toml b/pyproject.toml index bdc745c9c799..58402eacfbb5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,7 +57,7 @@ manifest-path = "rust/Cargo.toml" [tool.poetry] name = "matrix-synapse" -version = "1.68.0" +version = "1.69.0" description = "Homeserver for the Matrix decentralised comms protocol" authors = ["Matrix.org Team and Contributors "] license = "Apache-2.0" @@ -220,7 +220,7 @@ oidc = ["authlib"] # `systemd.journal.JournalHandler`, as is documented in # `contrib/systemd/log_config.yaml`. systemd = ["systemd-python"] -url_preview = ["lxml"] +url-preview = ["lxml"] sentry = ["sentry-sdk"] opentracing = ["jaeger-client", "opentracing"] jwt = ["authlib"] @@ -251,7 +251,7 @@ all = [ "pysaml2", # oidc and jwt "authlib", - # url_preview + # url-preview "lxml", # sentry "sentry-sdk", @@ -308,7 +308,12 @@ twine = "*" towncrier = ">=18.6.0rc1" [build-system] -requires = ["poetry-core==1.2.0", "setuptools_rust==1.5.2"] +# The upper bounds here are defensive, intended to prevent situations like +# #13849 and #14079 where we see buildtime or runtime errors caused by build +# system changes. +# We are happy to raise these upper bounds upon request, +# provided we check that it's safe to do so (i.e. that CI passes). +requires = ["poetry-core>=1.0.0,<=1.3.1", "setuptools_rust>=1.3,<=1.5.2"] build-backend = "poetry.core.masonry.api" diff --git a/rust/Cargo.toml b/rust/Cargo.toml index 657f78c0b1e0..cffaa5b51b94 100644 --- a/rust/Cargo.toml +++ b/rust/Cargo.toml @@ -11,14 +11,24 @@ rust-version = "1.58.1" [lib] name = "synapse" -crate-type = ["cdylib"] +# We generate a `cdylib` for Python and a standard `lib` for running +# tests/benchmarks. +crate-type = ["lib", "cdylib"] [package.metadata.maturin] # This is where we tell maturin where to place the built library. name = "synapse.synapse_rust" [dependencies] -pyo3 = { version = "0.16.5", features = ["extension-module", "macros", "abi3", "abi3-py37"] } +anyhow = "1.0.63" +lazy_static = "1.4.0" +log = "0.4.17" +pyo3 = { version = "0.17.1", features = ["extension-module", "macros", "anyhow", "abi3", "abi3-py37"] } +pyo3-log = "0.7.0" +pythonize = "0.17.0" +regex = "1.6.0" +serde = { version = "1.0.144", features = ["derive"] } +serde_json = "1.0.85" [build-dependencies] blake2 = "0.10.4" diff --git a/rust/benches/evaluator.rs b/rust/benches/evaluator.rs new file mode 100644 index 000000000000..ed411461d139 --- /dev/null +++ b/rust/benches/evaluator.rs @@ -0,0 +1,149 @@ +// Copyright 2022 The Matrix.org Foundation C.I.C. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![feature(test)] +use synapse::push::{ + evaluator::PushRuleEvaluator, Condition, EventMatchCondition, FilteredPushRules, PushRules, +}; +use test::Bencher; + +extern crate test; + +#[bench] +fn bench_match_exact(b: &mut Bencher) { + let flattened_keys = [ + ("type".to_string(), "m.text".to_string()), + ("room_id".to_string(), "!room:server".to_string()), + ("content.body".to_string(), "test message".to_string()), + ] + .into_iter() + .collect(); + + let eval = PushRuleEvaluator::py_new( + flattened_keys, + 10, + 0, + Default::default(), + Default::default(), + true, + ) + .unwrap(); + + let condition = Condition::Known(synapse::push::KnownCondition::EventMatch( + EventMatchCondition { + key: "room_id".into(), + pattern: Some("!room:server".into()), + pattern_type: None, + }, + )); + + let matched = eval.match_condition(&condition, None, None).unwrap(); + assert!(matched, "Didn't match"); + + b.iter(|| eval.match_condition(&condition, None, None).unwrap()); +} + +#[bench] +fn bench_match_word(b: &mut Bencher) { + let flattened_keys = [ + ("type".to_string(), "m.text".to_string()), + ("room_id".to_string(), "!room:server".to_string()), + ("content.body".to_string(), "test message".to_string()), + ] + .into_iter() + .collect(); + + let eval = PushRuleEvaluator::py_new( + flattened_keys, + 10, + 0, + Default::default(), + Default::default(), + true, + ) + .unwrap(); + + let condition = Condition::Known(synapse::push::KnownCondition::EventMatch( + EventMatchCondition { + key: "content.body".into(), + pattern: Some("test".into()), + pattern_type: None, + }, + )); + + let matched = eval.match_condition(&condition, None, None).unwrap(); + assert!(matched, "Didn't match"); + + b.iter(|| eval.match_condition(&condition, None, None).unwrap()); +} + +#[bench] +fn bench_match_word_miss(b: &mut Bencher) { + let flattened_keys = [ + ("type".to_string(), "m.text".to_string()), + ("room_id".to_string(), "!room:server".to_string()), + ("content.body".to_string(), "test message".to_string()), + ] + .into_iter() + .collect(); + + let eval = PushRuleEvaluator::py_new( + flattened_keys, + 10, + 0, + Default::default(), + Default::default(), + true, + ) + .unwrap(); + + let condition = Condition::Known(synapse::push::KnownCondition::EventMatch( + EventMatchCondition { + key: "content.body".into(), + pattern: Some("foobar".into()), + pattern_type: None, + }, + )); + + let matched = eval.match_condition(&condition, None, None).unwrap(); + assert!(!matched, "Didn't match"); + + b.iter(|| eval.match_condition(&condition, None, None).unwrap()); +} + +#[bench] +fn bench_eval_message(b: &mut Bencher) { + let flattened_keys = [ + ("type".to_string(), "m.text".to_string()), + ("room_id".to_string(), "!room:server".to_string()), + ("content.body".to_string(), "test message".to_string()), + ] + .into_iter() + .collect(); + + let eval = PushRuleEvaluator::py_new( + flattened_keys, + 10, + 0, + Default::default(), + Default::default(), + true, + ) + .unwrap(); + + let rules = + FilteredPushRules::py_new(PushRules::new(Vec::new()), Default::default(), false, false); + + b.iter(|| eval.run(&rules, Some("bob"), Some("person"))); +} diff --git a/rust/benches/glob.rs b/rust/benches/glob.rs new file mode 100644 index 000000000000..b6697d9285c3 --- /dev/null +++ b/rust/benches/glob.rs @@ -0,0 +1,40 @@ +// Copyright 2022 The Matrix.org Foundation C.I.C. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![feature(test)] + +use synapse::push::utils::{glob_to_regex, GlobMatchType}; +use test::Bencher; + +extern crate test; + +#[bench] +fn bench_whole(b: &mut Bencher) { + b.iter(|| glob_to_regex("test", GlobMatchType::Whole)); +} + +#[bench] +fn bench_word(b: &mut Bencher) { + b.iter(|| glob_to_regex("test", GlobMatchType::Word)); +} + +#[bench] +fn bench_whole_wildcard_run(b: &mut Bencher) { + b.iter(|| glob_to_regex("test***??*?*?foo", GlobMatchType::Whole)); +} + +#[bench] +fn bench_word_wildcard_run(b: &mut Bencher) { + b.iter(|| glob_to_regex("test***??*?*?foo", GlobMatchType::Whole)); +} diff --git a/rust/build.rs b/rust/build.rs index 2117975e56f7..ef370e6b4188 100644 --- a/rust/build.rs +++ b/rust/build.rs @@ -22,7 +22,7 @@ fn main() -> Result<(), std::io::Error> { for entry in entries { if entry.is_dir() { - dirs.push(entry) + dirs.push(entry); } else { paths.push(entry.to_str().expect("valid rust paths").to_string()); } diff --git a/rust/src/lib.rs b/rust/src/lib.rs index ba42465fb80b..c7b60e58a73b 100644 --- a/rust/src/lib.rs +++ b/rust/src/lib.rs @@ -1,5 +1,7 @@ use pyo3::prelude::*; +pub mod push; + /// Returns the hash of all the rust source files at the time it was compiled. /// /// Used by python to detect if the rust library is outdated. @@ -17,8 +19,13 @@ fn sum_as_string(a: usize, b: usize) -> PyResult { /// The entry point for defining the Python module. #[pymodule] -fn synapse_rust(_py: Python<'_>, m: &PyModule) -> PyResult<()> { +fn synapse_rust(py: Python<'_>, m: &PyModule) -> PyResult<()> { + pyo3_log::init(); + m.add_function(wrap_pyfunction!(sum_as_string, m)?)?; m.add_function(wrap_pyfunction!(get_rust_file_digest, m)?)?; + + push::register_module(py, m)?; + Ok(()) } diff --git a/rust/src/push/base_rules.rs b/rust/src/push/base_rules.rs new file mode 100644 index 000000000000..bb59676bdea9 --- /dev/null +++ b/rust/src/push/base_rules.rs @@ -0,0 +1,336 @@ +// Copyright 2022 The Matrix.org Foundation C.I.C. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Contains the definitions of the "base" push rules. + +use std::borrow::Cow; +use std::collections::HashMap; + +use lazy_static::lazy_static; +use serde_json::Value; + +use super::KnownCondition; +use crate::push::Action; +use crate::push::Condition; +use crate::push::EventMatchCondition; +use crate::push::PushRule; +use crate::push::SetTweak; +use crate::push::TweakValue; + +const HIGHLIGHT_ACTION: Action = Action::SetTweak(SetTweak { + set_tweak: Cow::Borrowed("highlight"), + value: None, + other_keys: Value::Null, +}); + +const HIGHLIGHT_FALSE_ACTION: Action = Action::SetTweak(SetTweak { + set_tweak: Cow::Borrowed("highlight"), + value: Some(TweakValue::Other(Value::Bool(false))), + other_keys: Value::Null, +}); + +const SOUND_ACTION: Action = Action::SetTweak(SetTweak { + set_tweak: Cow::Borrowed("sound"), + value: Some(TweakValue::String(Cow::Borrowed("default"))), + other_keys: Value::Null, +}); + +const RING_ACTION: Action = Action::SetTweak(SetTweak { + set_tweak: Cow::Borrowed("sound"), + value: Some(TweakValue::String(Cow::Borrowed("ring"))), + other_keys: Value::Null, +}); + +pub const BASE_PREPEND_OVERRIDE_RULES: &[PushRule] = &[PushRule { + rule_id: Cow::Borrowed("global/override/.m.rule.master"), + priority_class: 5, + conditions: Cow::Borrowed(&[]), + actions: Cow::Borrowed(&[Action::DontNotify]), + default: true, + default_enabled: false, +}]; + +pub const BASE_APPEND_OVERRIDE_RULES: &[PushRule] = &[ + PushRule { + rule_id: Cow::Borrowed("global/override/.m.rule.suppress_notices"), + priority_class: 5, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( + EventMatchCondition { + key: Cow::Borrowed("content.msgtype"), + pattern: Some(Cow::Borrowed("m.notice")), + pattern_type: None, + }, + ))]), + actions: Cow::Borrowed(&[Action::DontNotify]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/override/.m.rule.invite_for_me"), + priority_class: 5, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.room.member")), + pattern_type: None, + })), + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("content.membership"), + pattern: Some(Cow::Borrowed("invite")), + pattern_type: None, + })), + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("state_key"), + pattern: None, + pattern_type: Some(Cow::Borrowed("user_id")), + })), + ]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION, SOUND_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/override/.m.rule.member_event"), + priority_class: 5, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( + EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.room.member")), + pattern_type: None, + }, + ))]), + actions: Cow::Borrowed(&[Action::DontNotify]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/override/.m.rule.contains_display_name"), + priority_class: 5, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::ContainsDisplayName)]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION, SOUND_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/override/.m.rule.roomnotif"), + priority_class: 5, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::SenderNotificationPermission { + key: Cow::Borrowed("room"), + }), + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("content.body"), + pattern: Some(Cow::Borrowed("@room")), + pattern_type: None, + })), + ]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/override/.m.rule.tombstone"), + priority_class: 5, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.room.tombstone")), + pattern_type: None, + })), + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("state_key"), + pattern: Some(Cow::Borrowed("")), + pattern_type: None, + })), + ]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/override/.m.rule.reaction"), + priority_class: 5, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( + EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.reaction")), + pattern_type: None, + }, + ))]), + actions: Cow::Borrowed(&[Action::DontNotify]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/override/.org.matrix.msc3786.rule.room.server_acl"), + priority_class: 5, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.room.server_acl")), + pattern_type: None, + })), + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("state_key"), + pattern: Some(Cow::Borrowed("")), + pattern_type: None, + })), + ]), + actions: Cow::Borrowed(&[]), + default: true, + default_enabled: true, + }, +]; + +pub const BASE_APPEND_CONTENT_RULES: &[PushRule] = &[PushRule { + rule_id: Cow::Borrowed("global/content/.m.rule.contains_user_name"), + priority_class: 4, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( + EventMatchCondition { + key: Cow::Borrowed("content.body"), + pattern: None, + pattern_type: Some(Cow::Borrowed("user_localpart")), + }, + ))]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_ACTION, SOUND_ACTION]), + default: true, + default_enabled: true, +}]; + +pub const BASE_APPEND_UNDERRIDE_RULES: &[PushRule] = &[ + PushRule { + rule_id: Cow::Borrowed("global/underride/.m.rule.call"), + priority_class: 1, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( + EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.call.invite")), + pattern_type: None, + }, + ))]), + actions: Cow::Borrowed(&[Action::Notify, RING_ACTION, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.m.rule.room_one_to_one"), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.room.message")), + pattern_type: None, + })), + Condition::Known(KnownCondition::RoomMemberCount { + is: Some(Cow::Borrowed("2")), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.m.rule.encrypted_room_one_to_one"), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.room.encrypted")), + pattern_type: None, + })), + Condition::Known(KnownCondition::RoomMemberCount { + is: Some(Cow::Borrowed("2")), + }), + ]), + actions: Cow::Borrowed(&[Action::Notify, SOUND_ACTION, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.org.matrix.msc3772.thread_reply"), + priority_class: 1, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::RelationMatch { + rel_type: Cow::Borrowed("m.thread"), + event_type_pattern: None, + sender: None, + sender_type: Some(Cow::Borrowed("user_id")), + })]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.m.rule.message"), + priority_class: 1, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( + EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.room.message")), + pattern_type: None, + }, + ))]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.m.rule.encrypted"), + priority_class: 1, + conditions: Cow::Borrowed(&[Condition::Known(KnownCondition::EventMatch( + EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("m.room.encrypted")), + pattern_type: None, + }, + ))]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, + PushRule { + rule_id: Cow::Borrowed("global/underride/.im.vector.jitsi"), + priority_class: 1, + conditions: Cow::Borrowed(&[ + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("type"), + pattern: Some(Cow::Borrowed("im.vector.modular.widgets")), + pattern_type: None, + })), + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("content.type"), + pattern: Some(Cow::Borrowed("jitsi")), + pattern_type: None, + })), + Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: Cow::Borrowed("state_key"), + pattern: Some(Cow::Borrowed("*")), + pattern_type: None, + })), + ]), + actions: Cow::Borrowed(&[Action::Notify, HIGHLIGHT_FALSE_ACTION]), + default: true, + default_enabled: true, + }, +]; + +lazy_static! { + pub static ref BASE_RULES_BY_ID: HashMap<&'static str, &'static PushRule> = + BASE_PREPEND_OVERRIDE_RULES + .iter() + .chain(BASE_APPEND_OVERRIDE_RULES.iter()) + .chain(BASE_APPEND_CONTENT_RULES.iter()) + .chain(BASE_APPEND_UNDERRIDE_RULES.iter()) + .map(|rule| { (&*rule.rule_id, rule) }) + .collect(); +} diff --git a/rust/src/push/evaluator.rs b/rust/src/push/evaluator.rs new file mode 100644 index 000000000000..efe88ec76e54 --- /dev/null +++ b/rust/src/push/evaluator.rs @@ -0,0 +1,374 @@ +// Copyright 2022 The Matrix.org Foundation C.I.C. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::{ + borrow::Cow, + collections::{BTreeMap, BTreeSet}, +}; + +use anyhow::{Context, Error}; +use lazy_static::lazy_static; +use log::warn; +use pyo3::prelude::*; +use regex::Regex; + +use super::{ + utils::{get_glob_matcher, get_localpart_from_id, GlobMatchType}, + Action, Condition, EventMatchCondition, FilteredPushRules, KnownCondition, +}; + +lazy_static! { + /// Used to parse the `is` clause in the room member count condition. + static ref INEQUALITY_EXPR: Regex = Regex::new(r"^([=<>]*)([0-9]+)$").expect("valid regex"); +} + +/// Allows running a set of push rules against a particular event. +#[pyclass] +pub struct PushRuleEvaluator { + /// A mapping of "flattened" keys to string values in the event, e.g. + /// includes things like "type" and "content.msgtype". + flattened_keys: BTreeMap, + + /// The "content.body", if any. + body: String, + + /// The number of users in the room. + room_member_count: u64, + + /// The `notifications` section of the current power levels in the room. + notification_power_levels: BTreeMap, + + /// The relations related to the event as a mapping from relation type to + /// set of sender/event type 2-tuples. + relations: BTreeMap>, + + /// Is running "relation" conditions enabled? + relation_match_enabled: bool, + + /// The power level of the sender of the event, or None if event is an + /// outlier. + sender_power_level: Option, +} + +#[pymethods] +impl PushRuleEvaluator { + /// Create a new `PushRuleEvaluator`. See struct docstring for details. + #[new] + pub fn py_new( + flattened_keys: BTreeMap, + room_member_count: u64, + sender_power_level: Option, + notification_power_levels: BTreeMap, + relations: BTreeMap>, + relation_match_enabled: bool, + ) -> Result { + let body = flattened_keys + .get("content.body") + .cloned() + .unwrap_or_default(); + + Ok(PushRuleEvaluator { + flattened_keys, + body, + room_member_count, + notification_power_levels, + relations, + relation_match_enabled, + sender_power_level, + }) + } + + /// Run the evaluator with the given push rules, for the given user ID and + /// display name of the user. + /// + /// Passing in None will skip evaluating rules matching user ID and display + /// name. + /// + /// Returns the set of actions, if any, that match (filtering out any + /// `dont_notify` actions). + pub fn run( + &self, + push_rules: &FilteredPushRules, + user_id: Option<&str>, + display_name: Option<&str>, + ) -> Vec { + 'outer: for (push_rule, enabled) in push_rules.iter() { + if !enabled { + continue; + } + + for condition in push_rule.conditions.iter() { + match self.match_condition(condition, user_id, display_name) { + Ok(true) => {} + Ok(false) => continue 'outer, + Err(err) => { + warn!("Condition match failed {err}"); + continue 'outer; + } + } + } + + let actions = push_rule + .actions + .iter() + // Filter out "dont_notify" actions, as we don't store them. + .filter(|a| **a != Action::DontNotify) + .cloned() + .collect(); + + return actions; + } + + Vec::new() + } + + /// Check if the given condition matches. + fn matches( + &self, + condition: Condition, + user_id: Option<&str>, + display_name: Option<&str>, + ) -> bool { + match self.match_condition(&condition, user_id, display_name) { + Ok(true) => true, + Ok(false) => false, + Err(err) => { + warn!("Condition match failed {err}"); + false + } + } + } +} + +impl PushRuleEvaluator { + /// Match a given `Condition` for a push rule. + pub fn match_condition( + &self, + condition: &Condition, + user_id: Option<&str>, + display_name: Option<&str>, + ) -> Result { + let known_condition = match condition { + Condition::Known(known) => known, + Condition::Unknown(_) => { + return Ok(false); + } + }; + + let result = match known_condition { + KnownCondition::EventMatch(event_match) => { + self.match_event_match(event_match, user_id)? + } + KnownCondition::ContainsDisplayName => { + if let Some(dn) = display_name { + if !dn.is_empty() { + get_glob_matcher(dn, GlobMatchType::Word)?.is_match(&self.body)? + } else { + // We specifically ignore empty display names, as otherwise + // they would always match. + false + } + } else { + false + } + } + KnownCondition::RoomMemberCount { is } => { + if let Some(is) = is { + self.match_member_count(is)? + } else { + false + } + } + KnownCondition::SenderNotificationPermission { key } => { + if let Some(sender_power_level) = &self.sender_power_level { + let required_level = self + .notification_power_levels + .get(key.as_ref()) + .copied() + .unwrap_or(50); + + *sender_power_level >= required_level + } else { + false + } + } + KnownCondition::RelationMatch { + rel_type, + event_type_pattern, + sender, + sender_type, + } => { + self.match_relations(rel_type, sender, sender_type, user_id, event_type_pattern)? + } + }; + + Ok(result) + } + + /// Evaluates a relation condition. + fn match_relations( + &self, + rel_type: &str, + sender: &Option>, + sender_type: &Option>, + user_id: Option<&str>, + event_type_pattern: &Option>, + ) -> Result { + // First check if relation matching is enabled... + if !self.relation_match_enabled { + return Ok(false); + } + + // ... and if there are any relations to match against. + let relations = if let Some(relations) = self.relations.get(rel_type) { + relations + } else { + return Ok(false); + }; + + // Extract the sender pattern from the condition + let sender_pattern = if let Some(sender) = sender { + Some(sender.as_ref()) + } else if let Some(sender_type) = sender_type { + if sender_type == "user_id" { + if let Some(user_id) = user_id { + Some(user_id) + } else { + return Ok(false); + } + } else { + warn!("Unrecognized sender_type: {sender_type}"); + return Ok(false); + } + } else { + None + }; + + let mut sender_compiled_pattern = if let Some(pattern) = sender_pattern { + Some(get_glob_matcher(pattern, GlobMatchType::Whole)?) + } else { + None + }; + + let mut type_compiled_pattern = if let Some(pattern) = event_type_pattern { + Some(get_glob_matcher(pattern, GlobMatchType::Whole)?) + } else { + None + }; + + for (relation_sender, event_type) in relations { + if let Some(pattern) = &mut sender_compiled_pattern { + if !pattern.is_match(relation_sender)? { + continue; + } + } + + if let Some(pattern) = &mut type_compiled_pattern { + if !pattern.is_match(event_type)? { + continue; + } + } + + return Ok(true); + } + + Ok(false) + } + + /// Evaluates a `event_match` condition. + fn match_event_match( + &self, + event_match: &EventMatchCondition, + user_id: Option<&str>, + ) -> Result { + let pattern = if let Some(pattern) = &event_match.pattern { + pattern + } else if let Some(pattern_type) = &event_match.pattern_type { + // The `pattern_type` can either be "user_id" or "user_localpart", + // either way if we don't have a `user_id` then the condition can't + // match. + let user_id = if let Some(user_id) = user_id { + user_id + } else { + return Ok(false); + }; + + match &**pattern_type { + "user_id" => user_id, + "user_localpart" => get_localpart_from_id(user_id)?, + _ => return Ok(false), + } + } else { + return Ok(false); + }; + + let haystack = if let Some(haystack) = self.flattened_keys.get(&*event_match.key) { + haystack + } else { + return Ok(false); + }; + + // For the content.body we match against "words", but for everything + // else we match against the entire value. + let match_type = if event_match.key == "content.body" { + GlobMatchType::Word + } else { + GlobMatchType::Whole + }; + + let mut compiled_pattern = get_glob_matcher(pattern, match_type)?; + compiled_pattern.is_match(haystack) + } + + /// Match the member count against an 'is' condition + /// The `is` condition can be things like '>2', '==3' or even just '4'. + fn match_member_count(&self, is: &str) -> Result { + let captures = INEQUALITY_EXPR.captures(is).context("bad 'is' clause")?; + let ineq = captures.get(1).map_or("==", |m| m.as_str()); + let rhs: u64 = captures + .get(2) + .context("missing number")? + .as_str() + .parse()?; + + let matches = match ineq { + "" | "==" => self.room_member_count == rhs, + "<" => self.room_member_count < rhs, + ">" => self.room_member_count > rhs, + ">=" => self.room_member_count >= rhs, + "<=" => self.room_member_count <= rhs, + _ => false, + }; + + Ok(matches) + } +} + +#[test] +fn push_rule_evaluator() { + let mut flattened_keys = BTreeMap::new(); + flattened_keys.insert("content.body".to_string(), "foo bar bob hello".to_string()); + let evaluator = PushRuleEvaluator::py_new( + flattened_keys, + 10, + Some(0), + BTreeMap::new(), + BTreeMap::new(), + true, + ) + .unwrap(); + + let result = evaluator.run(&FilteredPushRules::default(), None, Some("bob")); + assert_eq!(result.len(), 3); +} diff --git a/rust/src/push/mod.rs b/rust/src/push/mod.rs new file mode 100644 index 000000000000..30fffc31adba --- /dev/null +++ b/rust/src/push/mod.rs @@ -0,0 +1,514 @@ +// Copyright 2022 The Matrix.org Foundation C.I.C. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! An implementation of Matrix push rules. +//! +//! The `Cow<_>` type is used extensively within this module to allow creating +//! the base rules as constants (in Rust constants can't require explicit +//! allocation atm). +//! +//! --- +//! +//! Push rules is the system used to determine which events trigger a push (and a +//! bump in notification counts). +//! +//! This consists of a list of "push rules" for each user, where a push rule is a +//! pair of "conditions" and "actions". When a user receives an event Synapse +//! iterates over the list of push rules until it finds one where all the conditions +//! match the event, at which point "actions" describe the outcome (e.g. notify, +//! highlight, etc). +//! +//! Push rules are split up into 5 different "kinds" (aka "priority classes"), which +//! are run in order: +//! 1. Override — highest priority rules, e.g. always ignore notices +//! 2. Content — content specific rules, e.g. @ notifications +//! 3. Room — per room rules, e.g. enable/disable notifications for all messages +//! in a room +//! 4. Sender — per sender rules, e.g. never notify for messages from a given +//! user +//! 5. Underride — the lowest priority "default" rules, e.g. notify for every +//! message. +//! +//! The set of "base rules" are the list of rules that every user has by default. A +//! user can modify their copy of the push rules in one of three ways: +//! 1. Adding a new push rule of a certain kind +//! 2. Changing the actions of a base rule +//! 3. Enabling/disabling a base rule. +//! +//! The base rules are split into whether they come before or after a particular +//! kind, so the order of push rule evaluation would be: base rules for before +//! "override" kind, user defined "override" rules, base rules after "override" +//! kind, etc, etc. + +use std::borrow::Cow; +use std::collections::{BTreeMap, HashMap, HashSet}; + +use anyhow::{Context, Error}; +use log::warn; +use pyo3::prelude::*; +use pythonize::{depythonize, pythonize}; +use serde::de::Error as _; +use serde::{Deserialize, Serialize}; +use serde_json::Value; + +use self::evaluator::PushRuleEvaluator; + +mod base_rules; +pub mod evaluator; +pub mod utils; + +/// Called when registering modules with python. +pub fn register_module(py: Python<'_>, m: &PyModule) -> PyResult<()> { + let child_module = PyModule::new(py, "push")?; + child_module.add_class::()?; + child_module.add_class::()?; + child_module.add_class::()?; + child_module.add_class::()?; + child_module.add_function(wrap_pyfunction!(get_base_rule_ids, m)?)?; + + m.add_submodule(child_module)?; + + // We need to manually add the module to sys.modules to make `from + // synapse.synapse_rust import push` work. + py.import("sys")? + .getattr("modules")? + .set_item("synapse.synapse_rust.push", child_module)?; + + Ok(()) +} + +#[pyfunction] +fn get_base_rule_ids() -> HashSet<&'static str> { + base_rules::BASE_RULES_BY_ID.keys().copied().collect() +} + +/// A single push rule for a user. +#[derive(Debug, Clone)] +#[pyclass(frozen)] +pub struct PushRule { + /// A unique ID for this rule + pub rule_id: Cow<'static, str>, + /// The "kind" of push rule this is (see `PRIORITY_CLASS_MAP` in Python) + #[pyo3(get)] + pub priority_class: i32, + /// The conditions that must all match for actions to be applied + pub conditions: Cow<'static, [Condition]>, + /// The actions to apply if all conditions are met + pub actions: Cow<'static, [Action]>, + /// Whether this is a base rule + #[pyo3(get)] + pub default: bool, + /// Whether this is enabled by default + #[pyo3(get)] + pub default_enabled: bool, +} + +#[pymethods] +impl PushRule { + #[staticmethod] + pub fn from_db( + rule_id: String, + priority_class: i32, + conditions: &str, + actions: &str, + ) -> Result { + let conditions = serde_json::from_str(conditions).context("parsing conditions")?; + let actions = serde_json::from_str(actions).context("parsing actions")?; + + Ok(PushRule { + rule_id: Cow::Owned(rule_id), + priority_class, + conditions, + actions, + default: false, + default_enabled: true, + }) + } + + #[getter] + fn rule_id(&self) -> &str { + &self.rule_id + } + + #[getter] + fn actions(&self) -> Vec { + self.actions.clone().into_owned() + } + + #[getter] + fn conditions(&self) -> Vec { + self.conditions.clone().into_owned() + } + + fn __repr__(&self) -> String { + format!( + "", + self.rule_id, self.conditions, self.actions + ) + } +} + +/// The "action" Synapse should perform for a matching push rule. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Action { + DontNotify, + Notify, + Coalesce, + SetTweak(SetTweak), + + // An unrecognized custom action. + Unknown(Value), +} + +impl IntoPy for Action { + fn into_py(self, py: Python<'_>) -> PyObject { + // When we pass the `Action` struct to Python we want it to be converted + // to a dict. We use `pythonize`, which converts the struct using the + // `serde` serialization. + pythonize(py, &self).expect("valid action") + } +} + +/// The body of a `SetTweak` push action. +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +pub struct SetTweak { + set_tweak: Cow<'static, str>, + + #[serde(skip_serializing_if = "Option::is_none")] + value: Option, + + // This picks up any other fields that may have been added by clients. + // These get added when we convert the `Action` to a python object. + #[serde(flatten)] + other_keys: Value, +} + +/// The value of a `set_tweak`. +/// +/// We need this (rather than using `TweakValue` directly) so that we can use +/// `&'static str` in the value when defining the constant base rules. +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)] +#[serde(untagged)] +pub enum TweakValue { + String(Cow<'static, str>), + Other(Value), +} + +impl Serialize for Action { + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer, + { + match self { + Action::DontNotify => serializer.serialize_str("dont_notify"), + Action::Notify => serializer.serialize_str("notify"), + Action::Coalesce => serializer.serialize_str("coalesce"), + Action::SetTweak(tweak) => tweak.serialize(serializer), + Action::Unknown(value) => value.serialize(serializer), + } + } +} + +/// Simple helper class for deserializing Action from JSON. +#[derive(Deserialize)] +#[serde(untagged)] +enum ActionDeserializeHelper { + Str(String), + SetTweak(SetTweak), + Unknown(Value), +} + +impl<'de> Deserialize<'de> for Action { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let helper: ActionDeserializeHelper = Deserialize::deserialize(deserializer)?; + match helper { + ActionDeserializeHelper::Str(s) => match &*s { + "dont_notify" => Ok(Action::DontNotify), + "notify" => Ok(Action::Notify), + "coalesce" => Ok(Action::Coalesce), + _ => Err(D::Error::custom("unrecognized action")), + }, + ActionDeserializeHelper::SetTweak(set_tweak) => Ok(Action::SetTweak(set_tweak)), + ActionDeserializeHelper::Unknown(value) => Ok(Action::Unknown(value)), + } + } +} + +/// A condition used in push rules to match against an event. +/// +/// We need this split as `serde` doesn't give us the ability to have a +/// "catchall" variant in tagged enums. +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(untagged)] +pub enum Condition { + /// A recognized condition that we can match against + Known(KnownCondition), + /// An unrecognized condition that we ignore. + Unknown(Value), +} + +/// The set of "known" conditions that we can handle. +#[derive(Serialize, Deserialize, Debug, Clone)] +#[serde(rename_all = "snake_case")] +#[serde(tag = "kind")] +pub enum KnownCondition { + EventMatch(EventMatchCondition), + ContainsDisplayName, + RoomMemberCount { + #[serde(skip_serializing_if = "Option::is_none")] + is: Option>, + }, + SenderNotificationPermission { + key: Cow<'static, str>, + }, + #[serde(rename = "org.matrix.msc3772.relation_match")] + RelationMatch { + rel_type: Cow<'static, str>, + #[serde(skip_serializing_if = "Option::is_none", rename = "type")] + event_type_pattern: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + sender: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + sender_type: Option>, + }, +} + +impl IntoPy for Condition { + fn into_py(self, py: Python<'_>) -> PyObject { + pythonize(py, &self).expect("valid condition") + } +} + +impl<'source> FromPyObject<'source> for Condition { + fn extract(ob: &'source PyAny) -> PyResult { + Ok(depythonize(ob)?) + } +} + +/// The body of a [`Condition::EventMatch`] +#[derive(Serialize, Deserialize, Debug, Clone)] +pub struct EventMatchCondition { + pub key: Cow<'static, str>, + #[serde(skip_serializing_if = "Option::is_none")] + pub pattern: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + pub pattern_type: Option>, +} + +/// The collection of push rules for a user. +#[derive(Debug, Clone, Default)] +#[pyclass(frozen)] +pub struct PushRules { + /// Custom push rules that override a base rule. + overridden_base_rules: HashMap, PushRule>, + + /// Custom rules that come between the prepend/append override base rules. + override_rules: Vec, + /// Custom rules that come before the base content rules. + content: Vec, + /// Custom rules that come before the base room rules. + room: Vec, + /// Custom rules that come before the base sender rules. + sender: Vec, + /// Custom rules that come before the base underride rules. + underride: Vec, +} + +#[pymethods] +impl PushRules { + #[new] + pub fn new(rules: Vec) -> PushRules { + let mut push_rules: PushRules = Default::default(); + + for rule in rules { + if let Some(&o) = base_rules::BASE_RULES_BY_ID.get(&*rule.rule_id) { + push_rules.overridden_base_rules.insert( + rule.rule_id.clone(), + PushRule { + actions: rule.actions.clone(), + ..o.clone() + }, + ); + + continue; + } + + match rule.priority_class { + 5 => push_rules.override_rules.push(rule), + 4 => push_rules.content.push(rule), + 3 => push_rules.room.push(rule), + 2 => push_rules.sender.push(rule), + 1 => push_rules.underride.push(rule), + _ => { + warn!( + "Unrecognized priority class for rule {}: {}", + rule.rule_id, rule.priority_class + ); + } + } + } + + push_rules + } + + /// Returns the list of all rules, including base rules, in the order they + /// should be executed in. + fn rules(&self) -> Vec { + self.iter().cloned().collect() + } +} + +impl PushRules { + /// Iterates over all the rules, including base rules, in the order they + /// should be executed in. + pub fn iter(&self) -> impl Iterator { + base_rules::BASE_PREPEND_OVERRIDE_RULES + .iter() + .chain(self.override_rules.iter()) + .chain(base_rules::BASE_APPEND_OVERRIDE_RULES.iter()) + .chain(self.content.iter()) + .chain(base_rules::BASE_APPEND_CONTENT_RULES.iter()) + .chain(self.room.iter()) + .chain(self.sender.iter()) + .chain(self.underride.iter()) + .chain(base_rules::BASE_APPEND_UNDERRIDE_RULES.iter()) + .map(|rule| { + self.overridden_base_rules + .get(&*rule.rule_id) + .unwrap_or(rule) + }) + } +} + +/// A wrapper around `PushRules` that checks the enabled state of rules and +/// filters out disabled experimental rules. +#[derive(Debug, Clone, Default)] +#[pyclass(frozen)] +pub struct FilteredPushRules { + push_rules: PushRules, + enabled_map: BTreeMap, + msc3786_enabled: bool, + msc3772_enabled: bool, +} + +#[pymethods] +impl FilteredPushRules { + #[new] + pub fn py_new( + push_rules: PushRules, + enabled_map: BTreeMap, + msc3786_enabled: bool, + msc3772_enabled: bool, + ) -> Self { + Self { + push_rules, + enabled_map, + msc3786_enabled, + msc3772_enabled, + } + } + + /// Returns the list of all rules and their enabled state, including base + /// rules, in the order they should be executed in. + fn rules(&self) -> Vec<(PushRule, bool)> { + self.iter().map(|(r, e)| (r.clone(), e)).collect() + } +} + +impl FilteredPushRules { + /// Iterates over all the rules and their enabled state, including base + /// rules, in the order they should be executed in. + fn iter(&self) -> impl Iterator { + self.push_rules + .iter() + .filter(|rule| { + // Ignore disabled experimental push rules + if !self.msc3786_enabled + && rule.rule_id == "global/override/.org.matrix.msc3786.rule.room.server_acl" + { + return false; + } + + if !self.msc3772_enabled + && rule.rule_id == "global/underride/.org.matrix.msc3772.thread_reply" + { + return false; + } + + true + }) + .map(|r| { + let enabled = *self + .enabled_map + .get(&*r.rule_id) + .unwrap_or(&r.default_enabled); + (r, enabled) + }) + } +} + +#[test] +fn test_serialize_condition() { + let condition = Condition::Known(KnownCondition::EventMatch(EventMatchCondition { + key: "content.body".into(), + pattern: Some("coffee".into()), + pattern_type: None, + })); + + let json = serde_json::to_string(&condition).unwrap(); + assert_eq!( + json, + r#"{"kind":"event_match","key":"content.body","pattern":"coffee"}"# + ) +} + +#[test] +fn test_deserialize_condition() { + let json = r#"{"kind":"event_match","key":"content.body","pattern":"coffee"}"#; + + let _: Condition = serde_json::from_str(json).unwrap(); +} + +#[test] +fn test_deserialize_custom_condition() { + let json = r#"{"kind":"custom_tag"}"#; + + let condition: Condition = serde_json::from_str(json).unwrap(); + assert!(matches!(condition, Condition::Unknown(_))); + + let new_json = serde_json::to_string(&condition).unwrap(); + assert_eq!(json, new_json); +} + +#[test] +fn test_deserialize_action() { + let _: Action = serde_json::from_str(r#""notify""#).unwrap(); + let _: Action = serde_json::from_str(r#""dont_notify""#).unwrap(); + let _: Action = serde_json::from_str(r#""coalesce""#).unwrap(); + let _: Action = serde_json::from_str(r#"{"set_tweak": "highlight"}"#).unwrap(); +} + +#[test] +fn test_custom_action() { + let json = r#"{"some_custom":"action_fields"}"#; + + let action: Action = serde_json::from_str(json).unwrap(); + assert!(matches!(action, Action::Unknown(_))); + + let new_json = serde_json::to_string(&action).unwrap(); + assert_eq!(json, new_json); +} diff --git a/rust/src/push/utils.rs b/rust/src/push/utils.rs new file mode 100644 index 000000000000..87593404730a --- /dev/null +++ b/rust/src/push/utils.rs @@ -0,0 +1,215 @@ +// Copyright 2022 The Matrix.org Foundation C.I.C. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use anyhow::bail; +use anyhow::Context; +use anyhow::Error; +use lazy_static::lazy_static; +use regex; +use regex::Regex; +use regex::RegexBuilder; + +lazy_static! { + /// Matches runs of non-wildcard characters followed by wildcard characters. + static ref WILDCARD_RUN: Regex = Regex::new(r"([^\?\*]*)([\?\*]*)").expect("valid regex"); +} + +/// Extract the localpart from a Matrix style ID +pub(crate) fn get_localpart_from_id(id: &str) -> Result<&str, Error> { + let (localpart, _) = id + .split_once(':') + .with_context(|| format!("ID does not contain colon: {id}"))?; + + // We need to strip off the first character, which is the ID type. + if localpart.is_empty() { + bail!("Invalid ID {id}"); + } + + Ok(&localpart[1..]) +} + +/// Used by `glob_to_regex` to specify what to match the regex against. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum GlobMatchType { + /// The generated regex will match against the entire input. + Whole, + /// The generated regex will match against words. + Word, +} + +/// Convert a "glob" style expression to a regex, anchoring either to the entire +/// input or to individual words. +pub fn glob_to_regex(glob: &str, match_type: GlobMatchType) -> Result { + let mut chunks = Vec::new(); + + // Patterns with wildcards must be simplified to avoid performance cliffs + // - The glob `?**?**?` is equivalent to the glob `???*` + // - The glob `???*` is equivalent to the regex `.{3,}` + for captures in WILDCARD_RUN.captures_iter(glob) { + if let Some(chunk) = captures.get(1) { + chunks.push(regex::escape(chunk.as_str())); + } + + if let Some(wildcards) = captures.get(2) { + if wildcards.as_str() == "" { + continue; + } + + let question_marks = wildcards.as_str().chars().filter(|c| *c == '?').count(); + + if wildcards.as_str().contains('*') { + chunks.push(format!(".{{{question_marks},}}")); + } else { + chunks.push(format!(".{{{question_marks}}}")); + } + } + } + + let joined = chunks.join(""); + + let regex_str = match match_type { + GlobMatchType::Whole => format!(r"\A{joined}\z"), + + // `^|\W` and `\W|$` handle the case where `pattern` starts or ends with a non-word + // character. + GlobMatchType::Word => format!(r"(?:^|\b|\W){joined}(?:\b|\W|$)"), + }; + + Ok(RegexBuilder::new(®ex_str) + .case_insensitive(true) + .build()?) +} + +/// Compiles the glob into a `Matcher`. +pub fn get_glob_matcher(glob: &str, match_type: GlobMatchType) -> Result { + // There are a number of shortcuts we can make if the glob doesn't contain a + // wild card. + let matcher = if glob.contains(['*', '?']) { + let regex = glob_to_regex(glob, match_type)?; + Matcher::Regex(regex) + } else if match_type == GlobMatchType::Whole { + // If there aren't any wildcards and we're matching the whole thing, + // then we simply can do a case-insensitive string match. + Matcher::Whole(glob.to_lowercase()) + } else { + // Otherwise, if we're matching against words then can first check + // if the haystack contains the glob at all. + Matcher::Word { + word: glob.to_lowercase(), + regex: None, + } + }; + + Ok(matcher) +} + +/// Matches against a glob +pub enum Matcher { + /// Plain regex matching. + Regex(Regex), + + /// Case-insensitive equality. + Whole(String), + + /// Word matching. `regex` is a cache of calling [`glob_to_regex`] on word. + Word { word: String, regex: Option }, +} + +impl Matcher { + /// Checks if the glob matches the given haystack. + pub fn is_match(&mut self, haystack: &str) -> Result { + // We want to to do case-insensitive matching, so we convert to + // lowercase first. + let haystack = haystack.to_lowercase(); + + match self { + Matcher::Regex(regex) => Ok(regex.is_match(&haystack)), + Matcher::Whole(whole) => Ok(whole == &haystack), + Matcher::Word { word, regex } => { + // If we're looking for a literal word, then we first check if + // the haystack contains the word as a substring. + if !haystack.contains(&*word) { + return Ok(false); + } + + // If it does contain the word as a substring, then we need to + // check if it is an actual word by testing it against the regex. + let regex = if let Some(regex) = regex { + regex + } else { + let compiled_regex = glob_to_regex(word, GlobMatchType::Word)?; + regex.insert(compiled_regex) + }; + + Ok(regex.is_match(&haystack)) + } + } + } +} + +#[test] +fn test_get_domain_from_id() { + get_localpart_from_id("").unwrap_err(); + get_localpart_from_id(":").unwrap_err(); + get_localpart_from_id(":asd").unwrap_err(); + get_localpart_from_id("::as::asad").unwrap_err(); + + assert_eq!(get_localpart_from_id("@test:foo").unwrap(), "test"); + assert_eq!(get_localpart_from_id("@:").unwrap(), ""); + assert_eq!(get_localpart_from_id("@test:foo:907").unwrap(), "test"); +} + +#[test] +fn tset_glob() -> Result<(), Error> { + assert_eq!( + glob_to_regex("simple", GlobMatchType::Whole)?.as_str(), + r"\Asimple\z" + ); + assert_eq!( + glob_to_regex("simple*", GlobMatchType::Whole)?.as_str(), + r"\Asimple.{0,}\z" + ); + assert_eq!( + glob_to_regex("simple?", GlobMatchType::Whole)?.as_str(), + r"\Asimple.{1}\z" + ); + assert_eq!( + glob_to_regex("simple?*?*", GlobMatchType::Whole)?.as_str(), + r"\Asimple.{2,}\z" + ); + assert_eq!( + glob_to_regex("simple???", GlobMatchType::Whole)?.as_str(), + r"\Asimple.{3}\z" + ); + + assert_eq!( + glob_to_regex("escape.", GlobMatchType::Whole)?.as_str(), + r"\Aescape\.\z" + ); + + assert!(glob_to_regex("simple", GlobMatchType::Whole)?.is_match("simple")); + assert!(!glob_to_regex("simple", GlobMatchType::Whole)?.is_match("simples")); + assert!(glob_to_regex("simple*", GlobMatchType::Whole)?.is_match("simples")); + assert!(glob_to_regex("simple?", GlobMatchType::Whole)?.is_match("simples")); + assert!(glob_to_regex("simple*", GlobMatchType::Whole)?.is_match("simple")); + + assert!(glob_to_regex("simple", GlobMatchType::Word)?.is_match("some simple.")); + assert!(glob_to_regex("simple", GlobMatchType::Word)?.is_match("simple")); + assert!(!glob_to_regex("simple", GlobMatchType::Word)?.is_match("simples")); + + assert!(glob_to_regex("@user:foo", GlobMatchType::Word)?.is_match("Some @user:foo test")); + assert!(glob_to_regex("@user:foo", GlobMatchType::Word)?.is_match("@user:foo")); + + Ok(()) +} diff --git a/scripts-dev/check_pydantic_models.py b/scripts-dev/check_pydantic_models.py index d0fb811bdb5e..9f2b7ded5bd5 100755 --- a/scripts-dev/check_pydantic_models.py +++ b/scripts-dev/check_pydantic_models.py @@ -88,10 +88,9 @@ def make_wrapper(factory: Callable[P, R]) -> Callable[P, R]: @functools.wraps(factory) def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: - # type-ignore: should be redundant once we can use https://github.com/python/mypy/pull/12668 - if "strict" not in kwargs: # type: ignore[attr-defined] + if "strict" not in kwargs: raise MissingStrictInConstrainedTypeException(factory.__name__) - if not kwargs["strict"]: # type: ignore[index] + if not kwargs["strict"]: raise MissingStrictInConstrainedTypeException(factory.__name__) return factory(*args, **kwargs) diff --git a/scripts-dev/make_full_schema.sh b/scripts-dev/make_full_schema.sh index 61394360cecb..e2bc1640bb2a 100755 --- a/scripts-dev/make_full_schema.sh +++ b/scripts-dev/make_full_schema.sh @@ -2,23 +2,16 @@ # # This script generates SQL files for creating a brand new Synapse DB with the latest # schema, on both SQLite3 and Postgres. -# -# It does so by having Synapse generate an up-to-date SQLite DB, then running -# synapse_port_db to convert it to Postgres. It then dumps the contents of both. export PGHOST="localhost" -POSTGRES_DB_NAME="synapse_full_schema.$$" - -SQLITE_SCHEMA_FILE="schema.sql.sqlite" -SQLITE_ROWS_FILE="rows.sql.sqlite" -POSTGRES_SCHEMA_FILE="full.sql.postgres" -POSTGRES_ROWS_FILE="rows.sql.postgres" - +POSTGRES_MAIN_DB_NAME="synapse_full_schema_main.$$" +POSTGRES_COMMON_DB_NAME="synapse_full_schema_common.$$" +POSTGRES_STATE_DB_NAME="synapse_full_schema_state.$$" REQUIRED_DEPS=("matrix-synapse" "psycopg2") usage() { echo - echo "Usage: $0 -p -o [-c] [-n] [-h]" + echo "Usage: $0 -p -o [-c] [-n ] [-h]" echo echo "-p " echo " Username to connect to local postgres instance. The password will be requested" @@ -27,11 +20,19 @@ usage() { echo " CI mode. Prints every command that the script runs." echo "-o " echo " Directory to output full schema files to." + echo "-n " + echo " Schema number for the new snapshot. Used to set the location of files within " + echo " the output directory, mimicking that of synapse/storage/schemas." + echo " Defaults to 9999." echo "-h" echo " Display this help text." + echo "" + echo " NB: make sure to run this against the *oldest* supported version of postgres," + echo " or else pg_dump might output non-backwards-compatible syntax." } -while getopts "p:co:h" opt; do +SCHEMA_NUMBER="9999" +while getopts "p:co:hn:" opt; do case $opt in p) export PGUSER=$OPTARG @@ -48,6 +49,9 @@ while getopts "p:co:h" opt; do usage exit ;; + n) + SCHEMA_NUMBER="$OPTARG" + ;; \?) echo "ERROR: Invalid option: -$OPTARG" >&2 usage @@ -95,12 +99,21 @@ cd "$(dirname "$0")/.." TMPDIR=$(mktemp -d) KEY_FILE=$TMPDIR/test.signing.key # default Synapse signing key path SQLITE_CONFIG=$TMPDIR/sqlite.conf -SQLITE_DB=$TMPDIR/homeserver.db +SQLITE_MAIN_DB=$TMPDIR/main.db +SQLITE_STATE_DB=$TMPDIR/state.db +SQLITE_COMMON_DB=$TMPDIR/common.db POSTGRES_CONFIG=$TMPDIR/postgres.conf # Ensure these files are delete on script exit -# TODO: the trap should also drop the temp postgres DB -trap 'rm -rf $TMPDIR' EXIT +cleanup() { + echo "Cleaning up temporary sqlite database and config files..." + rm -r "$TMPDIR" + echo "Cleaning up temporary Postgres database..." + dropdb --if-exists "$POSTGRES_COMMON_DB_NAME" + dropdb --if-exists "$POSTGRES_MAIN_DB_NAME" + dropdb --if-exists "$POSTGRES_STATE_DB_NAME" +} +trap 'cleanup' EXIT cat > "$SQLITE_CONFIG" < "$OUTPUT_DIR/$SQLITE_SCHEMA_FILE" -sqlite3 "$SQLITE_DB" ".dump --data-only --nosys" > "$OUTPUT_DIR/$SQLITE_ROWS_FILE" +sqlite3 "$SQLITE_COMMON_DB" <<< "$DROP_APP_MANAGED_TABLES" +sqlite3 "$SQLITE_MAIN_DB" <<< "$DROP_COMMON_TABLES" +sqlite3 "$SQLITE_STATE_DB" <<< "$DROP_COMMON_TABLES" +psql "$POSTGRES_COMMON_DB_NAME" -w <<< "$DROP_APP_MANAGED_TABLES" +psql "$POSTGRES_MAIN_DB_NAME" -w <<< "$DROP_COMMON_TABLES" +psql "$POSTGRES_STATE_DB_NAME" -w <<< "$DROP_COMMON_TABLES" + +# For Reasons(TM), SQLite's `.schema` also dumps out "shadow tables", the implementation +# details behind full text search tables. Omit these from the dumps. + +sqlite3 "$SQLITE_MAIN_DB" <<< " +DROP TABLE event_search_content; +DROP TABLE event_search_segments; +DROP TABLE event_search_segdir; +DROP TABLE event_search_docsize; +DROP TABLE event_search_stat; +DROP TABLE user_directory_search_content; +DROP TABLE user_directory_search_segments; +DROP TABLE user_directory_search_segdir; +DROP TABLE user_directory_search_docsize; +DROP TABLE user_directory_search_stat; +" + +echo "Dumping SQLite3 schema..." + +mkdir -p "$OUTPUT_DIR/"{common,main,state}"/full_schemas/$SCHEMA_NUMBER" +sqlite3 "$SQLITE_COMMON_DB" ".schema" > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_COMMON_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_MAIN_DB" ".schema" > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_MAIN_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_STATE_DB" ".schema" > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" +sqlite3 "$SQLITE_STATE_DB" ".dump --data-only --nosys" >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.sqlite" + +cleanup_pg_schema() { + # Cleanup as follows: + # - Remove empty lines. pg_dump likes to output a lot of these. + # - Remove comment-only lines. pg_dump also likes to output a lot of these to visually + # separate tables etc. + # - Remove "public." prefix --- the schema name. + # - Remove "SET" commands. Last time I ran this, the output commands were + # SET statement_timeout = 0; + # SET lock_timeout = 0; + # SET idle_in_transaction_session_timeout = 0; + # SET client_encoding = 'UTF8'; + # SET standard_conforming_strings = on; + # SET check_function_bodies = false; + # SET xmloption = content; + # SET client_min_messages = warning; + # SET row_security = off; + # SET default_table_access_method = heap; + # - Very carefully remove specific SELECT statements. We CANNOT blanket remove all + # SELECT statements because some of those have side-effects which we do want in the + # schema. Last time I ran this, the only SELECTS were + # SELECT pg_catalog.set_config('search_path', '', false); + # and + # SELECT pg_catalog.setval(text, bigint, bool); + # We do want to remove the former, but the latter is important. If the last argument + # is `true` or omitted, this marks the given integer as having been consumed and + # will NOT appear as the nextval. + sed -e '/^$/d' \ + -e '/^--/d' \ + -e 's/public\.//g' \ + -e '/^SET /d' \ + -e '/^SELECT pg_catalog.set_config/d' +} -echo "Dumping Postgres schema to '$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE' and '$OUTPUT_DIR/$POSTGRES_ROWS_FILE'..." -pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_SCHEMA_FILE" -pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_DB_NAME" | sed -e '/^$/d' -e '/^--/d' -e 's/public\.//g' -e '/^SET /d' -e '/^SELECT /d' > "$OUTPUT_DIR/$POSTGRES_ROWS_FILE" +echo "Dumping Postgres schema..." -echo "Cleaning up temporary Postgres database..." -dropdb $POSTGRES_DB_NAME +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_COMMON_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/common/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_MAIN_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/main/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --schema-only --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema > "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" +pg_dump --format=plain --data-only --inserts --no-tablespaces --no-acl --no-owner "$POSTGRES_STATE_DB_NAME" | cleanup_pg_schema >> "$OUTPUT_DIR/state/full_schemas/$SCHEMA_NUMBER/full.sql.postgres" echo "Done! Files dumped to: $OUTPUT_DIR" diff --git a/scripts-dev/mypy_synapse_plugin.py b/scripts-dev/mypy_synapse_plugin.py index d08517a95382..2c377533c0fd 100644 --- a/scripts-dev/mypy_synapse_plugin.py +++ b/scripts-dev/mypy_synapse_plugin.py @@ -29,7 +29,7 @@ def get_method_signature_hook( self, fullname: str ) -> Optional[Callable[[MethodSigContext], CallableType]]: if fullname.startswith( - "synapse.util.caches.descriptors._CachedFunction.__call__" + "synapse.util.caches.descriptors.CachedFunction.__call__" ) or fullname.startswith( "synapse.util.caches.descriptors._LruCachedFunction.__call__" ): @@ -38,7 +38,7 @@ def get_method_signature_hook( def cached_function_method_signature(ctx: MethodSigContext) -> CallableType: - """Fixes the `_CachedFunction.__call__` signature to be correct. + """Fixes the `CachedFunction.__call__` signature to be correct. It already has *almost* the correct signature, except: diff --git a/stubs/synapse/synapse_rust.pyi b/stubs/synapse/synapse_rust/__init__.pyi similarity index 100% rename from stubs/synapse/synapse_rust.pyi rename to stubs/synapse/synapse_rust/__init__.pyi diff --git a/stubs/synapse/synapse_rust/push.pyi b/stubs/synapse/synapse_rust/push.pyi new file mode 100644 index 000000000000..fffb8419c63d --- /dev/null +++ b/stubs/synapse/synapse_rust/push.pyi @@ -0,0 +1,54 @@ +from typing import Any, Collection, Dict, Mapping, Optional, Sequence, Set, Tuple, Union + +from synapse.types import JsonDict + +class PushRule: + @property + def rule_id(self) -> str: ... + @property + def priority_class(self) -> int: ... + @property + def conditions(self) -> Sequence[Mapping[str, str]]: ... + @property + def actions(self) -> Sequence[Union[Mapping[str, Any], str]]: ... + @property + def default(self) -> bool: ... + @property + def default_enabled(self) -> bool: ... + @staticmethod + def from_db( + rule_id: str, priority_class: int, conditions: str, actions: str + ) -> "PushRule": ... + +class PushRules: + def __init__(self, rules: Collection[PushRule]): ... + def rules(self) -> Collection[PushRule]: ... + +class FilteredPushRules: + def __init__( + self, + push_rules: PushRules, + enabled_map: Dict[str, bool], + msc3786_enabled: bool, + msc3772_enabled: bool, + ): ... + def rules(self) -> Collection[Tuple[PushRule, bool]]: ... + +def get_base_rule_ids() -> Collection[str]: ... + +class PushRuleEvaluator: + def __init__( + self, + flattened_keys: Mapping[str, str], + room_member_count: int, + sender_power_level: Optional[int], + notification_power_levels: Mapping[str, int], + relations: Mapping[str, Set[Tuple[str, str]]], + relation_match_enabled: bool, + ): ... + def run( + self, + push_rules: FilteredPushRules, + user_id: Optional[str], + display_name: Optional[str], + ) -> Collection[dict]: ... diff --git a/synapse/_scripts/synapse_port_db.py b/synapse/_scripts/synapse_port_db.py index 30983c47fbb7..5fa599e70e90 100755 --- a/synapse/_scripts/synapse_port_db.py +++ b/synapse/_scripts/synapse_port_db.py @@ -107,10 +107,11 @@ "redactions": ["have_censored"], "room_stats_state": ["is_federatable"], "local_media_repository": ["safe_from_quarantine"], - "users": ["shadow_banned"], + "users": ["shadow_banned", "approved"], "e2e_fallback_keys_json": ["used"], "access_tokens": ["used"], "device_lists_changes_in_room": ["converted_to_destinations"], + "pushers": ["enabled"], } diff --git a/synapse/_scripts/update_synapse_database.py b/synapse/_scripts/update_synapse_database.py index e91bd537f771..fb1fb83f50d9 100755 --- a/synapse/_scripts/update_synapse_database.py +++ b/synapse/_scripts/update_synapse_database.py @@ -48,10 +48,13 @@ def __init__(self, config: HomeServerConfig): def run_background_updates(hs: HomeServer) -> None: - store = hs.get_datastores().main + main = hs.get_datastores().main + state = hs.get_datastores().state async def run_background_updates() -> None: - await store.db_pool.updates.run_background_updates(sleep=False) + await main.db_pool.updates.run_background_updates(sleep=False) + if state: + await state.db_pool.updates.run_background_updates(sleep=False) # Stop the reactor to exit the script once every background update is run. reactor.stop() @@ -99,7 +102,8 @@ def main() -> None: if "database" not in hs_config and "databases" not in hs_config: sys.stderr.write( - "The configuration file must have a 'database' or 'databases' section.\n" + "The configuration file must have a 'database' or 'databases' section. " + "See https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#database" ) sys.exit(4) diff --git a/synapse/api/constants.py b/synapse/api/constants.py index c1bdf178bfaa..3d03d1b27ed8 100644 --- a/synapse/api/constants.py +++ b/synapse/api/constants.py @@ -270,3 +270,14 @@ class PublicRoomsFilterFields: GENERIC_SEARCH_TERM: Final = "generic_search_term" ROOM_TYPES: Final = "room_types" + + +class ApprovalNoticeMedium: + """Identifier for the medium this server will use to serve notice of approval for a + specific user's registration. + + As defined in https://github.com/matrix-org/matrix-spec-proposals/blob/babolivier/m_not_approved/proposals/3866-user-not-approved-error.md + """ + + NONE = "org.matrix.msc3866.none" + EMAIL = "org.matrix.msc3866.email" diff --git a/synapse/api/errors.py b/synapse/api/errors.py index e6dea89c6d09..c6062075690e 100644 --- a/synapse/api/errors.py +++ b/synapse/api/errors.py @@ -100,6 +100,14 @@ class Codes(str, Enum): UNREDACTED_CONTENT_DELETED = "FI.MAU.MSC2815_UNREDACTED_CONTENT_DELETED" + # Returned for federation requests where we can't process a request as we + # can't ensure the sending server is in a room which is partial-stated on + # our side. + # Part of MSC3895. + UNABLE_DUE_TO_PARTIAL_STATE = "ORG.MATRIX.MSC3895_UNABLE_DUE_TO_PARTIAL_STATE" + + USER_AWAITING_APPROVAL = "ORG.MATRIX.MSC3866_USER_AWAITING_APPROVAL" + class CodeMessageException(RuntimeError): """An exception with integer code and message string attributes. @@ -560,6 +568,20 @@ def error_dict(self, config: Optional["HomeServerConfig"]) -> "JsonDict": return cs_error(self.msg, self.errcode, **extra) +class NotApprovedError(SynapseError): + def __init__( + self, + msg: str, + approval_notice_medium: str, + ): + super().__init__( + code=403, + msg=msg, + errcode=Codes.USER_AWAITING_APPROVAL, + additional_fields={"approval_notice_medium": approval_notice_medium}, + ) + + def cs_error(msg: str, code: str = Codes.UNKNOWN, **kwargs: Any) -> "JsonDict": """Utility method for constructing an error response for client-server interactions. diff --git a/synapse/app/_base.py b/synapse/app/_base.py index 9a24bed0a0fd..000912e86ee9 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -98,9 +98,7 @@ def register_sighup(func: Callable[P, None], *args: P.args, **kwargs: P.kwargs) func: Function to be called when sent a SIGHUP signal. *args, **kwargs: args and kwargs to be passed to the target function. """ - # This type-ignore should be redundant once we use a mypy release with - # https://github.com/python/mypy/pull/12668. - _sighup_callbacks.append((func, args, kwargs)) # type: ignore[arg-type] + _sighup_callbacks.append((func, args, kwargs)) def start_worker_reactor( diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 8a583d3ec632..3c8c00ea5bc4 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -53,9 +53,9 @@ class AdminCmdSlavedStore( SlavedFilteringStore, - SlavedDeviceStore, SlavedPushRuleStore, SlavedEventStore, + SlavedDeviceStore, TagsWorkerStore, DeviceInboxWorkerStore, AccountDataWorkerStore, diff --git a/synapse/app/complement_fork_starter.py b/synapse/app/complement_fork_starter.py index 89eb07df2733..b22f315453ba 100644 --- a/synapse/app/complement_fork_starter.py +++ b/synapse/app/complement_fork_starter.py @@ -51,11 +51,18 @@ import importlib import itertools import multiprocessing +import os +import signal import sys -from typing import Any, Callable, List +from types import FrameType +from typing import Any, Callable, List, Optional from twisted.internet.main import installReactor +# a list of the original signal handlers, before we installed our custom ones. +# We restore these in our child processes. +_original_signal_handlers: dict[int, Any] = {} + class ProxiedReactor: """ @@ -105,6 +112,11 @@ def _worker_entrypoint( sys.argv = args + # reset the custom signal handlers that we installed, so that the children start + # from a clean slate. + for sig, handler in _original_signal_handlers.items(): + signal.signal(sig, handler) + from twisted.internet.epollreactor import EPollReactor proxy_reactor._install_real_reactor(EPollReactor()) @@ -167,13 +179,29 @@ def main() -> None: update_proc.join() print("===== PREPARED DATABASE =====", file=sys.stderr) + processes: List[multiprocessing.Process] = [] + + # Install signal handlers to propagate signals to all our children, so that they + # shut down cleanly. This also inhibits our own exit, but that's good: we want to + # wait until the children have exited. + def handle_signal(signum: int, frame: Optional[FrameType]) -> None: + print( + f"complement_fork_starter: Caught signal {signum}. Stopping children.", + file=sys.stderr, + ) + for p in processes: + if p.pid: + os.kill(p.pid, signum) + + for sig in (signal.SIGINT, signal.SIGTERM): + _original_signal_handlers[sig] = signal.signal(sig, handle_signal) + # At this point, we've imported all the main entrypoints for all the workers. # Now we basically just fork() out to create the workers we need. # Because we're using fork(), all the workers get a clone of this launcher's # memory space and don't need to repeat the work of loading the code! # Instead of using fork() directly, we use the multiprocessing library, # which uses fork() on Unix platforms. - processes = [] for (func, worker_args) in zip(worker_functions, args_by_worker): process = multiprocessing.Process( target=_worker_entrypoint, args=(func, proxy_reactor, worker_args) diff --git a/synapse/config/experimental.py b/synapse/config/experimental.py index f7d0392512f9..4d15b3efab35 100644 --- a/synapse/config/experimental.py +++ b/synapse/config/experimental.py @@ -14,10 +14,25 @@ from typing import Any +import attr + from synapse.config._base import Config from synapse.types import JsonDict +@attr.s(auto_attribs=True, frozen=True, slots=True) +class MSC3866Config: + """Configuration for MSC3866 (mandating approval for new users)""" + + # Whether the base support for the approval process is enabled. This includes the + # ability for administrators to check and update the approval of users, even if no + # approval is currently required. + enabled: bool = False + # Whether to require that new users are approved by an admin before their account + # can be used. Note that this setting is ignored if 'enabled' is false. + require_approval_for_new_accounts: bool = False + + class ExperimentalConfig(Config): """Config section for enabling experimental features""" @@ -67,7 +82,8 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: # MSC3706 (server-side support for partial state in /send_join responses) self.msc3706_enabled: bool = experimental.get("msc3706_enabled", False) - # experimental support for faster joins over federation (msc2775, msc3706) + # experimental support for faster joins over federation + # (MSC2775, MSC3706, MSC3895) # requires a target server with msc3706_enabled enabled. self.faster_joins_enabled: bool = experimental.get("faster_joins", False) @@ -86,6 +102,8 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: # MSC3786 (Add a default push rule to ignore m.room.server_acl events) self.msc3786_enabled: bool = experimental.get("msc3786_enabled", False) + # MSC3771: Thread read receipts + self.msc3771_enabled: bool = experimental.get("msc3771_enabled", False) # MSC3772: A push rule for mutual relations. self.msc3772_enabled: bool = experimental.get("msc3772_enabled", False) @@ -97,3 +115,17 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: # MSC3852: Expose last seen user agent field on /_matrix/client/v3/devices. self.msc3852_enabled: bool = experimental.get("msc3852_enabled", False) + + # MSC3866: M_USER_AWAITING_APPROVAL error code + raw_msc3866_config = experimental.get("msc3866", {}) + self.msc3866 = MSC3866Config(**raw_msc3866_config) + + # MSC3881: Remotely toggle push notifications for another client + self.msc3881_enabled: bool = experimental.get("msc3881_enabled", False) + + # MSC3882: Allow an existing session to sign in a new session + self.msc3882_enabled: bool = experimental.get("msc3882_enabled", False) + self.msc3882_ui_auth: bool = experimental.get("msc3882_ui_auth", True) + self.msc3882_token_timeout = self.parse_duration( + experimental.get("msc3882_token_timeout", "5m") + ) diff --git a/synapse/config/metrics.py b/synapse/config/metrics.py index f3134834e53b..bb065f9f2f14 100644 --- a/synapse/config/metrics.py +++ b/synapse/config/metrics.py @@ -43,32 +43,6 @@ class MetricsConfig(Config): def read_config(self, config: JsonDict, **kwargs: Any) -> None: self.enable_metrics = config.get("enable_metrics", False) - """ - ### `enable_legacy_metrics` (experimental) - - **Experimental: this option may be removed or have its behaviour - changed at any time, with no notice.** - - Set to `true` to publish both legacy and non-legacy Prometheus metric names, - or to `false` to only publish non-legacy Prometheus metric names. - Defaults to `true`. Has no effect if `enable_metrics` is `false`. - - Legacy metric names include: - - metrics containing colons in the name, such as `synapse_util_caches_response_cache:hits`, because colons are supposed to be reserved for user-defined recording rules; - - counters that don't end with the `_total` suffix, such as `synapse_federation_client_sent_edus`, therefore not adhering to the OpenMetrics standard. - - These legacy metric names are unconventional and not compliant with OpenMetrics standards. - They are included for backwards compatibility. - - Example configuration: - ```yaml - enable_legacy_metrics: false - ``` - - See https://github.com/matrix-org/synapse/issues/11106 for context. - - *Since v1.67.0.* - """ self.enable_legacy_metrics = config.get("enable_legacy_metrics", True) self.report_stats = config.get("report_stats", None) diff --git a/synapse/config/repository.py b/synapse/config/repository.py index 1033496bb43d..e4759711ed95 100644 --- a/synapse/config/repository.py +++ b/synapse/config/repository.py @@ -205,7 +205,7 @@ def read_config(self, config: JsonDict, **kwargs: Any) -> None: ) self.url_preview_enabled = config.get("url_preview_enabled", False) if self.url_preview_enabled: - check_requirements("url_preview") + check_requirements("url-preview") proxy_env = getproxies_environment() if "url_preview_ip_range_blacklist" not in config: diff --git a/synapse/events/__init__.py b/synapse/events/__init__.py index b2c9119fd0fb..030c3ca408c0 100644 --- a/synapse/events/__init__.py +++ b/synapse/events/__init__.py @@ -289,6 +289,10 @@ def is_historical(self) -> bool: """ return self._dict.get("historical", False) + def is_notifiable(self) -> bool: + """Whether this event can trigger a push notification""" + return not self.is_outlier() or self.is_out_of_band_membership() + class EventBase(metaclass=abc.ABCMeta): @property diff --git a/synapse/federation/federation_base.py b/synapse/federation/federation_base.py index abe2c1971a19..6bd4742140c4 100644 --- a/synapse/federation/federation_base.py +++ b/synapse/federation/federation_base.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Awaitable, Callable, Optional from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership from synapse.api.errors import Codes, SynapseError @@ -58,7 +58,12 @@ def __init__(self, hs: "HomeServer"): @trace async def _check_sigs_and_hash( - self, room_version: RoomVersion, pdu: EventBase + self, + room_version: RoomVersion, + pdu: EventBase, + record_failure_callback: Optional[ + Callable[[EventBase, str], Awaitable[None]] + ] = None, ) -> EventBase: """Checks that event is correctly signed by the sending server. @@ -70,6 +75,11 @@ async def _check_sigs_and_hash( Args: room_version: The room version of the PDU pdu: the event to be checked + record_failure_callback: A callback to run whenever the given event + fails signature or hash checks. This includes exceptions + that would be normally be thrown/raised but also things like + checking for event tampering where we just return the redacted + event. Returns: * the original event if the checks pass @@ -80,7 +90,12 @@ async def _check_sigs_and_hash( InvalidEventSignatureError if the signature check failed. Nothing will be logged in this case. """ - await _check_sigs_on_pdu(self.keyring, room_version, pdu) + try: + await _check_sigs_on_pdu(self.keyring, room_version, pdu) + except InvalidEventSignatureError as exc: + if record_failure_callback: + await record_failure_callback(pdu, str(exc)) + raise exc if not check_event_content_hash(pdu): # let's try to distinguish between failures because the event was @@ -116,6 +131,10 @@ async def _check_sigs_and_hash( "event_id": pdu.event_id, } ) + if record_failure_callback: + await record_failure_callback( + pdu, "Event content has been tampered with" + ) return redacted_event spam_check = await self.spam_checker.check_event_for_spam(pdu) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index 464672a3da81..4dca711cd28d 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -278,7 +278,7 @@ async def backfill( pdus = [event_from_pdu_json(p, room_version) for p in transaction_data_pdus] # Check signatures and hash of pdus, removing any from the list that fail checks - pdus[:] = await self._check_sigs_and_hash_and_fetch( + pdus[:] = await self._check_sigs_and_hash_for_pulled_events_and_fetch( dest, pdus, room_version=room_version ) @@ -328,7 +328,17 @@ async def get_pdu_from_destination_raw( # Check signatures are correct. try: - signed_pdu = await self._check_sigs_and_hash(room_version, pdu) + + async def _record_failure_callback( + event: EventBase, cause: str + ) -> None: + await self.store.record_event_failed_pull_attempt( + event.room_id, event.event_id, cause + ) + + signed_pdu = await self._check_sigs_and_hash( + room_version, pdu, _record_failure_callback + ) except InvalidEventSignatureError as e: errmsg = f"event id {pdu.event_id}: {e}" logger.warning("%s", errmsg) @@ -547,24 +557,28 @@ async def get_room_state( len(auth_event_map), ) - valid_auth_events = await self._check_sigs_and_hash_and_fetch( + valid_auth_events = await self._check_sigs_and_hash_for_pulled_events_and_fetch( destination, auth_event_map.values(), room_version ) - valid_state_events = await self._check_sigs_and_hash_and_fetch( - destination, state_event_map.values(), room_version + valid_state_events = ( + await self._check_sigs_and_hash_for_pulled_events_and_fetch( + destination, state_event_map.values(), room_version + ) ) return valid_state_events, valid_auth_events @trace - async def _check_sigs_and_hash_and_fetch( + async def _check_sigs_and_hash_for_pulled_events_and_fetch( self, origin: str, pdus: Collection[EventBase], room_version: RoomVersion, ) -> List[EventBase]: - """Checks the signatures and hashes of a list of events. + """ + Checks the signatures and hashes of a list of pulled events we got from + federation and records any signature failures as failed pull attempts. If a PDU fails its signature check then we check if we have it in the database, and if not then request it from the sender's server (if that @@ -597,11 +611,17 @@ async def _check_sigs_and_hash_and_fetch( valid_pdus: List[EventBase] = [] + async def _record_failure_callback(event: EventBase, cause: str) -> None: + await self.store.record_event_failed_pull_attempt( + event.room_id, event.event_id, cause + ) + async def _execute(pdu: EventBase) -> None: valid_pdu = await self._check_sigs_and_hash_and_fetch_one( pdu=pdu, origin=origin, room_version=room_version, + record_failure_callback=_record_failure_callback, ) if valid_pdu: @@ -618,6 +638,9 @@ async def _check_sigs_and_hash_and_fetch_one( pdu: EventBase, origin: str, room_version: RoomVersion, + record_failure_callback: Optional[ + Callable[[EventBase, str], Awaitable[None]] + ] = None, ) -> Optional[EventBase]: """Takes a PDU and checks its signatures and hashes. @@ -634,6 +657,11 @@ async def _check_sigs_and_hash_and_fetch_one( origin pdu room_version + record_failure_callback: A callback to run whenever the given event + fails signature or hash checks. This includes exceptions + that would be normally be thrown/raised but also things like + checking for event tampering where we just return the redacted + event. Returns: The PDU (possibly redacted) if it has valid signatures and hashes. @@ -641,7 +669,9 @@ async def _check_sigs_and_hash_and_fetch_one( """ try: - return await self._check_sigs_and_hash(room_version, pdu) + return await self._check_sigs_and_hash( + room_version, pdu, record_failure_callback + ) except InvalidEventSignatureError as e: logger.warning( "Signature on retrieved event %s was invalid (%s). " @@ -694,7 +724,7 @@ async def get_event_auth( auth_chain = [event_from_pdu_json(p, room_version) for p in res["auth_chain"]] - signed_auth = await self._check_sigs_and_hash_and_fetch( + signed_auth = await self._check_sigs_and_hash_for_pulled_events_and_fetch( destination, auth_chain, room_version=room_version ) @@ -1401,7 +1431,7 @@ async def get_missing_events( event_from_pdu_json(e, room_version) for e in content.get("events", []) ] - signed_events = await self._check_sigs_and_hash_and_fetch( + signed_events = await self._check_sigs_and_hash_for_pulled_events_and_fetch( destination, events, room_version=room_version ) except HttpResponseException as e: diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 3bf84cf62515..907940e19eb0 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -530,13 +530,10 @@ async def _process_edu(edu_dict: JsonDict) -> None: async def on_room_state_request( self, origin: str, room_id: str, event_id: str ) -> Tuple[int, JsonDict]: + await self._event_auth_handler.assert_host_in_room(room_id, origin) origin_host, _ = parse_server_name(origin) await self.check_server_matches_acl(origin_host, room_id) - in_room = await self._event_auth_handler.check_host_in_room(room_id, origin) - if not in_room: - raise AuthError(403, "Host not in room.") - # we grab the linearizer to protect ourselves from servers which hammer # us. In theory we might already have the response to this query # in the cache so we could return it without waiting for the linearizer @@ -560,13 +557,10 @@ async def on_state_ids_request( if not event_id: raise NotImplementedError("Specify an event") + await self._event_auth_handler.assert_host_in_room(room_id, origin) origin_host, _ = parse_server_name(origin) await self.check_server_matches_acl(origin_host, room_id) - in_room = await self._event_auth_handler.check_host_in_room(room_id, origin) - if not in_room: - raise AuthError(403, "Host not in room.") - resp = await self._state_ids_resp_cache.wrap( (room_id, event_id), self._on_state_ids_request_compute, @@ -955,6 +949,7 @@ async def on_event_auth( self, origin: str, room_id: str, event_id: str ) -> Tuple[int, Dict[str, Any]]: async with self._server_linearizer.queue((origin, room_id)): + await self._event_auth_handler.assert_host_in_room(room_id, origin) origin_host, _ = parse_server_name(origin) await self.check_server_matches_acl(origin_host, room_id) diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index 41d8b937af4e..084c45a95ca1 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -646,29 +646,32 @@ async def __aenter__(self) -> Tuple[List[EventBase], List[Edu]]: # We start by fetching device related EDUs, i.e device updates and to # device messages. We have to keep 2 free slots for presence and rr_edus. - limit = MAX_EDUS_PER_TRANSACTION - 2 - - device_update_edus, dev_list_id = await self.queue._get_device_update_edus( - limit - ) - - if device_update_edus: - self._device_list_id = dev_list_id - else: - self.queue._last_device_list_stream_id = dev_list_id - - limit -= len(device_update_edus) + device_edu_limit = MAX_EDUS_PER_TRANSACTION - 2 + # We prioritize to-device messages so that existing encryption channels + # work. We also keep a few slots spare (by reducing the limit) so that + # we can still trickle out some device list updates. ( to_device_edus, device_stream_id, - ) = await self.queue._get_to_device_message_edus(limit) + ) = await self.queue._get_to_device_message_edus(device_edu_limit - 10) if to_device_edus: self._device_stream_id = device_stream_id else: self.queue._last_device_stream_id = device_stream_id + device_edu_limit -= len(to_device_edus) + + device_update_edus, dev_list_id = await self.queue._get_device_update_edus( + device_edu_limit + ) + + if device_update_edus: + self._device_list_id = dev_list_id + else: + self.queue._last_device_list_stream_id = dev_list_id + pending_edus = device_update_edus + to_device_edus # Now add the read receipt EDU. diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index cf9f19608af1..f2989cc4a214 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -32,6 +32,7 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self._storage_controllers = hs.get_storage_controllers() self._state_storage_controller = self._storage_controllers.state + self._msc3866_enabled = hs.config.experimental.msc3866.enabled async def get_whois(self, user: UserID) -> JsonDict: connections = [] @@ -75,6 +76,10 @@ async def get_user(self, user: UserID) -> Optional[JsonDict]: "is_guest", } + if self._msc3866_enabled: + # Only include the approved flag if support for MSC3866 is enabled. + user_info_to_return.add("approved") + # Restrict returned keys to a known set. user_info_dict = { key: value diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index 0327fc57a4ec..f5f0e0e7a773 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -63,7 +63,6 @@ from synapse.http.site import SynapseRequest from synapse.logging.context import defer_to_thread from synapse.metrics.background_process_metrics import run_as_background_process -from synapse.storage.roommember import ProfileInfo from synapse.types import JsonDict, Requester, UserID from synapse.util import stringutils as stringutils from synapse.util.async_helpers import delay_cancellation, maybe_awaitable @@ -1010,6 +1009,17 @@ async def check_user_exists(self, user_id: str) -> Optional[str]: return res[0] return None + async def is_user_approved(self, user_id: str) -> bool: + """Checks if a user is approved and therefore can be allowed to log in. + + Args: + user_id: the user to check the approval status of. + + Returns: + A boolean that is True if the user is approved, False otherwise. + """ + return await self.store.is_user_approved(user_id) + async def _find_user_id_and_pwd_hash( self, user_id: str ) -> Optional[Tuple[str, str]]: @@ -1687,41 +1697,10 @@ async def complete_sso_login( respond_with_html(request, 403, self._sso_account_deactivated_template) return - profile = await self.store.get_profileinfo( + user_profile_data = await self.store.get_profileinfo( UserID.from_string(registered_user_id).localpart ) - self._complete_sso_login( - registered_user_id, - auth_provider_id, - request, - client_redirect_url, - extra_attributes, - new_user=new_user, - user_profile_data=profile, - auth_provider_session_id=auth_provider_session_id, - ) - - def _complete_sso_login( - self, - registered_user_id: str, - auth_provider_id: str, - request: Request, - client_redirect_url: str, - extra_attributes: Optional[JsonDict] = None, - new_user: bool = False, - user_profile_data: Optional[ProfileInfo] = None, - auth_provider_session_id: Optional[str] = None, - ) -> None: - """ - The synchronous portion of complete_sso_login. - - This exists purely for backwards compatibility of synapse.module_api.ModuleApi. - """ - - if user_profile_data is None: - user_profile_data = ProfileInfo(None, None) - # Store any extra attributes which will be passed in the login response. # Note that this is per-user so it may overwrite a previous value, this # is considered OK since the newest SSO attributes should be most valid. diff --git a/synapse/handlers/cas.py b/synapse/handlers/cas.py index 7163af8004e3..fc467bc7c13e 100644 --- a/synapse/handlers/cas.py +++ b/synapse/handlers/cas.py @@ -130,6 +130,9 @@ async def _validate_ticket( except PartialDownloadError as pde: # Twisted raises this error if the connection is closed, # even if that's being used old-http style to signal end-of-data + # Assertion is for mypy's benefit. Error.response is Optional[bytes], + # but a PartialDownloadError should always have a non-None response. + assert pde.response is not None body = pde.response except HttpResponseException as e: description = ( diff --git a/synapse/handlers/device.py b/synapse/handlers/device.py index 0f2735abd301..f9cc5bddbcbd 100644 --- a/synapse/handlers/device.py +++ b/synapse/handlers/device.py @@ -195,7 +195,9 @@ async def get_user_ids_changed( possibly_changed = set(changed) possibly_left = set() for room_id in rooms_changed: - current_state_ids = await self._state_storage.get_current_state_ids(room_id) + current_state_ids = await self._state_storage.get_current_state_ids( + room_id, await_full_state=False + ) # The user may have left the room # TODO: Check if they actually did or if we were just invited. @@ -234,7 +236,8 @@ async def get_user_ids_changed( # mapping from event_id -> state_dict prev_state_ids = await self._state_storage.get_state_ids_for_events( - event_ids + event_ids, + await_full_state=False, ) # Check if we've joined the room? If so we just blindly add all the users to @@ -304,6 +307,17 @@ async def on_federation_query_user_devices(self, user_id: str) -> JsonDict: "self_signing_key": self_signing_key, } + async def handle_room_un_partial_stated(self, room_id: str) -> None: + """Handles sending appropriate device list updates in a room that has + gone from partial to full state. + """ + + # TODO(faster_joins): worker mode support + # https://github.com/matrix-org/synapse/issues/12994 + logger.error( + "Trying handling device list state for partial join: not supported on workers." + ) + class DeviceHandler(DeviceWorkerHandler): def __init__(self, hs: "HomeServer"): @@ -686,11 +700,15 @@ async def _handle_new_device_update_async(self) -> None: # Ignore any users that aren't ours if self.hs.is_mine_id(user_id): hosts = set( - await self._storage_controllers.state.get_current_hosts_in_room( + await self._storage_controllers.state.get_current_hosts_in_room_or_partial_state_approximation( room_id ) ) hosts.discard(self.server_name) + # For rooms with partial state, `hosts` is merely an + # approximation. When we transition to a full state room, we + # will have to send out device list updates to any servers we + # missed. # Check if we've already sent this update to some hosts if current_stream_id == stream_id: @@ -737,6 +755,95 @@ async def _handle_new_device_update_async(self) -> None: finally: self._handle_new_device_update_is_processing = False + async def handle_room_un_partial_stated(self, room_id: str) -> None: + """Handles sending appropriate device list updates in a room that has + gone from partial to full state. + """ + + # We defer to the device list updater to handle pending remote device + # list updates. + await self.device_list_updater.handle_room_un_partial_stated(room_id) + + # Replay local updates. + ( + join_event_id, + device_lists_stream_id, + ) = await self.store.get_join_event_id_and_device_lists_stream_id_for_partial_state( + room_id + ) + + # Get the local device list changes that have happened in the room since + # we started joining. If there are no updates there's nothing left to do. + changes = await self.store.get_device_list_changes_in_room( + room_id, device_lists_stream_id + ) + local_changes = {(u, d) for u, d in changes if self.hs.is_mine_id(u)} + if not local_changes: + return + + # Note: We have persisted the full state at this point, we just haven't + # cleared the `partial_room` flag. + join_state_ids = await self._state_storage.get_state_ids_for_event( + join_event_id, await_full_state=False + ) + current_state_ids = await self.store.get_partial_current_state_ids(room_id) + + # Now we need to work out all servers that might have been in the room + # at any point during our join. + + # First we look for any membership states that have changed between the + # initial join and now... + all_keys = set(join_state_ids) + all_keys.update(current_state_ids) + + potentially_changed_hosts = set() + for etype, state_key in all_keys: + if etype != EventTypes.Member: + continue + + prev = join_state_ids.get((etype, state_key)) + current = current_state_ids.get((etype, state_key)) + + if prev != current: + potentially_changed_hosts.add(get_domain_from_id(state_key)) + + # ... then we add all the hosts that are currently joined to the room... + current_hosts_in_room = await self.store.get_current_hosts_in_room(room_id) + potentially_changed_hosts.update(current_hosts_in_room) + + # ... and finally we remove any hosts that we were told about, as we + # will have sent device list updates to those hosts when they happened. + known_hosts_at_join = await self.store.get_partial_state_servers_at_join( + room_id + ) + potentially_changed_hosts.difference_update(known_hosts_at_join) + + potentially_changed_hosts.discard(self.server_name) + + if not potentially_changed_hosts: + # Nothing to do. + return + + logger.info( + "Found %d changed hosts to send device list updates to", + len(potentially_changed_hosts), + ) + + for user_id, device_id in local_changes: + await self.store.add_device_list_outbound_pokes( + user_id=user_id, + device_id=device_id, + room_id=room_id, + stream_id=None, + hosts=potentially_changed_hosts, + context=None, + ) + + # Notify things that device lists need to be sent out. + self.notifier.notify_replication() + for host in potentially_changed_hosts: + self.federation_sender.send_device_messages(host, immediate=False) + def _update_device_from_client_ips( device: JsonDict, client_ips: Mapping[Tuple[str, str], Mapping[str, Any]] @@ -827,6 +934,16 @@ async def incoming_device_list_update( ) return + # Check if we are partially joining any rooms. If so we need to store + # all device list updates so that we can handle them correctly once we + # know who is in the room. + partial_rooms = await self.store.get_partial_state_rooms_and_servers() + if partial_rooms: + await self.store.add_remote_device_list_to_pending( + user_id, + device_id, + ) + room_ids = await self.store.get_rooms_for_user(user_id) if not room_ids: # We don't share any rooms with this user. Ignore update, as we @@ -1166,3 +1283,35 @@ async def process_cross_signing_key_update( device_ids.append(verify_key.version) return device_ids + + async def handle_room_un_partial_stated(self, room_id: str) -> None: + """Handles sending appropriate device list updates in a room that has + gone from partial to full state. + """ + + pending_updates = ( + await self.store.get_pending_remote_device_list_updates_for_room(room_id) + ) + + for user_id, device_id in pending_updates: + logger.info( + "Got pending device list update in room %s: %s / %s", + room_id, + user_id, + device_id, + ) + position = await self.store.add_device_change_to_streams( + user_id, + [device_id], + room_ids=[room_id], + ) + + if not position: + # This should only happen if there are no updates, which + # shouldn't happen when we've passed in a non-empty set of + # device IDs. + continue + + self.device_handler.notifier.on_new_event( + StreamKeyType.DEVICE_LIST, position, rooms=[room_id] + ) diff --git a/synapse/handlers/event_auth.py b/synapse/handlers/event_auth.py index c3ddc5d18253..8249ca1ed26c 100644 --- a/synapse/handlers/event_auth.py +++ b/synapse/handlers/event_auth.py @@ -31,7 +31,6 @@ from synapse.events.builder import EventBuilder from synapse.events.snapshot import EventContext from synapse.types import StateMap, get_domain_from_id -from synapse.util.metrics import Measure if TYPE_CHECKING: from synapse.server import HomeServer @@ -156,9 +155,33 @@ async def get_user_which_could_invite( Codes.UNABLE_TO_GRANT_JOIN, ) - async def check_host_in_room(self, room_id: str, host: str) -> bool: - with Measure(self._clock, "check_host_in_room"): - return await self._store.is_host_joined(room_id, host) + async def is_host_in_room(self, room_id: str, host: str) -> bool: + return await self._store.is_host_joined(room_id, host) + + async def assert_host_in_room( + self, room_id: str, host: str, allow_partial_state_rooms: bool = False + ) -> None: + """ + Asserts that the host is in the room, or raises an AuthError. + + If the room is partial-stated, we raise an AuthError with the + UNABLE_DUE_TO_PARTIAL_STATE error code, unless `allow_partial_state_rooms` is true. + + If allow_partial_state_rooms is True and the room is partial-stated, + this function may return an incorrect result as we are not able to fully + track server membership in a room without full state. + """ + if not allow_partial_state_rooms and await self._store.is_partial_state_room( + room_id + ): + raise AuthError( + 403, + "Unable to authorise you right now; room is partial-stated here.", + errcode=Codes.UNABLE_DUE_TO_PARTIAL_STATE, + ) + + if not await self.is_host_in_room(room_id, host): + raise AuthError(403, "Host not in room.") async def check_restricted_join_rules( self, diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index dd4b9f66d10e..986ffed3d592 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -38,7 +38,7 @@ from unpaddedbase64 import decode_base64 from synapse import event_auth -from synapse.api.constants import EventContentFields, EventTypes, Membership +from synapse.api.constants import MAX_DEPTH, EventContentFields, EventTypes, Membership from synapse.api.errors import ( AuthError, CodeMessageException, @@ -149,6 +149,8 @@ def __init__(self, hs: "HomeServer"): self.http_client = hs.get_proxied_blacklisted_http_client() self._replication = hs.get_replication_data_handler() self._federation_event_handler = hs.get_federation_event_handler() + self._device_handler = hs.get_device_handler() + self._bulk_push_rule_evaluator = hs.get_bulk_push_rule_evaluator() self._clean_room_for_join_client = ReplicationCleanRoomRestServlet.make_client( hs @@ -209,7 +211,7 @@ async def _maybe_backfill_inner( current_depth: int, limit: int, *, - processing_start_time: int, + processing_start_time: Optional[int], ) -> bool: """ Checks whether the `current_depth` is at or approaching any backfill @@ -221,13 +223,22 @@ async def _maybe_backfill_inner( room_id: The room to backfill in. current_depth: The depth to check at for any upcoming backfill points. limit: The max number of events to request from the remote federated server. - processing_start_time: The time when `maybe_backfill` started - processing. Only used for timing. + processing_start_time: The time when `maybe_backfill` started processing. + Only used for timing. If `None`, no timing observation will be made. """ backwards_extremities = [ _BackfillPoint(event_id, depth, _BackfillPointType.BACKWARDS_EXTREMITY) - for event_id, depth in await self.store.get_oldest_event_ids_with_depth_in_room( - room_id + for event_id, depth in await self.store.get_backfill_points_in_room( + room_id=room_id, + current_depth=current_depth, + # We only need to end up with 5 extremities combined with the + # insertion event extremities to make the `/backfill` request + # but fetch an order of magnitude more to make sure there is + # enough even after we filter them by whether visible in the + # history. This isn't fool-proof as all backfill points within + # our limit could be filtered out but seems like a good amount + # to try with at least. + limit=50, ) ] @@ -236,7 +247,12 @@ async def _maybe_backfill_inner( insertion_events_to_be_backfilled = [ _BackfillPoint(event_id, depth, _BackfillPointType.INSERTION_PONT) for event_id, depth in await self.store.get_insertion_event_backward_extremities_in_room( - room_id + room_id=room_id, + current_depth=current_depth, + # We only need to end up with 5 extremities combined with + # the backfill points to make the `/backfill` request ... + # (see the other comment above for more context). + limit=50, ) ] logger.debug( @@ -245,10 +261,6 @@ async def _maybe_backfill_inner( insertion_events_to_be_backfilled, ) - if not backwards_extremities and not insertion_events_to_be_backfilled: - logger.debug("Not backfilling as no extremeties found.") - return False - # we now have a list of potential places to backpaginate from. We prefer to # start with the most recent (ie, max depth), so let's sort the list. sorted_backfill_points: List[_BackfillPoint] = sorted( @@ -269,6 +281,33 @@ async def _maybe_backfill_inner( sorted_backfill_points, ) + # If we have no backfill points lower than the `current_depth` then + # either we can a) bail or b) still attempt to backfill. We opt to try + # backfilling anyway just in case we do get relevant events. + if not sorted_backfill_points and current_depth != MAX_DEPTH: + logger.debug( + "_maybe_backfill_inner: all backfill points are *after* current depth. Trying again with later backfill points." + ) + return await self._maybe_backfill_inner( + room_id=room_id, + # We use `MAX_DEPTH` so that we find all backfill points next + # time (all events are below the `MAX_DEPTH`) + current_depth=MAX_DEPTH, + limit=limit, + # We don't want to start another timing observation from this + # nested recursive call. The top-most call can record the time + # overall otherwise the smaller one will throw off the results. + processing_start_time=None, + ) + + # Even after recursing with `MAX_DEPTH`, we didn't find any + # backward extremities to backfill from. + if not sorted_backfill_points: + logger.debug( + "_maybe_backfill_inner: Not backfilling as no backward extremeties found." + ) + return False + # If we're approaching an extremity we trigger a backfill, otherwise we # no-op. # @@ -278,47 +317,16 @@ async def _maybe_backfill_inner( # chose more than one times the limit in case of failure, but choosing a # much larger factor will result in triggering a backfill request much # earlier than necessary. - # - # XXX: shouldn't we do this *after* the filter by depth below? Again, we don't - # care about events that have happened after our current position. - # - max_depth = sorted_backfill_points[0].depth - if current_depth - 2 * limit > max_depth: + max_depth_of_backfill_points = sorted_backfill_points[0].depth + if current_depth - 2 * limit > max_depth_of_backfill_points: logger.debug( "Not backfilling as we don't need to. %d < %d - 2 * %d", - max_depth, + max_depth_of_backfill_points, current_depth, limit, ) return False - # We ignore extremities that have a greater depth than our current depth - # as: - # 1. we don't really care about getting events that have happened - # after our current position; and - # 2. we have likely previously tried and failed to backfill from that - # extremity, so to avoid getting "stuck" requesting the same - # backfill repeatedly we drop those extremities. - # - # However, we need to check that the filtered extremities are non-empty. - # If they are empty then either we can a) bail or b) still attempt to - # backfill. We opt to try backfilling anyway just in case we do get - # relevant events. - # - filtered_sorted_backfill_points = [ - t for t in sorted_backfill_points if t.depth <= current_depth - ] - if filtered_sorted_backfill_points: - logger.debug( - "_maybe_backfill_inner: backfill points before current depth: %s", - filtered_sorted_backfill_points, - ) - sorted_backfill_points = filtered_sorted_backfill_points - else: - logger.debug( - "_maybe_backfill_inner: all backfill points are *after* current depth. Backfilling anyway." - ) - # For performance's sake, we only want to paginate from a particular extremity # if we can actually see the events we'll get. Otherwise, we'd just spend a lot # of resources to get redacted events. We check each extremity in turn and @@ -404,11 +412,22 @@ async def _maybe_backfill_inner( # First we try hosts that are already in the room. # TODO: HEURISTIC ALERT. likely_domains = ( - await self._storage_controllers.state.get_current_hosts_in_room(room_id) + await self._storage_controllers.state.get_current_hosts_in_room_ordered( + room_id + ) ) async def try_backfill(domains: Collection[str]) -> bool: # TODO: Should we try multiple of these at a time? + + # Number of contacted remote homeservers that have denied our backfill + # request with a 4xx code. + denied_count = 0 + + # Maximum number of contacted remote homeservers that can deny our + # backfill request with 4xx codes before we give up. + max_denied_count = 5 + for dom in domains: # We don't want to ask our own server for information we don't have if dom == self.server_name: @@ -427,13 +446,33 @@ async def try_backfill(domains: Collection[str]) -> bool: continue except HttpResponseException as e: if 400 <= e.code < 500: - raise e.to_synapse_error() + logger.warning( + "Backfill denied from %s because %s [%d/%d]", + dom, + e, + denied_count, + max_denied_count, + ) + denied_count += 1 + if denied_count >= max_denied_count: + return False + continue logger.info("Failed to backfill from %s because %s", dom, e) continue except CodeMessageException as e: if 400 <= e.code < 500: - raise + logger.warning( + "Backfill denied from %s because %s [%d/%d]", + dom, + e, + denied_count, + max_denied_count, + ) + denied_count += 1 + if denied_count >= max_denied_count: + return False + continue logger.info("Failed to backfill from %s because %s", dom, e) continue @@ -452,10 +491,15 @@ async def try_backfill(domains: Collection[str]) -> bool: return False - processing_end_time = self.clock.time_msec() - backfill_processing_before_timer.observe( - (processing_end_time - processing_start_time) / 1000 - ) + # If we have the `processing_start_time`, then we can make an + # observation. We wouldn't have the `processing_start_time` in the case + # where `_maybe_backfill_inner` is recursively called to find any + # backfill points regardless of `current_depth`. + if processing_start_time is not None: + processing_end_time = self.clock.time_msec() + backfill_processing_before_timer.observe( + (processing_end_time - processing_start_time) / 1000 + ) success = await try_backfill(likely_domains) if success: @@ -583,7 +627,11 @@ async def do_invite_join( # Mark the room as having partial state. # The background process is responsible for unmarking this flag, # even if the join fails. - await self.store.store_partial_state_room(room_id, ret.servers_in_room) + await self.store.store_partial_state_room( + room_id=room_id, + servers=ret.servers_in_room, + device_lists_stream_id=self.store.get_device_stream_token(), + ) try: max_stream_id = ( @@ -608,6 +656,14 @@ async def do_invite_join( room_id, ) raise LimitExceededError(msg=e.msg, errcode=e.errcode, retry_after_ms=0) + else: + # Record the join event id for future use (when we finish the full + # join). We have to do this after persisting the event to keep foreign + # key constraints intact. + if ret.partial_state: + await self.store.write_partial_state_rooms_join_event_id( + room_id, event.event_id + ) finally: # Always kick off the background process that asynchronously fetches # state for the room. @@ -804,7 +860,7 @@ async def on_make_join_request( ) # now check that we are *still* in the room - is_in_room = await self._event_auth_handler.check_host_in_room( + is_in_room = await self._event_auth_handler.is_host_in_room( room_id, self.server_name ) if not is_in_room: @@ -946,9 +1002,15 @@ async def on_invite_request( ) context = EventContext.for_outlier(self._storage_controllers) - await self._federation_event_handler.persist_events_and_notify( - event.room_id, [(event, context)] - ) + + await self._bulk_push_rule_evaluator.action_for_event_by_user(event, context) + try: + await self._federation_event_handler.persist_events_and_notify( + event.room_id, [(event, context)] + ) + except Exception: + await self.store.remove_push_actions_from_staging(event.event_id) + raise return event @@ -1150,9 +1212,7 @@ async def get_state_ids_for_pdu(self, room_id: str, event_id: str) -> List[str]: async def on_backfill_request( self, origin: str, room_id: str, pdu_list: List[str], limit: int ) -> List[EventBase]: - in_room = await self._event_auth_handler.check_host_in_room(room_id, origin) - if not in_room: - raise AuthError(403, "Host not in room.") + await self._event_auth_handler.assert_host_in_room(room_id, origin) # Synapse asks for 100 events per backfill request. Do not allow more. limit = min(limit, 100) @@ -1198,21 +1258,17 @@ async def get_persisted_pdu( event_id, allow_none=True, allow_rejected=True ) - if event: - in_room = await self._event_auth_handler.check_host_in_room( - event.room_id, origin - ) - if not in_room: - raise AuthError(403, "Host not in room.") - - events = await filter_events_for_server( - self._storage_controllers, origin, [event] - ) - event = events[0] - return event - else: + if not event: return None + await self._event_auth_handler.assert_host_in_room(event.room_id, origin) + + events = await filter_events_for_server( + self._storage_controllers, origin, [event] + ) + event = events[0] + return event + async def on_get_missing_events( self, origin: str, @@ -1221,9 +1277,7 @@ async def on_get_missing_events( latest_events: List[str], limit: int, ) -> List[EventBase]: - in_room = await self._event_auth_handler.check_host_in_room(room_id, origin) - if not in_room: - raise AuthError(403, "Host not in room.") + await self._event_auth_handler.assert_host_in_room(room_id, origin) # Only allow up to 20 events to be retrieved per request. limit = min(limit, 20) @@ -1257,7 +1311,7 @@ async def exchange_third_party_invite( "state_key": target_user_id, } - if await self._event_auth_handler.check_host_in_room(room_id, self.hs.hostname): + if await self._event_auth_handler.is_host_in_room(room_id, self.hs.hostname): room_version_obj = await self.store.get_room_version(room_id) builder = self.event_builder_factory.for_room_version( room_version_obj, event_dict @@ -1622,6 +1676,9 @@ async def _sync_partial_state_room( # https://github.com/matrix-org/synapse/issues/12994 await self.state_handler.update_current_state(room_id) + logger.info("Handling any pending device list updates") + await self._device_handler.handle_room_un_partial_stated(room_id) + logger.info("Clearing partial-state flag for %s", room_id) success = await self.store.clear_partial_state_room(room_id) if success: diff --git a/synapse/handlers/federation_event.py b/synapse/handlers/federation_event.py index efcdb8405783..778d8869b3c7 100644 --- a/synapse/handlers/federation_event.py +++ b/synapse/handlers/federation_event.py @@ -238,7 +238,7 @@ async def on_receive_pdu(self, origin: str, pdu: EventBase) -> None: # # Note that if we were never in the room then we would have already # dropped the event, since we wouldn't know the room version. - is_in_room = await self._event_auth_handler.check_host_in_room( + is_in_room = await self._event_auth_handler.is_host_in_room( room_id, self._server_name ) if not is_in_room: @@ -866,11 +866,6 @@ async def _process_pulled_event( event.room_id, event_id, str(err) ) return - except Exception as exc: - await self._store.record_event_failed_pull_attempt( - event.room_id, event_id, str(exc) - ) - raise exc try: try: @@ -913,11 +908,6 @@ async def _process_pulled_event( logger.warning("Pulled event %s failed history check.", event_id) else: raise - except Exception as exc: - await self._store.record_event_failed_pull_attempt( - event.room_id, event_id, str(exc) - ) - raise exc @trace async def _compute_event_context_with_maybe_missing_prevs( @@ -2170,6 +2160,7 @@ async def persist_events_and_notify( if instance != self._instance_name: # Limit the number of events sent over replication. We choose 200 # here as that is what we default to in `max_request_body_size(..)` + result = {} try: for batch in batch_iter(event_and_contexts, 200): result = await self._send_events( diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index ca8815c481c3..0f647013f210 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -56,13 +56,16 @@ from synapse.logging.context import make_deferred_yieldable, run_in_background from synapse.metrics.background_process_metrics import run_as_background_process from synapse.replication.http.send_event import ReplicationSendEventRestServlet +from synapse.replication.http.send_events import ReplicationSendEventsRestServlet from synapse.storage.databases.main.events import PartialStateConflictError from synapse.storage.databases.main.events_worker import EventRedactBehaviour from synapse.storage.state import StateFilter from synapse.types import ( MutableStateMap, + PersistedEventPosition, Requester, RoomAlias, + StateMap, StreamToken, UserID, create_requester, @@ -492,6 +495,7 @@ def __init__(self, hs: "HomeServer"): self.membership_types_to_include_profile_data_in.add(Membership.INVITE) self.send_event = ReplicationSendEventRestServlet.make_client(hs) + self.send_events = ReplicationSendEventsRestServlet.make_client(hs) self.request_ratelimiter = hs.get_request_ratelimiter() @@ -569,9 +573,17 @@ async def create_event( outlier: bool = False, historical: bool = False, depth: Optional[int] = None, + state_map: Optional[StateMap[str]] = None, + for_batch: bool = False, + current_state_group: Optional[int] = None, ) -> Tuple[EventBase, EventContext]: """ - Given a dict from a client, create a new event. + Given a dict from a client, create a new event. If bool for_batch is true, will + create an event using the prev_event_ids, and will create an event context for + the event using the parameters state_map and current_state_group, thus these parameters + must be provided in this case if for_batch is True. The subsequently created event + and context are suitable for being batched up and bulk persisted to the database + with other similarly created events. Creates an FrozenEvent object, filling out auth_events, prev_events, etc. @@ -614,16 +626,27 @@ async def create_event( outlier: Indicates whether the event is an `outlier`, i.e. if it's from an arbitrary point and floating in the DAG as opposed to being inline with the current DAG. + historical: Indicates whether the message is being inserted back in time around some existing events. This is used to skip a few checks and mark the event as backfilled. + depth: Override the depth used to order the event in the DAG. Should normally be set to None, which will cause the depth to be calculated based on the prev_events. + state_map: A state map of previously created events, used only when creating events + for batch persisting + + for_batch: whether the event is being created for batch persisting to the db + + current_state_group: the current state group, used only for creating events for + batch persisting + Raises: ResourceLimitError if server is blocked to some resource being exceeded + Returns: Tuple of created event, Context """ @@ -695,6 +718,9 @@ async def create_event( auth_event_ids=auth_event_ids, state_event_ids=state_event_ids, depth=depth, + state_map=state_map, + for_batch=for_batch, + current_state_group=current_state_group, ) # In an ideal world we wouldn't need the second part of this condition. However, @@ -709,10 +735,14 @@ async def create_event( # federation as well as those created locally. As of room v3, aliases events # can be created by users that are not in the room, therefore we have to # tolerate them in event_auth.check(). - prev_state_ids = await context.get_prev_state_ids( - StateFilter.from_types([(EventTypes.Member, None)]) - ) - prev_event_id = prev_state_ids.get((EventTypes.Member, event.sender)) + if for_batch: + assert state_map is not None + prev_event_id = state_map.get((EventTypes.Member, event.sender)) + else: + prev_state_ids = await context.get_prev_state_ids( + StateFilter.from_types([(EventTypes.Member, None)]) + ) + prev_event_id = prev_state_ids.get((EventTypes.Member, event.sender)) prev_event = ( await self.store.get_event(prev_event_id, allow_none=True) if prev_event_id @@ -991,8 +1021,7 @@ async def create_and_send_nonmember_event( ev = await self.handle_new_client_event( requester=requester, - event=event, - context=context, + events_and_context=[(event, context)], ratelimit=ratelimit, ignore_shadow_ban=ignore_shadow_ban, ) @@ -1011,8 +1040,16 @@ async def create_new_client_event( auth_event_ids: Optional[List[str]] = None, state_event_ids: Optional[List[str]] = None, depth: Optional[int] = None, + state_map: Optional[StateMap[str]] = None, + for_batch: bool = False, + current_state_group: Optional[int] = None, ) -> Tuple[EventBase, EventContext]: - """Create a new event for a local client + """Create a new event for a local client. If bool for_batch is true, will + create an event using the prev_event_ids, and will create an event context for + the event using the parameters state_map and current_state_group, thus these parameters + must be provided in this case if for_batch is True. The subsequently created event + and context are suitable for being batched up and bulk persisted to the database + with other similarly created events. Args: builder: @@ -1045,6 +1082,14 @@ async def create_new_client_event( Should normally be set to None, which will cause the depth to be calculated based on the prev_events. + state_map: A state map of previously created events, used only when creating events + for batch persisting + + for_batch: whether the event is being created for batch persisting to the db + + current_state_group: the current state group, used only for creating events for + batch persisting + Returns: Tuple of created event, context """ @@ -1097,64 +1142,76 @@ async def create_new_client_event( builder.type == EventTypes.Create or prev_event_ids ), "Attempting to create a non-m.room.create event with no prev_events" - event = await builder.build( - prev_event_ids=prev_event_ids, - auth_event_ids=auth_event_ids, - depth=depth, - ) + if for_batch: + assert prev_event_ids is not None + assert state_map is not None + assert current_state_group is not None + auth_ids = self._event_auth_handler.compute_auth_events(builder, state_map) + event = await builder.build( + prev_event_ids=prev_event_ids, auth_event_ids=auth_ids, depth=depth + ) + context = await self.state.compute_event_context_for_batched( + event, state_map, current_state_group + ) + else: + event = await builder.build( + prev_event_ids=prev_event_ids, + auth_event_ids=auth_event_ids, + depth=depth, + ) - # Pass on the outlier property from the builder to the event - # after it is created - if builder.internal_metadata.outlier: - event.internal_metadata.outlier = True - context = EventContext.for_outlier(self._storage_controllers) - elif ( - event.type == EventTypes.MSC2716_INSERTION - and state_event_ids - and builder.internal_metadata.is_historical() - ): - # Add explicit state to the insertion event so it has state to derive - # from even though it's floating with no `prev_events`. The rest of - # the batch can derive from this state and state_group. - # - # TODO(faster_joins): figure out how this works, and make sure that the - # old state is complete. - # https://github.com/matrix-org/synapse/issues/13003 - metadata = await self.store.get_metadata_for_events(state_event_ids) - - state_map_for_event: MutableStateMap[str] = {} - for state_id in state_event_ids: - data = metadata.get(state_id) - if data is None: - # We're trying to persist a new historical batch of events - # with the given state, e.g. via - # `RoomBatchSendEventRestServlet`. The state can be inferred - # by Synapse or set directly by the client. - # - # Either way, we should have persisted all the state before - # getting here. - raise Exception( - f"State event {state_id} not found in DB," - " Synapse should have persisted it before using it." - ) + # Pass on the outlier property from the builder to the event + # after it is created + if builder.internal_metadata.outlier: + event.internal_metadata.outlier = True + context = EventContext.for_outlier(self._storage_controllers) + elif ( + event.type == EventTypes.MSC2716_INSERTION + and state_event_ids + and builder.internal_metadata.is_historical() + ): + # Add explicit state to the insertion event so it has state to derive + # from even though it's floating with no `prev_events`. The rest of + # the batch can derive from this state and state_group. + # + # TODO(faster_joins): figure out how this works, and make sure that the + # old state is complete. + # https://github.com/matrix-org/synapse/issues/13003 + metadata = await self.store.get_metadata_for_events(state_event_ids) + + state_map_for_event: MutableStateMap[str] = {} + for state_id in state_event_ids: + data = metadata.get(state_id) + if data is None: + # We're trying to persist a new historical batch of events + # with the given state, e.g. via + # `RoomBatchSendEventRestServlet`. The state can be inferred + # by Synapse or set directly by the client. + # + # Either way, we should have persisted all the state before + # getting here. + raise Exception( + f"State event {state_id} not found in DB," + " Synapse should have persisted it before using it." + ) - if data.state_key is None: - raise Exception( - f"Trying to set non-state event {state_id} as state" - ) + if data.state_key is None: + raise Exception( + f"Trying to set non-state event {state_id} as state" + ) - state_map_for_event[(data.event_type, data.state_key)] = state_id + state_map_for_event[(data.event_type, data.state_key)] = state_id - context = await self.state.compute_event_context( - event, - state_ids_before_event=state_map_for_event, - # TODO(faster_joins): check how MSC2716 works and whether we can have - # partial state here - # https://github.com/matrix-org/synapse/issues/13003 - partial_state=False, - ) - else: - context = await self.state.compute_event_context(event) + context = await self.state.compute_event_context( + event, + state_ids_before_event=state_map_for_event, + # TODO(faster_joins): check how MSC2716 works and whether we can have + # partial state here + # https://github.com/matrix-org/synapse/issues/13003 + partial_state=False, + ) + else: + context = await self.state.compute_event_context(event) if requester: context.app_service = requester.app_service @@ -1240,14 +1297,14 @@ async def _validate_event_relation(self, event: EventBase) -> None: async def handle_new_client_event( self, requester: Requester, - event: EventBase, - context: EventContext, + events_and_context: List[Tuple[EventBase, EventContext]], ratelimit: bool = True, extra_users: Optional[List[UserID]] = None, ignore_shadow_ban: bool = False, dont_notify: bool = False, ) -> EventBase: - """Processes a new event. + """Processes new events. Please note that if batch persisting events, an error in + handling any one of these events will result in all of the events being dropped. This includes deduplicating, checking auth, persisting, notifying users, sending to remote servers, etc. @@ -1257,8 +1314,7 @@ async def handle_new_client_event( Args: requester - event - context + events_and_context: A list of one or more tuples of event, context to be persisted ratelimit extra_users: Any extra users to notify about event @@ -1278,62 +1334,63 @@ async def handle_new_client_event( """ extra_users = extra_users or [] - # we don't apply shadow-banning to membership events here. Invites are blocked - # higher up the stack, and we allow shadow-banned users to send join and leave - # events as normal. - if ( - event.type != EventTypes.Member - and not ignore_shadow_ban - and requester.shadow_banned - ): - # We randomly sleep a bit just to annoy the requester. - await self.clock.sleep(random.randint(1, 10)) - raise ShadowBanError() + for event, context in events_and_context: + # we don't apply shadow-banning to membership events here. Invites are blocked + # higher up the stack, and we allow shadow-banned users to send join and leave + # events as normal. + if ( + event.type != EventTypes.Member + and not ignore_shadow_ban + and requester.shadow_banned + ): + # We randomly sleep a bit just to annoy the requester. + await self.clock.sleep(random.randint(1, 10)) + raise ShadowBanError() - if event.is_state(): - prev_event = await self.deduplicate_state_event(event, context) - if prev_event is not None: - logger.info( - "Not bothering to persist state event %s duplicated by %s", - event.event_id, - prev_event.event_id, - ) - return prev_event + if event.is_state(): + prev_event = await self.deduplicate_state_event(event, context) + if prev_event is not None: + logger.info( + "Not bothering to persist state event %s duplicated by %s", + event.event_id, + prev_event.event_id, + ) + return prev_event - if event.internal_metadata.is_out_of_band_membership(): - # the only sort of out-of-band-membership events we expect to see here are - # invite rejections and rescinded knocks that we have generated ourselves. - assert event.type == EventTypes.Member - assert event.content["membership"] == Membership.LEAVE - else: - try: - validate_event_for_room_version(event) - await self._event_auth_handler.check_auth_rules_from_context( - event, context - ) - except AuthError as err: - logger.warning("Denying new event %r because %s", event, err) - raise err + if event.internal_metadata.is_out_of_band_membership(): + # the only sort of out-of-band-membership events we expect to see here are + # invite rejections and rescinded knocks that we have generated ourselves. + assert event.type == EventTypes.Member + assert event.content["membership"] == Membership.LEAVE + else: + try: + validate_event_for_room_version(event) + await self._event_auth_handler.check_auth_rules_from_context( + event, context + ) + except AuthError as err: + logger.warning("Denying new event %r because %s", event, err) + raise err - # Ensure that we can round trip before trying to persist in db - try: - dump = json_encoder.encode(event.content) - json_decoder.decode(dump) - except Exception: - logger.exception("Failed to encode content: %r", event.content) - raise + # Ensure that we can round trip before trying to persist in db + try: + dump = json_encoder.encode(event.content) + json_decoder.decode(dump) + except Exception: + logger.exception("Failed to encode content: %r", event.content) + raise # We now persist the event (and update the cache in parallel, since we # don't want to block on it). + event, context = events_and_context[0] try: result, _ = await make_deferred_yieldable( gather_results( ( run_in_background( - self._persist_event, + self._persist_events, requester=requester, - event=event, - context=context, + events_and_context=events_and_context, ratelimit=ratelimit, extra_users=extra_users, dont_notify=dont_notify, @@ -1358,48 +1415,50 @@ async def handle_new_client_event( return result - async def _persist_event( + async def _persist_events( self, requester: Requester, - event: EventBase, - context: EventContext, + events_and_context: List[Tuple[EventBase, EventContext]], ratelimit: bool = True, extra_users: Optional[List[UserID]] = None, dont_notify: bool = False, ) -> EventBase: - """Actually persists the event. Should only be called by + """Actually persists new events. Should only be called by `handle_new_client_event`, and see its docstring for documentation of - the arguments. + the arguments. Please note that if batch persisting events, an error in + handling any one of these events will result in all of the events being dropped. PartialStateConflictError: if attempting to persist a partial state event in a room that has been un-partial stated. """ - # Skip push notification actions for historical messages - # because we don't want to notify people about old history back in time. - # The historical messages also do not have the proper `context.current_state_ids` - # and `state_groups` because they have `prev_events` that aren't persisted yet - # (historical messages persisted in reverse-chronological order). - if not event.internal_metadata.is_historical() and not event.content.get( - EventContentFields.MSC2716_HISTORICAL - ): - with opentracing.start_active_span("calculate_push_actions"): - await self._bulk_push_rule_evaluator.action_for_event_by_user( - event, context - ) + for event, context in events_and_context: + # Skip push notification actions for historical messages + # because we don't want to notify people about old history back in time. + # The historical messages also do not have the proper `context.current_state_ids` + # and `state_groups` because they have `prev_events` that aren't persisted yet + # (historical messages persisted in reverse-chronological order). + if not event.internal_metadata.is_historical() and not event.content.get( + EventContentFields.MSC2716_HISTORICAL + ): + with opentracing.start_active_span("calculate_push_actions"): + await self._bulk_push_rule_evaluator.action_for_event_by_user( + event, context + ) try: # If we're a worker we need to hit out to the master. - writer_instance = self._events_shard_config.get_instance(event.room_id) + first_event, _ = events_and_context[0] + writer_instance = self._events_shard_config.get_instance( + first_event.room_id + ) if writer_instance != self._instance_name: try: - result = await self.send_event( + result = await self.send_events( instance_name=writer_instance, - event_id=event.event_id, + events_and_context=events_and_context, store=self.store, requester=requester, - event=event, - context=context, ratelimit=ratelimit, extra_users=extra_users, dont_notify=dont_notify, @@ -1410,6 +1469,11 @@ async def _persist_event( raise stream_id = result["stream_id"] event_id = result["event_id"] + + # If we batch persisted events we return the last persisted event, otherwise + # we return the one event that was persisted + event, _ = events_and_context[-1] + if event_id != event.event_id: # If we get a different event back then it means that its # been de-duplicated, so we replace the given event with the @@ -1422,10 +1486,9 @@ async def _persist_event( event.internal_metadata.stream_ordering = stream_id return event - event = await self.persist_and_notify_client_event( + event = await self.persist_and_notify_client_events( requester, - event, - context, + events_and_context, ratelimit=ratelimit, extra_users=extra_users, dont_notify=dont_notify, @@ -1433,9 +1496,10 @@ async def _persist_event( return event except Exception: - # Ensure that we actually remove the entries in the push actions - # staging area, if we calculated them. - await self.store.remove_push_actions_from_staging(event.event_id) + for event, _ in events_and_context: + # Ensure that we actually remove the entries in the push actions + # staging area, if we calculated them. + await self.store.remove_push_actions_from_staging(event.event_id) raise async def cache_joined_hosts_for_event( @@ -1534,24 +1598,27 @@ async def _validate_canonical_alias( Codes.BAD_ALIAS, ) - async def persist_and_notify_client_event( + async def persist_and_notify_client_events( self, requester: Requester, - event: EventBase, - context: EventContext, + events_and_context: List[Tuple[EventBase, EventContext]], ratelimit: bool = True, extra_users: Optional[List[UserID]] = None, dont_notify: bool = False, ) -> EventBase: - """Called when we have fully built the event, have already - calculated the push actions for the event, and checked auth. + """Called when we have fully built the events, have already + calculated the push actions for the events, and checked auth. This should only be run on the instance in charge of persisting events. + Please note that if batch persisting events, an error in + handling any one of these events will result in all of the events being dropped. + Returns: - The persisted event. This may be different than the given event if - it was de-duplicated (e.g. because we had already persisted an - event with the same transaction ID.) + The persisted event, if one event is passed in, or the last event in the + list in the case of batch persisting. If only one event was persisted, the + returned event may be different than the given event if it was de-duplicated + (e.g. because we had already persisted an event with the same transaction ID.) Raises: PartialStateConflictError: if attempting to persist a partial state event in @@ -1559,96 +1626,102 @@ async def persist_and_notify_client_event( """ extra_users = extra_users or [] - assert self._storage_controllers.persistence is not None - assert self._events_shard_config.should_handle( - self._instance_name, event.room_id - ) + for event, context in events_and_context: + assert self._events_shard_config.should_handle( + self._instance_name, event.room_id + ) - if ratelimit: - # We check if this is a room admin redacting an event so that we - # can apply different ratelimiting. We do this by simply checking - # it's not a self-redaction (to avoid having to look up whether the - # user is actually admin or not). - is_admin_redaction = False - if event.type == EventTypes.Redaction: - assert event.redacts is not None + if ratelimit: + # We check if this is a room admin redacting an event so that we + # can apply different ratelimiting. We do this by simply checking + # it's not a self-redaction (to avoid having to look up whether the + # user is actually admin or not). + is_admin_redaction = False + if event.type == EventTypes.Redaction: + assert event.redacts is not None + + original_event = await self.store.get_event( + event.redacts, + redact_behaviour=EventRedactBehaviour.as_is, + get_prev_content=False, + allow_rejected=False, + allow_none=True, + ) - original_event = await self.store.get_event( - event.redacts, - redact_behaviour=EventRedactBehaviour.as_is, - get_prev_content=False, - allow_rejected=False, - allow_none=True, - ) + is_admin_redaction = bool( + original_event and event.sender != original_event.sender + ) - is_admin_redaction = bool( - original_event and event.sender != original_event.sender + await self.request_ratelimiter.ratelimit( + requester, is_admin_redaction=is_admin_redaction ) - await self.request_ratelimiter.ratelimit( - requester, is_admin_redaction=is_admin_redaction - ) - - if event.type == EventTypes.Member and event.membership == Membership.JOIN: - ( - current_membership, - _, - ) = await self.store.get_local_current_membership_for_user_in_room( - event.state_key, event.room_id - ) - if current_membership != Membership.JOIN: - self._notifier.notify_user_joined_room(event.event_id, event.room_id) - - await self._maybe_kick_guest_users(event, context) - - if event.type == EventTypes.CanonicalAlias: - # Validate a newly added alias or newly added alt_aliases. + # run checks/actions on event based on type + if event.type == EventTypes.Member and event.membership == Membership.JOIN: + ( + current_membership, + _, + ) = await self.store.get_local_current_membership_for_user_in_room( + event.state_key, event.room_id + ) + if current_membership != Membership.JOIN: + self._notifier.notify_user_joined_room( + event.event_id, event.room_id + ) - original_alias = None - original_alt_aliases: object = [] + await self._maybe_kick_guest_users(event, context) - original_event_id = event.unsigned.get("replaces_state") - if original_event_id: - original_event = await self.store.get_event(original_event_id) + if event.type == EventTypes.CanonicalAlias: + # Validate a newly added alias or newly added alt_aliases. - if original_event: - original_alias = original_event.content.get("alias", None) - original_alt_aliases = original_event.content.get("alt_aliases", []) - - # Check the alias is currently valid (if it has changed). - room_alias_str = event.content.get("alias", None) - directory_handler = self.hs.get_directory_handler() - if room_alias_str and room_alias_str != original_alias: - await self._validate_canonical_alias( - directory_handler, room_alias_str, event.room_id - ) + original_alias = None + original_alt_aliases: object = [] - # Check that alt_aliases is the proper form. - alt_aliases = event.content.get("alt_aliases", []) - if not isinstance(alt_aliases, (list, tuple)): - raise SynapseError( - 400, "The alt_aliases property must be a list.", Codes.INVALID_PARAM - ) + original_event_id = event.unsigned.get("replaces_state") + if original_event_id: + original_alias_event = await self.store.get_event(original_event_id) - # If the old version of alt_aliases is of an unknown form, - # completely replace it. - if not isinstance(original_alt_aliases, (list, tuple)): - # TODO: check that the original_alt_aliases' entries are all strings - original_alt_aliases = [] + if original_alias_event: + original_alias = original_alias_event.content.get("alias", None) + original_alt_aliases = original_alias_event.content.get( + "alt_aliases", [] + ) - # Check that each alias is currently valid. - new_alt_aliases = set(alt_aliases) - set(original_alt_aliases) - if new_alt_aliases: - for alias_str in new_alt_aliases: + # Check the alias is currently valid (if it has changed). + room_alias_str = event.content.get("alias", None) + directory_handler = self.hs.get_directory_handler() + if room_alias_str and room_alias_str != original_alias: await self._validate_canonical_alias( - directory_handler, alias_str, event.room_id + directory_handler, room_alias_str, event.room_id + ) + + # Check that alt_aliases is the proper form. + alt_aliases = event.content.get("alt_aliases", []) + if not isinstance(alt_aliases, (list, tuple)): + raise SynapseError( + 400, + "The alt_aliases property must be a list.", + Codes.INVALID_PARAM, ) - federation_handler = self.hs.get_federation_handler() + # If the old version of alt_aliases is of an unknown form, + # completely replace it. + if not isinstance(original_alt_aliases, (list, tuple)): + # TODO: check that the original_alt_aliases' entries are all strings + original_alt_aliases = [] + + # Check that each alias is currently valid. + new_alt_aliases = set(alt_aliases) - set(original_alt_aliases) + if new_alt_aliases: + for alias_str in new_alt_aliases: + await self._validate_canonical_alias( + directory_handler, alias_str, event.room_id + ) + + federation_handler = self.hs.get_federation_handler() - if event.type == EventTypes.Member: - if event.content["membership"] == Membership.INVITE: - if not event.internal_metadata.outlier: + if event.type == EventTypes.Member: + if event.content["membership"] == Membership.INVITE: event.unsigned[ "invite_room_state" ] = await self.store.get_stripped_room_state_from_event_context( @@ -1657,22 +1730,21 @@ async def persist_and_notify_client_event( membership_user_id=event.sender, ) - invitee = UserID.from_string(event.state_key) - if not self.hs.is_mine(invitee): - # TODO: Can we add signature from remote server in a nicer - # way? If we have been invited by a remote server, we need - # to get them to sign the event. + invitee = UserID.from_string(event.state_key) + if not self.hs.is_mine(invitee): + # TODO: Can we add signature from remote server in a nicer + # way? If we have been invited by a remote server, we need + # to get them to sign the event. - returned_invite = await federation_handler.send_invite( - invitee.domain, event - ) - event.unsigned.pop("room_state", None) + returned_invite = await federation_handler.send_invite( + invitee.domain, event + ) + event.unsigned.pop("room_state", None) - # TODO: Make sure the signatures actually are correct. - event.signatures.update(returned_invite.signatures) + # TODO: Make sure the signatures actually are correct. + event.signatures.update(returned_invite.signatures) - if event.content["membership"] == Membership.KNOCK: - if not event.internal_metadata.outlier: + if event.content["membership"] == Membership.KNOCK: event.unsigned[ "knock_room_state" ] = await self.store.get_stripped_room_state_from_event_context( @@ -1680,163 +1752,176 @@ async def persist_and_notify_client_event( self.room_prejoin_state_types, ) - if event.type == EventTypes.Redaction: - assert event.redacts is not None - - original_event = await self.store.get_event( - event.redacts, - redact_behaviour=EventRedactBehaviour.as_is, - get_prev_content=False, - allow_rejected=False, - allow_none=True, - ) + if event.type == EventTypes.Redaction: + assert event.redacts is not None - room_version = await self.store.get_room_version_id(event.room_id) - room_version_obj = KNOWN_ROOM_VERSIONS[room_version] - - # we can make some additional checks now if we have the original event. - if original_event: - if original_event.type == EventTypes.Create: - raise AuthError(403, "Redacting create events is not permitted") - - if original_event.room_id != event.room_id: - raise SynapseError(400, "Cannot redact event from a different room") - - if original_event.type == EventTypes.ServerACL: - raise AuthError(403, "Redacting server ACL events is not permitted") - - # Add a little safety stop-gap to prevent people from trying to - # redact MSC2716 related events when they're in a room version - # which does not support it yet. We allow people to use MSC2716 - # events in existing room versions but only from the room - # creator since it does not require any changes to the auth - # rules and in effect, the redaction algorithm . In the - # supported room version, we add the `historical` power level to - # auth the MSC2716 related events and adjust the redaction - # algorthim to keep the `historical` field around (redacting an - # event should only strip fields which don't affect the - # structural protocol level). - is_msc2716_event = ( - original_event.type == EventTypes.MSC2716_INSERTION - or original_event.type == EventTypes.MSC2716_BATCH - or original_event.type == EventTypes.MSC2716_MARKER + original_event = await self.store.get_event( + event.redacts, + redact_behaviour=EventRedactBehaviour.as_is, + get_prev_content=False, + allow_rejected=False, + allow_none=True, ) - if not room_version_obj.msc2716_historical and is_msc2716_event: - raise AuthError( - 403, - "Redacting MSC2716 events is not supported in this room version", - ) - event_types = event_auth.auth_types_for_event(event.room_version, event) - prev_state_ids = await context.get_prev_state_ids( - StateFilter.from_types(event_types) - ) + room_version = await self.store.get_room_version_id(event.room_id) + room_version_obj = KNOWN_ROOM_VERSIONS[room_version] - auth_events_ids = self._event_auth_handler.compute_auth_events( - event, prev_state_ids, for_verification=True - ) - auth_events_map = await self.store.get_events(auth_events_ids) - auth_events = {(e.type, e.state_key): e for e in auth_events_map.values()} + # we can make some additional checks now if we have the original event. + if original_event: + if original_event.type == EventTypes.Create: + raise AuthError(403, "Redacting create events is not permitted") - if event_auth.check_redaction( - room_version_obj, event, auth_events=auth_events - ): - # this user doesn't have 'redact' rights, so we need to do some more - # checks on the original event. Let's start by checking the original - # event exists. - if not original_event: - raise NotFoundError("Could not find event %s" % (event.redacts,)) - - if event.user_id != original_event.user_id: - raise AuthError(403, "You don't have permission to redact events") - - # all the checks are done. - event.internal_metadata.recheck_redaction = False - - if event.type == EventTypes.Create: - prev_state_ids = await context.get_prev_state_ids() - if prev_state_ids: - raise AuthError(403, "Changing the room create event is forbidden") - - if event.type == EventTypes.MSC2716_INSERTION: - room_version = await self.store.get_room_version_id(event.room_id) - room_version_obj = KNOWN_ROOM_VERSIONS[room_version] - - create_event = await self.store.get_create_event_for_room(event.room_id) - room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR) - - # Only check an insertion event if the room version - # supports it or the event is from the room creator. - if room_version_obj.msc2716_historical or ( - self.config.experimental.msc2716_enabled - and event.sender == room_creator - ): - next_batch_id = event.content.get( - EventContentFields.MSC2716_NEXT_BATCH_ID - ) - conflicting_insertion_event_id = None - if next_batch_id: - conflicting_insertion_event_id = ( - await self.store.get_insertion_event_id_by_batch_id( - event.room_id, next_batch_id + if original_event.room_id != event.room_id: + raise SynapseError( + 400, "Cannot redact event from a different room" ) + + if original_event.type == EventTypes.ServerACL: + raise AuthError( + 403, "Redacting server ACL events is not permitted" + ) + + # Add a little safety stop-gap to prevent people from trying to + # redact MSC2716 related events when they're in a room version + # which does not support it yet. We allow people to use MSC2716 + # events in existing room versions but only from the room + # creator since it does not require any changes to the auth + # rules and in effect, the redaction algorithm . In the + # supported room version, we add the `historical` power level to + # auth the MSC2716 related events and adjust the redaction + # algorthim to keep the `historical` field around (redacting an + # event should only strip fields which don't affect the + # structural protocol level). + is_msc2716_event = ( + original_event.type == EventTypes.MSC2716_INSERTION + or original_event.type == EventTypes.MSC2716_BATCH + or original_event.type == EventTypes.MSC2716_MARKER ) - if conflicting_insertion_event_id is not None: - # The current insertion event that we're processing is invalid - # because an insertion event already exists in the room with the - # same next_batch_id. We can't allow multiple because the batch - # pointing will get weird, e.g. we can't determine which insertion - # event the batch event is pointing to. - raise SynapseError( - HTTPStatus.BAD_REQUEST, - "Another insertion event already exists with the same next_batch_id", - errcode=Codes.INVALID_PARAM, + if not room_version_obj.msc2716_historical and is_msc2716_event: + raise AuthError( + 403, + "Redacting MSC2716 events is not supported in this room version", + ) + + event_types = event_auth.auth_types_for_event(event.room_version, event) + prev_state_ids = await context.get_prev_state_ids( + StateFilter.from_types(event_types) + ) + + auth_events_ids = self._event_auth_handler.compute_auth_events( + event, prev_state_ids, for_verification=True + ) + auth_events_map = await self.store.get_events(auth_events_ids) + auth_events = { + (e.type, e.state_key): e for e in auth_events_map.values() + } + + if event_auth.check_redaction( + room_version_obj, event, auth_events=auth_events + ): + # this user doesn't have 'redact' rights, so we need to do some more + # checks on the original event. Let's start by checking the original + # event exists. + if not original_event: + raise NotFoundError( + "Could not find event %s" % (event.redacts,) + ) + + if event.user_id != original_event.user_id: + raise AuthError( + 403, "You don't have permission to redact events" + ) + + # all the checks are done. + event.internal_metadata.recheck_redaction = False + + if event.type == EventTypes.Create: + prev_state_ids = await context.get_prev_state_ids() + if prev_state_ids: + raise AuthError(403, "Changing the room create event is forbidden") + + if event.type == EventTypes.MSC2716_INSERTION: + room_version = await self.store.get_room_version_id(event.room_id) + room_version_obj = KNOWN_ROOM_VERSIONS[room_version] + + create_event = await self.store.get_create_event_for_room(event.room_id) + room_creator = create_event.content.get(EventContentFields.ROOM_CREATOR) + + # Only check an insertion event if the room version + # supports it or the event is from the room creator. + if room_version_obj.msc2716_historical or ( + self.config.experimental.msc2716_enabled + and event.sender == room_creator + ): + next_batch_id = event.content.get( + EventContentFields.MSC2716_NEXT_BATCH_ID ) + conflicting_insertion_event_id = None + if next_batch_id: + conflicting_insertion_event_id = ( + await self.store.get_insertion_event_id_by_batch_id( + event.room_id, next_batch_id + ) + ) + if conflicting_insertion_event_id is not None: + # The current insertion event that we're processing is invalid + # because an insertion event already exists in the room with the + # same next_batch_id. We can't allow multiple because the batch + # pointing will get weird, e.g. we can't determine which insertion + # event the batch event is pointing to. + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "Another insertion event already exists with the same next_batch_id", + errcode=Codes.INVALID_PARAM, + ) - # Mark any `m.historical` messages as backfilled so they don't appear - # in `/sync` and have the proper decrementing `stream_ordering` as we import - backfilled = False - if event.internal_metadata.is_historical(): - backfilled = True + # Mark any `m.historical` messages as backfilled so they don't appear + # in `/sync` and have the proper decrementing `stream_ordering` as we import + backfilled = False + if event.internal_metadata.is_historical(): + backfilled = True - # Note that this returns the event that was persisted, which may not be - # the same as we passed in if it was deduplicated due transaction IDs. + assert self._storage_controllers.persistence is not None ( - event, - event_pos, + persisted_events, max_stream_token, - ) = await self._storage_controllers.persistence.persist_event( - event, context=context, backfilled=backfilled + ) = await self._storage_controllers.persistence.persist_events( + events_and_context, backfilled=backfilled ) - if self._ephemeral_events_enabled: - # If there's an expiry timestamp on the event, schedule its expiry. - self._message_handler.maybe_schedule_expiry(event) + for event in persisted_events: + if self._ephemeral_events_enabled: + # If there's an expiry timestamp on the event, schedule its expiry. + self._message_handler.maybe_schedule_expiry(event) - if dont_notify: - # Skip notifying clients, this is used for Beeper's custom - # batch sending of non-historical messages. - return event + if event.type == EventTypes.Message: + # We don't want to block sending messages on any presence code. This + # matters as sometimes presence code can take a while. + run_in_background(self._bump_active_time, requester.user) - async def _notify() -> None: - try: - await self.notifier.on_new_room_event( - event, event_pos, max_stream_token, extra_users=extra_users - ) - except Exception: - logger.exception( - "Error notifying about new room event %s", - event.event_id, - ) + if dont_notify: + # Skip notifying clients, this is used for Beeper's custom + # batch sending of non-historical messages. + continue - run_in_background(_notify) + stream_ordering = event.internal_metadata.stream_ordering + assert stream_ordering is not None + pos = PersistedEventPosition(self._instance_name, stream_ordering) - if event.type == EventTypes.Message: - # We don't want to block sending messages on any presence code. This - # matters as sometimes presence code can take a while. - run_in_background(self._bump_active_time, requester.user) + async def _notify() -> None: + try: + await self.notifier.on_new_room_event( + event, pos, max_stream_token, extra_users=extra_users + ) + except Exception: + logger.exception( + "Error notifying about new room event %s", + event.event_id, + ) - return event + run_in_background(_notify) + + return persisted_events[-1] async def _maybe_kick_guest_users( self, event: EventBase, context: EventContext @@ -1925,8 +2010,7 @@ async def _send_dummy_event_for_room(self, room_id: str) -> bool: # shadow-banned user. await self.handle_new_client_event( requester, - event, - context, + events_and_context=[(event, context)], ratelimit=False, ignore_shadow_ban=True, ) diff --git a/synapse/handlers/push_rules.py b/synapse/handlers/push_rules.py index 2599160bcc00..1219672a59dc 100644 --- a/synapse/handlers/push_rules.py +++ b/synapse/handlers/push_rules.py @@ -16,14 +16,17 @@ import attr from synapse.api.errors import SynapseError, UnrecognizedRequestError -from synapse.push.baserules import BASE_RULE_IDS from synapse.storage.push_rule import RuleNotFoundException +from synapse.synapse_rust.push import get_base_rule_ids from synapse.types import JsonDict if TYPE_CHECKING: from synapse.server import HomeServer +BASE_RULE_IDS = get_base_rule_ids() + + @attr.s(slots=True, frozen=True, auto_attribs=True) class RuleSpec: scope: str diff --git a/synapse/handlers/receipts.py b/synapse/handlers/receipts.py index a67503c126b5..12764856550d 100644 --- a/synapse/handlers/receipts.py +++ b/synapse/handlers/receipts.py @@ -63,6 +63,8 @@ def __init__(self, hs: "HomeServer"): self.clock = self.hs.get_clock() self.state = hs.get_state_handler() + self._msc3771_enabled = hs.config.experimental.msc3771_enabled + async def _received_remote_receipt(self, origin: str, content: JsonDict) -> None: """Called when we receive an EDU of type m.receipt from a remote HS.""" receipts = [] @@ -70,7 +72,7 @@ async def _received_remote_receipt(self, origin: str, content: JsonDict) -> None # If we're not in the room just ditch the event entirely. This is # probably an old server that has come back and thinks we're still in # the room (or we've been rejoined to the room by a state reset). - is_in_room = await self.event_auth_handler.check_host_in_room( + is_in_room = await self.event_auth_handler.is_host_in_room( room_id, self.server_name ) if not is_in_room: @@ -91,13 +93,23 @@ async def _received_remote_receipt(self, origin: str, content: JsonDict) -> None ) continue + # Check if these receipts apply to a thread. + thread_id = None + data = user_values.get("data", {}) + if self._msc3771_enabled and isinstance(data, dict): + thread_id = data.get("thread_id") + # If the thread ID is invalid, consider it missing. + if not isinstance(thread_id, str): + thread_id = None + receipts.append( ReadReceipt( room_id=room_id, receipt_type=receipt_type, user_id=user_id, event_ids=user_values["event_ids"], - data=user_values.get("data", {}), + thread_id=thread_id, + data=data, ) ) @@ -114,6 +126,7 @@ async def _handle_new_receipts(self, receipts: List[ReadReceipt]) -> bool: receipt.receipt_type, receipt.user_id, receipt.event_ids, + receipt.thread_id, receipt.data, ) @@ -151,6 +164,7 @@ async def received_client_receipt( receipt_type: str, user_id: str, event_id: str, + thread_id: Optional[str], extra_content: Optional[JsonDict] = None, ) -> None: """Called when a client tells us a local user has read up to the given @@ -161,6 +175,7 @@ async def received_client_receipt( receipt_type=receipt_type, user_id=user_id, event_ids=[event_id], + thread_id=thread_id, data={"ts": int(self.clock.time_msec()), **(extra_content or {})}, ) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 20ec22105a34..ca1c7a18667e 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -220,6 +220,7 @@ async def register_user( by_admin: bool = False, user_agent_ips: Optional[List[Tuple[str, str]]] = None, auth_provider_id: Optional[str] = None, + approved: bool = False, ) -> str: """Registers a new client on the server. @@ -246,6 +247,8 @@ async def register_user( user_agent_ips: Tuples of user-agents and IP addresses used during the registration process. auth_provider_id: The SSO IdP the user used, if any. + approved: True if the new user should be considered already + approved by an administrator. Returns: The registered user_id. Raises: @@ -307,6 +310,7 @@ async def register_user( user_type=user_type, address=address, shadow_banned=shadow_banned, + approved=approved, ) profile = await self.store.get_profileinfo(localpart) @@ -695,6 +699,7 @@ async def register_with_store( user_type: Optional[str] = None, address: Optional[str] = None, shadow_banned: bool = False, + approved: bool = False, ) -> None: """Register user in the datastore. @@ -713,6 +718,7 @@ async def register_with_store( api.constants.UserTypes, or None for a normal user. address: the IP address used to perform the registration. shadow_banned: Whether to shadow-ban the user + approved: Whether to mark the user as approved by an administrator """ if self.hs.config.worker.worker_app: await self._register_client( @@ -726,6 +732,7 @@ async def register_with_store( user_type=user_type, address=address, shadow_banned=shadow_banned, + approved=approved, ) else: await self.store.register_user( @@ -738,6 +745,7 @@ async def register_with_store( admin=admin, user_type=user_type, shadow_banned=shadow_banned, + approved=approved, ) # Only call the account validity module(s) on the main process, to avoid @@ -997,7 +1005,7 @@ async def _register_email_threepid( assert user_tuple token_id = user_tuple.token_id - await self.pusher_pool.add_pusher( + await self.pusher_pool.add_or_update_pusher( user_id=user_id, access_token=token_id, kind="email", @@ -1005,7 +1013,7 @@ async def _register_email_threepid( app_display_name="Email Notifications", device_display_name=threepid["address"], pushkey=threepid["address"], - lang=None, # We don't know a user's language here + lang=None, data={}, ) diff --git a/synapse/handlers/relations.py b/synapse/handlers/relations.py index 28d7093f08cd..63bc6a7aa55f 100644 --- a/synapse/handlers/relations.py +++ b/synapse/handlers/relations.py @@ -78,6 +78,7 @@ async def get_relations( direction: str = "b", from_token: Optional[StreamToken] = None, to_token: Optional[StreamToken] = None, + include_original_event: bool = False, ) -> JsonDict: """Get related events of a event, ordered by topological ordering. @@ -94,6 +95,7 @@ async def get_relations( oldest first (`"f"`). from_token: Fetch rows from the given token, or from the start if None. to_token: Fetch rows up to the given token, or up to the end if None. + include_original_event: Whether to include the parent event. Returns: The pagination chunk. @@ -138,25 +140,24 @@ async def get_relations( is_peeking=(member_event_id is None), ) - now = self._clock.time_msec() - # Do not bundle aggregations when retrieving the original event because - # we want the content before relations are applied to it. - original_event = self._event_serializer.serialize_event( - event, now, bundle_aggregations=None - ) # The relations returned for the requested event do include their # bundled aggregations. aggregations = await self.get_bundled_aggregations( events, requester.user.to_string() ) - serialized_events = self._event_serializer.serialize_events( - events, now, bundle_aggregations=aggregations - ) - return_value = { - "chunk": serialized_events, - "original_event": original_event, + now = self._clock.time_msec() + return_value: JsonDict = { + "chunk": self._event_serializer.serialize_events( + events, now, bundle_aggregations=aggregations + ), } + if include_original_event: + # Do not bundle aggregations when retrieving the original event because + # we want the content before relations are applied to it. + return_value["original_event"] = self._event_serializer.serialize_event( + event, now, bundle_aggregations=None + ) if next_token: return_value["next_batch"] = await next_token.to_string(self._main_store) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 541224549282..b23d2d947005 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -314,8 +314,7 @@ async def _upgrade_room( # now send the tombstone await self.event_creation_handler.handle_new_client_event( requester=requester, - event=tombstone_event, - context=tombstone_context, + events_and_context=[(tombstone_event, tombstone_context)], ) state_filter = StateFilter.from_types( @@ -729,7 +728,7 @@ async def create_room( if ( self._server_notices_mxid is not None - and requester.user.to_string() == self._server_notices_mxid + and user_id == self._server_notices_mxid ): # allow the server notices mxid to create rooms is_requester_admin = True @@ -1058,7 +1057,9 @@ async def _send_events_for_new_room( creator_join_profile: Optional[JsonDict] = None, ratelimit: bool = True, ) -> Tuple[int, str, int]: - """Sends the initial events into a new room. + """Sends the initial events into a new room. Sends the room creation, membership, + and power level events into the room sequentially, then creates and batches up the + rest of the events to persist as a batch to the DB. `power_level_content_override` doesn't apply when initial state has power level state event content. @@ -1069,13 +1070,23 @@ async def _send_events_for_new_room( """ creator_id = creator.user.to_string() - event_keys = {"room_id": room_id, "sender": creator_id, "state_key": ""} - depth = 1 + + # the last event sent/persisted to the db last_sent_event_id: Optional[str] = None - def create(etype: str, content: JsonDict, **kwargs: Any) -> JsonDict: + # the most recently created event + prev_event: List[str] = [] + # a map of event types, state keys -> event_ids. We collect these mappings this as events are + # created (but not persisted to the db) to determine state for future created events + # (as this info can't be pulled from the db) + state_map: MutableStateMap[str] = {} + # current_state_group of last event created. Used for computing event context of + # events to be batched + current_state_group = None + + def create_event_dict(etype: str, content: JsonDict, **kwargs: Any) -> JsonDict: e = {"type": etype, "content": content} e.update(event_keys) @@ -1083,32 +1094,51 @@ def create(etype: str, content: JsonDict, **kwargs: Any) -> JsonDict: return e - async def send(etype: str, content: JsonDict, **kwargs: Any) -> int: - nonlocal last_sent_event_id + async def create_event( + etype: str, + content: JsonDict, + for_batch: bool, + **kwargs: Any, + ) -> Tuple[EventBase, synapse.events.snapshot.EventContext]: nonlocal depth + nonlocal prev_event - event = create(etype, content, **kwargs) - logger.debug("Sending %s in new room", etype) - # Allow these events to be sent even if the user is shadow-banned to - # allow the room creation to complete. - ( - sent_event, - last_stream_id, - ) = await self.event_creation_handler.create_and_send_nonmember_event( + event_dict = create_event_dict(etype, content, **kwargs) + + new_event, new_context = await self.event_creation_handler.create_event( creator, - event, + event_dict, + prev_event_ids=prev_event, + depth=depth, + state_map=state_map, + for_batch=for_batch, + current_state_group=current_state_group, + ) + depth += 1 + prev_event = [new_event.event_id] + state_map[(new_event.type, new_event.state_key)] = new_event.event_id + + return new_event, new_context + + async def send( + event: EventBase, + context: synapse.events.snapshot.EventContext, + creator: Requester, + ) -> int: + nonlocal last_sent_event_id + + ev = await self.event_creation_handler.handle_new_client_event( + requester=creator, + events_and_context=[(event, context)], ratelimit=False, ignore_shadow_ban=True, - # Note: we don't pass state_event_ids here because this triggers - # an additional query per event to look them up from the events table. - prev_event_ids=[last_sent_event_id] if last_sent_event_id else [], - depth=depth, ) - last_sent_event_id = sent_event.event_id - depth += 1 + last_sent_event_id = ev.event_id - return last_stream_id + # we know it was persisted, so must have a stream ordering + assert ev.internal_metadata.stream_ordering + return ev.internal_metadata.stream_ordering try: config = self._presets_dict[preset_config] @@ -1118,9 +1148,13 @@ async def send(etype: str, content: JsonDict, **kwargs: Any) -> int: ) creation_content.update({"creator": creator_id}) - await send(etype=EventTypes.Create, content=creation_content) + creation_event, creation_context = await create_event( + EventTypes.Create, creation_content, False + ) logger.debug("Sending %s in new room", EventTypes.Member) + await send(creation_event, creation_context, creator) + # Room create event must exist at this point assert last_sent_event_id is not None member_event_id, _ = await self.room_member_handler.update_membership( @@ -1134,15 +1168,22 @@ async def send(etype: str, content: JsonDict, **kwargs: Any) -> int: prev_event_ids=[last_sent_event_id], depth=depth, ) - last_sent_event_id = member_event_id + prev_event = [member_event_id] + + # update the depth and state map here as the membership event has been created + # through a different code path + depth += 1 + state_map[(EventTypes.Member, creator.user.to_string())] = member_event_id # We treat the power levels override specially as this needs to be one # of the first events that get sent into a room. pl_content = initial_state.pop((EventTypes.PowerLevels, ""), None) if pl_content is not None: - last_sent_stream_id = await send( - etype=EventTypes.PowerLevels, content=pl_content + power_event, power_context = await create_event( + EventTypes.PowerLevels, pl_content, False ) + current_state_group = power_context._state_group + await send(power_event, power_context, creator) else: power_level_content: JsonDict = { "users": {creator_id: 100}, @@ -1185,48 +1226,71 @@ async def send(etype: str, content: JsonDict, **kwargs: Any) -> int: # apply those. if power_level_content_override: power_level_content.update(power_level_content_override) - - last_sent_stream_id = await send( - etype=EventTypes.PowerLevels, content=power_level_content + pl_event, pl_context = await create_event( + EventTypes.PowerLevels, + power_level_content, + False, ) + current_state_group = pl_context._state_group + await send(pl_event, pl_context, creator) + events_to_send = [] if room_alias and (EventTypes.CanonicalAlias, "") not in initial_state: - last_sent_stream_id = await send( - etype=EventTypes.CanonicalAlias, - content={"alias": room_alias.to_string()}, + room_alias_event, room_alias_context = await create_event( + EventTypes.CanonicalAlias, {"alias": room_alias.to_string()}, True ) + current_state_group = room_alias_context._state_group + events_to_send.append((room_alias_event, room_alias_context)) if (EventTypes.JoinRules, "") not in initial_state: - last_sent_stream_id = await send( - etype=EventTypes.JoinRules, content={"join_rule": config["join_rules"]} + join_rules_event, join_rules_context = await create_event( + EventTypes.JoinRules, + {"join_rule": config["join_rules"]}, + True, ) + current_state_group = join_rules_context._state_group + events_to_send.append((join_rules_event, join_rules_context)) if (EventTypes.RoomHistoryVisibility, "") not in initial_state: - last_sent_stream_id = await send( - etype=EventTypes.RoomHistoryVisibility, - content={"history_visibility": config["history_visibility"]}, + visibility_event, visibility_context = await create_event( + EventTypes.RoomHistoryVisibility, + {"history_visibility": config["history_visibility"]}, + True, ) + current_state_group = visibility_context._state_group + events_to_send.append((visibility_event, visibility_context)) if config["guest_can_join"]: if (EventTypes.GuestAccess, "") not in initial_state: - last_sent_stream_id = await send( - etype=EventTypes.GuestAccess, - content={EventContentFields.GUEST_ACCESS: GuestAccess.CAN_JOIN}, + guest_access_event, guest_access_context = await create_event( + EventTypes.GuestAccess, + {EventContentFields.GUEST_ACCESS: GuestAccess.CAN_JOIN}, + True, ) + current_state_group = guest_access_context._state_group + events_to_send.append((guest_access_event, guest_access_context)) for (etype, state_key), content in initial_state.items(): - last_sent_stream_id = await send( - etype=etype, state_key=state_key, content=content + event, context = await create_event( + etype, content, True, state_key=state_key ) + current_state_group = context._state_group + events_to_send.append((event, context)) if config["encrypted"]: - last_sent_stream_id = await send( - etype=EventTypes.RoomEncryption, + encryption_event, encryption_context = await create_event( + EventTypes.RoomEncryption, + {"algorithm": RoomEncryptionAlgorithms.DEFAULT}, + True, state_key="", - content={"algorithm": RoomEncryptionAlgorithms.DEFAULT}, ) + events_to_send.append((encryption_event, encryption_context)) - return last_sent_stream_id, last_sent_event_id, depth + last_event = await self.event_creation_handler.handle_new_client_event( + creator, events_to_send, ignore_shadow_ban=True + ) + assert last_event.internal_metadata.stream_ordering is not None + return last_event.internal_metadata.stream_ordering, last_event.event_id, depth def _generate_room_id(self) -> str: """Generates a random room ID. @@ -1492,7 +1556,9 @@ async def get_event_for_timestamp( ) likely_domains = ( - await self._storage_controllers.state.get_current_hosts_in_room(room_id) + await self._storage_controllers.state.get_current_hosts_in_room_ordered( + room_id + ) ) # Loop through each homeserver candidate until we get a succesful response diff --git a/synapse/handlers/room_batch.py b/synapse/handlers/room_batch.py index 05a3cda6df79..5022ad7a955c 100644 --- a/synapse/handlers/room_batch.py +++ b/synapse/handlers/room_batch.py @@ -416,8 +416,7 @@ async def persist_historical_events( app_service_requester.app_service, also_allow_user, ), - event=event, - context=context, + events_and_context=[(event, context)], ) return event_ids diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index 8d01f4bf2be5..6ad2b38b8f96 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -322,6 +322,7 @@ async def _local_membership_update( require_consent: bool = True, outlier: bool = False, historical: bool = False, + origin_server_ts: Optional[int] = None, ) -> Tuple[str, int]: """ Internal membership update function to get an existing event or create @@ -361,6 +362,8 @@ async def _local_membership_update( historical: Indicates whether the message is being inserted back in time around some existing events. This is used to skip a few checks and mark the event as backfilled. + origin_server_ts: The origin_server_ts to use if a new event is created. Uses + the current timestamp if set to None. Returns: Tuple of event ID and stream ordering position @@ -399,6 +402,7 @@ async def _local_membership_update( "state_key": user_id, # For backwards compatibility: "membership": membership, + "origin_server_ts": origin_server_ts, }, txn_id=txn_id, allow_no_prev_events=allow_no_prev_events, @@ -432,8 +436,7 @@ async def _local_membership_update( with opentracing.start_active_span("handle_new_client_event"): result_event = await self.event_creation_handler.handle_new_client_event( requester, - event, - context, + events_and_context=[(event, context)], extra_users=[target], ratelimit=ratelimit, ) @@ -505,6 +508,7 @@ async def update_membership( prev_event_ids: Optional[List[str]] = None, state_event_ids: Optional[List[str]] = None, depth: Optional[int] = None, + origin_server_ts: Optional[int] = None, ) -> Tuple[str, int]: """Update a user's membership in a room. @@ -543,6 +547,8 @@ async def update_membership( depth: Override the depth used to order the event in the DAG. Should normally be set to None, which will cause the depth to be calculated based on the prev_events. + origin_server_ts: The origin_server_ts to use if a new event is created. Uses + the current timestamp if set to None. Returns: A tuple of the new event ID and stream ID. @@ -584,6 +590,7 @@ async def update_membership( prev_event_ids=prev_event_ids, state_event_ids=state_event_ids, depth=depth, + origin_server_ts=origin_server_ts, ) return result @@ -607,6 +614,7 @@ async def update_membership_locked( prev_event_ids: Optional[List[str]] = None, state_event_ids: Optional[List[str]] = None, depth: Optional[int] = None, + origin_server_ts: Optional[int] = None, ) -> Tuple[str, int]: """Helper for update_membership. @@ -647,6 +655,8 @@ async def update_membership_locked( depth: Override the depth used to order the event in the DAG. Should normally be set to None, which will cause the depth to be calculated based on the prev_events. + origin_server_ts: The origin_server_ts to use if a new event is created. Uses + the current timestamp if set to None. Returns: A tuple of the new event ID and stream ID. @@ -786,6 +796,7 @@ async def update_membership_locked( require_consent=require_consent, outlier=outlier, historical=historical, + origin_server_ts=origin_server_ts, ) latest_event_ids = await self.store.get_prev_events_for_room(room_id) @@ -1031,6 +1042,7 @@ async def update_membership_locked( content=content, require_consent=require_consent, outlier=outlier, + origin_server_ts=origin_server_ts, ) async def _should_perform_remote_join( @@ -1151,8 +1163,8 @@ async def transfer_room_state_on_room_upgrade( logger.info("Transferring room state from %s to %s", old_room_id, room_id) # Find all local users that were in the old room and copy over each user's state - users = await self.store.get_users_in_room(old_room_id) - await self.copy_user_state_on_room_upgrade(old_room_id, room_id, users) + local_users = await self.store.get_local_users_in_room(old_room_id) + await self.copy_user_state_on_room_upgrade(old_room_id, room_id, local_users) # Add new room to the room directory if the old room was there # Remove old room from the room directory @@ -1252,7 +1264,10 @@ async def send_membership_event( raise SynapseError(403, "This room has been blocked on this server") event = await self.event_creation_handler.handle_new_client_event( - requester, event, context, extra_users=[target_user], ratelimit=ratelimit + requester, + events_and_context=[(event, context)], + extra_users=[target_user], + ratelimit=ratelimit, ) prev_member_event_id = prev_state_ids.get( @@ -1860,8 +1875,7 @@ async def _generate_local_out_of_band_leave( result_event = await self.event_creation_handler.handle_new_client_event( requester, - event, - context, + events_and_context=[(event, context)], extra_users=[UserID.from_string(target_user)], ) # we know it was persisted, so must have a stream ordering diff --git a/synapse/handlers/room_summary.py b/synapse/handlers/room_summary.py index ebd445adcaf3..8d08625237bc 100644 --- a/synapse/handlers/room_summary.py +++ b/synapse/handlers/room_summary.py @@ -609,7 +609,7 @@ async def _is_local_room_accessible( # If this is a request over federation, check if the host is in the room or # has a user who could join the room. elif origin: - if await self._event_auth_handler.check_host_in_room( + if await self._event_auth_handler.is_host_in_room( room_id, origin ) or await self._store.is_host_invited(room_id, origin): return True @@ -624,9 +624,7 @@ async def _is_local_room_accessible( await self._event_auth_handler.get_rooms_that_allow_join(state_ids) ) for space_id in allowed_rooms: - if await self._event_auth_handler.check_host_in_room( - space_id, origin - ): + if await self._event_auth_handler.is_host_in_room(space_id, origin): return True logger.info( diff --git a/synapse/handlers/send_email.py b/synapse/handlers/send_email.py index e2844799e886..804cc6e81e00 100644 --- a/synapse/handlers/send_email.py +++ b/synapse/handlers/send_email.py @@ -187,6 +187,19 @@ async def send_email( multipart_msg["To"] = email_address multipart_msg["Date"] = email.utils.formatdate() multipart_msg["Message-ID"] = email.utils.make_msgid() + # Discourage automatic responses to Synapse's emails. + # Per RFC 3834, automatic responses should not be sent if the "Auto-Submitted" + # header is present with any value other than "no". See + # https://www.rfc-editor.org/rfc/rfc3834.html#section-5.1 + multipart_msg["Auto-Submitted"] = "auto-generated" + # Also include a Microsoft-Exchange specific header: + # https://learn.microsoft.com/en-us/openspecs/exchange_server_protocols/ms-oxcmail/ced68690-498a-4567-9d14-5c01f974d8b1 + # which suggests it can take the value "All" to "suppress all auto-replies", + # or a comma separated list of auto-reply classes to suppress. + # The following stack overflow question has a little more context: + # https://stackoverflow.com/a/25324691/5252017 + # https://stackoverflow.com/a/61646381/5252017 + multipart_msg["X-Auto-Response-Suppress"] = "All" multipart_msg.attach(text_part) multipart_msg.attach(html_part) diff --git a/synapse/handlers/sso.py b/synapse/handlers/sso.py index 1e171f3f7115..e035677b8a10 100644 --- a/synapse/handlers/sso.py +++ b/synapse/handlers/sso.py @@ -128,6 +128,9 @@ async def handle_redirect_request( @attr.s(auto_attribs=True) class UserAttributes: + # NB: This struct is documented in docs/sso_mapping_providers.md so that users can + # populate it with data from their own mapping providers. + # the localpart of the mxid that the mapper has assigned to the user. # if `None`, the mapper has not picked a userid, and the user should be prompted to # enter one. @@ -144,6 +147,9 @@ class UsernameMappingSession: # A unique identifier for this SSO provider, e.g. "oidc" or "saml". auth_provider_id: str + # An optional session ID from the IdP. + auth_provider_session_id: Optional[str] + # user ID on the IdP server remote_user_id: str @@ -461,6 +467,7 @@ async def complete_sso_login_request( client_redirect_url, next_step_url, extra_login_attributes, + auth_provider_session_id, ) user_id = await self._register_mapped_user( @@ -582,6 +589,7 @@ async def _redirect_to_next_new_user_step( client_redirect_url: str, next_step_url: bytes, extra_login_attributes: Optional[JsonDict], + auth_provider_session_id: Optional[str], ) -> NoReturn: """Creates a UsernameMappingSession and redirects the browser @@ -604,6 +612,8 @@ async def _redirect_to_next_new_user_step( extra_login_attributes: An optional dictionary of extra attributes to be provided to the client in the login response. + auth_provider_session_id: An optional session ID from the IdP. + Raises: RedirectException """ @@ -612,6 +622,7 @@ async def _redirect_to_next_new_user_step( now = self._clock.time_msec() session = UsernameMappingSession( auth_provider_id=auth_provider_id, + auth_provider_session_id=auth_provider_session_id, remote_user_id=remote_user_id, display_name=attributes.display_name, emails=attributes.emails, @@ -965,6 +976,7 @@ async def register_sso_user(self, request: Request, session_id: str) -> None: session.client_redirect_url, session.extra_login_attributes, new_user=True, + auth_provider_session_id=session.auth_provider_session_id, ) def _expire_old_sessions(self) -> None: diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 6b2edaeb4f41..2c26ab947b43 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1202,7 +1202,9 @@ async def _find_missing_partial_state_memberships( room_id: The partial state room to find the remaining memberships for. members_to_fetch: The memberships to find. events_with_membership_auth: A mapping from user IDs to events whose auth - events are known to contain their membership. + events would contain their prior membership, if one exists. + Note that join events will not cite a prior membership if a user has + never been in a room before. found_state_ids: A dict from (type, state_key) -> state_event_id, containing memberships that have been previously found. Entries in `members_to_fetch` that have a membership in `found_state_ids` are @@ -1212,6 +1214,10 @@ async def _find_missing_partial_state_memberships( A dict from ("m.room.member", state_key) -> state_event_id, containing the memberships missing from `found_state_ids`. + When `events_with_membership_auth` contains a join event for a given user + which does not cite a prior membership, no membership is returned for that + user. + Raises: KeyError: if `events_with_membership_auth` does not have an entry for a missing membership. Memberships in `found_state_ids` do not need an @@ -1229,8 +1235,18 @@ async def _find_missing_partial_state_memberships( if (EventTypes.Member, member) in found_state_ids: continue - missing_members.add(member) event_with_membership_auth = events_with_membership_auth[member] + is_join = ( + event_with_membership_auth.is_state() + and event_with_membership_auth.type == EventTypes.Member + and event_with_membership_auth.state_key == member + and event_with_membership_auth.content.get("membership") + == Membership.JOIN + ) + if not is_join: + # The event must include the desired membership as an auth event, unless + # it's the first join event for a given user. + missing_members.add(member) auth_event_ids.update(event_with_membership_auth.auth_event_ids()) auth_events = await self.store.get_events(auth_event_ids) @@ -1254,7 +1270,7 @@ async def _find_missing_partial_state_memberships( auth_event.type == EventTypes.Member and auth_event.state_key == member ): - missing_members.remove(member) + missing_members.discard(member) additional_state_ids[ (EventTypes.Member, member) ] = auth_event.event_id diff --git a/synapse/handlers/typing.py b/synapse/handlers/typing.py index a4cd8b8f0cee..f95369166985 100644 --- a/synapse/handlers/typing.py +++ b/synapse/handlers/typing.py @@ -340,7 +340,7 @@ async def _recv_edu(self, origin: str, content: JsonDict) -> None: # If we're not in the room just ditch the event entirely. This is # probably an old server that has come back and thinks we're still in # the room (or we've been rejoined to the room by a state reset). - is_in_room = await self.event_auth_handler.check_host_in_room( + is_in_room = await self.event_auth_handler.is_host_in_room( room_id, self.server_name ) if not is_in_room: @@ -362,11 +362,14 @@ async def _recv_edu(self, origin: str, content: JsonDict) -> None: ) return - domains = await self._storage_controllers.state.get_current_hosts_in_room( + # Let's check that the origin server is in the room before accepting the typing + # event. We don't want to block waiting on a partial state so take an + # approximation if needed. + domains = await self._storage_controllers.state.get_current_hosts_in_room_or_partial_state_approximation( room_id ) - if self.server_name in domains: + if user.domain in domains: logger.info("Got typing update from %s: %r", user_id, content) now = self.clock.time_msec() self._member_typing_until[member] = now + FEDERATION_TIMEOUT diff --git a/synapse/handlers/ui_auth/checkers.py b/synapse/handlers/ui_auth/checkers.py index a744d68c648a..332edcca24e5 100644 --- a/synapse/handlers/ui_auth/checkers.py +++ b/synapse/handlers/ui_auth/checkers.py @@ -119,6 +119,9 @@ async def check_auth(self, authdict: dict, clientip: str) -> Any: except PartialDownloadError as pde: # Twisted is silly data = pde.response + # For mypy's benefit. A general Error.response is Optional[bytes], but + # a PartialDownloadError.response should be bytes AFAICS. + assert data is not None resp_body = json_decoder.decode(data.decode("utf-8")) if "success" in resp_body: diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 8c3c52e1caa6..3610b6bf785e 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -13,7 +13,7 @@ # limitations under the License. import logging -from typing import TYPE_CHECKING, Any, Dict, List, Optional +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple import synapse.metrics from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules, Membership @@ -379,7 +379,7 @@ async def _upsert_directory_entry_for_remote_user( user_id, event.content.get("displayname"), event.content.get("avatar_url") ) - async def _track_user_joined_room(self, room_id: str, user_id: str) -> None: + async def _track_user_joined_room(self, room_id: str, joining_user_id: str) -> None: """Someone's just joined a room. Update `users_in_public_rooms` or `users_who_share_private_rooms` as appropriate. @@ -390,32 +390,44 @@ async def _track_user_joined_room(self, room_id: str, user_id: str) -> None: room_id ) if is_public: - await self.store.add_users_in_public_rooms(room_id, (user_id,)) + await self.store.add_users_in_public_rooms(room_id, (joining_user_id,)) else: users_in_room = await self.store.get_users_in_room(room_id) other_users_in_room = [ other for other in users_in_room - if other != user_id + if other != joining_user_id and ( + # We can't apply any special rules to remote users so + # they're always included not self.is_mine_id(other) + # Check the special rules whether the local user should be + # included in the user directory or await self.store.should_include_local_user_in_dir(other) ) ] - to_insert = set() + updates_to_users_who_share_rooms: Set[Tuple[str, str]] = set() - # First, if they're our user then we need to update for every user - if self.is_mine_id(user_id): + # First, if the joining user is our local user then we need an + # update for every other user in the room. + if self.is_mine_id(joining_user_id): for other_user_id in other_users_in_room: - to_insert.add((user_id, other_user_id)) + updates_to_users_who_share_rooms.add( + (joining_user_id, other_user_id) + ) - # Next we need to update for every local user in the room + # Next, we need an update for every other local user in the room + # that they now share a room with the joining user. for other_user_id in other_users_in_room: if self.is_mine_id(other_user_id): - to_insert.add((other_user_id, user_id)) + updates_to_users_who_share_rooms.add( + (other_user_id, joining_user_id) + ) - if to_insert: - await self.store.add_users_who_share_private_room(room_id, to_insert) + if updates_to_users_who_share_rooms: + await self.store.add_users_who_share_private_room( + room_id, updates_to_users_who_share_rooms + ) async def _handle_remove_user(self, room_id: str, user_id: str) -> None: """Called when when someone leaves a room. The user may be local or remote. diff --git a/synapse/http/proxyagent.py b/synapse/http/proxyagent.py index b2a50c910507..1f8227896f65 100644 --- a/synapse/http/proxyagent.py +++ b/synapse/http/proxyagent.py @@ -36,6 +36,7 @@ from twisted.web.http_headers import Headers from twisted.web.iweb import IAgent, IBodyProducer, IPolicyForHTTPS +from synapse.http import redact_uri from synapse.http.connectproxyclient import HTTPConnectProxyEndpoint, ProxyCredentials from synapse.types import ISynapseReactor @@ -220,7 +221,11 @@ def request( self._reactor, parsed_uri.host, parsed_uri.port, **self._endpoint_kwargs ) - logger.debug("Requesting %s via %s", uri, endpoint) + logger.debug( + "Requesting %s via %s", + redact_uri(uri.decode("ascii", errors="replace")), + endpoint, + ) if parsed_uri.scheme == b"https": tls_connection_creator = self._policy_for_https.creatorForNetloc( diff --git a/synapse/http/server.py b/synapse/http/server.py index 6068a94b409a..bcbfac2c9fff 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -705,7 +705,7 @@ def stopProducing(self) -> None: self._request = None -def _encode_json_bytes(json_object: Any) -> bytes: +def _encode_json_bytes(json_object: object) -> bytes: """ Encode an object into JSON. Returns an iterator of bytes. """ @@ -746,7 +746,7 @@ def respond_with_json( return None if canonical_json: - encoder = encode_canonical_json + encoder: Callable[[object], bytes] = encode_canonical_json else: encoder = _encode_json_bytes diff --git a/synapse/logging/context.py b/synapse/logging/context.py index a488c27d21c4..e2f4a1c80ba5 100644 --- a/synapse/logging/context.py +++ b/synapse/logging/context.py @@ -587,7 +587,7 @@ def filter(self, record: logging.LogRecord) -> Literal[True]: True to include the record in the log output. """ context = current_context() - record.request = self._default_request # type: ignore + record.request = self._default_request # context should never be None, but if it somehow ends up being, then # we end up in a death spiral of infinite loops, so let's check, for @@ -595,22 +595,22 @@ def filter(self, record: logging.LogRecord) -> Literal[True]: if context is not None: # Logging is interested in the request ID. Note that for backwards # compatibility this is stored as the "request" on the record. - record.request = str(context) # type: ignore + record.request = str(context) # Add some data from the HTTP request. request = context.request if request is None: return True - record.ip_address = request.ip_address # type: ignore - record.site_tag = request.site_tag # type: ignore - record.requester = request.requester # type: ignore - record.authenticated_entity = request.authenticated_entity # type: ignore - record.method = request.method # type: ignore - record.url = request.url # type: ignore - record.protocol = request.protocol # type: ignore - record.user_agent = request.user_agent # type: ignore - record.beeper_bridge = request.beeper_bridge # type: ignore + record.ip_address = request.ip_address + record.site_tag = request.site_tag + record.requester = request.requester + record.authenticated_entity = request.authenticated_entity + record.method = request.method + record.url = request.url + record.protocol = request.protocol + record.user_agent = request.user_agent + record.beeper_bridge = request.beeper_bridge return True diff --git a/synapse/logging/opentracing.py b/synapse/logging/opentracing.py index ca2735dd6dc0..8ce5a2a33818 100644 --- a/synapse/logging/opentracing.py +++ b/synapse/logging/opentracing.py @@ -992,9 +992,9 @@ def _wrapping_logic( # FIXME: We could update this to handle any type of function by ignoring the # first argument only if it's named `self` or `cls`. This isn't fool-proof # but handles the idiomatic cases. - for i, arg in enumerate(args[1:], start=1): # type: ignore[index] + for i, arg in enumerate(args[1:], start=1): set_tag(SynapseTags.FUNC_ARG_PREFIX + argspec.args[i], str(arg)) - set_tag(SynapseTags.FUNC_ARGS, str(args[len(argspec.args) :])) # type: ignore[index] + set_tag(SynapseTags.FUNC_ARGS, str(args[len(argspec.args) :])) set_tag(SynapseTags.FUNC_KWARGS, str(kwargs)) yield diff --git a/synapse/module_api/__init__.py b/synapse/module_api/__init__.py index 87ba154cb737..6a6ae208d157 100644 --- a/synapse/module_api/__init__.py +++ b/synapse/module_api/__init__.py @@ -125,7 +125,7 @@ ) from synapse.util import Clock from synapse.util.async_helpers import maybe_awaitable -from synapse.util.caches.descriptors import cached +from synapse.util.caches.descriptors import CachedFunction, cached from synapse.util.frozenutils import freeze if TYPE_CHECKING: @@ -748,6 +748,40 @@ def record_user_external_id( ) ) + async def create_login_token( + self, + user_id: str, + duration_in_ms: int = (2 * 60 * 1000), + auth_provider_id: Optional[str] = None, + auth_provider_session_id: Optional[str] = None, + ) -> str: + """Create a login token suitable for m.login.token authentication + + Added in Synapse v1.69.0. + + Args: + user_id: gives the ID of the user that the token is for + + duration_in_ms: the time that the token will be valid for + + auth_provider_id: the ID of the SSO IdP that the user used to authenticate + to get this token, if any. This is encoded in the token so that + /login can report stats on number of successful logins by IdP. + + auth_provider_session_id: The session ID got during login from the SSO IdP, + if any. + """ + # The deprecated `generate_short_term_login_token` method defaulted to an empty + # string for the `auth_provider_id` because of how the underlying macaroon was + # generated. This will change to a proper NULL-able field when the tokens get + # moved to the database. + return self._hs.get_macaroon_generator().generate_short_term_login_token( + user_id, + auth_provider_id or "", + auth_provider_session_id, + duration_in_ms, + ) + def generate_short_term_login_token( self, user_id: str, @@ -759,6 +793,9 @@ def generate_short_term_login_token( Added in Synapse v1.9.0. + This was deprecated in Synapse v1.69.0 in favor of create_login_token, and will + be removed in Synapse 1.71.0. + Args: user_id: gives the ID of the user that the token is for @@ -768,6 +805,11 @@ def generate_short_term_login_token( to get this token, if any. This is encoded in the token so that /login can report stats on number of successful logins by IdP. """ + logger.warn( + "A module configured on this server uses ModuleApi.generate_short_term_login_token(), " + "which is deprecated in favor of ModuleApi.create_login_token(), and will be removed in " + "Synapse 1.71.0", + ) return self._hs.get_macaroon_generator().generate_short_term_login_token( user_id, auth_provider_id, @@ -836,29 +878,39 @@ def run_db_interaction( self._store.db_pool.runInteraction(desc, func, *args, **kwargs) # type: ignore[arg-type] ) - def complete_sso_login( - self, registered_user_id: str, request: SynapseRequest, client_redirect_url: str - ) -> None: - """Complete a SSO login by redirecting the user to a page to confirm whether they - want their access token sent to `client_redirect_url`, or redirect them to that - URL with a token directly if the URL matches with one of the whitelisted clients. + def register_cached_function(self, cached_func: CachedFunction) -> None: + """Register a cached function that should be invalidated across workers. + Invalidation local to a worker can be done directly using `cached_func.invalidate`, + however invalidation that needs to go to other workers needs to call `invalidate_cache` + on the module API instead. - This is deprecated in favor of complete_sso_login_async. - - Added in Synapse v1.11.1. + Added in Synapse v1.69.0. Args: - registered_user_id: The MXID that has been registered as a previous step of - of this SSO login. - request: The request to respond to. - client_redirect_url: The URL to which to offer to redirect the user (or to - redirect them directly if whitelisted). + cached_function: The cached function that will be registered to receive invalidation + locally and from other workers. """ - self._auth_handler._complete_sso_login( - registered_user_id, - "", - request, - client_redirect_url, + self._store.register_external_cached_function( + f"{cached_func.__module__}.{cached_func.__name__}", cached_func + ) + + async def invalidate_cache( + self, cached_func: CachedFunction, keys: Tuple[Any, ...] + ) -> None: + """Invalidate a cache entry of a cached function across workers. The cached function + needs to be registered on all workers first with `register_cached_function`. + + Added in Synapse v1.69.0. + + Args: + cached_function: The cached function that needs an invalidation + keys: keys of the entry to invalidate, usually matching the arguments of the + cached function. + """ + cached_func.invalidate(keys) + await self._store.send_invalidation_to_replication( + f"{cached_func.__module__}.{cached_func.__name__}", + keys, ) async def complete_sso_login_async( diff --git a/synapse/push/__init__.py b/synapse/push/__init__.py index 57c4d70466b6..a0c760239db8 100644 --- a/synapse/push/__init__.py +++ b/synapse/push/__init__.py @@ -116,6 +116,8 @@ class PusherConfig: last_stream_ordering: int last_success: Optional[int] failing_since: Optional[int] + enabled: bool + device_id: Optional[str] def as_dict(self) -> Dict[str, Any]: """Information that can be retrieved about a pusher after creation.""" @@ -128,6 +130,8 @@ def as_dict(self) -> Dict[str, Any]: "lang": self.lang, "profile_tag": self.profile_tag, "pushkey": self.pushkey, + "enabled": self.enabled, + "device_id": self.device_id, } diff --git a/synapse/push/baserules.py b/synapse/push/baserules.py deleted file mode 100644 index 158974e61baf..000000000000 --- a/synapse/push/baserules.py +++ /dev/null @@ -1,701 +0,0 @@ -# Copyright 2015, 2016 OpenMarket Ltd -# Copyright 2017 New Vector Ltd -# Copyright 2019 The Matrix.org Foundation C.I.C. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -""" -Push rules is the system used to determine which events trigger a push (and a -bump in notification counts). - -This consists of a list of "push rules" for each user, where a push rule is a -pair of "conditions" and "actions". When a user receives an event Synapse -iterates over the list of push rules until it finds one where all the conditions -match the event, at which point "actions" describe the outcome (e.g. notify, -highlight, etc). - -Push rules are split up into 5 different "kinds" (aka "priority classes"), which -are run in order: - 1. Override — highest priority rules, e.g. always ignore notices - 2. Content — content specific rules, e.g. @ notifications - 3. Room — per room rules, e.g. enable/disable notifications for all messages - in a room - 4. Sender — per sender rules, e.g. never notify for messages from a given - user - 5. Underride — the lowest priority "default" rules, e.g. notify for every - message. - -The set of "base rules" are the list of rules that every user has by default. A -user can modify their copy of the push rules in one of three ways: - - 1. Adding a new push rule of a certain kind - 2. Changing the actions of a base rule - 3. Enabling/disabling a base rule. - -The base rules are split into whether they come before or after a particular -kind, so the order of push rule evaluation would be: base rules for before -"override" kind, user defined "override" rules, base rules after "override" -kind, etc, etc. -""" - -import itertools -import logging -from typing import Dict, Iterator, List, Mapping, Sequence, Tuple, Union - -import attr - -from synapse.config.experimental import ExperimentalConfig -from synapse.push.rulekinds import PRIORITY_CLASS_MAP - -logger = logging.getLogger(__name__) - - -@attr.s(auto_attribs=True, slots=True, frozen=True) -class PushRule: - """A push rule - - Attributes: - rule_id: a unique ID for this rule - priority_class: what "kind" of push rule this is (see - `PRIORITY_CLASS_MAP` for mapping between int and kind) - conditions: the sequence of conditions that all need to match - actions: the actions to apply if all conditions are met - default: is this a base rule? - default_enabled: is this enabled by default? - """ - - rule_id: str - priority_class: int - conditions: Sequence[Mapping[str, str]] - actions: Sequence[Union[str, Mapping]] - default: bool = False - default_enabled: bool = True - - -@attr.s(auto_attribs=True, slots=True, frozen=True, weakref_slot=False) -class PushRules: - """A collection of push rules for an account. - - Can be iterated over, producing push rules in priority order. - """ - - # A mapping from rule ID to push rule that overrides a base rule. These will - # be returned instead of the base rule. - overriden_base_rules: Dict[str, PushRule] = attr.Factory(dict) - - # The following stores the custom push rules at each priority class. - # - # We keep these separate (rather than combining into one big list) to avoid - # copying the base rules around all the time. - override: List[PushRule] = attr.Factory(list) - content: List[PushRule] = attr.Factory(list) - room: List[PushRule] = attr.Factory(list) - sender: List[PushRule] = attr.Factory(list) - underride: List[PushRule] = attr.Factory(list) - - def __iter__(self) -> Iterator[PushRule]: - # When iterating over the push rules we need to return the base rules - # interspersed at the correct spots. - for rule in itertools.chain( - BASE_PREPEND_OVERRIDE_RULES, - self.override, - BASE_APPEND_OVERRIDE_RULES, - self.content, - BASE_APPEND_CONTENT_RULES, - self.room, - self.sender, - self.underride, - BASE_APPEND_UNDERRIDE_RULES, - ): - # Check if a base rule has been overriden by a custom rule. If so - # return that instead. - override_rule = self.overriden_base_rules.get(rule.rule_id) - if override_rule: - yield override_rule - else: - yield rule - - def __len__(self) -> int: - # The length is mostly used by caches to get a sense of "size" / amount - # of memory this object is using, so we only count the number of custom - # rules. - return ( - len(self.overriden_base_rules) - + len(self.override) - + len(self.content) - + len(self.room) - + len(self.sender) - + len(self.underride) - ) - - -@attr.s(auto_attribs=True, slots=True, frozen=True, weakref_slot=False) -class FilteredPushRules: - """A wrapper around `PushRules` that filters out disabled experimental push - rules, and includes the "enabled" state for each rule when iterated over. - """ - - push_rules: PushRules - enabled_map: Dict[str, bool] - experimental_config: ExperimentalConfig - - def __iter__(self) -> Iterator[Tuple[PushRule, bool]]: - for rule in self.push_rules: - if not _is_experimental_rule_enabled( - rule.rule_id, self.experimental_config - ): - continue - - enabled = self.enabled_map.get(rule.rule_id, rule.default_enabled) - - yield rule, enabled - - def __len__(self) -> int: - return len(self.push_rules) - - -DEFAULT_EMPTY_PUSH_RULES = PushRules() - - -def compile_push_rules(rawrules: List[PushRule]) -> PushRules: - """Given a set of custom push rules return a `PushRules` instance (which - includes the base rules). - """ - - if not rawrules: - # Fast path to avoid allocating empty lists when there are no custom - # rules for the user. - return DEFAULT_EMPTY_PUSH_RULES - - rules = PushRules() - - for rule in rawrules: - # We need to decide which bucket each custom push rule goes into. - - # If it has the same ID as a base rule then it overrides that... - overriden_base_rule = BASE_RULES_BY_ID.get(rule.rule_id) - if overriden_base_rule: - rules.overriden_base_rules[rule.rule_id] = attr.evolve( - overriden_base_rule, actions=rule.actions - ) - continue - - # ... otherwise it gets added to the appropriate priority class bucket - collection: List[PushRule] - if rule.priority_class == 5: - collection = rules.override - elif rule.priority_class == 4: - collection = rules.content - elif rule.priority_class == 3: - collection = rules.room - elif rule.priority_class == 2: - collection = rules.sender - elif rule.priority_class == 1: - collection = rules.underride - elif rule.priority_class <= 0: - logger.info( - "Got rule with priority class less than zero, but doesn't override a base rule: %s", - rule, - ) - continue - else: - # We log and continue here so as not to break event sending - logger.error("Unknown priority class: %", rule.priority_class) - continue - - collection.append(rule) - - return rules - - -def _is_experimental_rule_enabled( - rule_id: str, experimental_config: ExperimentalConfig -) -> bool: - """Used by `FilteredPushRules` to filter out experimental rules when they - have not been enabled. - """ - if ( - rule_id == "global/override/.org.matrix.msc3786.rule.room.server_acl" - and not experimental_config.msc3786_enabled - ): - return False - if ( - rule_id == "global/underride/.org.matrix.msc3772.thread_reply" - and not experimental_config.msc3772_enabled - ): - return False - return True - - -BASE_APPEND_CONTENT_RULES = [ - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["content"], - rule_id="global/content/.m.rule.contains_user_name", - conditions=[ - { - "kind": "event_match", - "key": "content.body", - # Match the localpart of the requester's MXID. - "pattern_type": "user_localpart", - } - ], - actions=[ - "notify", - {"set_tweak": "sound", "value": "default"}, - {"set_tweak": "highlight"}, - ], - ) -] - - -BASE_PREPEND_OVERRIDE_RULES = [ - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.m.rule.master", - default_enabled=False, - conditions=[], - actions=["dont_notify"], - ) -] - - -BASE_APPEND_OVERRIDE_RULES = [ - # Disable notifications for auto-accepted room invites - # NOTE: this rule must be a higher prio than .m.rule.invite_for_me because - # that will also match the same events. - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.com.beeper.suppress_auto_invite", - conditions=[ - { # member event - "kind": "event_match", - "key": "type", - "pattern": "m.room.member", - "_cache_key": "_member", - }, - { # with membership = invite - "kind": "event_match", - "key": "content.membership", - "pattern": "invite", - "_cache_key": "_invite_member", - }, - { # matching the current user ID - "kind": "event_match", - "key": "state_key", - "pattern_type": "user_id", - }, - { # with the will_auto_accept field - "kind": "event_match", - "key": "content.fi.mau.will_auto_accept", - "pattern": "true", - "_cache_key": "_will_auto_accept", - }, - ], - actions=["dont_notify"], - ), - # We don't want to notify on edits in Beeper land. Not only can this be confusing in real time (2 notifications, - # one message) but it's also especially confusing when a bridge needs to edit a previously backfilled message. - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.com.beeper.suppress_edits", - conditions=[ - { - "kind": "event_match", - "key": "content.m.relates_to.rel_type", - "pattern": "m.replace", - "_cache_key": "_m_relates_to_rel_type", - }, - ], - actions=["dont_notify"], - ), - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.m.rule.suppress_send_message_status", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "com.beeper.message_send_status", - "_cache_key": "_suppress_send_message_status", - } - ], - actions=["dont_notify"], - ), - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.m.rule.suppress_power_levels", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.power_levels", - "_cache_key": "_suppress_power_levels", - } - ], - actions=["dont_notify"], - ), - # NB. .m.rule.invite_for_me must be higher prio than .m.rule.member_event - # otherwise invites will be matched by .m.rule.member_event - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.m.rule.invite_for_me", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.member", - "_cache_key": "_member", - }, - { - "kind": "event_match", - "key": "content.membership", - "pattern": "invite", - "_cache_key": "_invite_member", - }, - # Match the requester's MXID. - {"kind": "event_match", "key": "state_key", "pattern_type": "user_id"}, - ], - actions=[ - "notify", - {"set_tweak": "sound", "value": "default"}, - {"set_tweak": "highlight", "value": False}, - ], - ), - # Will we sometimes want to know about people joining and leaving? - # Perhaps: if so, this could be expanded upon. Seems the most usual case - # is that we don't though. We add this override rule so that even if - # the room rule is set to notify, we don't get notifications about - # join/leave/avatar/displayname events. - # See also: https://matrix.org/jira/browse/SYN-607 - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.m.rule.member_event", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.member", - "_cache_key": "_member", - } - ], - actions=["dont_notify"], - ), - # This was changed from underride to override so it's closer in priority - # to the content rules where the user name highlight rule lives. This - # way a room rule is lower priority than both but a custom override rule - # is higher priority than both. - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.m.rule.contains_display_name", - conditions=[{"kind": "contains_display_name"}], - actions=[ - "notify", - {"set_tweak": "sound", "value": "default"}, - {"set_tweak": "highlight"}, - ], - ), - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.m.rule.roomnotif", - conditions=[ - { - "kind": "event_match", - "key": "content.body", - "pattern": "@room", - "_cache_key": "_roomnotif_content", - }, - { - "kind": "sender_notification_permission", - "key": "room", - "_cache_key": "_roomnotif_pl", - }, - ], - actions=["notify", {"set_tweak": "highlight", "value": True}], - ), - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.m.rule.tombstone", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.tombstone", - "_cache_key": "_tombstone", - }, - { - "kind": "event_match", - "key": "state_key", - "pattern": "", - "_cache_key": "_tombstone_statekey", - }, - ], - actions=["notify", {"set_tweak": "highlight", "value": True}], - ), - # NOTE: upstream has a blanket rule that blocks all notifications for reactions, - # this is a modified rule that blocks reactions to *other users* events. This means - # any user supplied "noisy" rules don't accidentally trigger notifications for reactions - # to other users messages. - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.com.beeper.reaction", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.reaction", - "_cache_key": "_reaction", - }, - # Don't notify if this reaction is to someone elses event - { - "kind": "inverse_related_event_match", - "key": "sender", - "pattern_type": "user_id", - }, - ], - actions=["dont_notify"], - ), - # XXX: This is an experimental rule that is only enabled if msc3786_enabled - # is enabled, if it is not the rule gets filtered out in _load_rules() in - # PushRulesWorkerStore - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["override"], - rule_id="global/override/.org.matrix.msc3786.rule.room.server_acl", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.server_acl", - "_cache_key": "_room_server_acl", - }, - { - "kind": "event_match", - "key": "state_key", - "pattern": "", - "_cache_key": "_room_server_acl_state_key", - }, - ], - actions=[], - ), -] - - -BASE_APPEND_UNDERRIDE_RULES = [ - # Beeper change: this rule is moved down from override. This means room - # rules take precedence, so if you enable bot notifications (by modifying - # this rule) notifications will not be sent for muted rooms. - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["underride"], - rule_id="global/underride/.m.rule.suppress_notices", - conditions=[ - { - "kind": "event_match", - "key": "content.msgtype", - "pattern": "m.notice", - "_cache_key": "_suppress_notices", - } - ], - actions=["dont_notify"], - ), - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["underride"], - rule_id="global/underride/.m.rule.call", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.call.invite", - "_cache_key": "_call", - } - ], - actions=[ - "notify", - {"set_tweak": "sound", "value": "ring"}, - {"set_tweak": "highlight", "value": False}, - ], - ), - # XXX: once m.direct is standardised everywhere, we should use it to detect - # a DM from the user's perspective rather than this heuristic. - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["underride"], - rule_id="global/underride/.m.rule.room_one_to_one", - conditions=[ - {"kind": "room_member_count", "is": "2", "_cache_key": "member_count"}, - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.message", - "_cache_key": "_message", - }, - ], - actions=[ - "notify", - {"set_tweak": "sound", "value": "default"}, - {"set_tweak": "highlight", "value": False}, - ], - ), - # XXX: this is going to fire for events which aren't m.room.messages - # but are encrypted (e.g. m.call.*)... - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["underride"], - rule_id="global/underride/.m.rule.encrypted_room_one_to_one", - conditions=[ - {"kind": "room_member_count", "is": "2", "_cache_key": "member_count"}, - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.encrypted", - "_cache_key": "_encrypted", - }, - ], - actions=[ - "notify", - {"set_tweak": "sound", "value": "default"}, - {"set_tweak": "highlight", "value": False}, - ], - ), - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["underride"], - rule_id="global/underride/.org.matrix.msc3772.thread_reply", - conditions=[ - { - "kind": "org.matrix.msc3772.relation_match", - "rel_type": "m.thread", - # Match the requester's MXID. - "sender_type": "user_id", - } - ], - actions=["notify", {"set_tweak": "highlight", "value": False}], - ), - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["underride"], - rule_id="global/underride/.m.rule.message", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.message", - "_cache_key": "_message", - } - ], - actions=["notify", {"set_tweak": "highlight", "value": False}], - ), - # XXX: this is going to fire for events which aren't m.room.messages - # but are encrypted (e.g. m.call.*)... - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["underride"], - rule_id="global/underride/.m.rule.encrypted", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.room.encrypted", - "_cache_key": "_encrypted", - } - ], - actions=["notify", {"set_tweak": "highlight", "value": False}], - ), - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["underride"], - rule_id="global/underride/.im.vector.jitsi", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "im.vector.modular.widgets", - "_cache_key": "_type_modular_widgets", - }, - { - "kind": "event_match", - "key": "content.type", - "pattern": "jitsi", - "_cache_key": "_content_type_jitsi", - }, - { - "kind": "event_match", - "key": "state_key", - "pattern": "*", - "_cache_key": "_is_state_event", - }, - ], - actions=["notify", {"set_tweak": "highlight", "value": False}], - ), - # Enable notifications for reactions - PushRule( - default=True, - priority_class=PRIORITY_CLASS_MAP["underride"], - rule_id="global/underride/.com.beeper.reaction", - conditions=[ - { - "kind": "event_match", - "key": "type", - "pattern": "m.reaction", - "_cache_key": "_reaction", - }, - # Only send reaction notifications for smaller rooms (under 20 members) - { - "kind": "room_member_count", - "is": "<20", - "_cache_key": "_member_count", - }, - # Only send notification if the reaction is to your message - { - "kind": "related_event_match", - "key": "sender", - "pattern_type": "user_id", - }, - ], - actions=["notify", {"set_tweak": "highlight", "value": False}], - ), -] - - -BASE_RULE_IDS = set() - -BASE_RULES_BY_ID: Dict[str, PushRule] = {} - -for r in BASE_APPEND_CONTENT_RULES: - BASE_RULE_IDS.add(r.rule_id) - BASE_RULES_BY_ID[r.rule_id] = r - -for r in BASE_PREPEND_OVERRIDE_RULES: - BASE_RULE_IDS.add(r.rule_id) - BASE_RULES_BY_ID[r.rule_id] = r - -for r in BASE_APPEND_OVERRIDE_RULES: - BASE_RULE_IDS.add(r.rule_id) - BASE_RULES_BY_ID[r.rule_id] = r - -for r in BASE_APPEND_UNDERRIDE_RULES: - BASE_RULE_IDS.add(r.rule_id) - BASE_RULES_BY_ID[r.rule_id] = r diff --git a/synapse/push/bulk_push_rule_evaluator.py b/synapse/push/bulk_push_rule_evaluator.py index 07c862ee1583..031cbd10f2c9 100644 --- a/synapse/push/bulk_push_rule_evaluator.py +++ b/synapse/push/bulk_push_rule_evaluator.py @@ -17,6 +17,7 @@ import logging from typing import ( TYPE_CHECKING, + Any, Collection, Dict, Iterable, @@ -35,13 +36,11 @@ from synapse.state import POWER_KEY from synapse.storage.databases.main.roommember import EventIdMembership from synapse.storage.state import StateFilter +from synapse.synapse_rust.push import FilteredPushRules, PushRule, PushRuleEvaluator from synapse.util.caches import register_cache from synapse.util.metrics import measure_func from synapse.visibility import filter_event_for_clients_with_state -from .baserules import FilteredPushRules, PushRule -from .push_rule_evaluator import PushRuleEvaluatorForEvent - if TYPE_CHECKING: from synapse.server import HomeServer @@ -154,7 +153,11 @@ async def _get_rules_for_event( async def _get_power_levels_and_sender_level( self, event: EventBase, context: EventContext - ) -> Tuple[dict, int]: + ) -> Tuple[dict, Optional[int]]: + # There are no power levels and sender levels possible to get from outlier + if event.internal_metadata.is_outlier(): + return {}, None + event_types = auth_types_for_event(event.room_version, event) prev_state_ids = await context.get_prev_state_ids( StateFilter.from_types(event_types) @@ -231,8 +234,8 @@ async def action_for_event_by_user( should increment the unread count, and insert the results into the event_push_actions_staging table. """ - if event.internal_metadata.is_outlier(): - # This can happen due to out of band memberships + if not event.internal_metadata.is_notifiable(): + # Push rules for events that aren't notifiable can't be processed by this return rules_by_user = await self._get_rules_for_event(event) @@ -262,16 +265,24 @@ async def action_for_event_by_user( thread_id = "main" if relation: relations = await self._get_mutual_relations( - relation.parent_id, itertools.chain(*rules_by_user.values()) + relation.parent_id, + itertools.chain(*(r.rules() for r in rules_by_user.values())), ) if relation.rel_type == RelationTypes.THREAD: thread_id = relation.parent_id - evaluator = PushRuleEvaluatorForEvent( - event, + # It's possible that old room versions have non-integer power levels (floats or + # strings). Workaround this by explicitly converting to int. + notification_levels = power_levels.get("notifications", {}) + if not event.room_version.msc3667_int_only_power_levels: + for user_id, level in notification_levels.items(): + notification_levels[user_id] = int(level) + + evaluator = PushRuleEvaluator( + _flatten_dict(event), non_bot_room_member_count, sender_power_level, - power_levels, + notification_levels, related_event, relations, self._relations_match_enabled, @@ -282,20 +293,10 @@ async def action_for_event_by_user( event.room_id, users ) - # This is a check for the case where user joins a room without being - # allowed to see history, and then the server receives a delayed event - # from before the user joined, which they should not be pushed for - uids_with_visibility = await filter_event_for_clients_with_state( - self.store, users, event, context - ) - for uid, rules in rules_by_user.items(): if event.sender == uid: continue - if uid not in uids_with_visibility: - continue - display_name = None profile = profiles.get(uid) if profile: @@ -322,22 +323,35 @@ async def action_for_event_by_user( # current user, it'll be added to the dict later. actions_by_user[uid] = [] - for rule, enabled in rules: - if not enabled: - continue + actions = evaluator.run(rules, uid, display_name) + if "notify" in actions: + # Push rules say we should notify the user of this event + actions_by_user[uid] = actions + logger.info( + "Staging notify action to userID=%s for eventID=%s", + uid, + event.event_id, + ) + + # If there aren't any actions then we can skip the rest of the + # processing. + if not actions_by_user: + return + + # This is a check for the case where user joins a room without being + # allowed to see history, and then the server receives a delayed event + # from before the user joined, which they should not be pushed for + # + # We do this *after* calculating the push actions as a) its unlikely + # that we'll filter anyone out and b) for large rooms its likely that + # most users will have push disabled and so the set of users to check is + # much smaller. + uids_with_visibility = await filter_event_for_clients_with_state( + self.store, actions_by_user.keys(), event, context + ) - matches = evaluator.check_conditions(rule.conditions, uid, display_name) - if matches: - actions = [x for x in rule.actions if x != "dont_notify"] - if actions and "notify" in actions: - # Push rules say we should notify the user of this event - actions_by_user[uid] = actions - logger.info( - "Staging notify action to userID=%s for eventID=%s", - uid, - event.event_id, - ) - break + for user_id in set(actions_by_user).difference(uids_with_visibility): + actions_by_user.pop(user_id, None) # Mark in the DB staging area the push actions for users who should be # notified for this event. (This will then get handled when we persist @@ -354,3 +368,21 @@ async def action_for_event_by_user( Rule = Dict[str, dict] RulesByUser = Dict[str, List[Rule]] StateGroup = Union[object, int] + + +def _flatten_dict( + d: Union[EventBase, Mapping[str, Any]], + prefix: Optional[List[str]] = None, + result: Optional[Dict[str, str]] = None, +) -> Dict[str, str]: + if prefix is None: + prefix = [] + if result is None: + result = {} + for key, value in d.items(): + if isinstance(value, str): + result[".".join(prefix + [key])] = value.lower() + elif isinstance(value, Mapping): + _flatten_dict(value, prefix=(prefix + [key]), result=result) + + return result diff --git a/synapse/push/clientformat.py b/synapse/push/clientformat.py index 1662acfe1a38..7095ae83f931 100644 --- a/synapse/push/clientformat.py +++ b/synapse/push/clientformat.py @@ -16,10 +16,9 @@ from typing import Any, Dict, List, Optional from synapse.push.rulekinds import PRIORITY_CLASS_INVERSE_MAP, PRIORITY_CLASS_MAP +from synapse.synapse_rust.push import FilteredPushRules, PushRule from synapse.types import UserID -from .baserules import FilteredPushRules, PushRule - def format_push_rules_for_user( user: UserID, ruleslist: FilteredPushRules @@ -34,7 +33,7 @@ def format_push_rules_for_user( rules["global"] = _add_empty_priority_class_arrays(rules["global"]) - for r, enabled in ruleslist: + for r, enabled in ruleslist.rules(): template_name = _priority_class_to_template_name(r.priority_class) rulearray = rules["global"][template_name] diff --git a/synapse/push/httppusher.py b/synapse/push/httppusher.py index 4013e0bc4ee5..38f013c326e9 100644 --- a/synapse/push/httppusher.py +++ b/synapse/push/httppusher.py @@ -14,7 +14,7 @@ # limitations under the License. import logging import urllib.parse -from typing import TYPE_CHECKING, Any, Dict, Iterable, Optional, Union +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Union from prometheus_client import Counter @@ -28,7 +28,7 @@ from synapse.push import Pusher, PusherConfig, PusherConfigException from synapse.storage.databases.main.event_push_actions import HttpPushAction -from . import push_rule_evaluator, push_tools +from . import push_tools if TYPE_CHECKING: from synapse.server import HomeServer @@ -56,6 +56,39 @@ ) +def tweaks_for_actions(actions: List[Union[str, Dict]]) -> Dict[str, Any]: + """ + Converts a list of actions into a `tweaks` dict (which can then be passed to + the push gateway). + + This function ignores all actions other than `set_tweak` actions, and treats + absent `value`s as `True`, which agrees with the only spec-defined treatment + of absent `value`s (namely, for `highlight` tweaks). + + Args: + actions: list of actions + e.g. [ + {"set_tweak": "a", "value": "AAA"}, + {"set_tweak": "b", "value": "BBB"}, + {"set_tweak": "highlight"}, + "notify" + ] + + Returns: + dictionary of tweaks for those actions + e.g. {"a": "AAA", "b": "BBB", "highlight": True} + """ + tweaks = {} + for a in actions: + if not isinstance(a, dict): + continue + if "set_tweak" in a: + # value is allowed to be absent in which case the value assumed + # should be True. + tweaks[a["set_tweak"]] = a.get("value", True) + return tweaks + + class HttpPusher(Pusher): INITIAL_BACKOFF_SEC = 1 # in seconds because that's what Twisted takes MAX_BACKOFF_SEC = 60 * 60 @@ -291,7 +324,7 @@ async def _process_one(self, push_action: HttpPushAction) -> bool: if "notify" not in push_action.actions: return True - tweaks = push_rule_evaluator.tweaks_for_actions(push_action.actions) + tweaks = tweaks_for_actions(push_action.actions) badge = await push_tools.get_badge_count( self.hs.get_datastores().main, self.user_id, diff --git a/synapse/push/push_rule_evaluator.py b/synapse/push/push_rule_evaluator.py deleted file mode 100644 index 92bf7680bb9b..000000000000 --- a/synapse/push/push_rule_evaluator.py +++ /dev/null @@ -1,387 +0,0 @@ -# Copyright 2015, 2016 OpenMarket Ltd -# Copyright 2017 New Vector Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import logging -import re -from typing import ( - Any, - Dict, - List, - Mapping, - Optional, - Pattern, - Sequence, - Set, - Tuple, - Union, -) - -from matrix_common.regex import glob_to_regex, to_word_pattern - -from synapse.events import EventBase -from synapse.types import UserID -from synapse.util.caches.lrucache import LruCache - -logger = logging.getLogger(__name__) - - -GLOB_REGEX = re.compile(r"\\\[(\\\!|)(.*)\\\]") -IS_GLOB = re.compile(r"[\?\*\[\]]") -INEQUALITY_EXPR = re.compile("^([=<>]*)([0-9]*)$") - - -def _room_member_count( - ev: EventBase, condition: Mapping[str, Any], room_member_count: int -) -> bool: - return _test_ineq_condition(condition, room_member_count) - - -def _sender_notification_permission( - ev: EventBase, - condition: Mapping[str, Any], - sender_power_level: int, - power_levels: Dict[str, Union[int, Dict[str, int]]], -) -> bool: - notif_level_key = condition.get("key") - if notif_level_key is None: - return False - - notif_levels = power_levels.get("notifications", {}) - assert isinstance(notif_levels, dict) - room_notif_level = notif_levels.get(notif_level_key, 50) - - return sender_power_level >= room_notif_level - - -def _test_ineq_condition(condition: Mapping[str, Any], number: int) -> bool: - if "is" not in condition: - return False - m = INEQUALITY_EXPR.match(condition["is"]) - if not m: - return False - ineq = m.group(1) - rhs = m.group(2) - if not rhs.isdigit(): - return False - rhs_int = int(rhs) - - if ineq == "" or ineq == "==": - return number == rhs_int - elif ineq == "<": - return number < rhs_int - elif ineq == ">": - return number > rhs_int - elif ineq == ">=": - return number >= rhs_int - elif ineq == "<=": - return number <= rhs_int - else: - return False - - -def tweaks_for_actions(actions: List[Union[str, Dict]]) -> Dict[str, Any]: - """ - Converts a list of actions into a `tweaks` dict (which can then be passed to - the push gateway). - - This function ignores all actions other than `set_tweak` actions, and treats - absent `value`s as `True`, which agrees with the only spec-defined treatment - of absent `value`s (namely, for `highlight` tweaks). - - Args: - actions: list of actions - e.g. [ - {"set_tweak": "a", "value": "AAA"}, - {"set_tweak": "b", "value": "BBB"}, - {"set_tweak": "highlight"}, - "notify" - ] - - Returns: - dictionary of tweaks for those actions - e.g. {"a": "AAA", "b": "BBB", "highlight": True} - """ - tweaks = {} - for a in actions: - if not isinstance(a, dict): - continue - if "set_tweak" in a: - # value is allowed to be absent in which case the value assumed - # should be True. - tweaks[a["set_tweak"]] = a.get("value", True) - return tweaks - - -class PushRuleEvaluatorForEvent: - def __init__( - self, - event: EventBase, - room_member_count: int, - sender_power_level: int, - power_levels: Dict[str, Union[int, Dict[str, int]]], - related_event: Optional[EventBase], - relations: Dict[str, Set[Tuple[str, str]]], - relations_match_enabled: bool, - ): - self._event = event - self._room_member_count = room_member_count - self._sender_power_level = sender_power_level - self._power_levels = power_levels - self._relations = relations - self._relations_match_enabled = relations_match_enabled - - # Maps strings of e.g. 'content.body' -> event["content"]["body"] - self._value_cache = _flatten_dict(event) - - self._related_event = related_event - self._related_event_value_cache = ( - _flatten_dict(related_event) if related_event else {} - ) - - # Maps cache keys to final values. - self._condition_cache: Dict[str, bool] = {} - - def check_conditions( - self, conditions: Sequence[Mapping], uid: str, display_name: Optional[str] - ) -> bool: - """ - Returns true if a user's conditions/user ID/display name match the event. - - Args: - conditions: The user's conditions to match. - uid: The user's MXID. - display_name: The display name. - - Returns: - True if all conditions match the event, False otherwise. - """ - for cond in conditions: - _cache_key = cond.get("_cache_key", None) - if _cache_key: - res = self._condition_cache.get(_cache_key, None) - if res is False: - return False - elif res is True: - continue - - res = self.matches(cond, uid, display_name) - if _cache_key: - self._condition_cache[_cache_key] = bool(res) - - if not res: - return False - - return True - - def matches( - self, condition: Mapping[str, Any], user_id: str, display_name: Optional[str] - ) -> bool: - """ - Returns true if a user's condition/user ID/display name match the event. - - Args: - condition: The user's condition to match. - uid: The user's MXID. - display_name: The display name, or None if there is not one. - - Returns: - True if the condition matches the event, False otherwise. - """ - if condition["kind"] == "event_match": - return self._event_match(condition, user_id, self._event, self._value_cache) - elif condition["kind"] in ( - "related_event_match", - "inverse_related_event_match", - ): - if not self._related_event: - return False - is_match = self._event_match( - condition, user_id, self._related_event, self._related_event_value_cache - ) - if condition["kind"] == "inverse_related_event_match": - return not is_match - return is_match - elif condition["kind"] == "contains_display_name": - return self._contains_display_name(display_name) - elif condition["kind"] == "room_member_count": - return _room_member_count(self._event, condition, self._room_member_count) - elif condition["kind"] == "sender_notification_permission": - return _sender_notification_permission( - self._event, condition, self._sender_power_level, self._power_levels - ) - elif ( - condition["kind"] == "org.matrix.msc3772.relation_match" - and self._relations_match_enabled - ): - return self._relation_match(condition, user_id) - else: - # XXX This looks incorrect -- we have reached an unknown condition - # kind and are unconditionally returning that it matches. Note - # that it seems possible to provide a condition to the /pushrules - # endpoint with an unknown kind, see _rule_tuple_from_request_object. - return True - - def _event_match( - self, - condition: Mapping, - user_id: str, - event: EventBase, - event_value_cache: Dict[str, str], - ) -> bool: - """ - Check an "event_match" push rule condition. - - Args: - condition: The "event_match" push rule condition to match. - user_id: The user's MXID. - - Returns: - True if the condition matches the event, False otherwise. - """ - pattern = condition.get("pattern", None) - - if not pattern: - pattern_type = condition.get("pattern_type", None) - if pattern_type == "user_id": - pattern = user_id - elif pattern_type == "user_localpart": - pattern = UserID.from_string(user_id).localpart - - if not pattern: - logger.warning("event_match condition with no pattern") - return False - - # XXX: optimisation: cache our pattern regexps - if condition["key"] == "content.body": - body = event.content.get("body", None) - if not body or not isinstance(body, str): - return False - - return _glob_matches(pattern, body, word_boundary=True) - else: - haystack = event_value_cache.get(condition["key"], None) - if haystack is None: - return False - - return _glob_matches(pattern, haystack) - - def _contains_display_name(self, display_name: Optional[str]) -> bool: - """ - Check an "event_match" push rule condition. - - Args: - display_name: The display name, or None if there is not one. - - Returns: - True if the display name is found in the event body, False otherwise. - """ - if not display_name: - return False - - body = self._event.content.get("body", None) - if not body or not isinstance(body, str): - return False - - # Similar to _glob_matches, but do not treat display_name as a glob. - r = regex_cache.get((display_name, False, True), None) - if not r: - r1 = re.escape(display_name) - r1 = to_word_pattern(r1) - r = re.compile(r1, flags=re.IGNORECASE) - regex_cache[(display_name, False, True)] = r - - return bool(r.search(body)) - - def _relation_match(self, condition: Mapping, user_id: str) -> bool: - """ - Check an "relation_match" push rule condition. - - Args: - condition: The "event_match" push rule condition to match. - user_id: The user's MXID. - - Returns: - True if the condition matches the event, False otherwise. - """ - rel_type = condition.get("rel_type") - if not rel_type: - logger.warning("relation_match condition missing rel_type") - return False - - sender_pattern = condition.get("sender") - if sender_pattern is None: - sender_type = condition.get("sender_type") - if sender_type == "user_id": - sender_pattern = user_id - type_pattern = condition.get("type") - - # If any other relations matches, return True. - for sender, event_type in self._relations.get(rel_type, ()): - if sender_pattern and not _glob_matches(sender_pattern, sender): - continue - if type_pattern and not _glob_matches(type_pattern, event_type): - continue - # All values must have matched. - return True - - # No relations matched. - return False - - -# Caches (string, is_glob, word_boundary) -> regex for push. See _glob_matches -regex_cache: LruCache[Tuple[str, bool, bool], Pattern] = LruCache( - 50000, "regex_push_cache" -) - - -def _glob_matches(glob: str, value: str, word_boundary: bool = False) -> bool: - """Tests if value matches glob. - - Args: - glob - value: String to test against glob. - word_boundary: Whether to match against word boundaries or entire - string. Defaults to False. - """ - - try: - r = regex_cache.get((glob, True, word_boundary), None) - if not r: - r = glob_to_regex(glob, word_boundary=word_boundary) - regex_cache[(glob, True, word_boundary)] = r - return bool(r.search(value)) - except re.error: - logger.warning("Failed to parse glob to regex: %r", glob) - return False - - -def _flatten_dict( - d: Union[EventBase, Mapping[str, Any]], - prefix: Optional[List[str]] = None, - result: Optional[Dict[str, str]] = None, -) -> Dict[str, str]: - if prefix is None: - prefix = [] - if result is None: - result = {} - for key, value in d.items(): - if isinstance(value, bool): - value = str(value) - if isinstance(value, str): - result[".".join(prefix + [key])] = value.lower() - elif isinstance(value, Mapping): - _flatten_dict(value, prefix=(prefix + [key]), result=result) - - return result diff --git a/synapse/push/pusherpool.py b/synapse/push/pusherpool.py index 1e0ef44fc786..e2648cbc93c9 100644 --- a/synapse/push/pusherpool.py +++ b/synapse/push/pusherpool.py @@ -94,7 +94,7 @@ def start(self) -> None: return run_as_background_process("start_pushers", self._start_pushers) - async def add_pusher( + async def add_or_update_pusher( self, user_id: str, access_token: Optional[int], @@ -106,6 +106,8 @@ async def add_pusher( lang: Optional[str], data: JsonDict, profile_tag: str = "", + enabled: bool = True, + device_id: Optional[str] = None, ) -> Optional[Pusher]: """Creates a new pusher and adds it to the pool @@ -147,9 +149,22 @@ async def add_pusher( last_stream_ordering=last_stream_ordering, last_success=None, failing_since=None, + enabled=enabled, + device_id=device_id, ) ) + # Before we actually persist the pusher, we check if the user already has one + # this app ID and pushkey. If so, we want to keep the access token and device ID + # in place, since this could be one device modifying (e.g. enabling/disabling) + # another device's pusher. + existing_config = await self._get_pusher_config_for_user_by_app_id_and_pushkey( + user_id, app_id, pushkey + ) + if existing_config: + access_token = existing_config.access_token + device_id = existing_config.device_id + await self.store.add_pusher( user_id=user_id, access_token=access_token, @@ -163,8 +178,10 @@ async def add_pusher( data=data, last_stream_ordering=last_stream_ordering, profile_tag=profile_tag, + enabled=enabled, + device_id=device_id, ) - pusher = await self.start_pusher_by_id(app_id, pushkey, user_id) + pusher = await self.process_pusher_change_by_id(app_id, pushkey, user_id) return pusher @@ -276,10 +293,25 @@ async def on_new_receipts( except Exception: logger.exception("Exception in pusher on_new_receipts") - async def start_pusher_by_id( + async def _get_pusher_config_for_user_by_app_id_and_pushkey( + self, user_id: str, app_id: str, pushkey: str + ) -> Optional[PusherConfig]: + resultlist = await self.store.get_pushers_by_app_id_and_pushkey(app_id, pushkey) + + pusher_config = None + for r in resultlist: + if r.user_name == user_id: + pusher_config = r + + return pusher_config + + async def process_pusher_change_by_id( self, app_id: str, pushkey: str, user_id: str ) -> Optional[Pusher]: - """Look up the details for the given pusher, and start it + """Look up the details for the given pusher, and either start it if its + "enabled" flag is True, or try to stop it otherwise. + + If the pusher is new and its "enabled" flag is False, the stop is a noop. Returns: The pusher started, if any @@ -290,12 +322,13 @@ async def start_pusher_by_id( if not self._pusher_shard_config.should_handle(self._instance_name, user_id): return None - resultlist = await self.store.get_pushers_by_app_id_and_pushkey(app_id, pushkey) + pusher_config = await self._get_pusher_config_for_user_by_app_id_and_pushkey( + user_id, app_id, pushkey + ) - pusher_config = None - for r in resultlist: - if r.user_name == user_id: - pusher_config = r + if pusher_config and not pusher_config.enabled: + self.maybe_stop_pusher(app_id, pushkey, user_id) + return None pusher = None if pusher_config: @@ -305,7 +338,7 @@ async def start_pusher_by_id( async def _start_pushers(self) -> None: """Start all the pushers""" - pushers = await self.store.get_all_pushers() + pushers = await self.store.get_enabled_pushers() # Stagger starting up the pushers so we don't completely drown the # process on start up. @@ -363,6 +396,8 @@ async def _start_pusher(self, pusher_config: PusherConfig) -> Optional[Pusher]: synapse_pushers.labels(type(pusher).__name__, pusher.app_id).inc() + logger.info("Starting pusher %s / %s", pusher.user_id, appid_pushkey) + # Check if there *may* be push to process. We do this as this check is a # lot cheaper to do than actually fetching the exact rows we need to # push. @@ -382,16 +417,7 @@ async def _start_pusher(self, pusher_config: PusherConfig) -> Optional[Pusher]: return pusher async def remove_pusher(self, app_id: str, pushkey: str, user_id: str) -> None: - appid_pushkey = "%s:%s" % (app_id, pushkey) - - byuser = self.pushers.get(user_id, {}) - - if appid_pushkey in byuser: - logger.info("Stopping pusher %s / %s", user_id, appid_pushkey) - pusher = byuser.pop(appid_pushkey) - pusher.on_stop() - - synapse_pushers.labels(type(pusher).__name__, pusher.app_id).dec() + self.maybe_stop_pusher(app_id, pushkey, user_id) # We can only delete pushers on master. if self._remove_pusher_client: @@ -402,3 +428,22 @@ async def remove_pusher(self, app_id: str, pushkey: str, user_id: str) -> None: await self.store.delete_pusher_by_app_id_pushkey_user_id( app_id, pushkey, user_id ) + + def maybe_stop_pusher(self, app_id: str, pushkey: str, user_id: str) -> None: + """Stops a pusher with the given app ID and push key if one is running. + + Args: + app_id: the pusher's app ID. + pushkey: the pusher's push key. + user_id: the user the pusher belongs to. Only used for logging. + """ + appid_pushkey = "%s:%s" % (app_id, pushkey) + + byuser = self.pushers.get(user_id, {}) + + if appid_pushkey in byuser: + logger.info("Stopping pusher %s / %s", user_id, appid_pushkey) + pusher = byuser.pop(appid_pushkey) + pusher.on_stop() + + synapse_pushers.labels(type(pusher).__name__, pusher.app_id).dec() diff --git a/synapse/replication/http/__init__.py b/synapse/replication/http/__init__.py index 53aa7fa4c6bd..ac9a92240af2 100644 --- a/synapse/replication/http/__init__.py +++ b/synapse/replication/http/__init__.py @@ -25,6 +25,7 @@ push, register, send_event, + send_events, state, streams, ) @@ -43,6 +44,7 @@ def __init__(self, hs: "HomeServer"): def register_servlets(self, hs: "HomeServer") -> None: send_event.register_servlets(hs, self) + send_events.register_servlets(hs, self) federation.register_servlets(hs, self) presence.register_servlets(hs, self) membership.register_servlets(hs, self) diff --git a/synapse/replication/http/register.py b/synapse/replication/http/register.py index 6c8f8388fd60..976c2833603d 100644 --- a/synapse/replication/http/register.py +++ b/synapse/replication/http/register.py @@ -39,6 +39,16 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self.registration_handler = hs.get_registration_handler() + # Default value if the worker that sent the replication request did not include + # an 'approved' property. + if ( + hs.config.experimental.msc3866.enabled + and hs.config.experimental.msc3866.require_approval_for_new_accounts + ): + self._approval_default = False + else: + self._approval_default = True + @staticmethod async def _serialize_payload( # type: ignore[override] user_id: str, @@ -51,6 +61,7 @@ async def _serialize_payload( # type: ignore[override] user_type: Optional[str], address: Optional[str], shadow_banned: bool, + approved: bool, ) -> JsonDict: """ Args: @@ -68,6 +79,8 @@ async def _serialize_payload( # type: ignore[override] or None for a normal user. address: the IP address used to perform the regitration. shadow_banned: Whether to shadow-ban the user + approved: Whether the user should be considered already approved by an + administrator. """ return { "password_hash": password_hash, @@ -79,6 +92,7 @@ async def _serialize_payload( # type: ignore[override] "user_type": user_type, "address": address, "shadow_banned": shadow_banned, + "approved": approved, } async def _handle_request( # type: ignore[override] @@ -88,6 +102,12 @@ async def _handle_request( # type: ignore[override] await self.registration_handler.check_registration_ratelimit(content["address"]) + # Always default admin users to approved (since it means they were created by + # an admin). + approved_default = self._approval_default + if content["admin"]: + approved_default = True + await self.registration_handler.register_with_store( user_id=user_id, password_hash=content["password_hash"], @@ -99,6 +119,7 @@ async def _handle_request( # type: ignore[override] user_type=content["user_type"], address=content["address"], shadow_banned=content["shadow_banned"], + approved=content.get("approved", approved_default), ) return 200, {} diff --git a/synapse/replication/http/send_event.py b/synapse/replication/http/send_event.py index 3a8c9e3fd22f..a3b5d7cc1e55 100644 --- a/synapse/replication/http/send_event.py +++ b/synapse/replication/http/send_event.py @@ -145,10 +145,9 @@ async def _handle_request( # type: ignore[override] "Got event to send with ID: %s into room: %s", event.event_id, event.room_id ) - event = await self.event_creation_handler.persist_and_notify_client_event( + event = await self.event_creation_handler.persist_and_notify_client_events( requester, - event, - context, + [(event, context)], ratelimit=ratelimit, extra_users=extra_users, dont_notify=dont_notify, diff --git a/synapse/replication/http/send_events.py b/synapse/replication/http/send_events.py new file mode 100644 index 000000000000..8889bbb644e1 --- /dev/null +++ b/synapse/replication/http/send_events.py @@ -0,0 +1,171 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from typing import TYPE_CHECKING, List, Tuple + +from twisted.web.server import Request + +from synapse.api.room_versions import KNOWN_ROOM_VERSIONS +from synapse.events import EventBase, make_event_from_dict +from synapse.events.snapshot import EventContext +from synapse.http.server import HttpServer +from synapse.http.servlet import parse_json_object_from_request +from synapse.replication.http._base import ReplicationEndpoint +from synapse.types import JsonDict, Requester, UserID +from synapse.util.metrics import Measure + +if TYPE_CHECKING: + from synapse.server import HomeServer + from synapse.storage.databases.main import DataStore + +logger = logging.getLogger(__name__) + + +class ReplicationSendEventsRestServlet(ReplicationEndpoint): + """Handles batches of newly created events on workers, including persisting and + notifying. + + The API looks like: + + POST /_synapse/replication/send_events/:txn_id + + { + "events": [{ + "event": { .. serialized event .. }, + "room_version": .., // "1", "2", "3", etc: the version of the room + // containing the event + "event_format_version": .., // 1,2,3 etc: the event format version + "internal_metadata": { .. serialized internal_metadata .. }, + "outlier": true|false, + "rejected_reason": .., // The event.rejected_reason field + "context": { .. serialized event context .. }, + "requester": { .. serialized requester .. }, + "ratelimit": true, + }] + } + + 200 OK + + { "stream_id": 12345, "event_id": "$abcdef..." } + + Responds with a 409 when a `PartialStateConflictError` is raised due to an event + context that needs to be recomputed due to the un-partial stating of a room. + + """ + + NAME = "send_events" + PATH_ARGS = () + + def __init__(self, hs: "HomeServer"): + super().__init__(hs) + + self.event_creation_handler = hs.get_event_creation_handler() + self.store = hs.get_datastores().main + self._storage_controllers = hs.get_storage_controllers() + self.clock = hs.get_clock() + + @staticmethod + async def _serialize_payload( # type: ignore[override] + events_and_context: List[Tuple[EventBase, EventContext]], + store: "DataStore", + requester: Requester, + ratelimit: bool, + extra_users: List[UserID], + ) -> JsonDict: + """ + Args: + store + requester + events_and_ctx + ratelimit + """ + serialized_events = [] + + for event, context in events_and_context: + serialized_context = await context.serialize(event, store) + serialized_event = { + "event": event.get_pdu_json(), + "room_version": event.room_version.identifier, + "event_format_version": event.format_version, + "internal_metadata": event.internal_metadata.get_dict(), + "outlier": event.internal_metadata.is_outlier(), + "rejected_reason": event.rejected_reason, + "context": serialized_context, + "requester": requester.serialize(), + "ratelimit": ratelimit, + "extra_users": [u.to_string() for u in extra_users], + } + serialized_events.append(serialized_event) + + payload = {"events": serialized_events} + + return payload + + async def _handle_request( # type: ignore[override] + self, request: Request + ) -> Tuple[int, JsonDict]: + with Measure(self.clock, "repl_send_events_parse"): + payload = parse_json_object_from_request(request) + events_and_context = [] + events = payload["events"] + + for event_payload in events: + event_dict = event_payload["event"] + room_ver = KNOWN_ROOM_VERSIONS[event_payload["room_version"]] + internal_metadata = event_payload["internal_metadata"] + rejected_reason = event_payload["rejected_reason"] + + event = make_event_from_dict( + event_dict, room_ver, internal_metadata, rejected_reason + ) + event.internal_metadata.outlier = event_payload["outlier"] + + requester = Requester.deserialize( + self.store, event_payload["requester"] + ) + context = EventContext.deserialize( + self._storage_controllers, event_payload["context"] + ) + + ratelimit = event_payload["ratelimit"] + events_and_context.append((event, context)) + + extra_users = [ + UserID.from_string(u) for u in event_payload["extra_users"] + ] + + logger.info( + "Got batch of events to send, last ID of batch is: %s, sending into room: %s", + event.event_id, + event.room_id, + ) + + last_event = ( + await self.event_creation_handler.persist_and_notify_client_events( + requester, events_and_context, ratelimit, extra_users + ) + ) + + return ( + 200, + { + "stream_id": last_event.internal_metadata.stream_ordering, + "event_id": last_event.event_id, + }, + ) + + +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: + ReplicationSendEventsRestServlet(hs).register(http_server) diff --git a/synapse/replication/tcp/client.py b/synapse/replication/tcp/client.py index e4f2201c922f..b2522f98cade 100644 --- a/synapse/replication/tcp/client.py +++ b/synapse/replication/tcp/client.py @@ -189,7 +189,9 @@ async def on_rdata( if row.deleted: self.stop_pusher(row.user_id, row.app_id, row.pushkey) else: - await self.start_pusher(row.user_id, row.app_id, row.pushkey) + await self.process_pusher_change( + row.user_id, row.app_id, row.pushkey + ) elif stream_name == EventsStream.NAME: # We shouldn't get multiple rows per token for events stream, so # we don't need to optimise this for multiple rows. @@ -334,13 +336,15 @@ def stop_pusher(self, user_id: str, app_id: str, pushkey: str) -> None: logger.info("Stopping pusher %r / %r", user_id, key) pusher.on_stop() - async def start_pusher(self, user_id: str, app_id: str, pushkey: str) -> None: + async def process_pusher_change( + self, user_id: str, app_id: str, pushkey: str + ) -> None: if not self._notify_pushers: return key = "%s:%s" % (app_id, pushkey) logger.info("Starting pusher %r / %r", user_id, key) - await self._pusher_pool.start_pusher_by_id(app_id, pushkey, user_id) + await self._pusher_pool.process_pusher_change_by_id(app_id, pushkey, user_id) class FederationSenderHandler: @@ -423,7 +427,8 @@ async def _on_new_receipts( receipt.receipt_type, receipt.user_id, [receipt.event_id], - receipt.data, + thread_id=receipt.thread_id, + data=receipt.data, ) await self.federation_sender.send_read_receipt(receipt_info) diff --git a/synapse/replication/tcp/streams/_base.py b/synapse/replication/tcp/streams/_base.py index 398bebeaa659..e01155ad597b 100644 --- a/synapse/replication/tcp/streams/_base.py +++ b/synapse/replication/tcp/streams/_base.py @@ -361,6 +361,7 @@ class ReceiptsStreamRow: receipt_type: str user_id: str event_id: str + thread_id: Optional[str] data: dict NAME = "receipts" diff --git a/synapse/rest/__init__.py b/synapse/rest/__init__.py index b71221511209..9a2ab99ede87 100644 --- a/synapse/rest/__init__.py +++ b/synapse/rest/__init__.py @@ -30,6 +30,7 @@ keys, knock, login as v1_login, + login_token_request, logout, mutual_rooms, notifications, @@ -130,3 +131,4 @@ def register_servlets(client_resource: HttpServer, hs: "HomeServer") -> None: # unstable mutual_rooms.register_servlets(hs, client_resource) + login_token_request.register_servlets(hs, client_resource) diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index 9d65b1c5e087..4a9254e96eb4 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -69,6 +69,7 @@ def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main self.auth = hs.get_auth() self.admin_handler = hs.get_admin_handler() + self._msc3866_enabled = hs.config.experimental.msc3866.enabled async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: await assert_requester_is_admin(self.auth, request) @@ -96,6 +97,13 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: deactivated = parse_boolean(request, "deactivated", default=False) appservice = parse_string(request, "appservice") + # If support for MSC3866 is not enabled, apply no filtering based on the + # `approved` column. + if self._msc3866_enabled: + approved = parse_boolean(request, "approved", default=True) + else: + approved = True + order_by = parse_string( request, "order_by", @@ -124,8 +132,15 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: deactivated, order_by, direction, + approved, appservice, ) + + # If support for MSC3866 is not enabled, don't show the approval flag. + if not self._msc3866_enabled: + for user in users: + del user["approved"] + ret = {"users": users, "total": total} if (start + limit) < total: ret["next_token"] = str(start + len(users)) @@ -172,6 +187,7 @@ def __init__(self, hs: "HomeServer"): self.deactivate_account_handler = hs.get_deactivate_account_handler() self.registration_handler = hs.get_registration_handler() self.pusher_pool = hs.get_pusherpool() + self._msc3866_enabled = hs.config.experimental.msc3866.enabled async def on_GET( self, request: SynapseRequest, user_id: str @@ -248,6 +264,15 @@ async def on_PUT( HTTPStatus.BAD_REQUEST, "'deactivated' parameter is not of type boolean" ) + approved: Optional[bool] = None + if "approved" in body and self._msc3866_enabled: + approved = body["approved"] + if not isinstance(approved, bool): + raise SynapseError( + HTTPStatus.BAD_REQUEST, + "'approved' parameter is not of type boolean", + ) + # convert List[Dict[str, str]] into List[Tuple[str, str]] if external_ids is not None: new_external_ids = [ @@ -352,6 +377,9 @@ async def on_PUT( if "user_type" in body: await self.store.set_user_type(target_user, user_type) + if approved is not None: + await self.store.update_user_approval_status(target_user, approved) + user = await self.admin_handler.get_user(target_user) assert user is not None @@ -364,6 +392,10 @@ async def on_PUT( if password is not None: password_hash = await self.auth_handler.hash(password) + new_user_approved = True + if self._msc3866_enabled and approved is not None: + new_user_approved = approved + user_id = await self.registration_handler.register_user( localpart=target_user.localpart, password_hash=password_hash, @@ -371,6 +403,7 @@ async def on_PUT( default_display_name=displayname, user_type=user_type, by_admin=True, + approved=new_user_approved, ) if threepids is not None: @@ -384,7 +417,7 @@ async def on_PUT( and self.hs.config.email.email_notif_for_new_users and medium == "email" ): - await self.pusher_pool.add_pusher( + await self.pusher_pool.add_or_update_pusher( user_id=user_id, access_token=None, kind="email", @@ -392,7 +425,7 @@ async def on_PUT( app_display_name="Email Notifications", device_display_name=address, pushkey=address, - lang=None, # We don't know a user's language here + lang=None, data={}, ) @@ -559,6 +592,7 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: user_type=user_type, default_display_name=displayname, by_admin=True, + approved=True, ) result = await register._create_registration_details(user_id, body) diff --git a/synapse/rest/client/account.py b/synapse/rest/client/account.py index 2db2a04f95df..44f622bcce15 100644 --- a/synapse/rest/client/account.py +++ b/synapse/rest/client/account.py @@ -534,6 +534,11 @@ class AddThreepidMsisdnSubmitTokenServlet(RestServlet): "/add_threepid/msisdn/submit_token$", releases=(), unstable=True ) + class PostBody(RequestBodyModel): + client_secret: ClientSecretStr + sid: StrictStr + token: StrictStr + def __init__(self, hs: "HomeServer"): super().__init__() self.config = hs.config @@ -549,16 +554,14 @@ async def on_POST(self, request: Request) -> Tuple[int, JsonDict]: "instead.", ) - body = parse_json_object_from_request(request) - assert_params_in_dict(body, ["client_secret", "sid", "token"]) - assert_valid_client_secret(body["client_secret"]) + body = parse_and_validate_json_object_from_request(request, self.PostBody) # Proxy submit_token request to msisdn threepid delegate response = await self.identity_handler.proxy_msisdn_submit_token( self.config.registration.account_threepid_delegate_msisdn, - body["client_secret"], - body["sid"], - body["token"], + body.client_secret, + body.sid, + body.token, ) return 200, response @@ -581,6 +584,10 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: return 200, {"threepids": threepids} + # NOTE(dmr): I have chosen not to use Pydantic to parse this request's body, because + # the endpoint is deprecated. (If you really want to, you could do this by reusing + # ThreePidBindRestServelet.PostBody with an `alias_generator` to handle + # `threePidCreds` versus `three_pid_creds`. async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: if not self.hs.config.registration.enable_3pid_changes: raise SynapseError( diff --git a/synapse/rest/client/login.py b/synapse/rest/client/login.py index 0437c87d8d6d..f554586ac3c4 100644 --- a/synapse/rest/client/login.py +++ b/synapse/rest/client/login.py @@ -28,7 +28,14 @@ from typing_extensions import TypedDict -from synapse.api.errors import Codes, InvalidClientTokenError, LoginError, SynapseError +from synapse.api.constants import ApprovalNoticeMedium +from synapse.api.errors import ( + Codes, + InvalidClientTokenError, + LoginError, + NotApprovedError, + SynapseError, +) from synapse.api.ratelimiting import Ratelimiter from synapse.api.urls import CLIENT_API_PREFIX from synapse.appservice import ApplicationService @@ -55,11 +62,11 @@ class LoginResponse(TypedDict, total=False): user_id: str - access_token: str + access_token: Optional[str] home_server: str expires_in_ms: Optional[int] refresh_token: Optional[str] - device_id: str + device_id: Optional[str] well_known: Optional[Dict[str, Any]] @@ -92,6 +99,12 @@ def __init__(self, hs: "HomeServer"): hs.config.registration.refreshable_access_token_lifetime is not None ) + # Whether we need to check if the user has been approved or not. + self._require_approval = ( + hs.config.experimental.msc3866.enabled + and hs.config.experimental.msc3866.require_approval_for_new_accounts + ) + self.auth = hs.get_auth() self.clock = hs.get_clock() @@ -220,6 +233,14 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, LoginResponse]: except KeyError: raise SynapseError(400, "Missing JSON keys.") + if self._require_approval: + approved = await self.auth_handler.is_user_approved(result["user_id"]) + if not approved: + raise NotApprovedError( + msg="This account is pending approval by a server administrator.", + approval_notice_medium=ApprovalNoticeMedium.NONE, + ) + well_known_data = self._well_known_builder.get_well_known() if well_known_data: result["well_known"] = well_known_data @@ -356,6 +377,16 @@ async def _complete_login( errcode=Codes.INVALID_PARAM, ) + if self._require_approval: + approved = await self.auth_handler.is_user_approved(user_id) + if not approved: + # If the user isn't approved (and needs to be) we won't allow them to + # actually log in, so we don't want to create a device/access token. + return LoginResponse( + user_id=user_id, + home_server=self.hs.hostname, + ) + initial_display_name = login_submission.get("initial_device_display_name") ( device_id, diff --git a/synapse/rest/client/login_token_request.py b/synapse/rest/client/login_token_request.py new file mode 100644 index 000000000000..277b20fb638d --- /dev/null +++ b/synapse/rest/client/login_token_request.py @@ -0,0 +1,96 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from typing import TYPE_CHECKING, Tuple + +from synapse.http.server import HttpServer +from synapse.http.servlet import RestServlet, parse_json_object_from_request +from synapse.http.site import SynapseRequest +from synapse.rest.client._base import client_patterns, interactive_auth_handler +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class LoginTokenRequestServlet(RestServlet): + """ + Get a token that can be used with `m.login.token` to log in a second device. + + Request: + + POST /login/token HTTP/1.1 + Content-Type: application/json + + {} + + Response: + + HTTP/1.1 200 OK + { + "login_token": "ABDEFGH", + "expires_in": 3600, + } + """ + + PATTERNS = client_patterns( + "/org.matrix.msc3882/login/token$", releases=[], v1=False, unstable=True + ) + + def __init__(self, hs: "HomeServer"): + super().__init__() + self.auth = hs.get_auth() + self.store = hs.get_datastores().main + self.clock = hs.get_clock() + self.server_name = hs.config.server.server_name + self.macaroon_gen = hs.get_macaroon_generator() + self.auth_handler = hs.get_auth_handler() + self.token_timeout = hs.config.experimental.msc3882_token_timeout + self.ui_auth = hs.config.experimental.msc3882_ui_auth + + @interactive_auth_handler + async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + requester = await self.auth.get_user_by_req(request) + body = parse_json_object_from_request(request) + + if self.ui_auth: + await self.auth_handler.validate_user_via_ui_auth( + requester, + request, + body, + "issue a new access token for your account", + can_skip_ui_auth=False, # Don't allow skipping of UI auth + ) + + login_token = self.macaroon_gen.generate_short_term_login_token( + user_id=requester.user.to_string(), + auth_provider_id="org.matrix.msc3882.login_token_request", + duration_in_ms=self.token_timeout, + ) + + return ( + 200, + { + "login_token": login_token, + "expires_in": self.token_timeout // 1000, + }, + ) + + +def register_servlets(hs: "HomeServer", http_server: HttpServer) -> None: + if hs.config.experimental.msc3882_enabled: + LoginTokenRequestServlet(hs).register(http_server) diff --git a/synapse/rest/client/pusher.py b/synapse/rest/client/pusher.py index 9a1f10f4befe..975eef2144b6 100644 --- a/synapse/rest/client/pusher.py +++ b/synapse/rest/client/pusher.py @@ -42,6 +42,7 @@ def __init__(self, hs: "HomeServer"): super().__init__() self.hs = hs self.auth = hs.get_auth() + self._msc3881_enabled = self.hs.config.experimental.msc3881_enabled async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) @@ -51,9 +52,16 @@ async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: user.to_string() ) - filtered_pushers = [p.as_dict() for p in pushers] + pusher_dicts = [p.as_dict() for p in pushers] - return 200, {"pushers": filtered_pushers} + for pusher in pusher_dicts: + if self._msc3881_enabled: + pusher["org.matrix.msc3881.enabled"] = pusher["enabled"] + pusher["org.matrix.msc3881.device_id"] = pusher["device_id"] + del pusher["enabled"] + del pusher["device_id"] + + return 200, {"pushers": pusher_dicts} class PushersSetRestServlet(RestServlet): @@ -65,6 +73,7 @@ def __init__(self, hs: "HomeServer"): self.auth = hs.get_auth() self.notifier = hs.get_notifier() self.pusher_pool = self.hs.get_pusherpool() + self._msc3881_enabled = self.hs.config.experimental.msc3881_enabled async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: requester = await self.auth.get_user_by_req(request) @@ -103,6 +112,10 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: if "append" in content: append = content["append"] + enabled = True + if self._msc3881_enabled and "org.matrix.msc3881.enabled" in content: + enabled = content["org.matrix.msc3881.enabled"] + if not append: await self.pusher_pool.remove_pushers_by_app_id_and_pushkey_not_user( app_id=content["app_id"], @@ -111,7 +124,7 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: ) try: - await self.pusher_pool.add_pusher( + await self.pusher_pool.add_or_update_pusher( user_id=user.to_string(), access_token=requester.access_token_id, kind=content["kind"], @@ -122,6 +135,8 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: lang=content["lang"], data=content["data"], profile_tag=content.get("profile_tag", ""), + enabled=enabled, + device_id=requester.device_id, ) except PusherConfigException as pce: raise SynapseError( diff --git a/synapse/rest/client/read_marker.py b/synapse/rest/client/read_marker.py index 511ba6392b50..914766b7b0c2 100644 --- a/synapse/rest/client/read_marker.py +++ b/synapse/rest/client/read_marker.py @@ -84,6 +84,8 @@ async def on_POST( receipt_type, user_id=requester.user.to_string(), event_id=event_id, + # Setting the thread ID is not possible with the /read_markers endpoint. + thread_id=None, extra_content=body.get("com.beeper.read.extra", None), ) diff --git a/synapse/rest/client/receipts.py b/synapse/rest/client/receipts.py index e5bab25db6c5..e1802be5f59e 100644 --- a/synapse/rest/client/receipts.py +++ b/synapse/rest/client/receipts.py @@ -50,6 +50,7 @@ def __init__(self, hs: "HomeServer"): ReceiptTypes.FULLY_READ, ReceiptTypes.BEEPER_INBOX_DONE, } + self._msc3771_enabled = hs.config.experimental.msc3771_enabled async def on_POST( self, request: SynapseRequest, room_id: str, receipt_type: str, event_id: str @@ -64,6 +65,16 @@ async def on_POST( body = parse_json_object_from_request(request, allow_empty_body=False) + # Pull the thread ID, if one exists. + thread_id = None + if self._msc3771_enabled: + if "thread_id" in body: + thread_id = body.get("thread_id") + if not thread_id or not isinstance(thread_id, str): + raise SynapseError( + 400, "thread_id field must be a non-empty string" + ) + await self.presence_handler.bump_presence_active_time(requester.user) if receipt_type == ReceiptTypes.FULLY_READ: @@ -79,6 +90,7 @@ async def on_POST( receipt_type, user_id=requester.user.to_string(), event_id=event_id, + thread_id=thread_id, extra_content=body, ) diff --git a/synapse/rest/client/register.py b/synapse/rest/client/register.py index c6154c1ebd5b..1234668a63e8 100644 --- a/synapse/rest/client/register.py +++ b/synapse/rest/client/register.py @@ -21,10 +21,15 @@ import synapse import synapse.api.auth import synapse.types -from synapse.api.constants import APP_SERVICE_REGISTRATION_TYPE, LoginType +from synapse.api.constants import ( + APP_SERVICE_REGISTRATION_TYPE, + ApprovalNoticeMedium, + LoginType, +) from synapse.api.errors import ( Codes, InteractiveAuthIncompleteError, + NotApprovedError, SynapseError, ThreepidValidationError, UnrecognizedRequestError, @@ -415,6 +420,11 @@ def __init__(self, hs: "HomeServer"): hs.config.registration.inhibit_user_in_use_error ) + self._require_approval = ( + hs.config.experimental.msc3866.enabled + and hs.config.experimental.msc3866.require_approval_for_new_accounts + ) + self._registration_flows = _calculate_registration_flows( hs.config, self.auth_handler ) @@ -735,6 +745,12 @@ async def on_POST(self, request: SynapseRequest) -> Tuple[int, JsonDict]: access_token=return_dict.get("access_token"), ) + if self._require_approval: + raise NotApprovedError( + msg="This account needs to be approved by an administrator before it can be used.", + approval_notice_medium=ApprovalNoticeMedium.NONE, + ) + return 200, return_dict async def _do_appservice_registration( @@ -779,7 +795,9 @@ async def _create_registration_details( "user_id": user_id, "home_server": self.hs.hostname, } - if not params.get("inhibit_login", False): + # We don't want to log the user in if we're going to deny them access because + # they need to be approved first. + if not params.get("inhibit_login", False) and not self._require_approval: device_id = params.get("device_id") initial_display_name = params.get("initial_device_display_name") ( diff --git a/synapse/rest/client/relations.py b/synapse/rest/client/relations.py index ce970800136a..7a25de5c8596 100644 --- a/synapse/rest/client/relations.py +++ b/synapse/rest/client/relations.py @@ -56,15 +56,21 @@ async def on_GET( requester = await self.auth.get_user_by_req(request, allow_guest=True) limit = parse_integer(request, "limit", default=5) - if self._msc3715_enabled: - direction = parse_string( - request, - "org.matrix.msc3715.dir", - default="b", - allowed_values=["f", "b"], - ) - else: - direction = "b" + # Fetch the direction parameter, if provided. + # + # TODO Use PaginationConfig.from_request when the unstable parameter is + # no longer needed. + direction = parse_string(request, "dir", allowed_values=["f", "b"]) + if direction is None: + if self._msc3715_enabled: + direction = parse_string( + request, + "org.matrix.msc3715.dir", + default="b", + allowed_values=["f", "b"], + ) + else: + direction = "b" from_token_str = parse_string(request, "from") to_token_str = parse_string(request, "to") @@ -76,6 +82,11 @@ async def on_GET( if to_token_str: to_token = await StreamToken.from_string(self.store, to_token_str) + # The unstable version of this API returns an extra field for client + # compatibility, see https://github.com/matrix-org/synapse/issues/12930. + assert request.path is not None + include_original_event = request.path.startswith(b"/_matrix/client/unstable/") + result = await self._relations_handler.get_relations( requester=requester, event_id=parent_id, @@ -86,6 +97,7 @@ async def on_GET( direction=direction, from_token=from_token, to_token=to_token, + include_original_event=include_original_event, ) return 200, result diff --git a/synapse/rest/client/room.py b/synapse/rest/client/room.py index 94f868183649..4cff5a166a63 100644 --- a/synapse/rest/client/room.py +++ b/synapse/rest/client/room.py @@ -268,15 +268,9 @@ async def on_PUT( content = parse_json_object_from_request(request) - event_dict = { - "type": event_type, - "content": content, - "room_id": room_id, - "sender": requester.user.to_string(), - } - - if state_key is not None: - event_dict["state_key"] = state_key + origin_server_ts = None + if requester.app_service: + origin_server_ts = parse_integer(request, "ts") try: if event_type == EventTypes.Member: @@ -287,8 +281,22 @@ async def on_PUT( room_id=room_id, action=membership, content=content, + origin_server_ts=origin_server_ts, ) else: + event_dict: JsonDict = { + "type": event_type, + "content": content, + "room_id": room_id, + "sender": requester.user.to_string(), + } + + if state_key is not None: + event_dict["state_key"] = state_key + + if origin_server_ts is not None: + event_dict["origin_server_ts"] = origin_server_ts + ( event, _, diff --git a/synapse/rest/client/versions.py b/synapse/rest/client/versions.py index c516cda95d5c..c95b0d6f197b 100644 --- a/synapse/rest/client/versions.py +++ b/synapse/rest/client/versions.py @@ -103,8 +103,14 @@ def on_GET(self, request: Request) -> Tuple[int, JsonDict]: "org.matrix.msc3030": self.config.experimental.msc3030_enabled, # Adds support for thread relations, per MSC3440. "org.matrix.msc3440.stable": True, # TODO: remove when "v1.3" is added above + # Support for thread read receipts. + "org.matrix.msc3771": self.config.experimental.msc3771_enabled, # Allows moderators to fetch redacted event content as described in MSC2815 "fi.mau.msc2815": self.config.experimental.msc2815_enabled, + # Adds support for login token requests as per MSC3882 + "org.matrix.msc3882": self.config.experimental.msc3882_enabled, + # Adds support for remotely enabling/disabling pushers, as per MSC3881 + "org.matrix.msc3881": self.config.experimental.msc3881_enabled, }, }, ) diff --git a/synapse/state/__init__.py b/synapse/state/__init__.py index 3787d35b244f..6f3dd0463e66 100644 --- a/synapse/state/__init__.py +++ b/synapse/state/__init__.py @@ -420,6 +420,69 @@ async def compute_event_context( partial_state=partial_state, ) + async def compute_event_context_for_batched( + self, + event: EventBase, + state_ids_before_event: StateMap[str], + current_state_group: int, + ) -> EventContext: + """ + Generate an event context for an event that has not yet been persisted to the + database. Intended for use with events that are created to be persisted in a batch. + Args: + event: the event the context is being computed for + state_ids_before_event: a state map consisting of the state ids of the events + created prior to this event. + current_state_group: the current state group before the event. + """ + state_group_before_event_prev_group = None + deltas_to_state_group_before_event = None + + state_group_before_event = current_state_group + + # if the event is not state, we are set + if not event.is_state(): + return EventContext.with_state( + storage=self._storage_controllers, + state_group_before_event=state_group_before_event, + state_group=state_group_before_event, + state_delta_due_to_event={}, + prev_group=state_group_before_event_prev_group, + delta_ids=deltas_to_state_group_before_event, + partial_state=False, + ) + + # otherwise, we'll need to create a new state group for after the event + key = (event.type, event.state_key) + + if state_ids_before_event is not None: + replaces = state_ids_before_event.get(key) + + if replaces and replaces != event.event_id: + event.unsigned["replaces_state"] = replaces + + delta_ids = {key: event.event_id} + + state_group_after_event = ( + await self._state_storage_controller.store_state_group( + event.event_id, + event.room_id, + prev_group=state_group_before_event, + delta_ids=delta_ids, + current_state_ids=None, + ) + ) + + return EventContext.with_state( + storage=self._storage_controllers, + state_group=state_group_after_event, + state_group_before_event=state_group_before_event, + state_delta_due_to_event=delta_ids, + prev_group=state_group_before_event, + delta_ids=delta_ids, + partial_state=False, + ) + @measure_func() async def resolve_state_groups_for_events( self, room_id: str, event_ids: Collection[str], await_full_state: bool = True diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index 37271f91836a..3fed24331301 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -15,12 +15,13 @@ # limitations under the License. import logging from abc import ABCMeta -from typing import TYPE_CHECKING, Any, Collection, Iterable, Optional, Union +from typing import TYPE_CHECKING, Any, Collection, Dict, Iterable, Optional, Union from synapse.storage.database import make_in_list_sql_clause # noqa: F401; noqa: F401 from synapse.storage.database import DatabasePool, LoggingDatabaseConnection from synapse.types import get_domain_from_id from synapse.util import json_decoder +from synapse.util.caches.descriptors import CachedFunction if TYPE_CHECKING: from synapse.server import HomeServer @@ -49,6 +50,8 @@ def __init__( # Beeper: setup the external caches self._enable_external_caches() + self.external_cached_functions: Dict[str, CachedFunction] = {} + def process_replication_rows( self, stream_name: str, @@ -101,7 +104,7 @@ def _invalidate_state_caches( def _attempt_to_invalidate_cache( self, cache_name: str, key: Optional[Collection[Any]] - ) -> None: + ) -> bool: """Attempts to invalidate the cache of the given name, ignoring if the cache doesn't exist. Mainly used for invalidating caches on workers, where they may not have the cache. @@ -119,9 +122,12 @@ def _attempt_to_invalidate_cache( try: cache = getattr(self, cache_name) except AttributeError: - # We probably haven't pulled in the cache in this worker, - # which is fine. - return + # Check if an externally defined module cache has been registered + cache = self.external_cached_functions.get(cache_name) + if not cache: + # We probably haven't pulled in the cache in this worker, + # which is fine. + return False if key is None: cache.invalidate_all() @@ -131,6 +137,13 @@ def _attempt_to_invalidate_cache( invalidate_method = getattr(cache, "invalidate_local", cache.invalidate) invalidate_method(tuple(key)) + return True + + def register_external_cached_function( + self, cache_name: str, func: CachedFunction + ) -> None: + self.external_cached_functions[cache_name] = func + # Beeper: externalised caches in Redis # It is *critical* that all cache invalidations happe here, something we'll have to keep # an eye on when merging upstream changes (until we upstream it!). diff --git a/synapse/storage/background_updates.py b/synapse/storage/background_updates.py index bf5e7ee7be7e..2056ecb2c331 100644 --- a/synapse/storage/background_updates.py +++ b/synapse/storage/background_updates.py @@ -285,7 +285,10 @@ async def run_background_updates(self, sleep: bool) -> None: back_to_back_failures = 0 try: - logger.info("Starting background schema updates") + logger.info( + "Starting background schema updates for database %s", + self._database_name, + ) while self.enabled: try: result = await self.do_next_background_update(sleep) diff --git a/synapse/storage/controllers/persist_events.py b/synapse/storage/controllers/persist_events.py index 501dbbc99011..06e71a8053c7 100644 --- a/synapse/storage/controllers/persist_events.py +++ b/synapse/storage/controllers/persist_events.py @@ -423,16 +423,18 @@ async def enqueue( for d in ret_vals: replaced_events.update(d) - events = [] + persisted_events = [] for event, _ in events_and_contexts: existing_event_id = replaced_events.get(event.event_id) if existing_event_id: - events.append(await self.main_store.get_event(existing_event_id)) + persisted_events.append( + await self.main_store.get_event(existing_event_id) + ) else: - events.append(event) + persisted_events.append(event) return ( - events, + persisted_events, self.main_store.get_room_max_token(), ) @@ -598,11 +600,6 @@ async def _persist_event_batch( # room state_delta_for_room: Dict[str, DeltaState] = {} - # Set of remote users which were in rooms the server has left or who may - # have left rooms the server is in. We should check if we still share any - # rooms and if not we mark their device lists as stale. - potentially_left_users: Set[str] = set() - if not backfilled: with Measure(self._clock, "_calculate_state_and_extrem"): # Work out the new "current state" for each room. @@ -716,8 +713,6 @@ async def _persist_event_batch( room_id, ev_ctx_rm, delta, - current_state, - potentially_left_users, ) if not is_still_joined: logger.info("Server no longer in room %s", room_id) @@ -725,20 +720,6 @@ async def _persist_event_batch( current_state = {} delta.no_longer_in_room = True - # Add all remote users that might have left rooms. - potentially_left_users.update( - user_id - for event_type, user_id in delta.to_delete - if event_type == EventTypes.Member - and not self.is_mine_id(user_id) - ) - potentially_left_users.update( - user_id - for event_type, user_id in delta.to_insert.keys() - if event_type == EventTypes.Member - and not self.is_mine_id(user_id) - ) - state_delta_for_room[room_id] = delta await self.persist_events_store._persist_events_and_state_updates( @@ -749,8 +730,6 @@ async def _persist_event_batch( inhibit_local_membership_updates=backfilled, ) - await self._handle_potentially_left_users(potentially_left_users) - return replaced_events async def _calculate_new_extremities( @@ -1126,8 +1105,6 @@ async def _is_server_still_joined( room_id: str, ev_ctx_rm: List[Tuple[EventBase, EventContext]], delta: DeltaState, - current_state: Optional[StateMap[str]], - potentially_left_users: Set[str], ) -> bool: """Check if the server will still be joined after the given events have been persised. @@ -1137,11 +1114,6 @@ async def _is_server_still_joined( ev_ctx_rm delta: The delta of current state between what is in the database and what the new current state will be. - current_state: The new current state if it already been calculated, - otherwise None. - potentially_left_users: If the server has left the room, then joined - remote users will be added to this set to indicate that the - server may no longer be sharing a room with them. """ if not any( @@ -1195,45 +1167,4 @@ async def _is_server_still_joined( ): return True - # The server will leave the room, so we go and find out which remote - # users will still be joined when we leave. - if current_state is None: - current_state = await self.main_store.get_partial_current_state_ids(room_id) - current_state = dict(current_state) - for key in delta.to_delete: - current_state.pop(key, None) - - current_state.update(delta.to_insert) - - remote_event_ids = [ - event_id - for ( - typ, - state_key, - ), event_id in current_state.items() - if typ == EventTypes.Member and not self.is_mine_id(state_key) - ] - members = await self.main_store.get_membership_from_event_ids(remote_event_ids) - potentially_left_users.update( - member.user_id - for member in members.values() - if member and member.membership == Membership.JOIN - ) - return False - - async def _handle_potentially_left_users(self, user_ids: Set[str]) -> None: - """Given a set of remote users check if the server still shares a room with - them. If not then mark those users' device cache as stale. - """ - - if not user_ids: - return - - joined_users = await self.main_store.get_users_server_still_shares_room_with( - user_ids - ) - left_users = user_ids - joined_users - - for user_id in left_users: - await self.main_store.mark_remote_user_device_list_as_unsubscribed(user_id) diff --git a/synapse/storage/controllers/state.py b/synapse/storage/controllers/state.py index bbe568bf053e..2b31ce54bb75 100644 --- a/synapse/storage/controllers/state.py +++ b/synapse/storage/controllers/state.py @@ -23,6 +23,7 @@ List, Mapping, Optional, + Set, Tuple, ) @@ -406,6 +407,7 @@ async def get_current_state_ids( self, room_id: str, state_filter: Optional[StateFilter] = None, + await_full_state: bool = True, on_invalidate: Optional[Callable[[], None]] = None, ) -> StateMap[str]: """Get the current state event ids for a room based on the @@ -418,13 +420,17 @@ async def get_current_state_ids( room_id: The room to get the state IDs of. state_filter: The state filter used to fetch state from the database. + await_full_state: if true, will block if we do not yet have complete + state for the room. on_invalidate: Callback for when the `get_current_state_ids` cache for the room gets invalidated. Returns: The current state of the room. """ - if not state_filter or state_filter.must_await_full_state(self._is_mine_id): + if await_full_state and ( + not state_filter or state_filter.must_await_full_state(self._is_mine_id) + ): await self._partial_state_room_tracker.await_full_state(room_id) if state_filter and not state_filter.is_full(): @@ -523,13 +529,60 @@ async def get_current_state_event( ) return state_map.get(key) - async def get_current_hosts_in_room(self, room_id: str) -> List[str]: - """Get current hosts in room based on current state.""" + async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: + """Get current hosts in room based on current state. + + Blocks until we have full state for the given room. This only happens for rooms + with partial state. + """ await self._partial_state_room_tracker.await_full_state(room_id) return await self.stores.main.get_current_hosts_in_room(room_id) + async def get_current_hosts_in_room_ordered(self, room_id: str) -> List[str]: + """Get current hosts in room based on current state. + + Blocks until we have full state for the given room. This only happens for rooms + with partial state. + + Returns: + A list of hosts in the room, sorted by longest in the room first. (aka. + sorted by join with the lowest depth first). + """ + + await self._partial_state_room_tracker.await_full_state(room_id) + + return await self.stores.main.get_current_hosts_in_room_ordered(room_id) + + async def get_current_hosts_in_room_or_partial_state_approximation( + self, room_id: str + ) -> Collection[str]: + """Get approximation of current hosts in room based on current state. + + For rooms with full state, this is equivalent to `get_current_hosts_in_room`, + with the same order of results. + + For rooms with partial state, no blocking occurs. Instead, the list of hosts + in the room at the time of joining is combined with the list of hosts which + joined the room afterwards. The returned list may include hosts that are not + actually in the room and exclude hosts that are in the room, since we may + calculate state incorrectly during the partial state phase. The order of results + is arbitrary for rooms with partial state. + """ + # We have to read this list first to mitigate races with un-partial stating. + # This will be empty for rooms with full state. + hosts_at_join = await self.stores.main.get_partial_state_servers_at_join( + room_id + ) + + hosts_from_state = await self.stores.main.get_current_hosts_in_room(room_id) + + hosts = set(hosts_at_join) + hosts.update(hosts_from_state) + + return hosts + async def get_users_in_room_with_profiles( self, room_id: str ) -> Dict[str, ProfileInfo]: diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 921cd4dc5ee0..b4469eb96493 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -95,6 +95,8 @@ "local_media_repository_thumbnails": "local_media_repository_thumbnails_method_idx", "remote_media_cache_thumbnails": "remote_media_repository_thumbnails_method_idx", "event_push_summary": "event_push_summary_unique_index", + "receipts_linearized": "receipts_linearized_unique_index", + "receipts_graph": "receipts_graph_unique_index", } @@ -288,8 +290,7 @@ def call_after( # LoggingTransaction isn't expecting there to be any callbacks; assert that # is not the case. assert self.after_callbacks is not None - # type-ignore: need mypy containing https://github.com/python/mypy/pull/12668 - self.after_callbacks.append((callback, args, kwargs)) # type: ignore[arg-type] + self.after_callbacks.append((callback, args, kwargs)) def async_call_after( self, callback: Callable[P, Awaitable], *args: P.args, **kwargs: P.kwargs @@ -310,8 +311,7 @@ def async_call_after( # LoggingTransaction isn't expecting there to be any callbacks; assert that # is not the case. assert self.async_after_callbacks is not None - # type-ignore: need mypy containing https://github.com/python/mypy/pull/12668 - self.async_after_callbacks.append((callback, args, kwargs)) # type: ignore[arg-type] + self.async_after_callbacks.append((callback, args, kwargs)) def call_on_exception( self, callback: Callable[P, object], *args: P.args, **kwargs: P.kwargs @@ -329,8 +329,7 @@ def call_on_exception( # LoggingTransaction isn't expecting there to be any callbacks; assert that # is not the case. assert self.exception_callbacks is not None - # type-ignore: need mypy containing https://github.com/python/mypy/pull/12668 - self.exception_callbacks.append((callback, args, kwargs)) # type: ignore[arg-type] + self.exception_callbacks.append((callback, args, kwargs)) def fetchone(self) -> Optional[Tuple]: return self.txn.fetchone() @@ -391,6 +390,14 @@ def execute(self, sql: str, *args: Any) -> None: def executemany(self, sql: str, *args: Any) -> None: self._do_execute(self.txn.executemany, sql, *args) + def executescript(self, sql: str) -> None: + if isinstance(self.database_engine, Sqlite3Engine): + self._do_execute(self.txn.executescript, sql) # type: ignore[attr-defined] + else: + raise NotImplementedError( + f"executescript only exists for sqlite driver, not {type(self.database_engine)}" + ) + def _make_sql_one_line(self, sql: str) -> str: "Strip newlines out of SQL so that the loggers in the DB are on one line" return " ".join(line.strip() for line in sql.splitlines() if line.strip()) @@ -411,10 +418,7 @@ def _do_execute( sql = self.database_engine.convert_param_style(sql) if args: try: - # The type-ignore should be redundant once mypy releases a version with - # https://github.com/python/mypy/pull/12668. (`args` might be empty, - # (but we'll catch the index error if so.) - sql_logger.debug("[SQL values] {%s} %r", self.name, args[0]) # type: ignore[index] + sql_logger.debug("[SQL values] {%s} %r", self.name, args[0]) except Exception: # Don't let logging failures stop SQL from working pass @@ -645,9 +649,7 @@ def new_transaction( # For now, we just log an error, and hope that it works on the first attempt. # TODO: raise an exception. - # Type-ignore Mypy doesn't yet consider ParamSpec.args to be iterable; see - # https://github.com/python/mypy/pull/12668 - for i, arg in enumerate(args): # type: ignore[arg-type, var-annotated] + for i, arg in enumerate(args): if inspect.isgenerator(arg): logger.error( "Programming error: generator passed to new_transaction as " @@ -655,9 +657,7 @@ def new_transaction( i, func, ) - # Type-ignore Mypy doesn't yet consider ParamSpec.args to be a mapping; see - # https://github.com/python/mypy/pull/12668 - for name, val in kwargs.items(): # type: ignore[attr-defined] + for name, val in kwargs.items(): if inspect.isgenerator(val): logger.error( "Programming error: generator passed to new_transaction as " @@ -1131,17 +1131,57 @@ async def simple_upsert( desc: str = "simple_upsert", lock: bool = True, ) -> bool: - """ + """Insert a row with values + insertion_values; on conflict, update with values. + + All of our supported databases accept the nonstandard "upsert" statement in + their dialect of SQL. We call this a "native upsert". The syntax looks roughly + like: + + INSERT INTO table VALUES (values + insertion_values) + ON CONFLICT (keyvalues) + DO UPDATE SET (values); -- overwrite `values` columns only + + If (values) is empty, the resulting query is slighlty simpler: + + INSERT INTO table VALUES (insertion_values) + ON CONFLICT (keyvalues) + DO NOTHING; -- do not overwrite any columns - `lock` should generally be set to True (the default), but can be set - to False if either of the following are true: - 1. there is a UNIQUE INDEX on the key columns. In this case a conflict - will cause an IntegrityError in which case this function will retry - the update. - 2. we somehow know that we are the only thread which will be updating - this table. - As an additional note, this parameter only matters for old SQLite versions - because we will use native upserts otherwise. + This function is a helper to build such queries. + + In order for upserts to make sense, the database must be able to determine when + an upsert CONFLICTs with an existing row. Postgres and SQLite ensure this by + requiring that a unique index exist on the column names used to detect a + conflict (i.e. `keyvalues.keys()`). + + If there is no such index, we can "emulate" an upsert with a SELECT followed + by either an INSERT or an UPDATE. This is unsafe: we cannot make the same + atomicity guarantees that a native upsert can and are very vulnerable to races + and crashes. Therefore if we wish to upsert without an appropriate unique index, + we must either: + + 1. Acquire a table-level lock before the emulated upsert (`lock=True`), or + 2. VERY CAREFULLY ensure that we are the only thread and worker which will be + writing to this table, in which case we can proceed without a lock + (`lock=False`). + + Generally speaking, you should use `lock=True`. If the table in question has a + unique index[*], this class will use a native upsert (which is atomic and so can + ignore the `lock` argument). Otherwise this class will use an emulated upsert, + in which case we want the safer option unless we been VERY CAREFUL. + + [*]: Some tables have unique indices added to them in the background. Those + tables `T` are keys in the dictionary UNIQUE_INDEX_BACKGROUND_UPDATES, + where `T` maps to the background update that adds a unique index to `T`. + This dictionary is maintained by hand. + + At runtime, we constantly check to see if each of these background updates + has run. If so, we deem the coresponding table safe to upsert into, because + we can now use a native insert to do so. If not, we deem the table unsafe + to upsert into and require an emulated upsert. + + Tables that do not appear in this dictionary are assumed to have an + appropriate unique index and therefore be safe to upsert into. Args: table: The table to upsert into @@ -2421,6 +2461,66 @@ def make_in_list_sql_clause( return "%s IN (%s)" % (column, ",".join("?" for _ in iterable)), list(iterable) +# These overloads ensure that `columns` and `iterable` values have the same length. +# Suppress "Single overload definition, multiple required" complaint. +@overload # type: ignore[misc] +def make_tuple_in_list_sql_clause( + database_engine: BaseDatabaseEngine, + columns: Tuple[str, str], + iterable: Collection[Tuple[Any, Any]], +) -> Tuple[str, list]: + ... + + +def make_tuple_in_list_sql_clause( + database_engine: BaseDatabaseEngine, + columns: Tuple[str, ...], + iterable: Collection[Tuple[Any, ...]], +) -> Tuple[str, list]: + """Returns an SQL clause that checks the given tuple of columns is in the iterable. + + Args: + database_engine + columns: Names of the columns in the tuple. + iterable: The tuples to check the columns against. + + Returns: + A tuple of SQL query and the args + """ + if len(columns) == 0: + # Should be unreachable due to mypy, as long as the overloads are set up right. + if () in iterable: + return "TRUE", [] + else: + return "FALSE", [] + + if len(columns) == 1: + # Use `= ANY(?)` on postgres. + return make_in_list_sql_clause( + database_engine, next(iter(columns)), [values[0] for values in iterable] + ) + + # There are multiple columns. Avoid using an `= ANY(?)` clause on postgres, as + # indices are not used when there are multiple columns. Instead, use an `IN` + # expression. + # + # `IN ((?, ...), ...)` with tuples is supported by postgres only, whereas + # `IN (VALUES (?, ...), ...)` is supported by both sqlite and postgres. + # Thus, the latter is chosen. + + if len(iterable) == 0: + # A 0-length `VALUES` list is not allowed in sqlite or postgres. + # Also note that a 0-length `IN (...)` clause (not using `VALUES`) is not + # allowed in postgres. + return "FALSE", [] + + tuple_sql = "(%s)" % (",".join("?" for _ in columns),) + return "(%s) IN (VALUES %s)" % ( + ",".join(column for column in columns), + ",".join(tuple_sql for _ in iterable), + ), [value for values in iterable for value in values] + + KV = TypeVar("KV") diff --git a/synapse/storage/databases/main/__init__.py b/synapse/storage/databases/main/__init__.py index 012648410ee3..0fab9e31afd5 100644 --- a/synapse/storage/databases/main/__init__.py +++ b/synapse/storage/databases/main/__init__.py @@ -83,6 +83,7 @@ class DataStore( EventsBackgroundUpdatesStore, + DeviceStore, RoomMemberStore, RoomStore, RoomBatchStore, @@ -114,7 +115,6 @@ class DataStore( StreamWorkerStore, OpenIdStore, ClientIpWorkerStore, - DeviceStore, DeviceInboxStore, UserDirectoryStore, UserErasureStore, @@ -203,6 +203,7 @@ async def get_users_paginate( deactivated: bool = False, order_by: str = UserSortOrder.USER_ID.value, direction: str = "f", + approved: bool = True, appservice: Optional[str] = None, ) -> Tuple[List[JsonDict], int]: """Function to retrieve a paginated list of users from @@ -218,6 +219,7 @@ async def get_users_paginate( deactivated: whether to include deactivated users order_by: the sort order of the returned list direction: sort ascending or descending + approved: whether to include approved users Returns: A tuple of a list of mappings from user to information and a count of total users. """ @@ -255,6 +257,10 @@ def get_users_paginate_txn( elif appservice: filters.append("appservice_id = ?") args.append(appservice) + if not approved: + # We ignore NULL values for the approved flag because these should only + # be already existing users that we consider as already approved. + filters.append("approved IS FALSE") where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else "" @@ -269,7 +275,7 @@ def get_users_paginate_txn( sql = f""" SELECT name, user_type, is_guest, admin, deactivated, shadow_banned, - displayname, avatar_url, creation_ts * 1000 as creation_ts, appservice_id + displayname, avatar_url, creation_ts * 1000 as creation_ts, approved, appservice_id {sql_base} ORDER BY {order_by_column} {order}, u.name ASC LIMIT ? OFFSET ? diff --git a/synapse/storage/databases/main/cache.py b/synapse/storage/databases/main/cache.py index abaae0cbf914..b9a6c8206735 100644 --- a/synapse/storage/databases/main/cache.py +++ b/synapse/storage/databases/main/cache.py @@ -33,7 +33,7 @@ ) from synapse.storage.engines import PostgresEngine from synapse.storage.util.id_generators import MultiWriterIdGenerator -from synapse.util.caches.descriptors import _CachedFunction +from synapse.util.caches.descriptors import CachedFunction from synapse.util.iterutils import batch_iter if TYPE_CHECKING: @@ -224,15 +224,16 @@ def _invalidate_caches_for_event( # process triggering the invalidation is responsible for clearing any external # cached objects. self._invalidate_local_get_event_cache(event_id) - self.have_seen_event.invalidate((room_id, event_id)) - self.get_latest_event_ids_in_room.invalidate((room_id,)) - - self.get_unread_event_push_actions_by_room_for_user.invalidate((room_id,)) + self._attempt_to_invalidate_cache("have_seen_event", (room_id, event_id)) + self._attempt_to_invalidate_cache("get_latest_event_ids_in_room", (room_id,)) + self._attempt_to_invalidate_cache( + "get_unread_event_push_actions_by_room_for_user", (room_id,) + ) # The `_get_membership_from_event_id` is immutable, except for the # case where we look up an event *before* persisting it. - self._get_membership_from_event_id.invalidate((event_id,)) + self._attempt_to_invalidate_cache("_get_membership_from_event_id", (event_id,)) if not backfilled: self._events_stream_cache.entity_has_changed(room_id, stream_ordering) @@ -241,21 +242,32 @@ def _invalidate_caches_for_event( self._invalidate_local_get_event_cache(redacts) # Caches which might leak edits must be invalidated for the event being # redacted. - self.get_relations_for_event.invalidate((redacts,)) - self.get_applicable_edit.invalidate((redacts,)) + self._attempt_to_invalidate_cache("get_relations_for_event", (redacts,)) + self._attempt_to_invalidate_cache("get_applicable_edit", (redacts,)) if etype == EventTypes.Member: self._membership_stream_cache.entity_has_changed(state_key, stream_ordering) - self.get_invited_rooms_for_local_user.invalidate((state_key,)) - self.get_rooms_for_user_with_stream_ordering.invalidate((state_key,)) - self.get_rooms_for_user.invalidate((state_key,)) + self._attempt_to_invalidate_cache( + "get_invited_rooms_for_local_user", (state_key,) + ) + self._attempt_to_invalidate_cache( + "get_rooms_for_user_with_stream_ordering", (state_key,) + ) + self._attempt_to_invalidate_cache( + "get_rooms_for_user", (state_key,) + ) if relates_to: - self.get_relations_for_event.invalidate((relates_to,)) - self.get_aggregation_groups_for_event.invalidate((relates_to,)) - self.get_applicable_edit.invalidate((relates_to,)) - self.get_thread_summary.invalidate((relates_to,)) - self.get_thread_participated.invalidate((relates_to,)) + self._attempt_to_invalidate_cache("get_relations_for_event", (relates_to,)) + self._attempt_to_invalidate_cache( + "get_aggregation_groups_for_event", (relates_to,) + ) + self._attempt_to_invalidate_cache("get_applicable_edit", (relates_to,)) + self._attempt_to_invalidate_cache("get_thread_summary", (relates_to,)) + self._attempt_to_invalidate_cache("get_thread_participated", (relates_to,)) + self._attempt_to_invalidate_cache( + "get_mutual_event_relations_for_rel_type", (relates_to,) + ) async def invalidate_cache_and_stream( self, cache_name: str, keys: Tuple[Any, ...] @@ -272,9 +284,7 @@ async def invalidate_cache_and_stream( return cache_func.invalidate(keys) - await self.db_pool.runInteraction( - "invalidate_cache_and_stream", - self._send_invalidation_to_replication, + await self.send_invalidation_to_replication( cache_func.__name__, keys, ) @@ -282,7 +292,7 @@ async def invalidate_cache_and_stream( def _invalidate_cache_and_stream( self, txn: LoggingTransaction, - cache_func: _CachedFunction, + cache_func: CachedFunction, keys: Tuple[Any, ...], ) -> None: """Invalidates the cache and adds it to the cache stream so slaves @@ -296,7 +306,7 @@ def _invalidate_cache_and_stream( self._send_invalidation_to_replication(txn, cache_func.__name__, keys) def _invalidate_all_cache_and_stream( - self, txn: LoggingTransaction, cache_func: _CachedFunction + self, txn: LoggingTransaction, cache_func: CachedFunction ) -> None: """Invalidates the entire cache and adds it to the cache stream so slaves will know to invalidate their caches. @@ -340,6 +350,16 @@ def _invalidate_state_caches_and_stream( txn, CURRENT_STATE_CACHE_NAME, [room_id] ) + async def send_invalidation_to_replication( + self, cache_name: str, keys: Optional[Collection[Any]] + ) -> None: + await self.db_pool.runInteraction( + "send_invalidation_to_replication", + self._send_invalidation_to_replication, + cache_name, + keys, + ) + def _send_invalidation_to_replication( self, txn: LoggingTransaction, cache_name: str, keys: Optional[Iterable[Any]] ) -> None: diff --git a/synapse/storage/databases/main/devices.py b/synapse/storage/databases/main/devices.py index 5d700ca6c307..18358eca467e 100644 --- a/synapse/storage/databases/main/devices.py +++ b/synapse/storage/databases/main/devices.py @@ -47,6 +47,7 @@ make_tuple_comparison_clause, ) from synapse.storage.databases.main.end_to_end_keys import EndToEndKeyWorkerStore +from synapse.storage.databases.main.roommember import RoomMemberWorkerStore from synapse.storage.types import Cursor from synapse.types import JsonDict, get_verify_key_from_cross_signing_key from synapse.util import json_decoder, json_encoder @@ -70,7 +71,7 @@ BG_UPDATE_REMOVE_DUP_OUTBOUND_POKES = "remove_dup_outbound_pokes" -class DeviceWorkerStore(EndToEndKeyWorkerStore): +class DeviceWorkerStore(RoomMemberWorkerStore, EndToEndKeyWorkerStore): def __init__( self, database: DatabasePool, @@ -985,24 +986,59 @@ async def mark_remote_user_device_cache_as_valid(self, user_id: str) -> None: desc="mark_remote_user_device_cache_as_valid", ) + async def handle_potentially_left_users(self, user_ids: Set[str]) -> None: + """Given a set of remote users check if the server still shares a room with + them. If not then mark those users' device cache as stale. + """ + + if not user_ids: + return + + await self.db_pool.runInteraction( + "_handle_potentially_left_users", + self.handle_potentially_left_users_txn, + user_ids, + ) + + def handle_potentially_left_users_txn( + self, + txn: LoggingTransaction, + user_ids: Set[str], + ) -> None: + """Given a set of remote users check if the server still shares a room with + them. If not then mark those users' device cache as stale. + """ + + if not user_ids: + return + + joined_users = self.get_users_server_still_shares_room_with_txn(txn, user_ids) + left_users = user_ids - joined_users + + for user_id in left_users: + self.mark_remote_user_device_list_as_unsubscribed_txn(txn, user_id) + async def mark_remote_user_device_list_as_unsubscribed(self, user_id: str) -> None: """Mark that we no longer track device lists for remote user.""" - def _mark_remote_user_device_list_as_unsubscribed_txn( - txn: LoggingTransaction, - ) -> None: - self.db_pool.simple_delete_txn( - txn, - table="device_lists_remote_extremeties", - keyvalues={"user_id": user_id}, - ) - self._invalidate_cache_and_stream( - txn, self.get_device_list_last_stream_id_for_remote, (user_id,) - ) - await self.db_pool.runInteraction( "mark_remote_user_device_list_as_unsubscribed", - _mark_remote_user_device_list_as_unsubscribed_txn, + self.mark_remote_user_device_list_as_unsubscribed_txn, + user_id, + ) + + def mark_remote_user_device_list_as_unsubscribed_txn( + self, + txn: LoggingTransaction, + user_id: str, + ) -> None: + self.db_pool.simple_delete_txn( + txn, + table="device_lists_remote_extremeties", + keyvalues={"user_id": user_id}, + ) + self._invalidate_cache_and_stream( + txn, self.get_device_list_last_stream_id_for_remote, (user_id,) ) async def get_dehydrated_device( @@ -1271,6 +1307,33 @@ def _get_device_list_changes_in_rooms_txn( return changes + async def get_device_list_changes_in_room( + self, room_id: str, min_stream_id: int + ) -> Collection[Tuple[str, str]]: + """Get all device list changes that happened in the room since the given + stream ID. + + Returns: + Collection of user ID/device ID tuples of all devices that have + changed + """ + + sql = """ + SELECT DISTINCT user_id, device_id FROM device_lists_changes_in_room + WHERE room_id = ? AND stream_id > ? + """ + + def get_device_list_changes_in_room_txn( + txn: LoggingTransaction, + ) -> Collection[Tuple[str, str]]: + txn.execute(sql, (room_id, min_stream_id)) + return cast(Collection[Tuple[str, str]], txn.fetchall()) + + return await self.db_pool.runInteraction( + "get_device_list_changes_in_room", + get_device_list_changes_in_room_txn, + ) + class DeviceBackgroundUpdateStore(SQLBaseStore): def __init__( @@ -1910,14 +1973,15 @@ async def add_device_list_outbound_pokes( user_id: str, device_id: str, room_id: str, - stream_id: int, + stream_id: Optional[int], hosts: Collection[str], context: Optional[Dict[str, str]], ) -> None: """Queue the device update to be sent to the given set of hosts, calculated from the room ID. - Marks the associated row in `device_lists_changes_in_room` as handled. + Marks the associated row in `device_lists_changes_in_room` as handled, + if `stream_id` is provided. """ def add_device_list_outbound_pokes_txn( @@ -1933,17 +1997,18 @@ def add_device_list_outbound_pokes_txn( context=context, ) - self.db_pool.simple_update_txn( - txn, - table="device_lists_changes_in_room", - keyvalues={ - "user_id": user_id, - "device_id": device_id, - "stream_id": stream_id, - "room_id": room_id, - }, - updatevalues={"converted_to_destinations": True}, - ) + if stream_id: + self.db_pool.simple_update_txn( + txn, + table="device_lists_changes_in_room", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + "stream_id": stream_id, + "room_id": room_id, + }, + updatevalues={"converted_to_destinations": True}, + ) if not hosts: # If there are no hosts then we don't try and generate stream IDs. @@ -1959,3 +2024,58 @@ def add_device_list_outbound_pokes_txn( add_device_list_outbound_pokes_txn, stream_ids, ) + + async def add_remote_device_list_to_pending( + self, user_id: str, device_id: str + ) -> None: + """Add a device list update to the table tracking remote device list + updates during partial joins. + """ + + async with self._device_list_id_gen.get_next() as stream_id: # type: ignore[attr-defined] + await self.db_pool.simple_upsert( + table="device_lists_remote_pending", + keyvalues={ + "user_id": user_id, + "device_id": device_id, + }, + values={"stream_id": stream_id}, + desc="add_remote_device_list_to_pending", + ) + + async def get_pending_remote_device_list_updates_for_room( + self, room_id: str + ) -> Collection[Tuple[str, str]]: + """Get the set of remote device list updates from the pending table for + the room. + """ + + min_device_stream_id = await self.db_pool.simple_select_one_onecol( + table="partial_state_rooms", + keyvalues={ + "room_id": room_id, + }, + retcol="device_lists_stream_id", + desc="get_pending_remote_device_list_updates_for_room_device", + ) + + sql = """ + SELECT user_id, device_id FROM device_lists_remote_pending AS d + INNER JOIN current_state_events AS c ON + type = 'm.room.member' + AND state_key = user_id + AND membership = 'join' + WHERE + room_id = ? AND stream_id > ? + """ + + def get_pending_remote_device_list_updates_for_room_txn( + txn: LoggingTransaction, + ) -> Collection[Tuple[str, str]]: + txn.execute(sql, (room_id, min_device_stream_id)) + return cast(Collection[Tuple[str, str]], txn.fetchall()) + + return await self.db_pool.runInteraction( + "get_pending_remote_device_list_updates_for_room", + get_pending_remote_device_list_updates_for_room_txn, + ) diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 8e9e1b0b4b41..8a10ae800c3d 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -43,6 +43,7 @@ LoggingDatabaseConnection, LoggingTransaction, make_in_list_sql_clause, + make_tuple_in_list_sql_clause, ) from synapse.storage.databases.main.cache import CacheInvalidationWorkerStore from synapse.storage.engines import PostgresEngine @@ -278,7 +279,7 @@ async def get_e2e_device_keys_and_signatures( def _get_e2e_device_keys_txn( self, txn: LoggingTransaction, - query_list: Collection[Tuple[str, str]], + query_list: Collection[Tuple[str, Optional[str]]], include_all_devices: bool = False, include_deleted_devices: bool = False, ) -> Dict[str, Dict[str, Optional[DeviceKeyLookupResult]]]: @@ -288,8 +289,8 @@ def _get_e2e_device_keys_txn( cross-signing signatures which have been added subsequently (for which, see get_e2e_device_keys_and_signatures) """ - query_clauses = [] - query_params = [] + query_clauses: List[str] = [] + query_params_list: List[List[object]] = [] if include_all_devices is False: include_deleted_devices = False @@ -297,40 +298,64 @@ def _get_e2e_device_keys_txn( if include_deleted_devices: deleted_devices = set(query_list) + # Split the query list into queries for users and queries for particular + # devices. + user_list = [] + user_device_list = [] for (user_id, device_id) in query_list: - query_clause = "user_id = ?" - query_params.append(user_id) - - if device_id is not None: - query_clause += " AND device_id = ?" - query_params.append(device_id) - - query_clauses.append(query_clause) - - sql = ( - "SELECT user_id, device_id, " - " d.display_name, " - " k.key_json" - " FROM devices d" - " %s JOIN e2e_device_keys_json k USING (user_id, device_id)" - " WHERE %s AND NOT d.hidden" - ) % ( - "LEFT" if include_all_devices else "INNER", - " OR ".join("(" + q + ")" for q in query_clauses), - ) + if device_id is None: + user_list.append(user_id) + else: + user_device_list.append((user_id, device_id)) - txn.execute(sql, query_params) + if user_list: + user_id_in_list_clause, user_args = make_in_list_sql_clause( + txn.database_engine, "user_id", user_list + ) + query_clauses.append(user_id_in_list_clause) + query_params_list.append(user_args) + + if user_device_list: + # Divide the device queries into batches, to avoid excessively large + # queries. + for user_device_batch in batch_iter(user_device_list, 1024): + ( + user_device_id_in_list_clause, + user_device_args, + ) = make_tuple_in_list_sql_clause( + txn.database_engine, ("user_id", "device_id"), user_device_batch + ) + query_clauses.append(user_device_id_in_list_clause) + query_params_list.append(user_device_args) result: Dict[str, Dict[str, Optional[DeviceKeyLookupResult]]] = {} - for (user_id, device_id, display_name, key_json) in txn: - if include_deleted_devices: - deleted_devices.remove((user_id, device_id)) - result.setdefault(user_id, {})[device_id] = DeviceKeyLookupResult( - display_name, db_to_json(key_json) if key_json else None + for query_clause, query_params in zip(query_clauses, query_params_list): + sql = ( + "SELECT user_id, device_id, " + " d.display_name, " + " k.key_json" + " FROM devices d" + " %s JOIN e2e_device_keys_json k USING (user_id, device_id)" + " WHERE %s AND NOT d.hidden" + ) % ( + "LEFT" if include_all_devices else "INNER", + query_clause, ) + txn.execute(sql, query_params) + + for (user_id, device_id, display_name, key_json) in txn: + assert device_id is not None + if include_deleted_devices: + deleted_devices.remove((user_id, device_id)) + result.setdefault(user_id, {})[device_id] = DeviceKeyLookupResult( + display_name, db_to_json(key_json) if key_json else None + ) + if include_deleted_devices: for user_id, device_id in deleted_devices: + if device_id is None: + continue result.setdefault(user_id, {})[device_id] = None return result diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index ef477978ed63..6b9a629eddb9 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import datetime import itertools import logging from queue import Empty, PriorityQueue @@ -43,7 +44,7 @@ ) from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.signatures import SignatureWorkerStore -from synapse.storage.engines import PostgresEngine +from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.types import JsonDict from synapse.util import json_encoder from synapse.util.caches.descriptors import cached @@ -72,6 +73,30 @@ logger = logging.getLogger(__name__) +# Parameters controlling exponential backoff between backfill failures. +# After the first failure to backfill, we wait 2 hours before trying again. If the +# second attempt fails, we wait 4 hours before trying again. If the third attempt fails, +# we wait 8 hours before trying again, ... and so on. +# +# Each successive backoff period is twice as long as the last. However we cap this +# period at a maximum of 2^8 = 256 hours: a little over 10 days. (This is the smallest +# power of 2 which yields a maximum backoff period of at least 7 days---which was the +# original maximum backoff period.) Even when we hit this cap, we will continue to +# make backfill attempts once every 10 days. +BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS = 8 +BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_MILLISECONDS = int( + datetime.timedelta(hours=1).total_seconds() * 1000 +) + +# We need a cap on the power of 2 or else the backoff period +# 2^N * (milliseconds per hour) +# will overflow when calcuated within the database. We ensure overflow does not occur +# by checking that the largest backoff period fits in a 32-bit signed integer. +_LONGEST_BACKOFF_PERIOD_MILLISECONDS = ( + 2**BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS +) * BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_MILLISECONDS +assert 0 < _LONGEST_BACKOFF_PERIOD_MILLISECONDS <= ((2**31) - 1) + # All the info we need while iterating the DAG while backfilling @attr.s(frozen=True, slots=True, auto_attribs=True) @@ -715,96 +740,262 @@ def _get_auth_chain_difference_txn( @trace @tag_args - async def get_oldest_event_ids_with_depth_in_room( - self, room_id: str + async def get_backfill_points_in_room( + self, + room_id: str, + current_depth: int, + limit: int, ) -> List[Tuple[str, int]]: - """Gets the oldest events(backwards extremities) in the room along with the - aproximate depth. - - We use this function so that we can compare and see if someones current - depth at their current scrollback is within pagination range of the - event extremeties. If the current depth is close to the depth of given - oldest event, we can trigger a backfill. + """ + Get the backward extremities to backfill from in the room along with the + approximate depth. + + Only returns events that are at a depth lower than or + equal to the `current_depth`. Sorted by depth, highest to lowest (descending) + so the closest events to the `current_depth` are first in the list. + + We ignore extremities that are newer than the user's current scroll position + (ie, those with depth greater than `current_depth`) as: + 1. we don't really care about getting events that have happened + after our current position; and + 2. by the nature of paginating and scrolling back, we have likely + previously tried and failed to backfill from that extremity, so + to avoid getting "stuck" requesting the same backfill repeatedly + we drop those extremities. Args: room_id: Room where we want to find the oldest events + current_depth: The depth at the user's current scrollback position + limit: The max number of backfill points to return Returns: - List of (event_id, depth) tuples + List of (event_id, depth) tuples. Sorted by depth, highest to lowest + (descending) so the closest events to the `current_depth` are first + in the list. """ - def get_oldest_event_ids_with_depth_in_room_txn( + def get_backfill_points_in_room_txn( txn: LoggingTransaction, room_id: str ) -> List[Tuple[str, int]]: - # Assemble a dictionary with event_id -> depth for the oldest events + # Assemble a tuple lookup of event_id -> depth for the oldest events # we know of in the room. Backwards extremeties are the oldest # events we know of in the room but we only know of them because - # some other event referenced them by prev_event and aren't peristed - # in our database yet (meaning we don't know their depth - # specifically). So we need to look for the aproximate depth from + # some other event referenced them by prev_event and aren't + # persisted in our database yet (meaning we don't know their depth + # specifically). So we need to look for the approximate depth from # the events connected to the current backwards extremeties. - sql = """ - SELECT b.event_id, MAX(e.depth) FROM events as e + + if isinstance(self.database_engine, PostgresEngine): + least_function = "LEAST" + elif isinstance(self.database_engine, Sqlite3Engine): + least_function = "MIN" + else: + raise RuntimeError("Unknown database engine") + + sql = f""" + SELECT backward_extrem.event_id, event.depth FROM events AS event /** * Get the edge connections from the event_edges table * so we can see whether this event's prev_events points * to a backward extremity in the next join. */ - INNER JOIN event_edges as g - ON g.event_id = e.event_id + INNER JOIN event_edges AS edge + ON edge.event_id = event.event_id /** * We find the "oldest" events in the room by looking for * events connected to backwards extremeties (oldest events * in the room that we know of so far). */ - INNER JOIN event_backward_extremities as b - ON g.prev_event_id = b.event_id - WHERE b.room_id = ? AND g.is_state is ? - GROUP BY b.event_id + INNER JOIN event_backward_extremities AS backward_extrem + ON edge.prev_event_id = backward_extrem.event_id + /** + * We use this info to make sure we don't retry to use a backfill point + * if we've already attempted to backfill from it recently. + */ + LEFT JOIN event_failed_pull_attempts AS failed_backfill_attempt_info + ON + failed_backfill_attempt_info.room_id = backward_extrem.room_id + AND failed_backfill_attempt_info.event_id = backward_extrem.event_id + WHERE + backward_extrem.room_id = ? + /* We only care about non-state edges because we used to use + * `event_edges` for two different sorts of "edges" (the current + * event DAG, but also a link to the previous state, for state + * events). These legacy state event edges can be distinguished by + * `is_state` and are removed from the codebase and schema but + * because the schema change is in a background update, it's not + * necessarily safe to assume that it will have been completed. + */ + AND edge.is_state is ? /* False */ + /** + * We only want backwards extremities that are older than or at + * the same position of the given `current_depth` (where older + * means less than the given depth) because we're looking backwards + * from the `current_depth` when backfilling. + * + * current_depth (ignore events that come after this, ignore 2-4) + * | + * ▼ + * [0]<--[1]<--[2]<--[3]<--[4] + */ + AND event.depth <= ? /* current_depth */ + /** + * Exponential back-off (up to the upper bound) so we don't retry the + * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc. + * + * We use `1 << n` as a power of 2 equivalent for compatibility + * with older SQLites. The left shift equivalent only works with + * powers of 2 because left shift is a binary operation (base-2). + * Otherwise, we would use `power(2, n)` or the power operator, `2^n`. + */ + AND ( + failed_backfill_attempt_info.event_id IS NULL + OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + ( + (1 << {least_function}(failed_backfill_attempt_info.num_attempts, ? /* max doubling steps */)) + * ? /* step */ + ) + ) + /** + * Sort from highest (closest to the `current_depth`) to the lowest depth + * because the closest are most relevant to backfill from first. + * Then tie-break on alphabetical order of the event_ids so we get a + * consistent ordering which is nice when asserting things in tests. + */ + ORDER BY event.depth DESC, backward_extrem.event_id DESC + LIMIT ? """ - txn.execute(sql, (room_id, False)) + txn.execute( + sql, + ( + room_id, + False, + current_depth, + self._clock.time_msec(), + BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS, + BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_MILLISECONDS, + limit, + ), + ) return cast(List[Tuple[str, int]], txn.fetchall()) return await self.db_pool.runInteraction( - "get_oldest_event_ids_with_depth_in_room", - get_oldest_event_ids_with_depth_in_room_txn, + "get_backfill_points_in_room", + get_backfill_points_in_room_txn, room_id, ) @trace async def get_insertion_event_backward_extremities_in_room( - self, room_id: str + self, + room_id: str, + current_depth: int, + limit: int, ) -> List[Tuple[str, int]]: - """Get the insertion events we know about that we haven't backfilled yet. - - We use this function so that we can compare and see if someones current - depth at their current scrollback is within pagination range of the - insertion event. If the current depth is close to the depth of given - insertion event, we can trigger a backfill. + """ + Get the insertion events we know about that we haven't backfilled yet + along with the approximate depth. Only returns insertion events that are + at a depth lower than or equal to the `current_depth`. Sorted by depth, + highest to lowest (descending) so the closest events to the + `current_depth` are first in the list. + + We ignore insertion events that are newer than the user's current scroll + position (ie, those with depth greater than `current_depth`) as: + 1. we don't really care about getting events that have happened + after our current position; and + 2. by the nature of paginating and scrolling back, we have likely + previously tried and failed to backfill from that insertion event, so + to avoid getting "stuck" requesting the same backfill repeatedly + we drop those insertion event. Args: room_id: Room where we want to find the oldest events + current_depth: The depth at the user's current scrollback position + limit: The max number of insertion event extremities to return Returns: - List of (event_id, depth) tuples + List of (event_id, depth) tuples. Sorted by depth, highest to lowest + (descending) so the closest events to the `current_depth` are first + in the list. """ def get_insertion_event_backward_extremities_in_room_txn( txn: LoggingTransaction, room_id: str ) -> List[Tuple[str, int]]: - sql = """ - SELECT b.event_id, MAX(e.depth) FROM insertion_events as i + if isinstance(self.database_engine, PostgresEngine): + least_function = "LEAST" + elif isinstance(self.database_engine, Sqlite3Engine): + least_function = "MIN" + else: + raise RuntimeError("Unknown database engine") + + sql = f""" + SELECT + insertion_event_extremity.event_id, event.depth /* We only want insertion events that are also marked as backwards extremities */ - INNER JOIN insertion_event_extremities as b USING (event_id) + FROM insertion_event_extremities AS insertion_event_extremity /* Get the depth of the insertion event from the events table */ - INNER JOIN events AS e USING (event_id) - WHERE b.room_id = ? - GROUP BY b.event_id + INNER JOIN events AS event USING (event_id) + /** + * We use this info to make sure we don't retry to use a backfill point + * if we've already attempted to backfill from it recently. + */ + LEFT JOIN event_failed_pull_attempts AS failed_backfill_attempt_info + ON + failed_backfill_attempt_info.room_id = insertion_event_extremity.room_id + AND failed_backfill_attempt_info.event_id = insertion_event_extremity.event_id + WHERE + insertion_event_extremity.room_id = ? + /** + * We only want extremities that are older than or at + * the same position of the given `current_depth` (where older + * means less than the given depth) because we're looking backwards + * from the `current_depth` when backfilling. + * + * current_depth (ignore events that come after this, ignore 2-4) + * | + * ▼ + * [0]<--[1]<--[2]<--[3]<--[4] + */ + AND event.depth <= ? /* current_depth */ + /** + * Exponential back-off (up to the upper bound) so we don't retry the + * same backfill point over and over. ex. 2hr, 4hr, 8hr, 16hr, etc + * + * We use `1 << n` as a power of 2 equivalent for compatibility + * with older SQLites. The left shift equivalent only works with + * powers of 2 because left shift is a binary operation (base-2). + * Otherwise, we would use `power(2, n)` or the power operator, `2^n`. + */ + AND ( + failed_backfill_attempt_info.event_id IS NULL + OR ? /* current_time */ >= failed_backfill_attempt_info.last_attempt_ts + ( + (1 << {least_function}(failed_backfill_attempt_info.num_attempts, ? /* max doubling steps */)) + * ? /* step */ + ) + ) + /** + * Sort from highest (closest to the `current_depth`) to the lowest depth + * because the closest are most relevant to backfill from first. + * Then tie-break on alphabetical order of the event_ids so we get a + * consistent ordering which is nice when asserting things in tests. + */ + ORDER BY event.depth DESC, insertion_event_extremity.event_id DESC + LIMIT ? """ - txn.execute(sql, (room_id,)) + txn.execute( + sql, + ( + room_id, + current_depth, + self._clock.time_msec(), + BACKFILL_EVENT_EXPONENTIAL_BACKOFF_MAXIMUM_DOUBLING_STEPS, + BACKFILL_EVENT_EXPONENTIAL_BACKOFF_STEP_MILLISECONDS, + limit, + ), + ) return cast(List[Tuple[str, int]], txn.fetchall()) return await self.db_pool.runInteraction( @@ -1539,7 +1730,12 @@ async def get_next_staged_event_id_for_room( self, room_id: str, ) -> Optional[Tuple[str, str]]: - """Get the next event ID in the staging area for the given room.""" + """ + Get the next event ID in the staging area for the given room. + + Returns: + Tuple of the `origin` and `event_id` + """ def _get_next_staged_event_id_for_room_txn( txn: LoggingTransaction, diff --git a/synapse/storage/databases/main/event_push_actions.py b/synapse/storage/databases/main/event_push_actions.py index fb49e405efdc..eb842344dee3 100644 --- a/synapse/storage/databases/main/event_push_actions.py +++ b/synapse/storage/databases/main/event_push_actions.py @@ -204,6 +204,9 @@ def __init__( ): super().__init__(database, db_conn, hs) + # Track when the process started. + self._started_ts = self._clock.time_msec() + # These get correctly set by _find_stream_orderings_for_times_txn self.stream_ordering_month_ago: Optional[int] = None self.stream_ordering_day_ago: Optional[int] = None @@ -223,6 +226,10 @@ def __init__( self._rotate_notifs, 30 * 1000 ) + self._clear_old_staging_loop = self._clock.looping_call( + self._clear_old_push_actions_staging, 30 * 60 * 1000 + ) + self.db_pool.updates.register_background_index_update( "event_push_summary_unique_index", index_name="event_push_summary_unique_index", @@ -261,11 +268,11 @@ async def _background_backfill_thread_id( event_push_actions_done = progress.get("event_push_actions_done", False) def add_thread_id_txn( - txn: LoggingTransaction, table_name: str, start_stream_ordering: int + txn: LoggingTransaction, start_stream_ordering: int ) -> int: - sql = f""" + sql = """ SELECT stream_ordering - FROM {table_name} + FROM event_push_actions WHERE thread_id IS NULL AND stream_ordering > ? @@ -277,7 +284,7 @@ def add_thread_id_txn( # No more rows to process. rows = txn.fetchall() if not rows: - progress[f"{table_name}_done"] = True + progress["event_push_actions_done"] = True self.db_pool.updates._background_update_progress_txn( txn, "event_push_backfill_thread_id", progress ) @@ -286,16 +293,65 @@ def add_thread_id_txn( # Update the thread ID for any of those rows. max_stream_ordering = rows[-1][0] - sql = f""" - UPDATE {table_name} + sql = """ + UPDATE event_push_actions SET thread_id = 'main' - WHERE stream_ordering <= ? AND thread_id IS NULL + WHERE ? < stream_ordering AND stream_ordering <= ? AND thread_id IS NULL """ - txn.execute(sql, (max_stream_ordering,)) + txn.execute( + sql, + ( + start_stream_ordering, + max_stream_ordering, + ), + ) # Update progress. processed_rows = txn.rowcount - progress[f"max_{table_name}_stream_ordering"] = max_stream_ordering + progress["max_event_push_actions_stream_ordering"] = max_stream_ordering + self.db_pool.updates._background_update_progress_txn( + txn, "event_push_backfill_thread_id", progress + ) + + return processed_rows + + def add_thread_id_summary_txn(txn: LoggingTransaction) -> int: + min_user_id = progress.get("max_summary_user_id", "") + min_room_id = progress.get("max_summary_room_id", "") + + # Slightly overcomplicated query for getting the Nth user ID / room + # ID tuple, or the last if there are less than N remaining. + sql = """ + SELECT user_id, room_id FROM ( + SELECT user_id, room_id FROM event_push_summary + WHERE (user_id, room_id) > (?, ?) + AND thread_id IS NULL + ORDER BY user_id, room_id + LIMIT ? + ) AS e + ORDER BY user_id DESC, room_id DESC + LIMIT 1 + """ + + txn.execute(sql, (min_user_id, min_room_id, batch_size)) + row = txn.fetchone() + if not row: + return 0 + + max_user_id, max_room_id = row + + sql = """ + UPDATE event_push_summary + SET thread_id = 'main' + WHERE + (?, ?) < (user_id, room_id) AND (user_id, room_id) <= (?, ?) + AND thread_id IS NULL + """ + txn.execute(sql, (min_user_id, min_room_id, max_user_id, max_room_id)) + processed_rows = txn.rowcount + + progress["max_summary_user_id"] = max_user_id + progress["max_summary_room_id"] = max_room_id self.db_pool.updates._background_update_progress_txn( txn, "event_push_backfill_thread_id", progress ) @@ -311,15 +367,12 @@ def add_thread_id_txn( result = await self.db_pool.runInteraction( "event_push_backfill_thread_id", add_thread_id_txn, - "event_push_actions", progress.get("max_event_push_actions_stream_ordering", 0), ) else: result = await self.db_pool.runInteraction( "event_push_backfill_thread_id", - add_thread_id_txn, - "event_push_summary", - progress.get("max_event_push_summary_stream_ordering", 0), + add_thread_id_summary_txn, ) # Only done after the event_push_summary table is done. @@ -365,14 +418,11 @@ def _get_unread_counts_by_receipt_txn( user_id: str, ) -> NotifCounts: # Get the stream ordering of the user's latest receipt in the room. - result = self.get_last_receipt_for_user_txn( + result = self.get_last_unthreaded_receipt_for_user_txn( txn, user_id, room_id, - receipt_types=( - ReceiptTypes.READ, - ReceiptTypes.READ_PRIVATE, - ), + receipt_types=(ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE), ) if result: @@ -558,14 +608,22 @@ def f(txn: LoggingTransaction) -> List[str]: def _get_receipts_by_room_txn( self, txn: LoggingTransaction, user_id: str - ) -> List[Tuple[str, int]]: + ) -> Dict[str, int]: + """ + Generate a map of room ID to the latest stream ordering that has been + read by the given user. + + Args: + txn: + user_id: The user to fetch receipts for. + + Returns: + A map of room ID to stream ordering for all rooms the user has a receipt in. + """ receipt_types_clause, args = make_in_list_sql_clause( self.database_engine, "receipt_type", - ( - ReceiptTypes.READ, - ReceiptTypes.READ_PRIVATE, - ), + (ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE), ) sql = f""" @@ -578,7 +636,10 @@ def _get_receipts_by_room_txn( args.extend((user_id,)) txn.execute(sql, args) - return cast(List[Tuple[str, int]], txn.fetchall()) + return { + room_id: latest_stream_ordering + for room_id, latest_stream_ordering in txn.fetchall() + } async def get_unread_push_actions_for_user_in_range_for_http( self, @@ -603,12 +664,10 @@ async def get_unread_push_actions_for_user_in_range_for_http( The list will have between 0~limit entries. """ - receipts_by_room = dict( - await self.db_pool.runInteraction( - "get_unread_push_actions_for_user_in_range_http_receipts", - self._get_receipts_by_room_txn, - user_id=user_id, - ), + receipts_by_room = await self.db_pool.runInteraction( + "get_unread_push_actions_for_user_in_range_http_receipts", + self._get_receipts_by_room_txn, + user_id=user_id, ) def get_push_actions_txn( @@ -677,12 +736,10 @@ async def get_unread_push_actions_for_user_in_range_for_email( The list will have between 0~limit entries. """ - receipts_by_room = dict( - await self.db_pool.runInteraction( - "get_unread_push_actions_for_user_in_range_email_receipts", - self._get_receipts_by_room_txn, - user_id=user_id, - ), + receipts_by_room = await self.db_pool.runInteraction( + "get_unread_push_actions_for_user_in_range_email_receipts", + self._get_receipts_by_room_txn, + user_id=user_id, ) def get_push_actions_txn( @@ -785,7 +842,7 @@ async def add_push_actions_to_staging( # can be used to insert into the `event_push_actions_staging` table. def _gen_entry( user_id: str, actions: Collection[Union[Mapping, str]] - ) -> Tuple[str, str, str, int, int, int, str]: + ) -> Tuple[str, str, str, int, int, int, str, int]: is_highlight = 1 if _action_has_highlight(actions) else 0 notif = 1 if "notify" in actions else 0 return ( @@ -796,6 +853,7 @@ def _gen_entry( is_highlight, # highlight column int(count_as_unread_by_user[user_id]), # unread column thread_id, # thread_id column + self._clock.time_msec(), # inserted_ts column ) await self.db_pool.simple_insert_many( @@ -808,6 +866,7 @@ def _gen_entry( "highlight", "unread", "thread_id", + "inserted_ts", ), values=[ _gen_entry(user_id, actions) @@ -1061,7 +1120,7 @@ def _handle_new_receipts_for_notifs_txn(self, txn: LoggingTransaction) -> bool: limit, ), ) - rows = txn.fetchall() + rows = cast(List[Tuple[int, str, str, int]], txn.fetchall()) # For each new read receipt we delete push actions from before it and # recalculate the summary. @@ -1091,37 +1150,46 @@ def _handle_new_receipts_for_notifs_txn(self, txn: LoggingTransaction) -> bool: txn, room_id, user_id, stream_ordering, old_rotate_stream_ordering ) + # First ensure that the existing rows have an updated thread_id field. + txn.execute( + """ + UPDATE event_push_summary + SET thread_id = ? + WHERE room_id = ? AND user_id = ? AND thread_id is NULL + """, + ("main", room_id, user_id), + ) + # Replace the previous summary with the new counts. # # TODO(threads): Upsert per-thread instead of setting them all to main. self.db_pool.simple_upsert_txn( txn, table="event_push_summary", - keyvalues={"room_id": room_id, "user_id": user_id}, + keyvalues={"room_id": room_id, "user_id": user_id, "thread_id": "main"}, values={ "notif_count": notif_count, "unread_count": unread_count, "stream_ordering": old_rotate_stream_ordering, "last_receipt_stream_ordering": stream_ordering, - "thread_id": "main", }, ) # We always update `event_push_summary_last_receipt_stream_id` to # ensure that we don't rescan the same receipts for remote users. - upper_limit = max_receipts_stream_id + receipts_last_processed_stream_id = max_receipts_stream_id if len(rows) >= limit: # If we pulled out a limited number of rows we only update the # position to the last receipt we processed, so we continue # processing the rest next iteration. - upper_limit = rows[-1][0] + receipts_last_processed_stream_id = rows[-1][0] self.db_pool.simple_update_txn( txn, table="event_push_summary_last_receipt_stream_id", keyvalues={}, - updatevalues={"stream_id": upper_limit}, + updatevalues={"stream_id": receipts_last_processed_stream_id}, ) return len(rows) < limit @@ -1252,20 +1320,33 @@ def _rotate_notifs_before_txn( logger.info("Rotating notifications, handling %d rows", len(summaries)) + # Ensure that any updated threads have an updated thread_id. + txn.execute_batch( + """ + UPDATE event_push_summary + SET thread_id = ? + WHERE room_id = ? AND user_id = ? AND thread_id is NULL + """, + [("main", room_id, user_id) for user_id, room_id in summaries], + ) + self.db_pool.simple_update_many_txn( + txn, + table="event_push_summary", + key_names=("user_id", "room_id", "thread_id"), + key_values=[(user_id, room_id, None) for user_id, room_id in summaries], + value_names=("thread_id",), + value_values=[("main",) for _ in summaries], + ) + # TODO(threads): Update on a per-thread basis. self.db_pool.simple_upsert_many_txn( txn, table="event_push_summary", - key_names=("user_id", "room_id"), - key_values=[(user_id, room_id) for user_id, room_id in summaries], - value_names=("notif_count", "unread_count", "stream_ordering", "thread_id"), + key_names=("user_id", "room_id", "thread_id"), + key_values=[(user_id, room_id, "main") for user_id, room_id in summaries], + value_names=("notif_count", "unread_count", "stream_ordering"), value_values=[ - ( - summary.notif_count, - summary.unread_count, - summary.stream_ordering, - "main", - ) + (summary.notif_count, summary.unread_count, summary.stream_ordering) for summary in summaries.values() ], ) @@ -1337,6 +1418,53 @@ def remove_old_push_actions_that_have_rotated_txn( if done: break + @wrap_as_background_process("_clear_old_push_actions_staging") + async def _clear_old_push_actions_staging(self) -> None: + """Clear out any old event push actions from the staging table for + events that we failed to persist. + """ + + # We delete anything more than an hour old, on the assumption that we'll + # never take more than an hour to persist an event. + delete_before_ts = self._clock.time_msec() - 60 * 60 * 1000 + + if self._started_ts > delete_before_ts: + # We need to wait for at least an hour before we started deleting, + # so that we know it's safe to delete rows with NULL `inserted_ts`. + return + + # We don't have an index on `inserted_ts`, instead we assume that the + # number of "live" rows in `event_push_actions_staging` is small enough + # that an infrequent periodic scan won't cause a problem. + # + # Note: we also delete any columns with NULL `inserted_ts`, this is safe + # as we added a default value to new rows and so they must be at least + # an hour old. + limit = 1000 + sql = """ + DELETE FROM event_push_actions_staging WHERE event_id IN ( + SELECT event_id FROM event_push_actions_staging WHERE + inserted_ts < ? OR inserted_ts IS NULL + LIMIT ? + ) + """ + + def _clear_old_push_actions_staging_txn(txn: LoggingTransaction) -> bool: + txn.execute(sql, (delete_before_ts, limit)) + return txn.rowcount >= limit + + while True: + # Returns true if we have more stuff to delete from the table. + deleted = await self.db_pool.runInteraction( + "_clear_old_push_actions_staging", _clear_old_push_actions_staging_txn + ) + + if not deleted: + return + + # We sleep to ensure that we don't overwhelm the DB. + await self._clock.sleep(1.0) + class EventPushActionsStore(EventPushActionsWorkerStore): EPA_HIGHLIGHT_INDEX = "epa_highlight_index" diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index 9577406f6946..3e1582798656 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -35,7 +35,7 @@ from prometheus_client import Counter import synapse.metrics -from synapse.api.constants import EventContentFields, EventTypes, RelationTypes +from synapse.api.constants import EventContentFields, EventTypes from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import RoomVersions from synapse.events import EventBase, relation_from_event @@ -410,6 +410,31 @@ def _persist_events_txn( assert min_stream_order assert max_stream_order + # Once the txn completes, invalidate all of the relevant caches. Note that we do this + # up here because it captures all the events_and_contexts before any are removed. + for event, _ in events_and_contexts: + self.store.invalidate_get_event_cache_after_txn(txn, event.event_id) + if event.redacts: + self.store.invalidate_get_event_cache_after_txn(txn, event.redacts) + + relates_to = None + relation = relation_from_event(event) + if relation: + relates_to = relation.parent_id + + assert event.internal_metadata.stream_ordering is not None + txn.call_after( + self.store._invalidate_caches_for_event, + event.internal_metadata.stream_ordering, + event.event_id, + event.room_id, + event.type, + getattr(event, "state_key", None), + event.redacts, + relates_to, + backfilled=False, + ) + self._update_forward_extremities_txn( txn, new_forward_extremities=new_forward_extremities, @@ -459,6 +484,7 @@ def _persist_events_txn( # We call this last as it assumes we've inserted the events into # room_memberships, where applicable. + # NB: This function invalidates all state related caches self._update_current_state_txn(txn, state_delta_for_room, min_stream_order) def _persist_event_auth_chain_txn( @@ -1176,6 +1202,12 @@ def _update_current_state_txn( txn, room_id, members_changed ) + # Check if any of the remote membership changes requires us to + # unsubscribe from their device lists. + self.store.handle_potentially_left_users_txn( + txn, {m for m in members_changed if not self.hs.is_mine_id(m)} + ) + def _upsert_room_version_txn(self, txn: LoggingTransaction, room_id: str) -> None: """Update the room version in the database based off current state events. @@ -1215,9 +1247,6 @@ def _update_forward_extremities_txn( self.db_pool.simple_delete_txn( txn, table="event_forward_extremities", keyvalues={"room_id": room_id} ) - txn.call_after( - self.store.get_latest_event_ids_in_room.invalidate, (room_id,) - ) self.db_pool.simple_insert_many_txn( txn, @@ -1287,8 +1316,6 @@ def _update_room_depths_txn( """ depth_updates: Dict[str, int] = {} for event, context in events_and_contexts: - # Remove the any existing cache entries for the event_ids - self.store.invalidate_get_event_cache_after_txn(txn, event.event_id) # Then update the `stream_ordering` position to mark the latest # event as the front of the room. This should not be done for # backfilled events because backfilled events have negative @@ -1690,16 +1717,7 @@ async def prefill() -> None: txn.async_call_after(prefill) def _store_redaction(self, txn: LoggingTransaction, event: EventBase) -> None: - """Invalidate the caches for the redacted event. - - Note that these caches are also cleared as part of event replication in - _invalidate_caches_for_event. - """ assert event.redacts is not None - self.store.invalidate_get_event_cache_after_txn(txn, event.redacts) - txn.call_after(self.store.get_relations_for_event.invalidate, (event.redacts,)) - txn.call_after(self.store.get_applicable_edit.invalidate, (event.redacts,)) - self.db_pool.simple_upsert_txn( txn, table="redactions", @@ -1800,38 +1818,6 @@ def _store_room_members_txn( for event in events: assert event.internal_metadata.stream_ordering is not None - txn.call_after( - self.store._membership_stream_cache.entity_has_changed, - event.state_key, - event.internal_metadata.stream_ordering, - ) - txn.call_after( - self.store.get_invited_rooms_for_local_user.invalidate, - (event.state_key,), - ) - txn.call_after( - self.store.get_local_users_in_room.invalidate, - (event.room_id,), - ) - txn.call_after( - self.store.get_number_joined_users_in_room.invalidate, - (event.room_id,), - ) - txn.call_after( - self.store.get_number_joined_non_bot_users_in_room.invalidate, - (event.room_id,), - ) - txn.call_after( - self.store.get_user_in_room_with_profile.invalidate, - (event.room_id, event.state_key), - ) - - # The `_get_membership_from_event_id` is immutable, except for the - # case where we look up an event *before* persisting it. - txn.call_after( - self.store._get_membership_from_event_id.invalidate, - (event.event_id,), - ) # We update the local_current_membership table only if the event is # "current", i.e., its something that has just happened. @@ -1880,35 +1866,6 @@ def _handle_event_relations( }, ) - txn.call_after( - self.store.get_relations_for_event.invalidate, (relation.parent_id,) - ) - txn.call_after( - self.store.get_aggregation_groups_for_event.invalidate, - (relation.parent_id,), - ) - txn.call_after( - self.store.get_mutual_event_relations_for_rel_type.invalidate, - (relation.parent_id,), - ) - - if relation.rel_type == RelationTypes.REPLACE: - txn.call_after( - self.store.get_applicable_edit.invalidate, (relation.parent_id,) - ) - - if relation.rel_type == RelationTypes.THREAD: - txn.call_after( - self.store.get_thread_summary.invalidate, (relation.parent_id,) - ) - # It should be safe to only invalidate the cache if the user has not - # previously participated in the thread, but that's difficult (and - # potentially error-prone) so it is always invalidated. - txn.call_after( - self.store.get_thread_participated.invalidate, - (relation.parent_id, event.sender), - ) - def _handle_insertion_event( self, txn: LoggingTransaction, event: EventBase ) -> None: @@ -2177,13 +2134,13 @@ def _set_push_actions_for_event_and_users_txn( appear in events_and_context. """ - # Only non outlier events will have push actions associated with them, + # Only notifiable events will have push actions associated with them, # so let's filter them out. (This makes joining large rooms faster, as # these queries took seconds to process all the state events). - non_outlier_events = [ + notifiable_events = [ event for event, _ in events_and_contexts - if not event.internal_metadata.is_outlier() + if event.internal_metadata.is_notifiable() ] sql = """ @@ -2196,7 +2153,7 @@ def _set_push_actions_for_event_and_users_txn( WHERE event_id = ? """ - if non_outlier_events: + if notifiable_events: txn.execute_batch( sql, ( @@ -2206,32 +2163,10 @@ def _set_push_actions_for_event_and_users_txn( event.depth, event.event_id, ) - for event in non_outlier_events + for event in notifiable_events ), ) - room_to_event_ids: Dict[str, List[str]] = {} - for e in non_outlier_events: - room_to_event_ids.setdefault(e.room_id, []).append(e.event_id) - - for room_id, event_ids in room_to_event_ids.items(): - rows = self.db_pool.simple_select_many_txn( - txn, - table="event_push_actions_staging", - column="event_id", - iterable=event_ids, - keyvalues={}, - retcols=("user_id",), - ) - - user_ids = {row["user_id"] for row in rows} - - for user_id in user_ids: - txn.call_after( - self.store.get_unread_event_push_actions_by_room_for_user.invalidate, - (room_id, user_id), - ) - # Now we delete the staging area for *all* events that were being # persisted. txn.execute_batch( @@ -2239,18 +2174,13 @@ def _set_push_actions_for_event_and_users_txn( ( (event.event_id,) for event, _ in all_events_and_contexts - if not event.internal_metadata.is_outlier() + if event.internal_metadata.is_notifiable() ), ) def _remove_push_actions_for_event_id_txn( self, txn: LoggingTransaction, room_id: str, event_id: str ) -> None: - # Sad that we have to blow away the cache for the whole room here - txn.call_after( - self.store.get_unread_event_push_actions_by_room_for_user.invalidate, - (room_id,), - ) txn.execute( "DELETE FROM event_push_actions WHERE room_id = ? AND event_id = ?", (room_id, event_id), diff --git a/synapse/storage/databases/main/events_worker.py b/synapse/storage/databases/main/events_worker.py index aeed89b13080..206b914d9223 100644 --- a/synapse/storage/databases/main/events_worker.py +++ b/synapse/storage/databases/main/events_worker.py @@ -1487,32 +1487,38 @@ async def have_seen_events( # the batches as big as possible. results: Set[str] = set() - for chunk in batch_iter(event_ids, 500): - r = await self._have_seen_events_dict( - [(room_id, event_id) for event_id in chunk] + for event_ids_chunk in batch_iter(event_ids, 500): + events_seen_dict = await self._have_seen_events_dict( + room_id, event_ids_chunk + ) + results.update( + eid for (eid, have_event) in events_seen_dict.items() if have_event ) - results.update(eid for ((_rid, eid), have_event) in r.items() if have_event) return results - @cachedList(cached_method_name="have_seen_event", list_name="keys") + @cachedList(cached_method_name="have_seen_event", list_name="event_ids") async def _have_seen_events_dict( - self, keys: Collection[Tuple[str, str]] - ) -> Dict[Tuple[str, str], bool]: + self, + room_id: str, + event_ids: Collection[str], + ) -> Dict[str, bool]: """Helper for have_seen_events Returns: - a dict {(room_id, event_id)-> bool} + a dict {event_id -> bool} """ # if the event cache contains the event, obviously we've seen it. cache_results = { - (rid, eid) - for (rid, eid) in keys - if await self._get_event_cache.contains((eid,)) + event_id + for event_id in event_ids + if await self._get_event_cache.contains((event_id,)) } results = dict.fromkeys(cache_results, True) - remaining = [k for k in keys if k not in cache_results] + remaining = [ + event_id for event_id in event_ids if event_id not in cache_results + ] if not remaining: return results @@ -1524,23 +1530,21 @@ def have_seen_events_txn(txn: LoggingTransaction) -> None: sql = "SELECT event_id FROM events AS e WHERE " clause, args = make_in_list_sql_clause( - txn.database_engine, "e.event_id", [eid for (_rid, eid) in remaining] + txn.database_engine, "e.event_id", remaining ) txn.execute(sql + clause, args) found_events = {eid for eid, in txn} # ... and then we can update the results for each key - results.update( - {(rid, eid): (eid in found_events) for (rid, eid) in remaining} - ) + results.update({eid: (eid in found_events) for eid in remaining}) await self.db_pool.runInteraction("have_seen_events", have_seen_events_txn) return results @cached(max_entries=100000, tree=True) async def have_seen_event(self, room_id: str, event_id: str) -> bool: - res = await self._have_seen_events_dict(((room_id, event_id),)) - return res[(room_id, event_id)] + res = await self._have_seen_events_dict(room_id, [event_id]) + return res[event_id] def _get_current_state_event_counts_txn( self, txn: LoggingTransaction, room_id: str diff --git a/synapse/storage/databases/main/push_rule.py b/synapse/storage/databases/main/push_rule.py index 5079edd1e083..ed17b2e70c04 100644 --- a/synapse/storage/databases/main/push_rule.py +++ b/synapse/storage/databases/main/push_rule.py @@ -30,9 +30,8 @@ from synapse.api.errors import StoreError from synapse.config.homeserver import ExperimentalConfig -from synapse.push.baserules import FilteredPushRules, PushRule, compile_push_rules from synapse.replication.slave.storage._slaved_id_tracker import SlavedIdTracker -from synapse.storage._base import SQLBaseStore, db_to_json +from synapse.storage._base import SQLBaseStore from synapse.storage.database import ( DatabasePool, LoggingDatabaseConnection, @@ -51,6 +50,7 @@ IdGenerator, StreamIdGenerator, ) +from synapse.synapse_rust.push import FilteredPushRules, PushRule, PushRules from synapse.types import JsonDict from synapse.util import json_encoder from synapse.util.caches.descriptors import cached, cachedList @@ -72,18 +72,25 @@ def _load_rules( """ ruleslist = [ - PushRule( + PushRule.from_db( rule_id=rawrule["rule_id"], priority_class=rawrule["priority_class"], - conditions=db_to_json(rawrule["conditions"]), - actions=db_to_json(rawrule["actions"]), + conditions=rawrule["conditions"], + actions=rawrule["actions"], ) for rawrule in rawrules ] - push_rules = compile_push_rules(ruleslist) + push_rules = PushRules( + ruleslist, + ) - filtered_rules = FilteredPushRules(push_rules, enabled_map, experimental_config) + filtered_rules = FilteredPushRules( + push_rules, + enabled_map, + msc3786_enabled=experimental_config.msc3786_enabled, + msc3772_enabled=experimental_config.msc3772_enabled, + ) return filtered_rules @@ -845,7 +852,7 @@ async def copy_push_rules_from_room_to_room_for_user( user_push_rules = await self.get_push_rules_for_user(user_id) # Get rules relating to the old room and copy them to the new room - for rule, enabled in user_push_rules: + for rule, enabled in user_push_rules.rules(): if not enabled: continue diff --git a/synapse/storage/databases/main/pusher.py b/synapse/storage/databases/main/pusher.py index bd0cfa7f3211..01206950a9d4 100644 --- a/synapse/storage/databases/main/pusher.py +++ b/synapse/storage/databases/main/pusher.py @@ -89,6 +89,11 @@ def _decode_pushers_rows(self, rows: Iterable[dict]) -> Iterator[PusherConfig]: ) continue + # If we're using SQLite, then boolean values are integers. This is + # troublesome since some code using the return value of this method might + # expect it to be a boolean, or will expose it to clients (in responses). + r["enabled"] = bool(r["enabled"]) + yield PusherConfig(**r) async def get_pushers_by_app_id_and_pushkey( @@ -100,38 +105,52 @@ async def get_pushers_by_user_id(self, user_id: str) -> Iterator[PusherConfig]: return await self.get_pushers_by({"user_name": user_id}) async def get_pushers_by(self, keyvalues: Dict[str, Any]) -> Iterator[PusherConfig]: - ret = await self.db_pool.simple_select_list( - "pushers", - keyvalues, - [ - "id", - "user_name", - "access_token", - "profile_tag", - "kind", - "app_id", - "app_display_name", - "device_display_name", - "pushkey", - "ts", - "lang", - "data", - "last_stream_ordering", - "last_success", - "failing_since", - ], + """Retrieve pushers that match the given criteria. + + Args: + keyvalues: A {column: value} dictionary. + + Returns: + The pushers for which the given columns have the given values. + """ + + def get_pushers_by_txn(txn: LoggingTransaction) -> List[Dict[str, Any]]: + # We could technically use simple_select_list here, but we need to call + # COALESCE on the 'enabled' column. While it is technically possible to give + # simple_select_list the whole `COALESCE(...) AS ...` as a column name, it + # feels a bit hacky, so it's probably better to just inline the query. + sql = """ + SELECT + id, user_name, access_token, profile_tag, kind, app_id, + app_display_name, device_display_name, pushkey, ts, lang, data, + last_stream_ordering, last_success, failing_since, + COALESCE(enabled, TRUE) AS enabled, device_id + FROM pushers + """ + + sql += "WHERE %s" % (" AND ".join("%s = ?" % (k,) for k in keyvalues),) + + txn.execute(sql, list(keyvalues.values())) + + return self.db_pool.cursor_to_dict(txn) + + ret = await self.db_pool.runInteraction( desc="get_pushers_by", + func=get_pushers_by_txn, ) + return self._decode_pushers_rows(ret) - async def get_all_pushers(self) -> Iterator[PusherConfig]: - def get_pushers(txn: LoggingTransaction) -> Iterator[PusherConfig]: - txn.execute("SELECT * FROM pushers") + async def get_enabled_pushers(self) -> Iterator[PusherConfig]: + def get_enabled_pushers_txn(txn: LoggingTransaction) -> Iterator[PusherConfig]: + txn.execute("SELECT * FROM pushers WHERE COALESCE(enabled, TRUE)") rows = self.db_pool.cursor_to_dict(txn) return self._decode_pushers_rows(rows) - return await self.db_pool.runInteraction("get_all_pushers", get_pushers) + return await self.db_pool.runInteraction( + "get_enabled_pushers", get_enabled_pushers_txn + ) async def get_all_updated_pushers_rows( self, instance_name: str, last_id: int, current_id: int, limit: int @@ -458,7 +477,74 @@ def _delete_pushers(txn: LoggingTransaction) -> int: return number_deleted -class PusherStore(PusherWorkerStore): +class PusherBackgroundUpdatesStore(SQLBaseStore): + def __init__( + self, + database: DatabasePool, + db_conn: LoggingDatabaseConnection, + hs: "HomeServer", + ): + super().__init__(database, db_conn, hs) + + self.db_pool.updates.register_background_update_handler( + "set_device_id_for_pushers", self._set_device_id_for_pushers + ) + + async def _set_device_id_for_pushers( + self, progress: JsonDict, batch_size: int + ) -> int: + """Background update to populate the device_id column of the pushers table.""" + last_pusher_id = progress.get("pusher_id", 0) + + def set_device_id_for_pushers_txn(txn: LoggingTransaction) -> int: + txn.execute( + """ + SELECT p.id, at.device_id + FROM pushers AS p + INNER JOIN access_tokens AS at + ON p.access_token = at.id + WHERE + p.access_token IS NOT NULL + AND at.device_id IS NOT NULL + AND p.id > ? + ORDER BY p.id + LIMIT ? + """, + (last_pusher_id, batch_size), + ) + + rows = self.db_pool.cursor_to_dict(txn) + if len(rows) == 0: + return 0 + + self.db_pool.simple_update_many_txn( + txn=txn, + table="pushers", + key_names=("id",), + key_values=[(row["id"],) for row in rows], + value_names=("device_id",), + value_values=[(row["device_id"],) for row in rows], + ) + + self.db_pool.updates._background_update_progress_txn( + txn, "set_device_id_for_pushers", {"pusher_id": rows[-1]["id"]} + ) + + return len(rows) + + nb_processed = await self.db_pool.runInteraction( + "set_device_id_for_pushers", set_device_id_for_pushers_txn + ) + + if nb_processed < batch_size: + await self.db_pool.updates._end_background_update( + "set_device_id_for_pushers" + ) + + return nb_processed + + +class PusherStore(PusherWorkerStore, PusherBackgroundUpdatesStore): def get_pushers_stream_token(self) -> int: return self._pushers_id_gen.get_current_token() @@ -476,6 +562,8 @@ async def add_pusher( data: Optional[JsonDict], last_stream_ordering: int, profile_tag: str = "", + enabled: bool = True, + device_id: Optional[str] = None, ) -> None: async with self._pushers_id_gen.get_next() as stream_id: # no need to lock because `pushers` has a unique key on @@ -494,6 +582,8 @@ async def add_pusher( "last_stream_ordering": last_stream_ordering, "profile_tag": profile_tag, "id": stream_id, + "enabled": enabled, + "device_id": device_id, }, desc="add_pusher", lock=False, diff --git a/synapse/storage/databases/main/receipts.py b/synapse/storage/databases/main/receipts.py index eff0dd5a6671..644110b692db 100644 --- a/synapse/storage/databases/main/receipts.py +++ b/synapse/storage/databases/main/receipts.py @@ -137,34 +137,7 @@ def get_max_receipt_stream_id(self) -> int: """Get the current max stream ID for receipts stream""" return self._receipts_id_gen.get_current_token() - async def get_last_receipt_event_id_for_user( - self, user_id: str, room_id: str, receipt_types: Collection[str] - ) -> Optional[str]: - """ - Fetch the event ID for the latest receipt in a room with one of the given receipt types. - - Args: - user_id: The user to fetch receipts for. - room_id: The room ID to fetch the receipt for. - receipt_type: The receipt types to fetch. - - Returns: - The latest receipt, if one exists. - """ - result = await self.db_pool.runInteraction( - "get_last_receipt_event_id_for_user", - self.get_last_receipt_for_user_txn, - user_id, - room_id, - receipt_types, - ) - if not result: - return None - - event_id, _ = result - return event_id - - def get_last_receipt_for_user_txn( + def get_last_unthreaded_receipt_for_user_txn( self, txn: LoggingTransaction, user_id: str, @@ -172,13 +145,13 @@ def get_last_receipt_for_user_txn( receipt_types: Collection[str], ) -> Optional[Tuple[str, int]]: """ - Fetch the event ID and stream_ordering for the latest receipt in a room - with one of the given receipt types. + Fetch the event ID and stream_ordering for the latest unthreaded receipt + in a room with one of the given receipt types. Args: user_id: The user to fetch receipts for. room_id: The room ID to fetch the receipt for. - receipt_type: The receipt types to fetch. + receipt_types: The receipt types to fetch. Returns: The event ID and stream ordering of the latest receipt, if one exists. @@ -194,6 +167,7 @@ def get_last_receipt_for_user_txn( WHERE {clause} AND user_id = ? AND room_id = ? + AND thread_id IS NULL ORDER BY event_stream_ordering DESC LIMIT 1 """ @@ -554,7 +528,9 @@ def _get_users_sent_receipts_between_txn(txn: LoggingTransaction) -> List[str]: async def get_all_updated_receipts( self, instance_name: str, last_id: int, current_id: int, limit: int - ) -> Tuple[List[Tuple[int, list]], int, bool]: + ) -> Tuple[ + List[Tuple[int, Tuple[str, str, str, str, Optional[str], JsonDict]]], int, bool + ]: """Get updates for receipts replication stream. Args: @@ -581,9 +557,13 @@ async def get_all_updated_receipts( def get_all_updated_receipts_txn( txn: LoggingTransaction, - ) -> Tuple[List[Tuple[int, list]], int, bool]: + ) -> Tuple[ + List[Tuple[int, Tuple[str, str, str, str, Optional[str], JsonDict]]], + int, + bool, + ]: sql = """ - SELECT stream_id, room_id, receipt_type, user_id, event_id, data + SELECT stream_id, room_id, receipt_type, user_id, event_id, thread_id, data FROM receipts_linearized WHERE ? < stream_id AND stream_id <= ? ORDER BY stream_id ASC @@ -592,8 +572,8 @@ def get_all_updated_receipts_txn( txn.execute(sql, (last_id, current_id, limit)) updates = cast( - List[Tuple[int, list]], - [(r[0], r[1:5] + (db_to_json(r[5]),)) for r in txn], + List[Tuple[int, Tuple[str, str, str, str, Optional[str], JsonDict]]], + [(r[0], r[1:6] + (db_to_json(r[6]),)) for r in txn], ) limited = False @@ -645,6 +625,7 @@ def _insert_linearized_receipt_txn( receipt_type: str, user_id: str, event_id: str, + thread_id: Optional[str], data: JsonDict, stream_id: int, ) -> Optional[int]: @@ -671,11 +652,27 @@ def _insert_linearized_receipt_txn( # We don't want to clobber receipts for more recent events, so we # have to compare orderings of existing receipts if stream_ordering is not None: - sql = ( - "SELECT event_stream_ordering, event_id FROM receipts_linearized" - " WHERE room_id = ? AND receipt_type = ? AND user_id = ?" + if thread_id is None: + thread_clause = "r.thread_id IS NULL" + thread_args: Tuple[str, ...] = () + else: + thread_clause = "r.thread_id = ?" + thread_args = (thread_id,) + + sql = f""" + SELECT event_stream_ordering, event_id FROM events + INNER JOIN receipts_linearized AS r USING (event_id, room_id) + WHERE r.room_id = ? AND r.receipt_type = ? AND r.user_id = ? AND {thread_clause} + """ + txn.execute( + sql, + ( + room_id, + receipt_type, + user_id, + ) + + thread_args, ) - txn.execute(sql, (room_id, receipt_type, user_id)) for so, eid in txn: if so and int(so) >= stream_ordering: @@ -695,21 +692,28 @@ def _insert_linearized_receipt_txn( self._receipts_stream_cache.entity_has_changed, room_id, stream_id ) + keyvalues = { + "room_id": room_id, + "receipt_type": receipt_type, + "user_id": user_id, + } + where_clause = "" + if thread_id is None: + where_clause = "thread_id IS NULL" + else: + keyvalues["thread_id"] = thread_id + self.db_pool.simple_upsert_txn( txn, table="receipts_linearized", - keyvalues={ - "room_id": room_id, - "receipt_type": receipt_type, - "user_id": user_id, - }, + keyvalues=keyvalues, values={ "stream_id": stream_id, "event_id": event_id, "event_stream_ordering": stream_ordering, "data": json_encoder.encode(data), - "thread_id": None, }, + where_clause=where_clause, # receipts_linearized has a unique constraint on # (user_id, room_id, receipt_type), so no need to lock lock=False, @@ -761,6 +765,7 @@ async def insert_receipt( receipt_type: str, user_id: str, event_ids: List[str], + thread_id: Optional[str], data: dict, ) -> Optional[Tuple[int, int]]: """Insert a receipt, either from local client or remote server. @@ -793,6 +798,7 @@ async def insert_receipt( receipt_type, user_id, linearized_event_id, + thread_id, data, stream_id=stream_id, # Read committed is actually beneficial here because we check for a receipt with @@ -807,7 +813,8 @@ async def insert_receipt( now = self._clock.time_msec() logger.debug( - "RR for event %s in %s (%i ms old)", + "Receipt %s for event %s in %s (%i ms old)", + receipt_type, linearized_event_id, room_id, now - event_ts, @@ -820,6 +827,7 @@ async def insert_receipt( receipt_type, user_id, event_ids, + thread_id, data, ) @@ -834,6 +842,7 @@ def _insert_graph_receipt_txn( receipt_type: str, user_id: str, event_ids: List[str], + thread_id: Optional[str], data: JsonDict, ) -> None: assert self._can_write_to_receipts @@ -845,19 +854,26 @@ def _insert_graph_receipt_txn( # FIXME: This shouldn't invalidate the whole cache txn.call_after(self._get_linearized_receipts_for_room.invalidate, (room_id,)) + keyvalues = { + "room_id": room_id, + "receipt_type": receipt_type, + "user_id": user_id, + } + where_clause = "" + if thread_id is None: + where_clause = "thread_id IS NULL" + else: + keyvalues["thread_id"] = thread_id + self.db_pool.simple_upsert_txn( txn, table="receipts_graph", - keyvalues={ - "room_id": room_id, - "receipt_type": receipt_type, - "user_id": user_id, - }, + keyvalues=keyvalues, values={ "event_ids": json_encoder.encode(event_ids), "data": json_encoder.encode(data), - "thread_id": None, }, + where_clause=where_clause, # receipts_graph has a unique constraint on # (user_id, room_id, receipt_type), so no need to lock lock=False, diff --git a/synapse/storage/databases/main/registration.py b/synapse/storage/databases/main/registration.py index ac821878b0ce..2996d6bb4d66 100644 --- a/synapse/storage/databases/main/registration.py +++ b/synapse/storage/databases/main/registration.py @@ -166,27 +166,49 @@ def __init__( @cached() async def get_user_by_id(self, user_id: str) -> Optional[Dict[str, Any]]: """Deprecated: use get_userinfo_by_id instead""" - return await self.db_pool.simple_select_one( - table="users", - keyvalues={"name": user_id}, - retcols=[ - "name", - "password_hash", - "is_guest", - "admin", - "consent_version", - "consent_ts", - "consent_server_notice_sent", - "appservice_id", - "creation_ts", - "user_type", - "deactivated", - "shadow_banned", - ], - allow_none=True, + + def get_user_by_id_txn(txn: LoggingTransaction) -> Optional[Dict[str, Any]]: + # We could technically use simple_select_one here, but it would not perform + # the COALESCEs (unless hacked into the column names), which could yield + # confusing results. + txn.execute( + """ + SELECT + name, password_hash, is_guest, admin, consent_version, consent_ts, + consent_server_notice_sent, appservice_id, creation_ts, user_type, + deactivated, COALESCE(shadow_banned, FALSE) AS shadow_banned, + COALESCE(approved, TRUE) AS approved + FROM users + WHERE name = ? + """, + (user_id,), + ) + + rows = self.db_pool.cursor_to_dict(txn) + + if len(rows) == 0: + return None + + return rows[0] + + row = await self.db_pool.runInteraction( desc="get_user_by_id", + func=get_user_by_id_txn, ) + if row is not None: + # If we're using SQLite our boolean values will be integers. Because we + # present some of this data as is to e.g. server admins via REST APIs, we + # want to make sure we're returning the right type of data. + # Note: when adding a column name to this list, be wary of NULLable columns, + # since NULL values will be turned into False. + boolean_columns = ["admin", "deactivated", "shadow_banned", "approved"] + for column in boolean_columns: + if not isinstance(row[column], bool): + row[column] = bool(row[column]) + + return row + async def get_userinfo_by_id(self, user_id: str) -> Optional[UserInfo]: """Get a UserInfo object for a user by user ID. @@ -1779,6 +1801,40 @@ async def is_guest(self, user_id: str) -> bool: return res if res else False + @cached() + async def is_user_approved(self, user_id: str) -> bool: + """Checks if a user is approved and therefore can be allowed to log in. + + If the user's 'approved' column is NULL, we consider it as true given it means + the user was registered when support for an approval flow was either disabled + or nonexistent. + + Args: + user_id: the user to check the approval status of. + + Returns: + A boolean that is True if the user is approved, False otherwise. + """ + + def is_user_approved_txn(txn: LoggingTransaction) -> bool: + txn.execute( + """ + SELECT COALESCE(approved, TRUE) AS approved FROM users WHERE name = ? + """, + (user_id,), + ) + + rows = self.db_pool.cursor_to_dict(txn) + + # We cast to bool because the value returned by the database engine might + # be an integer if we're using SQLite. + return bool(rows[0]["approved"]) + + return await self.db_pool.runInteraction( + desc="is_user_pending_approval", + func=is_user_approved_txn, + ) + class RegistrationBackgroundUpdateStore(RegistrationWorkerStore): def __init__( @@ -1916,6 +1972,29 @@ def set_user_deactivated_status_txn( self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,)) txn.call_after(self.is_guest.invalidate, (user_id,)) + def update_user_approval_status_txn( + self, txn: LoggingTransaction, user_id: str, approved: bool + ) -> None: + """Set the user's 'approved' flag to the given value. + + The boolean is turned into an int because the column is a smallint. + + Args: + txn: the current database transaction. + user_id: the user to update the flag for. + approved: the value to set the flag to. + """ + self.db_pool.simple_update_one_txn( + txn=txn, + table="users", + keyvalues={"name": user_id}, + updatevalues={"approved": approved}, + ) + + # Invalidate the caches of methods that read the value of the 'approved' flag. + self._invalidate_cache_and_stream(txn, self.get_user_by_id, (user_id,)) + self._invalidate_cache_and_stream(txn, self.is_user_approved, (user_id,)) + class RegistrationStore(StatsStore, RegistrationBackgroundUpdateStore): def __init__( @@ -1933,6 +2012,13 @@ def __init__( self._access_tokens_id_gen = IdGenerator(db_conn, "access_tokens", "id") self._refresh_tokens_id_gen = IdGenerator(db_conn, "refresh_tokens", "id") + # If support for MSC3866 is enabled and configured to require approval for new + # account, we will create new users with an 'approved' flag set to false. + self._require_approval = ( + hs.config.experimental.msc3866.enabled + and hs.config.experimental.msc3866.require_approval_for_new_accounts + ) + async def add_access_token_to_user( self, user_id: str, @@ -2065,6 +2151,7 @@ async def register_user( admin: bool = False, user_type: Optional[str] = None, shadow_banned: bool = False, + approved: bool = False, ) -> None: """Attempts to register an account. @@ -2083,6 +2170,8 @@ async def register_user( or None for a normal user. shadow_banned: Whether the user is shadow-banned, i.e. they may be told their requests succeeded but we ignore them. + approved: Whether to consider the user has already been approved by an + administrator. Raises: StoreError if the user_id could not be registered. @@ -2099,6 +2188,7 @@ async def register_user( admin, user_type, shadow_banned, + approved, ) def _register_user( @@ -2113,11 +2203,14 @@ def _register_user( admin: bool, user_type: Optional[str], shadow_banned: bool, + approved: bool, ) -> None: user_id_obj = UserID.from_string(user_id) now = int(self._clock.time()) + user_approved = approved or not self._require_approval + try: if was_guest: # Ensure that the guest user actually exists @@ -2143,6 +2236,7 @@ def _register_user( "admin": 1 if admin else 0, "user_type": user_type, "shadow_banned": shadow_banned, + "approved": user_approved, }, ) else: @@ -2158,6 +2252,7 @@ def _register_user( "admin": 1 if admin else 0, "user_type": user_type, "shadow_banned": shadow_banned, + "approved": user_approved, }, ) @@ -2503,6 +2598,25 @@ def start_or_continue_validation_session_txn(txn: LoggingTransaction) -> None: start_or_continue_validation_session_txn, ) + async def update_user_approval_status( + self, user_id: UserID, approved: bool + ) -> None: + """Set the user's 'approved' flag to the given value. + + The boolean will be turned into an int (in update_user_approval_status_txn) + because the column is a smallint. + + Args: + user_id: the user to update the flag for. + approved: the value to set the flag to. + """ + await self.db_pool.runInteraction( + "update_user_approval_status", + self.update_user_approval_status_txn, + user_id.to_string(), + approved, + ) + def find_max_generated_user_id_localpart(cur: Cursor) -> int: """ diff --git a/synapse/storage/databases/main/relations.py b/synapse/storage/databases/main/relations.py index 7bd27790ebfe..898947af9536 100644 --- a/synapse/storage/databases/main/relations.py +++ b/synapse/storage/databases/main/relations.py @@ -51,6 +51,8 @@ class _RelatedEvent: event_id: str # The sender of the related event. sender: str + topological_ordering: Optional[int] + stream_ordering: int class RelationsWorkerStore(SQLBaseStore): @@ -91,6 +93,9 @@ async def get_relations_for_event( # it. The `event_id` must match the `event.event_id`. assert event.event_id == event_id + # Ensure bad limits aren't being passed in. + assert limit >= 0 + where_clause = ["relates_to_id = ?", "room_id = ?"] where_args: List[Union[str, int]] = [event.event_id, room_id] is_redacted = event.internal_metadata.is_redacted() @@ -139,21 +144,34 @@ def _get_recent_references_for_event_txn( ) -> Tuple[List[_RelatedEvent], Optional[StreamToken]]: txn.execute(sql, where_args + [limit + 1]) - last_topo_id = None - last_stream_id = None events = [] - for row in txn: + for event_id, relation_type, sender, topo_ordering, stream_ordering in txn: # Do not include edits for redacted events as they leak event # content. - if not is_redacted or row[1] != RelationTypes.REPLACE: - events.append(_RelatedEvent(row[0], row[2])) - last_topo_id = row[3] - last_stream_id = row[4] + if not is_redacted or relation_type != RelationTypes.REPLACE: + events.append( + _RelatedEvent(event_id, sender, topo_ordering, stream_ordering) + ) - # If there are more events, generate the next pagination key. + # If there are more events, generate the next pagination key from the + # last event returned. next_token = None - if len(events) > limit and last_topo_id and last_stream_id: - next_key = RoomStreamToken(last_topo_id, last_stream_id) + if len(events) > limit: + # Instead of using the last row (which tells us there is more + # data), use the last row to be returned. + events = events[:limit] + + topo = events[-1].topological_ordering + token = events[-1].stream_ordering + if direction == "b": + # Tokens are positions between events. + # This token points *after* the last event in the chunk. + # We need it to point to the event before it in the chunk + # when we are going backwards so we subtract one from the + # stream part. + token -= 1 + next_key = RoomStreamToken(topo, token) + if from_token: next_token = from_token.copy_and_replace( StreamKeyType.ROOM, next_key diff --git a/synapse/storage/databases/main/room.py b/synapse/storage/databases/main/room.py index bef66f199269..7412bce255ef 100644 --- a/synapse/storage/databases/main/room.py +++ b/synapse/storage/databases/main/room.py @@ -25,6 +25,7 @@ List, Mapping, Optional, + Sequence, Tuple, Union, cast, @@ -1133,6 +1134,23 @@ def get_rooms_for_retention_period_in_range_txn( get_rooms_for_retention_period_in_range_txn, ) + @cached(iterable=True) + async def get_partial_state_servers_at_join(self, room_id: str) -> Sequence[str]: + """Gets the list of servers in a partial state room at the time we joined it. + + Returns: + The `servers_in_room` list from the `/send_join` response for partial state + rooms. May not be accurate or complete, as it comes from a remote + homeserver. + An empty list for full state rooms. + """ + return await self.db_pool.simple_select_onecol( + "partial_state_rooms_servers", + keyvalues={"room_id": room_id}, + retcol="server_name", + desc="get_partial_state_servers_at_join", + ) + async def get_partial_state_rooms_and_servers( self, ) -> Mapping[str, Collection[str]]: @@ -1199,6 +1217,29 @@ def _clear_partial_state_room_txn( keyvalues={"room_id": room_id}, ) self._invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,)) + self._invalidate_cache_and_stream( + txn, self.get_partial_state_servers_at_join, (room_id,) + ) + + # We now delete anything from `device_lists_remote_pending` with a + # stream ID less than the minimum + # `partial_state_rooms.device_lists_stream_id`, as we no longer need them. + device_lists_stream_id = DatabasePool.simple_select_one_onecol_txn( + txn, + table="partial_state_rooms", + keyvalues={}, + retcol="MIN(device_lists_stream_id)", + allow_none=True, + ) + if device_lists_stream_id is None: + # There are no rooms being currently partially joined, so we delete everything. + txn.execute("DELETE FROM device_lists_remote_pending") + else: + sql = """ + DELETE FROM device_lists_remote_pending + WHERE stream_id <= ? + """ + txn.execute(sql, (device_lists_stream_id,)) @cached() async def is_partial_state_room(self, room_id: str) -> bool: @@ -1219,6 +1260,22 @@ async def is_partial_state_room(self, room_id: str) -> bool: return entry is not None + async def get_join_event_id_and_device_lists_stream_id_for_partial_state( + self, room_id: str + ) -> Tuple[str, int]: + """Get the event ID of the initial join that started the partial + join, and the device list stream ID at the point we started the partial + join. + """ + + result = await self.db_pool.simple_select_one( + table="partial_state_rooms", + keyvalues={"room_id": room_id}, + retcols=("join_event_id", "device_lists_stream_id"), + desc="get_join_event_id_for_partial_state", + ) + return result["join_event_id"], result["device_lists_stream_id"] + class _BackgroundUpdates: REMOVE_TOMESTONED_ROOMS_BG_UPDATE = "remove_tombstoned_rooms_from_directory" @@ -1760,28 +1817,46 @@ async def store_partial_state_room( self, room_id: str, servers: Collection[str], + device_lists_stream_id: int, ) -> None: - """Mark the given room as containing events with partial state + """Mark the given room as containing events with partial state. + + We also store additional data that describes _when_ we first partial-joined this + room, which helps us to keep other homeservers in sync when we finally fully + join this room. + + We do not include a `join_event_id` here---we need to wait for the join event + to be persisted first. Args: room_id: the ID of the room servers: other servers known to be in the room + device_lists_stream_id: the device_lists stream ID at the time when we first + joined the room. """ await self.db_pool.runInteraction( "store_partial_state_room", self._store_partial_state_room_txn, room_id, servers, + device_lists_stream_id, ) def _store_partial_state_room_txn( - self, txn: LoggingTransaction, room_id: str, servers: Collection[str] + self, + txn: LoggingTransaction, + room_id: str, + servers: Collection[str], + device_lists_stream_id: int, ) -> None: DatabasePool.simple_insert_txn( txn, table="partial_state_rooms", values={ "room_id": room_id, + "device_lists_stream_id": device_lists_stream_id, + # To be updated later once the join event is persisted. + "join_event_id": None, }, ) DatabasePool.simple_insert_many_txn( @@ -1791,6 +1866,39 @@ def _store_partial_state_room_txn( values=((room_id, s) for s in servers), ) self._invalidate_cache_and_stream(txn, self.is_partial_state_room, (room_id,)) + self._invalidate_cache_and_stream( + txn, self.get_partial_state_servers_at_join, (room_id,) + ) + + async def write_partial_state_rooms_join_event_id( + self, + room_id: str, + join_event_id: str, + ) -> None: + """Record the join event which resulted from a partial join. + + We do this separately to `store_partial_state_room` because we need to wait for + the join event to be persisted. Otherwise we violate a foreign key constraint. + """ + await self.db_pool.runInteraction( + "write_partial_state_rooms_join_event_id", + self._write_partial_state_rooms_join_event_id, + room_id, + join_event_id, + ) + + def _write_partial_state_rooms_join_event_id( + self, + txn: LoggingTransaction, + room_id: str, + join_event_id: str, + ) -> None: + DatabasePool.simple_update_txn( + txn, + table="partial_state_rooms", + keyvalues={"room_id": room_id}, + updatevalues={"join_event_id": join_event_id}, + ) async def maybe_store_room_on_outlier_membership( self, room_id: str, room_version: RoomVersion diff --git a/synapse/storage/databases/main/roommember.py b/synapse/storage/databases/main/roommember.py index 197f66e538b1..6638eb2b8924 100644 --- a/synapse/storage/databases/main/roommember.py +++ b/synapse/storage/databases/main/roommember.py @@ -146,42 +146,37 @@ def _transact(txn: LoggingTransaction) -> int: @cached(max_entries=100000, iterable=True) async def get_users_in_room(self, room_id: str) -> List[str]: - """ - Returns a list of users in the room sorted by longest in the room first - (aka. with the lowest depth). This is done to match the sort in - `get_current_hosts_in_room()` and so we can re-use the cache but it's - not horrible to have here either. - - Uses `m.room.member`s in the room state at the current forward extremities to - determine which users are in the room. + """Returns a list of users in the room. Will return inaccurate results for rooms with partial state, since the state for the forward extremities of those rooms will exclude most members. We may also calculate room state incorrectly for such rooms and believe that a member is or is not in the room when the opposite is true. """ - return await self.db_pool.runInteraction( - "get_users_in_room", self.get_users_in_room_txn, room_id + return await self.db_pool.simple_select_onecol( + table="current_state_events", + keyvalues={ + "type": EventTypes.Member, + "room_id": room_id, + "membership": Membership.JOIN, + }, + retcol="state_key", + desc="get_users_in_room", ) def get_users_in_room_txn(self, txn: LoggingTransaction, room_id: str) -> List[str]: - """ - Returns a list of users in the room sorted by longest in the room first - (aka. with the lowest depth). This is done to match the sort in - `get_current_hosts_in_room()` and so we can re-use the cache but it's - not horrible to have here either. - """ - sql = """ - SELECT c.state_key FROM current_state_events as c - /* Get the depth of the event from the events table */ - INNER JOIN events AS e USING (event_id) - WHERE c.type = 'm.room.member' AND c.room_id = ? AND membership = ? - /* Sorted by lowest depth first */ - ORDER BY e.depth ASC; - """ + """Returns a list of users in the room.""" - txn.execute(sql, (room_id, Membership.JOIN)) - return [r[0] for r in txn] + return self.db_pool.simple_select_onecol_txn( + txn, + table="current_state_events", + keyvalues={ + "type": EventTypes.Member, + "room_id": room_id, + "membership": Membership.JOIN, + }, + retcol="state_key", + ) @cached() def get_user_in_room_with_profile( @@ -950,7 +945,44 @@ async def _check_host_room_membership( return True @cached(iterable=True, max_entries=10000) - async def get_current_hosts_in_room(self, room_id: str) -> List[str]: + async def get_current_hosts_in_room(self, room_id: str) -> Set[str]: + """Get current hosts in room based on current state.""" + + # First we check if we already have `get_users_in_room` in the cache, as + # we can just calculate result from that + users = self.get_users_in_room.cache.get_immediate( + (room_id,), None, update_metrics=False + ) + if users is not None: + return {get_domain_from_id(u) for u in users} + + if isinstance(self.database_engine, Sqlite3Engine): + # If we're using SQLite then let's just always use + # `get_users_in_room` rather than funky SQL. + users = await self.get_users_in_room(room_id) + return {get_domain_from_id(u) for u in users} + + # For PostgreSQL we can use a regex to pull out the domains from the + # joined users in `current_state_events` via regex. + + def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> Set[str]: + sql = """ + SELECT DISTINCT substring(state_key FROM '@[^:]*:(.*)$') + FROM current_state_events + WHERE + type = 'm.room.member' + AND membership = 'join' + AND room_id = ? + """ + txn.execute(sql, (room_id,)) + return {d for d, in txn} + + return await self.db_pool.runInteraction( + "get_current_hosts_in_room", get_current_hosts_in_room_txn + ) + + @cached(iterable=True, max_entries=10000) + async def get_current_hosts_in_room_ordered(self, room_id: str) -> List[str]: """ Get current hosts in room based on current state. @@ -958,48 +990,33 @@ async def get_current_hosts_in_room(self, room_id: str) -> List[str]: longest is good because they're most likely to have anything we ask about. - Uses `m.room.member`s in the room state at the current forward extremities to - determine which hosts are in the room. + For SQLite the returned list is not ordered, as SQLite doesn't support + the appropriate SQL. - Will return inaccurate results for rooms with partial state, since the state for - the forward extremities of those rooms will exclude most members. We may also - calculate room state incorrectly for such rooms and believe that a host is or - is not in the room when the opposite is true. + Uses `m.room.member`s in the room state at the current forward + extremities to determine which hosts are in the room. + + Will return inaccurate results for rooms with partial state, since the + state for the forward extremities of those rooms will exclude most + members. We may also calculate room state incorrectly for such rooms and + believe that a host is or is not in the room when the opposite is true. Returns: Returns a list of servers sorted by longest in the room first. (aka. sorted by join with the lowest depth first). """ - # First we check if we already have `get_users_in_room` in the cache, as - # we can just calculate result from that - users = self.get_users_in_room.cache.get_immediate( - (room_id,), None, update_metrics=False - ) - if users is None and isinstance(self.database_engine, Sqlite3Engine): + if isinstance(self.database_engine, Sqlite3Engine): # If we're using SQLite then let's just always use # `get_users_in_room` rather than funky SQL. - users = await self.get_users_in_room(room_id) - if users is not None: - # Because `users` is sorted from lowest -> highest depth, the list - # of domains will also be sorted that way. - domains: List[str] = [] - # We use a `Set` just for fast lookups - domain_set: Set[str] = set() - for u in users: - if ":" not in u: - continue - domain = get_domain_from_id(u) - if domain not in domain_set: - domain_set.add(domain) - domains.append(domain) - return domains + domains = await self.get_current_hosts_in_room(room_id) + return list(domains) # For PostgreSQL we can use a regex to pull out the domains from the # joined users in `current_state_events` via regex. - def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> List[str]: + def get_current_hosts_in_room_ordered_txn(txn: LoggingTransaction) -> List[str]: # Returns a list of servers currently joined in the room sorted by # longest in the room first (aka. with the lowest depth). The # heuristic of sorting by servers who have been in the room the @@ -1027,7 +1044,7 @@ def get_current_hosts_in_room_txn(txn: LoggingTransaction) -> List[str]: return [d for d, in txn if d is not None] return await self.db_pool.runInteraction( - "get_current_hosts_in_room", get_current_hosts_in_room_txn + "get_current_hosts_in_room_ordered", get_current_hosts_in_room_ordered_txn ) async def get_joined_hosts( diff --git a/synapse/storage/databases/main/search.py b/synapse/storage/databases/main/search.py index f6e24b68d26d..1b79acf95559 100644 --- a/synapse/storage/databases/main/search.py +++ b/synapse/storage/databases/main/search.py @@ -641,7 +641,7 @@ async def search_rooms( raise Exception("Unrecognized database engine") # mypy expects to append only a `str`, not an `int` - args.append(limit) # type: ignore[arg-type] + args.append(limit) results = await self.db_pool.execute( "search_rooms", self.db_pool.cursor_to_dict, sql, *args diff --git a/synapse/storage/databases/main/stream.py b/synapse/storage/databases/main/stream.py index 8c17b68756d0..b41a01504f5f 100644 --- a/synapse/storage/databases/main/stream.py +++ b/synapse/storage/databases/main/stream.py @@ -1343,15 +1343,15 @@ def _paginate_room_events_txn( if rows: topo = rows[-1].topological_ordering - toke = rows[-1].stream_ordering + token = rows[-1].stream_ordering if direction == "b": # Tokens are positions between events. # This token points *after* the last event in the chunk. # We need it to point to the event before it in the chunk # when we are going backwards so we subtract one from the # stream part. - toke -= 1 - next_token = RoomStreamToken(topo, toke) + token -= 1 + next_token = RoomStreamToken(topo, token) else: # TODO (erikj): We should work out what to do here instead. next_token = to_token if to_token else from_token diff --git a/synapse/storage/engines/_base.py b/synapse/storage/engines/_base.py index 0d16a419a42c..70e594a68f09 100644 --- a/synapse/storage/engines/_base.py +++ b/synapse/storage/engines/_base.py @@ -32,9 +32,10 @@ class IncorrectDatabaseSetup(RuntimeError): ConnectionType = TypeVar("ConnectionType", bound=Connection) +CursorType = TypeVar("CursorType", bound=Cursor) -class BaseDatabaseEngine(Generic[ConnectionType], metaclass=abc.ABCMeta): +class BaseDatabaseEngine(Generic[ConnectionType, CursorType], metaclass=abc.ABCMeta): def __init__(self, module: DBAPI2Module, config: Mapping[str, Any]): self.module = module @@ -64,7 +65,7 @@ def check_database( ... @abc.abstractmethod - def check_new_database(self, txn: Cursor) -> None: + def check_new_database(self, txn: CursorType) -> None: """Gets called when setting up a brand new database. This allows us to apply stricter checks on new databases versus existing database. """ @@ -124,3 +125,21 @@ def attempt_to_set_isolation_level( Note: This has no effect on SQLite3, as transactions are SERIALIZABLE by default. """ ... + + @staticmethod + @abc.abstractmethod + def executescript(cursor: CursorType, script: str) -> None: + """Execute a chunk of SQL containing multiple semicolon-delimited statements. + + This is not provided by DBAPI2, and so needs engine-specific support. + """ + ... + + @classmethod + def execute_script_file(cls, cursor: CursorType, filepath: str) -> None: + """Execute a file containing multiple semicolon-delimited SQL statements. + + This is not provided by DBAPI2, and so needs engine-specific support. + """ + with open(filepath, "rt") as f: + cls.executescript(cursor, f.read()) diff --git a/synapse/storage/engines/postgres.py b/synapse/storage/engines/postgres.py index 7f7d006ac209..d8c0f64d9a44 100644 --- a/synapse/storage/engines/postgres.py +++ b/synapse/storage/engines/postgres.py @@ -31,7 +31,9 @@ logger = logging.getLogger(__name__) -class PostgresEngine(BaseDatabaseEngine[psycopg2.extensions.connection]): +class PostgresEngine( + BaseDatabaseEngine[psycopg2.extensions.connection, psycopg2.extensions.cursor] +): def __init__(self, database_config: Mapping[str, Any]): super().__init__(psycopg2, database_config) psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) @@ -212,3 +214,11 @@ def attempt_to_set_isolation_level( else: isolation_level = self.isolation_level_map[isolation_level] return conn.set_isolation_level(isolation_level) + + @staticmethod + def executescript(cursor: psycopg2.extensions.cursor, script: str) -> None: + """Execute a chunk of SQL containing multiple semicolon-delimited statements. + + Psycopg2 seems happy to do this in DBAPI2's `execute()` function. + """ + cursor.execute(script) diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py index 095ae0a096b8..faa574dbfd6e 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py @@ -24,7 +24,7 @@ from synapse.storage.database import LoggingDatabaseConnection -class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection]): +class Sqlite3Engine(BaseDatabaseEngine[sqlite3.Connection, sqlite3.Cursor]): def __init__(self, database_config: Mapping[str, Any]): super().__init__(sqlite3, database_config) @@ -120,6 +120,25 @@ def attempt_to_set_isolation_level( # All transactions are SERIALIZABLE by default in sqlite pass + @staticmethod + def executescript(cursor: sqlite3.Cursor, script: str) -> None: + """Execute a chunk of SQL containing multiple semicolon-delimited statements. + + Python's built-in SQLite driver does not allow you to do this with DBAPI2's + `execute`: + + > execute() will only execute a single SQL statement. If you try to execute more + > than one statement with it, it will raise a Warning. Use executescript() if + > you want to execute multiple SQL statements with one call. + + Though the docs for `executescript` warn: + + > If there is a pending transaction, an implicit COMMIT statement is executed + > first. No other implicit transaction control is performed; any transaction + > control must be added to sql_script. + """ + cursor.executescript(script) + # Following functions taken from: https://github.com/coleifer/peewee diff --git a/synapse/storage/prepare_database.py b/synapse/storage/prepare_database.py index 09a2b58f4c72..3acdb39da75c 100644 --- a/synapse/storage/prepare_database.py +++ b/synapse/storage/prepare_database.py @@ -266,7 +266,7 @@ def _setup_new_database( ".sql." + specific ): logger.debug("Applying schema %s", entry.absolute_path) - executescript(cur, entry.absolute_path) + database_engine.execute_script_file(cur, entry.absolute_path) cur.execute( "INSERT INTO schema_version (version, upgraded) VALUES (?,?)", @@ -517,7 +517,7 @@ def _upgrade_existing_database( UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path ) logger.info("Applying schema %s", relative_path) - executescript(cur, absolute_path) + database_engine.execute_script_file(cur, absolute_path) elif ext == specific_engine_extension and root_name.endswith(".sql"): # A .sql file specific to our engine; just read and execute it if is_worker: @@ -525,7 +525,7 @@ def _upgrade_existing_database( UNAPPLIED_DELTA_ON_WORKER_ERROR % relative_path ) logger.info("Applying engine-specific schema %s", relative_path) - executescript(cur, absolute_path) + database_engine.execute_script_file(cur, absolute_path) elif ext in specific_engine_extensions and root_name.endswith(".sql"): # A .sql file for a different engine; skip it. continue @@ -666,7 +666,7 @@ def _get_or_create_schema_state( ) -> Optional[_SchemaState]: # Bluntly try creating the schema_version tables. sql_path = os.path.join(schema_path, "common", "schema_version.sql") - executescript(txn, sql_path) + database_engine.execute_script_file(txn, sql_path) txn.execute("SELECT version, upgraded FROM schema_version") row = txn.fetchone() diff --git a/synapse/storage/schema/__init__.py b/synapse/storage/schema/__init__.py index f29424d17a74..4a5c947699eb 100644 --- a/synapse/storage/schema/__init__.py +++ b/synapse/storage/schema/__init__.py @@ -85,6 +85,7 @@ events over federation. - Add indexes to various tables (`event_failed_pull_attempts`, `insertion_events`, `batch_events`) to make it easy to delete all associated rows when purging a room. + - `inserted_ts` column is added to `event_push_actions_staging` table. """ diff --git a/synapse/storage/schema/common/full_schemas/72/full.sql.postgres b/synapse/storage/schema/common/full_schemas/72/full.sql.postgres new file mode 100644 index 000000000000..f0e546f052d2 --- /dev/null +++ b/synapse/storage/schema/common/full_schemas/72/full.sql.postgres @@ -0,0 +1,8 @@ +CREATE TABLE background_updates ( + update_name text NOT NULL, + progress_json text NOT NULL, + depends_on text, + ordering integer DEFAULT 0 NOT NULL +); +ALTER TABLE ONLY background_updates + ADD CONSTRAINT background_updates_uniqueness UNIQUE (update_name); diff --git a/synapse/storage/schema/common/full_schemas/72/full.sql.sqlite b/synapse/storage/schema/common/full_schemas/72/full.sql.sqlite new file mode 100644 index 000000000000..d5a2c04aa9c5 --- /dev/null +++ b/synapse/storage/schema/common/full_schemas/72/full.sql.sqlite @@ -0,0 +1,6 @@ +CREATE TABLE background_updates ( + update_name text NOT NULL, + progress_json text NOT NULL, + depends_on text, ordering INT NOT NULL DEFAULT 0, + CONSTRAINT background_updates_uniqueness UNIQUE (update_name) +); diff --git a/synapse/storage/schema/main/delta/73/02add_pusher_enabled.sql b/synapse/storage/schema/main/delta/73/02add_pusher_enabled.sql new file mode 100644 index 000000000000..dba3b4900b59 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/02add_pusher_enabled.sql @@ -0,0 +1,16 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +ALTER TABLE pushers ADD COLUMN enabled BOOLEAN; \ No newline at end of file diff --git a/synapse/storage/schema/main/delta/73/03pusher_device_id.sql b/synapse/storage/schema/main/delta/73/03pusher_device_id.sql new file mode 100644 index 000000000000..1b4ffbeebec0 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/03pusher_device_id.sql @@ -0,0 +1,20 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Add a device_id column to track the device ID that created the pusher. It's NULLable +-- on purpose, because a) it might not be possible to track down the device that created +-- old pushers (pushers.access_token and access_tokens.device_id are both NULLable), and +-- b) access tokens retrieved via the admin API don't have a device associated to them. +ALTER TABLE pushers ADD COLUMN device_id TEXT; \ No newline at end of file diff --git a/synapse/storage/schema/main/delta/73/03users_approved_column.sql b/synapse/storage/schema/main/delta/73/03users_approved_column.sql new file mode 100644 index 000000000000..5328d592ea00 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/03users_approved_column.sql @@ -0,0 +1,20 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Add a column to the users table to track whether the user needs to be approved by an +-- administrator. +-- A NULL column means the user was created before this feature was supported by Synapse, +-- and should be considered as TRUE. +ALTER TABLE users ADD COLUMN approved BOOLEAN; diff --git a/synapse/storage/schema/main/delta/73/04partial_join_details.sql b/synapse/storage/schema/main/delta/73/04partial_join_details.sql new file mode 100644 index 000000000000..5fb2bfe1a23d --- /dev/null +++ b/synapse/storage/schema/main/delta/73/04partial_join_details.sql @@ -0,0 +1,23 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- To ensure we correctly notify other homeservers about device list changes from our +-- users after a partial join transitions to a full join, we need to know when we began +-- the partial join. For now it's sufficient to know the device_list stream_id at the +-- time of the partial join, and the join event created for us during a partial join. +-- +-- Both columns are backwards compatible. +ALTER TABLE partial_state_rooms ADD COLUMN device_lists_stream_id BIGINT NOT NULL DEFAULT 0; +ALTER TABLE partial_state_rooms ADD COLUMN join_event_id TEXT REFERENCES events(event_id); diff --git a/synapse/storage/schema/main/delta/73/04pending_device_list_updates.sql b/synapse/storage/schema/main/delta/73/04pending_device_list_updates.sql new file mode 100644 index 000000000000..dbd78d677db4 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/04pending_device_list_updates.sql @@ -0,0 +1,28 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Stores remote device lists we have received for remote users while a partial +-- join is in progress. +-- +-- This allows us to replay any device list updates if it turns out the remote +-- user was in the partially joined room +CREATE TABLE device_lists_remote_pending( + stream_id BIGINT PRIMARY KEY, + user_id TEXT NOT NULL, + device_id TEXT NOT NULL +); + +-- We only keep the most recent update for a given user/device pair. +CREATE UNIQUE INDEX device_lists_remote_pending_user_device_id ON device_lists_remote_pending(user_id, device_id); diff --git a/synapse/storage/schema/main/delta/73/05old_push_actions.sql.postgres b/synapse/storage/schema/main/delta/73/05old_push_actions.sql.postgres new file mode 100644 index 000000000000..4af1a8470bb6 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/05old_push_actions.sql.postgres @@ -0,0 +1,22 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- Add a column so that we know when a push action was inserted, to make it +-- easier to clear out old ones. +ALTER TABLE event_push_actions_staging ADD COLUMN inserted_ts BIGINT; + +-- We now add a default for *new* rows. We don't do this above as we don't want +-- to have to update every remove with the new default. +ALTER TABLE event_push_actions_staging ALTER COLUMN inserted_ts SET DEFAULT extract(epoch from now()) * 1000; diff --git a/synapse/storage/schema/main/delta/73/05old_push_actions.sql.sqlite b/synapse/storage/schema/main/delta/73/05old_push_actions.sql.sqlite new file mode 100644 index 000000000000..7482dabba224 --- /dev/null +++ b/synapse/storage/schema/main/delta/73/05old_push_actions.sql.sqlite @@ -0,0 +1,24 @@ +/* Copyright 2022 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- On SQLite we must be in monolith mode and updating the database from Synapse, +-- so its safe to assume that `event_push_actions_staging` should be empty (as +-- over restart an event must either have been fully persisted or we'll +-- recalculate the push actions) +DELETE FROM event_push_actions_staging; + +-- Add a column so that we know when a push action was inserted, to make it +-- easier to clear out old ones. +ALTER TABLE event_push_actions_staging ADD COLUMN inserted_ts BIGINT; diff --git a/synapse/storage/schema/main/full_schemas/72/full.sql.postgres b/synapse/storage/schema/main/full_schemas/72/full.sql.postgres new file mode 100644 index 000000000000..d421fd9ab9c9 --- /dev/null +++ b/synapse/storage/schema/main/full_schemas/72/full.sql.postgres @@ -0,0 +1,1344 @@ +CREATE FUNCTION check_partial_state_events() RETURNS trigger + LANGUAGE plpgsql + AS $$ + BEGIN + IF EXISTS ( + SELECT 1 FROM events + WHERE events.event_id = NEW.event_id + AND events.room_id != NEW.room_id + ) THEN + RAISE EXCEPTION 'Incorrect room_id in partial_state_events'; + END IF; + RETURN NEW; + END; + $$; +CREATE TABLE access_tokens ( + id bigint NOT NULL, + user_id text NOT NULL, + device_id text, + token text NOT NULL, + valid_until_ms bigint, + puppets_user_id text, + last_validated bigint, + refresh_token_id bigint, + used boolean +); +CREATE TABLE account_data ( + user_id text NOT NULL, + account_data_type text NOT NULL, + stream_id bigint NOT NULL, + content text NOT NULL, + instance_name text +); +CREATE SEQUENCE account_data_sequence + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE account_validity ( + user_id text NOT NULL, + expiration_ts_ms bigint NOT NULL, + email_sent boolean NOT NULL, + renewal_token text, + token_used_ts_ms bigint +); +CREATE TABLE application_services_state ( + as_id text NOT NULL, + state character varying(5), + read_receipt_stream_id bigint, + presence_stream_id bigint, + to_device_stream_id bigint, + device_list_stream_id bigint +); +CREATE SEQUENCE application_services_txn_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE application_services_txns ( + as_id text NOT NULL, + txn_id bigint NOT NULL, + event_ids text NOT NULL +); +CREATE TABLE appservice_room_list ( + appservice_id text NOT NULL, + network_id text NOT NULL, + room_id text NOT NULL +); +CREATE TABLE appservice_stream_position ( + lock character(1) DEFAULT 'X'::bpchar NOT NULL, + stream_ordering bigint, + CONSTRAINT appservice_stream_position_lock_check CHECK ((lock = 'X'::bpchar)) +); +CREATE TABLE batch_events ( + event_id text NOT NULL, + room_id text NOT NULL, + batch_id text NOT NULL +); +CREATE TABLE blocked_rooms ( + room_id text NOT NULL, + user_id text NOT NULL +); +CREATE TABLE cache_invalidation_stream_by_instance ( + stream_id bigint NOT NULL, + instance_name text NOT NULL, + cache_func text NOT NULL, + keys text[], + invalidation_ts bigint +); +CREATE SEQUENCE cache_invalidation_stream_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE current_state_delta_stream ( + stream_id bigint NOT NULL, + room_id text NOT NULL, + type text NOT NULL, + state_key text NOT NULL, + event_id text, + prev_event_id text, + instance_name text +); +CREATE TABLE current_state_events ( + event_id text NOT NULL, + room_id text NOT NULL, + type text NOT NULL, + state_key text NOT NULL, + membership text +); +CREATE TABLE dehydrated_devices ( + user_id text NOT NULL, + device_id text NOT NULL, + device_data text NOT NULL +); +CREATE TABLE deleted_pushers ( + stream_id bigint NOT NULL, + app_id text NOT NULL, + pushkey text NOT NULL, + user_id text NOT NULL +); +CREATE TABLE destination_rooms ( + destination text NOT NULL, + room_id text NOT NULL, + stream_ordering bigint NOT NULL +); +CREATE TABLE destinations ( + destination text NOT NULL, + retry_last_ts bigint, + retry_interval bigint, + failure_ts bigint, + last_successful_stream_ordering bigint +); +CREATE TABLE device_auth_providers ( + user_id text NOT NULL, + device_id text NOT NULL, + auth_provider_id text NOT NULL, + auth_provider_session_id text NOT NULL +); +CREATE TABLE device_federation_inbox ( + origin text NOT NULL, + message_id text NOT NULL, + received_ts bigint NOT NULL, + instance_name text +); +CREATE TABLE device_federation_outbox ( + destination text NOT NULL, + stream_id bigint NOT NULL, + queued_ts bigint NOT NULL, + messages_json text NOT NULL, + instance_name text +); +CREATE TABLE device_inbox ( + user_id text NOT NULL, + device_id text NOT NULL, + stream_id bigint NOT NULL, + message_json text NOT NULL, + instance_name text +); +CREATE SEQUENCE device_inbox_sequence + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE device_lists_changes_in_room ( + user_id text NOT NULL, + device_id text NOT NULL, + room_id text NOT NULL, + stream_id bigint NOT NULL, + converted_to_destinations boolean NOT NULL, + opentracing_context text +); +CREATE TABLE device_lists_outbound_last_success ( + destination text NOT NULL, + user_id text NOT NULL, + stream_id bigint NOT NULL +); +CREATE TABLE device_lists_outbound_pokes ( + destination text NOT NULL, + stream_id bigint NOT NULL, + user_id text NOT NULL, + device_id text NOT NULL, + sent boolean NOT NULL, + ts bigint NOT NULL, + opentracing_context text +); +CREATE TABLE device_lists_remote_cache ( + user_id text NOT NULL, + device_id text NOT NULL, + content text NOT NULL +); +CREATE TABLE device_lists_remote_extremeties ( + user_id text NOT NULL, + stream_id text NOT NULL +); +CREATE TABLE device_lists_remote_resync ( + user_id text NOT NULL, + added_ts bigint NOT NULL +); +CREATE TABLE device_lists_stream ( + stream_id bigint NOT NULL, + user_id text NOT NULL, + device_id text NOT NULL +); +CREATE TABLE devices ( + user_id text NOT NULL, + device_id text NOT NULL, + display_name text, + last_seen bigint, + ip text, + user_agent text, + hidden boolean DEFAULT false +); +CREATE TABLE e2e_cross_signing_keys ( + user_id text NOT NULL, + keytype text NOT NULL, + keydata text NOT NULL, + stream_id bigint NOT NULL +); +CREATE TABLE e2e_cross_signing_signatures ( + user_id text NOT NULL, + key_id text NOT NULL, + target_user_id text NOT NULL, + target_device_id text NOT NULL, + signature text NOT NULL +); +CREATE TABLE e2e_device_keys_json ( + user_id text NOT NULL, + device_id text NOT NULL, + ts_added_ms bigint NOT NULL, + key_json text NOT NULL +); +CREATE TABLE e2e_fallback_keys_json ( + user_id text NOT NULL, + device_id text NOT NULL, + algorithm text NOT NULL, + key_id text NOT NULL, + key_json text NOT NULL, + used boolean DEFAULT false NOT NULL +); +CREATE TABLE e2e_one_time_keys_json ( + user_id text NOT NULL, + device_id text NOT NULL, + algorithm text NOT NULL, + key_id text NOT NULL, + ts_added_ms bigint NOT NULL, + key_json text NOT NULL +); +CREATE TABLE e2e_room_keys ( + user_id text NOT NULL, + room_id text NOT NULL, + session_id text NOT NULL, + version bigint NOT NULL, + first_message_index integer, + forwarded_count integer, + is_verified boolean, + session_data text NOT NULL +); +CREATE TABLE e2e_room_keys_versions ( + user_id text NOT NULL, + version bigint NOT NULL, + algorithm text NOT NULL, + auth_data text NOT NULL, + deleted smallint DEFAULT 0 NOT NULL, + etag bigint +); +CREATE TABLE erased_users ( + user_id text NOT NULL +); +CREATE TABLE event_auth ( + event_id text NOT NULL, + auth_id text NOT NULL, + room_id text NOT NULL +); +CREATE SEQUENCE event_auth_chain_id + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE event_auth_chain_links ( + origin_chain_id bigint NOT NULL, + origin_sequence_number bigint NOT NULL, + target_chain_id bigint NOT NULL, + target_sequence_number bigint NOT NULL +); +CREATE TABLE event_auth_chain_to_calculate ( + event_id text NOT NULL, + room_id text NOT NULL, + type text NOT NULL, + state_key text NOT NULL +); +CREATE TABLE event_auth_chains ( + event_id text NOT NULL, + chain_id bigint NOT NULL, + sequence_number bigint NOT NULL +); +CREATE TABLE event_backward_extremities ( + event_id text NOT NULL, + room_id text NOT NULL +); +CREATE TABLE event_edges ( + event_id text NOT NULL, + prev_event_id text NOT NULL, + room_id text, + is_state boolean DEFAULT false NOT NULL +); +CREATE TABLE event_expiry ( + event_id text NOT NULL, + expiry_ts bigint NOT NULL +); +CREATE TABLE event_forward_extremities ( + event_id text NOT NULL, + room_id text NOT NULL +); +CREATE TABLE event_json ( + event_id text NOT NULL, + room_id text NOT NULL, + internal_metadata text NOT NULL, + json text NOT NULL, + format_version integer +); +CREATE TABLE event_labels ( + event_id text NOT NULL, + label text NOT NULL, + room_id text NOT NULL, + topological_ordering bigint NOT NULL +); +CREATE TABLE event_push_actions ( + room_id text NOT NULL, + event_id text NOT NULL, + user_id text NOT NULL, + profile_tag character varying(32), + actions text NOT NULL, + topological_ordering bigint, + stream_ordering bigint, + notif smallint, + highlight smallint, + unread smallint, + thread_id text +); +CREATE TABLE event_push_actions_staging ( + event_id text NOT NULL, + user_id text NOT NULL, + actions text NOT NULL, + notif smallint NOT NULL, + highlight smallint NOT NULL, + unread smallint, + thread_id text +); +CREATE TABLE event_push_summary ( + user_id text NOT NULL, + room_id text NOT NULL, + notif_count bigint NOT NULL, + stream_ordering bigint NOT NULL, + unread_count bigint, + last_receipt_stream_ordering bigint, + thread_id text +); +CREATE TABLE event_push_summary_last_receipt_stream_id ( + lock character(1) DEFAULT 'X'::bpchar NOT NULL, + stream_id bigint NOT NULL, + CONSTRAINT event_push_summary_last_receipt_stream_id_lock_check CHECK ((lock = 'X'::bpchar)) +); +CREATE TABLE event_push_summary_stream_ordering ( + lock character(1) DEFAULT 'X'::bpchar NOT NULL, + stream_ordering bigint NOT NULL, + CONSTRAINT event_push_summary_stream_ordering_lock_check CHECK ((lock = 'X'::bpchar)) +); +CREATE TABLE event_relations ( + event_id text NOT NULL, + relates_to_id text NOT NULL, + relation_type text NOT NULL, + aggregation_key text +); +CREATE TABLE event_reports ( + id bigint NOT NULL, + received_ts bigint NOT NULL, + room_id text NOT NULL, + event_id text NOT NULL, + user_id text NOT NULL, + reason text, + content text +); +CREATE TABLE event_search ( + event_id text, + room_id text, + sender text, + key text, + vector tsvector, + origin_server_ts bigint, + stream_ordering bigint +); +CREATE TABLE event_to_state_groups ( + event_id text NOT NULL, + state_group bigint NOT NULL +); +CREATE TABLE event_txn_id ( + event_id text NOT NULL, + room_id text NOT NULL, + user_id text NOT NULL, + token_id bigint NOT NULL, + txn_id text NOT NULL, + inserted_ts bigint NOT NULL +); +CREATE TABLE events ( + topological_ordering bigint NOT NULL, + event_id text NOT NULL, + type text NOT NULL, + room_id text NOT NULL, + content text, + unrecognized_keys text, + processed boolean NOT NULL, + outlier boolean NOT NULL, + depth bigint DEFAULT 0 NOT NULL, + origin_server_ts bigint, + received_ts bigint, + sender text, + contains_url boolean, + instance_name text, + stream_ordering bigint, + state_key text, + rejection_reason text +); +CREATE SEQUENCE events_backfill_stream_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE SEQUENCE events_stream_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE ex_outlier_stream ( + event_stream_ordering bigint NOT NULL, + event_id text NOT NULL, + state_group bigint NOT NULL, + instance_name text +); +CREATE TABLE federation_inbound_events_staging ( + origin text NOT NULL, + room_id text NOT NULL, + event_id text NOT NULL, + received_ts bigint NOT NULL, + event_json text NOT NULL, + internal_metadata text NOT NULL +); +CREATE TABLE federation_stream_position ( + type text NOT NULL, + stream_id bigint NOT NULL, + instance_name text DEFAULT 'master'::text NOT NULL +); +CREATE TABLE ignored_users ( + ignorer_user_id text NOT NULL, + ignored_user_id text NOT NULL +); +CREATE TABLE insertion_event_edges ( + event_id text NOT NULL, + room_id text NOT NULL, + insertion_prev_event_id text NOT NULL +); +CREATE TABLE insertion_event_extremities ( + event_id text NOT NULL, + room_id text NOT NULL +); +CREATE TABLE insertion_events ( + event_id text NOT NULL, + room_id text NOT NULL, + next_batch_id text NOT NULL +); +CREATE TABLE instance_map ( + instance_id integer NOT NULL, + instance_name text NOT NULL +); +CREATE SEQUENCE instance_map_instance_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +ALTER SEQUENCE instance_map_instance_id_seq OWNED BY instance_map.instance_id; +CREATE TABLE local_current_membership ( + room_id text NOT NULL, + user_id text NOT NULL, + event_id text NOT NULL, + membership text NOT NULL +); +CREATE TABLE local_media_repository ( + media_id text, + media_type text, + media_length integer, + created_ts bigint, + upload_name text, + user_id text, + quarantined_by text, + url_cache text, + last_access_ts bigint, + safe_from_quarantine boolean DEFAULT false NOT NULL +); +CREATE TABLE local_media_repository_thumbnails ( + media_id text, + thumbnail_width integer, + thumbnail_height integer, + thumbnail_type text, + thumbnail_method text, + thumbnail_length integer +); +CREATE TABLE local_media_repository_url_cache ( + url text, + response_code integer, + etag text, + expires_ts bigint, + og text, + media_id text, + download_ts bigint +); +CREATE TABLE monthly_active_users ( + user_id text NOT NULL, + "timestamp" bigint NOT NULL +); +CREATE TABLE open_id_tokens ( + token text NOT NULL, + ts_valid_until_ms bigint NOT NULL, + user_id text NOT NULL +); +CREATE TABLE partial_state_events ( + room_id text NOT NULL, + event_id text NOT NULL +); +CREATE TABLE partial_state_rooms ( + room_id text NOT NULL +); +CREATE TABLE partial_state_rooms_servers ( + room_id text NOT NULL, + server_name text NOT NULL +); +CREATE TABLE presence ( + user_id text NOT NULL, + state character varying(20), + status_msg text, + mtime bigint +); +CREATE TABLE presence_stream ( + stream_id bigint, + user_id text, + state text, + last_active_ts bigint, + last_federation_update_ts bigint, + last_user_sync_ts bigint, + status_msg text, + currently_active boolean, + instance_name text +); +CREATE SEQUENCE presence_stream_sequence + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE profiles ( + user_id text NOT NULL, + displayname text, + avatar_url text +); +CREATE TABLE push_rules ( + id bigint NOT NULL, + user_name text NOT NULL, + rule_id text NOT NULL, + priority_class smallint NOT NULL, + priority integer DEFAULT 0 NOT NULL, + conditions text NOT NULL, + actions text NOT NULL +); +CREATE TABLE push_rules_enable ( + id bigint NOT NULL, + user_name text NOT NULL, + rule_id text NOT NULL, + enabled smallint +); +CREATE TABLE push_rules_stream ( + stream_id bigint NOT NULL, + event_stream_ordering bigint NOT NULL, + user_id text NOT NULL, + rule_id text NOT NULL, + op text NOT NULL, + priority_class smallint, + priority integer, + conditions text, + actions text +); +CREATE TABLE pusher_throttle ( + pusher bigint NOT NULL, + room_id text NOT NULL, + last_sent_ts bigint, + throttle_ms bigint +); +CREATE TABLE pushers ( + id bigint NOT NULL, + user_name text NOT NULL, + access_token bigint, + profile_tag text NOT NULL, + kind text NOT NULL, + app_id text NOT NULL, + app_display_name text NOT NULL, + device_display_name text NOT NULL, + pushkey text NOT NULL, + ts bigint NOT NULL, + lang text, + data text, + last_stream_ordering bigint, + last_success bigint, + failing_since bigint +); +CREATE TABLE ratelimit_override ( + user_id text NOT NULL, + messages_per_second bigint, + burst_count bigint +); +CREATE TABLE receipts_graph ( + room_id text NOT NULL, + receipt_type text NOT NULL, + user_id text NOT NULL, + event_ids text NOT NULL, + data text NOT NULL, + thread_id text +); +CREATE TABLE receipts_linearized ( + stream_id bigint NOT NULL, + room_id text NOT NULL, + receipt_type text NOT NULL, + user_id text NOT NULL, + event_id text NOT NULL, + data text NOT NULL, + instance_name text, + event_stream_ordering bigint, + thread_id text +); +CREATE SEQUENCE receipts_sequence + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE received_transactions ( + transaction_id text, + origin text, + ts bigint, + response_code integer, + response_json bytea, + has_been_referenced smallint DEFAULT 0 +); +CREATE TABLE redactions ( + event_id text NOT NULL, + redacts text NOT NULL, + have_censored boolean DEFAULT false NOT NULL, + received_ts bigint +); +CREATE TABLE refresh_tokens ( + id bigint NOT NULL, + user_id text NOT NULL, + device_id text NOT NULL, + token text NOT NULL, + next_token_id bigint, + expiry_ts bigint, + ultimate_session_expiry_ts bigint +); +CREATE TABLE registration_tokens ( + token text NOT NULL, + uses_allowed integer, + pending integer NOT NULL, + completed integer NOT NULL, + expiry_time bigint +); +CREATE TABLE rejections ( + event_id text NOT NULL, + reason text NOT NULL, + last_check text NOT NULL +); +CREATE TABLE remote_media_cache ( + media_origin text, + media_id text, + media_type text, + created_ts bigint, + upload_name text, + media_length integer, + filesystem_id text, + last_access_ts bigint, + quarantined_by text +); +CREATE TABLE remote_media_cache_thumbnails ( + media_origin text, + media_id text, + thumbnail_width integer, + thumbnail_height integer, + thumbnail_method text, + thumbnail_type text, + thumbnail_length integer, + filesystem_id text +); +CREATE TABLE room_account_data ( + user_id text NOT NULL, + room_id text NOT NULL, + account_data_type text NOT NULL, + stream_id bigint NOT NULL, + content text NOT NULL, + instance_name text +); +CREATE TABLE room_alias_servers ( + room_alias text NOT NULL, + server text NOT NULL +); +CREATE TABLE room_aliases ( + room_alias text NOT NULL, + room_id text NOT NULL, + creator text +); +CREATE TABLE room_depth ( + room_id text NOT NULL, + min_depth bigint +); +CREATE TABLE room_memberships ( + event_id text NOT NULL, + user_id text NOT NULL, + sender text NOT NULL, + room_id text NOT NULL, + membership text NOT NULL, + forgotten integer DEFAULT 0, + display_name text, + avatar_url text +); +CREATE TABLE room_retention ( + room_id text NOT NULL, + event_id text NOT NULL, + min_lifetime bigint, + max_lifetime bigint +); +CREATE TABLE room_stats_current ( + room_id text NOT NULL, + current_state_events integer NOT NULL, + joined_members integer NOT NULL, + invited_members integer NOT NULL, + left_members integer NOT NULL, + banned_members integer NOT NULL, + local_users_in_room integer NOT NULL, + completed_delta_stream_id bigint NOT NULL, + knocked_members integer +); +CREATE TABLE room_stats_earliest_token ( + room_id text NOT NULL, + token bigint NOT NULL +); +CREATE TABLE room_stats_state ( + room_id text NOT NULL, + name text, + canonical_alias text, + join_rules text, + history_visibility text, + encryption text, + avatar text, + guest_access text, + is_federatable boolean, + topic text, + room_type text +); +CREATE TABLE room_tags ( + user_id text NOT NULL, + room_id text NOT NULL, + tag text NOT NULL, + content text NOT NULL +); +CREATE TABLE room_tags_revisions ( + user_id text NOT NULL, + room_id text NOT NULL, + stream_id bigint NOT NULL, + instance_name text +); +CREATE TABLE rooms ( + room_id text NOT NULL, + is_public boolean, + creator text, + room_version text, + has_auth_chain_index boolean +); +CREATE TABLE server_keys_json ( + server_name text NOT NULL, + key_id text NOT NULL, + from_server text NOT NULL, + ts_added_ms bigint NOT NULL, + ts_valid_until_ms bigint NOT NULL, + key_json bytea NOT NULL +); +CREATE TABLE server_signature_keys ( + server_name text, + key_id text, + from_server text, + ts_added_ms bigint, + verify_key bytea, + ts_valid_until_ms bigint +); +CREATE TABLE sessions ( + session_type text NOT NULL, + session_id text NOT NULL, + value text NOT NULL, + expiry_time_ms bigint NOT NULL +); +CREATE TABLE state_events ( + event_id text NOT NULL, + room_id text NOT NULL, + type text NOT NULL, + state_key text NOT NULL, + prev_state text +); +CREATE TABLE stats_incremental_position ( + lock character(1) DEFAULT 'X'::bpchar NOT NULL, + stream_id bigint NOT NULL, + CONSTRAINT stats_incremental_position_lock_check CHECK ((lock = 'X'::bpchar)) +); +CREATE TABLE stream_ordering_to_exterm ( + stream_ordering bigint NOT NULL, + room_id text NOT NULL, + event_id text NOT NULL +); +CREATE TABLE stream_positions ( + stream_name text NOT NULL, + instance_name text NOT NULL, + stream_id bigint NOT NULL +); +CREATE TABLE threepid_guest_access_tokens ( + medium text, + address text, + guest_access_token text, + first_inviter text +); +CREATE TABLE threepid_validation_session ( + session_id text NOT NULL, + medium text NOT NULL, + address text NOT NULL, + client_secret text NOT NULL, + last_send_attempt bigint NOT NULL, + validated_at bigint +); +CREATE TABLE threepid_validation_token ( + token text NOT NULL, + session_id text NOT NULL, + next_link text, + expires bigint NOT NULL +); +CREATE TABLE ui_auth_sessions ( + session_id text NOT NULL, + creation_time bigint NOT NULL, + serverdict text NOT NULL, + clientdict text NOT NULL, + uri text NOT NULL, + method text NOT NULL, + description text NOT NULL +); +CREATE TABLE ui_auth_sessions_credentials ( + session_id text NOT NULL, + stage_type text NOT NULL, + result text NOT NULL +); +CREATE TABLE ui_auth_sessions_ips ( + session_id text NOT NULL, + ip text NOT NULL, + user_agent text NOT NULL +); +CREATE TABLE user_daily_visits ( + user_id text NOT NULL, + device_id text, + "timestamp" bigint NOT NULL, + user_agent text +); +CREATE TABLE user_directory ( + user_id text NOT NULL, + room_id text, + display_name text, + avatar_url text +); +CREATE TABLE user_directory_search ( + user_id text NOT NULL, + vector tsvector +); +CREATE TABLE user_directory_stream_pos ( + lock character(1) DEFAULT 'X'::bpchar NOT NULL, + stream_id bigint, + CONSTRAINT user_directory_stream_pos_lock_check CHECK ((lock = 'X'::bpchar)) +); +CREATE TABLE user_external_ids ( + auth_provider text NOT NULL, + external_id text NOT NULL, + user_id text NOT NULL +); +CREATE TABLE user_filters ( + user_id text NOT NULL, + filter_id bigint NOT NULL, + filter_json bytea NOT NULL +); +CREATE SEQUENCE user_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE user_ips ( + user_id text NOT NULL, + access_token text NOT NULL, + device_id text, + ip text NOT NULL, + user_agent text NOT NULL, + last_seen bigint NOT NULL +); +CREATE TABLE user_signature_stream ( + stream_id bigint NOT NULL, + from_user_id text NOT NULL, + user_ids text NOT NULL +); +CREATE TABLE user_stats_current ( + user_id text NOT NULL, + joined_rooms bigint NOT NULL, + completed_delta_stream_id bigint NOT NULL +); +CREATE TABLE user_threepid_id_server ( + user_id text NOT NULL, + medium text NOT NULL, + address text NOT NULL, + id_server text NOT NULL +); +CREATE TABLE user_threepids ( + user_id text NOT NULL, + medium text NOT NULL, + address text NOT NULL, + validated_at bigint NOT NULL, + added_at bigint NOT NULL +); +CREATE TABLE users ( + name text, + password_hash text, + creation_ts bigint, + admin smallint DEFAULT 0 NOT NULL, + upgrade_ts bigint, + is_guest smallint DEFAULT 0 NOT NULL, + appservice_id text, + consent_version text, + consent_server_notice_sent text, + user_type text, + deactivated smallint DEFAULT 0 NOT NULL, + shadow_banned boolean, + consent_ts bigint +); +CREATE TABLE users_in_public_rooms ( + user_id text NOT NULL, + room_id text NOT NULL +); +CREATE TABLE users_pending_deactivation ( + user_id text NOT NULL +); +CREATE TABLE users_to_send_full_presence_to ( + user_id text NOT NULL, + presence_stream_id bigint +); +CREATE TABLE users_who_share_private_rooms ( + user_id text NOT NULL, + other_user_id text NOT NULL, + room_id text NOT NULL +); +CREATE TABLE worker_locks ( + lock_name text NOT NULL, + lock_key text NOT NULL, + instance_name text NOT NULL, + token text NOT NULL, + last_renewed_ts bigint NOT NULL +); +ALTER TABLE ONLY instance_map ALTER COLUMN instance_id SET DEFAULT nextval('instance_map_instance_id_seq'::regclass); +ALTER TABLE ONLY access_tokens + ADD CONSTRAINT access_tokens_pkey PRIMARY KEY (id); +ALTER TABLE ONLY access_tokens + ADD CONSTRAINT access_tokens_token_key UNIQUE (token); +ALTER TABLE ONLY account_data + ADD CONSTRAINT account_data_uniqueness UNIQUE (user_id, account_data_type); +ALTER TABLE ONLY account_validity + ADD CONSTRAINT account_validity_pkey PRIMARY KEY (user_id); +ALTER TABLE ONLY application_services_state + ADD CONSTRAINT application_services_state_pkey PRIMARY KEY (as_id); +ALTER TABLE ONLY application_services_txns + ADD CONSTRAINT application_services_txns_as_id_txn_id_key UNIQUE (as_id, txn_id); +ALTER TABLE ONLY appservice_stream_position + ADD CONSTRAINT appservice_stream_position_lock_key UNIQUE (lock); +ALTER TABLE ONLY current_state_events + ADD CONSTRAINT current_state_events_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY current_state_events + ADD CONSTRAINT current_state_events_room_id_type_state_key_key UNIQUE (room_id, type, state_key); +ALTER TABLE ONLY dehydrated_devices + ADD CONSTRAINT dehydrated_devices_pkey PRIMARY KEY (user_id); +ALTER TABLE ONLY destination_rooms + ADD CONSTRAINT destination_rooms_pkey PRIMARY KEY (destination, room_id); +ALTER TABLE ONLY destinations + ADD CONSTRAINT destinations_pkey PRIMARY KEY (destination); +ALTER TABLE ONLY devices + ADD CONSTRAINT device_uniqueness UNIQUE (user_id, device_id); +ALTER TABLE ONLY e2e_device_keys_json + ADD CONSTRAINT e2e_device_keys_json_uniqueness UNIQUE (user_id, device_id); +ALTER TABLE ONLY e2e_fallback_keys_json + ADD CONSTRAINT e2e_fallback_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm); +ALTER TABLE ONLY e2e_one_time_keys_json + ADD CONSTRAINT e2e_one_time_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm, key_id); +ALTER TABLE ONLY event_auth_chain_to_calculate + ADD CONSTRAINT event_auth_chain_to_calculate_pkey PRIMARY KEY (event_id); +ALTER TABLE ONLY event_auth_chains + ADD CONSTRAINT event_auth_chains_pkey PRIMARY KEY (event_id); +ALTER TABLE ONLY event_backward_extremities + ADD CONSTRAINT event_backward_extremities_event_id_room_id_key UNIQUE (event_id, room_id); +ALTER TABLE ONLY event_expiry + ADD CONSTRAINT event_expiry_pkey PRIMARY KEY (event_id); +ALTER TABLE ONLY event_forward_extremities + ADD CONSTRAINT event_forward_extremities_event_id_room_id_key UNIQUE (event_id, room_id); +ALTER TABLE ONLY event_push_actions + ADD CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag); +ALTER TABLE ONLY event_json + ADD CONSTRAINT event_json_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY event_labels + ADD CONSTRAINT event_labels_pkey PRIMARY KEY (event_id, label); +ALTER TABLE ONLY event_push_summary_last_receipt_stream_id + ADD CONSTRAINT event_push_summary_last_receipt_stream_id_lock_key UNIQUE (lock); +ALTER TABLE ONLY event_push_summary_stream_ordering + ADD CONSTRAINT event_push_summary_stream_ordering_lock_key UNIQUE (lock); +ALTER TABLE ONLY event_reports + ADD CONSTRAINT event_reports_pkey PRIMARY KEY (id); +ALTER TABLE ONLY event_to_state_groups + ADD CONSTRAINT event_to_state_groups_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY events + ADD CONSTRAINT events_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY ex_outlier_stream + ADD CONSTRAINT ex_outlier_stream_pkey PRIMARY KEY (event_stream_ordering); +ALTER TABLE ONLY instance_map + ADD CONSTRAINT instance_map_pkey PRIMARY KEY (instance_id); +ALTER TABLE ONLY local_media_repository + ADD CONSTRAINT local_media_repository_media_id_key UNIQUE (media_id); +ALTER TABLE ONLY user_threepids + ADD CONSTRAINT medium_address UNIQUE (medium, address); +ALTER TABLE ONLY open_id_tokens + ADD CONSTRAINT open_id_tokens_pkey PRIMARY KEY (token); +ALTER TABLE ONLY partial_state_events + ADD CONSTRAINT partial_state_events_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY partial_state_rooms + ADD CONSTRAINT partial_state_rooms_pkey PRIMARY KEY (room_id); +ALTER TABLE ONLY partial_state_rooms_servers + ADD CONSTRAINT partial_state_rooms_servers_room_id_server_name_key UNIQUE (room_id, server_name); +ALTER TABLE ONLY presence + ADD CONSTRAINT presence_user_id_key UNIQUE (user_id); +ALTER TABLE ONLY profiles + ADD CONSTRAINT profiles_user_id_key UNIQUE (user_id); +ALTER TABLE ONLY push_rules_enable + ADD CONSTRAINT push_rules_enable_pkey PRIMARY KEY (id); +ALTER TABLE ONLY push_rules_enable + ADD CONSTRAINT push_rules_enable_user_name_rule_id_key UNIQUE (user_name, rule_id); +ALTER TABLE ONLY push_rules + ADD CONSTRAINT push_rules_pkey PRIMARY KEY (id); +ALTER TABLE ONLY push_rules + ADD CONSTRAINT push_rules_user_name_rule_id_key UNIQUE (user_name, rule_id); +ALTER TABLE ONLY pusher_throttle + ADD CONSTRAINT pusher_throttle_pkey PRIMARY KEY (pusher, room_id); +ALTER TABLE ONLY pushers + ADD CONSTRAINT pushers2_app_id_pushkey_user_name_key UNIQUE (app_id, pushkey, user_name); +ALTER TABLE ONLY pushers + ADD CONSTRAINT pushers2_pkey PRIMARY KEY (id); +ALTER TABLE ONLY receipts_graph + ADD CONSTRAINT receipts_graph_uniqueness UNIQUE (room_id, receipt_type, user_id); +ALTER TABLE ONLY receipts_graph + ADD CONSTRAINT receipts_graph_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id); +ALTER TABLE ONLY receipts_linearized + ADD CONSTRAINT receipts_linearized_uniqueness UNIQUE (room_id, receipt_type, user_id); +ALTER TABLE ONLY receipts_linearized + ADD CONSTRAINT receipts_linearized_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id); +ALTER TABLE ONLY received_transactions + ADD CONSTRAINT received_transactions_transaction_id_origin_key UNIQUE (transaction_id, origin); +ALTER TABLE ONLY redactions + ADD CONSTRAINT redactions_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY refresh_tokens + ADD CONSTRAINT refresh_tokens_pkey PRIMARY KEY (id); +ALTER TABLE ONLY refresh_tokens + ADD CONSTRAINT refresh_tokens_token_key UNIQUE (token); +ALTER TABLE ONLY registration_tokens + ADD CONSTRAINT registration_tokens_token_key UNIQUE (token); +ALTER TABLE ONLY rejections + ADD CONSTRAINT rejections_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY remote_media_cache + ADD CONSTRAINT remote_media_cache_media_origin_media_id_key UNIQUE (media_origin, media_id); +ALTER TABLE ONLY room_account_data + ADD CONSTRAINT room_account_data_uniqueness UNIQUE (user_id, room_id, account_data_type); +ALTER TABLE ONLY room_aliases + ADD CONSTRAINT room_aliases_room_alias_key UNIQUE (room_alias); +ALTER TABLE ONLY room_depth + ADD CONSTRAINT room_depth_room_id_key UNIQUE (room_id); +ALTER TABLE ONLY room_memberships + ADD CONSTRAINT room_memberships_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY room_retention + ADD CONSTRAINT room_retention_pkey PRIMARY KEY (room_id, event_id); +ALTER TABLE ONLY room_stats_current + ADD CONSTRAINT room_stats_current_pkey PRIMARY KEY (room_id); +ALTER TABLE ONLY room_tags_revisions + ADD CONSTRAINT room_tag_revisions_uniqueness UNIQUE (user_id, room_id); +ALTER TABLE ONLY room_tags + ADD CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag); +ALTER TABLE ONLY rooms + ADD CONSTRAINT rooms_pkey PRIMARY KEY (room_id); +ALTER TABLE ONLY server_keys_json + ADD CONSTRAINT server_keys_json_uniqueness UNIQUE (server_name, key_id, from_server); +ALTER TABLE ONLY server_signature_keys + ADD CONSTRAINT server_signature_keys_server_name_key_id_key UNIQUE (server_name, key_id); +ALTER TABLE ONLY sessions + ADD CONSTRAINT sessions_session_type_session_id_key UNIQUE (session_type, session_id); +ALTER TABLE ONLY state_events + ADD CONSTRAINT state_events_event_id_key UNIQUE (event_id); +ALTER TABLE ONLY stats_incremental_position + ADD CONSTRAINT stats_incremental_position_lock_key UNIQUE (lock); +ALTER TABLE ONLY threepid_validation_session + ADD CONSTRAINT threepid_validation_session_pkey PRIMARY KEY (session_id); +ALTER TABLE ONLY threepid_validation_token + ADD CONSTRAINT threepid_validation_token_pkey PRIMARY KEY (token); +ALTER TABLE ONLY ui_auth_sessions_credentials + ADD CONSTRAINT ui_auth_sessions_credentials_session_id_stage_type_key UNIQUE (session_id, stage_type); +ALTER TABLE ONLY ui_auth_sessions_ips + ADD CONSTRAINT ui_auth_sessions_ips_session_id_ip_user_agent_key UNIQUE (session_id, ip, user_agent); +ALTER TABLE ONLY ui_auth_sessions + ADD CONSTRAINT ui_auth_sessions_session_id_key UNIQUE (session_id); +ALTER TABLE ONLY user_directory_stream_pos + ADD CONSTRAINT user_directory_stream_pos_lock_key UNIQUE (lock); +ALTER TABLE ONLY user_external_ids + ADD CONSTRAINT user_external_ids_auth_provider_external_id_key UNIQUE (auth_provider, external_id); +ALTER TABLE ONLY user_stats_current + ADD CONSTRAINT user_stats_current_pkey PRIMARY KEY (user_id); +ALTER TABLE ONLY users + ADD CONSTRAINT users_name_key UNIQUE (name); +ALTER TABLE ONLY users_to_send_full_presence_to + ADD CONSTRAINT users_to_send_full_presence_to_pkey PRIMARY KEY (user_id); +CREATE INDEX access_tokens_device_id ON access_tokens USING btree (user_id, device_id); +CREATE INDEX account_data_stream_id ON account_data USING btree (user_id, stream_id); +CREATE INDEX application_services_txns_id ON application_services_txns USING btree (as_id); +CREATE UNIQUE INDEX appservice_room_list_idx ON appservice_room_list USING btree (appservice_id, network_id, room_id); +CREATE INDEX batch_events_batch_id ON batch_events USING btree (batch_id); +CREATE UNIQUE INDEX blocked_rooms_idx ON blocked_rooms USING btree (room_id); +CREATE UNIQUE INDEX cache_invalidation_stream_by_instance_id ON cache_invalidation_stream_by_instance USING btree (stream_id); +CREATE INDEX cache_invalidation_stream_by_instance_instance_index ON cache_invalidation_stream_by_instance USING btree (instance_name, stream_id); +CREATE UNIQUE INDEX chunk_events_event_id ON batch_events USING btree (event_id); +CREATE INDEX current_state_delta_stream_idx ON current_state_delta_stream USING btree (stream_id); +CREATE INDEX current_state_events_member_index ON current_state_events USING btree (state_key) WHERE (type = 'm.room.member'::text); +CREATE INDEX deleted_pushers_stream_id ON deleted_pushers USING btree (stream_id); +CREATE INDEX destination_rooms_room_id ON destination_rooms USING btree (room_id); +CREATE INDEX device_auth_providers_devices ON device_auth_providers USING btree (user_id, device_id); +CREATE INDEX device_auth_providers_sessions ON device_auth_providers USING btree (auth_provider_id, auth_provider_session_id); +CREATE INDEX device_federation_inbox_sender_id ON device_federation_inbox USING btree (origin, message_id); +CREATE INDEX device_federation_outbox_destination_id ON device_federation_outbox USING btree (destination, stream_id); +CREATE INDEX device_federation_outbox_id ON device_federation_outbox USING btree (stream_id); +CREATE INDEX device_inbox_stream_id_user_id ON device_inbox USING btree (stream_id, user_id); +CREATE INDEX device_inbox_user_stream_id ON device_inbox USING btree (user_id, device_id, stream_id); +CREATE UNIQUE INDEX device_lists_changes_in_stream_id ON device_lists_changes_in_room USING btree (stream_id, room_id); +CREATE INDEX device_lists_changes_in_stream_id_unconverted ON device_lists_changes_in_room USING btree (stream_id) WHERE (NOT converted_to_destinations); +CREATE UNIQUE INDEX device_lists_outbound_last_success_unique_idx ON device_lists_outbound_last_success USING btree (destination, user_id); +CREATE INDEX device_lists_outbound_pokes_id ON device_lists_outbound_pokes USING btree (destination, stream_id); +CREATE INDEX device_lists_outbound_pokes_stream ON device_lists_outbound_pokes USING btree (stream_id); +CREATE INDEX device_lists_outbound_pokes_user ON device_lists_outbound_pokes USING btree (destination, user_id); +CREATE UNIQUE INDEX device_lists_remote_cache_unique_id ON device_lists_remote_cache USING btree (user_id, device_id); +CREATE UNIQUE INDEX device_lists_remote_extremeties_unique_idx ON device_lists_remote_extremeties USING btree (user_id); +CREATE UNIQUE INDEX device_lists_remote_resync_idx ON device_lists_remote_resync USING btree (user_id); +CREATE INDEX device_lists_remote_resync_ts_idx ON device_lists_remote_resync USING btree (added_ts); +CREATE INDEX device_lists_stream_id ON device_lists_stream USING btree (stream_id, user_id); +CREATE INDEX device_lists_stream_user_id ON device_lists_stream USING btree (user_id, device_id); +CREATE UNIQUE INDEX e2e_cross_signing_keys_idx ON e2e_cross_signing_keys USING btree (user_id, keytype, stream_id); +CREATE UNIQUE INDEX e2e_cross_signing_keys_stream_idx ON e2e_cross_signing_keys USING btree (stream_id); +CREATE INDEX e2e_cross_signing_signatures2_idx ON e2e_cross_signing_signatures USING btree (user_id, target_user_id, target_device_id); +CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions USING btree (user_id, version); +CREATE UNIQUE INDEX e2e_room_keys_with_version_idx ON e2e_room_keys USING btree (user_id, version, room_id, session_id); +CREATE UNIQUE INDEX erased_users_user ON erased_users USING btree (user_id); +CREATE INDEX ev_b_extrem_id ON event_backward_extremities USING btree (event_id); +CREATE INDEX ev_b_extrem_room ON event_backward_extremities USING btree (room_id); +CREATE INDEX ev_edges_prev_id ON event_edges USING btree (prev_event_id); +CREATE INDEX ev_extrem_id ON event_forward_extremities USING btree (event_id); +CREATE INDEX ev_extrem_room ON event_forward_extremities USING btree (room_id); +CREATE INDEX evauth_edges_id ON event_auth USING btree (event_id); +CREATE INDEX event_auth_chain_links_idx ON event_auth_chain_links USING btree (origin_chain_id, target_chain_id); +CREATE INDEX event_auth_chain_to_calculate_rm_id ON event_auth_chain_to_calculate USING btree (room_id); +CREATE UNIQUE INDEX event_auth_chains_c_seq_index ON event_auth_chains USING btree (chain_id, sequence_number); +CREATE INDEX event_contains_url_index ON events USING btree (room_id, topological_ordering, stream_ordering) WHERE ((contains_url = true) AND (outlier = false)); +CREATE UNIQUE INDEX event_edges_event_id_prev_event_id_idx ON event_edges USING btree (event_id, prev_event_id); +CREATE INDEX event_expiry_expiry_ts_idx ON event_expiry USING btree (expiry_ts); +CREATE INDEX event_labels_room_id_label_idx ON event_labels USING btree (room_id, label, topological_ordering); +CREATE INDEX event_push_actions_highlights_index ON event_push_actions USING btree (user_id, room_id, topological_ordering, stream_ordering) WHERE (highlight = 1); +CREATE INDEX event_push_actions_rm_tokens ON event_push_actions USING btree (user_id, room_id, topological_ordering, stream_ordering); +CREATE INDEX event_push_actions_room_id_user_id ON event_push_actions USING btree (room_id, user_id); +CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging USING btree (event_id); +CREATE INDEX event_push_actions_stream_highlight_index ON event_push_actions USING btree (highlight, stream_ordering) WHERE (highlight = 0); +CREATE INDEX event_push_actions_stream_ordering ON event_push_actions USING btree (stream_ordering, user_id); +CREATE INDEX event_push_actions_u_highlight ON event_push_actions USING btree (user_id, stream_ordering); +CREATE UNIQUE INDEX event_push_summary_unique_index ON event_push_summary USING btree (user_id, room_id); +CREATE UNIQUE INDEX event_push_summary_unique_index2 ON event_push_summary USING btree (user_id, room_id, thread_id); +CREATE UNIQUE INDEX event_relations_id ON event_relations USING btree (event_id); +CREATE INDEX event_relations_relates ON event_relations USING btree (relates_to_id, relation_type, aggregation_key); +CREATE INDEX event_search_ev_ridx ON event_search USING btree (room_id); +CREATE UNIQUE INDEX event_search_event_id_idx ON event_search USING btree (event_id); +CREATE INDEX event_search_fts_idx ON event_search USING gin (vector); +CREATE INDEX event_to_state_groups_sg_index ON event_to_state_groups USING btree (state_group); +CREATE UNIQUE INDEX event_txn_id_event_id ON event_txn_id USING btree (event_id); +CREATE INDEX event_txn_id_ts ON event_txn_id USING btree (inserted_ts); +CREATE UNIQUE INDEX event_txn_id_txn_id ON event_txn_id USING btree (room_id, user_id, token_id, txn_id); +CREATE INDEX events_order_room ON events USING btree (room_id, topological_ordering, stream_ordering); +CREATE INDEX events_room_stream ON events USING btree (room_id, stream_ordering); +CREATE UNIQUE INDEX events_stream_ordering ON events USING btree (stream_ordering); +CREATE INDEX events_ts ON events USING btree (origin_server_ts, stream_ordering); +CREATE UNIQUE INDEX federation_inbound_events_staging_instance_event ON federation_inbound_events_staging USING btree (origin, event_id); +CREATE INDEX federation_inbound_events_staging_room ON federation_inbound_events_staging USING btree (room_id, received_ts); +CREATE UNIQUE INDEX federation_stream_position_instance ON federation_stream_position USING btree (type, instance_name); +CREATE INDEX ignored_users_ignored_user_id ON ignored_users USING btree (ignored_user_id); +CREATE UNIQUE INDEX ignored_users_uniqueness ON ignored_users USING btree (ignorer_user_id, ignored_user_id); +CREATE INDEX insertion_event_edges_event_id ON insertion_event_edges USING btree (event_id); +CREATE INDEX insertion_event_edges_insertion_prev_event_id ON insertion_event_edges USING btree (insertion_prev_event_id); +CREATE INDEX insertion_event_edges_insertion_room_id ON insertion_event_edges USING btree (room_id); +CREATE UNIQUE INDEX insertion_event_extremities_event_id ON insertion_event_extremities USING btree (event_id); +CREATE INDEX insertion_event_extremities_room_id ON insertion_event_extremities USING btree (room_id); +CREATE UNIQUE INDEX insertion_events_event_id ON insertion_events USING btree (event_id); +CREATE INDEX insertion_events_next_batch_id ON insertion_events USING btree (next_batch_id); +CREATE UNIQUE INDEX instance_map_idx ON instance_map USING btree (instance_name); +CREATE UNIQUE INDEX local_current_membership_idx ON local_current_membership USING btree (user_id, room_id); +CREATE INDEX local_current_membership_room_idx ON local_current_membership USING btree (room_id); +CREATE UNIQUE INDEX local_media_repository_thumbn_media_id_width_height_method_key ON local_media_repository_thumbnails USING btree (media_id, thumbnail_width, thumbnail_height, thumbnail_type, thumbnail_method); +CREATE INDEX local_media_repository_thumbnails_media_id ON local_media_repository_thumbnails USING btree (media_id); +CREATE INDEX local_media_repository_url_cache_by_url_download_ts ON local_media_repository_url_cache USING btree (url, download_ts); +CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache USING btree (expires_ts); +CREATE INDEX local_media_repository_url_cache_media_idx ON local_media_repository_url_cache USING btree (media_id); +CREATE INDEX local_media_repository_url_idx ON local_media_repository USING btree (created_ts) WHERE (url_cache IS NOT NULL); +CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users USING btree ("timestamp"); +CREATE UNIQUE INDEX monthly_active_users_users ON monthly_active_users USING btree (user_id); +CREATE INDEX open_id_tokens_ts_valid_until_ms ON open_id_tokens USING btree (ts_valid_until_ms); +CREATE INDEX partial_state_events_room_id_idx ON partial_state_events USING btree (room_id); +CREATE INDEX presence_stream_id ON presence_stream USING btree (stream_id, user_id); +CREATE INDEX presence_stream_state_not_offline_idx ON presence_stream USING btree (state) WHERE (state <> 'offline'::text); +CREATE INDEX presence_stream_user_id ON presence_stream USING btree (user_id); +CREATE INDEX public_room_index ON rooms USING btree (is_public); +CREATE INDEX push_rules_enable_user_name ON push_rules_enable USING btree (user_name); +CREATE INDEX push_rules_stream_id ON push_rules_stream USING btree (stream_id); +CREATE INDEX push_rules_stream_user_stream_id ON push_rules_stream USING btree (user_id, stream_id); +CREATE INDEX push_rules_user_name ON push_rules USING btree (user_name); +CREATE UNIQUE INDEX ratelimit_override_idx ON ratelimit_override USING btree (user_id); +CREATE UNIQUE INDEX receipts_graph_unique_index ON receipts_graph USING btree (room_id, receipt_type, user_id) WHERE (thread_id IS NULL); +CREATE INDEX receipts_linearized_id ON receipts_linearized USING btree (stream_id); +CREATE INDEX receipts_linearized_room_stream ON receipts_linearized USING btree (room_id, stream_id); +CREATE UNIQUE INDEX receipts_linearized_unique_index ON receipts_linearized USING btree (room_id, receipt_type, user_id) WHERE (thread_id IS NULL); +CREATE INDEX receipts_linearized_user ON receipts_linearized USING btree (user_id); +CREATE INDEX received_transactions_ts ON received_transactions USING btree (ts); +CREATE INDEX redactions_have_censored_ts ON redactions USING btree (received_ts) WHERE (NOT have_censored); +CREATE INDEX redactions_redacts ON redactions USING btree (redacts); +CREATE INDEX refresh_tokens_next_token_id ON refresh_tokens USING btree (next_token_id) WHERE (next_token_id IS NOT NULL); +CREATE UNIQUE INDEX remote_media_repository_thumbn_media_origin_id_width_height_met ON remote_media_cache_thumbnails USING btree (media_origin, media_id, thumbnail_width, thumbnail_height, thumbnail_type, thumbnail_method); +CREATE INDEX room_account_data_stream_id ON room_account_data USING btree (user_id, stream_id); +CREATE INDEX room_alias_servers_alias ON room_alias_servers USING btree (room_alias); +CREATE INDEX room_aliases_id ON room_aliases USING btree (room_id); +CREATE INDEX room_memberships_room_id ON room_memberships USING btree (room_id); +CREATE INDEX room_memberships_user_id ON room_memberships USING btree (user_id); +CREATE INDEX room_memberships_user_room_forgotten ON room_memberships USING btree (user_id, room_id) WHERE (forgotten = 1); +CREATE INDEX room_retention_max_lifetime_idx ON room_retention USING btree (max_lifetime); +CREATE UNIQUE INDEX room_stats_earliest_token_idx ON room_stats_earliest_token USING btree (room_id); +CREATE UNIQUE INDEX room_stats_state_room ON room_stats_state USING btree (room_id); +CREATE INDEX stream_ordering_to_exterm_idx ON stream_ordering_to_exterm USING btree (stream_ordering); +CREATE INDEX stream_ordering_to_exterm_rm_idx ON stream_ordering_to_exterm USING btree (room_id, stream_ordering); +CREATE UNIQUE INDEX stream_positions_idx ON stream_positions USING btree (stream_name, instance_name); +CREATE UNIQUE INDEX threepid_guest_access_tokens_index ON threepid_guest_access_tokens USING btree (medium, address); +CREATE INDEX threepid_validation_token_session_id ON threepid_validation_token USING btree (session_id); +CREATE INDEX user_daily_visits_ts_idx ON user_daily_visits USING btree ("timestamp"); +CREATE INDEX user_daily_visits_uts_idx ON user_daily_visits USING btree (user_id, "timestamp"); +CREATE INDEX user_directory_room_idx ON user_directory USING btree (room_id); +CREATE INDEX user_directory_search_fts_idx ON user_directory_search USING gin (vector); +CREATE UNIQUE INDEX user_directory_search_user_idx ON user_directory_search USING btree (user_id); +CREATE UNIQUE INDEX user_directory_user_idx ON user_directory USING btree (user_id); +CREATE INDEX user_external_ids_user_id_idx ON user_external_ids USING btree (user_id); +CREATE UNIQUE INDEX user_filters_unique ON user_filters USING btree (user_id, filter_id); +CREATE INDEX user_ips_device_id ON user_ips USING btree (user_id, device_id, last_seen); +CREATE INDEX user_ips_last_seen ON user_ips USING btree (user_id, last_seen); +CREATE INDEX user_ips_last_seen_only ON user_ips USING btree (last_seen); +CREATE UNIQUE INDEX user_ips_user_token_ip_unique_index ON user_ips USING btree (user_id, access_token, ip); +CREATE UNIQUE INDEX user_signature_stream_idx ON user_signature_stream USING btree (stream_id); +CREATE UNIQUE INDEX user_threepid_id_server_idx ON user_threepid_id_server USING btree (user_id, medium, address, id_server); +CREATE INDEX user_threepids_medium_address ON user_threepids USING btree (medium, address); +CREATE INDEX user_threepids_user_id ON user_threepids USING btree (user_id); +CREATE INDEX users_creation_ts ON users USING btree (creation_ts); +CREATE INDEX users_have_local_media ON local_media_repository USING btree (user_id, created_ts); +CREATE INDEX users_in_public_rooms_r_idx ON users_in_public_rooms USING btree (room_id); +CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms USING btree (user_id, room_id); +CREATE INDEX users_who_share_private_rooms_o_idx ON users_who_share_private_rooms USING btree (other_user_id); +CREATE INDEX users_who_share_private_rooms_r_idx ON users_who_share_private_rooms USING btree (room_id); +CREATE UNIQUE INDEX users_who_share_private_rooms_u_idx ON users_who_share_private_rooms USING btree (user_id, other_user_id, room_id); +CREATE UNIQUE INDEX worker_locks_key ON worker_locks USING btree (lock_name, lock_key); +CREATE TRIGGER check_partial_state_events BEFORE INSERT OR UPDATE ON partial_state_events FOR EACH ROW EXECUTE PROCEDURE check_partial_state_events(); +ALTER TABLE ONLY access_tokens + ADD CONSTRAINT access_tokens_refresh_token_id_fkey FOREIGN KEY (refresh_token_id) REFERENCES refresh_tokens(id) ON DELETE CASCADE; +ALTER TABLE ONLY destination_rooms + ADD CONSTRAINT destination_rooms_destination_fkey FOREIGN KEY (destination) REFERENCES destinations(destination); +ALTER TABLE ONLY destination_rooms + ADD CONSTRAINT destination_rooms_room_id_fkey FOREIGN KEY (room_id) REFERENCES rooms(room_id); +ALTER TABLE ONLY event_edges + ADD CONSTRAINT event_edges_event_id_fkey FOREIGN KEY (event_id) REFERENCES events(event_id); +ALTER TABLE ONLY event_txn_id + ADD CONSTRAINT event_txn_id_event_id_fkey FOREIGN KEY (event_id) REFERENCES events(event_id) ON DELETE CASCADE; +ALTER TABLE ONLY event_txn_id + ADD CONSTRAINT event_txn_id_token_id_fkey FOREIGN KEY (token_id) REFERENCES access_tokens(id) ON DELETE CASCADE; +ALTER TABLE ONLY partial_state_events + ADD CONSTRAINT partial_state_events_event_id_fkey FOREIGN KEY (event_id) REFERENCES events(event_id); +ALTER TABLE ONLY partial_state_events + ADD CONSTRAINT partial_state_events_room_id_fkey FOREIGN KEY (room_id) REFERENCES partial_state_rooms(room_id); +ALTER TABLE ONLY partial_state_rooms + ADD CONSTRAINT partial_state_rooms_room_id_fkey FOREIGN KEY (room_id) REFERENCES rooms(room_id); +ALTER TABLE ONLY partial_state_rooms_servers + ADD CONSTRAINT partial_state_rooms_servers_room_id_fkey FOREIGN KEY (room_id) REFERENCES partial_state_rooms(room_id); +ALTER TABLE ONLY refresh_tokens + ADD CONSTRAINT refresh_tokens_next_token_id_fkey FOREIGN KEY (next_token_id) REFERENCES refresh_tokens(id) ON DELETE CASCADE; +ALTER TABLE ONLY ui_auth_sessions_credentials + ADD CONSTRAINT ui_auth_sessions_credentials_session_id_fkey FOREIGN KEY (session_id) REFERENCES ui_auth_sessions(session_id); +ALTER TABLE ONLY ui_auth_sessions_ips + ADD CONSTRAINT ui_auth_sessions_ips_session_id_fkey FOREIGN KEY (session_id) REFERENCES ui_auth_sessions(session_id); +ALTER TABLE ONLY users_to_send_full_presence_to + ADD CONSTRAINT users_to_send_full_presence_to_user_id_fkey FOREIGN KEY (user_id) REFERENCES users(name); +INSERT INTO appservice_stream_position VALUES ('X', 0); +INSERT INTO event_push_summary_last_receipt_stream_id VALUES ('X', 0); +INSERT INTO event_push_summary_stream_ordering VALUES ('X', 0); +INSERT INTO federation_stream_position VALUES ('federation', -1, 'master'); +INSERT INTO federation_stream_position VALUES ('events', -1, 'master'); +INSERT INTO stats_incremental_position VALUES ('X', 1); +INSERT INTO user_directory_stream_pos VALUES ('X', 1); +SELECT pg_catalog.setval('account_data_sequence', 1, true); +SELECT pg_catalog.setval('application_services_txn_id_seq', 1, false); +SELECT pg_catalog.setval('cache_invalidation_stream_seq', 1, true); +SELECT pg_catalog.setval('device_inbox_sequence', 1, true); +SELECT pg_catalog.setval('event_auth_chain_id', 1, false); +SELECT pg_catalog.setval('events_backfill_stream_seq', 1, true); +SELECT pg_catalog.setval('events_stream_seq', 1, true); +SELECT pg_catalog.setval('instance_map_instance_id_seq', 1, false); +SELECT pg_catalog.setval('presence_stream_sequence', 1, true); +SELECT pg_catalog.setval('receipts_sequence', 1, true); +SELECT pg_catalog.setval('user_id_seq', 1, false); diff --git a/synapse/storage/schema/main/full_schemas/72/full.sql.sqlite b/synapse/storage/schema/main/full_schemas/72/full.sql.sqlite new file mode 100644 index 000000000000..d403baf1fb12 --- /dev/null +++ b/synapse/storage/schema/main/full_schemas/72/full.sql.sqlite @@ -0,0 +1,646 @@ +CREATE TABLE application_services_txns( as_id TEXT NOT NULL, txn_id INTEGER NOT NULL, event_ids TEXT NOT NULL, UNIQUE(as_id, txn_id) ); +CREATE INDEX application_services_txns_id ON application_services_txns ( as_id ); +CREATE TABLE presence( user_id TEXT NOT NULL, state VARCHAR(20), status_msg TEXT, mtime BIGINT, UNIQUE (user_id) ); +CREATE TABLE users( name TEXT, password_hash TEXT, creation_ts BIGINT, admin SMALLINT DEFAULT 0 NOT NULL, upgrade_ts BIGINT, is_guest SMALLINT DEFAULT 0 NOT NULL, appservice_id TEXT, consent_version TEXT, consent_server_notice_sent TEXT, user_type TEXT DEFAULT NULL, deactivated SMALLINT DEFAULT 0 NOT NULL, shadow_banned BOOLEAN, consent_ts bigint, UNIQUE(name) ); +CREATE TABLE user_ips ( user_id TEXT NOT NULL, access_token TEXT NOT NULL, device_id TEXT, ip TEXT NOT NULL, user_agent TEXT NOT NULL, last_seen BIGINT NOT NULL ); +CREATE TABLE profiles( user_id TEXT NOT NULL, displayname TEXT, avatar_url TEXT, UNIQUE(user_id) ); +CREATE TABLE received_transactions( transaction_id TEXT, origin TEXT, ts BIGINT, response_code INTEGER, response_json bytea, has_been_referenced smallint default 0, UNIQUE (transaction_id, origin) ); +CREATE TABLE destinations( destination TEXT PRIMARY KEY, retry_last_ts BIGINT, retry_interval INTEGER , failure_ts BIGINT, last_successful_stream_ordering BIGINT); +CREATE TABLE events( stream_ordering INTEGER PRIMARY KEY, topological_ordering BIGINT NOT NULL, event_id TEXT NOT NULL, type TEXT NOT NULL, room_id TEXT NOT NULL, content TEXT, unrecognized_keys TEXT, processed BOOL NOT NULL, outlier BOOL NOT NULL, depth BIGINT DEFAULT 0 NOT NULL, origin_server_ts BIGINT, received_ts BIGINT, sender TEXT, contains_url BOOLEAN, instance_name TEXT, state_key TEXT DEFAULT NULL, rejection_reason TEXT DEFAULT NULL, UNIQUE (event_id) ); +CREATE INDEX events_order_room ON events ( room_id, topological_ordering, stream_ordering ); +CREATE TABLE event_json( event_id TEXT NOT NULL, room_id TEXT NOT NULL, internal_metadata TEXT NOT NULL, json TEXT NOT NULL, format_version INTEGER, UNIQUE (event_id) ); +CREATE TABLE state_events( event_id TEXT NOT NULL, room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, prev_state TEXT, UNIQUE (event_id) ); +CREATE TABLE current_state_events( event_id TEXT NOT NULL, room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, membership TEXT, UNIQUE (event_id), UNIQUE (room_id, type, state_key) ); +CREATE TABLE room_memberships( event_id TEXT NOT NULL, user_id TEXT NOT NULL, sender TEXT NOT NULL, room_id TEXT NOT NULL, membership TEXT NOT NULL, forgotten INTEGER DEFAULT 0, display_name TEXT, avatar_url TEXT, UNIQUE (event_id) ); +CREATE INDEX room_memberships_room_id ON room_memberships (room_id); +CREATE INDEX room_memberships_user_id ON room_memberships (user_id); +CREATE TABLE rooms( room_id TEXT PRIMARY KEY NOT NULL, is_public BOOL, creator TEXT , room_version TEXT, has_auth_chain_index BOOLEAN); +CREATE TABLE server_signature_keys( server_name TEXT, key_id TEXT, from_server TEXT, ts_added_ms BIGINT, verify_key bytea, ts_valid_until_ms BIGINT, UNIQUE (server_name, key_id) ); +CREATE TABLE rejections( event_id TEXT NOT NULL, reason TEXT NOT NULL, last_check TEXT NOT NULL, UNIQUE (event_id) ); +CREATE TABLE push_rules ( id BIGINT PRIMARY KEY, user_name TEXT NOT NULL, rule_id TEXT NOT NULL, priority_class SMALLINT NOT NULL, priority INTEGER NOT NULL DEFAULT 0, conditions TEXT NOT NULL, actions TEXT NOT NULL, UNIQUE(user_name, rule_id) ); +CREATE INDEX push_rules_user_name on push_rules (user_name); +CREATE TABLE push_rules_enable ( id BIGINT PRIMARY KEY, user_name TEXT NOT NULL, rule_id TEXT NOT NULL, enabled SMALLINT, UNIQUE(user_name, rule_id) ); +CREATE INDEX push_rules_enable_user_name on push_rules_enable (user_name); +CREATE TABLE event_forward_extremities( event_id TEXT NOT NULL, room_id TEXT NOT NULL, UNIQUE (event_id, room_id) ); +CREATE INDEX ev_extrem_room ON event_forward_extremities(room_id); +CREATE INDEX ev_extrem_id ON event_forward_extremities(event_id); +CREATE TABLE event_backward_extremities( event_id TEXT NOT NULL, room_id TEXT NOT NULL, UNIQUE (event_id, room_id) ); +CREATE INDEX ev_b_extrem_room ON event_backward_extremities(room_id); +CREATE INDEX ev_b_extrem_id ON event_backward_extremities(event_id); +CREATE TABLE room_depth( room_id TEXT NOT NULL, min_depth INTEGER NOT NULL, UNIQUE (room_id) ); +CREATE TABLE event_to_state_groups( event_id TEXT NOT NULL, state_group BIGINT NOT NULL, UNIQUE (event_id) ); +CREATE TABLE local_media_repository ( media_id TEXT, media_type TEXT, media_length INTEGER, created_ts BIGINT, upload_name TEXT, user_id TEXT, quarantined_by TEXT, url_cache TEXT, last_access_ts BIGINT, safe_from_quarantine BOOLEAN NOT NULL DEFAULT 0, UNIQUE (media_id) ); +CREATE TABLE remote_media_cache ( media_origin TEXT, media_id TEXT, media_type TEXT, created_ts BIGINT, upload_name TEXT, media_length INTEGER, filesystem_id TEXT, last_access_ts BIGINT, quarantined_by TEXT, UNIQUE (media_origin, media_id) ); +CREATE TABLE redactions ( event_id TEXT NOT NULL, redacts TEXT NOT NULL, have_censored BOOL NOT NULL DEFAULT false, received_ts BIGINT, UNIQUE (event_id) ); +CREATE INDEX redactions_redacts ON redactions (redacts); +CREATE TABLE room_aliases( room_alias TEXT NOT NULL, room_id TEXT NOT NULL, creator TEXT, UNIQUE (room_alias) ); +CREATE INDEX room_aliases_id ON room_aliases(room_id); +CREATE TABLE room_alias_servers( room_alias TEXT NOT NULL, server TEXT NOT NULL ); +CREATE INDEX room_alias_servers_alias ON room_alias_servers(room_alias); +CREATE TABLE IF NOT EXISTS "server_keys_json" ( server_name TEXT NOT NULL, key_id TEXT NOT NULL, from_server TEXT NOT NULL, ts_added_ms BIGINT NOT NULL, ts_valid_until_ms BIGINT NOT NULL, key_json bytea NOT NULL, CONSTRAINT server_keys_json_uniqueness UNIQUE (server_name, key_id, from_server) ); +CREATE TABLE e2e_device_keys_json ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, ts_added_ms BIGINT NOT NULL, key_json TEXT NOT NULL, CONSTRAINT e2e_device_keys_json_uniqueness UNIQUE (user_id, device_id) ); +CREATE TABLE e2e_one_time_keys_json ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, algorithm TEXT NOT NULL, key_id TEXT NOT NULL, ts_added_ms BIGINT NOT NULL, key_json TEXT NOT NULL, CONSTRAINT e2e_one_time_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm, key_id) ); +CREATE TABLE IF NOT EXISTS "user_threepids" ( user_id TEXT NOT NULL, medium TEXT NOT NULL, address TEXT NOT NULL, validated_at BIGINT NOT NULL, added_at BIGINT NOT NULL, CONSTRAINT medium_address UNIQUE (medium, address) ); +CREATE INDEX user_threepids_user_id ON user_threepids(user_id); +CREATE VIRTUAL TABLE event_search USING fts4 ( event_id, room_id, sender, key, value ) +/* event_search(event_id,room_id,sender,"key",value) */; +CREATE TABLE room_tags( user_id TEXT NOT NULL, room_id TEXT NOT NULL, tag TEXT NOT NULL, content TEXT NOT NULL, CONSTRAINT room_tag_uniqueness UNIQUE (user_id, room_id, tag) ); +CREATE TABLE room_tags_revisions ( user_id TEXT NOT NULL, room_id TEXT NOT NULL, stream_id BIGINT NOT NULL, instance_name TEXT, CONSTRAINT room_tag_revisions_uniqueness UNIQUE (user_id, room_id) ); +CREATE TABLE account_data( user_id TEXT NOT NULL, account_data_type TEXT NOT NULL, stream_id BIGINT NOT NULL, content TEXT NOT NULL, instance_name TEXT, CONSTRAINT account_data_uniqueness UNIQUE (user_id, account_data_type) ); +CREATE TABLE room_account_data( user_id TEXT NOT NULL, room_id TEXT NOT NULL, account_data_type TEXT NOT NULL, stream_id BIGINT NOT NULL, content TEXT NOT NULL, instance_name TEXT, CONSTRAINT room_account_data_uniqueness UNIQUE (user_id, room_id, account_data_type) ); +CREATE INDEX account_data_stream_id on account_data(user_id, stream_id); +CREATE INDEX room_account_data_stream_id on room_account_data(user_id, stream_id); +CREATE INDEX events_ts ON events(origin_server_ts, stream_ordering); +CREATE TABLE event_push_actions( room_id TEXT NOT NULL, event_id TEXT NOT NULL, user_id TEXT NOT NULL, profile_tag VARCHAR(32), actions TEXT NOT NULL, topological_ordering BIGINT, stream_ordering BIGINT, notif SMALLINT, highlight SMALLINT, unread SMALLINT, thread_id TEXT, CONSTRAINT event_id_user_id_profile_tag_uniqueness UNIQUE (room_id, event_id, user_id, profile_tag) ); +CREATE INDEX event_push_actions_room_id_user_id on event_push_actions(room_id, user_id); +CREATE INDEX events_room_stream on events(room_id, stream_ordering); +CREATE INDEX public_room_index on rooms(is_public); +CREATE INDEX event_push_actions_rm_tokens on event_push_actions( user_id, room_id, topological_ordering, stream_ordering ); +CREATE TABLE presence_stream( stream_id BIGINT, user_id TEXT, state TEXT, last_active_ts BIGINT, last_federation_update_ts BIGINT, last_user_sync_ts BIGINT, status_msg TEXT, currently_active BOOLEAN , instance_name TEXT); +CREATE INDEX presence_stream_id ON presence_stream(stream_id, user_id); +CREATE INDEX presence_stream_user_id ON presence_stream(user_id); +CREATE TABLE push_rules_stream( stream_id BIGINT NOT NULL, event_stream_ordering BIGINT NOT NULL, user_id TEXT NOT NULL, rule_id TEXT NOT NULL, op TEXT NOT NULL, priority_class SMALLINT, priority INTEGER, conditions TEXT, actions TEXT ); +CREATE INDEX push_rules_stream_id ON push_rules_stream(stream_id); +CREATE INDEX push_rules_stream_user_stream_id on push_rules_stream(user_id, stream_id); +CREATE TABLE ex_outlier_stream( event_stream_ordering BIGINT PRIMARY KEY NOT NULL, event_id TEXT NOT NULL, state_group BIGINT NOT NULL , instance_name TEXT); +CREATE TABLE threepid_guest_access_tokens( medium TEXT, address TEXT, guest_access_token TEXT, first_inviter TEXT ); +CREATE UNIQUE INDEX threepid_guest_access_tokens_index ON threepid_guest_access_tokens(medium, address); +CREATE INDEX event_push_actions_stream_ordering on event_push_actions( stream_ordering, user_id ); +CREATE TABLE open_id_tokens ( token TEXT NOT NULL PRIMARY KEY, ts_valid_until_ms bigint NOT NULL, user_id TEXT NOT NULL, UNIQUE (token) ); +CREATE INDEX open_id_tokens_ts_valid_until_ms ON open_id_tokens(ts_valid_until_ms); +CREATE TABLE pusher_throttle( pusher BIGINT NOT NULL, room_id TEXT NOT NULL, last_sent_ts BIGINT, throttle_ms BIGINT, PRIMARY KEY (pusher, room_id) ); +CREATE TABLE event_reports( id BIGINT NOT NULL PRIMARY KEY, received_ts BIGINT NOT NULL, room_id TEXT NOT NULL, event_id TEXT NOT NULL, user_id TEXT NOT NULL, reason TEXT, content TEXT ); +CREATE TABLE appservice_stream_position( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_ordering BIGINT, CHECK (Lock='X') ); +CREATE TABLE device_inbox ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, stream_id BIGINT NOT NULL, message_json TEXT NOT NULL , instance_name TEXT); +CREATE INDEX device_inbox_user_stream_id ON device_inbox(user_id, device_id, stream_id); +CREATE INDEX received_transactions_ts ON received_transactions(ts); +CREATE TABLE device_federation_outbox ( destination TEXT NOT NULL, stream_id BIGINT NOT NULL, queued_ts BIGINT NOT NULL, messages_json TEXT NOT NULL , instance_name TEXT); +CREATE INDEX device_federation_outbox_destination_id ON device_federation_outbox(destination, stream_id); +CREATE TABLE device_federation_inbox ( origin TEXT NOT NULL, message_id TEXT NOT NULL, received_ts BIGINT NOT NULL , instance_name TEXT); +CREATE INDEX device_federation_inbox_sender_id ON device_federation_inbox(origin, message_id); +CREATE TABLE stream_ordering_to_exterm ( stream_ordering BIGINT NOT NULL, room_id TEXT NOT NULL, event_id TEXT NOT NULL ); +CREATE INDEX stream_ordering_to_exterm_idx on stream_ordering_to_exterm( stream_ordering ); +CREATE INDEX stream_ordering_to_exterm_rm_idx on stream_ordering_to_exterm( room_id, stream_ordering ); +CREATE TABLE IF NOT EXISTS "event_auth"( event_id TEXT NOT NULL, auth_id TEXT NOT NULL, room_id TEXT NOT NULL ); +CREATE INDEX evauth_edges_id ON event_auth(event_id); +CREATE INDEX user_threepids_medium_address on user_threepids (medium, address); +CREATE TABLE appservice_room_list( appservice_id TEXT NOT NULL, network_id TEXT NOT NULL, room_id TEXT NOT NULL ); +CREATE UNIQUE INDEX appservice_room_list_idx ON appservice_room_list( appservice_id, network_id, room_id ); +CREATE INDEX device_federation_outbox_id ON device_federation_outbox(stream_id); +CREATE TABLE federation_stream_position( type TEXT NOT NULL, stream_id INTEGER NOT NULL , instance_name TEXT NOT NULL DEFAULT 'master'); +CREATE TABLE device_lists_remote_cache ( user_id TEXT NOT NULL, device_id TEXT NOT NULL, content TEXT NOT NULL ); +CREATE TABLE device_lists_remote_extremeties ( user_id TEXT NOT NULL, stream_id TEXT NOT NULL ); +CREATE TABLE device_lists_stream ( stream_id BIGINT NOT NULL, user_id TEXT NOT NULL, device_id TEXT NOT NULL ); +CREATE INDEX device_lists_stream_id ON device_lists_stream(stream_id, user_id); +CREATE TABLE device_lists_outbound_pokes ( destination TEXT NOT NULL, stream_id BIGINT NOT NULL, user_id TEXT NOT NULL, device_id TEXT NOT NULL, sent BOOLEAN NOT NULL, ts BIGINT NOT NULL , opentracing_context TEXT); +CREATE INDEX device_lists_outbound_pokes_id ON device_lists_outbound_pokes(destination, stream_id); +CREATE INDEX device_lists_outbound_pokes_user ON device_lists_outbound_pokes(destination, user_id); +CREATE TABLE event_push_summary ( user_id TEXT NOT NULL, room_id TEXT NOT NULL, notif_count BIGINT NOT NULL, stream_ordering BIGINT NOT NULL , unread_count BIGINT, last_receipt_stream_ordering BIGINT, thread_id TEXT); +CREATE TABLE event_push_summary_stream_ordering ( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_ordering BIGINT NOT NULL, CHECK (Lock='X') ); +CREATE TABLE IF NOT EXISTS "pushers" ( id BIGINT PRIMARY KEY, user_name TEXT NOT NULL, access_token BIGINT DEFAULT NULL, profile_tag TEXT NOT NULL, kind TEXT NOT NULL, app_id TEXT NOT NULL, app_display_name TEXT NOT NULL, device_display_name TEXT NOT NULL, pushkey TEXT NOT NULL, ts BIGINT NOT NULL, lang TEXT, data TEXT, last_stream_ordering INTEGER, last_success BIGINT, failing_since BIGINT, UNIQUE (app_id, pushkey, user_name) ); +CREATE INDEX device_lists_outbound_pokes_stream ON device_lists_outbound_pokes(stream_id); +CREATE TABLE ratelimit_override ( user_id TEXT NOT NULL, messages_per_second BIGINT, burst_count BIGINT ); +CREATE UNIQUE INDEX ratelimit_override_idx ON ratelimit_override(user_id); +CREATE TABLE current_state_delta_stream ( stream_id BIGINT NOT NULL, room_id TEXT NOT NULL, type TEXT NOT NULL, state_key TEXT NOT NULL, event_id TEXT, prev_event_id TEXT , instance_name TEXT); +CREATE INDEX current_state_delta_stream_idx ON current_state_delta_stream(stream_id); +CREATE TABLE user_directory_stream_pos ( Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, stream_id BIGINT, CHECK (Lock='X') ); +CREATE VIRTUAL TABLE user_directory_search USING fts4 ( user_id, value ) +/* user_directory_search(user_id,value) */; +CREATE TABLE blocked_rooms ( room_id TEXT NOT NULL, user_id TEXT NOT NULL ); +CREATE UNIQUE INDEX blocked_rooms_idx ON blocked_rooms(room_id); +CREATE TABLE IF NOT EXISTS "local_media_repository_url_cache"( url TEXT, response_code INTEGER, etag TEXT, expires_ts BIGINT, og TEXT, media_id TEXT, download_ts BIGINT ); +CREATE INDEX local_media_repository_url_cache_expires_idx ON local_media_repository_url_cache(expires_ts); +CREATE INDEX local_media_repository_url_cache_by_url_download_ts ON local_media_repository_url_cache(url, download_ts); +CREATE INDEX local_media_repository_url_cache_media_idx ON local_media_repository_url_cache(media_id); +CREATE TABLE IF NOT EXISTS "deleted_pushers" ( stream_id BIGINT NOT NULL, app_id TEXT NOT NULL, pushkey TEXT NOT NULL, user_id TEXT NOT NULL ); +CREATE INDEX deleted_pushers_stream_id ON deleted_pushers (stream_id); +CREATE TABLE IF NOT EXISTS "user_directory" ( user_id TEXT NOT NULL, room_id TEXT, display_name TEXT, avatar_url TEXT ); +CREATE INDEX user_directory_room_idx ON user_directory(room_id); +CREATE UNIQUE INDEX user_directory_user_idx ON user_directory(user_id); +CREATE TABLE event_push_actions_staging ( event_id TEXT NOT NULL, user_id TEXT NOT NULL, actions TEXT NOT NULL, notif SMALLINT NOT NULL, highlight SMALLINT NOT NULL , unread SMALLINT, thread_id TEXT); +CREATE INDEX event_push_actions_staging_id ON event_push_actions_staging(event_id); +CREATE TABLE users_pending_deactivation ( user_id TEXT NOT NULL ); +CREATE TABLE user_daily_visits ( user_id TEXT NOT NULL, device_id TEXT, timestamp BIGINT NOT NULL , user_agent TEXT); +CREATE INDEX user_daily_visits_uts_idx ON user_daily_visits(user_id, timestamp); +CREATE INDEX user_daily_visits_ts_idx ON user_daily_visits(timestamp); +CREATE TABLE erased_users ( user_id TEXT NOT NULL ); +CREATE UNIQUE INDEX erased_users_user ON erased_users(user_id); +CREATE TABLE monthly_active_users ( user_id TEXT NOT NULL, timestamp BIGINT NOT NULL ); +CREATE UNIQUE INDEX monthly_active_users_users ON monthly_active_users(user_id); +CREATE INDEX monthly_active_users_time_stamp ON monthly_active_users(timestamp); +CREATE TABLE IF NOT EXISTS "e2e_room_keys_versions" ( user_id TEXT NOT NULL, version BIGINT NOT NULL, algorithm TEXT NOT NULL, auth_data TEXT NOT NULL, deleted SMALLINT DEFAULT 0 NOT NULL , etag BIGINT); +CREATE UNIQUE INDEX e2e_room_keys_versions_idx ON e2e_room_keys_versions(user_id, version); +CREATE TABLE IF NOT EXISTS "e2e_room_keys" ( user_id TEXT NOT NULL, room_id TEXT NOT NULL, session_id TEXT NOT NULL, version BIGINT NOT NULL, first_message_index INT, forwarded_count INT, is_verified BOOLEAN, session_data TEXT NOT NULL ); +CREATE TABLE users_who_share_private_rooms ( user_id TEXT NOT NULL, other_user_id TEXT NOT NULL, room_id TEXT NOT NULL ); +CREATE UNIQUE INDEX users_who_share_private_rooms_u_idx ON users_who_share_private_rooms(user_id, other_user_id, room_id); +CREATE INDEX users_who_share_private_rooms_r_idx ON users_who_share_private_rooms(room_id); +CREATE INDEX users_who_share_private_rooms_o_idx ON users_who_share_private_rooms(other_user_id); +CREATE TABLE user_threepid_id_server ( user_id TEXT NOT NULL, medium TEXT NOT NULL, address TEXT NOT NULL, id_server TEXT NOT NULL ); +CREATE UNIQUE INDEX user_threepid_id_server_idx ON user_threepid_id_server( user_id, medium, address, id_server ); +CREATE TABLE users_in_public_rooms ( user_id TEXT NOT NULL, room_id TEXT NOT NULL ); +CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id, room_id); +CREATE TABLE account_validity ( user_id TEXT PRIMARY KEY, expiration_ts_ms BIGINT NOT NULL, email_sent BOOLEAN NOT NULL, renewal_token TEXT , token_used_ts_ms BIGINT); +CREATE TABLE event_relations ( event_id TEXT NOT NULL, relates_to_id TEXT NOT NULL, relation_type TEXT NOT NULL, aggregation_key TEXT ); +CREATE UNIQUE INDEX event_relations_id ON event_relations(event_id); +CREATE INDEX event_relations_relates ON event_relations(relates_to_id, relation_type, aggregation_key); +CREATE TABLE room_stats_earliest_token ( room_id TEXT NOT NULL, token BIGINT NOT NULL ); +CREATE UNIQUE INDEX room_stats_earliest_token_idx ON room_stats_earliest_token(room_id); +CREATE INDEX user_ips_device_id ON user_ips (user_id, device_id, last_seen); +CREATE INDEX event_push_actions_u_highlight ON event_push_actions (user_id, stream_ordering); +CREATE INDEX device_inbox_stream_id_user_id ON device_inbox (stream_id, user_id); +CREATE INDEX device_lists_stream_user_id ON device_lists_stream (user_id, device_id); +CREATE INDEX user_ips_last_seen ON user_ips (user_id, last_seen); +CREATE INDEX user_ips_last_seen_only ON user_ips (last_seen); +CREATE INDEX users_creation_ts ON users (creation_ts); +CREATE INDEX event_to_state_groups_sg_index ON event_to_state_groups (state_group); +CREATE UNIQUE INDEX device_lists_remote_cache_unique_id ON device_lists_remote_cache (user_id, device_id); +CREATE UNIQUE INDEX device_lists_remote_extremeties_unique_idx ON device_lists_remote_extremeties (user_id); +CREATE UNIQUE INDEX user_ips_user_token_ip_unique_index ON user_ips (user_id, access_token, ip); +CREATE TABLE threepid_validation_session ( + session_id TEXT PRIMARY KEY, + medium TEXT NOT NULL, + address TEXT NOT NULL, + client_secret TEXT NOT NULL, + last_send_attempt BIGINT NOT NULL, + validated_at BIGINT +); +CREATE TABLE threepid_validation_token ( + token TEXT PRIMARY KEY, + session_id TEXT NOT NULL, + next_link TEXT, + expires BIGINT NOT NULL +); +CREATE INDEX threepid_validation_token_session_id ON threepid_validation_token(session_id); +CREATE TABLE event_expiry ( + event_id TEXT PRIMARY KEY, + expiry_ts BIGINT NOT NULL +); +CREATE INDEX event_expiry_expiry_ts_idx ON event_expiry(expiry_ts); +CREATE TABLE event_labels ( + event_id TEXT, + label TEXT, + room_id TEXT NOT NULL, + topological_ordering BIGINT NOT NULL, + PRIMARY KEY(event_id, label) +); +CREATE INDEX event_labels_room_id_label_idx ON event_labels(room_id, label, topological_ordering); +CREATE UNIQUE INDEX e2e_room_keys_with_version_idx ON e2e_room_keys(user_id, version, room_id, session_id); +CREATE TABLE IF NOT EXISTS "devices" ( + user_id TEXT NOT NULL, + device_id TEXT NOT NULL, + display_name TEXT, + last_seen BIGINT, + ip TEXT, + user_agent TEXT, + hidden BOOLEAN DEFAULT 0, + CONSTRAINT device_uniqueness UNIQUE (user_id, device_id) +); +CREATE TABLE room_retention( + room_id TEXT, + event_id TEXT, + min_lifetime BIGINT, + max_lifetime BIGINT, + + PRIMARY KEY(room_id, event_id) +); +CREATE INDEX room_retention_max_lifetime_idx on room_retention(max_lifetime); +CREATE TABLE e2e_cross_signing_keys ( + user_id TEXT NOT NULL, + -- the type of cross-signing key (master, user_signing, or self_signing) + keytype TEXT NOT NULL, + -- the full key information, as a json-encoded dict + keydata TEXT NOT NULL, + -- for keeping the keys in order, so that we can fetch the latest one + stream_id BIGINT NOT NULL +); +CREATE UNIQUE INDEX e2e_cross_signing_keys_idx ON e2e_cross_signing_keys(user_id, keytype, stream_id); +CREATE TABLE e2e_cross_signing_signatures ( + -- user who did the signing + user_id TEXT NOT NULL, + -- key used to sign + key_id TEXT NOT NULL, + -- user who was signed + target_user_id TEXT NOT NULL, + -- device/key that was signed + target_device_id TEXT NOT NULL, + -- the actual signature + signature TEXT NOT NULL +); +CREATE TABLE user_signature_stream ( + -- uses the same stream ID as device list stream + stream_id BIGINT NOT NULL, + -- user who did the signing + from_user_id TEXT NOT NULL, + -- list of users who were signed, as a JSON array + user_ids TEXT NOT NULL +); +CREATE UNIQUE INDEX user_signature_stream_idx ON user_signature_stream(stream_id); +CREATE INDEX e2e_cross_signing_signatures2_idx ON e2e_cross_signing_signatures(user_id, target_user_id, target_device_id); +CREATE TABLE stats_incremental_position ( + Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row. + stream_id BIGINT NOT NULL, + CHECK (Lock='X') +); +CREATE TABLE room_stats_current ( + room_id TEXT NOT NULL PRIMARY KEY, + + -- These are absolute counts + current_state_events INT NOT NULL, + joined_members INT NOT NULL, + invited_members INT NOT NULL, + left_members INT NOT NULL, + banned_members INT NOT NULL, + + local_users_in_room INT NOT NULL, + + -- The maximum delta stream position that this row takes into account. + completed_delta_stream_id BIGINT NOT NULL +, knocked_members INT); +CREATE TABLE user_stats_current ( + user_id TEXT NOT NULL PRIMARY KEY, + + joined_rooms BIGINT NOT NULL, + + -- The maximum delta stream position that this row takes into account. + completed_delta_stream_id BIGINT NOT NULL +); +CREATE TABLE room_stats_state ( + room_id TEXT NOT NULL, + name TEXT, + canonical_alias TEXT, + join_rules TEXT, + history_visibility TEXT, + encryption TEXT, + avatar TEXT, + guest_access TEXT, + is_federatable BOOLEAN, + topic TEXT +, room_type TEXT); +CREATE UNIQUE INDEX room_stats_state_room ON room_stats_state(room_id); +CREATE TABLE IF NOT EXISTS "user_filters" ( user_id TEXT NOT NULL, filter_id BIGINT NOT NULL, filter_json BYTEA NOT NULL ); +CREATE UNIQUE INDEX user_filters_unique ON "user_filters" (user_id, filter_id); +CREATE TABLE user_external_ids ( + auth_provider TEXT NOT NULL, + external_id TEXT NOT NULL, + user_id TEXT NOT NULL, + UNIQUE (auth_provider, external_id) +); +CREATE INDEX users_in_public_rooms_r_idx ON users_in_public_rooms(room_id); +CREATE TABLE device_lists_remote_resync ( + user_id TEXT NOT NULL, + added_ts BIGINT NOT NULL +); +CREATE UNIQUE INDEX device_lists_remote_resync_idx ON device_lists_remote_resync (user_id); +CREATE INDEX device_lists_remote_resync_ts_idx ON device_lists_remote_resync (added_ts); +CREATE TABLE local_current_membership ( + room_id TEXT NOT NULL, + user_id TEXT NOT NULL, + event_id TEXT NOT NULL, + membership TEXT NOT NULL + ); +CREATE UNIQUE INDEX local_current_membership_idx ON local_current_membership(user_id, room_id); +CREATE INDEX local_current_membership_room_idx ON local_current_membership(room_id); +CREATE TABLE ui_auth_sessions( + session_id TEXT NOT NULL, -- The session ID passed to the client. + creation_time BIGINT NOT NULL, -- The time this session was created (epoch time in milliseconds). + serverdict TEXT NOT NULL, -- A JSON dictionary of arbitrary data added by Synapse. + clientdict TEXT NOT NULL, -- A JSON dictionary of arbitrary data from the client. + uri TEXT NOT NULL, -- The URI the UI authentication session is using. + method TEXT NOT NULL, -- The HTTP method the UI authentication session is using. + -- The clientdict, uri, and method make up an tuple that must be immutable + -- throughout the lifetime of the UI Auth session. + description TEXT NOT NULL, -- A human readable description of the operation which caused the UI Auth flow to occur. + UNIQUE (session_id) +); +CREATE TABLE ui_auth_sessions_credentials( + session_id TEXT NOT NULL, -- The corresponding UI Auth session. + stage_type TEXT NOT NULL, -- The stage type. + result TEXT NOT NULL, -- The result of the stage verification, stored as JSON. + UNIQUE (session_id, stage_type), + FOREIGN KEY (session_id) + REFERENCES ui_auth_sessions (session_id) +); +CREATE TABLE IF NOT EXISTS "device_lists_outbound_last_success" ( destination TEXT NOT NULL, user_id TEXT NOT NULL, stream_id BIGINT NOT NULL ); +CREATE UNIQUE INDEX device_lists_outbound_last_success_unique_idx ON "device_lists_outbound_last_success" (destination, user_id); +CREATE TABLE IF NOT EXISTS "local_media_repository_thumbnails" ( media_id TEXT, thumbnail_width INTEGER, thumbnail_height INTEGER, thumbnail_type TEXT, thumbnail_method TEXT, thumbnail_length INTEGER, UNIQUE ( media_id, thumbnail_width, thumbnail_height, thumbnail_type, thumbnail_method ) ); +CREATE INDEX local_media_repository_thumbnails_media_id ON local_media_repository_thumbnails (media_id); +CREATE TABLE IF NOT EXISTS "remote_media_cache_thumbnails" ( media_origin TEXT, media_id TEXT, thumbnail_width INTEGER, thumbnail_height INTEGER, thumbnail_method TEXT, thumbnail_type TEXT, thumbnail_length INTEGER, filesystem_id TEXT, UNIQUE ( media_origin, media_id, thumbnail_width, thumbnail_height, thumbnail_type, thumbnail_method ) ); +CREATE TABLE ui_auth_sessions_ips( + session_id TEXT NOT NULL, + ip TEXT NOT NULL, + user_agent TEXT NOT NULL, + UNIQUE (session_id, ip, user_agent), + FOREIGN KEY (session_id) + REFERENCES ui_auth_sessions (session_id) +); +CREATE UNIQUE INDEX federation_stream_position_instance ON federation_stream_position(type, instance_name); +CREATE TABLE dehydrated_devices( + user_id TEXT NOT NULL PRIMARY KEY, + device_id TEXT NOT NULL, + device_data TEXT NOT NULL -- JSON-encoded client-defined data +); +CREATE TABLE e2e_fallback_keys_json ( + user_id TEXT NOT NULL, -- The user this fallback key is for. + device_id TEXT NOT NULL, -- The device this fallback key is for. + algorithm TEXT NOT NULL, -- Which algorithm this fallback key is for. + key_id TEXT NOT NULL, -- An id for suppressing duplicate uploads. + key_json TEXT NOT NULL, -- The key as a JSON blob. + used BOOLEAN NOT NULL DEFAULT FALSE, -- Whether the key has been used or not. + CONSTRAINT e2e_fallback_keys_json_uniqueness UNIQUE (user_id, device_id, algorithm) +); +CREATE TABLE destination_rooms ( + -- the destination in question. + destination TEXT NOT NULL REFERENCES destinations (destination), + -- the ID of the room in question + room_id TEXT NOT NULL REFERENCES rooms (room_id), + -- the stream_ordering of the event + stream_ordering BIGINT NOT NULL, + PRIMARY KEY (destination, room_id) + -- We don't declare a foreign key on stream_ordering here because that'd mean + -- we'd need to either maintain an index (expensive) or do a table scan of + -- destination_rooms whenever we delete an event (also potentially expensive). + -- In addition to that, a foreign key on stream_ordering would be redundant + -- as this row doesn't need to refer to a specific event; if the event gets + -- deleted then it doesn't affect the validity of the stream_ordering here. +); +CREATE INDEX destination_rooms_room_id + ON destination_rooms (room_id); +CREATE TABLE stream_positions ( + stream_name TEXT NOT NULL, + instance_name TEXT NOT NULL, + stream_id BIGINT NOT NULL +); +CREATE UNIQUE INDEX stream_positions_idx ON stream_positions(stream_name, instance_name); +CREATE TABLE IF NOT EXISTS "access_tokens" ( + id BIGINT PRIMARY KEY, + user_id TEXT NOT NULL, + device_id TEXT, + token TEXT NOT NULL, + valid_until_ms BIGINT, + puppets_user_id TEXT, + last_validated BIGINT, refresh_token_id BIGINT REFERENCES refresh_tokens (id) ON DELETE CASCADE, used BOOLEAN, + UNIQUE(token) +); +CREATE INDEX access_tokens_device_id ON access_tokens (user_id, device_id); +CREATE TABLE IF NOT EXISTS "event_txn_id" ( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + user_id TEXT NOT NULL, + token_id BIGINT NOT NULL, + txn_id TEXT NOT NULL, + inserted_ts BIGINT NOT NULL, + FOREIGN KEY (event_id) + REFERENCES events (event_id) ON DELETE CASCADE, + FOREIGN KEY (token_id) + REFERENCES access_tokens (id) ON DELETE CASCADE +); +CREATE UNIQUE INDEX event_txn_id_event_id ON event_txn_id(event_id); +CREATE UNIQUE INDEX event_txn_id_txn_id ON event_txn_id(room_id, user_id, token_id, txn_id); +CREATE INDEX event_txn_id_ts ON event_txn_id(inserted_ts); +CREATE TABLE ignored_users( ignorer_user_id TEXT NOT NULL, ignored_user_id TEXT NOT NULL ); +CREATE UNIQUE INDEX ignored_users_uniqueness ON ignored_users (ignorer_user_id, ignored_user_id); +CREATE INDEX ignored_users_ignored_user_id ON ignored_users (ignored_user_id); +CREATE TABLE event_auth_chains ( + event_id TEXT PRIMARY KEY, + chain_id BIGINT NOT NULL, + sequence_number BIGINT NOT NULL +); +CREATE UNIQUE INDEX event_auth_chains_c_seq_index ON event_auth_chains (chain_id, sequence_number); +CREATE TABLE event_auth_chain_links ( + origin_chain_id BIGINT NOT NULL, + origin_sequence_number BIGINT NOT NULL, + + target_chain_id BIGINT NOT NULL, + target_sequence_number BIGINT NOT NULL +); +CREATE INDEX event_auth_chain_links_idx ON event_auth_chain_links (origin_chain_id, target_chain_id); +CREATE TABLE event_auth_chain_to_calculate ( + event_id TEXT PRIMARY KEY, + room_id TEXT NOT NULL, + type TEXT NOT NULL, + state_key TEXT NOT NULL +); +CREATE INDEX event_auth_chain_to_calculate_rm_id ON event_auth_chain_to_calculate(room_id); +CREATE TABLE users_to_send_full_presence_to( + -- The user ID to send full presence to. + user_id TEXT PRIMARY KEY, + -- A presence stream ID token - the current presence stream token when the row was last upserted. + -- If a user calls /sync and this token is part of the update they're to receive, we also include + -- full user presence in the response. + -- This allows multiple devices for a user to receive full presence whenever they next call /sync. + presence_stream_id BIGINT, + FOREIGN KEY (user_id) + REFERENCES users (name) +); +CREATE TABLE refresh_tokens ( + id BIGINT PRIMARY KEY, + user_id TEXT NOT NULL, + device_id TEXT NOT NULL, + token TEXT NOT NULL, + -- When consumed, a new refresh token is generated, which is tracked by + -- this foreign key + next_token_id BIGINT REFERENCES refresh_tokens (id) ON DELETE CASCADE, expiry_ts BIGINT DEFAULT NULL, ultimate_session_expiry_ts BIGINT DEFAULT NULL, + UNIQUE(token) +); +CREATE TABLE worker_locks ( + lock_name TEXT NOT NULL, + lock_key TEXT NOT NULL, + -- We write the instance name to ease manual debugging, we don't ever read + -- from it. + -- Note: instance names aren't guarenteed to be unique. + instance_name TEXT NOT NULL, + -- A random string generated each time an instance takes out a lock. Used by + -- the instance to tell whether the lock is still held by it (e.g. in the + -- case where the process stalls for a long time the lock may time out and + -- be taken out by another instance, at which point the original instance + -- can tell it no longer holds the lock as the tokens no longer match). + token TEXT NOT NULL, + last_renewed_ts BIGINT NOT NULL +); +CREATE UNIQUE INDEX worker_locks_key ON worker_locks (lock_name, lock_key); +CREATE TABLE federation_inbound_events_staging ( + origin TEXT NOT NULL, + room_id TEXT NOT NULL, + event_id TEXT NOT NULL, + received_ts BIGINT NOT NULL, + event_json TEXT NOT NULL, + internal_metadata TEXT NOT NULL +); +CREATE INDEX federation_inbound_events_staging_room ON federation_inbound_events_staging(room_id, received_ts); +CREATE UNIQUE INDEX federation_inbound_events_staging_instance_event ON federation_inbound_events_staging(origin, event_id); +CREATE TABLE insertion_event_edges( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + insertion_prev_event_id TEXT NOT NULL +); +CREATE INDEX insertion_event_edges_insertion_room_id ON insertion_event_edges(room_id); +CREATE INDEX insertion_event_edges_insertion_prev_event_id ON insertion_event_edges(insertion_prev_event_id); +CREATE TABLE insertion_event_extremities( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL +); +CREATE UNIQUE INDEX insertion_event_extremities_event_id ON insertion_event_extremities(event_id); +CREATE INDEX insertion_event_extremities_room_id ON insertion_event_extremities(room_id); +CREATE TABLE registration_tokens( + token TEXT NOT NULL, -- The token that can be used for authentication. + uses_allowed INT, -- The total number of times this token can be used. NULL if no limit. + pending INT NOT NULL, -- The number of in progress registrations using this token. + completed INT NOT NULL, -- The number of times this token has been used to complete a registration. + expiry_time BIGINT, -- The latest time this token will be valid (epoch time in milliseconds). NULL if token doesn't expire. + UNIQUE (token) +); +CREATE TABLE sessions( + session_type TEXT NOT NULL, -- The unique key for this type of session. + session_id TEXT NOT NULL, -- The session ID passed to the client. + value TEXT NOT NULL, -- A JSON dictionary to persist. + expiry_time_ms BIGINT NOT NULL, -- The time this session will expire (epoch time in milliseconds). + UNIQUE (session_type, session_id) +); +CREATE TABLE insertion_events( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + next_batch_id TEXT NOT NULL +); +CREATE UNIQUE INDEX insertion_events_event_id ON insertion_events(event_id); +CREATE INDEX insertion_events_next_batch_id ON insertion_events(next_batch_id); +CREATE TABLE batch_events( + event_id TEXT NOT NULL, + room_id TEXT NOT NULL, + batch_id TEXT NOT NULL +); +CREATE UNIQUE INDEX batch_events_event_id ON batch_events(event_id); +CREATE INDEX batch_events_batch_id ON batch_events(batch_id); +CREATE INDEX insertion_event_edges_event_id ON insertion_event_edges(event_id); +CREATE TABLE device_auth_providers ( + user_id TEXT NOT NULL, + device_id TEXT NOT NULL, + auth_provider_id TEXT NOT NULL, + auth_provider_session_id TEXT NOT NULL +); +CREATE INDEX device_auth_providers_devices + ON device_auth_providers (user_id, device_id); +CREATE INDEX device_auth_providers_sessions + ON device_auth_providers (auth_provider_id, auth_provider_session_id); +CREATE INDEX refresh_tokens_next_token_id + ON refresh_tokens(next_token_id) + WHERE next_token_id IS NOT NULL; +CREATE TABLE partial_state_rooms ( + room_id TEXT PRIMARY KEY, + FOREIGN KEY(room_id) REFERENCES rooms(room_id) +); +CREATE TABLE partial_state_rooms_servers ( + room_id TEXT NOT NULL REFERENCES partial_state_rooms(room_id), + server_name TEXT NOT NULL, + UNIQUE(room_id, server_name) +); +CREATE TABLE partial_state_events ( + -- the room_id is denormalised for efficient indexing (the canonical source is `events`) + room_id TEXT NOT NULL REFERENCES partial_state_rooms(room_id), + event_id TEXT NOT NULL REFERENCES events(event_id), + UNIQUE(event_id) +); +CREATE INDEX partial_state_events_room_id_idx + ON partial_state_events (room_id); +CREATE TRIGGER partial_state_events_bad_room_id + BEFORE INSERT ON partial_state_events + FOR EACH ROW + BEGIN + SELECT RAISE(ABORT, 'Incorrect room_id in partial_state_events') + WHERE EXISTS ( + SELECT 1 FROM events + WHERE events.event_id = NEW.event_id + AND events.room_id != NEW.room_id + ); + END; +CREATE TABLE device_lists_changes_in_room ( + user_id TEXT NOT NULL, + device_id TEXT NOT NULL, + room_id TEXT NOT NULL, + + -- This initially matches `device_lists_stream.stream_id`. Note that we + -- delete older values from `device_lists_stream`, so we can't use a foreign + -- constraint here. + -- + -- The table will contain rows with the same `stream_id` but different + -- `room_id`, as for each device update we store a row per room the user is + -- joined to. Therefore `(stream_id, room_id)` gives a unique index. + stream_id BIGINT NOT NULL, + + -- We have a background process which goes through this table and converts + -- entries into rows in `device_lists_outbound_pokes`. Once we have processed + -- a row, we mark it as such by setting `converted_to_destinations=TRUE`. + converted_to_destinations BOOLEAN NOT NULL, + opentracing_context TEXT +); +CREATE UNIQUE INDEX device_lists_changes_in_stream_id ON device_lists_changes_in_room(stream_id, room_id); +CREATE INDEX device_lists_changes_in_stream_id_unconverted ON device_lists_changes_in_room(stream_id) WHERE NOT converted_to_destinations; +CREATE TABLE IF NOT EXISTS "event_edges" ( + event_id TEXT NOT NULL, + prev_event_id TEXT NOT NULL, + room_id TEXT NULL, + is_state BOOL NOT NULL DEFAULT 0, + FOREIGN KEY(event_id) REFERENCES events(event_id) +); +CREATE UNIQUE INDEX event_edges_event_id_prev_event_id_idx + ON event_edges (event_id, prev_event_id); +CREATE INDEX ev_edges_prev_id ON event_edges (prev_event_id); +CREATE TABLE event_push_summary_last_receipt_stream_id ( + Lock CHAR(1) NOT NULL DEFAULT 'X' UNIQUE, -- Makes sure this table only has one row. + stream_id BIGINT NOT NULL, + CHECK (Lock='X') +); +CREATE TABLE IF NOT EXISTS "application_services_state" ( + as_id TEXT PRIMARY KEY NOT NULL, + state VARCHAR(5), + read_receipt_stream_id BIGINT, + presence_stream_id BIGINT, + to_device_stream_id BIGINT, + device_list_stream_id BIGINT +); +CREATE TABLE IF NOT EXISTS "receipts_linearized" ( + stream_id BIGINT NOT NULL, + room_id TEXT NOT NULL, + receipt_type TEXT NOT NULL, + user_id TEXT NOT NULL, + event_id TEXT NOT NULL, + thread_id TEXT, + event_stream_ordering BIGINT, + data TEXT NOT NULL, + CONSTRAINT receipts_linearized_uniqueness UNIQUE (room_id, receipt_type, user_id), + CONSTRAINT receipts_linearized_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id) +); +CREATE TABLE IF NOT EXISTS "receipts_graph" ( + room_id TEXT NOT NULL, + receipt_type TEXT NOT NULL, + user_id TEXT NOT NULL, + event_ids TEXT NOT NULL, + thread_id TEXT, + data TEXT NOT NULL, + CONSTRAINT receipts_graph_uniqueness UNIQUE (room_id, receipt_type, user_id), + CONSTRAINT receipts_graph_uniqueness_thread UNIQUE (room_id, receipt_type, user_id, thread_id) +); +CREATE INDEX receipts_linearized_id ON receipts_linearized( stream_id ); +CREATE INDEX receipts_linearized_room_stream ON receipts_linearized( room_id, stream_id ); +CREATE INDEX receipts_linearized_user ON receipts_linearized( user_id ); +CREATE INDEX redactions_have_censored_ts ON redactions (received_ts) WHERE NOT have_censored; +CREATE INDEX room_memberships_user_room_forgotten ON room_memberships (user_id, room_id) WHERE forgotten = 1; +CREATE INDEX users_have_local_media ON local_media_repository (user_id, created_ts) ; +CREATE UNIQUE INDEX e2e_cross_signing_keys_stream_idx ON e2e_cross_signing_keys (stream_id) ; +CREATE INDEX user_external_ids_user_id_idx ON user_external_ids (user_id) ; +CREATE INDEX presence_stream_state_not_offline_idx ON presence_stream (state) WHERE state != 'offline'; +CREATE UNIQUE INDEX event_push_summary_unique_index ON event_push_summary (user_id, room_id) ; +CREATE UNIQUE INDEX event_push_summary_unique_index2 ON event_push_summary (user_id, room_id, thread_id) ; +CREATE UNIQUE INDEX receipts_graph_unique_index ON receipts_graph (room_id, receipt_type, user_id) WHERE thread_id IS NULL; +CREATE UNIQUE INDEX receipts_linearized_unique_index ON receipts_linearized (room_id, receipt_type, user_id) WHERE thread_id IS NULL; +CREATE INDEX event_push_actions_stream_highlight_index ON event_push_actions (highlight, stream_ordering) WHERE highlight=0; +CREATE INDEX current_state_events_member_index ON current_state_events (state_key) WHERE type='m.room.member'; +CREATE INDEX event_contains_url_index ON events (room_id, topological_ordering, stream_ordering) WHERE contains_url = true AND outlier = false; +CREATE INDEX event_push_actions_highlights_index ON event_push_actions (user_id, room_id, topological_ordering, stream_ordering) WHERE highlight=1; +CREATE INDEX local_media_repository_url_idx ON local_media_repository (created_ts) WHERE url_cache IS NOT NULL; +INSERT INTO appservice_stream_position VALUES('X',0); +INSERT INTO federation_stream_position VALUES('federation',-1,'master'); +INSERT INTO federation_stream_position VALUES('events',-1,'master'); +INSERT INTO event_push_summary_stream_ordering VALUES('X',0); +INSERT INTO user_directory_stream_pos VALUES('X',1); +INSERT INTO stats_incremental_position VALUES('X',1); +INSERT INTO event_push_summary_last_receipt_stream_id VALUES('X',0); diff --git a/synapse/storage/schema/state/full_schemas/72/full.sql.postgres b/synapse/storage/schema/state/full_schemas/72/full.sql.postgres new file mode 100644 index 000000000000..263ade761e6a --- /dev/null +++ b/synapse/storage/schema/state/full_schemas/72/full.sql.postgres @@ -0,0 +1,30 @@ +CREATE TABLE state_group_edges ( + state_group bigint NOT NULL, + prev_state_group bigint NOT NULL +); +CREATE SEQUENCE state_group_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; +CREATE TABLE state_groups ( + id bigint NOT NULL, + room_id text NOT NULL, + event_id text NOT NULL +); +CREATE TABLE state_groups_state ( + state_group bigint NOT NULL, + room_id text NOT NULL, + type text NOT NULL, + state_key text NOT NULL, + event_id text NOT NULL +); +ALTER TABLE ONLY state_groups_state ALTER COLUMN state_group SET (n_distinct=-0.02); +ALTER TABLE ONLY state_groups + ADD CONSTRAINT state_groups_pkey PRIMARY KEY (id); +CREATE INDEX state_group_edges_prev_idx ON state_group_edges USING btree (prev_state_group); +CREATE UNIQUE INDEX state_group_edges_unique_idx ON state_group_edges USING btree (state_group, prev_state_group); +CREATE INDEX state_groups_room_id_idx ON state_groups USING btree (room_id); +CREATE INDEX state_groups_state_type_idx ON state_groups_state USING btree (state_group, type, state_key); +SELECT pg_catalog.setval('state_group_id_seq', 1, false); diff --git a/synapse/storage/schema/state/full_schemas/72/full.sql.sqlite b/synapse/storage/schema/state/full_schemas/72/full.sql.sqlite new file mode 100644 index 000000000000..dda060b63805 --- /dev/null +++ b/synapse/storage/schema/state/full_schemas/72/full.sql.sqlite @@ -0,0 +1,20 @@ +CREATE TABLE state_groups ( + id BIGINT PRIMARY KEY, + room_id TEXT NOT NULL, + event_id TEXT NOT NULL +); +CREATE TABLE state_groups_state ( + state_group BIGINT NOT NULL, + room_id TEXT NOT NULL, + type TEXT NOT NULL, + state_key TEXT NOT NULL, + event_id TEXT NOT NULL +); +CREATE TABLE state_group_edges ( + state_group BIGINT NOT NULL, + prev_state_group BIGINT NOT NULL +); +CREATE INDEX state_group_edges_prev_idx ON state_group_edges (prev_state_group); +CREATE INDEX state_groups_state_type_idx ON state_groups_state (state_group, type, state_key); +CREATE INDEX state_groups_room_id_idx ON state_groups (room_id) ; +CREATE UNIQUE INDEX state_group_edges_unique_idx ON state_group_edges (state_group, prev_state_group) ; diff --git a/synapse/types.py b/synapse/types.py index ec44601f5424..773f0438d5bd 100644 --- a/synapse/types.py +++ b/synapse/types.py @@ -835,6 +835,7 @@ class ReadReceipt: receipt_type: str user_id: str event_ids: List[str] + thread_id: Optional[str] data: JsonDict diff --git a/synapse/util/caches/descriptors.py b/synapse/util/caches/descriptors.py index b584ac8a3491..fc8c22ea414e 100644 --- a/synapse/util/caches/descriptors.py +++ b/synapse/util/caches/descriptors.py @@ -58,7 +58,7 @@ F = TypeVar("F", bound=Callable[..., Any]) -class _CachedFunction(Generic[F]): +class CachedFunction(Generic[F]): invalidate: Any = None invalidate_all: Any = None invalidate_external: Any = None @@ -250,7 +250,7 @@ def _wrapped(*args: Any, **kwargs: Any) -> Any: return ret2 - wrapped = cast(_CachedFunction, _wrapped) + wrapped = cast(CachedFunction, _wrapped) wrapped.cache = cache obj.__dict__[self.name] = wrapped @@ -376,7 +376,7 @@ def _wrapped(*args: Any, **kwargs: Any) -> Any: return make_deferred_yieldable(ret) - wrapped = cast(_CachedFunction, _wrapped) + wrapped = cast(CachedFunction, _wrapped) if self.num_args == 1: assert not self.tree @@ -464,6 +464,12 @@ def __get__( cache: DeferredCache[CacheKey, Any] = cached_method.cache num_args = cached_method.num_args + if num_args != self.num_args: + raise Exception( + "Number of args (%s) does not match underlying cache_method_name=%s (%s)." + % (self.num_args, self.cached_method_name, num_args) + ) + @functools.wraps(self.orig) def wrapped(*args: Any, **kwargs: Any) -> "defer.Deferred[Dict]": # If we're passed a cache_context then we'll want to call its @@ -634,7 +640,7 @@ def cached( iterable: bool = False, prune_unread_entries: bool = True, name: Optional[str] = None, -) -> Callable[[F], _CachedFunction[F]]: +) -> Callable[[F], CachedFunction[F]]: func = lambda orig: DeferredCacheDescriptor( orig, max_entries=max_entries, @@ -647,7 +653,7 @@ def cached( name=name, ) - return cast(Callable[[F], _CachedFunction[F]], func) + return cast(Callable[[F], CachedFunction[F]], func) def cachedList( @@ -656,7 +662,7 @@ def cachedList( list_name: str, num_args: Optional[int] = None, name: Optional[str] = None, -) -> Callable[[F], _CachedFunction[F]]: +) -> Callable[[F], CachedFunction[F]]: """Creates a descriptor that wraps a function in a `DeferredCacheListDescriptor`. Used to do batch lookups for an already created cache. One of the arguments @@ -693,7 +699,7 @@ def batch_do_something(self, first_arg, second_args): name=name, ) - return cast(Callable[[F], _CachedFunction[F]], func) + return cast(Callable[[F], CachedFunction[F]], func) def _get_cache_key_builder( diff --git a/synapse/util/check_dependencies.py b/synapse/util/check_dependencies.py index 66f1da750289..3b1e20570020 100644 --- a/synapse/util/check_dependencies.py +++ b/synapse/util/check_dependencies.py @@ -66,6 +66,21 @@ def _is_dev_dependency(req: Requirement) -> bool: ) +def _should_ignore_runtime_requirement(req: Requirement) -> bool: + # This is a build-time dependency. Irritatingly, `poetry build` ignores the + # requirements listed in the [build-system] section of pyproject.toml, so in order + # to support `poetry install --no-dev` we have to mark it as a runtime dependency. + # See discussion on https://github.com/python-poetry/poetry/issues/6154 (it sounds + # like the poetry authors don't consider this a bug?) + # + # In any case, workaround this by ignoring setuptools_rust here. (It might be + # slightly cleaner to put `setuptools_rust` in a `build` extra or similar, but for + # now let's do something quick and dirty. + if req.name == "setuptools_rust": + return True + return False + + class Dependency(NamedTuple): requirement: Requirement must_be_installed: bool @@ -77,7 +92,7 @@ def _generic_dependencies() -> Iterable[Dependency]: assert requirements is not None for raw_requirement in requirements: req = Requirement(raw_requirement) - if _is_dev_dependency(req): + if _is_dev_dependency(req) or _should_ignore_runtime_requirement(req): continue # https://packaging.pypa.io/en/latest/markers.html#usage notes that diff --git a/synapse/visibility.py b/synapse/visibility.py index c810a0590773..c4048d24777d 100644 --- a/synapse/visibility.py +++ b/synapse/visibility.py @@ -162,6 +162,10 @@ async def filter_event_for_clients_with_state( if event.internal_metadata.is_soft_failed(): return [] + # Fast path if we don't have any user IDs to check. + if not user_ids: + return () + # Make a set for all user IDs that haven't been filtered out by a check. allowed_user_ids = set(user_ids) diff --git a/tests/federation/test_federation_client.py b/tests/federation/test_federation_client.py index 50e376f69574..a538215931e3 100644 --- a/tests/federation/test_federation_client.py +++ b/tests/federation/test_federation_client.py @@ -23,14 +23,23 @@ from synapse.api.room_versions import RoomVersions from synapse.events import EventBase +from synapse.rest import admin +from synapse.rest.client import login, room from synapse.server import HomeServer from synapse.types import JsonDict from synapse.util import Clock +from tests.test_utils import event_injection from tests.unittest import FederatingHomeserverTestCase class FederationClientTest(FederatingHomeserverTestCase): + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + def prepare(self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer): super().prepare(reactor, clock, homeserver) @@ -231,6 +240,72 @@ def _get_pdu_once(self) -> EventBase: return remote_pdu + def test_backfill_invalid_signature_records_failed_pull_attempts( + self, + ) -> None: + """ + Test to make sure that events from /backfill with invalid signatures get + recorded as failed pull attempts. + """ + OTHER_USER = f"@user:{self.OTHER_SERVER_NAME}" + main_store = self.hs.get_datastores().main + + # Create the room + user_id = self.register_user("kermit", "test") + tok = self.login("kermit", "test") + room_id = self.helper.create_room_as(room_creator=user_id, tok=tok) + + # We purposely don't run `add_hashes_and_signatures_from_other_server` + # over this because we want the signature check to fail. + pulled_event, _ = self.get_success( + event_injection.create_event( + self.hs, + room_id=room_id, + sender=OTHER_USER, + type="test_event_type", + content={"body": "garply"}, + ) + ) + + # We expect an outbound request to /backfill, so stub that out + self._mock_agent.request.side_effect = lambda *args, **kwargs: defer.succeed( + _mock_response( + { + "origin": "yet.another.server", + "origin_server_ts": 900, + # Mimic the other server returning our new `pulled_event` + "pdus": [pulled_event.get_pdu_json()], + } + ) + ) + + self.get_success( + self.hs.get_federation_client().backfill( + # We use "yet.another.server" instead of + # `self.OTHER_SERVER_NAME` because we want to see the behavior + # from `_check_sigs_and_hash_and_fetch_one` where it tries to + # fetch the PDU again from the origin server if the signature + # fails. Just want to make sure that the failure is counted from + # both code paths. + dest="yet.another.server", + room_id=room_id, + limit=1, + extremities=[pulled_event.event_id], + ), + ) + + # Make sure our failed pull attempt was recorded + backfill_num_attempts = self.get_success( + main_store.db_pool.simple_select_one_onecol( + table="event_failed_pull_attempts", + keyvalues={"event_id": pulled_event.event_id}, + retcol="num_attempts", + ) + ) + # This is 2 because it failed once from `self.OTHER_SERVER_NAME` and the + # other from "yet.another.server" + self.assertEqual(backfill_num_attempts, 2) + def _mock_response(resp: JsonDict): body = json.dumps(resp).encode("utf-8") diff --git a/tests/federation/test_federation_sender.py b/tests/federation/test_federation_sender.py index a5aa500ef85d..f1e357764ff4 100644 --- a/tests/federation/test_federation_sender.py +++ b/tests/federation/test_federation_sender.py @@ -49,7 +49,12 @@ def test_send_receipts(self): sender = self.hs.get_federation_sender() receipt = ReadReceipt( - "room_id", "m.read", "user_id", ["event_id"], {"ts": 1234} + "room_id", + "m.read", + "user_id", + ["event_id"], + thread_id=None, + data={"ts": 1234}, ) self.successResultOf(defer.ensureDeferred(sender.send_read_receipt(receipt))) @@ -89,7 +94,12 @@ def test_send_receipts_with_backoff(self): sender = self.hs.get_federation_sender() receipt = ReadReceipt( - "room_id", "m.read", "user_id", ["event_id"], {"ts": 1234} + "room_id", + "m.read", + "user_id", + ["event_id"], + thread_id=None, + data={"ts": 1234}, ) self.successResultOf(defer.ensureDeferred(sender.send_read_receipt(receipt))) @@ -121,7 +131,12 @@ def test_send_receipts_with_backoff(self): # send the second RR receipt = ReadReceipt( - "room_id", "m.read", "user_id", ["other_id"], {"ts": 1234} + "room_id", + "m.read", + "user_id", + ["other_id"], + thread_id=None, + data={"ts": 1234}, ) self.successResultOf(defer.ensureDeferred(sender.send_read_receipt(receipt))) self.pump() diff --git a/tests/handlers/test_appservice.py b/tests/handlers/test_appservice.py index b17af2725b3d..af24c4984da5 100644 --- a/tests/handlers/test_appservice.py +++ b/tests/handlers/test_appservice.py @@ -447,6 +447,7 @@ def test_sending_read_receipt_batches_to_application_services(self): receipt_type="m.read", user_id=self.local_user, event_ids=[f"$eventid_{i}"], + thread_id=None, data={}, ) ) diff --git a/tests/handlers/test_deactivate_account.py b/tests/handlers/test_deactivate_account.py index 7b9b711521d8..bce65fab7d84 100644 --- a/tests/handlers/test_deactivate_account.py +++ b/tests/handlers/test_deactivate_account.py @@ -15,11 +15,11 @@ from twisted.test.proto_helpers import MemoryReactor from synapse.api.constants import AccountDataTypes -from synapse.push.baserules import PushRule from synapse.push.rulekinds import PRIORITY_CLASS_MAP from synapse.rest import admin from synapse.rest.client import account, login from synapse.server import HomeServer +from synapse.synapse_rust.push import PushRule from synapse.util import Clock from tests.unittest import HomeserverTestCase @@ -161,20 +161,15 @@ def test_push_rules_deleted_upon_account_deactivation(self) -> None: self._store.get_push_rules_for_user(self.user) ) # Filter out default rules; we don't care - push_rules = [r for r, _ in filtered_push_rules if self._is_custom_rule(r)] + push_rules = [ + r for r, _ in filtered_push_rules.rules() if self._is_custom_rule(r) + ] # Check our rule made it - self.assertEqual( - push_rules, - [ - PushRule( - rule_id="personal.override.rule1", - priority_class=5, - conditions=[], - actions=[], - ) - ], - push_rules, - ) + self.assertEqual(len(push_rules), 1) + self.assertEqual(push_rules[0].rule_id, "personal.override.rule1") + self.assertEqual(push_rules[0].priority_class, 5) + self.assertEqual(push_rules[0].conditions, []) + self.assertEqual(push_rules[0].actions, []) # Request the deactivation of our account self._deactivate_my_account() @@ -183,7 +178,9 @@ def test_push_rules_deleted_upon_account_deactivation(self) -> None: self._store.get_push_rules_for_user(self.user) ) # Filter out default rules; we don't care - push_rules = [r for r, _ in filtered_push_rules if self._is_custom_rule(r)] + push_rules = [ + r for r, _ in filtered_push_rules.rules() if self._is_custom_rule(r) + ] # Check our rule no longer exists self.assertEqual(push_rules, [], push_rules) diff --git a/tests/handlers/test_message.py b/tests/handlers/test_message.py index 986b50ce0c8b..99384837d05c 100644 --- a/tests/handlers/test_message.py +++ b/tests/handlers/test_message.py @@ -105,7 +105,10 @@ def test_duplicated_txn_id(self): event1, context = self._create_duplicate_event(txn_id) ret_event1 = self.get_success( - self.handler.handle_new_client_event(self.requester, event1, context) + self.handler.handle_new_client_event( + self.requester, + events_and_context=[(event1, context)], + ) ) stream_id1 = ret_event1.internal_metadata.stream_ordering @@ -118,7 +121,10 @@ def test_duplicated_txn_id(self): self.assertNotEqual(event1.event_id, event2.event_id) ret_event2 = self.get_success( - self.handler.handle_new_client_event(self.requester, event2, context) + self.handler.handle_new_client_event( + self.requester, + events_and_context=[(event2, context)], + ) ) stream_id2 = ret_event2.internal_metadata.stream_ordering diff --git a/tests/handlers/test_register.py b/tests/handlers/test_register.py index e4a992ef6f52..05a4300661b5 100644 --- a/tests/handlers/test_register.py +++ b/tests/handlers/test_register.py @@ -499,7 +499,9 @@ def test_auto_create_auto_join_room_preset_invalid_permissions(self): ) ) self.get_success( - event_creation_handler.handle_new_client_event(requester, event, context) + event_creation_handler.handle_new_client_event( + requester, events_and_context=[(event, context)] + ) ) # Register a second user, which won't be be in the room (or even have an invite) diff --git a/tests/handlers/test_typing.py b/tests/handlers/test_typing.py index 8adba29d7f9c..9c821b3042b8 100644 --- a/tests/handlers/test_typing.py +++ b/tests/handlers/test_typing.py @@ -129,7 +129,7 @@ async def check_user_in_room(room_id: str, requester: Requester) -> None: async def check_host_in_room(room_id: str, server_name: str) -> bool: return room_id == ROOM_ID - hs.get_event_auth_handler().check_host_in_room = check_host_in_room + hs.get_event_auth_handler().is_host_in_room = check_host_in_room async def get_current_hosts_in_room(room_id: str): return {member.domain for member in self.room_members} @@ -138,6 +138,10 @@ async def get_current_hosts_in_room(room_id: str): get_current_hosts_in_room ) + hs.get_storage_controllers().state.get_current_hosts_in_room_or_partial_state_approximation = ( + get_current_hosts_in_room + ) + async def get_users_in_room(room_id: str): return {str(u) for u in self.room_members} diff --git a/tests/push/test_bulk_push_rule_evaluator.py b/tests/push/test_bulk_push_rule_evaluator.py new file mode 100644 index 000000000000..675d7df2ac45 --- /dev/null +++ b/tests/push/test_bulk_push_rule_evaluator.py @@ -0,0 +1,74 @@ +from unittest.mock import patch + +from synapse.api.room_versions import RoomVersions +from synapse.push.bulk_push_rule_evaluator import BulkPushRuleEvaluator +from synapse.rest import admin +from synapse.rest.client import login, register, room +from synapse.types import create_requester + +from tests import unittest + + +class TestBulkPushRuleEvaluator(unittest.HomeserverTestCase): + + servlets = [ + admin.register_servlets_for_client_rest_resource, + room.register_servlets, + login.register_servlets, + register.register_servlets, + ] + + def test_action_for_event_by_user_handles_noninteger_power_levels(self) -> None: + """We should convert floats and strings to integers before passing to Rust. + + Reproduces #14060. + + A lack of validation: the gift that keeps on giving. + """ + # Create a new user and room. + alice = self.register_user("alice", "pass") + token = self.login(alice, "pass") + + room_id = self.helper.create_room_as( + alice, room_version=RoomVersions.V9.identifier, tok=token + ) + + # Alter the power levels in that room to include stringy and floaty levels. + # We need to suppress the validation logic or else it will reject these dodgy + # values. (Presumably this validation was not always present.) + event_creation_handler = self.hs.get_event_creation_handler() + requester = create_requester(alice) + with patch("synapse.events.validator.validate_canonicaljson"), patch( + "synapse.events.validator.jsonschema.validate" + ): + self.helper.send_state( + room_id, + "m.room.power_levels", + { + "users": {alice: "100"}, # stringy + "notifications": {"room": 100.0}, # float + }, + token, + state_key="", + ) + + # Create a new message event, and try to evaluate it under the dodgy + # power level event. + event, context = self.get_success( + event_creation_handler.create_event( + requester, + { + "type": "m.room.message", + "room_id": room_id, + "content": { + "msgtype": "m.text", + "body": "helo", + }, + "sender": alice, + }, + ) + ) + + bulk_evaluator = BulkPushRuleEvaluator(self.hs) + # should not raise + self.get_success(bulk_evaluator.action_for_event_by_user(event, context)) diff --git a/tests/push/test_email.py b/tests/push/test_email.py index 7a3b0d675592..fd14568f55ce 100644 --- a/tests/push/test_email.py +++ b/tests/push/test_email.py @@ -114,7 +114,7 @@ def prepare(self, reactor, clock, hs): ) self.pusher = self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=self.user_id, access_token=self.token_id, kind="email", @@ -136,7 +136,7 @@ def test_need_validated_email(self): """ with self.assertRaises(SynapseError) as cm: self.get_success_or_raise( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=self.user_id, access_token=self.token_id, kind="email", diff --git a/tests/push/test_http.py b/tests/push/test_http.py index 46270f7a3603..9bf8078b3b5f 100644 --- a/tests/push/test_http.py +++ b/tests/push/test_http.py @@ -19,9 +19,10 @@ import synapse.rest.admin from synapse.logging.context import make_deferred_yieldable -from synapse.push import PusherConfigException -from synapse.rest.client import login, push_rule, receipts, room +from synapse.push import PusherConfig, PusherConfigException +from synapse.rest.client import login, push_rule, pusher, receipts, room from synapse.server import HomeServer +from synapse.storage.databases.main.registration import TokenLookupResult from synapse.types import JsonDict from synapse.util import Clock @@ -35,6 +36,7 @@ class HTTPPusherTests(HomeserverTestCase): login.register_servlets, receipts.register_servlets, push_rule.register_servlets, + pusher.register_servlets, ] user_id = True hijack_auth = False @@ -74,7 +76,7 @@ def test_invalid_configuration(self) -> None: def test_data(data: Optional[JsonDict]) -> None: self.get_failure( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", @@ -119,7 +121,7 @@ def test_sends_http(self) -> None: token_id = user_tuple.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", @@ -235,7 +237,7 @@ def test_sends_high_priority_for_encrypted(self) -> None: token_id = user_tuple.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", @@ -355,7 +357,7 @@ def test_sends_high_priority_for_one_to_one_only(self) -> None: token_id = user_tuple.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", @@ -442,7 +444,7 @@ def test_sends_high_priority_for_mention(self) -> None: token_id = user_tuple.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", @@ -520,7 +522,7 @@ def test_sends_high_priority_for_atroom(self) -> None: token_id = user_tuple.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", @@ -627,7 +629,7 @@ def _test_push_unread_count(self) -> None: token_id = user_tuple.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", @@ -731,18 +733,38 @@ def _send_read_request( ) self.assertEqual(channel.code, 200, channel.json_body) - def _make_user_with_pusher(self, username: str) -> Tuple[str, str]: + def _make_user_with_pusher( + self, username: str, enabled: bool = True + ) -> Tuple[str, str]: + """Registers a user and creates a pusher for them. + + Args: + username: the localpart of the new user's Matrix ID. + enabled: whether to create the pusher in an enabled or disabled state. + """ user_id = self.register_user(username, "pass") access_token = self.login(username, "pass") # Register the pusher + self._set_pusher(user_id, access_token, enabled) + + return user_id, access_token + + def _set_pusher(self, user_id: str, access_token: str, enabled: bool) -> None: + """Creates or updates the pusher for the given user. + + Args: + user_id: the user's Matrix ID. + access_token: the access token associated with the pusher. + enabled: whether to enable or disable the pusher. + """ user_tuple = self.get_success( self.hs.get_datastores().main.get_user_by_access_token(access_token) ) token_id = user_tuple.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", @@ -752,11 +774,11 @@ def _make_user_with_pusher(self, username: str) -> Tuple[str, str]: pushkey="a@example.com", lang=None, data={"url": "http://example.com/_matrix/push/v1/notify"}, + enabled=enabled, + device_id=user_tuple.device_id, ) ) - return user_id, access_token - def test_dont_notify_rule_overrides_message(self) -> None: """ The override push rule will suppress notification @@ -794,3 +816,148 @@ def test_dont_notify_rule_overrides_message(self) -> None: # The user sends a message back (sends a notification) self.helper.send(room, body="Hello", tok=access_token) self.assertEqual(len(self.push_attempts), 1) + + @override_config({"experimental_features": {"msc3881_enabled": True}}) + def test_disable(self) -> None: + """Tests that disabling a pusher means it's not pushed to anymore.""" + user_id, access_token = self._make_user_with_pusher("user") + other_user_id, other_access_token = self._make_user_with_pusher("otheruser") + + room = self.helper.create_room_as(user_id, tok=access_token) + self.helper.join(room=room, user=other_user_id, tok=other_access_token) + + # Send a message and check that it generated a push. + self.helper.send(room, body="Hi!", tok=other_access_token) + self.assertEqual(len(self.push_attempts), 1) + + # Disable the pusher. + self._set_pusher(user_id, access_token, enabled=False) + + # Send another message and check that it did not generate a push. + self.helper.send(room, body="Hi!", tok=other_access_token) + self.assertEqual(len(self.push_attempts), 1) + + # Get the pushers for the user and check that it is marked as disabled. + channel = self.make_request("GET", "/pushers", access_token=access_token) + self.assertEqual(channel.code, 200) + self.assertEqual(len(channel.json_body["pushers"]), 1) + + enabled = channel.json_body["pushers"][0]["org.matrix.msc3881.enabled"] + self.assertFalse(enabled) + self.assertTrue(isinstance(enabled, bool)) + + @override_config({"experimental_features": {"msc3881_enabled": True}}) + def test_enable(self) -> None: + """Tests that enabling a disabled pusher means it gets pushed to.""" + # Create the user with the pusher already disabled. + user_id, access_token = self._make_user_with_pusher("user", enabled=False) + other_user_id, other_access_token = self._make_user_with_pusher("otheruser") + + room = self.helper.create_room_as(user_id, tok=access_token) + self.helper.join(room=room, user=other_user_id, tok=other_access_token) + + # Send a message and check that it did not generate a push. + self.helper.send(room, body="Hi!", tok=other_access_token) + self.assertEqual(len(self.push_attempts), 0) + + # Enable the pusher. + self._set_pusher(user_id, access_token, enabled=True) + + # Send another message and check that it did generate a push. + self.helper.send(room, body="Hi!", tok=other_access_token) + self.assertEqual(len(self.push_attempts), 1) + + # Get the pushers for the user and check that it is marked as enabled. + channel = self.make_request("GET", "/pushers", access_token=access_token) + self.assertEqual(channel.code, 200) + self.assertEqual(len(channel.json_body["pushers"]), 1) + + enabled = channel.json_body["pushers"][0]["org.matrix.msc3881.enabled"] + self.assertTrue(enabled) + self.assertTrue(isinstance(enabled, bool)) + + @override_config({"experimental_features": {"msc3881_enabled": True}}) + def test_null_enabled(self) -> None: + """Tests that a pusher that has an 'enabled' column set to NULL (eg pushers + created before the column was introduced) is considered enabled. + """ + # We intentionally set 'enabled' to None so that it's stored as NULL in the + # database. + user_id, access_token = self._make_user_with_pusher("user", enabled=None) # type: ignore[arg-type] + + channel = self.make_request("GET", "/pushers", access_token=access_token) + self.assertEqual(channel.code, 200) + self.assertEqual(len(channel.json_body["pushers"]), 1) + self.assertTrue(channel.json_body["pushers"][0]["org.matrix.msc3881.enabled"]) + + def test_update_different_device_access_token_device_id(self) -> None: + """Tests that if we create a pusher from one device, the update it from another + device, the access token and device ID associated with the pusher stays the + same. + """ + # Create a user with a pusher. + user_id, access_token = self._make_user_with_pusher("user") + + # Get the token ID for the current access token, since that's what we store in + # the pushers table. Also get the device ID from it. + user_tuple = self.get_success( + self.hs.get_datastores().main.get_user_by_access_token(access_token) + ) + token_id = user_tuple.token_id + device_id = user_tuple.device_id + + # Generate a new access token, and update the pusher with it. + new_token = self.login("user", "pass") + self._set_pusher(user_id, new_token, enabled=False) + + # Get the current list of pushers for the user. + ret = self.get_success( + self.hs.get_datastores().main.get_pushers_by({"user_name": user_id}) + ) + pushers: List[PusherConfig] = list(ret) + + # Check that we still have one pusher, and that the access token and device ID + # associated with it didn't change. + self.assertEqual(len(pushers), 1) + self.assertEqual(pushers[0].access_token, token_id) + self.assertEqual(pushers[0].device_id, device_id) + + @override_config({"experimental_features": {"msc3881_enabled": True}}) + def test_device_id(self) -> None: + """Tests that a pusher created with a given device ID shows that device ID in + GET /pushers requests. + """ + self.register_user("user", "pass") + access_token = self.login("user", "pass") + + # We create the pusher with an HTTP request rather than with + # _make_user_with_pusher so that we can test the device ID is correctly set when + # creating a pusher via an API call. + self.make_request( + method="POST", + path="/pushers/set", + content={ + "kind": "http", + "app_id": "m.http", + "app_display_name": "HTTP Push Notifications", + "device_display_name": "pushy push", + "pushkey": "a@example.com", + "lang": "en", + "data": {"url": "http://example.com/_matrix/push/v1/notify"}, + }, + access_token=access_token, + ) + + # Look up the user info for the access token so we can compare the device ID. + lookup_result: TokenLookupResult = self.get_success( + self.hs.get_datastores().main.get_user_by_access_token(access_token) + ) + + # Get the user's devices and check it has the correct device ID. + channel = self.make_request("GET", "/pushers", access_token=access_token) + self.assertEqual(channel.code, 200) + self.assertEqual(len(channel.json_body["pushers"]), 1) + self.assertEqual( + channel.json_body["pushers"][0]["org.matrix.msc3881.device_id"], + lookup_result.device_id, + ) diff --git a/tests/push/test_push_rule_evaluator.py b/tests/push/test_push_rule_evaluator.py index 5d0fadf4fa29..49f5d2d578ee 100644 --- a/tests/push/test_push_rule_evaluator.py +++ b/tests/push/test_push_rule_evaluator.py @@ -19,16 +19,18 @@ from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin -from synapse.api.constants import EventTypes, Membership +from synapse.api.constants import EventTypes, HistoryVisibility, Membership from synapse.api.room_versions import RoomVersions from synapse.appservice import ApplicationService from synapse.events import FrozenEvent -from synapse.push import push_rule_evaluator -from synapse.push.push_rule_evaluator import PushRuleEvaluatorForEvent +from synapse.push.bulk_push_rule_evaluator import _flatten_dict +from synapse.push.httppusher import tweaks_for_actions +from synapse.rest import admin from synapse.rest.client import login, register, room from synapse.server import HomeServer from synapse.storage.databases.main.appservice import _make_exclusive_regex -from synapse.types import JsonDict +from synapse.synapse_rust.push import PushRuleEvaluator +from synapse.types import JsonDict, UserID from synapse.util import Clock from tests import unittest @@ -42,7 +44,7 @@ def _get_evaluator( related_event=None, relations: Optional[Dict[str, Set[Tuple[str, str]]]] = None, relations_match_enabled: bool = False, - ) -> PushRuleEvaluatorForEvent: + ) -> PushRuleEvaluator: event = FrozenEvent( { "event_id": "$event_id", @@ -57,13 +59,13 @@ def _get_evaluator( room_member_count = 0 sender_power_level = 0 power_levels: Dict[str, Union[int, Dict[str, int]]] = {} - return PushRuleEvaluatorForEvent( - event, + return PushRuleEvaluator( + _flatten_dict(event), room_member_count, sender_power_level, - power_levels, + power_levels.get("notifications", {}), related_event, - relations or set(), + relations or {}, relations_match_enabled, ) @@ -295,7 +297,7 @@ def test_tweaks_for_actions(self) -> None: ] self.assertEqual( - push_rule_evaluator.tweaks_for_actions(actions), + tweaks_for_actions(actions), {"sound": "default", "highlight": True}, ) @@ -377,9 +379,6 @@ def test_relation_match(self) -> None: evaluator = self._get_evaluator( {}, {"m.annotation": {("@user:test", "m.reaction")}} ) - condition = {"kind": "relation_match"} - # Oddly, an unknown condition always matches. - self.assertTrue(evaluator.matches(condition, "@user:test", "foo")) # A push rule evaluator with the experimental rule enabled. evaluator = self._get_evaluator( @@ -512,3 +511,80 @@ def test_ignore_appservice_users(self) -> None: ) self.assertEqual(len(users_with_push_actions), 0) + + +class BulkPushRuleEvaluatorTestCase(unittest.HomeserverTestCase): + servlets = [ + admin.register_servlets, + login.register_servlets, + room.register_servlets, + ] + + def prepare( + self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer + ) -> None: + self.main_store = homeserver.get_datastores().main + + self.user_id1 = self.register_user("user1", "password") + self.tok1 = self.login(self.user_id1, "password") + self.user_id2 = self.register_user("user2", "password") + self.tok2 = self.login(self.user_id2, "password") + + self.room_id = self.helper.create_room_as(tok=self.tok1) + + # We want to test history visibility works correctly. + self.helper.send_state( + self.room_id, + EventTypes.RoomHistoryVisibility, + {"history_visibility": HistoryVisibility.JOINED}, + tok=self.tok1, + ) + + def get_notif_count(self, user_id: str) -> int: + return self.get_success( + self.main_store.db_pool.simple_select_one_onecol( + table="event_push_actions", + keyvalues={"user_id": user_id}, + retcol="COALESCE(SUM(notif), 0)", + desc="get_staging_notif_count", + ) + ) + + def test_plain_message(self) -> None: + """Test that sending a normal message in a room will trigger a + notification + """ + + # Have user2 join the room and cle + self.helper.join(self.room_id, self.user_id2, tok=self.tok2) + + # They start off with no notifications, but get them when messages are + # sent. + self.assertEqual(self.get_notif_count(self.user_id2), 0) + + user1 = UserID.from_string(self.user_id1) + self.create_and_send_event(self.room_id, user1) + + self.assertEqual(self.get_notif_count(self.user_id2), 1) + + def test_delayed_message(self) -> None: + """Test that a delayed message that was from before a user joined + doesn't cause a notification for the joined user. + """ + user1 = UserID.from_string(self.user_id1) + + # Send a message before user2 joins + event_id1 = self.create_and_send_event(self.room_id, user1) + + # Have user2 join the room + self.helper.join(self.room_id, self.user_id2, tok=self.tok2) + + # They start off with no notifications + self.assertEqual(self.get_notif_count(self.user_id2), 0) + + # Send another message that references the event before the join to + # simulate a "delayed" event + self.create_and_send_event(self.room_id, user1, prev_event_ids=[event_id1]) + + # user2 should not be notified about it, because they can't see it. + self.assertEqual(self.get_notif_count(self.user_id2), 0) diff --git a/tests/replication/slave/storage/test_events.py b/tests/replication/slave/storage/test_events.py index 7497c15f915a..3b99f316f853 100644 --- a/tests/replication/slave/storage/test_events.py +++ b/tests/replication/slave/storage/test_events.py @@ -171,7 +171,7 @@ def test_push_actions_for_user(self, send_receipt: bool): if send_receipt: self.get_success( self.master_store.insert_receipt( - ROOM_ID, ReceiptTypes.READ, USER_ID_2, [event1.event_id], {} + ROOM_ID, ReceiptTypes.READ, USER_ID_2, [event1.event_id], None, {} ) ) diff --git a/tests/replication/tcp/streams/test_receipts.py b/tests/replication/tcp/streams/test_receipts.py index eb0011784518..ede6d0c11877 100644 --- a/tests/replication/tcp/streams/test_receipts.py +++ b/tests/replication/tcp/streams/test_receipts.py @@ -33,7 +33,12 @@ def test_receipt(self): # tell the master to send a new receipt self.get_success( self.hs.get_datastores().main.insert_receipt( - "!room:blue", "m.read", USER_ID, ["$event:blue"], {"a": 1} + "!room:blue", + "m.read", + USER_ID, + ["$event:blue"], + thread_id=None, + data={"a": 1}, ) ) self.replicate() @@ -48,6 +53,7 @@ def test_receipt(self): self.assertEqual("m.read", row.receipt_type) self.assertEqual(USER_ID, row.user_id) self.assertEqual("$event:blue", row.event_id) + self.assertIsNone(row.thread_id) self.assertEqual({"a": 1}, row.data) # Now let's disconnect and insert some data. @@ -57,7 +63,12 @@ def test_receipt(self): self.get_success( self.hs.get_datastores().main.insert_receipt( - "!room2:blue", "m.read", USER_ID, ["$event2:foo"], {"a": 2} + "!room2:blue", + "m.read", + USER_ID, + ["$event2:foo"], + thread_id=None, + data={"a": 2}, ) ) self.replicate() diff --git a/tests/replication/test_module_cache_invalidation.py b/tests/replication/test_module_cache_invalidation.py new file mode 100644 index 000000000000..b93cae67d3c4 --- /dev/null +++ b/tests/replication/test_module_cache_invalidation.py @@ -0,0 +1,79 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import logging + +import synapse +from synapse.module_api import cached + +from tests.replication._base import BaseMultiWorkerStreamTestCase + +logger = logging.getLogger(__name__) + +FIRST_VALUE = "one" +SECOND_VALUE = "two" + +KEY = "mykey" + + +class TestCache: + current_value = FIRST_VALUE + + @cached() + async def cached_function(self, user_id: str) -> str: + return self.current_value + + +class ModuleCacheInvalidationTestCase(BaseMultiWorkerStreamTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + ] + + def test_module_cache_full_invalidation(self): + main_cache = TestCache() + self.hs.get_module_api().register_cached_function(main_cache.cached_function) + + worker_hs = self.make_worker_hs("synapse.app.generic_worker") + + worker_cache = TestCache() + worker_hs.get_module_api().register_cached_function( + worker_cache.cached_function + ) + + self.assertEqual(FIRST_VALUE, self.get_success(main_cache.cached_function(KEY))) + self.assertEqual( + FIRST_VALUE, self.get_success(worker_cache.cached_function(KEY)) + ) + + main_cache.current_value = SECOND_VALUE + worker_cache.current_value = SECOND_VALUE + # No invalidation yet, should return the cached value on both the main process and the worker + self.assertEqual(FIRST_VALUE, self.get_success(main_cache.cached_function(KEY))) + self.assertEqual( + FIRST_VALUE, self.get_success(worker_cache.cached_function(KEY)) + ) + + # Full invalidation on the main process, should be replicated on the worker that + # should returned the updated value too + self.get_success( + self.hs.get_module_api().invalidate_cache( + main_cache.cached_function, (KEY,) + ) + ) + + self.assertEqual( + SECOND_VALUE, self.get_success(main_cache.cached_function(KEY)) + ) + self.assertEqual( + SECOND_VALUE, self.get_success(worker_cache.cached_function(KEY)) + ) diff --git a/tests/replication/test_pusher_shard.py b/tests/replication/test_pusher_shard.py index 8f4f6688ce86..59fea93e490e 100644 --- a/tests/replication/test_pusher_shard.py +++ b/tests/replication/test_pusher_shard.py @@ -55,7 +55,7 @@ def _create_pusher_and_send_msg(self, localpart): token_id = user_dict.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=user_id, access_token=token_id, kind="http", diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index 9f536ceeb345..4c1ce33463a9 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -25,10 +25,10 @@ from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin -from synapse.api.constants import UserTypes +from synapse.api.constants import ApprovalNoticeMedium, LoginType, UserTypes from synapse.api.errors import Codes, HttpResponseException, ResourceLimitError from synapse.api.room_versions import RoomVersions -from synapse.rest.client import devices, login, logout, profile, room, sync +from synapse.rest.client import devices, login, logout, profile, register, room, sync from synapse.rest.media.v1.filepath import MediaFilePaths from synapse.server import HomeServer from synapse.types import JsonDict, UserID @@ -578,6 +578,16 @@ def _search_test( _search_test(None, "foo", "user_id") _search_test(None, "bar", "user_id") + @override_config( + { + "experimental_features": { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": True, + } + } + } + ) def test_invalid_parameter(self) -> None: """ If parameters are invalid, an error is returned. @@ -623,6 +633,16 @@ def test_invalid_parameter(self) -> None: self.assertEqual(400, channel.code, msg=channel.json_body) self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + # invalid approved + channel = self.make_request( + "GET", + self.url + "?approved=not_bool", + access_token=self.admin_user_tok, + ) + + self.assertEqual(400, channel.code, msg=channel.json_body) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + # unkown order_by channel = self.make_request( "GET", @@ -841,6 +861,69 @@ def test_order_by(self) -> None: self._order_test([self.admin_user, user1, user2], "creation_ts", "f") self._order_test([user2, user1, self.admin_user], "creation_ts", "b") + @override_config( + { + "experimental_features": { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": True, + } + } + } + ) + def test_filter_out_approved(self) -> None: + """Tests that the endpoint can filter out approved users.""" + # Create our users. + self._create_users(2) + + # Get the list of users. + channel = self.make_request( + "GET", + self.url, + access_token=self.admin_user_tok, + ) + self.assertEqual(200, channel.code, channel.result) + + # Exclude the admin, because we don't want to accidentally un-approve the admin. + non_admin_user_ids = [ + user["name"] + for user in channel.json_body["users"] + if user["name"] != self.admin_user + ] + + self.assertEqual(2, len(non_admin_user_ids), non_admin_user_ids) + + # Select a user and un-approve them. We do this rather than the other way around + # because, since these users are created by an admin, we consider them already + # approved. + not_approved_user = non_admin_user_ids[0] + + channel = self.make_request( + "PUT", + f"/_synapse/admin/v2/users/{not_approved_user}", + {"approved": False}, + access_token=self.admin_user_tok, + ) + self.assertEqual(200, channel.code, channel.result) + + # Now get the list of users again, this time filtering out approved users. + channel = self.make_request( + "GET", + self.url + "?approved=false", + access_token=self.admin_user_tok, + ) + self.assertEqual(200, channel.code, channel.result) + + non_admin_user_ids = [ + user["name"] + for user in channel.json_body["users"] + if user["name"] != self.admin_user + ] + + # We should only have our unapproved user now. + self.assertEqual(1, len(non_admin_user_ids), non_admin_user_ids) + self.assertEqual(not_approved_user, non_admin_user_ids[0]) + def _order_test( self, expected_user_list: List[str], @@ -1272,6 +1355,7 @@ class UserRestTestCase(unittest.HomeserverTestCase): synapse.rest.admin.register_servlets, login.register_servlets, sync.register_servlets, + register.register_servlets, ] def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: @@ -2536,6 +2620,104 @@ def test_accidental_deactivation_prevention(self) -> None: # Ensure they're still alive self.assertEqual(0, channel.json_body["deactivated"]) + @override_config( + { + "experimental_features": { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": True, + } + } + } + ) + def test_approve_account(self) -> None: + """Tests that approving an account correctly sets the approved flag for the user.""" + url = self.url_prefix % "@bob:test" + + # Create the user using the client-server API since otherwise the user will be + # marked as approved automatically. + channel = self.make_request( + "POST", + "register", + { + "username": "bob", + "password": "test", + "auth": {"type": LoginType.DUMMY}, + }, + ) + self.assertEqual(403, channel.code, channel.result) + self.assertEqual(Codes.USER_AWAITING_APPROVAL, channel.json_body["errcode"]) + self.assertEqual( + ApprovalNoticeMedium.NONE, channel.json_body["approval_notice_medium"] + ) + + # Get user + channel = self.make_request( + "GET", + url, + access_token=self.admin_user_tok, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertIs(False, channel.json_body["approved"]) + + # Approve user + channel = self.make_request( + "PUT", + url, + access_token=self.admin_user_tok, + content={"approved": True}, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertIs(True, channel.json_body["approved"]) + + # Check that the user is now approved + channel = self.make_request( + "GET", + url, + access_token=self.admin_user_tok, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertIs(True, channel.json_body["approved"]) + + @override_config( + { + "experimental_features": { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": True, + } + } + } + ) + def test_register_approved(self) -> None: + url = self.url_prefix % "@bob:test" + + # Create user + channel = self.make_request( + "PUT", + url, + access_token=self.admin_user_tok, + content={"password": "abc123", "approved": True}, + ) + + self.assertEqual(201, channel.code, msg=channel.json_body) + self.assertEqual("@bob:test", channel.json_body["name"]) + self.assertEqual(1, channel.json_body["approved"]) + + # Get user + channel = self.make_request( + "GET", + url, + access_token=self.admin_user_tok, + ) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual("@bob:test", channel.json_body["name"]) + self.assertEqual(1, channel.json_body["approved"]) + def _is_erased(self, user_id: str, expect: bool) -> None: """Assert that the user is erased or not""" d = self.store.is_user_erased(user_id) @@ -2839,7 +3021,7 @@ def test_get_pushers(self) -> None: token_id = user_tuple.token_id self.get_success( - self.hs.get_pusherpool().add_pusher( + self.hs.get_pusherpool().add_or_update_pusher( user_id=self.other_user, access_token=token_id, kind="http", diff --git a/tests/rest/client/test_auth.py b/tests/rest/client/test_auth.py index 05355c7fb6d8..090cef5216de 100644 --- a/tests/rest/client/test_auth.py +++ b/tests/rest/client/test_auth.py @@ -20,7 +20,8 @@ from twisted.web.resource import Resource import synapse.rest.admin -from synapse.api.constants import LoginType +from synapse.api.constants import ApprovalNoticeMedium, LoginType +from synapse.api.errors import Codes from synapse.handlers.ui_auth.checkers import UserInteractiveAuthChecker from synapse.rest.client import account, auth, devices, login, logout, register from synapse.rest.synapse.client import build_synapse_client_resource_tree @@ -567,6 +568,36 @@ def test_ui_auth_fails_for_incorrect_sso_user(self) -> None: body={"auth": {"session": session_id}}, ) + @skip_unless(HAS_OIDC, "requires OIDC") + @override_config( + { + "oidc_config": TEST_OIDC_CONFIG, + "experimental_features": { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": True, + } + }, + } + ) + def test_sso_not_approved(self) -> None: + """Tests that if we register a user via SSO while requiring approval for new + accounts, we still raise the correct error before logging the user in. + """ + login_resp = self.helper.login_via_oidc("username", expected_status=403) + + self.assertEqual(login_resp["errcode"], Codes.USER_AWAITING_APPROVAL) + self.assertEqual( + ApprovalNoticeMedium.NONE, login_resp["approval_notice_medium"] + ) + + # Check that we didn't register a device for the user during the login attempt. + devices = self.get_success( + self.hs.get_datastores().main.get_devices_by_user("@username:test") + ) + + self.assertEqual(len(devices), 0) + class RefreshAuthTests(unittest.HomeserverTestCase): servlets = [ diff --git a/tests/rest/client/test_login.py b/tests/rest/client/test_login.py index e2a4d982755a..e801ba8c8b7e 100644 --- a/tests/rest/client/test_login.py +++ b/tests/rest/client/test_login.py @@ -23,6 +23,8 @@ from twisted.web.resource import Resource import synapse.rest.admin +from synapse.api.constants import ApprovalNoticeMedium, LoginType +from synapse.api.errors import Codes from synapse.appservice import ApplicationService from synapse.rest.client import devices, login, logout, register from synapse.rest.client.account import WhoamiRestServlet @@ -94,6 +96,7 @@ class LoginRestServletTestCase(unittest.HomeserverTestCase): logout.register_servlets, devices.register_servlets, lambda hs, http_server: WhoamiRestServlet(hs).register(http_server), + register.register_servlets, ] def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: @@ -406,6 +409,44 @@ def test_login_with_overly_long_device_id_fails(self) -> None: self.assertEqual(channel.code, 400) self.assertEqual(channel.json_body["errcode"], "M_INVALID_PARAM") + @override_config( + { + "experimental_features": { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": True, + } + } + } + ) + def test_require_approval(self) -> None: + channel = self.make_request( + "POST", + "register", + { + "username": "kermit", + "password": "monkey", + "auth": {"type": LoginType.DUMMY}, + }, + ) + self.assertEqual(403, channel.code, channel.result) + self.assertEqual(Codes.USER_AWAITING_APPROVAL, channel.json_body["errcode"]) + self.assertEqual( + ApprovalNoticeMedium.NONE, channel.json_body["approval_notice_medium"] + ) + + params = { + "type": LoginType.PASSWORD, + "identifier": {"type": "m.id.user", "user": "kermit"}, + "password": "monkey", + } + channel = self.make_request("POST", LOGIN_URL, params) + self.assertEqual(403, channel.code, channel.result) + self.assertEqual(Codes.USER_AWAITING_APPROVAL, channel.json_body["errcode"]) + self.assertEqual( + ApprovalNoticeMedium.NONE, channel.json_body["approval_notice_medium"] + ) + @skip_unless(has_saml2 and HAS_OIDC, "Requires SAML2 and OIDC") class MultiSSOTestCase(unittest.HomeserverTestCase): diff --git a/tests/rest/client/test_login_token_request.py b/tests/rest/client/test_login_token_request.py new file mode 100644 index 000000000000..c2e1e08811d2 --- /dev/null +++ b/tests/rest/client/test_login_token_request.py @@ -0,0 +1,134 @@ +# Copyright 2022 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.test.proto_helpers import MemoryReactor + +from synapse.rest import admin +from synapse.rest.client import login, login_token_request +from synapse.server import HomeServer +from synapse.util import Clock + +from tests import unittest +from tests.unittest import override_config + +endpoint = "/_matrix/client/unstable/org.matrix.msc3882/login/token" + + +class LoginTokenRequestServletTestCase(unittest.HomeserverTestCase): + + servlets = [ + login.register_servlets, + admin.register_servlets, + login_token_request.register_servlets, + ] + + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: + self.hs = self.setup_test_homeserver() + self.hs.config.registration.enable_registration = True + self.hs.config.registration.registrations_require_3pid = [] + self.hs.config.registration.auto_join_rooms = [] + self.hs.config.captcha.enable_registration_captcha = False + + return self.hs + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.user = "user123" + self.password = "password" + + def test_disabled(self) -> None: + channel = self.make_request("POST", endpoint, {}, access_token=None) + self.assertEqual(channel.code, 400) + + self.register_user(self.user, self.password) + token = self.login(self.user, self.password) + + channel = self.make_request("POST", endpoint, {}, access_token=token) + self.assertEqual(channel.code, 400) + + @override_config({"experimental_features": {"msc3882_enabled": True}}) + def test_require_auth(self) -> None: + channel = self.make_request("POST", endpoint, {}, access_token=None) + self.assertEqual(channel.code, 401) + + @override_config({"experimental_features": {"msc3882_enabled": True}}) + def test_uia_on(self) -> None: + user_id = self.register_user(self.user, self.password) + token = self.login(self.user, self.password) + + channel = self.make_request("POST", endpoint, {}, access_token=token) + self.assertEqual(channel.code, 401) + self.assertIn({"stages": ["m.login.password"]}, channel.json_body["flows"]) + + session = channel.json_body["session"] + + uia = { + "auth": { + "type": "m.login.password", + "identifier": {"type": "m.id.user", "user": self.user}, + "password": self.password, + "session": session, + }, + } + + channel = self.make_request("POST", endpoint, uia, access_token=token) + self.assertEqual(channel.code, 200) + self.assertEqual(channel.json_body["expires_in"], 300) + + login_token = channel.json_body["login_token"] + + channel = self.make_request( + "POST", + "/login", + content={"type": "m.login.token", "token": login_token}, + ) + self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.json_body["user_id"], user_id) + + @override_config( + {"experimental_features": {"msc3882_enabled": True, "msc3882_ui_auth": False}} + ) + def test_uia_off(self) -> None: + user_id = self.register_user(self.user, self.password) + token = self.login(self.user, self.password) + + channel = self.make_request("POST", endpoint, {}, access_token=token) + self.assertEqual(channel.code, 200) + self.assertEqual(channel.json_body["expires_in"], 300) + + login_token = channel.json_body["login_token"] + + channel = self.make_request( + "POST", + "/login", + content={"type": "m.login.token", "token": login_token}, + ) + self.assertEqual(channel.code, 200, channel.result) + self.assertEqual(channel.json_body["user_id"], user_id) + + @override_config( + { + "experimental_features": { + "msc3882_enabled": True, + "msc3882_ui_auth": False, + "msc3882_token_timeout": "15s", + } + } + ) + def test_expires_in(self) -> None: + self.register_user(self.user, self.password) + token = self.login(self.user, self.password) + + channel = self.make_request("POST", endpoint, {}, access_token=token) + self.assertEqual(channel.code, 200) + self.assertEqual(channel.json_body["expires_in"], 15) diff --git a/tests/rest/client/test_register.py b/tests/rest/client/test_register.py index b781875d5295..11cf3939d84b 100644 --- a/tests/rest/client/test_register.py +++ b/tests/rest/client/test_register.py @@ -22,7 +22,11 @@ from twisted.test.proto_helpers import MemoryReactor import synapse.rest.admin -from synapse.api.constants import APP_SERVICE_REGISTRATION_TYPE, LoginType +from synapse.api.constants import ( + APP_SERVICE_REGISTRATION_TYPE, + ApprovalNoticeMedium, + LoginType, +) from synapse.api.errors import Codes from synapse.appservice import ApplicationService from synapse.rest.client import account, account_validity, login, logout, register, sync @@ -765,6 +769,32 @@ def test_inhibit_user_in_use_error(self) -> None: self.assertEqual(channel.code, 400, channel.json_body) self.assertEqual(channel.json_body["errcode"], Codes.USER_IN_USE) + @override_config( + { + "experimental_features": { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": True, + } + } + } + ) + def test_require_approval(self) -> None: + channel = self.make_request( + "POST", + "register", + { + "username": "kermit", + "password": "monkey", + "auth": {"type": LoginType.DUMMY}, + }, + ) + self.assertEqual(403, channel.code, channel.result) + self.assertEqual(Codes.USER_AWAITING_APPROVAL, channel.json_body["errcode"]) + self.assertEqual( + ApprovalNoticeMedium.NONE, channel.json_body["approval_notice_medium"] + ) + class AccountValidityTestCase(unittest.HomeserverTestCase): diff --git a/tests/rest/client/test_relations.py b/tests/rest/client/test_relations.py index 651f4f415d1d..988cdb746d0d 100644 --- a/tests/rest/client/test_relations.py +++ b/tests/rest/client/test_relations.py @@ -654,6 +654,14 @@ def test_unknown_relations(self) -> None: ) # We also expect to get the original event (the id of which is self.parent_id) + # when requesting the unstable endpoint. + self.assertNotIn("original_event", channel.json_body) + channel = self.make_request( + "GET", + f"/_matrix/client/unstable/rooms/{self.room}/relations/{self.parent_id}?limit=1", + access_token=self.user_token, + ) + self.assertEqual(200, channel.code, channel.json_body) self.assertEqual( channel.json_body["original_event"]["event_id"], self.parent_id ) @@ -728,7 +736,6 @@ def test_background_update(self) -> None: class RelationPaginationTestCase(BaseRelationsTestCase): - @unittest.override_config({"experimental_features": {"msc3715_enabled": True}}) def test_basic_paginate_relations(self) -> None: """Tests that calling pagination API correctly the latest relations.""" channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "a") @@ -756,11 +763,6 @@ def test_basic_paginate_relations(self) -> None: channel.json_body["chunk"][0], ) - # We also expect to get the original event (the id of which is self.parent_id) - self.assertEqual( - channel.json_body["original_event"]["event_id"], self.parent_id - ) - # Make sure next_batch has something in it that looks like it could be a # valid token. self.assertIsInstance( @@ -771,7 +773,7 @@ def test_basic_paginate_relations(self) -> None: channel = self.make_request( "GET", f"/_matrix/client/v1/rooms/{self.room}/relations" - f"/{self.parent_id}?limit=1&org.matrix.msc3715.dir=f", + f"/{self.parent_id}?limit=1&dir=f", access_token=self.user_token, ) self.assertEqual(200, channel.code, channel.json_body) @@ -809,7 +811,7 @@ def test_repeated_paginate_relations(self) -> None: channel = self.make_request( "GET", - f"/_matrix/client/v1/rooms/{self.room}/relations/{self.parent_id}?limit=1{from_token}", + f"/_matrix/client/v1/rooms/{self.room}/relations/{self.parent_id}?limit=3{from_token}", access_token=self.user_token, ) self.assertEqual(200, channel.code, channel.json_body) @@ -827,6 +829,32 @@ def test_repeated_paginate_relations(self) -> None: found_event_ids.reverse() self.assertEqual(found_event_ids, expected_event_ids) + # Test forward pagination. + prev_token = "" + found_event_ids = [] + for _ in range(20): + from_token = "" + if prev_token: + from_token = "&from=" + prev_token + + channel = self.make_request( + "GET", + f"/_matrix/client/v1/rooms/{self.room}/relations/{self.parent_id}?dir=f&limit=3{from_token}", + access_token=self.user_token, + ) + self.assertEqual(200, channel.code, channel.json_body) + + found_event_ids.extend(e["event_id"] for e in channel.json_body["chunk"]) + next_batch = channel.json_body.get("next_batch") + + self.assertNotEqual(prev_token, next_batch) + prev_token = next_batch + + if not prev_token: + break + + self.assertEqual(found_event_ids, expected_event_ids) + def test_pagination_from_sync_and_messages(self) -> None: """Pagination tokens from /sync and /messages can be used to paginate /relations.""" channel = self._send_relation(RelationTypes.ANNOTATION, "m.reaction", "A") diff --git a/tests/rest/client/test_rooms.py b/tests/rest/client/test_rooms.py index c7eb88d33f3d..5e66b5b26cbc 100644 --- a/tests/rest/client/test_rooms.py +++ b/tests/rest/client/test_rooms.py @@ -20,7 +20,7 @@ import json from http import HTTPStatus from typing import Any, Dict, Iterable, List, Optional, Tuple, Union -from unittest.mock import Mock, call +from unittest.mock import Mock, call, patch from urllib import parse as urlparse from parameterized import param, parameterized @@ -39,9 +39,10 @@ RoomTypes, ) from synapse.api.errors import Codes, HttpResponseException +from synapse.appservice import ApplicationService from synapse.handlers.pagination import PurgeStatus from synapse.rest import admin -from synapse.rest.client import account, directory, login, profile, room, sync +from synapse.rest.client import account, directory, login, profile, register, room, sync from synapse.server import HomeServer from synapse.types import JsonDict, RoomAlias, UserID, create_requester from synapse.util import Clock @@ -710,7 +711,7 @@ def test_post_room_no_keys(self) -> None: self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(44, channel.resource_usage.db_txn_count) + self.assertEqual(34, channel.resource_usage.db_txn_count) def test_post_room_initial_state(self) -> None: # POST with initial_state config key, expect new room id @@ -723,7 +724,7 @@ def test_post_room_initial_state(self) -> None: self.assertEqual(HTTPStatus.OK, channel.code, channel.result) self.assertTrue("room_id" in channel.json_body) assert channel.resource_usage is not None - self.assertEqual(50, channel.resource_usage.db_txn_count) + self.assertEqual(37, channel.resource_usage.db_txn_count) def test_post_room_visibility_key(self) -> None: # POST with visibility config key, expect new room id @@ -1252,6 +1253,120 @@ async def user_may_join_room( ) +class RoomAppserviceTsParamTestCase(unittest.HomeserverTestCase): + servlets = [ + room.register_servlets, + synapse.rest.admin.register_servlets, + register.register_servlets, + ] + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.appservice_user, _ = self.register_appservice_user( + "as_user_potato", self.appservice.token + ) + + # Create a room as the appservice user. + args = { + "access_token": self.appservice.token, + "user_id": self.appservice_user, + } + channel = self.make_request( + "POST", + f"/_matrix/client/r0/createRoom?{urlparse.urlencode(args)}", + content={"visibility": "public"}, + ) + + assert channel.code == 200 + self.room = channel.json_body["room_id"] + + self.main_store = self.hs.get_datastores().main + + def make_homeserver(self, reactor: MemoryReactor, clock: Clock) -> HomeServer: + config = self.default_config() + + self.appservice = ApplicationService( + token="i_am_an_app_service", + id="1234", + namespaces={"users": [{"regex": r"@as_user.*", "exclusive": True}]}, + # Note: this user does not have to match the regex above + sender="@as_main:test", + ) + + mock_load_appservices = Mock(return_value=[self.appservice]) + with patch( + "synapse.storage.databases.main.appservice.load_appservices", + mock_load_appservices, + ): + hs = self.setup_test_homeserver(config=config) + return hs + + def test_send_event_ts(self) -> None: + """Test sending a non-state event with a custom timestamp.""" + ts = 1 + + url_params = { + "user_id": self.appservice_user, + "ts": ts, + } + channel = self.make_request( + "PUT", + path=f"/_matrix/client/r0/rooms/{self.room}/send/m.room.message/1234?" + + urlparse.urlencode(url_params), + content={"body": "test", "msgtype": "m.text"}, + access_token=self.appservice.token, + ) + self.assertEqual(channel.code, 200, channel.json_body) + event_id = channel.json_body["event_id"] + + # Ensure the event was persisted with the correct timestamp. + res = self.get_success(self.main_store.get_event(event_id)) + self.assertEquals(ts, res.origin_server_ts) + + def test_send_state_event_ts(self) -> None: + """Test sending a state event with a custom timestamp.""" + ts = 1 + + url_params = { + "user_id": self.appservice_user, + "ts": ts, + } + channel = self.make_request( + "PUT", + path=f"/_matrix/client/r0/rooms/{self.room}/state/m.room.name?" + + urlparse.urlencode(url_params), + content={"name": "test"}, + access_token=self.appservice.token, + ) + self.assertEqual(channel.code, 200, channel.json_body) + event_id = channel.json_body["event_id"] + + # Ensure the event was persisted with the correct timestamp. + res = self.get_success(self.main_store.get_event(event_id)) + self.assertEquals(ts, res.origin_server_ts) + + def test_send_membership_event_ts(self) -> None: + """Test sending a membership event with a custom timestamp.""" + ts = 1 + + url_params = { + "user_id": self.appservice_user, + "ts": ts, + } + channel = self.make_request( + "PUT", + path=f"/_matrix/client/r0/rooms/{self.room}/state/m.room.member/{self.appservice_user}?" + + urlparse.urlencode(url_params), + content={"membership": "join", "display_name": "test"}, + access_token=self.appservice.token, + ) + self.assertEqual(channel.code, 200, channel.json_body) + event_id = channel.json_body["event_id"] + + # Ensure the event was persisted with the correct timestamp. + res = self.get_success(self.main_store.get_event(event_id)) + self.assertEquals(ts, res.origin_server_ts) + + class RoomJoinRatelimitTestCase(RoomBase): user_id = "@sid1:red" diff --git a/tests/rest/client/utils.py b/tests/rest/client/utils.py index dd26145bf8c1..c249a42bb641 100644 --- a/tests/rest/client/utils.py +++ b/tests/rest/client/utils.py @@ -543,8 +543,12 @@ def upload_media( return channel.json_body - def login_via_oidc(self, remote_user_id: str) -> JsonDict: - """Log in (as a new user) via OIDC + def login_via_oidc( + self, + remote_user_id: str, + expected_status: int = 200, + ) -> JsonDict: + """Log in via OIDC Returns the result of the final token login. @@ -578,7 +582,9 @@ def login_via_oidc(self, remote_user_id: str) -> JsonDict: "/login", content={"type": "m.login.token", "token": login_token}, ) - assert channel.code == HTTPStatus.OK + assert ( + channel.code == expected_status + ), f"unexpected status in response: {channel.code}" return channel.json_body def auth_via_oidc( diff --git a/tests/storage/databases/main/test_events_worker.py b/tests/storage/databases/main/test_events_worker.py index 67401272ac37..32a798d74bca 100644 --- a/tests/storage/databases/main/test_events_worker.py +++ b/tests/storage/databases/main/test_events_worker.py @@ -35,66 +35,45 @@ from synapse.util.async_helpers import yieldable_gather_results from tests import unittest +from tests.test_utils.event_injection import create_event, inject_event class HaveSeenEventsTestCase(unittest.HomeserverTestCase): + servlets = [ + admin.register_servlets, + room.register_servlets, + login.register_servlets, + ] + def prepare(self, reactor, clock, hs): + self.hs = hs self.store: EventsWorkerStore = hs.get_datastores().main - # insert some test data - for rid in ("room1", "room2"): - self.get_success( - self.store.db_pool.simple_insert( - "rooms", - {"room_id": rid, "room_version": 4}, - ) - ) + self.user = self.register_user("user", "pass") + self.token = self.login(self.user, "pass") + self.room_id = self.helper.create_room_as(self.user, tok=self.token) self.event_ids: List[str] = [] - for idx, rid in enumerate( - ( - "room1", - "room1", - "room1", - "room2", - ) - ): - event_json = {"type": f"test {idx}", "room_id": rid} - event = make_event_from_dict(event_json, room_version=RoomVersions.V4) - event_id = event.event_id - - self.get_success( - self.store.db_pool.simple_insert( - "events", - { - "event_id": event_id, - "room_id": rid, - "topological_ordering": idx, - "stream_ordering": idx, - "type": event.type, - "processed": True, - "outlier": False, - }, + for i in range(3): + event = self.get_success( + inject_event( + hs, + room_version=RoomVersions.V7.identifier, + room_id=self.room_id, + sender=self.user, + type="test_event_type", + content={"body": f"foobarbaz{i}"}, ) ) - self.get_success( - self.store.db_pool.simple_insert( - "event_json", - { - "event_id": event_id, - "room_id": rid, - "json": json.dumps(event_json), - "internal_metadata": "{}", - "format_version": 3, - }, - ) - ) - self.event_ids.append(event_id) + + self.event_ids.append(event.event_id) def test_simple(self): with LoggingContext(name="test") as ctx: res = self.get_success( - self.store.have_seen_events("room1", [self.event_ids[0], "event19"]) + self.store.have_seen_events( + self.room_id, [self.event_ids[0], "eventdoesnotexist"] + ) ) self.assertEqual(res, {self.event_ids[0]}) @@ -104,7 +83,9 @@ def test_simple(self): # a second lookup of the same events should cause no queries with LoggingContext(name="test") as ctx: res = self.get_success( - self.store.have_seen_events("room1", [self.event_ids[0], "event19"]) + self.store.have_seen_events( + self.room_id, [self.event_ids[0], "eventdoesnotexist"] + ) ) self.assertEqual(res, {self.event_ids[0]}) self.assertEqual(ctx.get_resource_usage().db_txn_count, 0) @@ -116,11 +97,86 @@ def test_query_via_event_cache(self): # looking it up should now cause no db hits with LoggingContext(name="test") as ctx: res = self.get_success( - self.store.have_seen_events("room1", [self.event_ids[0]]) + self.store.have_seen_events(self.room_id, [self.event_ids[0]]) ) self.assertEqual(res, {self.event_ids[0]}) self.assertEqual(ctx.get_resource_usage().db_txn_count, 0) + def test_persisting_event_invalidates_cache(self): + """ + Test to make sure that the `have_seen_event` cache + is invalidated after we persist an event and returns + the updated value. + """ + event, event_context = self.get_success( + create_event( + self.hs, + room_id=self.room_id, + sender=self.user, + type="test_event_type", + content={"body": "garply"}, + ) + ) + + with LoggingContext(name="test") as ctx: + # First, check `have_seen_event` for an event we have not seen yet + # to prime the cache with a `false` value. + res = self.get_success( + self.store.have_seen_events(event.room_id, [event.event_id]) + ) + self.assertEqual(res, set()) + + # That should result in a single db query to lookup + self.assertEqual(ctx.get_resource_usage().db_txn_count, 1) + + # Persist the event which should invalidate or prefill the + # `have_seen_event` cache so we don't return stale values. + persistence = self.hs.get_storage_controllers().persistence + self.get_success( + persistence.persist_event( + event, + event_context, + ) + ) + + with LoggingContext(name="test") as ctx: + # Check `have_seen_event` again and we should see the updated fact + # that we have now seen the event after persisting it. + res = self.get_success( + self.store.have_seen_events(event.room_id, [event.event_id]) + ) + self.assertEqual(res, {event.event_id}) + + # That should result in a single db query to lookup + self.assertEqual(ctx.get_resource_usage().db_txn_count, 1) + + def test_invalidate_cache_by_room_id(self): + """ + Test to make sure that all events associated with the given `(room_id,)` + are invalidated in the `have_seen_event` cache. + """ + with LoggingContext(name="test") as ctx: + # Prime the cache with some values + res = self.get_success( + self.store.have_seen_events(self.room_id, self.event_ids) + ) + self.assertEqual(res, set(self.event_ids)) + + # That should result in a single db query to lookup + self.assertEqual(ctx.get_resource_usage().db_txn_count, 1) + + # Clear the cache with any events associated with the `room_id` + self.store.have_seen_event.invalidate((self.room_id,)) + + with LoggingContext(name="test") as ctx: + res = self.get_success( + self.store.have_seen_events(self.room_id, self.event_ids) + ) + self.assertEqual(res, set(self.event_ids)) + + # Since we cleared the cache, it should result in another db query to lookup + self.assertEqual(ctx.get_resource_usage().db_txn_count, 1) + class EventCacheTestCase(unittest.HomeserverTestCase): """Test that the various layers of event cache works.""" diff --git a/tests/storage/test_event_chain.py b/tests/storage/test_event_chain.py index a0ce077a9957..de9f4af2de90 100644 --- a/tests/storage/test_event_chain.py +++ b/tests/storage/test_event_chain.py @@ -531,7 +531,9 @@ def _generate_room(self) -> Tuple[str, List[Set[str]]]: ) ) self.get_success( - event_handler.handle_new_client_event(self.requester, event, context) + event_handler.handle_new_client_event( + self.requester, events_and_context=[(event, context)] + ) ) state1 = set(self.get_success(context.get_current_state_ids()).values()) @@ -549,7 +551,9 @@ def _generate_room(self) -> Tuple[str, List[Set[str]]]: ) ) self.get_success( - event_handler.handle_new_client_event(self.requester, event, context) + event_handler.handle_new_client_event( + self.requester, events_and_context=[(event, context)] + ) ) state2 = set(self.get_success(context.get_current_state_ids()).values()) diff --git a/tests/storage/test_event_federation.py b/tests/storage/test_event_federation.py index a6679e131201..59b891090726 100644 --- a/tests/storage/test_event_federation.py +++ b/tests/storage/test_event_federation.py @@ -12,25 +12,38 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Tuple, Union +import datetime +from typing import Dict, List, Tuple, Union import attr from parameterized import parameterized +from twisted.test.proto_helpers import MemoryReactor + +from synapse.api.constants import EventTypes from synapse.api.room_versions import ( KNOWN_ROOM_VERSIONS, EventFormatVersions, RoomVersion, ) from synapse.events import _EventInternalMetadata -from synapse.util import json_encoder +from synapse.server import HomeServer +from synapse.storage.database import LoggingTransaction +from synapse.types import JsonDict +from synapse.util import Clock, json_encoder import tests.unittest import tests.utils +@attr.s(auto_attribs=True, frozen=True, slots=True) +class _BackfillSetupInfo: + room_id: str + depth_map: Dict[str, int] + + class EventFederationWorkerStoreTestCase(tests.unittest.HomeserverTestCase): - def prepare(self, reactor, clock, hs): + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.store = hs.get_datastores().main def test_get_prev_events_for_room(self): @@ -571,11 +584,544 @@ def prev_event_format(prev_event_id: str) -> Union[Tuple[str, dict], str]: ) self.assertEqual(count, 1) - _, event_id = self.get_success( + next_staged_event_info = self.get_success( self.store.get_next_staged_event_id_for_room(room_id) ) + assert next_staged_event_info + _, event_id = next_staged_event_info self.assertEqual(event_id, "$fake_event_id_500") + def _setup_room_for_backfill_tests(self) -> _BackfillSetupInfo: + """ + Sets up a room with various events and backward extremities to test + backfill functions against. + + Returns: + _BackfillSetupInfo including the `room_id` to test against and + `depth_map` of events in the room + """ + room_id = "!backfill-room-test:some-host" + + # The silly graph we use to test grabbing backward extremities, + # where the top is the oldest events. + # 1 (oldest) + # | + # 2 ⹁ + # | \ + # | [b1, b2, b3] + # | | + # | A + # | / + # 3 { + # | \ + # | [b4, b5, b6] + # | | + # | B + # | / + # 4 ´ + # | + # 5 (newest) + + event_graph: Dict[str, List[str]] = { + "1": [], + "2": ["1"], + "3": ["2", "A"], + "4": ["3", "B"], + "5": ["4"], + "A": ["b1", "b2", "b3"], + "b1": ["2"], + "b2": ["2"], + "b3": ["2"], + "B": ["b4", "b5", "b6"], + "b4": ["3"], + "b5": ["3"], + "b6": ["3"], + } + + depth_map: Dict[str, int] = { + "1": 1, + "2": 2, + "b1": 3, + "b2": 3, + "b3": 3, + "A": 4, + "3": 5, + "b4": 6, + "b5": 6, + "b6": 6, + "B": 7, + "4": 8, + "5": 9, + } + + # The events we have persisted on our server. + # The rest are events in the room but not backfilled tet. + our_server_events = {"5", "4", "B", "3", "A"} + + complete_event_dict_map: Dict[str, JsonDict] = {} + stream_ordering = 0 + for (event_id, prev_event_ids) in event_graph.items(): + depth = depth_map[event_id] + + complete_event_dict_map[event_id] = { + "event_id": event_id, + "type": "test_regular_type", + "room_id": room_id, + "sender": "@sender", + "prev_event_ids": prev_event_ids, + "auth_event_ids": [], + "origin_server_ts": stream_ordering, + "depth": depth, + "stream_ordering": stream_ordering, + "content": {"body": "event" + event_id}, + } + + stream_ordering += 1 + + def populate_db(txn: LoggingTransaction): + # Insert the room to satisfy the foreign key constraint of + # `event_failed_pull_attempts` + self.store.db_pool.simple_insert_txn( + txn, + "rooms", + { + "room_id": room_id, + "creator": "room_creator_user_id", + "is_public": True, + "room_version": "6", + }, + ) + + # Insert our server events + for event_id in our_server_events: + event_dict = complete_event_dict_map[event_id] + + self.store.db_pool.simple_insert_txn( + txn, + table="events", + values={ + "event_id": event_dict.get("event_id"), + "type": event_dict.get("type"), + "room_id": event_dict.get("room_id"), + "depth": event_dict.get("depth"), + "topological_ordering": event_dict.get("depth"), + "stream_ordering": event_dict.get("stream_ordering"), + "processed": True, + "outlier": False, + }, + ) + + # Insert the event edges + for event_id in our_server_events: + for prev_event_id in event_graph[event_id]: + self.store.db_pool.simple_insert_txn( + txn, + table="event_edges", + values={ + "event_id": event_id, + "prev_event_id": prev_event_id, + "room_id": room_id, + }, + ) + + # Insert the backward extremities + prev_events_of_our_events = { + prev_event_id + for our_server_event in our_server_events + for prev_event_id in complete_event_dict_map[our_server_event][ + "prev_event_ids" + ] + } + backward_extremities = prev_events_of_our_events - our_server_events + for backward_extremity in backward_extremities: + self.store.db_pool.simple_insert_txn( + txn, + table="event_backward_extremities", + values={ + "event_id": backward_extremity, + "room_id": room_id, + }, + ) + + self.get_success( + self.store.db_pool.runInteraction( + "_setup_room_for_backfill_tests_populate_db", + populate_db, + ) + ) + + return _BackfillSetupInfo(room_id=room_id, depth_map=depth_map) + + def test_get_backfill_points_in_room(self): + """ + Test to make sure only backfill points that are older and come before + the `current_depth` are returned. + """ + setup_info = self._setup_room_for_backfill_tests() + room_id = setup_info.room_id + depth_map = setup_info.depth_map + + # Try at "B" + backfill_points = self.get_success( + self.store.get_backfill_points_in_room(room_id, depth_map["B"], limit=100) + ) + backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] + self.assertEqual(backfill_event_ids, ["b6", "b5", "b4", "2", "b3", "b2", "b1"]) + + # Try at "A" + backfill_points = self.get_success( + self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100) + ) + backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] + # Event "2" has a depth of 2 but is not included here because we only + # know the approximate depth of 5 from our event "3". + self.assertListEqual(backfill_event_ids, ["b3", "b2", "b1"]) + + def test_get_backfill_points_in_room_excludes_events_we_have_attempted( + self, + ): + """ + Test to make sure that events we have attempted to backfill (and within + backoff timeout duration) do not show up as an event to backfill again. + """ + setup_info = self._setup_room_for_backfill_tests() + room_id = setup_info.room_id + depth_map = setup_info.depth_map + + # Record some attempts to backfill these events which will make + # `get_backfill_points_in_room` exclude them because we + # haven't passed the backoff interval. + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b5", "fake cause") + ) + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b4", "fake cause") + ) + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b3", "fake cause") + ) + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b2", "fake cause") + ) + + # No time has passed since we attempted to backfill ^ + + # Try at "B" + backfill_points = self.get_success( + self.store.get_backfill_points_in_room(room_id, depth_map["B"], limit=100) + ) + backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] + # Only the backfill points that we didn't record earlier exist here. + self.assertEqual(backfill_event_ids, ["b6", "2", "b1"]) + + def test_get_backfill_points_in_room_attempted_event_retry_after_backoff_duration( + self, + ): + """ + Test to make sure after we fake attempt to backfill event "b3" many times, + we can see retry and see the "b3" again after the backoff timeout duration + has exceeded. + """ + setup_info = self._setup_room_for_backfill_tests() + room_id = setup_info.room_id + depth_map = setup_info.depth_map + + # Record some attempts to backfill these events which will make + # `get_backfill_points_in_room` exclude them because we + # haven't passed the backoff interval. + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b3", "fake cause") + ) + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause") + ) + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause") + ) + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause") + ) + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause") + ) + + # Now advance time by 2 hours and we should only be able to see "b3" + # because we have waited long enough for the single attempt (2^1 hours) + # but we still shouldn't see "b1" because we haven't waited long enough + # for this many attempts. We didn't do anything to "b2" so it should be + # visible regardless. + self.reactor.advance(datetime.timedelta(hours=2).total_seconds()) + + # Try at "A" and make sure that "b1" is not in the list because we've + # already attempted many times + backfill_points = self.get_success( + self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100) + ) + backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] + self.assertEqual(backfill_event_ids, ["b3", "b2"]) + + # Now advance time by 20 hours (above 2^4 because we made 4 attemps) and + # see if we can now backfill it + self.reactor.advance(datetime.timedelta(hours=20).total_seconds()) + + # Try at "A" again after we advanced enough time and we should see "b3" again + backfill_points = self.get_success( + self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100) + ) + backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] + self.assertEqual(backfill_event_ids, ["b3", "b2", "b1"]) + + def test_get_backfill_points_in_room_works_after_many_failed_pull_attempts_that_could_naively_overflow( + self, + ) -> None: + """ + A test that reproduces #13929 (Postgres only). + + Test to make sure we can still get backfill points after many failed pull + attempts that cause us to backoff to the limit. Even if the backoff formula + would tell us to wait for more seconds than can be expressed in a 32 bit + signed int. + """ + setup_info = self._setup_room_for_backfill_tests() + room_id = setup_info.room_id + depth_map = setup_info.depth_map + + # Pretend that we have tried and failed 10 times to backfill event b1. + for _ in range(10): + self.get_success( + self.store.record_event_failed_pull_attempt(room_id, "b1", "fake cause") + ) + + # If the backoff periods grow without limit: + # After the first failed attempt, we would have backed off for 1 << 1 = 2 hours. + # After the second failed attempt we would have backed off for 1 << 2 = 4 hours, + # so after the 10th failed attempt we should backoff for 1 << 10 == 1024 hours. + # Wait 1100 hours just so we have a nice round number. + self.reactor.advance(datetime.timedelta(hours=1100).total_seconds()) + + # 1024 hours in milliseconds is 1024 * 3600000, which exceeds the largest 32 bit + # signed integer. The bug we're reproducing is that this overflow causes an + # error in postgres preventing us from fetching a set of backwards extremities + # to retry fetching. + backfill_points = self.get_success( + self.store.get_backfill_points_in_room(room_id, depth_map["A"], limit=100) + ) + + # We should aim to fetch all backoff points: b1's latest backoff period has + # expired, and we haven't tried the rest. + backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] + self.assertEqual(backfill_event_ids, ["b3", "b2", "b1"]) + + def _setup_room_for_insertion_backfill_tests(self) -> _BackfillSetupInfo: + """ + Sets up a room with various insertion event backward extremities to test + backfill functions against. + + Returns: + _BackfillSetupInfo including the `room_id` to test against and + `depth_map` of events in the room + """ + room_id = "!backfill-room-test:some-host" + + depth_map: Dict[str, int] = { + "1": 1, + "2": 2, + "insertion_eventA": 3, + "3": 4, + "insertion_eventB": 5, + "4": 6, + "5": 7, + } + + def populate_db(txn: LoggingTransaction): + # Insert the room to satisfy the foreign key constraint of + # `event_failed_pull_attempts` + self.store.db_pool.simple_insert_txn( + txn, + "rooms", + { + "room_id": room_id, + "creator": "room_creator_user_id", + "is_public": True, + "room_version": "6", + }, + ) + + # Insert our server events + stream_ordering = 0 + for event_id, depth in depth_map.items(): + self.store.db_pool.simple_insert_txn( + txn, + table="events", + values={ + "event_id": event_id, + "type": EventTypes.MSC2716_INSERTION + if event_id.startswith("insertion_event") + else "test_regular_type", + "room_id": room_id, + "depth": depth, + "topological_ordering": depth, + "stream_ordering": stream_ordering, + "processed": True, + "outlier": False, + }, + ) + + if event_id.startswith("insertion_event"): + self.store.db_pool.simple_insert_txn( + txn, + table="insertion_event_extremities", + values={ + "event_id": event_id, + "room_id": room_id, + }, + ) + + stream_ordering += 1 + + self.get_success( + self.store.db_pool.runInteraction( + "_setup_room_for_insertion_backfill_tests_populate_db", + populate_db, + ) + ) + + return _BackfillSetupInfo(room_id=room_id, depth_map=depth_map) + + def test_get_insertion_event_backward_extremities_in_room(self): + """ + Test to make sure only insertion event backward extremities that are + older and come before the `current_depth` are returned. + """ + setup_info = self._setup_room_for_insertion_backfill_tests() + room_id = setup_info.room_id + depth_map = setup_info.depth_map + + # Try at "insertion_eventB" + backfill_points = self.get_success( + self.store.get_insertion_event_backward_extremities_in_room( + room_id, depth_map["insertion_eventB"], limit=100 + ) + ) + backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] + self.assertEqual(backfill_event_ids, ["insertion_eventB", "insertion_eventA"]) + + # Try at "insertion_eventA" + backfill_points = self.get_success( + self.store.get_insertion_event_backward_extremities_in_room( + room_id, depth_map["insertion_eventA"], limit=100 + ) + ) + backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] + # Event "2" has a depth of 2 but is not included here because we only + # know the approximate depth of 5 from our event "3". + self.assertListEqual(backfill_event_ids, ["insertion_eventA"]) + + def test_get_insertion_event_backward_extremities_in_room_excludes_events_we_have_attempted( + self, + ): + """ + Test to make sure that insertion events we have attempted to backfill + (and within backoff timeout duration) do not show up as an event to + backfill again. + """ + setup_info = self._setup_room_for_insertion_backfill_tests() + room_id = setup_info.room_id + depth_map = setup_info.depth_map + + # Record some attempts to backfill these events which will make + # `get_insertion_event_backward_extremities_in_room` exclude them + # because we haven't passed the backoff interval. + self.get_success( + self.store.record_event_failed_pull_attempt( + room_id, "insertion_eventA", "fake cause" + ) + ) + + # No time has passed since we attempted to backfill ^ + + # Try at "insertion_eventB" + backfill_points = self.get_success( + self.store.get_insertion_event_backward_extremities_in_room( + room_id, depth_map["insertion_eventB"], limit=100 + ) + ) + backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] + # Only the backfill points that we didn't record earlier exist here. + self.assertEqual(backfill_event_ids, ["insertion_eventB"]) + + def test_get_insertion_event_backward_extremities_in_room_attempted_event_retry_after_backoff_duration( + self, + ): + """ + Test to make sure after we fake attempt to backfill event + "insertion_eventA" many times, we can see retry and see the + "insertion_eventA" again after the backoff timeout duration has + exceeded. + """ + setup_info = self._setup_room_for_insertion_backfill_tests() + room_id = setup_info.room_id + depth_map = setup_info.depth_map + + # Record some attempts to backfill these events which will make + # `get_backfill_points_in_room` exclude them because we + # haven't passed the backoff interval. + self.get_success( + self.store.record_event_failed_pull_attempt( + room_id, "insertion_eventB", "fake cause" + ) + ) + self.get_success( + self.store.record_event_failed_pull_attempt( + room_id, "insertion_eventA", "fake cause" + ) + ) + self.get_success( + self.store.record_event_failed_pull_attempt( + room_id, "insertion_eventA", "fake cause" + ) + ) + self.get_success( + self.store.record_event_failed_pull_attempt( + room_id, "insertion_eventA", "fake cause" + ) + ) + self.get_success( + self.store.record_event_failed_pull_attempt( + room_id, "insertion_eventA", "fake cause" + ) + ) + + # Now advance time by 2 hours and we should only be able to see + # "insertion_eventB" because we have waited long enough for the single + # attempt (2^1 hours) but we still shouldn't see "insertion_eventA" + # because we haven't waited long enough for this many attempts. + self.reactor.advance(datetime.timedelta(hours=2).total_seconds()) + + # Try at "insertion_eventA" and make sure that "insertion_eventA" is not + # in the list because we've already attempted many times + backfill_points = self.get_success( + self.store.get_insertion_event_backward_extremities_in_room( + room_id, depth_map["insertion_eventA"], limit=100 + ) + ) + backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] + self.assertEqual(backfill_event_ids, []) + + # Now advance time by 20 hours (above 2^4 because we made 4 attemps) and + # see if we can now backfill it + self.reactor.advance(datetime.timedelta(hours=20).total_seconds()) + + # Try at "insertion_eventA" again after we advanced enough time and we + # should see "insertion_eventA" again + backfill_points = self.get_success( + self.store.get_insertion_event_backward_extremities_in_room( + room_id, depth_map["insertion_eventA"], limit=100 + ) + ) + backfill_event_ids = [backfill_point[0] for backfill_point in backfill_points] + self.assertEqual(backfill_event_ids, ["insertion_eventA"]) + @attr.s class FakeEvent: diff --git a/tests/storage/test_event_push_actions.py b/tests/storage/test_event_push_actions.py index 3b4386fc1851..292f432b1e31 100644 --- a/tests/storage/test_event_push_actions.py +++ b/tests/storage/test_event_push_actions.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Tuple + from twisted.test.proto_helpers import MemoryReactor from synapse.rest import admin @@ -22,8 +24,6 @@ from tests.unittest import HomeserverTestCase -USER_ID = "@user:example.com" - class EventPushActionsStoreTestCase(HomeserverTestCase): servlets = [ @@ -38,21 +38,13 @@ def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: assert persist_events_store is not None self.persist_events_store = persist_events_store - def test_get_unread_push_actions_for_user_in_range_for_http(self) -> None: - self.get_success( - self.store.get_unread_push_actions_for_user_in_range_for_http( - USER_ID, 0, 1000, 20 - ) - ) + def _create_users_and_room(self) -> Tuple[str, str, str, str, str]: + """ + Creates two users and a shared room. - def test_get_unread_push_actions_for_user_in_range_for_email(self) -> None: - self.get_success( - self.store.get_unread_push_actions_for_user_in_range_for_email( - USER_ID, 0, 1000, 20 - ) - ) - - def test_count_aggregation(self) -> None: + Returns: + Tuple of (user 1 ID, user 1 token, user 2 ID, user 2 token, room ID). + """ # Create a user to receive notifications and send receipts. user_id = self.register_user("user1235", "pass") token = self.login("user1235", "pass") @@ -65,6 +57,70 @@ def test_count_aggregation(self) -> None: room_id = self.helper.create_room_as(user_id, tok=token) self.helper.join(room_id, other_id, tok=other_token) + return user_id, token, other_id, other_token, room_id + + def test_get_unread_push_actions_for_user_in_range(self) -> None: + """Test getting unread push actions for HTTP and email pushers.""" + user_id, token, _, other_token, room_id = self._create_users_and_room() + + # Create two events, one of which is a highlight. + self.helper.send_event( + room_id, + type="m.room.message", + content={"msgtype": "m.text", "body": "msg"}, + tok=other_token, + ) + event_id = self.helper.send_event( + room_id, + type="m.room.message", + content={"msgtype": "m.text", "body": user_id}, + tok=other_token, + )["event_id"] + + # Fetch unread actions for HTTP pushers. + http_actions = self.get_success( + self.store.get_unread_push_actions_for_user_in_range_for_http( + user_id, 0, 1000, 20 + ) + ) + self.assertEqual(2, len(http_actions)) + + # Fetch unread actions for email pushers. + email_actions = self.get_success( + self.store.get_unread_push_actions_for_user_in_range_for_email( + user_id, 0, 1000, 20 + ) + ) + self.assertEqual(2, len(email_actions)) + + # Send a receipt, which should clear any actions. + self.get_success( + self.store.insert_receipt( + room_id, + "m.read", + user_id=user_id, + event_ids=[event_id], + thread_id=None, + data={}, + ) + ) + http_actions = self.get_success( + self.store.get_unread_push_actions_for_user_in_range_for_http( + user_id, 0, 1000, 20 + ) + ) + self.assertEqual([], http_actions) + email_actions = self.get_success( + self.store.get_unread_push_actions_for_user_in_range_for_email( + user_id, 0, 1000, 20 + ) + ) + self.assertEqual([], email_actions) + + def test_count_aggregation(self) -> None: + # Create a user to receive notifications and send receipts. + user_id, token, _, other_token, room_id = self._create_users_and_room() + last_event_id: str def _assert_counts(noitf_count: int, highlight_count: int) -> None: @@ -107,6 +163,7 @@ def _mark_read(event_id: str) -> None: "m.read", user_id=user_id, event_ids=[event_id], + thread_id=None, data={}, ) ) diff --git a/tests/storage/test_monthly_active_users.py b/tests/storage/test_monthly_active_users.py index e8b4a5644bf7..c55c4db97033 100644 --- a/tests/storage/test_monthly_active_users.py +++ b/tests/storage/test_monthly_active_users.py @@ -96,8 +96,12 @@ def test_initialise_reserved_users(self): # Test each of the registered users is marked as active timestamp = self.get_success(self.store.user_last_seen_monthly_active(user1)) + # Mypy notes that one shouldn't compare Optional[int] to 0 with assertGreater. + # Check that timestamp really is an int. + assert timestamp is not None self.assertGreater(timestamp, 0) timestamp = self.get_success(self.store.user_last_seen_monthly_active(user2)) + assert timestamp is not None self.assertGreater(timestamp, 0) # Test that users with reserved 3pids are not removed from the MAU table @@ -166,10 +170,11 @@ def test_user_last_seen_monthly_active(self): self.get_success(self.store.upsert_monthly_active_user(user_id2)) result = self.get_success(self.store.user_last_seen_monthly_active(user_id1)) + assert result is not None self.assertGreater(result, 0) result = self.get_success(self.store.user_last_seen_monthly_active(user_id3)) - self.assertNotEqual(result, 0) + self.assertIsNone(result) @override_config({"max_mau_value": 5}) def test_reap_monthly_active_users(self): diff --git a/tests/storage/test_receipts.py b/tests/storage/test_receipts.py index cfa008aa8343..261159c28c96 100644 --- a/tests/storage/test_receipts.py +++ b/tests/storage/test_receipts.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Collection, Optional from synapse.api.constants import ReceiptTypes from synapse.types import UserID, create_requester @@ -86,6 +87,33 @@ def prepare(self, reactor, clock, homeserver) -> None: ) ) + def get_last_unthreaded_receipt( + self, receipt_types: Collection[str], room_id: Optional[str] = None + ) -> Optional[str]: + """ + Fetch the event ID for the latest unthreaded receipt in the test room for the test user. + + Args: + receipt_types: The receipt types to fetch. + + Returns: + The latest receipt, if one exists. + """ + result = self.get_success( + self.store.db_pool.runInteraction( + "get_last_receipt_event_id_for_user", + self.store.get_last_unthreaded_receipt_for_user_txn, + OUR_USER_ID, + room_id or self.room_id1, + receipt_types, + ) + ) + if not result: + return None + + event_id, _ = result + return event_id + def test_return_empty_with_no_data(self) -> None: res = self.get_success( self.store.get_receipts_for_user( @@ -109,16 +137,10 @@ def test_return_empty_with_no_data(self) -> None: ) self.assertEqual(res, {}) - res = self.get_success( - self.store.get_last_receipt_event_id_for_user( - OUR_USER_ID, - self.room_id1, - [ - ReceiptTypes.READ, - ReceiptTypes.READ_PRIVATE, - ], - ) + res = self.get_last_unthreaded_receipt( + [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE] ) + self.assertEqual(res, None) def test_get_receipts_for_user(self) -> None: @@ -133,13 +155,18 @@ def test_get_receipts_for_user(self) -> None: # Send public read receipt for the first event self.get_success( self.store.insert_receipt( - self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_1_id], {} + self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_1_id], None, {} ) ) # Send private read receipt for the second event self.get_success( self.store.insert_receipt( - self.room_id1, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event1_2_id], {} + self.room_id1, + ReceiptTypes.READ_PRIVATE, + OUR_USER_ID, + [event1_2_id], + None, + {}, ) ) @@ -166,7 +193,7 @@ def test_get_receipts_for_user(self) -> None: # Test receipt updating self.get_success( self.store.insert_receipt( - self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_2_id], {} + self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_2_id], None, {} ) ) res = self.get_success( @@ -182,7 +209,12 @@ def test_get_receipts_for_user(self) -> None: # Test new room is reflected in what the method returns self.get_success( self.store.insert_receipt( - self.room_id2, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event2_1_id], {} + self.room_id2, + ReceiptTypes.READ_PRIVATE, + OUR_USER_ID, + [event2_1_id], + None, + {}, ) ) res = self.get_success( @@ -204,53 +236,42 @@ def test_get_last_receipt_event_id_for_user(self) -> None: # Send public read receipt for the first event self.get_success( self.store.insert_receipt( - self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_1_id], {} + self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_1_id], None, {} ) ) # Send private read receipt for the second event self.get_success( self.store.insert_receipt( - self.room_id1, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event1_2_id], {} + self.room_id1, + ReceiptTypes.READ_PRIVATE, + OUR_USER_ID, + [event1_2_id], + None, + {}, ) ) # Test we get the latest event when we want both private and public receipts - res = self.get_success( - self.store.get_last_receipt_event_id_for_user( - OUR_USER_ID, - self.room_id1, - [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], - ) + res = self.get_last_unthreaded_receipt( + [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE] ) self.assertEqual(res, event1_2_id) # Test we get the older event when we want only public receipt - res = self.get_success( - self.store.get_last_receipt_event_id_for_user( - OUR_USER_ID, self.room_id1, [ReceiptTypes.READ] - ) - ) + res = self.get_last_unthreaded_receipt([ReceiptTypes.READ]) self.assertEqual(res, event1_1_id) # Test we get the latest event when we want only the private receipt - res = self.get_success( - self.store.get_last_receipt_event_id_for_user( - OUR_USER_ID, self.room_id1, [ReceiptTypes.READ_PRIVATE] - ) - ) + res = self.get_last_unthreaded_receipt([ReceiptTypes.READ_PRIVATE]) self.assertEqual(res, event1_2_id) # Test receipt updating self.get_success( self.store.insert_receipt( - self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_2_id], {} - ) - ) - res = self.get_success( - self.store.get_last_receipt_event_id_for_user( - OUR_USER_ID, self.room_id1, [ReceiptTypes.READ] + self.room_id1, ReceiptTypes.READ, OUR_USER_ID, [event1_2_id], None, {} ) ) + res = self.get_last_unthreaded_receipt([ReceiptTypes.READ]) self.assertEqual(res, event1_2_id) # Send some events into the second room @@ -261,14 +282,15 @@ def test_get_last_receipt_event_id_for_user(self) -> None: # Test new room is reflected in what the method returns self.get_success( self.store.insert_receipt( - self.room_id2, ReceiptTypes.READ_PRIVATE, OUR_USER_ID, [event2_1_id], {} - ) - ) - res = self.get_success( - self.store.get_last_receipt_event_id_for_user( - OUR_USER_ID, self.room_id2, - [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], + ReceiptTypes.READ_PRIVATE, + OUR_USER_ID, + [event2_1_id], + None, + {}, ) ) + res = self.get_last_unthreaded_receipt( + [ReceiptTypes.READ, ReceiptTypes.READ_PRIVATE], room_id=self.room_id2 + ) self.assertEqual(res, event2_1_id) diff --git a/tests/storage/test_registration.py b/tests/storage/test_registration.py index 853a93afab39..05ea802008ad 100644 --- a/tests/storage/test_registration.py +++ b/tests/storage/test_registration.py @@ -16,9 +16,10 @@ from synapse.api.constants import UserTypes from synapse.api.errors import ThreepidValidationError from synapse.server import HomeServer +from synapse.types import JsonDict, UserID from synapse.util import Clock -from tests.unittest import HomeserverTestCase +from tests.unittest import HomeserverTestCase, override_config class RegistrationStoreTestCase(HomeserverTestCase): @@ -48,6 +49,7 @@ def test_register(self) -> None: "user_type": None, "deactivated": 0, "shadow_banned": 0, + "approved": 1, }, (self.get_success(self.store.get_user_by_id(self.user_id))), ) @@ -166,3 +168,101 @@ def test_3pid_inhibit_invalid_validation_session_error(self) -> None: ThreepidValidationError, ) self.assertEqual(e.value.msg, "Validation token not found or has expired", e) + + +class ApprovalRequiredRegistrationTestCase(HomeserverTestCase): + def default_config(self) -> JsonDict: + config = super().default_config() + + # If there's already some config for this feature in the default config, it + # means we're overriding it with @override_config. In this case we don't want + # to do anything more with it. + msc3866_config = config.get("experimental_features", {}).get("msc3866") + if msc3866_config is not None: + return config + + # Require approval for all new accounts. + config["experimental_features"] = { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": True, + } + } + return config + + def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: + self.store = hs.get_datastores().main + self.user_id = "@my-user:test" + self.pwhash = "{xx1}123456789" + + @override_config( + { + "experimental_features": { + "msc3866": { + "enabled": True, + "require_approval_for_new_accounts": False, + } + } + } + ) + def test_approval_not_required(self) -> None: + """Tests that if we don't require approval for new accounts, newly created + accounts are automatically marked as approved. + """ + self.get_success(self.store.register_user(self.user_id, self.pwhash)) + + user = self.get_success(self.store.get_user_by_id(self.user_id)) + assert user is not None + self.assertTrue(user["approved"]) + + approved = self.get_success(self.store.is_user_approved(self.user_id)) + self.assertTrue(approved) + + def test_approval_required(self) -> None: + """Tests that if we require approval for new accounts, newly created accounts + are not automatically marked as approved. + """ + self.get_success(self.store.register_user(self.user_id, self.pwhash)) + + user = self.get_success(self.store.get_user_by_id(self.user_id)) + assert user is not None + self.assertFalse(user["approved"]) + + approved = self.get_success(self.store.is_user_approved(self.user_id)) + self.assertFalse(approved) + + def test_override(self) -> None: + """Tests that if we require approval for new accounts, but we explicitly say the + new user should be considered approved, they're marked as approved. + """ + self.get_success( + self.store.register_user( + self.user_id, + self.pwhash, + approved=True, + ) + ) + + user = self.get_success(self.store.get_user_by_id(self.user_id)) + self.assertIsNotNone(user) + assert user is not None + self.assertEqual(user["approved"], 1) + + approved = self.get_success(self.store.is_user_approved(self.user_id)) + self.assertTrue(approved) + + def test_approve_user(self) -> None: + """Tests that approving the user updates their approval status.""" + self.get_success(self.store.register_user(self.user_id, self.pwhash)) + + approved = self.get_success(self.store.is_user_approved(self.user_id)) + self.assertFalse(approved) + + self.get_success( + self.store.update_user_approval_status( + UserID.from_string(self.user_id), True + ) + ) + + approved = self.get_success(self.store.is_user_approved(self.user_id)) + self.assertTrue(approved) diff --git a/tests/test_federation.py b/tests/test_federation.py index b138fd030fe4..fe2964d764d6 100644 --- a/tests/test_federation.py +++ b/tests/test_federation.py @@ -87,8 +87,8 @@ async def _check_event_auth(origin, event, context): federation_event_handler._check_event_auth = _check_event_auth self.client = self.homeserver.get_federation_client() - self.client._check_sigs_and_hash_and_fetch = lambda dest, pdus, **k: succeed( - pdus + self.client._check_sigs_and_hash_for_pulled_events_and_fetch = ( + lambda dest, pdus, **k: succeed(pdus) ) # Send the join, it should return None (which is not an error) diff --git a/tests/unittest.py b/tests/unittest.py index 975b0a23a7b7..5116be338ee0 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -300,47 +300,31 @@ def setUp(self) -> None: if hasattr(self, "user_id"): if self.hijack_auth: assert self.helper.auth_user_id is not None + token = "some_fake_token" # We need a valid token ID to satisfy foreign key constraints. token_id = self.get_success( self.hs.get_datastores().main.add_access_token_to_user( self.helper.auth_user_id, - "some_fake_token", + token, None, None, ) ) - async def get_user_by_access_token( - token: Optional[str] = None, allow_guest: bool = False - ) -> JsonDict: - assert self.helper.auth_user_id is not None - return { - "user": UserID.from_string(self.helper.auth_user_id), - "token_id": token_id, - "is_guest": False, - } - - async def get_user_by_req( - request: SynapseRequest, - allow_guest: bool = False, - allow_expired: bool = False, - ) -> Requester: + # This has to be a function and not just a Mock, because + # `self.helper.auth_user_id` is temporarily reassigned in some tests + async def get_requester(*args, **kwargs) -> Requester: assert self.helper.auth_user_id is not None return create_requester( - UserID.from_string(self.helper.auth_user_id), - token_id, - False, - False, - None, + user_id=UserID.from_string(self.helper.auth_user_id), + access_token_id=token_id, ) # Type ignore: mypy doesn't like us assigning to methods. - self.hs.get_auth().get_user_by_req = get_user_by_req # type: ignore[assignment] - self.hs.get_auth().get_user_by_access_token = get_user_by_access_token # type: ignore[assignment] - self.hs.get_auth().get_access_token_from_request = Mock( # type: ignore[assignment] - return_value="1234" - ) + self.hs.get_auth().get_user_by_req = get_requester # type: ignore[assignment] + self.hs.get_auth().get_user_by_access_token = get_requester # type: ignore[assignment] + self.hs.get_auth().get_access_token_from_request = Mock(return_value=token) # type: ignore[assignment] if self.needs_threadpool: self.reactor.threadpool = ThreadPool() # type: ignore[assignment] @@ -750,7 +734,9 @@ def create_and_send_event( event.internal_metadata.soft_failed = True self.get_success( - event_creator.handle_new_client_event(requester, event, context) + event_creator.handle_new_client_event( + requester, events_and_context=[(event, context)] + ) ) return event.event_id diff --git a/tests/util/caches/test_descriptors.py b/tests/util/caches/test_descriptors.py index 48e616ac7419..90861fe522c2 100644 --- a/tests/util/caches/test_descriptors.py +++ b/tests/util/caches/test_descriptors.py @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging -from typing import Set +from typing import Iterable, Set, Tuple from unittest import mock from twisted.internet import defer, reactor @@ -1008,3 +1008,34 @@ async def do_lookup(): obj.inner_context_was_finished, "Tried to restart a finished logcontext" ) self.assertEqual(current_context(), SENTINEL_CONTEXT) + + def test_num_args_mismatch(self): + """ + Make sure someone does not accidentally use @cachedList on a method with + a mismatch in the number args to the underlying single cache method. + """ + + class Cls: + @descriptors.cached(tree=True) + def fn(self, room_id, event_id): + pass + + # This is wrong ❌. `@cachedList` expects to be given the same number + # of arguments as the underlying cached function, just with one of + # the arguments being an iterable + @descriptors.cachedList(cached_method_name="fn", list_name="keys") + def list_fn(self, keys: Iterable[Tuple[str, str]]): + pass + + # Corrected syntax ✅ + # + # @cachedList(cached_method_name="fn", list_name="event_ids") + # async def list_fn( + # self, room_id: str, event_ids: Collection[str], + # ) + + obj = Cls() + + # Make sure this raises an error about the arg mismatch + with self.assertRaises(Exception): + obj.list_fn([("foo", "bar")]) diff --git a/tests/util/test_check_dependencies.py b/tests/util/test_check_dependencies.py index 5d1aa025d127..6913de24b9c6 100644 --- a/tests/util/test_check_dependencies.py +++ b/tests/util/test_check_dependencies.py @@ -40,7 +40,10 @@ class TestDependencyChecker(TestCase): def mock_installed_package( self, distribution: Optional[DummyDistribution] ) -> Generator[None, None, None]: - """Pretend that looking up any distribution yields the given `distribution`.""" + """Pretend that looking up any package yields the given `distribution`. + + If `distribution = None`, we pretend that the package is not installed. + """ def mock_distribution(name: str): if distribution is None: @@ -81,7 +84,7 @@ def test_version_reported_as_none(self) -> None: self.assertRaises(DependencyException, check_requirements) def test_checks_ignore_dev_dependencies(self) -> None: - """Bot generic and per-extra checks should ignore dev dependencies.""" + """Both generic and per-extra checks should ignore dev dependencies.""" with patch( "synapse.util.check_dependencies.metadata.requires", return_value=["dummypkg >= 1; extra == 'mypy'"], @@ -142,3 +145,16 @@ def test_release_candidates_satisfy_dependency(self) -> None: with self.mock_installed_package(new_release_candidate): # should not raise check_requirements() + + def test_setuptools_rust_ignored(self) -> None: + """Test a workaround for a `poetry build` problem. Reproduces #13926.""" + with patch( + "synapse.util.check_dependencies.metadata.requires", + return_value=["setuptools_rust >= 1.3"], + ): + with self.mock_installed_package(None): + # should not raise, even if setuptools_rust is not installed + check_requirements() + with self.mock_installed_package(old): + # We also ignore old versions of setuptools_rust + check_requirements() diff --git a/tests/utils.py b/tests/utils.py index 65db4376973c..045a8b5fa7d9 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -270,9 +270,7 @@ def looping_call( *args: P.args, **kwargs: P.kwargs, ) -> None: - # This type-ignore should be redundant once we use a mypy release with - # https://github.com/python/mypy/pull/12668. - self.loopers.append(Looper(function, interval / 1000.0, self.now, args, kwargs)) # type: ignore[arg-type] + self.loopers.append(Looper(function, interval / 1000.0, self.now, args, kwargs)) def cancel_call_later(self, timer: Timer, ignore_errs: bool = False) -> None: if timer.expired: