From 8e637464a8f675cfa8733a752dfd7ce3c7a4fc99 Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Sun, 11 Oct 2020 14:33:54 +0200 Subject: [PATCH 1/8] Add an admin api to delete local media. related to: #6459 Add `DELETE /_synapse/admin/v1/media//` to delete a single file from server --- docs/admin_api/media_admin_api.md | 24 +++ synapse/rest/admin/media.py | 31 +++- synapse/rest/media/v1/filepath.py | 17 +++ synapse/rest/media/v1/media_repository.py | 30 ++++ tests/rest/admin/test_media.py | 178 ++++++++++++++++++++++ 5 files changed, 279 insertions(+), 1 deletion(-) create mode 100644 tests/rest/admin/test_media.py diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md index 26948770d8e6..83a8079650fa 100644 --- a/docs/admin_api/media_admin_api.md +++ b/docs/admin_api/media_admin_api.md @@ -100,3 +100,27 @@ Response: "num_quarantined": 10 # The number of media items successfully quarantined } ``` + +# Draft: Delete local media +This API deletes the *local* media from the disc of your own server. + +Request: + +``` +DELETE /_synapse/admin/v1/media// + +{} +``` + +URL Parameters + +* `server_name` - The name of your local server (e.g `matrix.org`) +* `media_id` - The ID of the media (e.g `abcdefghijklmnopqrstuvwx`) + +Response: + +``` +{ + "deleted": 1 # The number of media items successfully deleted +} +``` \ No newline at end of file diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index ee75095c0ede..af32b29b8d13 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -16,9 +16,10 @@ import logging -from synapse.api.errors import AuthError +from synapse.api.errors import AuthError, NotFoundError, SynapseError from synapse.http.servlet import RestServlet, parse_integer from synapse.rest.admin._base import ( + admin_patterns, assert_requester_is_admin, assert_user_is_admin, historical_admin_path_patterns, @@ -150,6 +151,33 @@ async def on_POST(self, request): return 200, ret +class DeleteMediaByID(RestServlet): + """Delete local media by a given ID. Removes it from this server. + """ + + PATTERNS = admin_patterns("/media/(?P[^/]+)/(?P[^/]+)", "v1") + + def __init__(self, hs): + self.store = hs.get_datastore() + self.auth = hs.get_auth() + self.server_name = hs.hostname + self.media_repository = hs.get_media_repository() + + async def on_DELETE(self, request, server_name: str, media_id: str): + await assert_requester_is_admin(self.auth, request) + + if self.server_name != server_name: + raise SynapseError(400, "Can only delete local media") + + if await self.store.get_local_media(media_id) is None: + raise NotFoundError("Unknown media") + + logging.info("Deleting local media by ID: %s", media_id) + + ret = await self.media_repository.delete_local_media(media_id) + return 200, {"deleted": ret} + + def register_servlets_for_media_repo(hs, http_server): """ Media repo specific APIs. @@ -159,3 +187,4 @@ def register_servlets_for_media_repo(hs, http_server): QuarantineMediaByID(hs).register(http_server) QuarantineMediaByUser(hs).register(http_server) ListMediaInRoom(hs).register(http_server) + DeleteMediaByID(hs).register(http_server) diff --git a/synapse/rest/media/v1/filepath.py b/synapse/rest/media/v1/filepath.py index 7447eeaebeee..9e079f672ff0 100644 --- a/synapse/rest/media/v1/filepath.py +++ b/synapse/rest/media/v1/filepath.py @@ -69,6 +69,23 @@ def local_media_thumbnail_rel(self, media_id, width, height, content_type, metho local_media_thumbnail = _wrap_in_base_path(local_media_thumbnail_rel) + def local_media_thumbnail_dir(self, media_id: str) -> str: + """ + Retrieve the local store path of thumbnails of a given media_id + + Args: + media_id: The media ID to query. + Returns: + Path of local_thumbnails from media_id + """ + return os.path.join( + self.base_path, + "local_thumbnails", + media_id[0:2], + media_id[2:4], + media_id[4:], + ) + def remote_media_filepath_rel(self, server_name, file_id): return os.path.join( "remote_content", server_name, file_id[0:2], file_id[2:4], file_id[4:] diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index e1192b47cdb0..5d936fcffd66 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -767,6 +767,36 @@ async def delete_old_remote_media(self, before_ts): return {"deleted": deleted} + async def delete_local_media(self, media_id: str) -> int: + """ + Delete the given media_id from this server + + Args: + media_id: The media ID to delete. + Returns: + Number of deleted files. + In this case 1 or 0 + """ + logger.info("Deleting local media: %s", media_id) + + full_path = self.filepaths.local_media_filepath(media_id) + try: + os.remove(full_path) + except OSError as e: + logger.warning("Failed to remove file: %r: %s", full_path, e) + if e.errno != errno.ENOENT: + return 0 + + thumbnail_dir = self.filepaths.local_media_thumbnail_dir(media_id) + shutil.rmtree(thumbnail_dir, ignore_errors=True) + + await self.store.delete_remote_media(self.server_name, media_id) + + await self.store.delete_url_cache((media_id,)) + await self.store.delete_url_cache_media((media_id,)) + + return 1 + class MediaRepositoryResource(Resource): """File uploading and downloading. diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py new file mode 100644 index 000000000000..f3e3de84a001 --- /dev/null +++ b/tests/rest/admin/test_media.py @@ -0,0 +1,178 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Dirk Klimpel +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from binascii import unhexlify + +import synapse.rest.admin +from synapse.api.errors import Codes +from synapse.rest.client.v1 import login +from synapse.rest.media.v1.filepath import MediaFilePaths + +from tests import unittest + + +class DeleteMediaByIDTestCase(unittest.HomeserverTestCase): + + servlets = [ + synapse.rest.admin.register_servlets, + synapse.rest.admin.register_servlets_for_media_repo, + login.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + self.handler = hs.get_device_handler() + self.media_repo = hs.get_media_repository_resource() + self.server_name = hs.hostname + + self.admin_user = self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + self.filepaths = MediaFilePaths(hs.config.media_store_path) + + def test_no_auth(self): + """ + Try to delete media without authentication. + """ + url = "/_synapse/admin/v1/media/%s/%s" % (self.server_name, "12345") + + request, channel = self.make_request("DELETE", url, b"{}") + self.render(request) + + self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) + + def test_requester_is_no_admin(self): + """ + If the user is not a server admin, an error is returned. + """ + self.other_user = self.register_user("user", "pass") + self.other_user_token = self.login("user", "pass") + + url = "/_synapse/admin/v1/media/%s/%s" % (self.server_name, "12345") + + request, channel = self.make_request( + "DELETE", url, access_token=self.other_user_token, + ) + self.render(request) + + self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) + + def test_media_does_not_exist(self): + """ + Tests that a lookup for a media that does not exist returns a 404 + """ + url = "/_synapse/admin/v1/media/%s/%s" % (self.server_name, "12345") + + request, channel = self.make_request( + "DELETE", url, access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(404, channel.code, msg=channel.json_body) + self.assertEqual(Codes.NOT_FOUND, channel.json_body["errcode"]) + + def test_media_is_not_local(self): + """ + Tests that a lookup for a media that is not a local returns a 400 + """ + url = "/_synapse/admin/v1/media/%s/%s" % ("unknown_domain", "12345") + + request, channel = self.make_request( + "DELETE", url, access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, channel.code, msg=channel.json_body) + self.assertEqual("Can only delete local media", channel.json_body["error"]) + + def test_delete_media(self): + """ + Tests that delete a media is successfully + """ + + download_resource = self.media_repo.children[b"download"] + upload_resource = self.media_repo.children[b"upload"] + image_data = unhexlify( + b"89504e470d0a1a0a0000000d4948445200000001000000010806" + b"0000001f15c4890000000a49444154789c63000100000500010d" + b"0a2db40000000049454e44ae426082" + ) + + # Upload some media into the room + response = self.helper.upload_media( + upload_resource, image_data, tok=self.admin_user_tok, expect_code=200 + ) + # Extract media ID from the response + server_and_media_id = response["content_uri"][6:] # Cut off 'mxc://' + server_name, media_id = server_and_media_id.split("/") + + self.assertEqual(server_name, self.server_name) + + # Attempt to access media + request, channel = self.make_request( + "GET", + server_and_media_id, + shorthand=False, + access_token=self.admin_user_tok, + ) + request.render(download_resource) + self.pump(1.0) + + # Should be successful + self.assertEqual( + 200, + channel.code, + msg=( + "Expected to receive a 200 on accessing media: %s" % server_and_media_id + ), + ) + + # Test if the file exists + local_path = self.filepaths.local_media_filepath(media_id) + self.assertTrue(os.path.exists(local_path)) + + url = "/_synapse/admin/v1/media/%s/%s" % (self.server_name, media_id) + + # Delete media + request, channel = self.make_request( + "DELETE", url, access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(1, channel.json_body["deleted"]) + + # Attempt to access media + request, channel = self.make_request( + "GET", + server_and_media_id, + shorthand=False, + access_token=self.admin_user_tok, + ) + request.render(download_resource) + self.pump(1.0) + self.assertEqual( + 404, + channel.code, + msg=( + "Expected to receive a 404 on accessing deleted media: %s" + % server_and_media_id + ), + ) + + # Test if the file is deleted + self.assertFalse(os.path.exists(local_path)) From 1dc57c794fca9d7e501a132e3bc5098b0cb0c032 Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Sun, 11 Oct 2020 14:44:39 +0200 Subject: [PATCH 2/8] add newsfile --- changelog.d/8519.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/8519.feature diff --git a/changelog.d/8519.feature b/changelog.d/8519.feature new file mode 100644 index 000000000000..b0fde1a9b562 --- /dev/null +++ b/changelog.d/8519.feature @@ -0,0 +1 @@ +Add an admin api `DELETE /_synapse/admin/v1/media//` to delete a single file from server. Contributed by @dklimpel. \ No newline at end of file From d8167de6801a876de041ab63faa2b28e2e779635 Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Fri, 16 Oct 2020 20:35:42 +0200 Subject: [PATCH 3/8] Add additional api for request by date or size --- docs/admin_api/media_admin_api.md | 71 +++- synapse/rest/admin/media.py | 55 ++- synapse/rest/media/v1/media_repository.py | 78 +++- .../databases/main/media_repository.py | 48 +++ tests/rest/admin/test_media.py | 362 +++++++++++++++++- 5 files changed, 582 insertions(+), 32 deletions(-) diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md index 83a8079650fa..c7ce6f57ab88 100644 --- a/docs/admin_api/media_admin_api.md +++ b/docs/admin_api/media_admin_api.md @@ -101,8 +101,16 @@ Response: } ``` -# Draft: Delete local media -This API deletes the *local* media from the disc of your own server. +# Delete local media +This API deletes the *local* media from the disk of your own server. +This includes any local thumbnails and copies of media downloaded from +remote homeservers. +This API will not affect media that has been uploaded to external +media repositories (e.g https://github.com/turt2live/matrix-media-repo/). +See also [purge_remote_media.rst](purge_remote_media.rst). + +## Delete a specific local media +Delete a specific ``media_id``. Request: @@ -114,13 +122,60 @@ DELETE /_synapse/admin/v1/media// URL Parameters -* `server_name` - The name of your local server (e.g `matrix.org`) -* `media_id` - The ID of the media (e.g `abcdefghijklmnopqrstuvwx`) +* ``server_name``: string - The name of your local server (e.g ``matrix.org``) +* ``media_id``: string - The ID of the media (e.g ``abcdefghijklmnopqrstuvwx``) Response: +```json + { + "deleted_media":[ + "abcdefghijklmnopqrstuvwx" + ], + "total": 1 + } ``` -{ - "deleted": 1 # The number of media items successfully deleted -} -``` \ No newline at end of file + +The following fields are returned in the JSON response body: + +* ``deleted_media``: list of strings - List of deleted ``media_id`` +* ``total``: integer - Total number of deleted ``media_id`` + +## Delete local media by date or size + +Request: + +``` +POST /_synapse/admin/v1/media//delete?before_ts= + +{} +``` + +URL Parameters + +* ``server_name``: string - The name of your local server (e.g ``matrix.org``) +* ``before_ts``: string representing a positive integer - Unix timestamp in ms. +Files that were last used before this timestamp will be deleted. It is the timestamp of +last access and not the timestamp creation. +* ``size_gt``: Optional - string representing a positive integer - Size of the media in bytes. +Files that are larger will be deleted. Defaults to ``0``. +* ``keep_profiles``: Optional- string representing a boolean - Switch to delete also files +that are still used in image data (e.g user profile, room avatar). +If ``false`` thse files will be deleted. Defaults to ``true``. + +Response: + +```json + { + "deleted_media":[ + "abcdefghijklmnopqrstuvwx", + "abcdefghijklmnopqrstuvwz" + ], + "total": 2 + } +``` + +The following fields are returned in the JSON response body: + +* ``deleted_media``: list of strings - List of deleted ``media_id`` +* ``total``: integer - Total number of deleted ``media_id`` diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index af32b29b8d13..527b6dd72317 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -16,8 +16,8 @@ import logging -from synapse.api.errors import AuthError, NotFoundError, SynapseError -from synapse.http.servlet import RestServlet, parse_integer +from synapse.api.errors import AuthError, Codes, NotFoundError, SynapseError +from synapse.http.servlet import RestServlet, parse_boolean, parse_integer from synapse.rest.admin._base import ( admin_patterns, assert_requester_is_admin, @@ -155,7 +155,7 @@ class DeleteMediaByID(RestServlet): """Delete local media by a given ID. Removes it from this server. """ - PATTERNS = admin_patterns("/media/(?P[^/]+)/(?P[^/]+)", "v1") + PATTERNS = admin_patterns("/media/(?P[^/]+)/(?P[^/]+)") def __init__(self, hs): self.store = hs.get_datastore() @@ -174,8 +174,52 @@ async def on_DELETE(self, request, server_name: str, media_id: str): logging.info("Deleting local media by ID: %s", media_id) - ret = await self.media_repository.delete_local_media(media_id) - return 200, {"deleted": ret} + deleted_media, total = await self.media_repository.delete_local_media(media_id) + return 200, {"deleted_media": deleted_media, "total": total} + + +class DeleteMediaByDateSize(RestServlet): + """Delete local media by timestamp and size. + Removes it from this server. + """ + + PATTERNS = admin_patterns("/media/(?P[^/]+)/delete") + + def __init__(self, hs): + self.store = hs.get_datastore() + self.auth = hs.get_auth() + self.server_name = hs.hostname + self.media_repository = hs.get_media_repository() + + async def on_POST(self, request, server_name: str): + await assert_requester_is_admin(self.auth, request) + + before_ts = parse_integer(request, "before_ts", required=True) + size_gt = parse_integer(request, "size_gt", default=0) + keep_profiles = parse_boolean(request, "keep_profiles", default=True) + + if before_ts < 0: + raise SynapseError( + 400, + "Query parameter before_ts must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + if size_gt < 0: + raise SynapseError( + 400, + "Query parameter size_gt must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + if self.server_name != server_name: + raise SynapseError(400, "Can only delete local media") + + logging.info("Deleting local media by timestamp: %s", before_ts) + + deleted_media, total = await self.media_repository.delete_old_local_media( + before_ts, size_gt, keep_profiles + ) + return 200, {"deleted_media": deleted_media, "total": total} def register_servlets_for_media_repo(hs, http_server): @@ -188,3 +232,4 @@ def register_servlets_for_media_repo(hs, http_server): QuarantineMediaByUser(hs).register(http_server) ListMediaInRoom(hs).register(http_server) DeleteMediaByID(hs).register(http_server) + DeleteMediaByDateSize(hs).register(http_server) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 5d936fcffd66..72a04e5556e9 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -18,7 +18,7 @@ import logging import os import shutil -from typing import IO, Dict, Optional, Tuple +from typing import IO, Dict, List, Optional, Tuple import twisted.internet.error import twisted.web.http @@ -767,35 +767,79 @@ async def delete_old_remote_media(self, before_ts): return {"deleted": deleted} - async def delete_local_media(self, media_id: str) -> int: + async def delete_local_media(self, media_id: str) -> Tuple[List[str], int]: """ Delete the given media_id from this server Args: media_id: The media ID to delete. Returns: - Number of deleted files. - In this case 1 or 0 + List of deleted media_id + Number of deleted media_id """ logger.info("Deleting local media: %s", media_id) - full_path = self.filepaths.local_media_filepath(media_id) - try: - os.remove(full_path) - except OSError as e: - logger.warning("Failed to remove file: %r: %s", full_path, e) - if e.errno != errno.ENOENT: - return 0 + return await self._remove_local_media_from_disk((media_id,)) + + async def delete_old_local_media( + self, before_ts: int, size_gt: int = 0, keep_profiles: bool = True, + ) -> Tuple[List[str], int]: + """ + Delete old media_id from this server + + Args: + before_ts: Unix timestamp in ms. + Files that were last used before this timestamp will be deleted + size_gt: Size of the media in bytes. Files that are larger will be deleted + keep_profiles: Switch to delete also files that are still used in image data + (e.g user profile, room avatar) + If false thse files will be deleted + Returns: + List of deleted media_id + Number of deleted media_id + """ + old_media = await self.store.get_local_media_before( + before_ts, size_gt, keep_profiles, + ) + logger.info("Deleting local media: %s", old_media) + return await self._remove_local_media_from_disk(old_media) + + async def _remove_local_media_from_disk( + self, media_ids: List[str] + ) -> Tuple[List[str], int]: + """ + Delete old media_id from this server + + Args: + media_ids: List of media_id to delete + Returns: + List of deleted media_id + Number of deleted media_id + """ + removed_media = [] + for media_id in media_ids: + logger.info("Deleting: %s", media_id) + full_path = self.filepaths.local_media_filepath(media_id) + try: + os.remove(full_path) + except OSError as e: + logger.warning("Failed to remove file: %r: %s", full_path, e) + if e.errno == errno.ENOENT: + pass + else: + continue + + thumbnail_dir = self.filepaths.local_media_thumbnail_dir(media_id) + shutil.rmtree(thumbnail_dir, ignore_errors=True) - thumbnail_dir = self.filepaths.local_media_thumbnail_dir(media_id) - shutil.rmtree(thumbnail_dir, ignore_errors=True) + await self.store.delete_remote_media(self.server_name, media_id) - await self.store.delete_remote_media(self.server_name, media_id) + await self.store.delete_url_cache((media_id,)) + await self.store.delete_url_cache_media((media_id,)) - await self.store.delete_url_cache((media_id,)) - await self.store.delete_url_cache_media((media_id,)) + removed_media.append(media_id) - return 1 + return removed_media, len(removed_media) class MediaRepositoryResource(Resource): diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index cc538c5c104f..f7ed28cc3230 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -93,6 +93,7 @@ class MediaRepositoryStore(MediaRepositoryBackgroundUpdateStore): def __init__(self, database: DatabasePool, db_conn, hs): super().__init__(database, db_conn, hs) + self.server_name = hs.hostname async def get_local_media(self, media_id: str) -> Optional[Dict[str, Any]]: """Get the metadata for a local piece of media @@ -115,6 +116,53 @@ async def get_local_media(self, media_id: str) -> Optional[Dict[str, Any]]: desc="get_local_media", ) + async def get_local_media_before( + self, before_ts: int, size_gt: int, keep_profiles: bool, + ) -> Optional[List[str]]: + + sql = """ + SELECT media_id + FROM local_media_repository AS lmr + WHERE last_access_ts < ? and media_length > ? + """ + + if keep_profiles: + sql_keep = """ + AND ( + NOT EXISTS + (SELECT 1 + FROM profiles + WHERE profiles.avatar_url = '{media_prefix}' || lmr.media_id) + AND NOT EXISTS + (SELECT 1 + FROM groups + WHERE groups.avatar_url = '{media_prefix}' || lmr.media_id) + AND NOT EXISTS + (SELECT 1 + FROM room_memberships + WHERE room_memberships.avatar_url = '{media_prefix}' || lmr.media_id) + AND NOT EXISTS + (SELECT 1 + FROM user_directory + WHERE user_directory.avatar_url = '{media_prefix}' || lmr.media_id) + AND NOT EXISTS + (SELECT 1 + FROM room_stats_state + WHERE room_stats_state.avatar = '{media_prefix}' || lmr.media_id) + ) + """.format( + media_prefix="mxc://%s/" % (self.server_name,), + ) + sql += sql_keep + + def _get_local_media_before_txn(txn): + txn.execute(sql, (before_ts, size_gt)) + return [row[0] for row in txn] + + return await self.db_pool.runInteraction( + "get_local_media_before", _get_local_media_before_txn + ) + async def store_local_media( self, media_id, diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index f3e3de84a001..4f65608810af 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -13,12 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json import os from binascii import unhexlify import synapse.rest.admin from synapse.api.errors import Codes -from synapse.rest.client.v1 import login +from synapse.rest.client.v1 import login, profile, room from synapse.rest.media.v1.filepath import MediaFilePaths from tests import unittest @@ -154,7 +155,10 @@ def test_delete_media(self): self.render(request) self.assertEqual(200, channel.code, msg=channel.json_body) - self.assertEqual(1, channel.json_body["deleted"]) + self.assertEqual(1, channel.json_body["total"]) + self.assertEqual( + media_id, channel.json_body["deleted_media"][0], + ) # Attempt to access media request, channel = self.make_request( @@ -176,3 +180,357 @@ def test_delete_media(self): # Test if the file is deleted self.assertFalse(os.path.exists(local_path)) + + +class DeleteMediaByDateSizeTestCase(unittest.HomeserverTestCase): + + servlets = [ + synapse.rest.admin.register_servlets, + synapse.rest.admin.register_servlets_for_media_repo, + login.register_servlets, + profile.register_servlets, + room.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + self.handler = hs.get_device_handler() + self.media_repo = hs.get_media_repository_resource() + self.server_name = hs.hostname + self.clock = hs.clock + + self.admin_user = self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + self.filepaths = MediaFilePaths(hs.config.media_store_path) + self.url = "/_synapse/admin/v1/media/%s/delete" % self.server_name + + def test_no_auth(self): + """ + Try to delete media without authentication. + """ + + request, channel = self.make_request("POST", self.url, b"{}") + self.render(request) + + self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) + + def test_requester_is_no_admin(self): + """ + If the user is not a server admin, an error is returned. + """ + self.other_user = self.register_user("user", "pass") + self.other_user_token = self.login("user", "pass") + + request, channel = self.make_request( + "POST", self.url, access_token=self.other_user_token, + ) + self.render(request) + + self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) + + def test_media_is_not_local(self): + """ + Tests that a lookup for a media that is not a local returns a 400 + """ + url = "/_synapse/admin/v1/media/%s/delete" % "unknown_domain" + + request, channel = self.make_request( + "POST", url + "?before_ts=1234", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, channel.code, msg=channel.json_body) + self.assertEqual("Can only delete local media", channel.json_body["error"]) + + def test_missing_parameter(self): + """ + If the parameter `before_ts` is missing, an error is returned. + """ + request, channel = self.make_request( + "POST", self.url, access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.MISSING_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "Missing integer query parameter b'before_ts'", channel.json_body["error"] + ) + + def test_invalid_parameter(self): + """ + If parameters are invalid, an error is returned. + """ + request, channel = self.make_request( + "POST", self.url + "?before_ts=-1234", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "Query parameter before_ts must be a string representing a positive integer.", + channel.json_body["error"], + ) + + request, channel = self.make_request( + "POST", + self.url + "?before_ts=1234&size_gt=-1234", + access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + self.assertEqual( + "Query parameter size_gt must be a string representing a positive integer.", + channel.json_body["error"], + ) + + request, channel = self.make_request( + "POST", + self.url + "?before_ts=1234&keep_profiles=not_bool", + access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.UNKNOWN, channel.json_body["errcode"]) + self.assertEqual( + "Boolean query parameter b'keep_profiles' must be one of ['true', 'false']", + channel.json_body["error"], + ) + + def test_keep_media_by_date(self): + """ + Tests that do not delete a media if is newer than `before_ts` + """ + + # timestamp before upload + now_ms = self.clock.time_msec() + server_and_media_id = self._create_media() + + self._access_media(server_and_media_id) + + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms), + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(0, channel.json_body["total"]) + + self._access_media(server_and_media_id) + + # timestamp after upload + now_ms = self.clock.time_msec() + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms), + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(1, channel.json_body["total"]) + self.assertEqual( + server_and_media_id.split("/")[1], channel.json_body["deleted_media"][0], + ) + + self._access_media(server_and_media_id, False) + + def test_keep_media_by_size(self): + """ + Tests that do not delete a media if is smaller or equal than `size_gt` + """ + server_and_media_id = self._create_media() + + self._access_media(server_and_media_id) + + now_ms = self.clock.time_msec() + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms) + "&size_gt=67", + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(0, channel.json_body["total"]) + + self._access_media(server_and_media_id) + + now_ms = self.clock.time_msec() + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms) + "&size_gt=66", + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(1, channel.json_body["total"]) + self.assertEqual( + server_and_media_id.split("/")[1], channel.json_body["deleted_media"][0], + ) + + self._access_media(server_and_media_id, False) + + def test_keep_media_by_user_avatar(self): + """ + Tests that do not delete a media if is used as user avatar + Tests parameter `keep_profiles` + """ + server_and_media_id = self._create_media() + + self._access_media(server_and_media_id) + + # set media as avatar + request, channel = self.make_request( + "PUT", + "/profile/%s/avatar_url" % (self.admin_user,), + content=json.dumps({"avatar_url": "mxc://%s" % (server_and_media_id,)}), + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + + now_ms = self.clock.time_msec() + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms) + "&keep_profiles=true", + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(0, channel.json_body["total"]) + + self._access_media(server_and_media_id) + + now_ms = self.clock.time_msec() + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms) + "&keep_profiles=false", + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(1, channel.json_body["total"]) + self.assertEqual( + server_and_media_id.split("/")[1], channel.json_body["deleted_media"][0], + ) + + self._access_media(server_and_media_id, False) + + def test_keep_media_by_room_avatar(self): + """ + Tests that do not delete a media if is used as room avatar + Tests parameter `keep_profiles` + """ + server_and_media_id = self._create_media() + + self._access_media(server_and_media_id) + + # set media as room avatar + room_id = self.helper.create_room_as(self.admin_user, tok=self.admin_user_tok) + request, channel = self.make_request( + "PUT", + "/rooms/%s/state/m.room.avatar" % (room_id,), + content=json.dumps({"url": "mxc://%s" % (server_and_media_id,)}), + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + + now_ms = self.clock.time_msec() + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms) + "&keep_profiles=true", + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(0, channel.json_body["total"]) + + self._access_media(server_and_media_id) + + now_ms = self.clock.time_msec() + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms) + "&keep_profiles=false", + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(1, channel.json_body["total"]) + self.assertEqual( + server_and_media_id.split("/")[1], channel.json_body["deleted_media"][0], + ) + + self._access_media(server_and_media_id, False) + + def _create_media(self): + """ + Create a media and return media_id and server_and_media_id + """ + upload_resource = self.media_repo.children[b"upload"] + # file size is 67 Byte + image_data = unhexlify( + b"89504e470d0a1a0a0000000d4948445200000001000000010806" + b"0000001f15c4890000000a49444154789c63000100000500010d" + b"0a2db40000000049454e44ae426082" + ) + + # Upload some media into the room + response = self.helper.upload_media( + upload_resource, image_data, tok=self.admin_user_tok, expect_code=200 + ) + # Extract media ID from the response + server_and_media_id = response["content_uri"][6:] # Cut off 'mxc://' + server_name = server_and_media_id.split("/")[0] + + # Check that new media is a local and not remote + self.assertEqual(server_name, self.server_name) + + return server_and_media_id + + def _access_media(self, server_and_media_id, expect_success=True): + """ + Try to access a media and check the result + """ + download_resource = self.media_repo.children[b"download"] + + media_id = server_and_media_id.split("/")[1] + local_path = self.filepaths.local_media_filepath(media_id) + + request, channel = self.make_request( + "GET", + server_and_media_id, + shorthand=False, + access_token=self.admin_user_tok, + ) + request.render(download_resource) + self.pump(1.0) + + if expect_success: + self.assertEqual( + 200, + channel.code, + msg=( + "Expected to receive a 200 on accessing media: %s" + % server_and_media_id + ), + ) + # Test that the file exists + self.assertTrue(os.path.exists(local_path)) + else: + self.assertEqual( + 404, + channel.code, + msg=( + "Expected to receive a 404 on accessing deleted media: %s" + % (server_and_media_id) + ), + ) + # Test that the file is deleted + self.assertFalse(os.path.exists(local_path)) From e853bc5505e3f1abb2d33e3c56c6b9d51272c76f Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Fri, 16 Oct 2020 21:06:42 +0200 Subject: [PATCH 4/8] tox --- synapse/rest/media/v1/media_repository.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 72a04e5556e9..5b059bd270d0 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -779,7 +779,7 @@ async def delete_local_media(self, media_id: str) -> Tuple[List[str], int]: """ logger.info("Deleting local media: %s", media_id) - return await self._remove_local_media_from_disk((media_id,)) + return await self._remove_local_media_from_disk([media_id]) async def delete_old_local_media( self, before_ts: int, size_gt: int = 0, keep_profiles: bool = True, From be9ee456bba92c6b93bc96d1797d4064483ee43d Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 19 Oct 2020 20:15:52 +0200 Subject: [PATCH 5/8] Apply suggestions from code review - Part 1 Co-authored-by: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> --- docs/admin_api/media_admin_api.md | 14 +++++++------- synapse/rest/admin/media.py | 4 ++-- synapse/rest/media/v1/media_repository.py | 10 ++++++---- 3 files changed, 15 insertions(+), 13 deletions(-) diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md index c7ce6f57ab88..e288fc50674f 100644 --- a/docs/admin_api/media_admin_api.md +++ b/docs/admin_api/media_admin_api.md @@ -129,7 +129,7 @@ Response: ```json { - "deleted_media":[ + "deleted_media": [ "abcdefghijklmnopqrstuvwx" ], "total": 1 @@ -138,7 +138,7 @@ Response: The following fields are returned in the JSON response body: -* ``deleted_media``: list of strings - List of deleted ``media_id`` +* ``deleted_media``: an array of strings - List of deleted ``media_id`` * ``total``: integer - Total number of deleted ``media_id`` ## Delete local media by date or size @@ -153,21 +153,21 @@ POST /_synapse/admin/v1/media//delete?before_ts= URL Parameters -* ``server_name``: string - The name of your local server (e.g ``matrix.org``) +* ``server_name``: string - The name of your local server (e.g ``matrix.org``). * ``before_ts``: string representing a positive integer - Unix timestamp in ms. Files that were last used before this timestamp will be deleted. It is the timestamp of last access and not the timestamp creation. * ``size_gt``: Optional - string representing a positive integer - Size of the media in bytes. Files that are larger will be deleted. Defaults to ``0``. -* ``keep_profiles``: Optional- string representing a boolean - Switch to delete also files +* ``keep_profiles``: Optional - string representing a boolean - Switch to also delete files that are still used in image data (e.g user profile, room avatar). -If ``false`` thse files will be deleted. Defaults to ``true``. +If ``false`` these files will be deleted. Defaults to ``true``. Response: ```json { - "deleted_media":[ + "deleted_media": [ "abcdefghijklmnopqrstuvwx", "abcdefghijklmnopqrstuvwz" ], @@ -177,5 +177,5 @@ Response: The following fields are returned in the JSON response body: -* ``deleted_media``: list of strings - List of deleted ``media_id`` +* ``deleted_media``: an array of strings - List of deleted ``media_id`` * ``total``: integer - Total number of deleted ``media_id`` diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index 527b6dd72317..f415c22a954b 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -179,8 +179,8 @@ async def on_DELETE(self, request, server_name: str, media_id: str): class DeleteMediaByDateSize(RestServlet): - """Delete local media by timestamp and size. - Removes it from this server. + """Delete local media and local copies of remote media by + timestamp and size. """ PATTERNS = admin_patterns("/media/(?P[^/]+)/delete") diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 5b059bd270d0..603ee891a7ba 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -769,7 +769,7 @@ async def delete_old_remote_media(self, before_ts): async def delete_local_media(self, media_id: str) -> Tuple[List[str], int]: """ - Delete the given media_id from this server + Delete the given local or remote media ID from this server Args: media_id: The media ID to delete. @@ -785,7 +785,8 @@ async def delete_old_local_media( self, before_ts: int, size_gt: int = 0, keep_profiles: bool = True, ) -> Tuple[List[str], int]: """ - Delete old media_id from this server + Delete local or remote media from this server by size and timestamp. Removes + media files, any thumbnails and cached URLs. Args: before_ts: Unix timestamp in ms. @@ -793,7 +794,7 @@ async def delete_old_local_media( size_gt: Size of the media in bytes. Files that are larger will be deleted keep_profiles: Switch to delete also files that are still used in image data (e.g user profile, room avatar) - If false thse files will be deleted + If false these files will be deleted Returns: List of deleted media_id Number of deleted media_id @@ -808,7 +809,8 @@ async def _remove_local_media_from_disk( self, media_ids: List[str] ) -> Tuple[List[str], int]: """ - Delete old media_id from this server + Delete local or remote media from this server. Removes media files, + any thumbnails and cached URLs. Args: media_ids: List of media_id to delete From 542a11e0bea68bfe1e4d0022846dc9e3861998c7 Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 19 Oct 2020 21:07:59 +0200 Subject: [PATCH 6/8] Apply suggestions from code review - Part 2 --- changelog.d/8519.feature | 2 +- docs/admin_api/media_admin_api.md | 26 +++++++++++------------ synapse/rest/admin/media.py | 5 ++++- synapse/rest/media/v1/media_repository.py | 12 +++-------- tests/rest/admin/test_media.py | 11 +++++----- 5 files changed, 27 insertions(+), 29 deletions(-) diff --git a/changelog.d/8519.feature b/changelog.d/8519.feature index b0fde1a9b562..e2ab54868106 100644 --- a/changelog.d/8519.feature +++ b/changelog.d/8519.feature @@ -1 +1 @@ -Add an admin api `DELETE /_synapse/admin/v1/media//` to delete a single file from server. Contributed by @dklimpel. \ No newline at end of file +Add an admin api to delete a single file or files were not used for a defined time from server. Contributed by @dklimpel. \ No newline at end of file diff --git a/docs/admin_api/media_admin_api.md b/docs/admin_api/media_admin_api.md index e288fc50674f..3994e1f1a988 100644 --- a/docs/admin_api/media_admin_api.md +++ b/docs/admin_api/media_admin_api.md @@ -110,7 +110,7 @@ media repositories (e.g https://github.com/turt2live/matrix-media-repo/). See also [purge_remote_media.rst](purge_remote_media.rst). ## Delete a specific local media -Delete a specific ``media_id``. +Delete a specific `media_id`. Request: @@ -122,8 +122,8 @@ DELETE /_synapse/admin/v1/media// URL Parameters -* ``server_name``: string - The name of your local server (e.g ``matrix.org``) -* ``media_id``: string - The ID of the media (e.g ``abcdefghijklmnopqrstuvwx``) +* `server_name`: string - The name of your local server (e.g `matrix.org`) +* `media_id`: string - The ID of the media (e.g `abcdefghijklmnopqrstuvwx`) Response: @@ -138,8 +138,8 @@ Response: The following fields are returned in the JSON response body: -* ``deleted_media``: an array of strings - List of deleted ``media_id`` -* ``total``: integer - Total number of deleted ``media_id`` +* `deleted_media`: an array of strings - List of deleted `media_id` +* `total`: integer - Total number of deleted `media_id` ## Delete local media by date or size @@ -153,15 +153,15 @@ POST /_synapse/admin/v1/media//delete?before_ts= URL Parameters -* ``server_name``: string - The name of your local server (e.g ``matrix.org``). -* ``before_ts``: string representing a positive integer - Unix timestamp in ms. +* `server_name`: string - The name of your local server (e.g `matrix.org`). +* `before_ts`: string representing a positive integer - Unix timestamp in ms. Files that were last used before this timestamp will be deleted. It is the timestamp of last access and not the timestamp creation. -* ``size_gt``: Optional - string representing a positive integer - Size of the media in bytes. -Files that are larger will be deleted. Defaults to ``0``. -* ``keep_profiles``: Optional - string representing a boolean - Switch to also delete files +* `size_gt`: Optional - string representing a positive integer - Size of the media in bytes. +Files that are larger will be deleted. Defaults to `0`. +* `keep_profiles`: Optional - string representing a boolean - Switch to also delete files that are still used in image data (e.g user profile, room avatar). -If ``false`` these files will be deleted. Defaults to ``true``. +If `false` these files will be deleted. Defaults to `true`. Response: @@ -177,5 +177,5 @@ Response: The following fields are returned in the JSON response body: -* ``deleted_media``: an array of strings - List of deleted ``media_id`` -* ``total``: integer - Total number of deleted ``media_id`` +* `deleted_media`: an array of strings - List of deleted `media_id` +* `total`: integer - Total number of deleted `media_id` diff --git a/synapse/rest/admin/media.py b/synapse/rest/admin/media.py index f415c22a954b..ba50cb876d41 100644 --- a/synapse/rest/admin/media.py +++ b/synapse/rest/admin/media.py @@ -214,7 +214,10 @@ async def on_POST(self, request, server_name: str): if self.server_name != server_name: raise SynapseError(400, "Can only delete local media") - logging.info("Deleting local media by timestamp: %s", before_ts) + logging.info( + "Deleting local media by timestamp: %s, size larger than: %s, keep profile media: %s" + % (before_ts, size_gt, keep_profiles) + ) deleted_media, total = await self.media_repository.delete_old_local_media( before_ts, size_gt, keep_profiles diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 603ee891a7ba..a15bd61f5cac 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -774,11 +774,8 @@ async def delete_local_media(self, media_id: str) -> Tuple[List[str], int]: Args: media_id: The media ID to delete. Returns: - List of deleted media_id - Number of deleted media_id + A tuple of (list of deleted media IDs, total deleted media IDs). """ - logger.info("Deleting local media: %s", media_id) - return await self._remove_local_media_from_disk([media_id]) async def delete_old_local_media( @@ -796,13 +793,11 @@ async def delete_old_local_media( (e.g user profile, room avatar) If false these files will be deleted Returns: - List of deleted media_id - Number of deleted media_id + A tuple of (list of deleted media IDs, total deleted media IDs). """ old_media = await self.store.get_local_media_before( before_ts, size_gt, keep_profiles, ) - logger.info("Deleting local media: %s", old_media) return await self._remove_local_media_from_disk(old_media) async def _remove_local_media_from_disk( @@ -815,8 +810,7 @@ async def _remove_local_media_from_disk( Args: media_ids: List of media_id to delete Returns: - List of deleted media_id - Number of deleted media_id + A tuple of (list of deleted media IDs, total deleted media IDs). """ removed_media = [] for media_id in media_ids: diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index 4f65608810af..03276b99d08d 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -232,7 +232,7 @@ def test_requester_is_no_admin(self): def test_media_is_not_local(self): """ - Tests that a lookup for a media that is not a local returns a 400 + Tests that a lookup for media that is not local returns a 400 """ url = "/_synapse/admin/v1/media/%s/delete" % "unknown_domain" @@ -305,7 +305,7 @@ def test_invalid_parameter(self): def test_keep_media_by_date(self): """ - Tests that do not delete a media if is newer than `before_ts` + Tests that media is not deleted if it is newer than `before_ts` """ # timestamp before upload @@ -343,7 +343,8 @@ def test_keep_media_by_date(self): def test_keep_media_by_size(self): """ - Tests that do not delete a media if is smaller or equal than `size_gt` + Tests that media is not deleted if its size is smaller than or equal + to `size_gt` """ server_and_media_id = self._create_media() @@ -378,7 +379,7 @@ def test_keep_media_by_size(self): def test_keep_media_by_user_avatar(self): """ - Tests that do not delete a media if is used as user avatar + Tests that we do not delete media if is used as a user avatar Tests parameter `keep_profiles` """ server_and_media_id = self._create_media() @@ -424,7 +425,7 @@ def test_keep_media_by_user_avatar(self): def test_keep_media_by_room_avatar(self): """ - Tests that do not delete a media if is used as room avatar + Tests that we do not delete media if it is used as a room avatar Tests parameter `keep_profiles` """ server_and_media_id = self._create_media() From 1cb5c44f93a59cdbf96d21b881c1a06aeeb731dd Mon Sep 17 00:00:00 2001 From: dklimpel <5740567+dklimpel@users.noreply.github.com> Date: Mon, 19 Oct 2020 22:17:27 +0200 Subject: [PATCH 7/8] Add query and test for media that is never accessed --- .../databases/main/media_repository.py | 9 ++++-- tests/rest/admin/test_media.py | 31 +++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/synapse/storage/databases/main/media_repository.py b/synapse/storage/databases/main/media_repository.py index f7ed28cc3230..7ef5f1bf2b96 100644 --- a/synapse/storage/databases/main/media_repository.py +++ b/synapse/storage/databases/main/media_repository.py @@ -120,10 +120,15 @@ async def get_local_media_before( self, before_ts: int, size_gt: int, keep_profiles: bool, ) -> Optional[List[str]]: + # to find files that have never been accessed (last_access_ts IS NULL) + # compare with `created_ts` sql = """ SELECT media_id FROM local_media_repository AS lmr - WHERE last_access_ts < ? and media_length > ? + WHERE + ( last_access_ts < ? + OR ( created_ts < ? AND last_access_ts IS NULL ) ) + AND media_length > ? """ if keep_profiles: @@ -156,7 +161,7 @@ async def get_local_media_before( sql += sql_keep def _get_local_media_before_txn(txn): - txn.execute(sql, (before_ts, size_gt)) + txn.execute(sql, (before_ts, before_ts, size_gt)) return [row[0] for row in txn] return await self.db_pool.runInteraction( diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index 03276b99d08d..d86916d71349 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -303,6 +303,37 @@ def test_invalid_parameter(self): channel.json_body["error"], ) + def test_delete_media_never_accessed(self): + """ + Tests that media deleted if it is older than `before_ts` and never accessed + `last_access_ts` is `NULL` and `created_ts` < `before_ts` + """ + + # upload an do not access + server_and_media_id = self._create_media() + self.pump(1.0) + + # test that the file exists + media_id = server_and_media_id.split("/")[1] + local_path = self.filepaths.local_media_filepath(media_id) + self.assertTrue(os.path.exists(local_path)) + + # timestamp after upload/create + now_ms = self.clock.time_msec() + request, channel = self.make_request( + "POST", + self.url + "?before_ts=" + str(now_ms), + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(1, channel.json_body["total"]) + self.assertEqual( + media_id, channel.json_body["deleted_media"][0], + ) + + self._access_media(server_and_media_id, False) + def test_keep_media_by_date(self): """ Tests that media is not deleted if it is newer than `before_ts` From 3b9a69147edbff0ff593707964b030ef007232c9 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 26 Oct 2020 17:01:03 +0000 Subject: [PATCH 8/8] Apply suggestions from code review --- synapse/rest/media/v1/media_repository.py | 2 +- tests/rest/admin/test_media.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index a15bd61f5cac..5cce7237a063 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -814,7 +814,7 @@ async def _remove_local_media_from_disk( """ removed_media = [] for media_id in media_ids: - logger.info("Deleting: %s", media_id) + logger.info("Deleting media with ID '%s'", media_id) full_path = self.filepaths.local_media_filepath(media_id) try: os.remove(full_path) diff --git a/tests/rest/admin/test_media.py b/tests/rest/admin/test_media.py index d86916d71349..721fa1ed51af 100644 --- a/tests/rest/admin/test_media.py +++ b/tests/rest/admin/test_media.py @@ -309,7 +309,7 @@ def test_delete_media_never_accessed(self): `last_access_ts` is `NULL` and `created_ts` < `before_ts` """ - # upload an do not access + # upload and do not access server_and_media_id = self._create_media() self.pump(1.0)