Skip to content
This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Performance improvements and refactor of Ratelimiter #7595

Merged
merged 33 commits into from
Jun 5, 2020
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
4f715be
Refactor and comment ratelimiting. Set limits in constructor
anoadragon453 May 28, 2020
0e6ee7c
Ratelimiters are instantiated by the HomeServer class
anoadragon453 May 28, 2020
82eac22
Modify servlets to pull Ratelimiters from HomeServer class
anoadragon453 May 28, 2020
a0ef594
Update unittests
anoadragon453 May 28, 2020
6a07c2d
lint
anoadragon453 May 28, 2020
c322ba0
changelog
anoadragon453 May 28, 2020
f6203a6
Make rate_hz and burst_count overridable per-request
anoadragon453 May 29, 2020
1f6156b
Set clock with constructor, store rate_hz per key again
anoadragon453 Jun 1, 2020
c236806
Instantiate Ratelimiters in respective classes
anoadragon453 Jun 1, 2020
470de6e
Use patch for the Ratelimiter in some tests. Set using config in others
anoadragon453 Jun 1, 2020
515a186
Update copyright header
anoadragon453 Jun 1, 2020
87ab836
Remove resolved question
anoadragon453 Jun 1, 2020
56c52a5
lint
anoadragon453 Jun 1, 2020
2d7e087
lint, mypy
anoadragon453 Jun 1, 2020
a566b46
Remove unittest.DEBUG statement
anoadragon453 Jun 1, 2020
41c7288
Update changelog.d/7595.misc
anoadragon453 Jun 2, 2020
58d4919
Remove erroneous print statement
anoadragon453 Jun 2, 2020
aa1f4c3
Merge branch 'anoa/ratelimit_config_perf' of github.com:matrix-org/sy…
anoadragon453 Jun 2, 2020
39b484b
Move update after optional method arguments
anoadragon453 Jun 2, 2020
d727bed
Make it obvious that time_now_s is just for testing
anoadragon453 Jun 2, 2020
9f76a8d
Update ratelimiter calling methods and tests
anoadragon453 Jun 2, 2020
8867900
No need to re-check for None in can_do_action
anoadragon453 Jun 2, 2020
ef7383f
time_now_s is used in ratelimit
anoadragon453 Jun 3, 2020
189c01b
Comment changes revolving around time_allowed
anoadragon453 Jun 3, 2020
4a88edb
Fix missed call to self.rate_hz
anoadragon453 Jun 3, 2020
14a0af5
Test Ratelimiter ratelimit method and param overrides
anoadragon453 Jun 3, 2020
c145c81
Back out some changes.
clokep Jun 4, 2020
12b4d47
Do not specify ratelimiters in tests when unnecessary.
clokep Jun 4, 2020
d84d779
Update timestamp comment
anoadragon453 Jun 4, 2020
45a7791
Clean up Exception raising assertion
anoadragon453 Jun 4, 2020
3899589
Clean up and split out tests
anoadragon453 Jun 4, 2020
08c5114
Remove _ = style
anoadragon453 Jun 4, 2020
9beee5f
Merge branch 'anoa/ratelimit_config_perf' of github.com:matrix-org/sy…
anoadragon453 Jun 4, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog.d/7595.misc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Refactor `Ratelimiter` to limit the amount of expensive config value accesses.
154 changes: 118 additions & 36 deletions synapse/api/ratelimiting.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Copyright 2014-2016 OpenMarket Ltd
# Copyright 2020 The Matrix.org Foundation C.I.C.
clokep marked this conversation as resolved.
Show resolved Hide resolved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -16,75 +17,156 @@
from typing import Any, Optional, Tuple

from synapse.api.errors import LimitExceededError
from synapse.util import Clock


class Ratelimiter(object):
"""
Ratelimit message sending by user.
Ratelimit actions marked by arbitrary keys.

Args:
clock: A homeserver clock, for retrieving the current time
rate_hz: The long term number of actions that can be performed in a second.
burst_count: How many actions that can be performed before being limited.
"""

def __init__(self):
self.message_counts = (
OrderedDict()
) # type: OrderedDict[Any, Tuple[float, int, Optional[float]]]
def __init__(self, clock: Clock, rate_hz: float, burst_count: int):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I debated two things here:

  • Making rate_hz / burst_count optional so we don't have the case where we define them as 0 to override them each time.
  • Having two rate limiters (with one being a sub-class of the other) for the two situations (per key limit vs. per rate limiter limit).

I think these are both just bikeshedding at this point though and it isn't worth poking at more. I just wanted to write them down!

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Neat ideas, will keep them in mind if we decide to do another refactor at some point. Thanks!

self.clock = clock
self.rate_hz = rate_hz
self.burst_count = burst_count

# A ordered dictionary keeping track of actions, when they were last
# performed and how often. Each entry is a mapping from a key of arbitrary type
# to a tuple representing:
# * How many times an action has occurred since a point in time
# * The point in time
# * The rate_hz of this particular entry. This can vary per request
self.actions = OrderedDict() # type: OrderedDict[Any, Tuple[float, int, float]]

def can_do_action(self, key, time_now_s, rate_hz, burst_count, update=True):
def can_do_action(
self,
key: Any,
rate_hz: Optional[float] = None,
burst_count: Optional[int] = None,
update: bool = True,
_time_now_s: Optional[int] = None,
) -> Tuple[bool, float]:
"""Can the entity (e.g. user or IP address) perform the action?

Args:
key: The key we should use when rate limiting. Can be a user ID
(when sending events), an IP address, etc.
time_now_s: The time now.
rate_hz: The long term number of messages a user can send in a
second.
burst_count: How many messages the user can send before being
limited.
update (bool): Whether to update the message rates or not. This is
useful to check if a message would be allowed to be sent before
its ready to be actually sent.
rate_hz: The long term number of actions that can be performed in a second.
Overrides the value set during instantiation if set.
burst_count: How many actions that can be performed before being limited.
Overrides the value set during instantiation if set.
update: Whether to count this check as performing the action
_time_now_s: The current time. Optional, defaults to the current time according
to self.clock. Only used by tests.

Returns:
A pair of a bool indicating if they can send a message now and a
time in seconds of when they can next send a message.
A tuple containing:
* A bool indicating if they can perform the action now
* The time in seconds of when it can next be performed.
-1 if rate_hz is less than or equal to zero
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved
"""
self.prune_message_counts(time_now_s)
message_count, time_start, _ignored = self.message_counts.get(
key, (0.0, time_now_s, None)
)
# Override default values if set
time_now_s = _time_now_s if _time_now_s is not None else self.clock.time()
rate_hz = rate_hz if rate_hz is not None else self.rate_hz
burst_count = burst_count if burst_count is not None else self.burst_count

# Remove any expired entries
self._prune_message_counts(time_now_s)

# Check if there is an existing count entry for this key
action_count, time_start, _ = self.actions.get(key, (0.0, time_now_s, 0.0))
anoadragon453 marked this conversation as resolved.
Show resolved Hide resolved

# Check whether performing another action is allowed
time_delta = time_now_s - time_start
sent_count = message_count - time_delta * rate_hz
if sent_count < 0:
performed_count = action_count - time_delta * rate_hz
if performed_count < 0:
# Allow, reset back to count 1
allowed = True
time_start = time_now_s
message_count = 1.0
elif sent_count > burst_count - 1.0:
action_count = 1.0
elif performed_count > burst_count - 1.0:
# Deny, we have exceeded our burst count
allowed = False
else:
# We haven't reached our limit yet
allowed = True
message_count += 1
action_count += 1.0

if update:
self.message_counts[key] = (message_count, time_start, rate_hz)
self.actions[key] = (action_count, time_start, rate_hz)

if self.rate_hz > 0:
# Find out when the count of existing actions expires
time_allowed = time_start + (action_count - burst_count + 1) / rate_hz

if rate_hz > 0:
time_allowed = time_start + (message_count - burst_count + 1) / rate_hz
# Don't give back a time in the past
if time_allowed < time_now_s:
time_allowed = time_now_s
else:
# XXX: Why is this -1? This seems to only be used in
# self.ratelimit. I guess so that clients get a time in the past and don't
# feel afraid to try again immediately
Copy link
Member Author

@anoadragon453 anoadragon453 Jun 3, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note: I'm tempted to resolve this, but I don't want to change the behaviour of Ratelimiter in this PR any more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that this sounds like a follow-up!

time_allowed = -1

return allowed, time_allowed

def prune_message_counts(self, time_now_s):
for key in list(self.message_counts.keys()):
message_count, time_start, rate_hz = self.message_counts[key]
def _prune_message_counts(self, time_now_s: int):
"""Remove message count entries that have not exceeded their defined
rate_hz limit

Args:
time_now_s: The current time
"""
# We create a copy of the key list here as the dictionary is modified during
# the loop
for key in list(self.actions.keys()):
action_count, time_start, rate_hz = self.actions[key]

# Rate limit = "seconds since we started limiting this action" * rate_hz
# If this limit has not been exceeded, wipe our record of this action
time_delta = time_now_s - time_start
if message_count - time_delta * rate_hz > 0:
break
if action_count - time_delta * rate_hz > 0:
continue
else:
del self.message_counts[key]
del self.actions[key]

def ratelimit(
self,
key: Any,
rate_hz: Optional[float] = None,
burst_count: Optional[int] = None,
update: bool = True,
_time_now_s: Optional[int] = None,
):
"""Checks if an action can be performed. If not, raises a LimitExceededError

Args:
key: An arbitrary key used to classify an action
rate_hz: The long term number of actions that can be performed in a second.
Overrides the value set during instantiation if set.
burst_count: How many actions that can be performed before being limited.
Overrides the value set during instantiation if set.
update: Whether to count this check as performing the action
_time_now_s: The current time. Optional, defaults to the current time according
to self.clock. Only used by tests.

Raises:
LimitExceededError: If an action could not be performed, along with the time in
milliseconds until the action can be performed again
"""
time_now_s = _time_now_s if _time_now_s is not None else self.clock.time()

def ratelimit(self, key, time_now_s, rate_hz, burst_count, update=True):
allowed, time_allowed = self.can_do_action(
key, time_now_s, rate_hz, burst_count, update
key,
rate_hz=rate_hz,
burst_count=burst_count,
update=update,
_time_now_s=time_now_s,
)

if not allowed:
Expand Down
8 changes: 7 additions & 1 deletion synapse/config/ratelimiting.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Dict

from ._base import Config


class RateLimitConfig(object):
def __init__(self, config, defaults={"per_second": 0.17, "burst_count": 3.0}):
def __init__(
self,
config: Dict[str, float],
defaults={"per_second": 0.17, "burst_count": 3.0},
):
self.per_second = config.get("per_second", defaults["per_second"])
self.burst_count = config.get("burst_count", defaults["burst_count"])

Expand Down
60 changes: 29 additions & 31 deletions synapse/handlers/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

import synapse.types
from synapse.api.constants import EventTypes, Membership
from synapse.api.errors import LimitExceededError
from synapse.api.ratelimiting import Ratelimiter
from synapse.types import UserID

logger = logging.getLogger(__name__)
Expand All @@ -44,11 +44,26 @@ def __init__(self, hs):
self.notifier = hs.get_notifier()
self.state_handler = hs.get_state_handler()
self.distributor = hs.get_distributor()
self.ratelimiter = hs.get_ratelimiter()
self.admin_redaction_ratelimiter = hs.get_admin_redaction_ratelimiter()
self.clock = hs.get_clock()
self.hs = hs

# The rate_hz and burst_count are overridden on a per-user basis
self.request_ratelimiter = Ratelimiter(
clock=self.clock, rate_hz=0, burst_count=0
)
self._rc_message = self.hs.config.rc_message

# Check whether ratelimiting room admin message redaction is enabled
# by the presence of rate limits in the config
if self.hs.config.rc_admin_redaction:
self.admin_redaction_ratelimiter = Ratelimiter(
clock=self.clock,
rate_hz=self.hs.config.rc_admin_redaction.per_second,
burst_count=self.hs.config.rc_admin_redaction.burst_count,
)
else:
self.admin_redaction_ratelimiter = None

self.server_name = hs.hostname

self.event_builder_factory = hs.get_event_builder_factory()
Expand All @@ -70,7 +85,6 @@ def ratelimit(self, requester, update=True, is_admin_redaction=False):
Raises:
LimitExceededError if the request should be ratelimited
"""
time_now = self.clock.time()
user_id = requester.user.to_string()

# The AS user itself is never rate limited.
Expand All @@ -83,48 +97,32 @@ def ratelimit(self, requester, update=True, is_admin_redaction=False):
if requester.app_service and not requester.app_service.is_rate_limited():
return

messages_per_second = self._rc_message.per_second
burst_count = self._rc_message.burst_count

# Check if there is a per user override in the DB.
override = yield self.store.get_ratelimit_for_user(user_id)
if override:
# If overriden with a null Hz then ratelimiting has been entirely
# If overridden with a null Hz then ratelimiting has been entirely
# disabled for the user
if not override.messages_per_second:
return

messages_per_second = override.messages_per_second
burst_count = override.burst_count

if is_admin_redaction and self.admin_redaction_ratelimiter:
# If we have separate config for admin redactions, use a separate
# ratelimiter as to not have user_ids clash
self.admin_redaction_ratelimiter.ratelimit(user_id, update=update)
else:
# We default to different values if this is an admin redaction and
# the config is set
if is_admin_redaction and self.hs.config.rc_admin_redaction:
messages_per_second = self.hs.config.rc_admin_redaction.per_second
burst_count = self.hs.config.rc_admin_redaction.burst_count
else:
messages_per_second = self.hs.config.rc_message.per_second
burst_count = self.hs.config.rc_message.burst_count

if is_admin_redaction and self.hs.config.rc_admin_redaction:
# If we have separate config for admin redactions we use a separate
# ratelimiter
allowed, time_allowed = self.admin_redaction_ratelimiter.can_do_action(
user_id,
time_now,
rate_hz=messages_per_second,
burst_count=burst_count,
update=update,
)
else:
allowed, time_allowed = self.ratelimiter.can_do_action(
# Override rate and burst count per-user
self.request_ratelimiter.ratelimit(
user_id,
time_now,
rate_hz=messages_per_second,
burst_count=burst_count,
update=update,
)
if not allowed:
raise LimitExceededError(
retry_after_ms=int(1000 * (time_allowed - time_now))
)

async def maybe_kick_guest_users(self, event, context=None):
# Technically this function invalidates current_state by changing it.
Expand Down
24 changes: 8 additions & 16 deletions synapse/handlers/auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,11 @@ def __init__(self, hs):

# Ratelimiter for failed auth during UIA. Uses same ratelimit config
# as per `rc_login.failed_attempts`.
self._failed_uia_attempts_ratelimiter = Ratelimiter()
self._failed_uia_attempts_ratelimiter = Ratelimiter(
clock=self.clock,
rate_hz=self.hs.config.rc_login_failed_attempts.per_second,
burst_count=self.hs.config.rc_login_failed_attempts.burst_count,
)

self._clock = self.hs.get_clock()

Expand Down Expand Up @@ -196,13 +200,7 @@ async def validate_user_via_ui_auth(
user_id = requester.user.to_string()

# Check if we should be ratelimited due to too many previous failed attempts
self._failed_uia_attempts_ratelimiter.ratelimit(
user_id,
time_now_s=self._clock.time(),
rate_hz=self.hs.config.rc_login_failed_attempts.per_second,
burst_count=self.hs.config.rc_login_failed_attempts.burst_count,
update=False,
)
self._failed_uia_attempts_ratelimiter.ratelimit(user_id, update=False)

# build a list of supported flows
flows = [[login_type] for login_type in self._supported_ui_auth_types]
Expand All @@ -212,14 +210,8 @@ async def validate_user_via_ui_auth(
flows, request, request_body, clientip, description
)
except LoginError:
# Update the ratelimite to say we failed (`can_do_action` doesn't raise).
self._failed_uia_attempts_ratelimiter.can_do_action(
user_id,
time_now_s=self._clock.time(),
rate_hz=self.hs.config.rc_login_failed_attempts.per_second,
burst_count=self.hs.config.rc_login_failed_attempts.burst_count,
update=True,
)
# Update the ratelimiter to say we failed (`can_do_action` doesn't raise).
self._failed_uia_attempts_ratelimiter.can_do_action(user_id)
raise

# find the completed login type
Expand Down
1 change: 0 additions & 1 deletion synapse/handlers/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,6 @@ def __init__(self, hs):
self.profile_handler = hs.get_profile_handler()
self.event_builder_factory = hs.get_event_builder_factory()
self.server_name = hs.hostname
self.ratelimiter = hs.get_ratelimiter()
self.notifier = hs.get_notifier()
self.config = hs.config
self.require_membership_for_aliases = hs.config.require_membership_for_aliases
Expand Down
9 changes: 1 addition & 8 deletions synapse/handlers/register.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,14 +425,7 @@ def check_registration_ratelimit(self, address):
if not address:
return

time_now = self.clock.time()

self.ratelimiter.ratelimit(
address,
time_now_s=time_now,
rate_hz=self.hs.config.rc_registration.per_second,
burst_count=self.hs.config.rc_registration.burst_count,
)
self.ratelimiter.ratelimit(address)

def register_with_store(
self,
Expand Down
Loading