Skip to content

Commit

Permalink
Builds: set scale-in protection before/after each build (#10507)
Browse files Browse the repository at this point in the history
* Builds: set scale-in protection before/after each build

We use AWS scale-in protection to protect our instances from being scaled-in
automatically while running a build. This avoids killing the build in the middle
while it's running.

Besides, it could help us to be more aggressive to scale-in if we consider
without degrading user experience.

Related readthedocs/readthedocs-ops#1324

* Apply suggestions from code review

Co-authored-by: Anthony <aj@ohess.org>

* Build: scale-in under a feature flag

This way we will be able to have more control and enable/disable this without
requiring a deploy in case that something goes wrong.

* Typo

* Test: mock `update_docs_task` on a test that doesn't use it

---------

Co-authored-by: Anthony <aj@ohess.org>
  • Loading branch information
humitos and agjohnson authored Jul 10, 2023
1 parent 6ccdcb0 commit 3a9befe
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 2 deletions.
5 changes: 5 additions & 0 deletions readthedocs/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1937,6 +1937,7 @@ def add_features(sender, **kwargs):
# Build related features
HOSTING_INTEGRATIONS = "hosting_integrations"
NO_CONFIG_FILE_DEPRECATED = "no_config_file"
SCALE_IN_PROTECTION = "scale_in_prtection"

FEATURES = (
(
Expand Down Expand Up @@ -2067,6 +2068,10 @@ def add_features(sender, **kwargs):
NO_CONFIG_FILE_DEPRECATED,
_("Build: Building without a configuration file is deprecated."),
),
(
SCALE_IN_PROTECTION,
_("Build: Set scale-in protection before/after building."),
),
)

FEATURES = sorted(FEATURES, key=lambda l: l[1])
Expand Down
25 changes: 24 additions & 1 deletion readthedocs/projects/tasks/builds.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
ProjectBuildsSkippedError,
YAMLParseError,
)
from readthedocs.projects.models import Feature
from readthedocs.storage import build_media_storage
from readthedocs.telemetry.collectors import BuildDataCollector
from readthedocs.telemetry.tasks import save_build_data
Expand All @@ -67,7 +68,12 @@
from ..signals import before_vcs
from .mixins import SyncRepositoryMixin
from .search import fileify
from .utils import BuildRequest, clean_build, send_external_build_status
from .utils import (
BuildRequest,
clean_build,
send_external_build_status,
set_builder_scale_in_protection,
)

log = structlog.get_logger(__name__)

Expand Down Expand Up @@ -418,6 +424,16 @@ def before_start(self, task_id, args, kwargs):
version_slug=self.data.version.slug,
)

# Enable scale-in protection on this instance
#
# TODO: move this to the beginning of this method
# once we don't need to rely on `self.data.project`.
if self.data.project.has_feature(Feature.SCALE_IN_PROTECTION):
set_builder_scale_in_protection.delay(
instance=socket.gethostname(),
protected_from_scale_in=True,
)

# Clean the build paths completely to avoid conflicts with previous run
# (e.g. cleanup task failed for some reason)
clean_build(self.data.version)
Expand Down Expand Up @@ -728,6 +744,13 @@ def after_return(self, status, retval, task_id, args, kwargs, einfo):
except Exception:
log.exception("Failed to revoke build api key.", exc_info=True)

# Disable scale-in protection on this instance
if self.data.project.has_feature(Feature.SCALE_IN_PROTECTION):
set_builder_scale_in_protection.delay(
instance=socket.gethostname(),
protected_from_scale_in=False,
)

log.info(
'Build finished.',
length=self.data.build['length'],
Expand Down
45 changes: 45 additions & 0 deletions readthedocs/projects/tasks/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import datetime
import os
import re

import boto3
import structlog
from celery.worker.request import Request
from django.conf import settings
Expand Down Expand Up @@ -321,6 +323,49 @@ def deprecated_config_file_used_notification():
)


@app.task(queue="web")
def set_builder_scale_in_protection(instance, protected_from_scale_in):
"""
Set scale-in protection on this builder ``instance``.
This way, AWS will not scale-in this instance while it's building the documentation.
This is pretty useful for long running tasks.
"""
log.bind(instance=instance, protected_from_scale_in=protected_from_scale_in)

if settings.DEBUG or settings.RTD_DOCKER_COMPOSE:
log.info(
"Running development environment. Skipping scale-in protection.",
)
return

asg = boto3.client(
"autoscaling",
aws_access_key_id=settings.RTD_AWS_SCALE_IN_ACCESS_KEY,
aws_secret_access_key=settings.RTD_AWS_SCALE_IN_SECRET_ACCESS_KEY,
region_name=settings.RTD_AWS_SCALE_IN_REGION_NAME,
)

# web-extra-i-0c3e866c4e323928f
hostname_match = re.match(r"([a-z\-]+)-(i-[a-f0-9]+)", instance)
if not hostname_match:
log.warning(
"Unable to set scale-in protection. Hostname name matching not found.",
)
return
scaling_group, instance_id = hostname_match.groups()

# Set protection on instance
try:
asg.set_instance_protection(
InstanceIds=[instance_id],
AutoScalingGroupName=scaling_group,
ProtectedFromScaleIn=protected_from_scale_in,
)
except Exception:
log.exception("Failed when trying to set instance protection.")


class BuildRequest(Request):

def on_timeout(self, soft, timeout):
Expand Down
4 changes: 3 additions & 1 deletion readthedocs/rtd_tests/tests/test_project_forms.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from unittest import mock

from django.contrib.auth.models import User
from django.test import TestCase
Expand Down Expand Up @@ -258,8 +259,9 @@ def test_can_update_privacy_level(self):
self.assertTrue(form.is_valid())
self.assertEqual(self.project.privacy_level, PRIVATE)

@mock.patch("readthedocs.projects.tasks.builds.update_docs_task")
@override_settings(ALLOW_PRIVATE_REPOS=False)
def test_custom_readthedocs_yaml(self):
def test_custom_readthedocs_yaml(self, update_docs_task):
custom_readthedocs_yaml_path = "folder/.readthedocs.yaml"
form = ProjectAdvancedForm(
{
Expand Down

0 comments on commit 3a9befe

Please sign in to comment.