Skip to content

Commit

Permalink
Add queue / instance group registration to heartbeat for k8s installs
Browse files Browse the repository at this point in the history
There is some history here.

ansible#7190 <- This PR was an attempt at fixing a
bug notting ran into where some jobs on k8s installs would get stuck in Waiting
forever.

The PR mentioned above introduced a bug where there are no instance groups on a
fresh k8s-based install. This is because this process currently happens in the
launch scripts, before the database is up.

With this patch, queue / instance group registration happens in the heartbeat,
right after auto-registering the instance.
  • Loading branch information
shanemcd committed Jun 6, 2020
1 parent 4a5edf7 commit 91dbc2d
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 50 deletions.
105 changes: 58 additions & 47 deletions awx/main/management/commands/register_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,44 +16,37 @@ def __init__(self, message, changed, *args, **kwargs):
super(InstanceNotFound, self).__init__(*args, **kwargs)


class Command(BaseCommand):

def add_arguments(self, parser):
parser.add_argument('--queuename', dest='queuename', type=str,
help='Queue to create/update')
parser.add_argument('--hostnames', dest='hostnames', type=str,
help='Comma-Delimited Hosts to add to the Queue (will not remove already assigned instances)')
parser.add_argument('--controller', dest='controller', type=str,
default='', help='The controlling group (makes this an isolated group)')
parser.add_argument('--instance_percent', dest='instance_percent', type=int, default=0,
help='The percentage of active instances that will be assigned to this group'),
parser.add_argument('--instance_minimum', dest='instance_minimum', type=int, default=0,
help='The minimum number of instance that will be retained for this group from available instances')


def get_create_update_instance_group(self, queuename, instance_percent, instance_min):
class RegisterQueue:
def __init__(self, queuename, controller, instance_percent, inst_min, hostname_list):
self.instance_not_found_err = None
self.queuename = queuename
self.controller = controller
self.instance_percent = instance_percent
self.instance_min = inst_min
self.hostname_list = hostname_list

def get_create_update_instance_group(self):
created = False
changed = False

(ig, created) = InstanceGroup.objects.get_or_create(name=queuename)
if ig.policy_instance_percentage != instance_percent:
ig.policy_instance_percentage = instance_percent
(ig, created) = InstanceGroup.objects.get_or_create(name=self.queuename)
if ig.policy_instance_percentage != self.instance_percent:
ig.policy_instance_percentage = self.instance_percent
changed = True
if ig.policy_instance_minimum != instance_min:
ig.policy_instance_minimum = instance_min
if ig.policy_instance_minimum != self.instance_min:
ig.policy_instance_minimum = self.instance_min
changed = True

if changed:
ig.save()

return (ig, created, changed)

def update_instance_group_controller(self, ig, controller):
def update_instance_group_controller(self, ig):
changed = False
control_ig = None

if controller:
control_ig = InstanceGroup.objects.filter(name=controller).first()
if self.controller:
control_ig = InstanceGroup.objects.filter(name=self.controller).first()

if control_ig and ig.controller_id != control_ig.pk:
ig.controller = control_ig
Expand All @@ -62,10 +55,10 @@ def update_instance_group_controller(self, ig, controller):

return (control_ig, changed)

def add_instances_to_group(self, ig, hostname_list):
def add_instances_to_group(self, ig):
changed = False

instance_list_unique = set([x.strip() for x in hostname_list if x])
instance_list_unique = set([x.strip() for x in self.hostname_list if x])
instances = []
for inst_name in instance_list_unique:
instance = Instance.objects.filter(hostname=inst_name)
Expand All @@ -86,43 +79,61 @@ def add_instances_to_group(self, ig, hostname_list):

return (instances, changed)

def handle(self, **options):
instance_not_found_err = None
queuename = options.get('queuename')
if not queuename:
raise CommandError("Specify `--queuename` to use this command.")
ctrl = options.get('controller')
inst_per = options.get('instance_percent')
inst_min = options.get('instance_minimum')
hostname_list = []
if options.get('hostnames'):
hostname_list = options.get('hostnames').split(",")

def register(self):
with advisory_lock('cluster_policy_lock'):
with transaction.atomic():
changed2 = False
changed3 = False
(ig, created, changed1) = self.get_create_update_instance_group(queuename, inst_per, inst_min)
(ig, created, changed1) = self.get_create_update_instance_group()
if created:
print("Creating instance group {}".format(ig.name))
elif not created:
print("Instance Group already registered {}".format(ig.name))

if ctrl:
(ig_ctrl, changed2) = self.update_instance_group_controller(ig, ctrl)
if self.controller:
(ig_ctrl, changed2) = self.update_instance_group_controller(ig)
if changed2:
print("Set controller group {} on {}.".format(ctrl, queuename))
print("Set controller group {} on {}.".format(self.controller, self.queuename))

try:
(instances, changed3) = self.add_instances_to_group(ig, hostname_list)
(instances, changed3) = self.add_instances_to_group(ig)
for i in instances:
print("Added instance {} to {}".format(i.hostname, ig.name))
except InstanceNotFound as e:
instance_not_found_err = e
self.instance_not_found_err = e

if any([changed1, changed2, changed3]):
print('(changed: True)')

if instance_not_found_err:
print(instance_not_found_err.message)

class Command(BaseCommand):

def add_arguments(self, parser):
parser.add_argument('--queuename', dest='queuename', type=str,
help='Queue to create/update')
parser.add_argument('--hostnames', dest='hostnames', type=str,
help='Comma-Delimited Hosts to add to the Queue (will not remove already assigned instances)')
parser.add_argument('--controller', dest='controller', type=str,
default='', help='The controlling group (makes this an isolated group)')
parser.add_argument('--instance_percent', dest='instance_percent', type=int, default=0,
help='The percentage of active instances that will be assigned to this group'),
parser.add_argument('--instance_minimum', dest='instance_minimum', type=int, default=0,
help='The minimum number of instance that will be retained for this group from available instances')


def handle(self, **options):
queuename = options.get('queuename')
if not queuename:
raise CommandError("Specify `--queuename` to use this command.")
ctrl = options.get('controller')
inst_per = options.get('instance_percent')
instance_min = options.get('instance_minimum')
hostname_list = []
if options.get('hostnames'):
hostname_list = options.get('hostnames').split(",")

rq = RegisterQueue(queuename, ctrl, inst_per, instance_min, hostname_list)
rq.register()
if rq.instance_not_found_err:
print(rq.instance_not_found_err.message)
sys.exit(1)
5 changes: 4 additions & 1 deletion awx/main/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,8 +149,11 @@ def register(self, uuid=None, hostname=None, ip_address=None):

def get_or_register(self):
if settings.AWX_AUTO_DEPROVISION_INSTANCES:
from awx.main.management.commands.register_queue import RegisterQueue
pod_ip = os.environ.get('MY_POD_IP')
return self.register(ip_address=pod_ip)
registered = self.register(ip_address=pod_ip)
RegisterQueue('tower', None, 100, 0, []).register()
return registered
else:
return (False, self.me())

Expand Down
4 changes: 2 additions & 2 deletions installer/roles/image_build/templates/launch_awx_task.sh.j2
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ ANSIBLE_REMOTE_TEMP=/tmp ANSIBLE_LOCAL_TEMP=/tmp ansible -i "127.0.0.1," -c loca

if [ -z "$AWX_SKIP_MIGRATIONS" ]; then
awx-manage migrate --noinput
awx-manage provision_instance --hostname=$(hostname)
awx-manage register_queue --queuename=tower --instance_percent=100
fi

if [ ! -z "$AWX_ADMIN_USER" ]&&[ ! -z "$AWX_ADMIN_PASSWORD" ]; then
Expand All @@ -21,8 +23,6 @@ if [ ! -z "$AWX_ADMIN_USER" ]&&[ ! -z "$AWX_ADMIN_PASSWORD" ]; then
{% endif %}
fi
echo 'from django.conf import settings; x = settings.AWX_TASK_ENV; x["HOME"] = "/var/lib/awx"; settings.AWX_TASK_ENV = x' | awx-manage shell
awx-manage provision_instance --hostname=$(hostname)
awx-manage register_queue --queuename=tower --instance_percent=100

unset $(cut -d = -f -1 /etc/tower/conf.d/environment.sh)

Expand Down

0 comments on commit 91dbc2d

Please sign in to comment.