Skip to content

Commit

Permalink
Soft device affinity implementation.
Browse files Browse the repository at this point in the history
Now that shards are being sent down as tags on perf jobs, this code can stop sending
specific device ids and instead smartly try to allocate perf jobs to bots based on
the following:

1) what bot we last triggered it on by querying swarming for a list of tasks based
on the dimensions and shard
2) what bots are currently alive by querying swarming for all bots with the given
dimensions and checking that they are not quarantined and not is_dead.

This requires that the dimensions for perf hardware are unique, which they will be with
the new set of devices that will run this recipe.

Bug: 831252
Cq-Include-Trybots: luci.chromium.try:android_optional_gpu_tests_rel;luci.chromium.try:linux_optional_gpu_tests_rel;luci.chromium.try:mac_optional_gpu_tests_rel;luci.chromium.try:win_optional_gpu_tests_rel
Change-Id: I2bfb708a1bb65dbdf85c85b976c463605fb28335
Reviewed-on: https://chromium-review.googlesource.com/1017304
Reviewed-by: Marc-Antoine Ruel <maruel@chromium.org>
Reviewed-by: Ned Nguyen <nednguyen@google.com>
Reviewed-by: Kenneth Russell <kbr@chromium.org>
Commit-Queue: Emily Hanley <eyaich@chromium.org>
Cr-Commit-Position: refs/heads/master@{#554807}
  • Loading branch information
Emily Hanley authored and Commit Bot committed Apr 30, 2018
1 parent a2e3f7a commit 681d1d4
Show file tree
Hide file tree
Showing 7 changed files with 497 additions and 192 deletions.
16 changes: 4 additions & 12 deletions testing/buildbot/chromium.perf.fyi.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
"can_use_on_swarming_builders": true,
"dimension_sets": [
{
"device_os": "O",
"device_type": "gobo",
"os": "Android",
"pool": "chrome.tests.perf-fyi"
}
Expand All @@ -36,8 +38,6 @@
},
"trigger_script": {
"args": [
"--multiple-trigger-configs",
"[{\"id\": \"build30-a7--device1\"}, {\"id\": \"build30-a7--device2\"}, {\"id\": \"build30-a7--device3\"}, {\"id\": \"build30-a7--device4\"}, {\"id\": \"build30-a7--device5\"}, {\"id\": \"build30-a7--device6\"}, {\"id\": \"build30-a7--device7\"}, {\"id\": \"build31-a7--device1\"}, {\"id\": \"build31-a7--device2\"}, {\"id\": \"build31-a7--device3\"}, {\"id\": \"build31-a7--device4\"}, {\"id\": \"build31-a7--device5\"}, {\"id\": \"build31-a7--device6\"}, {\"id\": \"build31-a7--device7\"}]",
"--multiple-dimension-script-verbose",
"True"
],
Expand Down Expand Up @@ -484,8 +484,6 @@
},
"trigger_script": {
"args": [
"--multiple-trigger-configs",
"[{\"id\": \"build246-a9\"}]",
"--multiple-dimension-script-verbose",
"True"
],
Expand Down Expand Up @@ -528,8 +526,6 @@
},
"trigger_script": {
"args": [
"--multiple-trigger-configs",
"[{\"id\": \"build246-a9\"}, {\"id\": \"build247-a9\"}, {\"id\": \"build248-a9\"}, {\"id\": \"build249-a9\"}, {\"id\": \"build250-a9\"}, {\"id\": \"build251-a9\"}, {\"id\": \"build252-a9\"}, {\"id\": \"build253-a9\"}, {\"id\": \"build254-a9\"}, {\"id\": \"build255-a9\"}, {\"id\": \"build256-a9\"}, {\"id\": \"build257-a9\"}, {\"id\": \"build258-a9\"}, {\"id\": \"build259-a9\"}, {\"id\": \"build260-a9\"}, {\"id\": \"build261-a9\"}, {\"id\": \"build262-a9\"}, {\"id\": \"build263-a9\"}, {\"id\": \"build264-a9\"}, {\"id\": \"build265-a9\"}, {\"id\": \"build266-a9\"}, {\"id\": \"build267-a9\"}, {\"id\": \"build268-a9\"}, {\"id\": \"build269-a9\"}, {\"id\": \"build270-a9\"}, {\"id\": \"build271-a9\"}]",
"--multiple-dimension-script-verbose",
"True"
],
Expand Down Expand Up @@ -571,8 +567,6 @@
},
"trigger_script": {
"args": [
"--multiple-trigger-configs",
"[{\"id\": \"build247-a9\"}]",
"--multiple-dimension-script-verbose",
"True"
],
Expand Down Expand Up @@ -636,6 +630,7 @@
"can_use_on_swarming_builders": true,
"dimension_sets": [
{
"gpu": "none",
"os": "Linux",
"pool": "chrome.tests.perf-fyi"
}
Expand All @@ -649,8 +644,6 @@
},
"trigger_script": {
"args": [
"--multiple-trigger-configs",
"[{\"id\": \"swarm77-c7\"}]",
"--multiple-dimension-script-verbose",
"True"
],
Expand Down Expand Up @@ -681,6 +674,7 @@
"can_use_on_swarming_builders": true,
"dimension_sets": [
{
"gpu": "none",
"os": "Linux",
"pool": "chrome.tests.perf-fyi"
}
Expand All @@ -694,8 +688,6 @@
},
"trigger_script": {
"args": [
"--multiple-trigger-configs",
"[{\"id\": \"swarm77-c7\"}, {\"id\": \"swarm78-c7\"}, {\"id\": \"swarm79-c7\"}]",
"--multiple-dimension-script-verbose",
"True"
],
Expand Down
42 changes: 21 additions & 21 deletions testing/buildbot/chromium.perf.json
Original file line number Diff line number Diff line change
Expand Up @@ -50559,10 +50559,10 @@
},
"trigger_script": {
"args": [
"--multiple-trigger-configs",
"[{\"id\": \"build67-a7\"}]",
"--multiple-dimension-script-verbose",
"True"
"True",
"--multiple-trigger-configs",
"[{\"id\": \"build67-a7\"}]"
],
"script": "//testing/trigger_scripts/perf_device_trigger.py"
}
Expand Down Expand Up @@ -50602,10 +50602,10 @@
},
"trigger_script": {
"args": [
"--multiple-trigger-configs",
"[{\"id\": \"build70-a7\"}]",
"--multiple-dimension-script-verbose",
"True"
"True",
"--multiple-trigger-configs",
"[{\"id\": \"build70-a7\"}]"
],
"script": "//testing/trigger_scripts/perf_device_trigger.py"
}
Expand Down Expand Up @@ -50645,10 +50645,10 @@
},
"trigger_script": {
"args": [
"--multiple-trigger-configs",
"[{\"id\": \"build68-a7\"}]",
"--multiple-dimension-script-verbose",
"True"
"True",
"--multiple-trigger-configs",
"[{\"id\": \"build68-a7\"}]"
],
"script": "//testing/trigger_scripts/perf_device_trigger.py"
}
Expand Down Expand Up @@ -50689,10 +50689,10 @@
},
"trigger_script": {
"args": [
"--multiple-trigger-configs",
"[{\"id\": \"build67-a7\"}, {\"id\": \"build68-a7\"}, {\"id\": \"build69-a7\"}, {\"id\": \"build70-a7\"}, {\"id\": \"build71-a7\"}, {\"id\": \"build72-a7\"}, {\"id\": \"build73-a7\"}, {\"id\": \"build74-a7\"}, {\"id\": \"build75-a7\"}, {\"id\": \"build76-a7\"}, {\"id\": \"build77-a7\"}, {\"id\": \"build78-a7\"}, {\"id\": \"build79-a7\"}, {\"id\": \"build80-a7\"}, {\"id\": \"build81-a7\"}, {\"id\": \"build82-a7\"}, {\"id\": \"build83-a7\"}, {\"id\": \"build84-a7\"}, {\"id\": \"build85-a7\"}, {\"id\": \"build86-a7\"}, {\"id\": \"build87-a7\"}, {\"id\": \"build88-a7\"}, {\"id\": \"build89-a7\"}, {\"id\": \"build90-a7\"}, {\"id\": \"build91-a7\"}, {\"id\": \"build92-a7\"}]",
"--multiple-dimension-script-verbose",
"True"
"True",
"--multiple-trigger-configs",
"[{\"id\": \"build67-a7\"}, {\"id\": \"build68-a7\"}, {\"id\": \"build69-a7\"}, {\"id\": \"build70-a7\"}, {\"id\": \"build71-a7\"}, {\"id\": \"build72-a7\"}, {\"id\": \"build73-a7\"}, {\"id\": \"build74-a7\"}, {\"id\": \"build75-a7\"}, {\"id\": \"build76-a7\"}, {\"id\": \"build77-a7\"}, {\"id\": \"build78-a7\"}, {\"id\": \"build79-a7\"}, {\"id\": \"build80-a7\"}, {\"id\": \"build81-a7\"}, {\"id\": \"build82-a7\"}, {\"id\": \"build83-a7\"}, {\"id\": \"build84-a7\"}, {\"id\": \"build85-a7\"}, {\"id\": \"build86-a7\"}, {\"id\": \"build87-a7\"}, {\"id\": \"build88-a7\"}, {\"id\": \"build89-a7\"}, {\"id\": \"build90-a7\"}, {\"id\": \"build91-a7\"}, {\"id\": \"build92-a7\"}]"
],
"script": "//testing/trigger_scripts/perf_device_trigger.py"
}
Expand Down Expand Up @@ -50732,10 +50732,10 @@
},
"trigger_script": {
"args": [
"--multiple-trigger-configs",
"[{\"id\": \"build69-a7\"}]",
"--multiple-dimension-script-verbose",
"True"
"True",
"--multiple-trigger-configs",
"[{\"id\": \"build69-a7\"}]"
],
"script": "//testing/trigger_scripts/perf_device_trigger.py"
}
Expand Down Expand Up @@ -50779,10 +50779,10 @@
},
"trigger_script": {
"args": [
"--multiple-trigger-configs",
"[{\"id\": \"build41-a7\"}]",
"--multiple-dimension-script-verbose",
"True"
"True",
"--multiple-trigger-configs",
"[{\"id\": \"build41-a7\"}]"
],
"script": "//testing/trigger_scripts/perf_device_trigger.py"
}
Expand Down Expand Up @@ -50823,10 +50823,10 @@
},
"trigger_script": {
"args": [
"--multiple-trigger-configs",
"[{\"id\": \"build41-a7\"}, {\"id\": \"build42-a7\"}, {\"id\": \"build43-a7\"}, {\"id\": \"build44-a7\"}, {\"id\": \"build45-a7\"}, {\"id\": \"build46-a7\"}, {\"id\": \"build47-a7\"}, {\"id\": \"build48-a7\"}, {\"id\": \"build49-a7\"}, {\"id\": \"build50-a7\"}, {\"id\": \"build51-a7\"}, {\"id\": \"build52-a7\"}, {\"id\": \"build53-a7\"}, {\"id\": \"build54-a7\"}, {\"id\": \"build55-a7\"}, {\"id\": \"build56-a7\"}, {\"id\": \"build57-a7\"}, {\"id\": \"build58-a7\"}, {\"id\": \"build59-a7\"}, {\"id\": \"build60-a7\"}, {\"id\": \"build61-a7\"}, {\"id\": \"build62-a7\"}, {\"id\": \"build63-a7\"}, {\"id\": \"build64-a7\"}, {\"id\": \"build65-a7\"}, {\"id\": \"build66-a7\"}]",
"--multiple-dimension-script-verbose",
"True"
"True",
"--multiple-trigger-configs",
"[{\"id\": \"build41-a7\"}, {\"id\": \"build42-a7\"}, {\"id\": \"build43-a7\"}, {\"id\": \"build44-a7\"}, {\"id\": \"build45-a7\"}, {\"id\": \"build46-a7\"}, {\"id\": \"build47-a7\"}, {\"id\": \"build48-a7\"}, {\"id\": \"build49-a7\"}, {\"id\": \"build50-a7\"}, {\"id\": \"build51-a7\"}, {\"id\": \"build52-a7\"}, {\"id\": \"build53-a7\"}, {\"id\": \"build54-a7\"}, {\"id\": \"build55-a7\"}, {\"id\": \"build56-a7\"}, {\"id\": \"build57-a7\"}, {\"id\": \"build58-a7\"}, {\"id\": \"build59-a7\"}, {\"id\": \"build60-a7\"}, {\"id\": \"build61-a7\"}, {\"id\": \"build62-a7\"}, {\"id\": \"build63-a7\"}, {\"id\": \"build64-a7\"}, {\"id\": \"build65-a7\"}, {\"id\": \"build66-a7\"}]"
],
"script": "//testing/trigger_scripts/perf_device_trigger.py"
}
Expand Down
82 changes: 47 additions & 35 deletions testing/trigger_scripts/base_test_triggerer.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,21 +30,18 @@

SWARMING_PY = os.path.join(SRC_DIR, 'tools', 'swarming_client', 'swarming.py')


def strip_unicode(obj):
  """Returns |obj| with every unicode string inside it encoded as UTF-8.

  Lists and dictionaries are walked recursively and rebuilt; dictionary keys
  are converted as well as values, and the dictionary's own type is used to
  construct the result. Characters that cannot be encoded are substituted
  (the 'replace' error handler). Any other kind of object is returned as is.
  """
  if isinstance(obj, unicode):
    encoded = obj.encode('utf-8', 'replace')
    return encoded

  if isinstance(obj, list):
    return [strip_unicode(item) for item in obj]

  if isinstance(obj, dict):
    # Rebuild through the original mapping type from converted (key, value)
    # pairs.
    converted_pairs = [
        (strip_unicode(key), strip_unicode(value))
        for key, value in obj.iteritems()]
    return type(obj)(converted_pairs)

  return obj


Expand Down Expand Up @@ -116,6 +113,36 @@ def parse_bot_configs(self, args):
if not all(isinstance(entry, dict) for entry in self._bot_configs):
raise ValueError('Bot configurations must all be dictionaries')

# TODO(eyaich): Move the stateless logic that is specific to querying
# swarming to its own object to make trigger logic more clear.
def query_swarming(self, api, query_args, verbose,
                   limit='0',
                   server='chromium-swarm.appspot.com',
                   service_account=None):
  """Runs a swarming.py 'query' command and returns the decoded JSON result.

  Args:
    api: The Swarming API path to query (e.g. 'bots/count').
    query_args: Query parameters, in any form accepted by urllib.urlencode;
        they are appended to |api| after a '?'.
    verbose: Forwarded to run_swarming().
    limit: Value passed to swarming.py's --limit flag.
    server: Swarming server passed to swarming.py's -S flag.
    service_account: Optional value for --auth-service-account-json; the
        flag is only added when this is truthy.

  Returns:
    The JSON that swarming.py wrote to its --json output file, with unicode
    strings re-encoded as UTF-8.

  Raises:
    Exception: if run_swarming() returns a truthy (error) value.
  """
  encoded_args = urllib.urlencode(query_args)
  # Create the temp file *before* entering the try block. If creation fails
  # there is nothing to clean up, and the finally clause must not touch an
  # unbound |temp_file| (that would raise UnboundLocalError and hide the
  # original error).
  temp_file = self.make_temp_file(prefix='base_trigger_dimensions',
                                  suffix='.json')
  try:
    args = ['query',
            '-S',
            server,
            '--limit',
            limit,
            '--json',
            temp_file]
    # Add in service account auth if present
    if service_account:
      args.append('--auth-service-account-json')
      args.append(service_account)
    # Append the query at the end
    args.append('%s?%s' % (api, encoded_args))
    ret = self.run_swarming(args, verbose)
    if ret:
      raise Exception('Error running swarming.py')
    return self.read_encoded_json_from_temp_file(temp_file)
  finally:
    self.delete_temp_file(temp_file)

def query_swarming_for_bot_configs(self, verbose):
# Query Swarming to figure out which bots are available.
for config in self._bot_configs:
Expand All @@ -125,34 +152,16 @@ def query_swarming_for_bot_configs(self, verbose):
# Ignore dead and quarantined bots.
values.append(('is_dead', 'FALSE'))
values.append(('quarantined', 'FALSE'))
query_arg = urllib.urlencode(values)

temp_file = self.make_temp_file(prefix='base_trigger_dimensions',
suffix='.json')
try:
ret = self.run_swarming(['query',
'-S',
'chromium-swarm.appspot.com',
'--limit',
'0',
'--json',
temp_file,
('bots/count?%s' % query_arg)],
verbose)
if ret:
raise Exception('Error running swarming.py')
with open(temp_file) as fp:
query_result = strip_unicode(json.load(fp))
# Summarize number of available bots per configuration.
count = int(query_result['count'])
# Be robust against errors in computation.
available = max(0, count - int(query_result['busy']))
self._bot_statuses.append({'total': count, 'available': available})
if verbose:
idx = len(self._bot_statuses) - 1
print 'Bot config %d: %s' % (idx, str(self._bot_statuses[idx]))
finally:
self.delete_temp_file(temp_file)
query_result = self.query_swarming('bots/count', values, verbose)
# Summarize number of available bots per configuration.
count = int(query_result['count'])
# Be robust against errors in computation.
available = max(0, count - int(query_result['busy']))
self._bot_statuses.append({'total': count, 'available': available})
if verbose:
idx = len(self._bot_statuses) - 1
print 'Bot config %d: %s' % (idx, str(self._bot_statuses[idx]))
# Sum up the total count of all bots.
self._total_bots = sum(x['total'] for x in self._bot_statuses)
if verbose:
Expand All @@ -178,6 +187,9 @@ def read_json_from_temp_file(self, temp_file):
with open(temp_file) as f:
return json.load(f)

def read_encoded_json_from_temp_file(self, temp_file):
  """Loads JSON from |temp_file| and returns it passed through strip_unicode().
  """
  decoded_json = self.read_json_from_temp_file(temp_file)
  return strip_unicode(decoded_json)

def write_json_to_file(self, merged_json, output_file):
with open(output_file, 'w') as f:
json.dump(merged_json, f)
Expand Down Expand Up @@ -266,13 +278,13 @@ def trigger_tasks(self, args, remaining):
self.write_json_to_file(merged_json, args.dump_json)
return 0


def setup_parser_contract(self, parser):
parser.add_argument('--multiple-trigger-configs', type=str, required=True,
@staticmethod
def setup_parser_contract(parser):
parser.add_argument('--multiple-trigger-configs', type=str, required=False,
help='The Swarming configurations to trigger tasks on, '
'in the form of a JSON array of dictionaries (these are'
' Swarming dimension_sets). At least one entry in this '
'dictionary is required.')
' Swarming dimension_sets). At least one entry is'
'required if you dont override parse_bot_configs')
parser.add_argument('--multiple-dimension-script-verbose', type=bool,
default=False, help='Turn on verbose logging')
parser.add_argument('--dump-json', required=True,
Expand Down
Loading

0 comments on commit 681d1d4

Please sign in to comment.