Add script to display history for a single test across multiple jobs over time (pytorch#52000)

Summary:
Adapted from this gist: https://gist.github.com/malfet/1c34f261a28ae7af61210174394eaece

Pull Request resolved: pytorch#52000

Test Plan: Example shell session here: https://pastebin.com/HYgWZBFB
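The pastebin session above is external and may become unavailable, so here is a rough, hedged sketch of how the new script is meant to be invoked; the suite, test, and job names below are placeholders chosen for illustration, not values taken from that session, and the command assumes it is run from the root of a PyTorch checkout with boto3 and AWS credentials available:

import subprocess

# Hypothetical invocation of tools/test_history.py; all names below are placeholders.
subprocess.run(
    [
        'python', 'tools/test_history.py', 'columns',
        'TestNN',                                   # suite name (placeholder)
        'test_conv1d',                              # test name (placeholder)
        'pytorch_linux_xenial_py3_6_gcc5_4_test',   # job name (placeholder)
        '--ref', 'master',
        '--delta', '12',
    ],
    check=True,
)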

Reviewed By: walterddr

Differential Revision: D26372191

Pulled By: samestep

fbshipit-source-id: cdc9a27e1b4a0b3123a70e693b17d524e7c6cb95
samestep authored and facebook-github-bot committed Feb 11, 2021
1 parent 8908874 commit deb74ed
Showing 3 changed files with 242 additions and 0 deletions.
1 change: 1 addition & 0 deletions mypy-strict.ini
@@ -37,6 +37,7 @@ strict_equality = True
files = tools/codegen/gen.py,
    tools/autograd/*.py,
    tools/pyi/*.py,
    tools/test_history.py,
    torch/testing/_internal/mypy_wrapper.py,
    torch/testing/_internal/print_test_stats.py,
    torch/utils/benchmark/utils/common.py,
2 changes: 2 additions & 0 deletions tools/README.md
@@ -45,6 +45,8 @@ Developer tools which you might find useful:
  can conveniently run diffs on them when working on code-generation.
  (See also [generated_dirs.txt](generated_dirs.txt) which
  specifies the list of directories with generated files.)
* [test_history.py](test_history.py) - Query S3 to display history of a single
  test across multiple jobs over time.

Important if you want to run on AMD GPU:

239 changes: 239 additions & 0 deletions tools/test_history.py
@@ -0,0 +1,239 @@
#!/usr/bin/env python3

import argparse
import bz2
import json
import subprocess
from datetime import datetime
from typing import Any, Dict, List, Optional, Tuple

import boto3 # type: ignore[import]


def get_git_commit_history(
    *,
    path: str,
    ref: str
) -> List[Tuple[str, datetime]]:
    # list commits reachable from ref as (sha, commit datetime) pairs, newest first
    rc = subprocess.check_output(
        ['git', '-C', path, 'log', '--pretty=format:%H %ct', ref],
    ).decode("latin-1")
    return [
        (x[0], datetime.fromtimestamp(int(x[1])))
        for x in [line.split(" ") for line in rc.split("\n")]
    ]


def get_ossci_jsons(
    *,
    bucket: Any,
    sha: str,
    jobs: Optional[List[str]]
) -> Dict[str, Any]:
    prefix = f"test_time/{sha}/"
    objs: List[Any]
    if jobs is None:
        objs = list(bucket.objects.filter(Prefix=prefix))
    else:
        objs = []
        for job in jobs:
            objs.extend(list(bucket.objects.filter(Prefix=f"{prefix}{job}/")))
    # initial pass to avoid downloading more than necessary
    # in the case where there are multiple reports for a single sha+job
    uniqueified = {obj.key.split('/')[2]: obj for obj in objs}
    return {
        job: json.loads(bz2.decompress(obj.get()['Body'].read()))
        for job, obj in uniqueified.items()
    }


def get_case(
    *,
    data: Any,
    suite_name: str,
    test_name: str,
) -> Optional[Dict[str, Any]]:
    suite = data.get('suites', {}).get(suite_name)
    if suite:
        testcase_times = {
            case['name']: case
            for case in suite['cases']
        }
        return testcase_times.get(test_name)
    return None


def case_status(case: Dict[str, Any]) -> Optional[str]:
    for k in {'errored', 'failed', 'skipped'}:
        if case[k]:
            return k
    return None


def make_column(
    *,
    data: Any,
    suite_name: str,
    test_name: str,
    digits: int,
) -> str:
    decimals = 3
    num_length = digits + 1 + decimals
    case = get_case(data=data, suite_name=suite_name, test_name=test_name)
    if case:
        status = case_status(case)
        if status:
            return f'{status.rjust(num_length)} '
        else:
            return f'{case["seconds"]:{num_length}.{decimals}f}s'
    return ' ' * (num_length + 1)


def make_columns(
    *,
    bucket: Any,
    sha: str,
    jobs: List[str],
    suite_name: str,
    test_name: str,
    digits: int,
) -> str:
    jsons = get_ossci_jsons(bucket=bucket, sha=sha, jobs=jobs)
    return ' '.join(
        make_column(
            data=jsons.get(job, {}),
            suite_name=suite_name,
            test_name=test_name,
            digits=digits,
        )
        for job in jobs
    )


def make_lines(
    *,
    bucket: Any,
    sha: str,
    jobs: Optional[List[str]],
    suite_name: str,
    test_name: str,
) -> List[str]:
    jsons = get_ossci_jsons(bucket=bucket, sha=sha, jobs=jobs)
    lines = []
    for job, data in jsons.items():
        case = get_case(data=data, suite_name=suite_name, test_name=test_name)
        if case:
            status = case_status(case)
            lines.append(f'{job} {case["seconds"]} {status or ""}')
    return lines


def display_history(
    *,
    bucket: Any,
    commits: List[Tuple[str, datetime]],
    jobs: Optional[List[str]],
    suite_name: str,
    test_name: str,
    delta: int,
    mode: str,
    digits: int,
) -> None:
    prev_time = datetime.now()
    for sha, time in commits:
        # skip commits that are within `delta` hours of the previously printed row
        if (prev_time - time).total_seconds() < delta * 3600:
            continue
        prev_time = time
        lines: List[str]
        if mode == 'columns':
            assert jobs is not None
            lines = [make_columns(
                bucket=bucket,
                sha=sha,
                jobs=jobs,
                suite_name=suite_name,
                test_name=test_name,
                digits=digits,
            )]
        else:
            assert mode == 'multiline'
            lines = make_lines(
                bucket=bucket,
                sha=sha,
                jobs=jobs,
                suite_name=suite_name,
                test_name=test_name,
            )
        for line in lines:
            print(f"{time} {sha} {line}".rstrip())


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        __file__,
        description='Display the history of a test.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        'mode',
        choices=['columns', 'multiline'],
        help='output format',
    )
    parser.add_argument(
        '--pytorch',
        help='path to local PyTorch clone',
        default='.',
    )
    parser.add_argument(
        '--ref',
        help='starting point (most recent Git ref) to display history for',
        default='master',
    )
    parser.add_argument(
        '--delta',
        type=int,
        help='minimum number of hours between rows',
        default=12,
    )
    parser.add_argument(
        '--digits',
        type=int,
        help='(columns) number of digits to display before the decimal point',
        default=4,
    )
    parser.add_argument(
        '--all',
        action='store_true',
        help='(multiline) ignore listed jobs, show all jobs for each commit',
    )
    parser.add_argument(
        'suite',
        help='name of the suite containing the test',
    )
    parser.add_argument(
        'test',
        help='name of the test',
    )
    parser.add_argument(
        'job',
        nargs='*',
        help='names of jobs to display columns for, in order',
        default=[],
    )
    args = parser.parse_args()

    commits = get_git_commit_history(path=args.pytorch, ref=args.ref)

    s3 = boto3.resource("s3")
    bucket = s3.Bucket('ossci-metrics')

    display_history(
        bucket=bucket,
        commits=commits,
        jobs=None if args.all else args.job,
        suite_name=args.suite,
        test_name=args.test,
        delta=args.delta,
        mode=args.mode,
        digits=args.digits,
    )
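For readers who want to drive the tool from code rather than the CLI wrapper above, here is a minimal sketch (not part of the commit) that reuses the functions in this file. It assumes boto3 is installed with AWS credentials that can read the ossci-metrics bucket, that it is run from the root of a PyTorch clone so that tools.test_history is importable, and that the suite and test names are placeholders:

import boto3

from tools.test_history import display_history, get_git_commit_history

# Placeholder values for illustration only.
commits = get_git_commit_history(path='.', ref='master')
bucket = boto3.resource('s3').Bucket('ossci-metrics')
display_history(
    bucket=bucket,
    commits=commits,
    jobs=None,                 # None means "all jobs", as with --all
    suite_name='TestNN',       # suite name (placeholder)
    test_name='test_conv1d',   # test name (placeholder)
    delta=12,
    mode='multiline',
    digits=4,
)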
