Skip to content

Commit

Permalink
Add lint to ensure all test files have headers with ownership info (p…
Browse files Browse the repository at this point in the history
…ytorch#66826)

Summary:
UPDATE: CI should be green now with the added files.

This should fail for now, but will pass when all action for pytorch#66232 is done.

Example failure run: https://github.com/pytorch/pytorch/runs/4052881947?check_suite_focus=true

Pull Request resolved: pytorch#66826

Reviewed By: seemethere

Differential Revision: D32087209

Pulled By: janeyx99

fbshipit-source-id: ad4b51e46de54f23aebacd592ee67577869f8bb6
  • Loading branch information
janeyx99 authored and facebook-github-bot committed Nov 4, 2021
1 parent 2766662 commit 88d86de
Show file tree
Hide file tree
Showing 12 changed files with 201 additions and 1 deletion.
71 changes: 71 additions & 0 deletions .github/scripts/export_pytorch_labels.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env python3
'''
Test ownership was introduced in https://github.com/pytorch/pytorch/issues/66232.
As a part of enforcing test ownership, we want to maintain a list of existing PyTorch labels
to verify the owners' existence. This script outputs a file containing a list of existing
pytorch/pytorch labels so that the file could be uploaded to S3.
This script assumes the correct env vars are set for AWS permissions.
'''

import boto3 # type: ignore[import]
import json
from functools import lru_cache
from typing import List, Any
from urllib.request import urlopen, Request

# Modified from https://github.com/pytorch/pytorch/blob/b00206d4737d1f1e7a442c9f8a1cadccd272a386/torch/hub.py#L129
def _read_url(url: Any) -> Any:
with urlopen(url) as r:
return r.headers, r.read().decode(r.headers.get_content_charset('utf-8'))


def request_for_labels(url: str) -> Any:
headers = {'Accept': 'application/vnd.github.v3+json'}
return _read_url(Request(url, headers=headers))


def get_last_page(header: Any) -> int:
# Link info looks like: <https://api.github.com/repositories/65600975/labels?per_page=100&page=2>;
# rel="next", <https://api.github.com/repositories/65600975/labels?per_page=100&page=3>; rel="last"
link_info = header['link']
prefix = "&page="
suffix = ">;"
return int(link_info[link_info.rindex(prefix) + len(prefix):link_info.rindex(suffix)])


def update_labels(labels: List[str], info: str) -> None:
labels_json = json.loads(info)
labels.extend([x["name"] for x in labels_json])


@lru_cache()
def get_pytorch_labels() -> List[str]:
prefix = "https://api.github.com/repos/pytorch/pytorch/labels?per_page=100"
header, info = request_for_labels(prefix + "&page=1")
labels: List[str] = []
update_labels(labels, info)

last_page = get_last_page(header)
assert last_page > 0, "Error reading header info to determine total number of pages of labels"
for page_number in range(2, last_page + 1): # skip page 1
_, info = request_for_labels(prefix + f"&page={page_number}")
update_labels(labels, info)

return labels


def send_labels_to_S3(labels: List[str]) -> None:
labels_file_name = "pytorch_labels.json"
obj = boto3.resource('s3').Object('ossci-metrics', labels_file_name)
obj.put(Body=json.dumps(labels).encode())


def main() -> None:
send_labels_to_S3(get_pytorch_labels())


if __name__ == '__main__':
main()
88 changes: 88 additions & 0 deletions .github/scripts/lint_test_ownership.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#!/usr/bin/env python3
'''
Test ownership was introduced in https://github.com/pytorch/pytorch/issues/66232.
This lint verifies that every Python test file (file that matches test_*.py or *_test.py in the test folder)
has valid ownership information in a comment header. Valid means:
- The format of the header follows the pattern "# Owner(s): ["list", "of owner", "labels"]
- Each owner label actually exists in PyTorch
- Each owner label starts with "module: " or "oncall: " or is in ACCEPTABLE_OWNER_LABELS
This file is expected to run in the root directory of pytorch/pytorch.
'''
import boto3 # type: ignore[import]
import botocore # type: ignore[import]
import fnmatch
import json
import sys
from pathlib import Path
from typing import List, Any


# Team/owner labels usually start with "module: " or "oncall: ", but the following are acceptable exceptions
ACCEPTABLE_OWNER_LABELS = ["NNC", "high priority"]
GLOB_EXCEPTIONS = [
"test/run_test.py"
]

PYTORCH_ROOT = Path(__file__).resolve().parent.parent.parent
TEST_DIR = PYTORCH_ROOT / "test"
CURRENT_FILE_NAME = Path(__file__).resolve().relative_to(PYTORCH_ROOT)

S3_RESOURCE_READ_ONLY = boto3.resource("s3", config=botocore.config.Config(signature_version=botocore.UNSIGNED))


def get_all_test_files() -> List[Path]:
test_files = list(TEST_DIR.glob("**/test_*.py"))
test_files.extend(list(TEST_DIR.glob("**/*_test.py")))
return [f for f in test_files if any([fnmatch.fnmatch(str(f), g) for g in GLOB_EXCEPTIONS])]


def get_pytorch_labels() -> Any:
bucket = S3_RESOURCE_READ_ONLY.Bucket("ossci-metrics")
summaries = bucket.objects.filter(Prefix="pytorch_labels.json")
for summary in summaries:
labels = summary.get()["Body"].read()
return json.loads(labels)


# Returns a string denoting the error invalidating the label OR an empty string if nothing is wrong
def validate_label(label: str, pytorch_labels: List[str]) -> str:
if label not in pytorch_labels:
return f"{label} is not a PyTorch label (please choose from https://github.com/pytorch/pytorch/labels)"
if label.startswith("module:") or label.startswith("oncall:") or label in ACCEPTABLE_OWNER_LABELS:
return ""
return f"{label} is not an acceptable owner (please update to another label or edit ACCEPTABLE_OWNERS_LABELS " \
"in {CURRENT_FILE_NAME}"


# Returns a string denoting the error invalidating the file OR an empty string if nothing is wrong
def validate_file(filename: Path, pytorch_labels: List[str]) -> str:
prefix = "# Owner(s): "
relative_name = Path(filename).relative_to(PYTORCH_ROOT)
with open(filename) as f:
for line in f.readlines():
if line.startswith(prefix):
labels = json.loads(line[len(prefix):])
labels_msgs = [validate_label(label, pytorch_labels) for label in labels]
file_msg = ", ".join([x for x in labels_msgs if x != ""])
return f"{relative_name}: {file_msg}" if file_msg != "" else ""
return f"{relative_name}: missing a comment header with ownership information."


def main() -> None:
test_file_paths = get_all_test_files()
pytorch_labels = get_pytorch_labels()

file_msgs = [validate_file(f, pytorch_labels) for f in test_file_paths]
err_msg = "\n".join([x for x in file_msgs if x != ""])
if err_msg != "":
err_msg = err_msg + "\n\nIf you see files with missing ownership information above, " \
"please add the following line\n\n# Owner(s): [\"<owner: label>\"]\n\nto the top of each test file. " \
"The owner should be an existing pytorch/pytorch label."
print(err_msg)
sys.exit(1)


if __name__ == '__main__':
main()
3 changes: 2 additions & 1 deletion .github/workflows/lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ jobs:
- name: Ensure all test files have header containing ownership information
if: always()
run: |
(! git grep -L "# Owner(s): \[" ./test/distributed/**/test_*.py || (printf "The above test files are missing a comment header with ownership information; please add the following line\n\n# Owner(s): [\"<owner: label>\"]\n\nto the top of each test file. The owner should be an existing pytorch/pytorch label."; false))
python3 -m pip install boto3==1.16.34
.github/scripts/lint_test_ownership.py
clang-format:
runs-on: ubuntu-18.04
Expand Down
24 changes: 24 additions & 0 deletions .github/workflows/update_pytorch_labels.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
name: Update PyTorch Labels in S3

on:
label:
workflow_dispatch:

concurrency:
group: 1
cancel-in-progress: true

jobs:
update-labels-in-S3:
runs-on: ubuntu-18.04
if: ${{ github.repository == 'pytorch/pytorch' }}
steps:
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
- name: Update PyTorch labels list in S3
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_S3_UPDATE_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_S3_UPDATE_SECRET_ACCESS_KEY }}
run: |
python3 -m pip install boto3==1.16.34
.github/scripts/export_pytorch_labels.py
2 changes: 2 additions & 0 deletions test/benchmark_utils/test_benchmark_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Owner(s): ["module: unknown"]

import collections
import json
import os
Expand Down
2 changes: 2 additions & 0 deletions test/mobile/test_lite_script_type.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Owner(s): ["oncall: mobile"]

import torch
import torch.utils.bundled_inputs
import io
Expand Down
2 changes: 2 additions & 0 deletions test/onnx/model_defs/op_test.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Owner(s): ["module: onnx"]

import torch
import torch.nn as nn

Expand Down
2 changes: 2 additions & 0 deletions test/package/package_a/test_nn_module.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Owner(s): ["oncall: package/deploy"]

import torch


Expand Down
2 changes: 2 additions & 0 deletions test/package/test_load_bc_packages.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Owner(s): ["oncall: package/deploy"]

from pathlib import Path
from unittest import skipIf

Expand Down
2 changes: 2 additions & 0 deletions test/test_bundled_images.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#!/usr/bin/env python3
# Owner(s): ["module: unknown"]

import torch
import torch.utils.bundled_inputs
import io
Expand Down
2 changes: 2 additions & 0 deletions test/test_bundled_inputs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#!/usr/bin/env python3
# Owner(s): ["oncall: mobile"]

import io
import textwrap
from typing import List, Optional, Dict
Expand Down
2 changes: 2 additions & 0 deletions test/test_jit_autocast.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# Owner(s): ["oncall: jit"]

import torch
from torch.cuda.amp import autocast
from typing import Optional
Expand Down

0 comments on commit 88d86de

Please sign in to comment.