Skip to content

Commit

Permalink
[FEA] changelog generator (#599)
Browse files Browse the repository at this point in the history
* changelog generator

Signed-off-by: Peixin Li <pxli@nyu.edu>
  • Loading branch information
pxLi authored Sep 1, 2020
1 parent abeb5bb commit 78f2519
Showing 1 changed file with 329 additions and 0 deletions.
329 changes: 329 additions & 0 deletions scripts/generate-changelog
Original file line number Diff line number Diff line change
@@ -0,0 +1,329 @@
#!/usr/bin/env python

# Copyright (c) 2020, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""A simple changelog generator
This tool takes list of release versions to generate `CHANGELOG.md`.
The changelog will include all merged PRs w/o `[bot]` postfix,
and issues that include the labels `bug`, `feature request`, `SQL`, `performance` and `shuffle`,
minus any issues with the labels `wontfix`, `invalid` or `duplicate`.
For each project there should be an issue subsection for,
Features: all issues with label `feature request` + `SQL`
Performance: all issues with label `performance` + `shuffle`
Bugs fixed: all issues with label `bug`
To deduplicate section, the priority should be `Bugs fixed > Performance > Features`
NOTE: This is a repo-specific script, so you may not use it in other places.
Release version from arguments will be used to map project name and branch name.
Mapping Pattern:
release -> project: Release {release}, branch: branch-{release}
e.g.
0.1 -> project: Release 0.1, branch: branch-0.1
0.2 -> project: Release 0.2, branch: branch-0.2
Dependencies:
- requests
Usage:
cd spark-rapids/
# generate changelog for release 0.1,0.2
scripts/changelog --token=<GITHUB_PERSONAL_ACCESS_TOKEN> --releases=0.1,0.2
# generate changelog for release 0.1,0.2 to /tmp/CHANGELOG.md
GITHUB_TOKEN=XXX scripts/changelog --releases=0.1,0.2 --path=/tmp/CHANGELOG.md
"""
import os
import sys
from argparse import ArgumentParser
from collections import OrderedDict
from datetime import date

import requests

# Constants
RELEASE = "Release"
PULL_REQUESTS = "pullRequests"
ISSUES = "issues"
# Subtitles
INVALID = 'Invalid'
BUGS_FIXED = 'Bugs Fixed'
PERFORMANCE = 'Performance'
FEATURES = 'Features'
PRS = 'PRs'
# Labels
LABEL_WONTFIX, LABEL_INVALID, LABEL_DUPLICATE = 'wontfix', 'invalid', 'duplicate'
LABEL_BUG = 'bug'
LABEL_PERFORMANCE, LABEL_SHUFFLE = 'performance', 'shuffle'
LABEL_FEATURE, LABEL_SQL = 'feature request', 'SQL'
# Queries
query_pr = """
query ($baseRefName: String!, $after: String) {
repository(name: "spark-rapids", owner: "NVIDIA") {
pullRequests(states: [MERGED], baseRefName: $baseRefName, first: 100, after: $after) {
totalCount
nodes {
number
title
headRefName
baseRefName
state
url
labels(first: 10) {
nodes {
name
}
}
projectCards(first: 10) {
nodes {
project {
name
}
column {
name
}
}
}
mergedAt
}
pageInfo {
hasNextPage
endCursor
}
}
}
}
"""
query_issue = """
query ($after: String) {
repository(name: "spark-rapids", owner: "NVIDIA") {
issues(states: [CLOSED], labels: ["SQL", "feature request", "performance", "bug", "shuffle"], first: 100, after: $after) {
totalCount
nodes {
number
title
state
url
labels(first: 10) {
nodes {
name
}
}
projectCards(first: 10) {
nodes {
project {
name
}
column {
name
}
}
}
closedAt
}
pageInfo {
hasNextPage
endCursor
}
}
}
}
"""


def process_changelog(resource_type: str, changelog: dict, releases: set, projects: set, no_project_prs: list,
token: str):
if resource_type == PULL_REQUESTS:
items = process_pr(releases=releases, token=token)
time_field = 'mergedAt'
elif resource_type == ISSUES:
items = process_issue(token=token)
time_field = 'closedAt'
else:
print(f"[process_changelog] Invalid type: {resource_type}")
sys.exit(1)

for item in items:
if len(item['projectCards']['nodes']) == 0:
if resource_type == PULL_REQUESTS:
no_project_prs.append(item)
continue

project = item['projectCards']['nodes'][0]['project']['name']
if not release_project(project, projects):
continue

if project not in changelog:
changelog[project] = {
FEATURES: [],
PERFORMANCE: [],
BUGS_FIXED: [],
PRS: [],
}

labels = set()
for label in item['labels']['nodes']:
labels.add(label['name'])
category = rules(labels)
if resource_type == ISSUES and category == INVALID:
continue
if resource_type == PULL_REQUESTS:
if '[bot]' in item['title']: # skip auto-gen PR, created by our github actions workflows
continue
category = PRS

changelog[project][category].append({
"number": item['number'],
"title": item['title'],
"url": item['url'],
"time": item[time_field],
})


def process_pr(releases: set, token: str):
pr = []
for rel in releases:
pr.extend(fetch(resource_type=PULL_REQUESTS, token=token, variables={'baseRefName': f"branch-{rel}"}))
return pr


def process_issue(token: str):
return fetch(resource_type=ISSUES, token=token)


def fetch(resource_type: str, token: str, variables: dict = None):
items = []
if resource_type == PULL_REQUESTS and variables:
q = query_pr
elif resource_type == ISSUES:
q = query_issue
variables = {}
else:
return items

has_next = True
while has_next:
res = post(query=q, token=token, variable=variables)
if res.status_code == 200:
d = res.json()
has_next = d['data']['repository'][resource_type]["pageInfo"]["hasNextPage"]
variables['after'] = d['data']['repository'][resource_type]["pageInfo"]["endCursor"]
items.extend(d['data']['repository'][resource_type]['nodes'])
else:
raise Exception("Query failed to run by returning code of {}. {}".format(res.status_code, q))
return items


def post(query: str, token: str, variable: dict):
return requests.post('https://api.github.com/graphql',
json={'query': query, 'variables': variable},
headers={"Authorization": f"token {token}"})


def release_project(project_name: str, projects: set):
if project_name in projects:
return True
return False


def rules(labels: set):
if LABEL_WONTFIX in labels or LABEL_INVALID in labels or LABEL_DUPLICATE in labels:
return INVALID
if LABEL_BUG in labels:
return BUGS_FIXED
if LABEL_PERFORMANCE in labels or LABEL_SHUFFLE in labels:
return PERFORMANCE
if LABEL_FEATURE in labels or LABEL_SQL in labels:
return FEATURES
return INVALID


def form_changelog(path: str, changelog: dict):
sorted_dict = OrderedDict(sorted(changelog.items(), reverse=True))
subsections = ""
for project_name, issues in sorted_dict.items():
subsections += f"\n## {project_name}\n"
subsections += form_subsection(issues, FEATURES)
subsections += form_subsection(issues, PERFORMANCE)
subsections += form_subsection(issues, BUGS_FIXED)
subsections += form_subsection(issues, PRS)
markdown = f"""# Change log
Generated on {date.today()}
{subsections}
"""
with open(path, "w") as file:
file.write(markdown)


def form_subsection(issues: dict, subtitle: str):
if len(issues[subtitle]) == 0:
return ''
subsection = f"\n### {subtitle}\n"
subsection += "|||\n|:---|:---|"
for issue in sorted(issues[subtitle], key=lambda x: x['time'], reverse=True):
subsection += f"\n|[#{issue['number']}]({issue['url']})|{issue['title']}|"
return subsection


def print_no_project_pr(no_project_prs: list):
if len(no_project_prs) != 0:
print("Merged Pull Requests w/o Project:")
for pr in no_project_prs:
print(f"{pr['baseRefName']} #{pr['number']} {pr['title']} {pr['url']}")


def main(rels: str, path: str, token: str):
print('Generating changelog ...')

try:
changelog = {} # changelog dict
releases = {x.strip() for x in rels.split(',')}
projects = {f"{RELEASE} {rel}" for rel in releases}
no_project_prs = [] # list of merge pr w/o project

print('Processing pull requests ...')
process_changelog(resource_type=PULL_REQUESTS, changelog=changelog,
releases=releases, projects=projects,
no_project_prs=no_project_prs, token=token)
print('Processing issues ...')
process_changelog(resource_type=ISSUES, changelog=changelog,
releases=releases, projects=projects,
no_project_prs=no_project_prs, token=token)
# form doc
form_changelog(path=path, changelog=changelog)
except Exception as e: # pylint: disable=broad-except
print(e)
sys.exit(1)

print('Done.')
# post action
print_no_project_pr(no_project_prs=no_project_prs)


if __name__ == '__main__':
parser = ArgumentParser(description="Changelog Generator")
parser.add_argument("--releases", help="list of release versions, separated by comma",
default="0.1,0.2")
parser.add_argument("--token", help="github token, will use GITHUB_TOKEN if empty", default='')
parser.add_argument("--path", help="path for generated changelog file", default='./CHANGELOG.md')
args = parser.parse_args()

github_token = args.token if args.token else os.environ.get('GITHUB_TOKEN')
assert github_token, 'env GITHUB_TOKEN should not be empty'

main(rels=args.releases, path=args.path, token=github_token)

0 comments on commit 78f2519

Please sign in to comment.