From 105fdc9b2a52d9f70e14539b506c3a5460d2a20b Mon Sep 17 00:00:00 2001 From: pxLi Date: Tue, 1 Sep 2020 23:21:29 +0800 Subject: [PATCH] [FEA] changelog generator (#599) * changelog generator Signed-off-by: Peixin Li --- scripts/generate-changelog | 329 +++++++++++++++++++++++++++++++++++++ 1 file changed, 329 insertions(+) create mode 100755 scripts/generate-changelog diff --git a/scripts/generate-changelog b/scripts/generate-changelog new file mode 100755 index 00000000000..49f319acabe --- /dev/null +++ b/scripts/generate-changelog @@ -0,0 +1,329 @@ +#!/usr/bin/env python + +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A simple changelog generator + +This tool takes list of release versions to generate `CHANGELOG.md`. +The changelog will include all merged PRs w/o `[bot]` postfix, +and issues that include the labels `bug`, `feature request`, `SQL`, `performance` and `shuffle`, +minus any issues with the labels `wontfix`, `invalid` or `duplicate`. + +For each project there should be an issue subsection for, +Features: all issues with label `feature request` + `SQL` +Performance: all issues with label `performance` + `shuffle` +Bugs fixed: all issues with label `bug` + +To deduplicate section, the priority should be `Bugs fixed > Performance > Features` + +NOTE: This is a repo-specific script, so you may not use it in other places. +Release version from arguments will be used to map project name and branch name. +Mapping Pattern: +release -> project: Release {release}, branch: branch-{release} +e.g. + 0.1 -> project: Release 0.1, branch: branch-0.1 + 0.2 -> project: Release 0.2, branch: branch-0.2 + +Dependencies: + - requests + +Usage: + cd spark-rapids/ + + # generate changelog for release 0.1,0.2 + scripts/changelog --token= --releases=0.1,0.2 + + # generate changelog for release 0.1,0.2 to /tmp/CHANGELOG.md + GITHUB_TOKEN=XXX scripts/changelog --releases=0.1,0.2 --path=/tmp/CHANGELOG.md +""" +import os +import sys +from argparse import ArgumentParser +from collections import OrderedDict +from datetime import date + +import requests + +# Constants +RELEASE = "Release" +PULL_REQUESTS = "pullRequests" +ISSUES = "issues" +# Subtitles +INVALID = 'Invalid' +BUGS_FIXED = 'Bugs Fixed' +PERFORMANCE = 'Performance' +FEATURES = 'Features' +PRS = 'PRs' +# Labels +LABEL_WONTFIX, LABEL_INVALID, LABEL_DUPLICATE = 'wontfix', 'invalid', 'duplicate' +LABEL_BUG = 'bug' +LABEL_PERFORMANCE, LABEL_SHUFFLE = 'performance', 'shuffle' +LABEL_FEATURE, LABEL_SQL = 'feature request', 'SQL' +# Queries +query_pr = """ +query ($baseRefName: String!, $after: String) { + repository(name: "spark-rapids", owner: "NVIDIA") { + pullRequests(states: [MERGED], baseRefName: $baseRefName, first: 100, after: $after) { + totalCount + nodes { + number + title + headRefName + baseRefName + state + url + labels(first: 10) { + nodes { + name + } + } + projectCards(first: 10) { + nodes { + project { + name + } + column { + name + } + } + } + mergedAt + } + pageInfo { + hasNextPage + endCursor + } + } + } +} +""" +query_issue = """ +query ($after: String) { + repository(name: "spark-rapids", owner: "NVIDIA") { + issues(states: [CLOSED], labels: ["SQL", "feature request", "performance", "bug", "shuffle"], first: 100, after: $after) { + totalCount + nodes { + number + title + state + url + labels(first: 10) { + nodes { + name + } + } + projectCards(first: 10) { + nodes { + project { + name + } + column { + name + } + } + } + closedAt + } + pageInfo { + hasNextPage + endCursor + } + } + } +} +""" + + +def process_changelog(resource_type: str, changelog: dict, releases: set, projects: set, no_project_prs: list, + token: str): + if resource_type == PULL_REQUESTS: + items = process_pr(releases=releases, token=token) + time_field = 'mergedAt' + elif resource_type == ISSUES: + items = process_issue(token=token) + time_field = 'closedAt' + else: + print(f"[process_changelog] Invalid type: {resource_type}") + sys.exit(1) + + for item in items: + if len(item['projectCards']['nodes']) == 0: + if resource_type == PULL_REQUESTS: + no_project_prs.append(item) + continue + + project = item['projectCards']['nodes'][0]['project']['name'] + if not release_project(project, projects): + continue + + if project not in changelog: + changelog[project] = { + FEATURES: [], + PERFORMANCE: [], + BUGS_FIXED: [], + PRS: [], + } + + labels = set() + for label in item['labels']['nodes']: + labels.add(label['name']) + category = rules(labels) + if resource_type == ISSUES and category == INVALID: + continue + if resource_type == PULL_REQUESTS: + if '[bot]' in item['title']: # skip auto-gen PR, created by our github actions workflows + continue + category = PRS + + changelog[project][category].append({ + "number": item['number'], + "title": item['title'], + "url": item['url'], + "time": item[time_field], + }) + + +def process_pr(releases: set, token: str): + pr = [] + for rel in releases: + pr.extend(fetch(resource_type=PULL_REQUESTS, token=token, variables={'baseRefName': f"branch-{rel}"})) + return pr + + +def process_issue(token: str): + return fetch(resource_type=ISSUES, token=token) + + +def fetch(resource_type: str, token: str, variables: dict = None): + items = [] + if resource_type == PULL_REQUESTS and variables: + q = query_pr + elif resource_type == ISSUES: + q = query_issue + variables = {} + else: + return items + + has_next = True + while has_next: + res = post(query=q, token=token, variable=variables) + if res.status_code == 200: + d = res.json() + has_next = d['data']['repository'][resource_type]["pageInfo"]["hasNextPage"] + variables['after'] = d['data']['repository'][resource_type]["pageInfo"]["endCursor"] + items.extend(d['data']['repository'][resource_type]['nodes']) + else: + raise Exception("Query failed to run by returning code of {}. {}".format(res.status_code, q)) + return items + + +def post(query: str, token: str, variable: dict): + return requests.post('https://api.github.com/graphql', + json={'query': query, 'variables': variable}, + headers={"Authorization": f"token {token}"}) + + +def release_project(project_name: str, projects: set): + if project_name in projects: + return True + return False + + +def rules(labels: set): + if LABEL_WONTFIX in labels or LABEL_INVALID in labels or LABEL_DUPLICATE in labels: + return INVALID + if LABEL_BUG in labels: + return BUGS_FIXED + if LABEL_PERFORMANCE in labels or LABEL_SHUFFLE in labels: + return PERFORMANCE + if LABEL_FEATURE in labels or LABEL_SQL in labels: + return FEATURES + return INVALID + + +def form_changelog(path: str, changelog: dict): + sorted_dict = OrderedDict(sorted(changelog.items(), reverse=True)) + subsections = "" + for project_name, issues in sorted_dict.items(): + subsections += f"\n## {project_name}\n" + subsections += form_subsection(issues, FEATURES) + subsections += form_subsection(issues, PERFORMANCE) + subsections += form_subsection(issues, BUGS_FIXED) + subsections += form_subsection(issues, PRS) + markdown = f"""# Change log +Generated on {date.today()} +{subsections} +""" + with open(path, "w") as file: + file.write(markdown) + + +def form_subsection(issues: dict, subtitle: str): + if len(issues[subtitle]) == 0: + return '' + subsection = f"\n### {subtitle}\n" + subsection += "|||\n|:---|:---|" + for issue in sorted(issues[subtitle], key=lambda x: x['time'], reverse=True): + subsection += f"\n|[#{issue['number']}]({issue['url']})|{issue['title']}|" + return subsection + + +def print_no_project_pr(no_project_prs: list): + if len(no_project_prs) != 0: + print("Merged Pull Requests w/o Project:") + for pr in no_project_prs: + print(f"{pr['baseRefName']} #{pr['number']} {pr['title']} {pr['url']}") + + +def main(rels: str, path: str, token: str): + print('Generating changelog ...') + + try: + changelog = {} # changelog dict + releases = {x.strip() for x in rels.split(',')} + projects = {f"{RELEASE} {rel}" for rel in releases} + no_project_prs = [] # list of merge pr w/o project + + print('Processing pull requests ...') + process_changelog(resource_type=PULL_REQUESTS, changelog=changelog, + releases=releases, projects=projects, + no_project_prs=no_project_prs, token=token) + print('Processing issues ...') + process_changelog(resource_type=ISSUES, changelog=changelog, + releases=releases, projects=projects, + no_project_prs=no_project_prs, token=token) + # form doc + form_changelog(path=path, changelog=changelog) + except Exception as e: # pylint: disable=broad-except + print(e) + sys.exit(1) + + print('Done.') + # post action + print_no_project_pr(no_project_prs=no_project_prs) + + +if __name__ == '__main__': + parser = ArgumentParser(description="Changelog Generator") + parser.add_argument("--releases", help="list of release versions, separated by comma", + default="0.1,0.2") + parser.add_argument("--token", help="github token, will use GITHUB_TOKEN if empty", default='') + parser.add_argument("--path", help="path for generated changelog file", default='./CHANGELOG.md') + args = parser.parse_args() + + github_token = args.token if args.token else os.environ.get('GITHUB_TOKEN') + assert github_token, 'env GITHUB_TOKEN should not be empty' + + main(rels=args.releases, path=args.path, token=github_token)