Skip to content

Commit

Permalink
Improve data fetching and processing accuracy
Browse files Browse the repository at this point in the history
  • Loading branch information
jstrieb committed Aug 18, 2020
1 parent f7792fd commit 517eadd
Showing 1 changed file with 100 additions and 34 deletions.
134 changes: 100 additions & 34 deletions github_stats.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#!/usr/bin/python3

import json
import os

from typing import Dict, List, Optional
from typing import Dict, List, Optional, Set

import requests

Expand Down Expand Up @@ -31,18 +31,53 @@ def query(self, generated_query: str) -> Dict:
return r.json()

@staticmethod
def repos_overview(after: Optional[str] = None) -> str:
def repos_overview(contrib_cursor: Optional[str] = None,
owned_cursor: Optional[str] = None) -> str:
return f"""{{
viewer {{
repositories(
first: 100,
orderBy: {{
field: UPDATED_AT,
direction: DESC
}},
after: {"null" if owned_cursor is None else '"' + owned_cursor + '"'}
) {{
pageInfo {{
hasNextPage
endCursor
}}
nodes {{
nameWithOwner
stargazers {{
totalCount
}}
forkCount
languages(first: 10, orderBy: {{field: SIZE, direction: DESC}}) {{
edges {{
size
node {{
name
color
}}
}}
}}
}}
}}
repositoriesContributedTo(
first: 100,
includeUserRepositories: true,
includeUserRepositories: false,
orderBy: {{
field: UPDATED_AT,
direction: DESC
}},
contributionTypes: COMMIT,
after: {"null" if after is None else '"' + after + '"'}
contributionTypes: [
COMMIT,
PULL_REQUEST,
REPOSITORY,
PULL_REQUEST_REVIEW
]
after: {"null" if contrib_cursor is None else '"' + contrib_cursor + '"'}
) {{
pageInfo {{
hasNextPage
Expand Down Expand Up @@ -84,7 +119,10 @@ def contrib_years() -> str:
@staticmethod
def contribs_by_year(year: str) -> str:
return f"""
year{year}: contributionsCollection(from: "{year}-01-01T00:00:00Z", to: "{year + 1}-01-01T00:00:00Z") {{
year{year}: contributionsCollection(
from: "{year}-01-01T00:00:00Z",
to: "{int(year) + 1}-01-01T00:00:00Z"
) {{
contributionCalendar {{
totalContributions
}}
Expand All @@ -104,7 +142,8 @@ def all_contribs(cls, years: List[str]) -> str:


class Stats(object):
def __init__(self, access_token: str):
def __init__(self, access_token: str, exclude_repos: Optional[Set] = None):
self._exclude_repos = set() if exclude_repos is None else exclude_repos
self.queries = Queries(access_token)

self._stargazers = None
Expand All @@ -119,72 +158,99 @@ def __str__(self) -> str:
)
return f"""Stargazers: {self.stargazers:,}
Forks: {self.forks:,}
Total contributions: {self.total_contributions:,}
Languages:
All-time contributions: {self.total_contributions:,}
Repositories with contributions: {len(self.repos)}
Languages:
- {formatted_languages}"""

def get_stats(self):
def get_stats(self) -> None:
self._stargazers = 0
self._forks = 0
self._languages = dict()
self._repos = set()

next_page = None
langs = dict()
next_owned = None
next_contrib = None
while True:
repos = self.queries.query(Queries.repos_overview(next_page)) \
.get("data", {}) \
.get("viewer", {}) \
.get("repositoriesContributedTo", {})
raw_results = self.queries.query(
Queries.repos_overview(owned_cursor=next_owned,
contrib_cursor=next_contrib)
)
contrib_repos = (raw_results
.get("data", {})
.get("viewer", {})
.get("repositoriesContributedTo", {}))
owned_repos = (raw_results
.get("data", {})
.get("viewer", {})
.get("repositories", {}))
repos = contrib_repos.get("nodes", []) + owned_repos.get("nodes", [])

for repo in repos.get("nodes", []):
self._repos.add(repo.get("nameWithOwner"))
for repo in repos:
name = repo.get("nameWithOwner")
self._repos.add(name)
self._stargazers += repo.get("stargazers").get("totalCount", 0)
self._forks += repo.get("forkCount", 0)

if name in self._exclude_repos:
continue
for lang in repo.get("languages", {}).get("edges", []):
name = lang.get("node", {}).get("name", "Other")
langs[name] = langs.get(name, 0) + lang.get("size", 0)
if name in self.languages:
self.languages[name]["size"] += lang.get("size", 0)
self.languages[name]["occurrences"] += 1
else:
self.languages[name] = {
"size": lang.get("size", 0),
"occurrences": 1,
"color": lang.get("color")
}

if repos.get("pageInfo", {}).get("hasNextPage", False):
next_page = repos.get("pageInfo", {}).get("endCursor")
if (owned_repos.get("pageInfo", {}).get("hasNextPage", False)
or contrib_repos.get("pageInfo", {}).get("hasNextPage", False)):
next_owned = owned_repos.get("pageInfo", {}).get("endCursor")
next_contrib = contrib_repos.get("pageInfo", {}).get("endCursor")
else:
break

# TODO: Improve languages to scale by number of contributions to
# specific filetypes
langs_total = sum(langs.values())
self._languages = {l: 100 * (s / langs_total) for l, s in langs.items()}

@property
def stargazers(self):
def stargazers(self) -> int:
if self._stargazers is not None:
return self._stargazers
self.get_stats()
return self._stargazers

@property
def forks(self):
def forks(self) -> int:
if self._forks is not None:
return self._forks
self.get_stats()
return self._forks

@property
def languages(self):
def languages(self) -> Dict:
if self._languages is not None:
return self._languages
self.get_stats()
return self._languages

# TODO: Improve languages to scale by number of contributions to
# specific filetypes
langs_total = sum([v.get("size", 0) for v in self._languages.values()])
langs = {
l: 100 * (s.get("size", 0) / langs_total)
for l, s in self._languages.items()
}
return langs

@property
def repos(self):
def repos(self) -> List[str]:
if self._repos is not None:
return self._repos
self.get_stats()
return self._repos

@property
def total_contributions(self):
def total_contributions(self) -> int:
if self._total_contributions is not None:
return self._total_contributions

Expand All @@ -210,7 +276,7 @@ def total_contributions(self):

def main() -> None:
access_token = os.getenv("ACCESS_TOKEN")
s = Stats(access_token)
s = Stats(access_token, exclude_repos={"Genrep-Software/style-guides"})
print(s)


Expand Down

0 comments on commit 517eadd

Please sign in to comment.