Skip to content
This repository has been archived by the owner on Jul 18, 2022. It is now read-only.

Commit

Permalink
Merge pull request #19 from Bonfire/tom-backend-changes
Browse files Browse the repository at this point in the history
Tom backend changes
  • Loading branch information
Baran Barut committed Nov 20, 2019
2 parents c384ded + da9c915 commit cd435a3
Showing 1 changed file with 103 additions and 49 deletions.
152 changes: 103 additions & 49 deletions Backend/app_interface.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
'''
Authors: Thomas Serrano, Curtis Helsel, Baran Barut
Last Updated: NOV-20-2019
'''
from config import * #Application stuff
from predictor import * #ML stuff
Expand All @@ -10,60 +10,114 @@
from github import Github
from os import walk, path #For parsing through directories

def get_filenames(base_dir, file_path, file_names):
    '''
    Collects the names of all non-hidden files under a given file path.

    base_dir   - common base directory; appended names are relative to it
    file_path  - directory to search (searched recursively)
    file_names - list that the relative file names are appended to (modified in place)

    Names always use '/' as the separator. Entries whose own name starts with
    '.' (.git, .gitignore, etc.) are skipped at every depth, and hidden
    directories are not descended into. Nothing is returned.
    '''
    #Imported locally so this helper does not rely on another import providing it
    from os import listdir

    for cur in listdir(file_path):

        #Filter out stuff like .gitignore, .git etc
        if cur.startswith('.'):
            continue

        #Full path is needed to test whether cur is a directory
        full_path = path.join(file_path, cur)

        #If directory, recursively go through it
        if path.isdir(full_path):
            get_filenames(base_dir, full_path, file_names)
        #If file...
        else:
            #relpath only strips the leading base directory; a plain string
            #replace would also remove the base name wherever else it
            #appears in the path
            relative_name = path.relpath(full_path, base_dir)
            file_names.append(relative_name.replace(path.sep, '/'))

def app_interface():
    '''
    Provides an interface to connect the application with the machine learning model.

    Input is read from sys.argv (the program name is argument 0):
        1 argument  - repo_dir                                 (local repo)
        2 arguments - repo_dir, repo_name                      (remote & public repo)
        4 arguments - repo_dir, repo_name, username, password  (remote & private repo)

    Prints one "<file name>,<prediction>" line per file to std out. If the
    number of arguments is wrong, an error message is printed and the
    program exits.
    '''
    #Get number of command-line arguments
    argc = len(sys.argv)

    #Must have 1, 2, or 4 arguments (program name is the first argument)
    if argc not in (2, 3, 5):
        print("\n\tERROR: INCORRECT NUMBER OF ARGUMENTS PASSED")
        #sys.exit is used rather than the interactive-only exit() helper
        sys.exit()

    #The first argument is the repo directory. This is required for all repos, and is the only
    #argument for local repos.
    repo_dir = sys.argv[1]
    repo = Repo(repo_dir)

    #Repo name is required for remote & public repos.
    repo_remote = argc > 2

    #NOTE(review): the Github lookup is assumed to apply to remote repos only,
    #since repo_name is only available when it is passed in - confirm.
    if repo_remote:
        repo_name = sys.argv[2]

        #Login information is required for remote & private repos.
        username = None
        password = None
        if argc > 3:
            username = sys.argv[3]
            password = sys.argv[4]

        #Get Github access. Login is None/None unless credentials were passed.
        git = Github(username, password)

        #Grabs repo object for data collection
        github_repo = git.get_repo(repo_name, lazy=False)

        #Retrieve repository data
        repo_data = list(repository_data(github_repo, repo, repo_name, repo_dir))

    #Retrieve all filenames for all directories
    files_path = []
    get_filenames(repo_dir, repo_dir, files_path)

    #Get a list of data points for each file
    avgs = get_averages(files_path, repo.head.commit.hexsha, repo)

    #Separate file names from avgs (first element of each entry is the file name)
    file_names = [a[0] for a in avgs]
    avgs_final = [a[1:] for a in avgs]

    #If this is a remote repo, we need to add the repo data, otherwise use averages
    if repo_remote:
        #Same repo-level values are appended to every file's data points
        repo_values = repo_data[1:]
        final_data = [af + repo_values for af in avgs_final]
    else:
        final_data = avgs_final

    #Predict and print results data to std out
    results = predict(final_data)

    #Final results output
    for i, f in enumerate(file_names):
        print(f + "," + str(results[i][0]))

#Main - delete later?
#Runs the interface exactly once, and only when this file is executed as a script
if __name__ == "__main__":
    app_interface()

0 comments on commit cd435a3

Please sign in to comment.