Skip to content

Commit

Permalink
ybdata console utility
Browse files Browse the repository at this point in the history
  • Loading branch information
bbengfort committed Dec 31, 2018
1 parent 265bb32 commit 9eac165
Show file tree
Hide file tree
Showing 12 changed files with 458 additions and 0 deletions.
18 changes: 18 additions & 0 deletions MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
include *.md
include *.rst
include *.txt
include *.yml
include *.cfg
include Makefile
recursive-include docs *.rst
recursive-include docs *.jpg
recursive-include docs *.png
recursive-include docs *.py
recursive-include docs Makefile
recursive-include tests *.py
recursive-include uploads *.zip
recursive-include uploads *.tgz
recursive-include uploads *.json
recursive-include fixtures *.gz
recursive-include fixtures *.md
recursive-include fixtures *.json
49 changes: 49 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Shell to use with Make
SHELL := /bin/bash

# Set important Paths
PROJECT := ybdata
LOCALPATH := $(CURDIR)/$(PROJECT)

# Export targets not associated with files
.PHONY: test coverage pip clean publish uml build deploy install

# Clean build files
clean:
find . -name "*.pyc" -print0 | xargs -0 rm -rf
find . -name "__pycache__" -print0 | xargs -0 rm -rf
find . -name "*-failed-diff.png" -print0 | xargs -0 rm -rf
-rm -rf htmlcov
-rm -rf .coverage
-rm -rf build
-rm -rf dist
-rm -rf $(PROJECT).egg-info
-rm -rf .eggs
-rm -rf site
-rm -rf classes_$(PROJECT).png
-rm -rf packages_$(PROJECT).png
-rm -rf docs/_build

# Targets for testing
test:
python setup.py test

# Publish to gh-pages
publish:
git subtree push --prefix=deploy origin gh-pages

# Draw UML diagrams
uml:
pyreverse -ASmy -k -o png -p $(PROJECT) $(LOCALPATH)

# Build the universal wheel and source distribution
build:
python setup.py sdist bdist_wheel

# Install the package from source
install:
python setup.py install

# Deploy to PyPI
deploy:
twine upload dist/*
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,20 @@

Yellowbrick datasets are hosted in an S3 drive in the cloud to allow easy access to the data for examples. This repository manages those datasets, their data structure, and interactions with the cloud.

## Getting Started

The `ybdata` script is installed as an entry point in `setup.py`. You can install the package and the script using `pip install yellowbrick-datasets`. If you've downloaded the source code from GitHub you can install the app using editable mode with pip. In the current working directory of the project, use:

```
$ pip install -e .
```

At this point you should have a `ybdata` command on your `$PATH`. Like git, this utility has many subcommands for various data related management tasks. To see a list of the commands and their descriptions:

```
$ ybdata --help
```

## Datasets Basics

All datasets must have the following properties:
Expand Down
12 changes: 12 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
## App Requirements
commis==0.4
tabulate==0.8.2
tqdm==4.28.1

## Third-Party Dependencies
#colorama==0.3.6

## Python Tools (uncomment for deployment)
#pip==18.1
#setuptools==40.6.3
#wheel==0.32.3
5 changes: 5 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[metadata]
description-file = README.md

[wheel]
universal = 1
143 changes: 143 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#!/usr/bin/env python
# setup
# Setup script for installing yellowbrick-datasets
#
# Author: Benjamin Bengfort <bbengfort@districtdatalabs.com>
# Created: Sun Dec 30 08:42:26 2018 -0500
#
# For license information, see LICENSE.txt
#
# ID: setup.py [] benjamin@bengfort.com $

"""
Setup script for installing yellowbrick-datasets.
See http://bbengfort.github.io/programmer/2016/01/20/packaging-with-pypi.html
"""

##########################################################################
## Imports
##########################################################################

import os
import codecs

from setuptools import setup
from setuptools import find_packages

##########################################################################
## Package Information
##########################################################################

## Basic information
NAME = "yellowbrick-datasets"
DESCRIPTION = "Yellowbrick datasets management and deployment scripts."
AUTHOR = "Rebecca Bilbro, Benjamin Bengfort"
EMAIL = "info@districtdatalabs.com"
MAINTAINER = "Benjamin Bengfort"
LICENSE = "Apache 2"
REPOSITORY = "https://github.com/districtdatalabs/yellowbrick-datasets"
PACKAGE = "ybdata"

## Define the keywords
## NOTE: a list (not a tuple) because distutils warns that this metadata
## field "should be a list" and coerces any other sequence type.
KEYWORDS = [
    'datasets',
    'machine learning',
    'scikit-learn',
    'matplotlib',
    'data science',
]

## Define the classifiers
## See https://pypi.python.org/pypi?%3Aaction=list_classifiers
## NOTE: a list for the same reason as KEYWORDS above.
CLASSIFIERS = [
    'Development Status :: 4 - Beta',
    'Intended Audience :: Developers',
    'Intended Audience :: Science/Research',
    'License :: OSI Approved :: Apache Software License',
    'Natural Language :: English',
    'Operating System :: OS Independent',
    'Programming Language :: Python',
    'Programming Language :: Python :: 3.6',
    'Topic :: Software Development',
    'Topic :: Software Development :: Libraries :: Python Modules',
]

## Important Paths
## PROJECT is the directory containing this setup script; the other paths
## are relative and are joined onto PROJECT when they are read.
PROJECT = os.path.abspath(os.path.dirname(__file__))
REQUIRE_PATH = "requirements.txt"
VERSION_PATH = os.path.join(PACKAGE, "version.py")
PKG_DESCRIBE = "README.md"

## Directories to ignore in find_packages
EXCLUDES = (
    "tests", "bin", "docs", "fixtures", "register",
    "notebooks", "examples", "uploads", "venv",
)

##########################################################################
## Helper Functions
##########################################################################

def read(*parts):
    """
    Return the decoded text of a file located inside the project directory.

    The path components given in *parts are joined onto PROJECT (the
    directory containing this setup script) and the file found there is
    decoded as UTF-8.
    """
    target = os.path.join(PROJECT, *parts)
    with codecs.open(target, mode='rb', encoding='utf-8') as handle:
        contents = handle.read()
    return contents


def get_version(path=VERSION_PATH):
    """
    Return the short version string declared by the file at ``path``.

    The text of that file is executed in a fresh, empty namespace and the
    ``get_version`` function it defines is then called with ``short=True``.
    Executing the file directly means the package does not have to be
    imported in order to discover its version.
    """
    scope = {}
    source = read(path)
    exec(source, scope)
    version_func = scope['get_version']
    return version_func(short=True)


def get_requires(path=REQUIRE_PATH):
    """
    Generate the requirement specifiers listed in the file at ``path``.

    Every line is stripped of surrounding whitespace; blank lines and lines
    that begin with '#' are skipped, and each remaining line is yielded.
    """
    stripped = (raw.strip() for raw in read(path).splitlines())
    for requirement in stripped:
        if not requirement or requirement.startswith('#'):
            continue
        yield requirement


##########################################################################
## Define the configuration
##########################################################################

# Execute the version module once so that every field embedding the version
# (``version`` and ``download_url``) is guaranteed to agree.
VERSION = get_version()

# Keyword arguments passed to setuptools.setup().
config = {
    "name": NAME,
    "version": VERSION,
    "description": DESCRIPTION,
    "long_description": read(PKG_DESCRIBE),
    # The long description is read from a Markdown file; declare that so
    # PyPI does not try to render it as reStructuredText.
    "long_description_content_type": "text/markdown",
    "license": LICENSE,
    "author": AUTHOR,
    "author_email": EMAIL,
    "maintainer": MAINTAINER,
    "maintainer_email": EMAIL,
    "url": REPOSITORY,
    "download_url": "{}/tarball/v{}".format(REPOSITORY, VERSION),
    "packages": find_packages(where=PROJECT, exclude=EXCLUDES),
    "install_requires": list(get_requires()),
    "classifiers": CLASSIFIERS,
    "keywords": KEYWORDS,
    "zip_safe": True,
    "entry_points": {
        "console_scripts": [
            "ybdata = ybdata.__main__:main"
        ],
    },
    "scripts": [],
    "setup_requires": [],
    "tests_require": [],
}


##########################################################################
## Run setup script
##########################################################################

# Only invoke setuptools when this file is executed as a script.
if __name__ == '__main__':
    setup(**config)
26 changes: 26 additions & 0 deletions ybdata/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# ybdata
# Yellowbrick datasets management and deployment scripts.
#
# Author: Benjamin Bengfort <benjamin@bengfort.com>
# Created: Sun Dec 30 08:50:55 2018 -0500
#
# For license information, see LICENSE.txt
#
# ID: __init__.py [] benjamin@bengfort.com $

"""
Yellowbrick datasets management and deployment scripts.
"""

##########################################################################
## Imports
##########################################################################

# Import the version number at the top level
from .version import get_version, __version_info__

##########################################################################
## Package Version
##########################################################################

# Short-form version string, exposed at the top level of the package.
__version__ = get_version(short=True)
35 changes: 35 additions & 0 deletions ybdata/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# ybdata
# Entry point for CLI script used by python -m and setuptools.
#
# Author: Benjamin Bengfort <benjamin@bengfort.com>
# Created: Mon Dec 31 07:16:04 2018 -0500
#
# For license information, see LICENSE.txt
#
# ID: __main__.py [] benjamin@bengfort.com $

"""
Entry point for CLI script used by python -m and setuptools.
"""

##########################################################################
## Imports
##########################################################################

from .app import YBDatasetUtility


##########################################################################
## Main Method
##########################################################################

def main():
    """
    Build the utility with YBDatasetUtility.load() and execute it.
    """
    YBDatasetUtility.load().execute()


# Run the CLI when this module is executed directly (e.g. python -m ybdata).
if __name__ == "__main__":
    main()
45 changes: 45 additions & 0 deletions ybdata/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# ybdata.app
# CLI utility for executing yellowbrick dataset management commands.
#
# Author: Benjamin Bengfort <benjamin@bengfort.com>
# Created: Mon Dec 31 07:19:34 2018 -0500
#
# For license information, see LICENSE.txt
#
# ID: app.py [] benjamin@bengfort.com $

"""
CLI utility for executing yellowbrick dataset management commands.
"""

##########################################################################
## Imports
##########################################################################

from commis import color
from commis import ConsoleProgram

from .commands import COMMANDS
from .version import get_version


##########################################################################
## Console Program
##########################################################################

# Text for the console program's description and epilog attributes.
DESCRIPTION = "Yellowbrick dataset management utilities"
EPILOG = "Intended for use by Yellowbrick maintainers and core contributors"


class YBDatasetUtility(ConsoleProgram):
    """
    Console program for the Yellowbrick dataset management commands.
    """

    # Colorized description, epilog and version strings for the program.
    description = color.format(DESCRIPTION, color.CYAN)
    epilog = color.format(EPILOG, color.MAGENTA)
    version = color.format("v{}", color.CYAN, get_version(short=True))

    @classmethod
    def load(cls, commands=COMMANDS):
        """
        Create the utility and register every command in ``commands`` on it.

        Returns the new instance once all commands have been registered.
        """
        program = cls()
        for command_class in commands:
            program.register(command_class)
        return program
29 changes: 29 additions & 0 deletions ybdata/commands/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# ybdata.commands
# Subcommand definitions executed by the utility program.
#
# Author: Benjamin Bengfort <benjamin@bengfort.com>
# Created: Mon Dec 31 07:23:45 2018 -0500
#
# For license information, see LICENSE.txt
#
# ID: __init__.py [] benjamin@bengfort.com $

"""
Subcommand definitions executed by the utility program.
"""

##########################################################################
## Imports
##########################################################################

from .validate import ValidateCommand


##########################################################################
## Active Commands
##########################################################################

# Add a command class to this list to make it available on the command line.
COMMANDS = [
    ValidateCommand,
]
Loading

0 comments on commit 9eac165

Please sign in to comment.