Skip to content

Commit

Permalink
Separate out utility functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
ResidentMario committed Feb 2, 2018
1 parent 2e3a4e4 commit 2f8737a
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 59 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,4 @@ _map.html

# Test cache
.cache
.pytest_cache
1 change: 1 addition & 0 deletions missingno/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
from .missingno import nullity_filter
from .missingno import nullity_sort
from ._version import __version__
from .utils import nullity_filter, nullity_sort
46 changes: 1 addition & 45 deletions missingno/missingno.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,51 +6,7 @@
import seaborn as sns
import pandas as pd
from mpl_toolkits.axes_grid1 import make_axes_locatable


def nullity_sort(df, sort=None):
    """
    Sorts the rows of a DataFrame according to their nullity, in either ascending or descending order.

    Rows are ranked by their number of non-null values, as reported by `df.count(axis='columns')`.

    :param df: The DataFrame object being sorted.
    :param sort: The sorting method: either "ascending" (rows with the fewest non-null values first), "descending"
        (rows with the most non-null values first), or None (default). Any other value leaves the DataFrame as is.
    :return: The nullity-sorted DataFrame, or `df` itself when no sort is applied.
    """
    # NOTE: this literal previously read '"ascending' (stray double quote), so ascending sorts were never applied.
    if sort == 'ascending':
        return df.iloc[np.argsort(df.count(axis='columns').values), :]
    elif sort == 'descending':
        # Reverse the ascending permutation to put the most complete rows first.
        return df.iloc[np.flipud(np.argsort(df.count(axis='columns').values)), :]
    else:
        return df


def nullity_filter(df, filter=None, p=0, n=0):
    """
    Filters a DataFrame according to its nullity, using some combination of 'top' and 'bottom' numerical and
    percentage values. Percentages and numerical thresholds can be specified simultaneously: for example,
    to get a DataFrame with columns of at least 75% completeness but with no more than 5 columns, use
    `nullity_filter(df, filter='top', p=.75, n=5)`.

    When both `p` and `n` are given, the `p` cut-off is applied first and `n` is applied to the surviving columns.
    The original left-to-right column order is preserved in the result.

    :param df: The DataFrame whose columns are being filtered.
    :param filter: The orientation of the filter being applied to the DataFrame. One of, "top", "bottom",
        or None (default). The filter will simply return the DataFrame if you leave the filter argument unspecified
        or as None.
    :param p: A completeness ratio cut-off. If non-zero, a "top" filter keeps the columns with at least p
        completeness and a "bottom" filter keeps the columns with at most p completeness. Input should be in the
        range [0, 1].
    :param n: A numerical cut-off. If non-zero no more than this number of columns will be returned: the n most
        complete columns for a "top" filter, the n least complete columns for a "bottom" filter.
    :return: The nullity-filtered `DataFrame`.
    """
    if filter == 'top':
        if p:
            # Vectorised boolean mask over the per-column completeness ratios.
            df = df.iloc[:, (df.count(axis='rows').values / len(df)) >= p]
        if n:
            # argsort ranks columns by non-null count; np.sort restores the original column order.
            df = df.iloc[:, np.sort(np.argsort(df.count(axis='rows').values)[-n:])]
    elif filter == 'bottom':
        if p:
            df = df.iloc[:, (df.count(axis='rows').values / len(df)) <= p]
        if n:
            df = df.iloc[:, np.sort(np.argsort(df.count(axis='rows').values)[:n])]
    return df
from .utils import nullity_filter, nullity_sort


def matrix(df,
Expand Down
47 changes: 47 additions & 0 deletions missingno/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""Utility functions for missingno."""
import numpy as np


def nullity_sort(df, sort=None):
    """
    Sorts the rows of a DataFrame according to their nullity, in either ascending or descending order.

    Rows are ranked by their number of non-null values, as reported by `df.count(axis='columns')`.

    :param df: The DataFrame object being sorted.
    :param sort: The sorting method: either "ascending" (rows with the fewest non-null values first), "descending"
        (rows with the most non-null values first), or None (default, no sorting).
    :return: The nullity-sorted DataFrame, or `df` itself when `sort` is None.
    :raises ValueError: If `sort` is neither None, "ascending" nor "descending".
    """
    if sort is None:
        return df
    if sort not in ('ascending', 'descending'):
        # Fail loudly: a misspelled option would otherwise silently return the frame unsorted.
        raise ValueError('The "sort" parameter must be set to "ascending", "descending", or None.')

    # Row positions ordered from fewest to most non-null values.
    order = np.argsort(df.count(axis='columns').values)
    if sort == 'descending':
        order = np.flipud(order)
    return df.iloc[order, :]


def nullity_filter(df, filter=None, p=0, n=0):
    """
    Filters a DataFrame according to its nullity, using some combination of 'top' and 'bottom' numerical and
    percentage values. Percentages and numerical thresholds can be specified simultaneously: for example,
    to get a DataFrame with columns of at least 75% completeness but with no more than 5 columns, use
    `nullity_filter(df, filter='top', p=.75, n=5)`.

    When both `p` and `n` are given, the `p` cut-off is applied first and `n` is applied to the surviving columns.
    The original left-to-right column order is preserved in the result.

    :param df: The DataFrame whose columns are being filtered.
    :param filter: The orientation of the filter being applied to the DataFrame. One of, "top", "bottom",
        or None (default). The filter will simply return the DataFrame if you leave the filter argument unspecified
        or as None.
    :param p: A completeness ratio cut-off. If non-zero, a "top" filter keeps the columns with at least p
        completeness and a "bottom" filter keeps the columns with at most p completeness. Input should be in the
        range [0, 1].
    :param n: A numerical cut-off. If non-zero no more than this number of columns will be returned: the n most
        complete columns for a "top" filter, the n least complete columns for a "bottom" filter.
    :return: The nullity-filtered `DataFrame`.
    """
    if filter == 'top':
        if p:
            # Vectorised boolean mask over the per-column completeness ratios.
            df = df.iloc[:, (df.count(axis='rows').values / len(df)) >= p]
        if n:
            # argsort ranks columns by non-null count; np.sort restores the original column order.
            df = df.iloc[:, np.sort(np.argsort(df.count(axis='rows').values)[-n:])]
    elif filter == 'bottom':
        if p:
            df = df.iloc[:, (df.count(axis='rows').values / len(df)) <= p]
        if n:
            df = df.iloc[:, np.sort(np.argsort(df.count(axis='rows').values)[:n])]
    return df
26 changes: 13 additions & 13 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from setuptools import setup
setup(
name = 'missingno',
packages = ['missingno'], # this must be the same as the name above
install_requires=['numpy', 'matplotlib', 'scipy', 'seaborn'],
py_modules=['missingno'],
version = '0.3.8', # note to self: also update the one in the source!
description = 'Missing data visualization module for Python.',
author = 'Aleksey Bilogur',
author_email = 'aleksey.bilogur@gmail.com',
url = 'https://github.com/ResidentMario/missingno',
download_url = 'https://github.com/ResidentMario/missingno/tarball/0.3.8',
keywords = ['data', 'data visualization', 'data analysis', 'missing data', 'data science', 'pandas', 'python',
'jupyter'],
classifiers = [],
name='missingno',
packages=['missingno'], # this must be the same as the name above
install_requires=['numpy', 'matplotlib', 'scipy', 'seaborn'],
py_modules=['missingno'],
version='0.3.8', # note to self: also update the one in the source!
description='Missing data visualization module for Python.',
author='Aleksey Bilogur',
author_email='aleksey.bilogur@gmail.com',
url='https://github.com/ResidentMario/missingno',
download_url='https://github.com/ResidentMario/missingno/tarball/0.3.8',
keywords=['data', 'data visualization', 'data analysis', 'missing data', 'data science', 'pandas', 'python',
'jupyter'],
classifiers=[]
)
2 changes: 1 addition & 1 deletion tests/util_tests.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""
`gtfs-tripify` utilities test module. Asserts that utility functions are correct.
Utilities test module. Asserts that utility functions are correct.
"""

import unittest
Expand Down

0 comments on commit 2f8737a

Please sign in to comment.