Separate out utility functions.

admanda · Feb 2, 2018 · 2f8737a · 2f8737a
1 parent 2e3a4e4
commit 2f8737a
Show file tree

Hide file tree

Showing 6 changed files with 64 additions and 59 deletions.
diff --git a/.gitignore b/.gitignore
@@ -44,3 +44,4 @@ _map.html
 
 # Test cache
 .cache
+.pytest_cache
diff --git a/missingno/__init__.py b/missingno/__init__.py
@@ -6,3 +6,4 @@
 from .missingno import nullity_filter
 from .missingno import nullity_sort
 from ._version import __version__
+from .utils import nullity_filter, nullity_sort
diff --git a/missingno/missingno.py b/missingno/missingno.py
@@ -6,51 +6,7 @@
 import seaborn as sns
 import pandas as pd
 from mpl_toolkits.axes_grid1 import make_axes_locatable
-
-
-def nullity_sort(df, sort=None):
-    """
-    Sorts a DataFrame according to its nullity, in either ascending or descending order.
-
-    :param df: The DataFrame object being sorted.
-    :param sort: The sorting method: either "ascending", "descending", or None (default).
-    :return: The nullity-sorted DataFrame.
-    """
-    if sort == '"ascending':
-        return df.iloc[np.argsort(df.count(axis='columns').values), :]
-    elif sort == 'descending':
-        return df.iloc[np.flipud(np.argsort(df.count(axis='columns').values)), :]
-    else:
-        return df
-
-
-def nullity_filter(df, filter=None, p=0, n=0):
-    """
-    Filters a DataFrame according to its nullity, using some combination of 'top' and 'bottom' numerical and
-    percentage values. Percentages and numerical thresholds can be specified simultaneously: for example,
-    to get a DataFrame with columns of at least 75% completeness but with no more than 5 columns, use
-    `nullity_filter(df, filter='top', p=.75, n=5)`.
-
-    :param df: The DataFrame whose columns are being filtered.
-    :param filter: The orientation of the filter being applied to the DataFrame. One of, "top", "bottom",
-    or None (default). The filter will simply return the DataFrame if you leave the filter argument unspecified or
-    as None.
-    :param p: A completeness ratio cut-off. If non-zero the filter will limit the DataFrame to columns with at least p
-    completeness. Input should be in the range [0, 1].
-    :param n: A numerical cut-off. If non-zero no more than this number of columns will be returned.
-    :return: The nullity-filtered `DataFrame`.
-    """
-    if filter == 'top':
-        if p:
-            df = df.iloc[:, [c >= p for c in df.count(axis='rows').values / len(df)]]
-        if n:
-            df = df.iloc[:, np.sort(np.argsort(df.count(axis='rows').values)[-n:])]
-    elif filter == 'bottom':
-        if p:
-            df = df.iloc[:, [c <= p for c in df.count(axis='rows').values / len(df)]]
-        if n:
-            df = df.iloc[:, np.sort(np.argsort(df.count(axis='rows').values)[:n])]
-    return df
+from .utils import nullity_filter, nullity_sort
 
 
 def matrix(df,

diff --git a/missingno/utils.py b/missingno/utils.py
@@ -0,0 +1,47 @@
+"""Utility functions for missingno."""
+import numpy as np
+
+
+def nullity_sort(df, sort=None):
+    """
+    Sorts the rows of a DataFrame by their count of non-null values, in either ascending or descending order.
+
+    :param df: The DataFrame object being sorted.
+    :param sort: The sorting method: either "ascending", "descending", or None (default).
+    :return: The nullity-sorted DataFrame.
+    """
+    if sort == 'ascending':
+        return df.iloc[np.argsort(df.count(axis='columns').values), :]
+    elif sort == 'descending':
+        return df.iloc[np.flipud(np.argsort(df.count(axis='columns').values)), :]
+    else:
+        return df
+
+
+def nullity_filter(df, filter=None, p=0, n=0):
+    """
+    Filters a DataFrame according to its nullity, using some combination of 'top' and 'bottom' numerical and
+    percentage values. Percentages and numerical thresholds can be specified simultaneously: for example,
+    to get a DataFrame with columns of at least 75% completeness but with no more than 5 columns, use
+    `nullity_filter(df, filter='top', p=.75, n=5)`.
+
+    :param df: The DataFrame whose columns are being filtered.
+    :param filter: The orientation of the filter being applied to the DataFrame. One of "top", "bottom",
+    or None (default). The DataFrame is returned unchanged if the filter argument is left unspecified or
+    set to None.
+    :param p: A completeness ratio cut-off. If non-zero, "top" keeps columns with at least p completeness and "bottom"
+    keeps columns with at most p completeness. Input should be in the range [0, 1].
+    :param n: A numerical cut-off. If non-zero no more than this number of columns will be returned.
+    :return: The nullity-filtered `DataFrame`.
+    """
+    if filter == 'top':
+        if p:
+            df = df.iloc[:, [c >= p for c in df.count(axis='rows').values / len(df)]]
+        if n:
+            df = df.iloc[:, np.sort(np.argsort(df.count(axis='rows').values)[-n:])]
+    elif filter == 'bottom':
+        if p:
+            df = df.iloc[:, [c <= p for c in df.count(axis='rows').values / len(df)]]
+        if n:
+            df = df.iloc[:, np.sort(np.argsort(df.count(axis='rows').values)[:n])]
+    return df
diff --git a/setup.py b/setup.py
@@ -1,16 +1,16 @@
 from setuptools import setup
 setup(
-  name = 'missingno',
-  packages = ['missingno'], # this must be the same as the name above
-  install_requires=['numpy', 'matplotlib', 'scipy', 'seaborn'],
-  py_modules=['missingno'],
-  version = '0.3.8',  # note to self: also update the one is the source!
-  description = 'Missing data visualization module for Python.',
-  author = 'Aleksey Bilogur',
-  author_email = 'aleksey.bilogur@gmail.com',
-  url = 'https://github.com/ResidentMario/missingno',
-  download_url = 'https://github.com/ResidentMario/missingno/tarball/0.3.8',
-  keywords = ['data', 'data visualization', 'data analysis', 'missing data', 'data science', 'pandas', 'python',
-              'jupyter'],
-  classifiers = [],
+    name='missingno',
+    packages=['missingno'], # this must be the same as the name above
+    install_requires=['numpy', 'matplotlib', 'scipy', 'seaborn'],
+    py_modules=['missingno'],
+    version='0.3.8',  # note to self: also update the one in the source!
+    description='Missing data visualization module for Python.',
+    author='Aleksey Bilogur',
+    author_email='aleksey.bilogur@gmail.com',
+    url='https://github.com/ResidentMario/missingno',
+    download_url='https://github.com/ResidentMario/missingno/tarball/0.3.8',
+    keywords=['data', 'data visualization', 'data analysis', 'missing data', 'data science', 'pandas', 'python',
+            'jupyter'],
+    classifiers=[]
 )
diff --git a/tests/util_tests.py b/tests/util_tests.py
@@ -1,5 +1,5 @@
 """
-`gtfs-tripify` utilities test module. Asserts that utility functions are correct.
+Utilities test module. Asserts that utility functions are correct.
 """
 
 import unittest