forked from ResidentMario/missingno
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2e3a4e4
commit 2f8737a
Showing
6 changed files
with
64 additions
and
59 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -44,3 +44,4 @@ _map.html | |
|
||
# Test cache | ||
.cache | ||
.pytest_cache |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
"""Utility functions for missingno.""" | ||
import numpy as np | ||
|
||
|
||
def nullity_sort(df, sort=None):
    """
    Sorts the rows of a DataFrame according to their nullity, in either ascending or descending order.

    Rows are ranked by their number of non-null entries (`df.count(axis='columns')`): "ascending" places the
    emptiest rows first, "descending" places the most complete rows first.

    :param df: The DataFrame object being sorted.
    :param sort: The sorting method: either "ascending", "descending", or None (default). Any other value
    leaves the DataFrame untouched.
    :return: The nullity-sorted DataFrame, or `df` itself when no sorting is requested.
    """
    if sort not in ('ascending', 'descending'):
        return df

    # A stable sort makes the result deterministic when several rows share the same non-null count: ties keep
    # their original relative order for "ascending" and the exact reverse of it for "descending".
    order = np.argsort(df.count(axis='columns').values, kind='stable')
    if sort == 'descending':
        order = np.flipud(order)
    return df.iloc[order, :]
|
||
|
||
def nullity_filter(df, filter=None, p=0, n=0):
    """
    Filters the columns of a DataFrame according to their nullity, using some combination of 'top' and 'bottom'
    numerical and percentage values. Percentages and numerical thresholds can be specified simultaneously: for
    example, to get a DataFrame with columns of at least 75% completeness but with no more than 5 columns, use
    `nullity_filter(df, filter='top', p=.75, n=5)`.

    When both `p` and `n` are given, the completeness cut-off is applied first and the column limit is applied to
    whatever survives it. The returned columns always keep their original left-to-right order.

    :param df: The DataFrame whose columns are being filtered.
    :param filter: The orientation of the filter being applied to the DataFrame. One of, "top", "bottom",
    or None (default). The filter will simply return the DataFrame if you leave the filter argument unspecified or
    as None (any other unrecognised value behaves the same way).
    :param p: A completeness ratio cut-off. If non-zero, "top" keeps the columns whose completeness is at least p
    and "bottom" keeps the columns whose completeness is at most p. Input should be in the range [0, 1].
    :param n: A numerical cut-off. If non-zero no more than this number of columns will be returned: the n most
    complete ones for "top", the n least complete ones for "bottom".
    :return: The nullity-filtered `DataFrame`.
    """
    if filter not in ('top', 'bottom'):
        return df
    keep_top = filter == 'top'

    if p:
        # Fraction of non-null entries per column, compared against the cut-off in one vectorised step.
        completeness = df.count(axis='rows').values / len(df)
        mask = completeness >= p if keep_top else completeness <= p
        df = df.iloc[:, mask]

    if n:
        # Rank the (possibly already reduced) columns by non-null count, take the n from the requested end,
        # then re-sort the positions so the surviving columns stay in their original order.
        ranking = np.argsort(df.count(axis='rows').values)
        chosen = ranking[-n:] if keep_top else ranking[:n]
        df = df.iloc[:, np.sort(chosen)]

    return df
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,16 @@ | ||
from setuptools import setup | ||
setup( | ||
name = 'missingno', | ||
packages = ['missingno'], # this must be the same as the name above | ||
install_requires=['numpy', 'matplotlib', 'scipy', 'seaborn'], | ||
py_modules=['missingno'], | ||
version = '0.3.8', # note to self: also update the one is the source! | ||
description = 'Missing data visualization module for Python.', | ||
author = 'Aleksey Bilogur', | ||
author_email = 'aleksey.bilogur@gmail.com', | ||
url = 'https://github.com/ResidentMario/missingno', | ||
download_url = 'https://github.com/ResidentMario/missingno/tarball/0.3.8', | ||
keywords = ['data', 'data visualization', 'data analysis', 'missing data', 'data science', 'pandas', 'python', | ||
'jupyter'], | ||
classifiers = [], | ||
name='missingno', | ||
packages=['missingno'], # this must be the same as the name above | ||
install_requires=['numpy', 'matplotlib', 'scipy', 'seaborn'], | ||
py_modules=['missingno'], | ||
version='0.3.8', # note to self: also update the one is the source! | ||
description='Missing data visualization module for Python.', | ||
author='Aleksey Bilogur', | ||
author_email='aleksey.bilogur@gmail.com', | ||
url='https://github.com/ResidentMario/missingno', | ||
download_url='https://github.com/ResidentMario/missingno/tarball/0.3.8', | ||
keywords=['data', 'data visualization', 'data analysis', 'missing data', 'data science', 'pandas', 'python', | ||
'jupyter'], | ||
classifiers=[] | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters