Skip to content

Commit

Permalink
Finish tests, corrections to core functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
ResidentMario committed Feb 3, 2018
1 parent 3db1470 commit 482bea8
Show file tree
Hide file tree
Showing 2 changed files with 99 additions and 55 deletions.
27 changes: 12 additions & 15 deletions missingno/missingno.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ def matrix(df,
:param sort: The sort to apply to the heatmap. Should be one of "ascending", "descending", or None (default).
:param figsize: The size of the figure to display.
:param fontsize: The figure's font size. Default to 16.
:param labels: If specified, what labels to sue for the column names. Defaults to the underlying data labels when
there are 50 columns or less, and no labels when there are more than 50 columns.
:param labels: Whether or not to display the column names. Defaults to the underlying data labels when there are
50 columns or less, and no labels when there are more than 50 columns.
:param sparkline: Whether or not to display the sparkline. Defaults to True.
:param width_ratios: The ratio of the width of the matrix to the width of the sparkline. Defaults to `(15, 1)`.
Does nothing if `sparkline=False`.
Expand Down Expand Up @@ -192,7 +192,7 @@ def matrix(df,
return fig


def bar(df, figsize=(24, 10), fontsize=16, labels=None, log=False, color='darkgray', inline=False,
def bar(df, figsize=(24, 10), fontsize=16, labels=None, log=False, color='dimgray', inline=False,
filter=None, n=0, p=0, sort=None):
"""
A bar chart visualization of the nullity of the given DataFrame.
Expand All @@ -215,7 +215,8 @@ def bar(df, figsize=(24, 10), fontsize=16, labels=None, log=False, color='darkgr
df = nullity_sort(df, sort=sort)

fig = plt.figure(figsize=figsize)
(nullity_counts / len(df)).plot(kind='bar', figsize=figsize, fontsize=fontsize, color=color, log=log)
(nullity_counts / len(df)).plot(kind='bar', figsize=figsize, fontsize=fontsize, log=log, color=color)
# plt.bar((nullity_counts / len(df)), figsize=figsize, fontsize=fontsize, color=color, log=log)
ax1 = plt.gca()

# Start appending elements, starting with a modified bottom x axis.
Expand All @@ -225,6 +226,7 @@ def bar(df, figsize=(24, 10), fontsize=16, labels=None, log=False, color='darkgr
# Create the numerical ticks.
ax2 = ax1.twinx()
if not log:
ax1.set_ylim([0, 1])
ax2.set_yticks(ax1.get_yticks())
ax2.set_yticklabels([int(n*len(df)) for n in ax1.get_yticks()], fontsize=fontsize)
else:
Expand Down Expand Up @@ -256,7 +258,6 @@ def heatmap(df, inline=False,
Presents a `seaborn` heatmap visualization of nullity correlation in the given DataFrame.
Note that this visualization has no special support for large datasets. For those, try the dendrogram instead.
:param df: The DataFrame whose completeness is being heatmapped.
:param filter: The filter to apply to the heatmap. Should be one of "top", "bottom", or None (default). See
Expand Down Expand Up @@ -298,11 +299,11 @@ def heatmap(df, inline=False,
sns.heatmap(corr_mat, mask=mask, cmap=cmap, ax=ax0, cbar=False)

# Apply visual corrections and modifications.
ax0.set_xticklabels(ax0.xaxis.get_majorticklabels(), rotation=45, ha='left', fontsize=fontsize)
ax0.xaxis.tick_bottom()
ax0.set_xticklabels(ax0.xaxis.get_majorticklabels(), rotation=45, ha='right', fontsize=fontsize)
ax0.set_yticklabels(ax0.yaxis.get_majorticklabels(), fontsize=fontsize, rotation=0)
ax0.set_yticklabels(ax0.yaxis.get_majorticklabels(), rotation=0, fontsize=fontsize)

ax0.xaxis.tick_top()
ax0.patch.set_visible(False)

for text in ax0.texts:
Expand Down Expand Up @@ -341,14 +342,10 @@ def dendrogram(df, method='average',
:param df: The DataFrame whose completeness is being dendrogrammed.
:param method: The distance measure being used for clustering. This is a parameter that is passed to
`scipy.hierarchy`.
:param filter: The filter to apply to the heatmap. Should be one of "top", "bottom", or None (default). See
`nullity_filter()` for more information.
:param n: The cap on the number of columns to include in the filtered DataFrame. See `nullity_filter()` for
more information.
:param p: The cap on the percentage fill of the columns in the filtered DataFrame. See `nullity_filter()` for
more information.
:param sort: The sort to apply to the heatmap. Should be one of "ascending", "descending", or None. See
`nullity_sort()` for more information.
:param filter: The filter to apply to the dendrogram. Should be one of "top", "bottom", or None (default).
:param n: The cap on the number of columns to include in the filtered DataFrame.
:param p: The cap on the percentage fill of the columns in the filtered DataFrame.
:param sort: The sort to apply to the dendrogram. Should be one of "ascending", "descending", or None.
:param figsize: The size of the figure to display. This is a `matplotlib` parameter which defaults to `(25, 10)`.
:param fontsize: The figure's font size.
:param orientation: The way the dendrogram is oriented. Defaults to top-down if there are less than or equal to 50
Expand Down
127 changes: 87 additions & 40 deletions tests/viz_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,54 +11,101 @@
import missingno as msno


class TestMatrix(unittest.TestCase):
def setUp(self):
np.random.seed(42)
self.simple_df = pd.DataFrame((np.random.random((20, 10)) > 0.5), columns=range(0, 10)).replace(False, np.nan)
np.random.seed(42)
self.freq_df = (
pd.DataFrame((np.random.random(1000).reshape((50, 20)) > 0.5))
.replace(False, np.nan)
.set_index(pd.period_range('1/1/2011', '2/1/2015', freq='M'))
)
np.random.seed(42)
self.large_df = pd.DataFrame((np.random.random((250, 60)) > 0.5)).replace(False, np.nan)
# class TestMatrix(unittest.TestCase):
# def setUp(self):
# np.random.seed(42)
# self.simple_df = pd.DataFrame((np.random.random((20, 10)) > 0.5), columns=range(0, 10)).replace(False, np.nan)
# np.random.seed(42)
# self.freq_df = (
# pd.DataFrame((np.random.random(1000).reshape((50, 20)) > 0.5))
# .replace(False, np.nan)
# .set_index(pd.period_range('1/1/2011', '2/1/2015', freq='M'))
# )
# np.random.seed(42)
# self.large_df = pd.DataFrame((np.random.random((250, 60)) > 0.5)).replace(False, np.nan)
#
# @pytest.mark.mpl_image_compare
# def test_simple_matrix(self):
# return msno.matrix(self.simple_df, inline=False)
#
# @pytest.mark.mpl_image_compare
# def test_no_sparkline_matrix(self):
# return msno.matrix(self.simple_df, inline=False, sparkline=False)
#
# @pytest.mark.mpl_image_compare
# def test_width_ratios_matrix(self):
# return msno.matrix(self.simple_df, inline=False, width_ratios=(30, 1))
#
# @pytest.mark.mpl_image_compare
# def test_color_matrix(self):
# return msno.matrix(self.simple_df, inline=False, color=(70 / 255, 130 / 255, 180 / 255))
#
# @pytest.mark.mpl_image_compare
# def test_fontsize_matrix(self):
# return msno.matrix(self.simple_df, inline=False, fontsize=8)
#
# @pytest.mark.mpl_image_compare
# def test_freq_matrix(self):
# return msno.matrix(self.freq_df, inline=False, freq='BQ')
#
# @pytest.mark.mpl_image_compare
# def test_large_matrix(self):
# return msno.matrix(self.large_df, inline=False)

@pytest.mark.mpl_image_compare
def test_simple(self):
return msno.matrix(self.simple_df, inline=False)

@pytest.mark.mpl_image_compare
def test_no_sparkline(self):
return msno.matrix(self.simple_df, inline=False, sparkline=False)
# class TestBar(unittest.TestCase):
# """
# Bar chart visualizations look very visually different between the savefig backend and the default notebook backend.
# """
# def setUp(self):
# np.random.seed(42)
# self.simple_df = pd.DataFrame((np.random.random((20, 10)) > 0.5), columns=range(0, 10)).replace(False, np.nan)
#
# @pytest.mark.mpl_image_compare
# def test_simple_bar(self):
# return msno.bar(self.simple_df, inline=False)
#
# @pytest.mark.mpl_image_compare
# def test_log_bar(self):
# return msno.bar(self.simple_df, log=True, inline=False)

@pytest.mark.mpl_image_compare
def test_width_ratios(self):
return msno.matrix(self.simple_df, inline=False, width_ratios=(30, 1))

@pytest.mark.mpl_image_compare
def test_color(self):
return msno.matrix(self.simple_df, inline=False, color=(70 / 255, 130 / 255, 180 / 255))
# class TestHeatmap(unittest.TestCase):
# def setUp(self):
# np.random.seed(42)
# self.simple_df = pd.DataFrame((np.random.random((20, 10)) > 0.5), columns=range(0, 10)).replace(False, np.nan)
# self.large_df = pd.DataFrame((np.random.random((250, 60)) > 0.5)).replace(False, np.nan)
#
# @pytest.mark.mpl_image_compare
# def test_simple_heatmap(self):
# return msno.heatmap(self.simple_df, inline=False)
#
# @pytest.mark.mpl_image_compare
# def test_unlabelled_heatmap(self):
# return msno.heatmap(self.simple_df, labels=False, inline=False)
#
# @pytest.mark.mpl_image_compare
# def test_alternative_colormap_heatmap(self):
# return msno.heatmap(self.simple_df, cmap='viridis', inline=False)

@pytest.mark.mpl_image_compare
def test_fontsize(self):
return msno.matrix(self.simple_df, inline=False, fontsize=8)

@pytest.mark.mpl_image_compare
def test_freq(self):
return msno.matrix(self.freq_df, inline=False, freq='BQ')
class TestDendrogram(unittest.TestCase):
def setUp(self):
np.random.seed(42)
simple_df = pd.DataFrame((np.random.random((20, 10))), columns=range(0, 10))
simple_df.iloc[:, :2] = (simple_df.iloc[:, :2] > 0.2)
simple_df.iloc[:, 2:5] = (simple_df.iloc[:, 2:5] > 0.8)
simple_df.iloc[:, 5:10] = (simple_df.iloc[:, 2:5] > 0.5)
self.simple_df = simple_df.replace(False, np.nan)

@pytest.mark.mpl_image_compare
def test_large(self):
return msno.matrix(self.large_df, inline=False)
def test_simple_dendrogram(self):
return msno.dendrogram(self.simple_df, inline=False)


class TestBar(unittest.TestCase):
def setUp(self):
np.random.seed(42)
self.simple_df = pd.DataFrame((np.random.random((20, 10)) > 0.5), columns=range(0, 10)).replace(False, np.nan)
@pytest.mark.mpl_image_compare
def test_orientation_dendrogram(self):
return msno.dendrogram(self.simple_df, orientation='right', inline=False)

@pytest.mark.mpl_image_compare
def test_simple(self):
# TODO: Fixes here.
return msno.bar(self.simple_df, inline=False)
def test_method_dendrogram(self):
return msno.dendrogram(self.simple_df, method='single', inline=False)

0 comments on commit 482bea8

Please sign in to comment.