Skip to content

Commit

Permalink
fixes scikit-learn-contrib#3 and passing through already numeric columns along with encoded categories by default.
Browse files Browse the repository at this point in the history
  • Loading branch information
Will McGinnis committed Apr 5, 2016
1 parent 3fd1b6e commit c85a4e8
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 9 deletions.
5 changes: 4 additions & 1 deletion category_encoders/backward_difference.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ def backward_difference_coding(X_in, cols=None):

if cols is None:
cols = X.columns.values
pass_thru = []
else:
pass_thru = [col for col in X.columns.values if col not in cols]

bin_cols = []
for col in cols:
Expand All @@ -34,7 +37,7 @@ def backward_difference_coding(X_in, cols=None):
X[col + '_%d' % (dig, )] = mod[:, dig]
bin_cols.append(col + '_%d' % (dig, ))

X = X.reindex(columns=bin_cols)
X = X.reindex(columns=bin_cols + pass_thru)
X.fillna(0.0)
return X

Expand Down
10 changes: 5 additions & 5 deletions category_encoders/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ def binary(X_in, cols=None):

if cols is None:
cols = X.columns.values
pass_thru = []
else:
pass_thru = [col for col in X.columns.values if col not in cols]

bin_cols = []
for col in cols:
# figure out how many digits we need to represent the classes present
if X[col].max() == 0:
digits = 1
else:
digits = int(np.ceil(np.log2(X[col].max())))
digits = int(np.ceil(np.log2(len(X[col].unique()))))

# map the ordinal column into a list of these digits, of length digits
X[col] = X[col].map(lambda x: list("{0:b}".format(int(x)))) \
Expand All @@ -44,7 +44,7 @@ def binary(X_in, cols=None):
X[col + '_%d' % (dig, )] = X[col].map(lambda x: int(x[dig]))
bin_cols.append(col + '_%d' % (dig, ))

X = X.reindex(columns=bin_cols)
X = X.reindex(columns=bin_cols + pass_thru)

return X

Expand Down
5 changes: 4 additions & 1 deletion category_encoders/helmert.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ def helmert_coding(X_in, cols=None):

if cols is None:
cols = X.columns.values
pass_thru = []
else:
pass_thru = [col for col in X.columns.values if col not in cols]

bin_cols = []
for col in cols:
Expand All @@ -34,7 +37,7 @@ def helmert_coding(X_in, cols=None):
X[col + '_%d' % (dig, )] = mod[:, dig]
bin_cols.append(col + '_%d' % (dig, ))

X = X.reindex(columns=bin_cols)
X = X.reindex(columns=bin_cols + pass_thru)

return X

Expand Down
6 changes: 5 additions & 1 deletion category_encoders/polynomial.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,12 @@ def polynomial_coding(X_in, cols=None):
"""

X = copy.deepcopy(X_in)

if cols is None:
cols = X.columns.values
pass_thru = []
else:
pass_thru = [col for col in X.columns.values if col not in cols]

bin_cols = []
for col in cols:
Expand All @@ -33,7 +37,7 @@ def polynomial_coding(X_in, cols=None):
X[col + '_%d' % (dig, )] = mod[:, dig]
bin_cols.append(col + '_%d' % (dig, ))

X = X.reindex(columns=bin_cols)
X = X.reindex(columns=bin_cols + pass_thru)

return X

Expand Down
5 changes: 4 additions & 1 deletion category_encoders/sum_coding.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ def sum_coding(X_in, cols=None):

if cols is None:
cols = X.columns.values
pass_thru = []
else:
pass_thru = [col for col in X.columns.values if col not in cols]

bin_cols = []
for col in cols:
Expand All @@ -34,7 +37,7 @@ def sum_coding(X_in, cols=None):
X[col + '_%d' % (dig, )] = mod[:, dig]
bin_cols.append(col + '_%d' % (dig, ))

X = X.reindex(columns=bin_cols)
X = X.reindex(columns=bin_cols + pass_thru)

return X

Expand Down

0 comments on commit c85a4e8

Please sign in to comment.