diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index b8f18113e39..310d36a8c90 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -171,6 +171,12 @@
     is_string_dtype,
 )
 
+
+def str_to_boolean(column: StringColumn):
+    """Return a boolean column: True for non-empty strings, else False."""
+    return (column.str().len() > cudf.Scalar(0, dtype="int8")).fillna(False)
+
+
 _str_to_numeric_typecast_functions = {
     np.dtype("int8"): str_cast.stoi8,
     np.dtype("int16"): str_cast.stoi16,
@@ -219,11 +225,6 @@
 ParentType = Union["cudf.Series", "cudf.core.index.BaseIndex"]
 
 
-def str_to_boolean(column):
-    """Takes in string column and returns boolean column """
-    return (column.str.len() > 0).fillna(False)
-
-
 class StringMethods(ColumnMethodsMixin):
     def __init__(self, column, parent=None):
         """
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index 3c153a16a13..a8c00ce031e 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -200,11 +200,7 @@ def test_string_astype(dtype):
     ps = pd.Series(data)
     gs = cudf.Series(data)
 
-    # Pandas str --> bool typecasting always returns True if there's a string
-    if dtype.startswith("bool"):
-        expect = ps == "True"
-    else:
-        expect = ps.astype(dtype)
+    expect = ps.astype(dtype)
     got = gs.astype(dtype)
 
     assert_eq(expect, got)