diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index a6a9de2e77b..310d36a8c90 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -171,6 +171,12 @@ is_string_dtype, ) + +def str_to_boolean(column: StringColumn): + """Return a boolean column: True where str length > 0; nulls -> False.""" + return (column.str().len() > cudf.Scalar(0, dtype="int8")).fillna(False) + + _str_to_numeric_typecast_functions = { np.dtype("int8"): str_cast.stoi8, np.dtype("int16"): str_cast.stoi16, @@ -182,7 +188,7 @@ np.dtype("uint64"): str_cast.stoul, np.dtype("float32"): str_cast.stof, np.dtype("float64"): str_cast.stod, - np.dtype("bool"): str_cast.to_booleans, + np.dtype("bool"): str_to_boolean, } _numeric_to_str_typecast_functions = { diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index 3c153a16a13..a8c00ce031e 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -200,11 +200,7 @@ def test_string_astype(dtype): ps = pd.Series(data) gs = cudf.Series(data) - # Pandas str --> bool typecasting always returns True if there's a string - if dtype.startswith("bool"): - expect = ps == "True" - else: - expect = ps.astype(dtype) + expect = ps.astype(dtype) got = gs.astype(dtype) assert_eq(expect, got)