Skip to content

Commit

Permalink
Fix constructing Column from column_view with expired mask (rapidsai#…
Browse files Browse the repository at this point in the history
…11354)

This PR fixes a subtle bug leading to a segfault when constructing a `Column` from a `column_view`.

Closes rapidsai#11349

Authors:
  - Ashwin Srinath (https://github.com/shwina)

Approvers:
  - https://github.com/brandon-b-miller
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Matthew Roeschke (https://github.com/mroeschke)
  - Lawrence Mitchell (https://github.com/wence-)

URL: rapidsai#11354
  • Loading branch information
shwina authored Jul 28, 2022
1 parent e4ce301 commit c11b780
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 8 deletions.
39 changes: 31 additions & 8 deletions python/cudf/cudf/_lib/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -503,9 +503,12 @@ cdef class Column:
data = None
base_size = size + offset
data_owner = owner

if column_owner:
data_owner = owner.base_data
mask_owner = mask_owner.base_mask
base_size = owner.base_size

if data_ptr:
if data_owner is None:
data = Buffer(
Expand All @@ -523,18 +526,38 @@ cdef class Column:
rmm.DeviceBuffer(ptr=data_ptr, size=0)
)

mask_ptr = <uintptr_t>(cv.null_mask())
mask = None
mask_ptr = <uintptr_t>(cv.null_mask())
if mask_ptr:
if column_owner:
mask_owner = mask_owner.base_mask
if mask_owner is None:
mask = Buffer(
rmm.DeviceBuffer(
ptr=mask_ptr,
size=bitmask_allocation_size_bytes(size+offset)
if column_owner:
# if we reached here, it means `owner` is a `Column`
# that does not have a null mask, but `cv` thinks it
# should have a null mask. This can happen in the
# following sequence of events:
#
# 1) `cv` is constructed as a view into a
# `cudf::column` that is nullable (i.e., it has
# a null mask), but contains no nulls.
# 2) `owner`, a `Column`, is constructed from the
# same `cudf::column`. Because `cudf::column`
# is memory owning, `owner` takes ownership of
# the memory owned by the
# `cudf::column`. Because the column has a null
# count of 0, it may choose to discard the null
# mask.
# 3) Now, `cv` points to a discarded null mask.
#
# TL;DR: we should not include a null mask in the
# result:
mask = None
else:
mask = Buffer(
rmm.DeviceBuffer(
ptr=mask_ptr,
size=bitmask_allocation_size_bytes(size+offset)
)
)
)
else:
mask = Buffer(
data=mask_ptr,
Expand Down
9 changes: 9 additions & 0 deletions python/cudf/cudf/tests/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1683,3 +1683,12 @@ def test_dataframe_indexing_setitem_np_cp_array(array, is_error):
"(10, 3) could not be broadcast to indexing "
"result of shape (10, 2)",
)


def test_iloc_single_row_with_nullable_column():
# see https://github.com/rapidsai/cudf/issues/11349
pdf = pd.DataFrame({"a": [0, 1, 2, 3], "b": [0.1, 0.2, None, 0.4]})
df = cudf.from_pandas(pdf)

df.iloc[0] # before the fix for #11349 this would segfault
assert_eq(pdf.iloc[0], df.iloc[0])

0 comments on commit c11b780

Please sign in to comment.