Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move cudf::char_utf8 definition from detail to public header #14779

Merged
merged 4 commits into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions cpp/include/cudf/strings/detail/utf8.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -23,9 +23,6 @@
*/

namespace cudf {

using char_utf8 = uint32_t; ///< UTF-8 characters are 1-4 bytes

namespace strings {
namespace detail {

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ class mutable_column_view;
class string_view;
class list_view;
class struct_view;

class scalar;

// clang-format off
Expand Down Expand Up @@ -95,6 +94,7 @@ using size_type = int32_t; ///< Row index type for columns and tables
using bitmask_type = uint32_t; ///< Bitmask type stored as 32-bit unsigned integer
using valid_type = uint8_t; ///< Valid type in host memory
using thread_index_type = int64_t; ///< Thread index type in kernels
using char_utf8 = uint32_t; ///< UTF-8 characters are 1-4 bytes

/**
* @brief Similar to `std::distance` but returns `cudf::size_type` and performs `static_cast`
Expand Down
83 changes: 53 additions & 30 deletions docs/cudf/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,16 @@
class PseudoLexer(RegexLexer):
"""Trivial lexer for pseudocode."""

name = 'pseudocode'
aliases = ['pseudo']
name = "pseudocode"
aliases = ["pseudo"]
tokens = {
'root': [
(r'.*\n', PText),
"root": [
(r".*\n", PText),
]
}


lexers['pseudo'] = PseudoLexer()
lexers["pseudo"] = PseudoLexer()

# -- Custom Extensions ----------------------------------------------------
sys.path.append(os.path.abspath("./_ext"))
Expand All @@ -69,6 +69,7 @@ class PseudoLexer(RegexLexer):
"myst_nb",
]


# Preprocess doxygen xml for compatibility with latest Breathe
def clean_definitions(root):
# Breathe can't handle SFINAE properly:
Expand Down Expand Up @@ -105,17 +106,22 @@ def clean_definitions(root):
pass
break


# All of these in type declarations cause Breathe to choke.
# For friend, see https://github.com/breathe-doc/breathe/issues/916
strings_to_remove = ("__forceinline__", "CUDF_HOST_DEVICE", "decltype(auto)", "friend")
strings_to_remove = (
"__forceinline__",
"CUDF_HOST_DEVICE",
"decltype(auto)",
"friend",
)
for node in root.iter():
for string in strings_to_remove:
if node.text is not None:
node.text = node.text.replace(string, "")
if node.tail is not None:
node.tail = node.tail.replace(string, "")


def clean_all_xml_files(path):
for fn in glob.glob(os.path.join(path, "*.xml")):
tree = ET.parse(fn)
Expand All @@ -130,7 +136,7 @@ def clean_all_xml_files(path):
breathe_default_project = "libcudf"


nb_execution_excludepatterns = ['performance-comparisons.ipynb']
nb_execution_excludepatterns = ["performance-comparisons.ipynb"]

nb_execution_mode = "force"
nb_execution_timeout = 300
Expand Down Expand Up @@ -163,9 +169,9 @@ def clean_all_xml_files(path):
# built documents.
#
# The short X.Y version.
version = '24.02'
version = "24.02"
# The full version, including alpha/beta/rc tags.
release = '24.02.00'
release = "24.02.00"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
Expand All @@ -177,7 +183,10 @@ def clean_all_xml_files(path):
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# These patterns also affect html_static_path and html_extra_path
exclude_patterns = ['venv', "**/includes/**",]
exclude_patterns = [
"venv",
"**/includes/**",
]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = "sphinx"
Expand Down Expand Up @@ -286,7 +295,10 @@ def clean_all_xml_files(path):
"pyarrow": ("https://arrow.apache.org/docs/", None),
"python": ("https://docs.python.org/3", None),
"rmm": ("https://docs.rapids.ai/api/rmm/nightly/", None),
"typing_extensions": ("https://typing-extensions.readthedocs.io/en/stable/", None),
"typing_extensions": (
"https://typing-extensions.readthedocs.io/en/stable/",
None,
),
}

# Config numpydoc
Expand Down Expand Up @@ -333,12 +345,15 @@ def _generate_namespaces(namespaces):
all_namespaces.append(f"{base_namespace}::{other_namespace}::")
return all_namespaces

_all_namespaces = _generate_namespaces({
# Note that io::datasource is actually a nested class
"cudf": {"io", "io::datasource", "strings", "ast", "ast::expression"},
"numeric": {},
"nvtext": {},
})

_all_namespaces = _generate_namespaces(
{
# Note that io::datasource is actually a nested class
"cudf": {"io", "io::datasource", "strings", "ast", "ast::expression"},
"numeric": {},
"nvtext": {},
}
)

_names_to_skip = {
# External names
Expand Down Expand Up @@ -369,10 +384,6 @@ def _generate_namespaces(namespaces):
# Unsupported by Breathe
# https://github.com/breathe-doc/breathe/issues/355
"deprecated",
# TODO: This type is currently defined in a detail header but it's in
# the public namespace. However, it's used in the detail header, so it
# needs to be put into a public header that can be shared.
"char_utf8",
# TODO: This is currently in a src file but perhaps should be public
"orc::column_statistics",
# Sphinx doesn't know how to distinguish between the ORC and Parquet
Expand All @@ -396,21 +407,24 @@ def _cached_intersphinx_lookup(env, node, contnode):
key = (node, contnode)
if key in _intersphinx_cache:
return _intersphinx_cache[key]
if (ref := intersphinx.resolve_reference_detect_inventory(env, node, contnode)) is not None:
if (
ref := intersphinx.resolve_reference_detect_inventory(
env, node, contnode
)
) is not None:
_intersphinx_cache[key] = ref
return ref


def on_missing_reference(app, env, node, contnode):
# These variables are defined outside the function to speed up the build.
global _all_namespaces, _names_to_skip, _intersphinx_extra_prefixes, \
_domain_objects, _prefixed_domain_objects, _intersphinx_cache
global _all_namespaces, _names_to_skip, _intersphinx_extra_prefixes, _domain_objects, _prefixed_domain_objects, _intersphinx_cache

# Precompute and cache domains for faster lookups
if _domain_objects is None:
_domain_objects = {}
_prefixed_domain_objects = {}
for (name, _, _, docname, _, _) in env.domains["cpp"].get_objects():
for name, _, _, docname, _, _ in env.domains["cpp"].get_objects():
_domain_objects[name] = docname
for prefix in _all_namespaces:
_prefixed_domain_objects[f"{prefix}{name}"] = name
Expand Down Expand Up @@ -473,19 +487,25 @@ def on_missing_reference(app, env, node, contnode):
# to fail.
if reftarget != node["reftarget"]:
node["reftarget"] = reftarget
if (ref := _cached_intersphinx_lookup(env, node, contnode)) is not None:
if (
ref := _cached_intersphinx_lookup(env, node, contnode)
) is not None:
return ref

# If the template wasn't the (only) issue, we check the various
# namespace prefixes that may need to be added or removed.
for prefix in _intersphinx_extra_prefixes:
if prefix not in reftarget:
node["reftarget"] = f"{prefix}::{reftarget}"
if (ref := _cached_intersphinx_lookup(env, node, contnode)) is not None:
if (
ref := _cached_intersphinx_lookup(env, node, contnode)
) is not None:
return ref
else:
node["reftarget"] = reftarget.replace(f"{prefix}::", "")
if (ref := _cached_intersphinx_lookup(env, node, contnode)) is not None:
if (
ref := _cached_intersphinx_lookup(env, node, contnode)
) is not None:
return ref

return None
Expand All @@ -499,8 +519,11 @@ def on_missing_reference(app, env, node, contnode):
("py:class", "typing_extensions.Self"),
]


def setup(app):
app.add_css_file("https://docs.rapids.ai/assets/css/custom.css")
app.add_js_file("https://docs.rapids.ai/assets/js/custom.js", loading_method="defer")
app.add_js_file(
"https://docs.rapids.ai/assets/js/custom.js", loading_method="defer"
)
app.connect("doctree-read", resolve_aliases)
app.connect("missing-reference", on_missing_reference)