Skip to content

Commit

Permalink
merging upstream
Browse files Browse the repository at this point in the history
  • Loading branch information
divyegala committed Aug 24, 2023
2 parents 33f5ebc + 845ae38 commit 56d3b93
Show file tree
Hide file tree
Showing 21 changed files with 340 additions and 106 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,7 @@ The [build](docs/source/build.md) instructions contain more details on building

The folder structure mirrors other RAPIDS repos, with the following folders:

- `bench/ann`: Python scripts for running ANN benchmarks
- `ci`: Scripts for running CI in PRs
- `conda`: Conda recipes and development conda environments
- `cpp`: Source code for C++ libraries.
Expand Down
15 changes: 6 additions & 9 deletions bench/ann/data_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,18 @@


def export_results(output_filepath, recompute, groundtruth_filepath,
result_filepaths):
result_filepath):
print(f"Writing output file to: {output_filepath}")
ann_bench_scripts_dir = os.path.join(os.getenv("RAFT_HOME"),
"cpp/bench/ann/scripts")
ann_bench_scripts_path = os.path.join(ann_bench_scripts_dir,
"eval.pl")
if recompute:
p = subprocess.Popen([ann_bench_scripts_path, "-f", "-o", output_filepath,
groundtruth_filepath] + result_filepaths)
groundtruth_filepath, result_filepath])
else:
p = subprocess.Popen([ann_bench_scripts_path, "-o", output_filepath,
groundtruth_filepath] + result_filepaths)
groundtruth_filepath, result_filepath])
p.wait()


Expand All @@ -51,16 +51,13 @@ def main():
"bench", "ann", "data")
)

args, result_filepaths = parser.parse_known_args()

# if nothing is provided
if len(result_filepaths) == 0:
raise ValueError("No filepaths to results were provided")
args = parser.parse_args()

result_filepath = os.path.join(args.dataset_path, args.dataset, "result")
groundtruth_filepath = os.path.join(args.dataset_path, args.dataset,
"groundtruth.neighbors.ibin")
export_results(args.output, args.recompute, groundtruth_filepath,
result_filepaths)
result_filepath)


if __name__ == "__main__":
Expand Down
10 changes: 8 additions & 2 deletions bench/ann/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,14 @@ def main():
conf_file = json.load(f)

# Re-root the base and query file paths under dataset-path
conf_file["dataset"]["base_file"] = os.path.join(dataset_path, "base.fbin")
conf_file["dataset"]["query_file"] = os.path.join(dataset_path, "query.fbin")
replacement_base_filepath = \
os.path.normpath(conf_file["dataset"]["base_file"]).split(os.path.sep)[-1]
conf_file["dataset"]["base_file"] = \
os.path.join(dataset_path, replacement_base_filepath)
replacement_query_filepath = \
os.path.normpath(conf_file["dataset"]["query_file"]).split(os.path.sep)[-1]
conf_file["dataset"]["query_file"] = \
os.path.join(dataset_path, replacement_query_filepath)
# Ensure base and query files exist for dataset
if not os.path.exists(conf_file["dataset"]["base_file"]):
raise FileNotFoundError(conf_file["dataset"]["base_file"])
Expand Down
2 changes: 1 addition & 1 deletion ci/test_wheel_raft_dask.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RAPIDS_PY_WHEEL_NAME="pylibraft_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels
python -m pip install --no-deps ./local-pylibraft-dep/pylibraft*.whl

# Always install latest dask for testing
python -m pip install git+https://github.com/dask/dask.git@2023.7.1 git+https://github.com/dask/distributed.git@2023.7.1 git+https://github.com/rapidsai/dask-cuda.git@branch-23.10
python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main git+https://github.com/rapidsai/dask-cuda.git@branch-23.10

# echo to expand wildcard before adding `[extra]` requires for pip
python -m pip install $(echo ./dist/raft_dask*.whl)[test]
Expand Down
6 changes: 3 additions & 3 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
- dask-core==2023.7.1
- dask-core>=2023.7.1
- dask-cuda==23.10.*
- dask==2023.7.1
- distributed==2023.7.1
- dask>=2023.7.1
- distributed>=2023.7.1
- doxygen>=1.8.20
- gcc_linux-64=11.*
- gmock>=1.13.0
Expand Down
6 changes: 3 additions & 3 deletions conda/environments/all_cuda-120_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,10 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
- dask-core==2023.7.1
- dask-core>=2023.7.1
- dask-cuda==23.10.*
- dask==2023.7.1
- distributed==2023.7.1
- dask>=2023.7.1
- distributed>=2023.7.1
- doxygen>=1.8.20
- gcc_linux-64=11.*
- gmock>=1.13.0
Expand Down
6 changes: 3 additions & 3 deletions conda/recipes/raft-dask/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,10 @@ requirements:
- cudatoolkit
{% endif %}
- {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
- dask ==2023.7.1
- dask-core ==2023.7.1
- dask >=2023.7.1
- dask-core >=2023.7.1
- dask-cuda ={{ minor_version }}
- distributed ==2023.7.1
- distributed >=2023.7.1
- joblib >=0.11
- nccl >=2.9.9
- pylibraft {{ version }}
Expand Down
16 changes: 11 additions & 5 deletions cpp/include/raft/neighbors/cagra_serialize.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,16 @@ namespace raft::neighbors::cagra {
* @param[in] handle the raft handle
* @param[in] os output stream
* @param[in] index CAGRA index
* @param[in] include_dataset Whether or not to write out the dataset to the file.
*
*/
template <typename T, typename IdxT>
void serialize(raft::resources const& handle, std::ostream& os, const index<T, IdxT>& index)
void serialize(raft::resources const& handle,
std::ostream& os,
const index<T, IdxT>& index,
bool include_dataset = true)
{
detail::serialize(handle, os, index);
detail::serialize(handle, os, index, include_dataset);
}

/**
Expand All @@ -77,14 +81,16 @@ void serialize(raft::resources const& handle, std::ostream& os, const index<T, I
* @param[in] handle the raft handle
* @param[in] filename the file name for saving the index
* @param[in] index CAGRA index
* @param[in] include_dataset Whether or not to write out the dataset to the file.
*
*/
template <typename T, typename IdxT>
void serialize(raft::resources const& handle,
const std::string& filename,
const index<T, IdxT>& index)
const index<T, IdxT>& index,
bool include_dataset = true)
{
detail::serialize(handle, filename, index);
detail::serialize(handle, filename, index, include_dataset);
}

/**
Expand Down Expand Up @@ -158,4 +164,4 @@ namespace raft::neighbors::experimental::cagra {
using raft::neighbors::cagra::deserialize;
using raft::neighbors::cagra::serialize;

} // namespace raft::neighbors::experimental::cagra
} // namespace raft::neighbors::experimental::cagra
54 changes: 35 additions & 19 deletions cpp/include/raft/neighbors/detail/cagra/cagra_serialize.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@

namespace raft::neighbors::cagra::detail {

// Serialization version 1.
constexpr int serialization_version = 2;
constexpr int serialization_version = 3;

// NB: we wrap this check in a struct, so that the updated RealSize is easy to see in the error
// message.
Expand All @@ -50,41 +49,53 @@ template struct check_index_layout<sizeof(index<double, std::uint64_t>), expecte
*
*/
template <typename T, typename IdxT>
void serialize(raft::resources const& res, std::ostream& os, const index<T, IdxT>& index_)
void serialize(raft::resources const& res,
std::ostream& os,
const index<T, IdxT>& index_,
bool include_dataset)
{
RAFT_LOG_DEBUG(
"Saving CAGRA index, size %zu, dim %u", static_cast<size_t>(index_.size()), index_.dim());

std::string dtype_string = raft::detail::numpy_serializer::get_numpy_dtype<T>().to_string();
dtype_string.resize(4);
os << dtype_string;

serialize_scalar(res, os, serialization_version);
serialize_scalar(res, os, index_.size());
serialize_scalar(res, os, index_.dim());
serialize_scalar(res, os, index_.graph_degree());
serialize_scalar(res, os, index_.metric());
auto dataset = index_.dataset();
// Remove padding before saving the dataset
auto host_dataset = make_host_matrix<T, int64_t>(dataset.extent(0), dataset.extent(1));
RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(),
sizeof(T) * host_dataset.extent(1),
dataset.data_handle(),
sizeof(T) * dataset.stride(0),
sizeof(T) * host_dataset.extent(1),
dataset.extent(0),
cudaMemcpyDefault,
resource::get_cuda_stream(res)));
resource::sync_stream(res);
serialize_mdspan(res, os, host_dataset.view());
serialize_mdspan(res, os, index_.graph());

serialize_scalar(res, os, include_dataset);
if (include_dataset) {
auto dataset = index_.dataset();
// Remove padding before saving the dataset
auto host_dataset = make_host_matrix<T, int64_t>(dataset.extent(0), dataset.extent(1));
RAFT_CUDA_TRY(cudaMemcpy2DAsync(host_dataset.data_handle(),
sizeof(T) * host_dataset.extent(1),
dataset.data_handle(),
sizeof(T) * dataset.stride(0),
sizeof(T) * host_dataset.extent(1),
dataset.extent(0),
cudaMemcpyDefault,
resource::get_cuda_stream(res)));
resource::sync_stream(res);
serialize_mdspan(res, os, host_dataset.view());
}
}

template <typename T, typename IdxT>
void serialize(raft::resources const& res,
const std::string& filename,
const index<T, IdxT>& index_)
const index<T, IdxT>& index_,
bool include_dataset)
{
std::ofstream of(filename, std::ios::out | std::ios::binary);
if (!of) { RAFT_FAIL("Cannot open file %s", filename.c_str()); }

detail::serialize(res, of, index_);
detail::serialize(res, of, index_, include_dataset);

of.close();
if (!of) { RAFT_FAIL("Error writing output %s", filename.c_str()); }
Expand All @@ -102,6 +113,9 @@ void serialize(raft::resources const& res,
template <typename T, typename IdxT>
auto deserialize(raft::resources const& res, std::istream& is) -> index<T, IdxT>
{
char dtype_string[4];
is.read(dtype_string, 4);

auto ver = deserialize_scalar<int>(res, is);
if (ver != serialization_version) {
RAFT_FAIL("serialization version mismatch, expected %d, got %d ", serialization_version, ver);
Expand All @@ -113,9 +127,11 @@ auto deserialize(raft::resources const& res, std::istream& is) -> index<T, IdxT>

auto dataset = raft::make_host_matrix<T, int64_t>(n_rows, dim);
auto graph = raft::make_host_matrix<IdxT, int64_t>(n_rows, graph_degree);
deserialize_mdspan(res, is, dataset.view());
deserialize_mdspan(res, is, graph.view());

bool has_dataset = deserialize_scalar<bool>(res, is);
if (has_dataset) { deserialize_mdspan(res, is, dataset.view()); }

return index<T, IdxT>(
res, metric, raft::make_const_mdspan(dataset.view()), raft::make_const_mdspan(graph.view()));
}
Expand Down
6 changes: 3 additions & 3 deletions cpp/include/raft/neighbors/detail/nn_descent.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1206,13 +1206,13 @@ void GNND<Data_t, Index_t>::build(Data_t* data,
Index_t* output_graph,
cudaStream_t stream)
{
cudaStreamSynchronize(stream);
nrow_ = nrow;
graph_.h_graph = (InternalID_t<Index_t>*)output_graph;

cudaPointerAttributes data_ptr_attr;
RAFT_CUDA_TRY(cudaPointerGetAttributes(&data_ptr_attr, data));
if (data_ptr_attr.type == cudaMemoryTypeUnregistered) {
std::cout << "HERE AS EXPECTED" << std::endl;
typename std::remove_const<Data_t>::type* input_data;
size_t batch_size = 100000;
RAFT_CUDA_TRY(cudaMallocAsync(
Expand Down Expand Up @@ -1378,7 +1378,7 @@ index<IdxT> build(raft::resources const& res,
const index_params& params,
mdspan<const T, matrix_extent<int64_t>, row_major, Accessor> dataset)
{
RAFT_EXPECTS(dataset.size() < std::numeric_limits<int>::max() - 1,
RAFT_EXPECTS(dataset.extent(0) < std::numeric_limits<int>::max() - 1,
"The dataset size for GNND should be less than %d",
std::numeric_limits<int>::max() - 1);
size_t intermediate_degree = params.intermediate_graph_degree;
Expand Down Expand Up @@ -1415,9 +1415,9 @@ index<IdxT> build(raft::resources const& res,
.termination_threshold = params.termination_threshold,
.metric_type = Metric_t::METRIC_L2};
GNND<const T, int> nnd(build_config);
std::cout << "Intermediate graph dim: " << int_graph.extent(0) << ", " << int_graph.extent(1)
<< std::endl;
GNND<const T, int> nnd(build_config);
nnd.build(dataset.data_handle(),
dataset.extent(0),
int_graph.data_handle(),
Expand Down
6 changes: 4 additions & 2 deletions cpp/include/raft_runtime/neighbors/cagra.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,16 @@ namespace raft::runtime::neighbors::cagra {
raft::device_matrix_view<float, int64_t, row_major> distances); \
void serialize_file(raft::resources const& handle, \
const std::string& filename, \
const raft::neighbors::cagra::index<T, IdxT>& index); \
const raft::neighbors::cagra::index<T, IdxT>& index, \
bool include_dataset = true); \
\
void deserialize_file(raft::resources const& handle, \
const std::string& filename, \
raft::neighbors::cagra::index<T, IdxT>* index); \
void serialize(raft::resources const& handle, \
std::string& str, \
const raft::neighbors::cagra::index<T, IdxT>& index); \
const raft::neighbors::cagra::index<T, IdxT>& index, \
bool include_dataset = true); \
\
void deserialize(raft::resources const& handle, \
const std::string& str, \
Expand Down
10 changes: 6 additions & 4 deletions cpp/src/raft_runtime/neighbors/cagra_serialize.cu
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@ namespace raft::runtime::neighbors::cagra {
#define RAFT_INST_CAGRA_SERIALIZE(DTYPE) \
void serialize_file(raft::resources const& handle, \
const std::string& filename, \
const raft::neighbors::cagra::index<DTYPE, uint32_t>& index) \
const raft::neighbors::cagra::index<DTYPE, uint32_t>& index, \
bool include_dataset) \
{ \
raft::neighbors::cagra::serialize(handle, filename, index); \
raft::neighbors::cagra::serialize(handle, filename, index, include_dataset); \
}; \
\
void deserialize_file(raft::resources const& handle, \
Expand All @@ -41,10 +42,11 @@ namespace raft::runtime::neighbors::cagra {
}; \
void serialize(raft::resources const& handle, \
std::string& str, \
const raft::neighbors::cagra::index<DTYPE, uint32_t>& index) \
const raft::neighbors::cagra::index<DTYPE, uint32_t>& index, \
bool include_dataset) \
{ \
std::stringstream os; \
raft::neighbors::cagra::serialize(handle, os, index); \
raft::neighbors::cagra::serialize(handle, os, index, include_dataset); \
str = os.str(); \
} \
\
Expand Down
Loading

0 comments on commit 56d3b93

Please sign in to comment.