Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support RMM aligned resource adapter in JNI [skip ci] #8266

Merged
merged 5 commits into from
May 20, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 35 additions & 3 deletions java/src/main/java/ai/rapids/cudf/Rmm.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -173,6 +173,36 @@ public static synchronized void initialize(int allocationMode, LogConf logConf,
*/
rongou marked this conversation as resolved.
Show resolved Hide resolved
public static synchronized void initialize(int allocationMode, LogConf logConf, long poolSize,
long maxPoolSize) throws RmmException {
initialize(allocationMode, logConf, poolSize, maxPoolSize, 0, 0);
}

/**
* Initialize memory manager state and storage. This will always initialize
* the CUDA context for the calling thread if it is not already set. The
* caller is responsible for setting the desired CUDA device prior to this
* call if a specific device is already set.
* <p>NOTE: All cudf methods will set the chosen CUDA device in the CUDA
* context of the calling thread after this returns.
* @param allocationMode Allocation strategy to use. Bit set using
* {@link RmmAllocationMode#CUDA_DEFAULT},
* {@link RmmAllocationMode#POOL},
* {@link RmmAllocationMode#ARENA} and
rongou marked this conversation as resolved.
Show resolved Hide resolved
* {@link RmmAllocationMode#CUDA_MANAGED_MEMORY}
* @param logConf How to do logging or null if you don't want to
* @param poolSize The initial pool size in bytes
* @param maxPoolSize The maximum size the pool is allowed to grow. If the specified value
* is <= 0 then the pool size will not be artificially limited.
* @param allocationAlignment The size to which allocations are aligned.
* @param alignmentThreshold Only allocations with size larger than or equal to this threshold
* are aligned with `allocationAlignment`.
* @throws IllegalStateException if RMM has already been initialized
* @throws IllegalArgumentException if a max pool size is specified but the allocation mode
* is not {@link RmmAllocationMode#POOL} or
* {@link RmmAllocationMode#ARENA}, or the maximum pool size is
* below the initial size.
*/
public static synchronized void initialize(int allocationMode, LogConf logConf, long poolSize,
long maxPoolSize, long allocationAlignment, long alignmentThreshold) throws RmmException {
if (initialized) {
throw new IllegalStateException("RMM is already initialized");
}
Expand All @@ -195,7 +225,8 @@ public static synchronized void initialize(int allocationMode, LogConf logConf,
loc = logConf.loc;
}

initializeInternal(allocationMode, loc.internalId, path, poolSize, maxPoolSize);
initializeInternal(allocationMode, loc.internalId, path, poolSize, maxPoolSize,
allocationAlignment, alignmentThreshold);
MemoryCleaner.setDefaultGpu(Cuda.getDevice());
initialized = true;
}
Expand Down Expand Up @@ -241,7 +272,8 @@ private static long[] sortThresholds(long[] thresholds) {
}

private static native void initializeInternal(int allocationMode, int logTo, String path,
long poolSize, long maxPoolSize) throws RmmException;
long poolSize, long maxPoolSize, long allocationAlignment, long alignmentThreshold)
throws RmmException;

/**
* Shut down any initialized RMM instance. This should be used very rarely. It does not need to
Expand Down
30 changes: 16 additions & 14 deletions java/src/main/native/src/RmmJni.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2020, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,6 +20,7 @@
#include <iostream>
rongou marked this conversation as resolved.
Show resolved Hide resolved
#include <limits>

#include <rmm/mr/device/aligned_resource_adaptor.hpp>
#include <rmm/mr/device/arena_memory_resource.hpp>
#include <rmm/mr/device/cuda_memory_resource.hpp>
#include <rmm/mr/device/logging_resource_adaptor.hpp>
Expand Down Expand Up @@ -332,7 +333,9 @@ extern "C" {
JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, jclass clazz,
jint allocation_mode, jint log_to,
jstring jpath, jlong pool_size,
jlong max_pool_size) {
jlong max_pool_size,
jlong allocation_alignment,
jlong alignment_threshold) {
try {
// make sure the CUDA device is setup in the context
cudaError_t cuda_status = cudaFree(0);
Expand All @@ -351,37 +354,36 @@ JNIEXPORT void JNICALL Java_ai_rapids_cudf_Rmm_initializeInternal(JNIEnv *env, j
if (use_managed_mem) {
Initialized_resource = rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(
std::make_shared<rmm::mr::managed_memory_resource>(), pool_size, pool_limit);
auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT);
Tracking_memory_resource.reset(wrapped);
} else {
Initialized_resource = rmm::mr::make_owning_wrapper<rmm::mr::pool_memory_resource>(
std::make_shared<rmm::mr::cuda_memory_resource>(), pool_size, pool_limit);
auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT);
Tracking_memory_resource.reset(wrapped);
}
} else if (use_arena_alloc) {
std::size_t pool_limit = (max_pool_size > 0) ? static_cast<std::size_t>(max_pool_size) :
std::numeric_limits<std::size_t>::max();
if (use_managed_mem) {
Initialized_resource = rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(
std::make_shared<rmm::mr::managed_memory_resource>(), pool_size, pool_limit);
auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT);
Tracking_memory_resource.reset(wrapped);
} else {
Initialized_resource = rmm::mr::make_owning_wrapper<rmm::mr::arena_memory_resource>(
std::make_shared<rmm::mr::cuda_memory_resource>(), pool_size, pool_limit);
auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT);
Tracking_memory_resource.reset(wrapped);
}
} else if (use_managed_mem) {
Initialized_resource = std::make_shared<rmm::mr::managed_memory_resource>();
auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT);
Tracking_memory_resource.reset(wrapped);
} else {
Initialized_resource = std::make_shared<rmm::mr::cuda_memory_resource>();
auto wrapped = make_tracking_adaptor(Initialized_resource.get(), RMM_ALLOC_SIZE_ALIGNMENT);
Tracking_memory_resource.reset(wrapped);
}

if (allocation_alignment != 0) {
Initialized_resource = rmm::mr::make_owning_wrapper<rmm::mr::aligned_resource_adaptor>(
Initialized_resource, allocation_alignment, alignment_threshold);
}

auto wrapped = make_tracking_adaptor(
Initialized_resource.get(),
std::max(RMM_ALLOC_SIZE_ALIGNMENT, static_cast<std::size_t>(allocation_alignment)));
Tracking_memory_resource.reset(wrapped);

auto resource = Tracking_memory_resource.get();
rmm::mr::set_current_device_resource(resource);

Expand Down