Skip to content

Commit

Permalink
Some cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
JuanGonzalezCaminero committed Sep 24, 2024
1 parent f61bc3b commit 1331728
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 1,396 deletions.
8 changes: 6 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ set(CMAKE_INCLUDE_DIRECTORIES_PROJECT_BEFORE ON)
# Options
option(ADEPT_USE_SURF "Enable surface model navigation on GPU" OFF)
option(ADEPT_USE_SURF_SINGLE "Use surface model in single precision" OFF)
option(USE_SPLIT_KERNELS "Run split version of the transport kernels" OFF)
option(DEBUG_SINGLE_THREAD "Run transport kernels in single thread mode" OFF)
option(WITH_FLUCT "Switch on the energy loss fluctuations" OFF)

Expand Down Expand Up @@ -83,6 +84,11 @@ endif()
if(NOT TARGET VecGeom::vgdml)
message(FATAL_ERROR "AdePT requires VecGeom compiled with GDML support")
endif()
# Run split kernels
if (USE_SPLIT_KERNELS)
add_compile_definitions(USE_SPLIT_KERNELS)
message(STATUS "${Green}AdePT will run with split kernels${ColorReset}")
endif()
# Debugging in single-thread mode
if (DEBUG_SINGLE_THREAD)
add_compile_definitions("$<$<CONFIG:Debug>:DEBUG_SINGLE_THREAD>")
Expand Down Expand Up @@ -153,8 +159,6 @@ if(NOT WITH_FLUCT)
add_compile_definitions(NOFLUCTUATION)
endif()

# string(APPEND CMAKE_CUDA_FLAGS " -Xptxas=-v")

#----------------------------------------------------------------------------#
# Build Targets
#----------------------------------------------------------------------------#
Expand Down
64 changes: 29 additions & 35 deletions include/AdePT/core/AdePTTransport.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,14 @@
#include <AdePT/base/Atomic.h>
#include <AdePT/navigation/AdePTNavigator.h>
#include <AdePT/base/MParray.h>
// #include <AdePT/kernels/electrons.cuh>
// #include <AdePT/kernels/gammas.cuh>


#include <AdePT/kernels/electrons_experimental.cuh>
// #include <AdePT/kernels/electrons_simplified.cuh>
// #include <AdePT/kernels/electrons_simplified_hepem.cuh>

#include <AdePT/kernels/gammas_experimental.cuh>
// #include <AdePT/kernels/gammas_simplified.cuh>
// #include <AdePT/kernels/gammas_simplified_hepem.cuh>

#ifndef USE_SPLIT_KERNELS
#include <AdePT/kernels/electrons.cuh>
#include <AdePT/kernels/gammas.cuh>
#else
#include <AdePT/kernels/electrons_split.cuh>
#include <AdePT/kernels/gammas_split.cuh>
#endif

#include <VecGeom/base/Config.h>
#ifdef VECGEOM_ENABLE_CUDA
Expand Down Expand Up @@ -272,12 +268,14 @@ GPUstate *InitializeGPU(adeptint::TrackBuffer &buffer, int capacity, int maxbatc
COPCORE_CUDA_CHECK(cudaEventCreate(&gpuState.particles[i].event));
}

#ifdef USE_SPLIT_KERNELS
// Init HepEM tracks
// e+ / e-
COPCORE_CUDA_CHECK(cudaMalloc(&gpuState.hepEMBuffers_d.electronsHepEm, capacity * sizeof(G4HepEmElectronTrack)));
COPCORE_CUDA_CHECK(cudaMalloc(&gpuState.hepEMBuffers_d.positronsHepEm, capacity * sizeof(G4HepEmElectronTrack)));
// Gammas
COPCORE_CUDA_CHECK(cudaMalloc(&gpuState.hepEMBuffers_d.gammasHepEm, capacity * sizeof(G4HepEmGammaTrack)));
#endif

InitLeakedQueues<<<1, 1, 0, gpuState.stream>>>(gpuState.allmgr_d, kQueueSize);
COPCORE_CUDA_CHECK(cudaDeviceSynchronize());
Expand Down Expand Up @@ -305,9 +303,11 @@ void FreeGPU(GPUstate &gpuState, G4HepEmState *g4hepem_state)
COPCORE_CUDA_CHECK(cudaFreeHost(gpuState.stats));
COPCORE_CUDA_CHECK(cudaFree(gpuState.toDevice_dev));

COPCORE_CUDA_CHECK(cudaFree(gpuState.hepEMBuffers_d.electronsHepEm));
#ifdef USE_SPLIT_KERNELS
COPCORE_CUDA_CHECK(cudaFree(gpuState.hepEMBuffers_d.electronsHepEm));
COPCORE_CUDA_CHECK(cudaFree(gpuState.hepEMBuffers_d.positronsHepEm));
COPCORE_CUDA_CHECK(cudaFree(gpuState.hepEMBuffers_d.gammasHepEm));
#endif

COPCORE_CUDA_CHECK(cudaStreamDestroy(gpuState.stream));

Expand Down Expand Up @@ -402,13 +402,11 @@ void ShowerGPU(IntegrationLayer &integration, int event, adeptint::TrackBuffer &
transportBlocks = (numElectrons + TransportThreads - 1) / TransportThreads;
transportBlocks = std::min(transportBlocks, MaxBlocks);
#endif
// TransportElectrons<AdeptScoring><<<transportBlocks, TransportThreads, 0, electrons.stream>>>(
// electrons.trackmgr, secondaries, electrons.leakedTracks, scoring_dev,
// VolAuxArray::GetInstance().fAuxData_dev);
// TransportElectrons<true, AdeptScoring><<<transportBlocks, TransportThreads, 0, electrons.stream>>>(
// electrons.trackmgr, gpuState.hepEMBuffers_d.electronsHepEm, secondaries, electrons.leakedTracks, scoring_dev,
// VolAuxArray::GetInstance().fAuxData_dev);

#ifndef USE_SPLIT_KERNELS
TransportElectrons<AdeptScoring><<<transportBlocks, TransportThreads, 0, electrons.stream>>>(
electrons.trackmgr, secondaries, electrons.leakedTracks, scoring_dev,
VolAuxArray::GetInstance().fAuxData_dev);
#else
ElectronPhysics1<true><<<transportBlocks, TransportThreads, 0, electrons.stream>>>(
electrons.trackmgr, gpuState.hepEMBuffers_d.electronsHepEm, VolAuxArray::GetInstance().fAuxData_dev
);
Expand All @@ -421,7 +419,7 @@ void ShowerGPU(IntegrationLayer &integration, int event, adeptint::TrackBuffer &
ElectronInteractions<true, AdeptScoring><<<transportBlocks, TransportThreads, 0, electrons.stream>>>(
electrons.trackmgr, gpuState.hepEMBuffers_d.electronsHepEm, secondaries, electrons.leakedTracks, scoring_dev,
VolAuxArray::GetInstance().fAuxData_dev);

#endif
COPCORE_CUDA_CHECK(cudaEventRecord(electrons.event, electrons.stream));
COPCORE_CUDA_CHECK(cudaStreamWaitEvent(gpuState.stream, electrons.event, 0));
}
Expand All @@ -433,13 +431,11 @@ void ShowerGPU(IntegrationLayer &integration, int event, adeptint::TrackBuffer &
transportBlocks = (numPositrons + TransportThreads - 1) / TransportThreads;
transportBlocks = std::min(transportBlocks, MaxBlocks);
#endif
// TransportPositrons<AdeptScoring><<<transportBlocks, TransportThreads, 0, positrons.stream>>>(
// positrons.trackmgr, secondaries, positrons.leakedTracks, scoring_dev,
// VolAuxArray::GetInstance().fAuxData_dev);
// TransportElectrons<false, AdeptScoring><<<transportBlocks, TransportThreads, 0, positrons.stream>>>(
// positrons.trackmgr, gpuState.hepEMBuffers_d.positronsHepEm, secondaries, positrons.leakedTracks, scoring_dev,
// VolAuxArray::GetInstance().fAuxData_dev);

#ifndef USE_SPLIT_KERNELS
TransportPositrons<AdeptScoring><<<transportBlocks, TransportThreads, 0, positrons.stream>>>(
positrons.trackmgr, secondaries, positrons.leakedTracks, scoring_dev,
VolAuxArray::GetInstance().fAuxData_dev);
#else
ElectronPhysics1<false><<<transportBlocks, TransportThreads, 0, positrons.stream>>>(
positrons.trackmgr, gpuState.hepEMBuffers_d.positronsHepEm, VolAuxArray::GetInstance().fAuxData_dev
);
Expand All @@ -452,7 +448,7 @@ void ShowerGPU(IntegrationLayer &integration, int event, adeptint::TrackBuffer &
ElectronInteractions<false, AdeptScoring><<<transportBlocks, TransportThreads, 0, positrons.stream>>>(
positrons.trackmgr, gpuState.hepEMBuffers_d.positronsHepEm, secondaries, positrons.leakedTracks, scoring_dev,
VolAuxArray::GetInstance().fAuxData_dev);

#endif
COPCORE_CUDA_CHECK(cudaEventRecord(positrons.event, positrons.stream));
COPCORE_CUDA_CHECK(cudaStreamWaitEvent(gpuState.stream, positrons.event, 0));
}
Expand All @@ -464,6 +460,10 @@ void ShowerGPU(IntegrationLayer &integration, int event, adeptint::TrackBuffer &
transportBlocks = (numGammas + TransportThreads - 1) / TransportThreads;
transportBlocks = std::min(transportBlocks, MaxBlocks);
#endif
#ifndef USE_SPLIT_KERNELS
TransportGammas<AdeptScoring><<<transportBlocks, TransportThreads, 0, gammas.stream>>>(
gammas.trackmgr, secondaries, gammas.leakedTracks, scoring_dev, VolAuxArray::GetInstance().fAuxData_dev);
#else
GammaPhysics1<<<transportBlocks, TransportThreads, 0, gammas.stream>>>(
gammas.trackmgr, gpuState.hepEMBuffers_d.gammasHepEm, VolAuxArray::GetInstance().fAuxData_dev
);
Expand All @@ -476,13 +476,7 @@ void ShowerGPU(IntegrationLayer &integration, int event, adeptint::TrackBuffer &
GammaPhysics2<AdeptScoring><<<transportBlocks, TransportThreads, 0, gammas.stream>>>(
gammas.trackmgr, gpuState.hepEMBuffers_d.gammasHepEm, secondaries, scoring_dev, VolAuxArray::GetInstance().fAuxData_dev
);

// TransportGammas<AdeptScoring><<<transportBlocks, TransportThreads, 0, gammas.stream>>>(
// gammas.trackmgr, secondaries, gammas.leakedTracks, scoring_dev, VolAuxArray::GetInstance().fAuxData_dev);

// TransportGammas<AdeptScoring><<<transportBlocks, TransportThreads, 0, gammas.stream>>>(
// gammas.trackmgr, gpuState.hepEMBuffers_d.gammasHepEm, secondaries, gammas.leakedTracks, scoring_dev, VolAuxArray::GetInstance().fAuxData_dev);

#endif
COPCORE_CUDA_CHECK(cudaEventRecord(gammas.event, gammas.stream));
COPCORE_CUDA_CHECK(cudaStreamWaitEvent(gpuState.stream, gammas.event, 0));
}
Expand Down
8 changes: 7 additions & 1 deletion include/AdePT/core/AdePTTransportStruct.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@
#include <G4HepEmParameters.hh>
#include <G4HepEmRandomEngine.hh>

#ifdef USE_SPLIT_KERNELS
#include <G4HepEmElectronTrack.hh>
#include <G4HepEmGammaTrack.hh>
#endif

#ifdef __CUDA_ARCH__
// Define inline implementations of the RNG methods for the device.
Expand Down Expand Up @@ -68,11 +70,13 @@ struct AllTrackManagers {
MParrayTracks *leakedTracks[ParticleType::NumParticleTypes];
};

#ifdef USE_SPLIT_KERNELS
struct HepEmBuffers {
G4HepEmElectronTrack *electronsHepEm;
G4HepEmElectronTrack *positronsHepEm;
G4HepEmGammaTrack *gammasHepEm;
};
#endif

// A data structure to transfer statistics after each iteration.
struct Stats {
Expand All @@ -87,7 +91,9 @@ struct GPUstate {
ParticleType particles[ParticleType::NumParticleTypes];
AllTrackManagers allmgr_h; ///< Host pointers for track managers
AllTrackManagers allmgr_d; ///< Device pointers for track managers
HepEmBuffers hepEMBuffers_d;
#ifdef USE_SPLIT_KERNELS
HepEmBuffers hepEMBuffers_d;
#endif
// Create a stream to synchronize kernels of all particle types.
cudaStream_t stream; ///< all-particle sync stream
TrackData *toDevice_dev{nullptr}; ///< toDevice buffer of tracks
Expand Down
25 changes: 11 additions & 14 deletions include/AdePT/core/Track.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,9 @@ struct Track {
using Precision = vecgeom::Precision;

int parentID{0}; // Stores the track id of the initial particle given to AdePT
long hitsurfID{0};

RanluxppDouble rngState;
double eKin;
double preStepEKin;
double numIALeft[3];
double initialRange;
double dynamicRangeFactor;
Expand All @@ -34,30 +32,29 @@ struct Track {
double properTime{0};

vecgeom::Vector3D<Precision> pos;
vecgeom::Vector3D<Precision> preStepPos;
vecgeom::Vector3D<Precision> dir;
vecgeom::Vector3D<Precision> preStepDir;
vecgeom::NavigationState navState;

#ifdef USE_SPLIT_KERNELS
RanluxppDouble newRNG;

// Variables used to store track info needed for scoring
double preStepEKin;
vecgeom::Vector3D<Precision> preStepPos;
vecgeom::Vector3D<Precision> preStepDir;
vecgeom::NavigationState nextState;
vecgeom::NavigationState preStepNavState;

// Variables used to store navigation results
long hitsurfID{0};
bool propagated{false};
double geometryStepLength{0};
// Todo: check whether it's needed to keep safety here or we can use the one stored in the HepEM track
double safety{0};

RanluxppDouble newRNG;

// Variables used to store physics results from G4HepEM
// double geometricalStepLengthFromPhysics{0};
// int winnerProcessIndex{0};
// double physicalStepLength{0};
// double preStepMFPs[3];
// double PEmxSec{0};
// G4HepEmMSCTrackData mscData;
// Variables used to store results from G4HepEM
bool restrictedPhysicalStepLength{false};
bool stopped{false};
#endif

__host__ __device__ double Uniform() { return rngState.Rndm(); }

Expand Down
Loading

0 comments on commit 1331728

Please sign in to comment.