Peano 4
Loading...
Searching...
No Matches
KernelBenchmarksFVRusanov-main.cpp File Reference
#include "KernelBenchmarksFVRusanov-main.h"
#include <fenv.h>
#include "config.h"
#include "Constants.h"
#include "exahype2/UserInterface.h"
#include "observers/CreateGrid.h"
#include "observers/CreateGridAndConvergeLoadBalancing.h"
#include "observers/CreateGridButPostponeRefinement.h"
#include "observers/InitGrid.h"
#include "observers/PlotSolution.h"
#include "observers/TimeStep.h"
#include "peano4/peano.h"
#include "repositories/DataRepository.h"
#include "repositories/SolverRepository.h"
#include "repositories/StepRepository.h"
#include "tarch/accelerator/accelerator.h"
#include "tarch/accelerator/Device.h"
#include "tarch/logging/CommandLineLogger.h"
#include "tarch/logging/Log.h"
#include "tarch/logging/LogFilter.h"
#include "tarch/logging/Statistics.h"
#include "tarch/multicore/Core.h"
#include "tarch/multicore/multicore.h"
#include "tarch/multicore/otter.h"
#include "tarch/NonCriticalAssertions.h"
#include "tarch/timing/Measurement.h"
#include "tarch/timing/Watch.h"
#include "tasks/FVRusanovSolverEnclaveTask.h"
#include "toolbox/loadbalancing/loadbalancing.h"
Include dependency graph for KernelBenchmarksFVRusanov-main.cpp:

Go to the source code of this file.

Functions

void initInputData (double *Q)
 Set input data.
 
void storeOutcome (const double *Q, const double maxEigenvalue)
 Store outcome of one compute kernel.
 
std::tuple< double, int > validateOutcome (double *Q, int patchIndex, const double maxEigenvalue)
 Validate data against pre-stored simulation outcome.
 
void reportRuntime (const std::string &kernelIdentificator, const ::tarch::timing::Measurement &timingKernelLaunch, int patches)
 
template<class TempDataEnumerator , ::peano4::utils::LoopPlacement loopParallelism>
void wrapPatchwiseHeapFunctorsHostKernel (int device, ::exahype2::CellData &patchData)
 This is a wrapper around the kernel call with the functors.
 
template<class TempDataEnumerator , ::peano4::utils::LoopPlacement loopParallelism>
void wrapBatchedHeapFunctorHostKernels (int device, ::exahype2::CellData &patchData)
 Another wrapper.
 
template<class TempDataEnumerator , ::peano4::utils::LoopPlacement loopParallelism>
void wrapVolumewiseFunctorHostKernels (int device, ::exahype2::CellData &patchData)
 Another wrapper.
 
template<void(*)(::exahype2::CellData &, ::tarch::timing::Measurement &, ::peano4::utils::LoopPlacement) Function, ::peano4::utils::LoopPlacement loopParallelism>
void wrapStatelessHostKernel (int device, ::exahype2::CellData &patchData)
 Wrapper around stateless kernel invocations.
 
template<void(*)(int, ::exahype2::CellData &, ::tarch::timing::Measurement &) Function>
void wrapDeviceKernel (int device, ::exahype2::CellData &patchData)
 
void runBenchmarks (int numberOfPatches, int launchingThreads)
 Run the benchmark for one particular number of patches.
 
int main (int argc, char **argv)
 Main routine of the SPH code.
 

Variables

::tarch::logging::Log _log ("::")
 
double validMaxEigenvalue = 0.0
 
double * validOutcome = nullptr
 
static constexpr double TimeStamp = 0.5
 
static constexpr double TimeStepSize = 1e-6
 
static constexpr double CellSize = 0.1
 
static constexpr double CellOffset = 4.0
 
static constexpr int HaloSize = 1
 
::tarch::timing::Measurement timingComputeKernel
 
::tarch::multicore::BooleanSemaphore validateOutcomeSemaphore
 
static constexpr int NumberOfInputEntries
 
static constexpr int NumberOfOutputEntries
 
static constexpr int NumberOfFiniteVolumesPerPatch
 

Function Documentation

◆ initInputData()

void initInputData ( double * Q)

Set input data.

We really don't care if this makes any sense. Just make the data reasonably smooth and ensure that all is positive, as there are density and energy among the unknowns. If they become negative or zero, the compute kernels do not make any sense anymore.

Definition at line 73 of file KernelBenchmarksFVRusanov-main.cpp.

References NumberOfInputEntries, and tarch::la::PI.

Referenced by runBenchmarks().

Here is the caller graph for this function:

◆ main()

◆ reportRuntime()

void reportRuntime ( const std::string & kernelIdentificator,
const ::tarch::timing::Measurement & timingKernelLaunch,
int patches )

Definition at line 131 of file KernelBenchmarksFVRusanov-main.cpp.

References tarch::timing::Measurement::getValue(), logInfo, NumberOfFiniteVolumesPerPatch, timingComputeKernel, and tarch::timing::Measurement::toString().

Referenced by runBenchmarks().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ runBenchmarks()

void runBenchmarks ( int numberOfPatches,
int launchingThreads )

Run the benchmark for one particular number of patches.

This operation benchmarks exclusively the host performance. This happens in two steps: We first assess the baseline performance, i.e., the serial kernel call, and then we look if the multithreaded stateless optimisation pays off and what speedup we obtain.

These two steps may run on multiple threads to mimic the spacetree tasking and domain decomposition. In OpenMP, for example, there is one parallel loop around the two steps which invokes the same benchmark per loop entry. However, only the first thread will ever report its runtime. If you are not bandwidth-bound, the data you obtain should thus be independent of the number of threads used.

Parameters
numberOfPatches: Number of patches to study

Definition at line 379 of file KernelBenchmarksFVRusanov-main.cpp.

References tarch::allocateMemory(), exahype2::CellData::cellCentre, CellOffset, CellSize, exahype2::CellData::cellSize, exahype2::CellData::dt, endParallelFor, tarch::timing::Measurement::erase(), tarch::freeMemory(), tarch::timing::Watch::getCalendarTime(), tarch::mpi::Rank::getGlobalMasterRank(), tarch::accelerator::Device::getInstance(), tarch::mpi::Rank::getInstance(), tarch::accelerator::Device::getLocalDeviceId(), tarch::mpi::Rank::getNumberOfRanks(), tarch::mpi::Rank::getRank(), HaloSize, tarch::accelerator::Device::HostDevice, initInputData(), j, logError, logInfo, tarch::ManagedSharedAcceleratorDeviceMemory, exahype2::CellData::maxEigenvalue, peano4::utils::Nested, NumberOfInputEntries, NumberOfOutputEntries, parallelFor, exahype2::CellData::QIn, exahype2::CellData::QOut, reportRuntime(), peano4::utils::Serial, tarch::timing::Measurement::setValue(), peano4::grid::GridTraversalEvent::setX(), peano4::utils::SpreadOut, tarch::timing::Watch::stop(), storeOutcome(), exahype2::CellData::t, TimeStamp, TimeStepSize, exahype2::fv::rusanov::timeStepWithRusanovBatchedHeapStateless(), exahype2::fv::rusanov::sycl::timeStepWithRusanovBatchedHeapStateless(), exahype2::fv::rusanov::omp::timeStepWithRusanovBatchedHeapStateless(), exahype2::fv::rusanov::timeStepWithRusanovBatchedInsituStateless(), exahype2::fv::rusanov::sycl::timeStepWithRusanovBatchedManagedStateless(), exahype2::fv::rusanov::sycl::timeStepWithRusanovBatchedUSMStateless(), exahype2::fv::rusanov::cpp::timeStepWithRusanovBatchedUSMStateless(), exahype2::fv::rusanov::omp::timeStepWithRusanovBatchedUSMStateless(), exahype2::fv::rusanov::timeStepWithRusanovPatchwiseHeapStateless(), exahype2::fv::rusanov::sycl::timeStepWithRusanovPatchwiseHeapStateless(), exahype2::fv::rusanov::omp::timeStepWithRusanovPatchwiseHeapStateless(), exahype2::fv::rusanov::timeStepWithRusanovPatchwiseInsituStateless(), exahype2::fv::rusanov::sycl::timeStepWithRusanovPatchwiseManagedStateless(), 
exahype2::fv::rusanov::sycl::timeStepWithRusanovPatchwiseUSMStateless(), exahype2::fv::rusanov::cpp::timeStepWithRusanovPatchwiseUSMStateless(), exahype2::fv::rusanov::omp::timeStepWithRusanovPatchwiseUSMStateless(), exahype2::fv::rusanov::sycl::timeStepWithRusanovTaskgraphCopyStateless(), exahype2::fv::rusanov::sycl::timeStepWithRusanovTaskgraphManagedStateless(), exahype2::fv::rusanov::sycl::timeStepWithRusanovTaskgraphUSMStateless(), exahype2::fv::rusanov::timeStepWithRusanovVolumewiseStateless(), exahype2::fv::rusanov::omp::timeStepWithRusanovVolumewiseStateless(), timingComputeKernel, validateOutcome(), wrapDeviceKernel(), and wrapStatelessHostKernel().

Referenced by main().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ storeOutcome()

void storeOutcome ( const double * Q,
const double maxEigenvalue )

Store outcome of one compute kernel.

Make a persistent snapshot of a solution and assume, from here on, that this snapshot is the valid data. You can call this routine as often as you want. Only the very first call will trigger a snapshot.

Definition at line 87 of file KernelBenchmarksFVRusanov-main.cpp.

References logInfo, NumberOfOutputEntries, validateOutcomeSemaphore, validMaxEigenvalue, and validOutcome.

Referenced by runBenchmarks().

Here is the caller graph for this function:

◆ validateOutcome()

std::tuple< double, int > validateOutcome ( double * Q,
int patchIndex,
const double maxEigenvalue )

Validate data against pre-stored simulation outcome.

Works if and only if storeOutcome has been performed before.

Returns
Tuple of maximum difference and total number of errors.

Definition at line 105 of file KernelBenchmarksFVRusanov-main.cpp.

References tarch::la::equals(), logError, NumberOfOutputEntries, validateOutcomeSemaphore, validMaxEigenvalue, and validOutcome.

Referenced by runBenchmarks().

Here is the call graph for this function:
Here is the caller graph for this function:

◆ wrapBatchedHeapFunctorHostKernels()

template<class TempDataEnumerator , ::peano4::utils::LoopPlacement loopParallelism>
void wrapBatchedHeapFunctorHostKernels ( int device,
::exahype2::CellData & patchData )

◆ wrapDeviceKernel()

template<void(*)(int, ::exahype2::CellData &, ::tarch::timing::Measurement &) Function>
void wrapDeviceKernel ( int device,
::exahype2::CellData & patchData )

Definition at line 356 of file KernelBenchmarksFVRusanov-main.cpp.

References assertion, tarch::accelerator::Device::HostDevice, and timingComputeKernel.

Referenced by runBenchmarks().

Here is the caller graph for this function:

◆ wrapPatchwiseHeapFunctorsHostKernel()

template<class TempDataEnumerator , ::peano4::utils::LoopPlacement loopParallelism>
void wrapPatchwiseHeapFunctorsHostKernel ( int device,
::exahype2::CellData & patchData )

This is a wrapper around the kernel call with the functors.

We want to use all kernels exactly the same way. However, the various kernels all have slightly different signatures. So we write small helper functions (wrappers) which map the generic test signature onto the specific kernel.

To make this possible, all parameters which are not part of the generic interface, i.e., which are not patch data or the boolean, have to be mapped onto template arguments.

Here, we use the functor-based generic kernel implementation.

Definition at line 163 of file KernelBenchmarksFVRusanov-main.cpp.

References assertionEquals, h, HaloSize, tarch::accelerator::Device::HostDevice, exahype2::CellData::maxEigenvalue, exahype2::fv::rusanov::timeStepWithRusanovPatchwiseHeapFunctors(), and timingComputeKernel.

Here is the call graph for this function:

◆ wrapStatelessHostKernel()

void wrapStatelessHostKernel ( int device,
::exahype2::CellData & patchData )

Wrapper around stateless kernel invocations.

See also
wrapPatchwiseHeapFunctorsHostKernel()

Definition at line 349 of file KernelBenchmarksFVRusanov-main.cpp.

References assertionEquals, tarch::accelerator::Device::HostDevice, and timingComputeKernel.

Referenced by runBenchmarks().

Here is the caller graph for this function:

◆ wrapVolumewiseFunctorHostKernels()

template<class TempDataEnumerator , ::peano4::utils::LoopPlacement loopParallelism>
void wrapVolumewiseFunctorHostKernels ( int device,
::exahype2::CellData & patchData )

Variable Documentation

◆ _log

::tarch::logging::Log _log ( "::" )

◆ CellOffset

constexpr double CellOffset = 4.0
static constexpr

Definition at line 42 of file KernelBenchmarksFVRusanov-main.cpp.

Referenced by runBenchmarks().

◆ CellSize

constexpr double CellSize = 0.1
static constexpr

Definition at line 41 of file KernelBenchmarksFVRusanov-main.cpp.

Referenced by runBenchmarks().

◆ HaloSize

◆ NumberOfFiniteVolumesPerPatch

constexpr int NumberOfFiniteVolumesPerPatch
static constexpr
Initial value:
= FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch * FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch
* FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch

Definition at line 60 of file KernelBenchmarksFVRusanov-main.cpp.

Referenced by reportRuntime().

◆ NumberOfInputEntries

constexpr int NumberOfInputEntries
static constexpr
Initial value:
= (FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch + 2 * HaloSize)
* (FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch + 2 * HaloSize)
* (FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch + 2 * HaloSize) * FVRusanovSolver::NumberOfUnknowns
static constexpr int HaloSize

Definition at line 55 of file KernelBenchmarksFVRusanov-main.cpp.

Referenced by initInputData(), and runBenchmarks().

◆ NumberOfOutputEntries

constexpr int NumberOfOutputEntries
static constexpr
Initial value:
= (FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch + 0) * (FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch + 0)
* (FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch + 0) * FVRusanovSolver::NumberOfUnknowns

Definition at line 58 of file KernelBenchmarksFVRusanov-main.cpp.

Referenced by runBenchmarks(), storeOutcome(), and validateOutcome().

◆ TimeStamp

constexpr double TimeStamp = 0.5
static constexpr

Definition at line 39 of file KernelBenchmarksFVRusanov-main.cpp.

Referenced by runBenchmarks().

◆ TimeStepSize

constexpr double TimeStepSize = 1e-6
static constexpr

Definition at line 40 of file KernelBenchmarksFVRusanov-main.cpp.

Referenced by runBenchmarks().

◆ timingComputeKernel

◆ validateOutcomeSemaphore

::tarch::multicore::BooleanSemaphore validateOutcomeSemaphore

Definition at line 45 of file KernelBenchmarksFVRusanov-main.cpp.

Referenced by storeOutcome(), and validateOutcome().

◆ validMaxEigenvalue

double validMaxEigenvalue = 0.0

Definition at line 37 of file KernelBenchmarksFVRusanov-main.cpp.

Referenced by storeOutcome(), and validateOutcome().

◆ validOutcome

double* validOutcome = nullptr

Definition at line 38 of file KernelBenchmarksFVRusanov-main.cpp.

Referenced by main(), storeOutcome(), and validateOutcome().