3#include "KernelBenchmarksFVRiemann-main.h"
8#include "repositories/DataRepository.h"
9#include "repositories/SolverRepository.h"
10#include "repositories/StepRepository.h"
12#include "tasks/FVRiemannEnclaveTask.h"
30#pragma float_control(precise, on)
31#pragma STDC FENV_ACCESS ON
33using namespace benchmarks::exahype2::kernelbenchmarks;
43static_assert(Accuracy >= std::numeric_limits<double>::epsilon() || Accuracy == 0.0);
46 = (FVRiemann::NumberOfFiniteVolumesPerAxisPerPatch + 2 *
HaloSize)
47 * (FVRiemann::NumberOfFiniteVolumesPerAxisPerPatch + 2 *
HaloSize)
49 * (FVRiemann::NumberOfFiniteVolumesPerAxisPerPatch + 2 *
HaloSize)
51 * (FVRiemann::NumberOfUnknowns + FVRiemann::NumberOfAuxiliaryVariables);
54 = (FVRiemann::NumberOfFiniteVolumesPerAxisPerPatch + 0)
55 * (FVRiemann::NumberOfFiniteVolumesPerAxisPerPatch + 0)
57 * (FVRiemann::NumberOfFiniteVolumesPerAxisPerPatch + 0)
59 * (FVRiemann::NumberOfUnknowns + FVRiemann::NumberOfAuxiliaryVariables);
62 = FVRiemann::NumberOfFiniteVolumesPerAxisPerPatch
63 * FVRiemann::NumberOfFiniteVolumesPerAxisPerPatch
65 * FVRiemann::NumberOfFiniteVolumesPerAxisPerPatch
98 const double*
const maxEigenvalue,
99 const int numberOfPatches
101 if constexpr (Accuracy <= 0.0)
return;
103 validQ =
new double*[numberOfPatches];
104 for (
int patchIndex = 0; patchIndex < numberOfPatches; patchIndex++) {
110 logInfo(
"storeOutcome(...)",
"bookmarked reference solution");
115 if constexpr (Accuracy <= 0.0)
return;
116 for (
int patchIndex = 0; patchIndex < numberOfPatches; patchIndex++) {
117 delete[]
validQ[patchIndex];
134 const double*
const* Q,
135 const double*
const maxEigenvalue,
136 const int numberOfPatches
138 if constexpr (Accuracy <= 0.0)
return;
140 double maxDifference = 0.0;
142 std::cerr.precision(16);
143 for (
int patchIndex = 0; patchIndex < numberOfPatches; patchIndex++) {
149 <<
"patch " << patchIndex <<
": "
150 <<
"Q[" << i <<
"]!=validQ[" << i <<
"] ("
158 maxDifference = std::max(maxDifference,
std::abs(Q[patchIndex][i] -
validQ[patchIndex][i]));
166 <<
"maxEigenvalue[" << patchIndex <<
"]!=validMaxEigenvalue[" << patchIndex <<
"] ("
179 "max difference of outcome from all patches is "
181 <<
" (admissible accuracy="
183 <<
" for " << errors <<
" entries"
195 const std::string& kernelIdentificator,
199 std::stringstream ss;
201 ss << kernelIdentificator <<
":\n\t";
202 ss << kernelMeasurement.
getValue() <<
" |\n\t";
205 logInfo(
"reportRuntime(...)", ss.str());
218template <
void (*Kernel)(exahype2::CellData<
double,
double>&, peano4::utils::LoopPlacement),
219 peano4::utils::LoopPlacement loopPlacement>
222 Kernel(patchData, loopPlacement);
225template <
void (*Kernel)(
int, exahype2::CellData<
double,
double>&)>
228 Kernel(device, patchData);
238 for (
int patchIndex = 0; patchIndex < numberOfPatches; patchIndex++) {
252 const std::string& markerName,
258 while (sample <= NumberOfSamples) {
260 for (
int patchIndex = 0; patchIndex < numberOfPatches; patchIndex++) {
265 parallelFor(launchingThread, NumberOfLaunchingThreads) {
267 kernelWrapper(device, patchData);
280 if constexpr (AssessHostKernels) {
284 FVRiemann::NumberOfFiniteVolumesPerAxisPerPatch,
286 FVRiemann::NumberOfUnknowns,
287 FVRiemann::NumberOfAuxiliaryVariables,
289 EvaluateNonconservativeProduct,
293 EvaluateMaximumEigenvalueAfterTimeStep,
296 "host, stateless, patch-wise, AoS, serial",
303 FVRiemann::NumberOfFiniteVolumesPerAxisPerPatch,
305 FVRiemann::NumberOfUnknowns,
306 FVRiemann::NumberOfAuxiliaryVariables,
308 EvaluateNonconservativeProduct,
312 EvaluateMaximumEigenvalueAfterTimeStep,
315 "host, stateless, patch-wise, AoS, spread-out",
320 for (
int patchIndex = 0; patchIndex < numberOfPatches; patchIndex++) {
326int main(
int argc,
char** argv) {
329 repositories::initLogFilters();
333 repositories::initSharedMemoryAndGPUEnvironment();
335 if constexpr (EnableFPE) {
336 feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
341 "number of compute threads: "
346 "number of threads launching compute kernels: "
347 << NumberOfLaunchingThreads
351 "number of unknowns: "
352 << FVRiemann::NumberOfUnknowns
356 "number of auxiliary variables: "
357 << FVRiemann::NumberOfAuxiliaryVariables
361 "number of finite volumes per axis per patch: "
362 << FVRiemann::NumberOfFiniteVolumesPerAxisPerPatch
366 "number of samples per measurement: "
371 "evaluate max. eigenvalue (reduction step): "
372 << std::boolalpha << EvaluateMaximumEigenvalueAfterTimeStep
376 "floating-point exception handler enabled: "
377 << std::boolalpha << EnableFPE
381 "performing accuracy checks with precision: "
384#if defined(GPUOffloadingSYCL)
387 "set SYCL_DEVICE_FILTER=gpu or ONEAPI_DEVICE_SELECTOR=cuda:0 when using SYCL on the device"
391 "set SYCL_PI_TRACE=2 in case of runtime errors"
395#if defined(SharedOMP)
401 for (
int i = 0; i < NumberOfPatchesToStudy.size(); i++) {
402 logInfo(
"main()",
"number of patches: " << NumberOfPatchesToStudy[i]);
406#if defined(SharedOMP)
#define assertionEquals(lhs, rhs)
void assessKernel(std::function< void(int) > kernelCallInLoop, const std::string &name, int numberOfParticles)
void allocateAndStoreOutcome(const double *const *Q, const double *const maxEigenvalue, const int numberOfPatches)
Allocates and stores outcome of one compute kernel.
constexpr int NumberOfOutputEntriesPerPatch
void freeOutcome(const int numberOfPatches)
constexpr int NumberOfInputEntriesPerPatch
void wrapHostKernel(int device, exahype2::CellData< double, double > &patchData)
We want to use all kernels exactly the same way.
void validateOutcome(const double *const *Q, const double *const maxEigenvalue, const int numberOfPatches)
Validate data against pre-stored simulation outcome.
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &kernelMeasurement, int numberOfPatches)
Reports the runtime and throughput of the benchmarks.
tarch::logging::Log _log("::")
void initInputData(double *Q)
Set input data.
constexpr double CellOffset
constexpr double TimeStamp
double * validMaxEigenvalue
void runBenchmarks(int numberOfPatches)
Run the benchmark for one particular number of patches.
void wrapDeviceKernel(int device, exahype2::CellData< double, double > &patchData)
constexpr int NumberOfFiniteVolumesPerPatch
constexpr double TimeStepSize
constexpr double CellSize
#define logError(methodName, logMacroMessageStream)
Wrapper macro around tarch::logging::Log to improve logging.
#define logInfo(methodName, logMacroMessageStream)
Wrapper macro around tarch::logging::Log to improve logging.
#define parallelFor(counter, max)
static constexpr int HostDevice
Accelerator devices (GPUs) are enumerated starting from 0.
static Core & getInstance()
std::string toString() const
void setValue(const double &value)
Set the value.
A simple class that has to be included to measure the clock ticks required for an operation.
double getCalendarTime()
This method returns the elapsed calendar time between the start and stop command of the timer,...
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRiemannPatchwiseHeapStateless(CellData< double, double > &patchData, peano4::utils::LoopPlacement loopPlacement=peano4::utils::LoopPlacement::Serial) InlineMethod
void fillLookupTables()
Fill Lookup Tables.
int initParallelEnvironment(int *argc, char ***argv)
Init Parallel Environment.
void shutdownParallelEnvironment()
Shut down the entire parallel environment.
bool equals(const Matrix< Rows, Cols, Scalar > &lhs, const Matrix< Rows, Cols, Scalar > &rhs, const Scalar &tolerance=NUMERICAL_ZERO_DIFFERENCE)
Compares two matrices for equality up to a numerical tolerance.
void initSmartMPI()
Switch on SmartMPI.
void freeMemory(void *data, MemoryLocation location, int device=accelerator::Device::HostDevice)
Free memory.
void shutdownNonCriticalAssertionEnvironment()
peano4::shutdownParallelEnvironment().
void initNonCriticalAssertionEnvironment()
Register the assertion tag from the global communicator.
@ Heap
Create data on the heap of the local device.
@ ManagedSharedAcceleratorDeviceMemory
To be used on host only.
Representation of a number of cells which contains all information that's required to process the sto...
outType ** QOut
Out values.
inType ** QIn
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
double * maxEigenvalue
Out values.
tarch::la::Vector< Dimensions, double > * cellCentre
tarch::la::Vector< Dimensions, double > * cellSize
Array of struct enumerator.