Peano 4
Loading...
Searching...
No Matches
KernelBenchmarksFVRusanov-main.cpp
Go to the documentation of this file.
1#include "KernelBenchmarksFVRusanov-main.h"
2
3#include <fenv.h>
4#pragma float_control(precise, on)
5#pragma STDC FENV_ACCESS ON
6
7#include "config.h"
8#include "Constants.h"
10#include "observers/CreateGrid.h"
11#include "observers/CreateGridAndConvergeLoadBalancing.h"
12#include "observers/CreateGridButPostponeRefinement.h"
13#include "observers/InitGrid.h"
14#include "observers/PlotSolution.h"
15#include "observers/TimeStep.h"
16#include "peano4/peano.h"
17#include "repositories/DataRepository.h"
18#include "repositories/SolverRepository.h"
19#include "repositories/StepRepository.h"
23#include "tarch/logging/Log.h"
31#include "tarch/timing/Watch.h"
32#include "tasks/FVRusanovSolverEnclaveTask.h"
34
35using namespace benchmarks::exahype2::kernelbenchmarks;
// Reference maximum eigenvalue; written by storeOutcome() together with validOutcome.
37double validMaxEigenvalue = 0.0;
// Reference kernel output. Stays nullptr until storeOutcome() allocates and fills it;
// validateOutcome() reads it entry by entry.
38double* validOutcome = nullptr;
// Time stamp and time step size written into patchData.t[i] / patchData.dt[i]
// for every patch in runBenchmarks().
39static constexpr double TimeStamp = 0.5;
40static constexpr double TimeStepSize = 1e-6;
// Passed to event.setH() and event.setX() respectively in runBenchmarks().
41static constexpr double CellSize = 0.1;
42static constexpr double CellOffset = 4.0;
// Enters the input buffer size as 2 * HaloSize extra volumes per axis.
43static constexpr int HaloSize = 1;
// Accuracy is declared outside this listing. It has to be either exactly zero or
// at least machine epsilon; runBenchmarks() only validates when Accuracy > 0.0.
46static_assert(Accuracy >= std::numeric_limits<double>::epsilon() || Accuracy == 0.0);
47
48#if Dimensions == 2
49static constexpr int NumberOfInputEntries = (FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch + 2 * HaloSize)
50 * (FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch + 2 * HaloSize) * FVRusanovSolver::NumberOfUnknowns;
51static constexpr int NumberOfOutputEntries = (FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch + 0) * (FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch + 0)
52 * FVRusanovSolver::NumberOfUnknowns;
53static constexpr int NumberOfFiniteVolumesPerPatch = FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch * FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch;
54#else
55static constexpr int NumberOfInputEntries = (FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch + 2 * HaloSize)
56 * (FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch + 2 * HaloSize)
57 * (FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch + 2 * HaloSize) * FVRusanovSolver::NumberOfUnknowns;
58static constexpr int NumberOfOutputEntries = (FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch + 0) * (FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch + 0)
59 * (FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch + 0) * FVRusanovSolver::NumberOfUnknowns;
60static constexpr int NumberOfFiniteVolumesPerPatch = FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch * FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch
61 * FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch;
62#endif
63
64
73void initInputData(double* Q) {
74 for (int i = 0; i < NumberOfInputEntries; i++) {
75 Q[i] = std::sin(1.0 * i / (NumberOfInputEntries) * ::tarch::la::PI);
76 }
77}
78
79
87void storeOutcome(const double* Q, const double maxEigenvalue) {
89 if (validOutcome == nullptr) {
90 validOutcome = new double[NumberOfOutputEntries]{0.0};
91 std::memcpy(validOutcome, Q, sizeof(double) * NumberOfOutputEntries);
92 validMaxEigenvalue = maxEigenvalue;
93 logInfo("storeOutcome()", "Bookmarked reference solution");
94 }
95}
96
97
105std::tuple<double, int> validateOutcome(double* Q, int patchIndex, const double maxEigenvalue) {
107 int index = 0;
108 int errors = 0;
109 double maxDifference = 0.0;
110
111 for (int i = 0; i < NumberOfOutputEntries; i++) {
112 if (not ::tarch::la::equals(Q[i], validOutcome[i], Accuracy)) {
113 errors++;
114 std::cerr.precision(16);
115 logError("validateOutcome()", std::fixed << "Q[" << i << "]!=validOutcome[" << i << "]: " << Q[i] << "!=" << validOutcome[i]);
116 }
117 maxDifference = std::max(maxDifference, std::abs(Q[i] - validOutcome[i]));
118 index++;
119 }
120
121 if (not tarch::la::equals(maxEigenvalue, validMaxEigenvalue, Accuracy)) {
122 std::cerr.precision(16);
123 logError("validateOutcome()", std::fixed << " maxEigenvalue[" << patchIndex << "]!=validMaxEigenvalue[" << patchIndex << "]: " << maxEigenvalue << "!=" << validMaxEigenvalue);
124 errors++;
125 }
126
127 return {maxDifference, errors};
128}
129
130
131void reportRuntime(const std::string& kernelIdentificator, const ::tarch::timing::Measurement& timingKernelLaunch, int patches) {
132 std::stringstream ss;
133 ss << "\n";
134 ss << kernelIdentificator << ":\n\t";
135
136 ss << timingComputeKernel.getValue() << " |\n\t";
137 ss << (timingComputeKernel.getValue() / patches / NumberOfFiniteVolumesPerPatch) << " |\n\t";
138 ss << timingComputeKernel.toString() << " |\n\t";
139
140 ss << timingKernelLaunch.getValue() << " |\n\t";
141 ss << (timingKernelLaunch.getValue() / patches / NumberOfFiniteVolumesPerPatch);
142 ss << " |\n\t" << timingKernelLaunch.toString();
143
144 logInfo("reportRuntime()", ss.str());
145}
146
147
// NOTE(review): the function header, the name of the invoked kernel and one
// argument line are not part of this listing (the embedded numbering skips
// 163-164, 166 and 213). Judging by the registration order in runBenchmarks()
// this is presumably wrapBatchedHeapFunctorHostKernels -- TODO confirm against
// the original file.
//
// Visible part: a call instantiated with the FVRusanovSolver patch
// configuration, the Evaluate* compile-time switches and TempDataEnumerator.
// It receives patchData, four lambdas that forward to
// repositories::InstanceOfFVRusanovSolver, and loopParallelism.
162template <class TempDataEnumerator, ::peano4::utils::LoopPlacement loopParallelism>
165
167 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
168 HaloSize,
169 FVRusanovSolver::NumberOfUnknowns,
170 FVRusanovSolver::NumberOfAuxiliaryVariables,
171 EvaluateFlux,
172 EvaluateNonconservativeProduct,
173 EvaluateSource,
174 EvaluateMaximumEigenvalueAfterTimeStep,
175 TempDataEnumerator>(
176 patchData,
// Flux callback: forwards to the solver's flux() only if EvaluateFlux is set;
// otherwise the lambda body is empty.
177 [&](
178 const double* __restrict__ Q,
179 const ::tarch::la::Vector<Dimensions, double>& x,
180 const ::tarch::la::Vector<Dimensions, double>& h,
181 double t,
182 double dt,
183 int normal,
184 double* __restrict__ F
185 ) -> void {
186 if constexpr (EvaluateFlux) {
187 repositories::InstanceOfFVRusanovSolver.flux(Q, x, h, t, dt, normal, F);
188 }
189 },
// Non-conservative product callback: forwards only if
// EvaluateNonconservativeProduct is set.
190 [&](
191 const double* __restrict__ Q,
192 const double* __restrict__ deltaQ,
193 const ::tarch::la::Vector<Dimensions, double>& x,
194 const ::tarch::la::Vector<Dimensions, double>& h,
195 double t,
196 double dt,
197 int normal,
198 double* __restrict__ BTimesDeltaQ
199 ) -> void {
200 if constexpr (EvaluateNonconservativeProduct) {
201 repositories::InstanceOfFVRusanovSolver.nonconservativeProduct(Q, deltaQ, x, h, t, dt, normal, BTimesDeltaQ);
202 }
203 },
// Source term callback: forwards only if EvaluateSource is set.
204 [&](
205 const double* __restrict__ Q, const ::tarch::la::Vector<Dimensions, double>& x, const ::tarch::la::Vector<Dimensions, double>& h, double t, double dt, double* __restrict__ S
206 ) -> void {
207 if constexpr (EvaluateSource) {
208 repositories::InstanceOfFVRusanovSolver.sourceTerm(Q, x, h, t, dt, S);
209 }
210 },
// Eigenvalue callback: always forwards to the solver's maxEigenvalue().
211 [&](const double* __restrict__ Q, const ::tarch::la::Vector<Dimensions, double>& x, const ::tarch::la::Vector<Dimensions, double>& h, double t, double dt, int normal)
212 -> double { return repositories::InstanceOfFVRusanovSolver.maxEigenvalue(Q, x, h, t, dt, normal); },
214 loopParallelism
215 );
216}
217
218
// NOTE(review): the function header, the name of the invoked kernel and one
// argument line are not part of this listing (the embedded numbering skips
// 225-226, 228 and 275). Judging by the registration order in runBenchmarks()
// this is presumably wrapPatchwiseHeapFunctorsHostKernel -- TODO confirm
// against the original file.
//
// Visible part: a call instantiated with the FVRusanovSolver patch
// configuration, the Evaluate* compile-time switches and TempDataEnumerator.
// It receives patchData, four lambdas that forward to
// repositories::InstanceOfFVRusanovSolver, and loopParallelism.
224template <class TempDataEnumerator, ::peano4::utils::LoopPlacement loopParallelism>
227
229 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
230 HaloSize,
231 FVRusanovSolver::NumberOfUnknowns,
232 FVRusanovSolver::NumberOfAuxiliaryVariables,
233 EvaluateFlux,
234 EvaluateNonconservativeProduct,
235 EvaluateSource,
236 EvaluateMaximumEigenvalueAfterTimeStep,
237 TempDataEnumerator>(
238 patchData,
// Flux callback: forwards to the solver's flux() only if EvaluateFlux is set;
// otherwise the lambda body is empty.
239 [&](
240 const double* __restrict__ Q,
241 const ::tarch::la::Vector<Dimensions, double>& x,
242 const ::tarch::la::Vector<Dimensions, double>& h,
243 double t,
244 double dt,
245 int normal,
246 double* __restrict__ F
247 ) -> void {
248 if constexpr (EvaluateFlux) {
249 repositories::InstanceOfFVRusanovSolver.flux(Q, x, h, t, dt, normal, F);
250 }
251 },
// Non-conservative product callback: forwards only if
// EvaluateNonconservativeProduct is set.
252 [&](
253 const double* __restrict__ Q,
254 const double* __restrict__ deltaQ,
255 const ::tarch::la::Vector<Dimensions, double>& x,
256 const ::tarch::la::Vector<Dimensions, double>& h,
257 double t,
258 double dt,
259 int normal,
260 double* __restrict__ BTimesDeltaQ
261 ) -> void {
262 if constexpr (EvaluateNonconservativeProduct) {
263 repositories::InstanceOfFVRusanovSolver.nonconservativeProduct(Q, deltaQ, x, h, t, dt, normal, BTimesDeltaQ);
264 }
265 },
// Source term callback: forwards only if EvaluateSource is set.
266 [&](
267 const double* __restrict__ Q, const ::tarch::la::Vector<Dimensions, double>& x, const ::tarch::la::Vector<Dimensions, double>& h, double t, double dt, double* __restrict__ S
268 ) -> void {
269 if constexpr (EvaluateSource) {
270 repositories::InstanceOfFVRusanovSolver.sourceTerm(Q, x, h, t, dt, S);
271 }
272 },
// Eigenvalue callback: always forwards to the solver's maxEigenvalue().
273 [&](const double* __restrict__ Q, const ::tarch::la::Vector<Dimensions, double>& x, const ::tarch::la::Vector<Dimensions, double>& h, double t, double dt, int normal)
274 -> double { return repositories::InstanceOfFVRusanovSolver.maxEigenvalue(Q, x, h, t, dt, normal); },
276 loopParallelism
277 );
278}
279
280
// NOTE(review): the function header, the name of the invoked kernel and one
// argument line are not part of this listing (the embedded numbering skips
// 287-288, 290 and 337). Judging by the registration order in runBenchmarks()
// this is presumably wrapVolumewiseFunctorHostKernels -- TODO confirm against
// the original file.
//
// Visible part: a call instantiated with the FVRusanovSolver patch
// configuration, the Evaluate* compile-time switches and TempDataEnumerator.
// It receives patchData, four lambdas that forward to
// repositories::InstanceOfFVRusanovSolver, and loopParallelism.
286template <class TempDataEnumerator, ::peano4::utils::LoopPlacement loopParallelism>
289
291 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
292 HaloSize,
293 FVRusanovSolver::NumberOfUnknowns,
294 FVRusanovSolver::NumberOfAuxiliaryVariables,
295 EvaluateFlux,
296 EvaluateNonconservativeProduct,
297 EvaluateSource,
298 EvaluateMaximumEigenvalueAfterTimeStep,
299 TempDataEnumerator>(
300 patchData,
// Flux callback: forwards to the solver's flux() only if EvaluateFlux is set;
// otherwise the lambda body is empty.
301 [&](
302 const double* __restrict__ Q,
303 const ::tarch::la::Vector<Dimensions, double>& x,
304 const ::tarch::la::Vector<Dimensions, double>& h,
305 double t,
306 double dt,
307 int normal,
308 double* __restrict__ F
309 ) -> void {
310 if constexpr (EvaluateFlux) {
311 repositories::InstanceOfFVRusanovSolver.flux(Q, x, h, t, dt, normal, F);
312 }
313 },
// Non-conservative product callback: forwards only if
// EvaluateNonconservativeProduct is set.
314 [&](
315 const double* __restrict__ Q,
316 const double* __restrict__ deltaQ,
317 const ::tarch::la::Vector<Dimensions, double>& x,
318 const ::tarch::la::Vector<Dimensions, double>& h,
319 double t,
320 double dt,
321 int normal,
322 double* __restrict__ BTimesDeltaQ
323 ) -> void {
324 if constexpr (EvaluateNonconservativeProduct) {
325 repositories::InstanceOfFVRusanovSolver.nonconservativeProduct(Q, deltaQ, x, h, t, dt, normal, BTimesDeltaQ);
326 }
327 },
// Source term callback: forwards only if EvaluateSource is set.
328 [&](
329 const double* __restrict__ Q, const ::tarch::la::Vector<Dimensions, double>& x, const ::tarch::la::Vector<Dimensions, double>& h, double t, double dt, double* __restrict__ S
330 ) -> void {
331 if constexpr (EvaluateSource) {
332 repositories::InstanceOfFVRusanovSolver.sourceTerm(Q, x, h, t, dt, S);
333 }
334 },
// Eigenvalue callback: always forwards to the solver's maxEigenvalue().
335 [&](const double* __restrict__ Q, const ::tarch::la::Vector<Dimensions, double>& x, const ::tarch::la::Vector<Dimensions, double>& h, double t, double dt, int normal)
336 -> double { return repositories::InstanceOfFVRusanovSolver.maxEigenvalue(Q, x, h, t, dt, normal); },
338 loopParallelism
339 );
340}
341
342
// Adapter that gives a stateless host kernel the (int device, CellData&)
// call signature expected by assessKernel in runBenchmarks().
//
// Function        Kernel to invoke; it receives the patch data, the timing
//                 object timingComputeKernel (declared outside this listing)
//                 and the loop placement.
// loopParallelism Loop placement handed through to Function.
// device          Not used in the visible body.
// patchData       Patches the kernel operates on.
//
// NOTE(review): one line of the body (350 in the embedded numbering) is not
// part of this listing.
348template <void (*Function)(::exahype2::CellData&, ::tarch::timing::Measurement&, ::peano4::utils::LoopPlacement), ::peano4::utils::LoopPlacement loopParallelism>
349void wrapStatelessHostKernel(int device, ::exahype2::CellData& patchData) {
351 Function(patchData, timingComputeKernel, loopParallelism);
352}
353
354
// Adapter that gives a device kernel the (int device, CellData&) call
// signature expected by assessKernel in runBenchmarks().
//
// Function  Kernel to invoke; it receives the device number, the patch data
//           and the timing object timingComputeKernel (declared outside this
//           listing).
// device    Device number handed through to Function.
// patchData Patches the kernel operates on.
//
// NOTE(review): one line of the body (357 in the embedded numbering) is not
// part of this listing.
355template <void (*Function)(int, ::exahype2::CellData&, ::tarch::timing::Measurement&)>
356void wrapDeviceKernel(int device, ::exahype2::CellData& patchData) {
358 Function(device, patchData, timingComputeKernel);
359}
360
361
379void runBenchmarks(int numberOfPatches, int launchingThreads) {
381 event.setX(CellOffset);
382 event.setH(CellSize);
384
385 auto assessKernel =
386 [&](std::function<void(int device, ::exahype2::CellData& patchData)> kernel, const std::string& markerName, int launchingThreads, int device, int patches) -> void {
388 ::tarch::timing::Measurement timingKernelLaunch;
389
390 // TODO: Does the number of samples change the outcome of the solution?
391 for (int j = 0; j < NumberOfSamples; j++) {
392 parallelFor(launchingThread, launchingThreads) {
393 ::exahype2::CellData patchData(patches);
394 for (int i = 0; i < patches; i++) {
396 patchData.t[i] = TimeStamp;
397 patchData.dt[i] = TimeStepSize;
401 patchData.maxEigenvalue[i] = 0.0;
402 initInputData(patchData.QIn[i]);
403 std::memset(patchData.QOut[i], 0.0, NumberOfOutputEntries * sizeof(double));
404 }
405
406 ::tarch::timing::Watch watchKernelLaunch("::runBenchmarks", "assessKernel(...)", false);
407 kernel(device, patchData);
408 watchKernelLaunch.stop();
409 timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
410
411 if constexpr (Accuracy > 0.0) {
412 int errors = 0;
413 double maxDifference = 0.0;
414 for (int i = 0; i < patches; i++) {
415 storeOutcome(patchData.QOut[i], patchData.maxEigenvalue[i]);
416 auto [maxDifferencePerPatch, errorsPerPatch] = validateOutcome(patchData.QOut[i], i, patchData.maxEigenvalue[i]);
417 errors += errorsPerPatch;
418 maxDifference = std::max(maxDifference, maxDifferencePerPatch);
419 }
420
421 if (errors > 0) {
422 logError(
423 "runBenchmarks()",
424 "max difference of outcome from "
425 << markerName << " for all patches is " << maxDifference << " (admissible accuracy=" << Accuracy << ")"
426 << " for " << errors << " entries"
427 );
428 std::abort();
429 }
430 }
431
432 for (int i = 0; i < patches; i++) {
435 }
436 }
438 }
439
441 reportRuntime(markerName, timingKernelLaunch, patches);
442 }
443 };
444
445 // Kernel launches
447 const int rank = ::tarch::mpi::Rank::getInstance().getRank();
448 const int numberOfRanks = ::tarch::mpi::Rank::getInstance().getNumberOfRanks();
449 const int patchesPerProcess = numberOfPatches / numberOfRanks;
450 const int remainder = numberOfPatches % numberOfRanks;
451 const int startPatch = rank * patchesPerProcess;
452 const int endPatch = startPatch + patchesPerProcess + (rank == numberOfRanks - 1 ? remainder : 0);
453 const int localPatches = endPatch - startPatch;
454
455 // Headers
456 std::stringstream ss;
457 ss << std::left;
458 ss << "\n";
459 ss << "Kernel ID:\n\t";
460 ss << "Compute Kernel Time |\n\t";
461 ss << "Compute Kernel Time (Normalised) |\n\t";
462 ss << "Compute Kernel String |\n\t";
463 ss << "Kernel Launch Time |\n\t";
464 ss << "Kernel Launch Time (Normalised) |\n\t";
465 ss << "Kernel Launch String";
466
468 logInfo("runBenchmarks()", "Number of patches per rank: " << localPatches);
469 logInfo("runBenchmarks()", ss.str());
470 }
471
472 std::string deviceString;
473#ifdef Parallel
474 std::vector<int> devices(numberOfRanks);
475 MPI_Gather(&device, 1, MPI_INT, devices.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
476 for (int i = 0; i < numberOfRanks; i++) {
477 deviceString += std::to_string(devices[i]);
478 if (i < numberOfRanks - 1) {
479 deviceString += ",";
480 }
481 }
482#else
483 deviceString = std::to_string(device);
484#endif
485
486 if constexpr (EvaluateHostKernels) {
487 assessKernel(
488 wrapBatchedHeapFunctorHostKernels<::exahype2::enumerator::AoSLexicographicEnumerator, ::peano4::utils::LoopPlacement::Serial>,
489 "host, functors, batched, AoS, serial",
490 launchingThreads,
492 localPatches
493 );
494 assessKernel(
495 wrapBatchedHeapFunctorHostKernels<::exahype2::enumerator::AoSLexicographicEnumerator, ::peano4::utils::LoopPlacement::Nested>,
496 "host, functors, batched, AoS, nested",
497 launchingThreads,
499 localPatches
500 );
501 assessKernel(
502 wrapBatchedHeapFunctorHostKernels<::exahype2::enumerator::AoSLexicographicEnumerator, ::peano4::utils::LoopPlacement::SpreadOut>,
503 "host, functors, batched, AoS, spread-out",
504 launchingThreads,
506 localPatches
507 );
508
509 assessKernel(
510 wrapPatchwiseHeapFunctorsHostKernel<::exahype2::enumerator::AoSLexicographicEnumerator, ::peano4::utils::LoopPlacement::Serial>,
511 "host, functors, patch-wise, AoS, serial",
512 launchingThreads,
514 localPatches
515 );
516 assessKernel(
517 wrapPatchwiseHeapFunctorsHostKernel<::exahype2::enumerator::AoSLexicographicEnumerator, ::peano4::utils::LoopPlacement::Nested>,
518 "host, functors, patch-wise, AoS, nested",
519 launchingThreads,
521 localPatches
522 );
523 assessKernel(
524 wrapPatchwiseHeapFunctorsHostKernel<::exahype2::enumerator::AoSLexicographicEnumerator, ::peano4::utils::LoopPlacement::SpreadOut>,
525 "host, functors, patch-wise, AoS, spread-out",
526 launchingThreads,
528 localPatches
529 );
530
531 assessKernel(
532 wrapVolumewiseFunctorHostKernels<::exahype2::enumerator::AoSLexicographicEnumerator, ::peano4::utils::LoopPlacement::Serial>,
533 "host, functors, volume-wise, AoS, serial",
534 launchingThreads,
536 localPatches
537 );
538 assessKernel(
539 wrapVolumewiseFunctorHostKernels<::exahype2::enumerator::AoSLexicographicEnumerator, ::peano4::utils::LoopPlacement::Nested>,
540 "host, functors, volume-wise, AoS, nested",
541 launchingThreads,
543 localPatches
544 );
545 assessKernel(
546 wrapVolumewiseFunctorHostKernels<::exahype2::enumerator::AoSLexicographicEnumerator, ::peano4::utils::LoopPlacement::SpreadOut>,
547 "host, functors, volume-wise, AoS, spread-out",
548 launchingThreads,
550 localPatches
551 );
552
553 assessKernel(
556 FVRusanovSolver,
557 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
558 HaloSize,
559 FVRusanovSolver::NumberOfUnknowns,
560 FVRusanovSolver::NumberOfAuxiliaryVariables,
561 EvaluateFlux,
562 EvaluateNonconservativeProduct,
563 EvaluateSource,
564 EvaluateMaximumEigenvalueAfterTimeStep,
567 "host, stateless, batched, AoS, serial",
568 launchingThreads,
570 localPatches
571 );
572 assessKernel(
575 FVRusanovSolver,
576 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
577 HaloSize,
578 FVRusanovSolver::NumberOfUnknowns,
579 FVRusanovSolver::NumberOfAuxiliaryVariables,
580 EvaluateFlux,
581 EvaluateNonconservativeProduct,
582 EvaluateSource,
583 EvaluateMaximumEigenvalueAfterTimeStep,
586 "host, stateless, batched, AoS, nested",
587 launchingThreads,
589 localPatches
590 );
591 assessKernel(
594 FVRusanovSolver,
595 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
596 HaloSize,
597 FVRusanovSolver::NumberOfUnknowns,
598 FVRusanovSolver::NumberOfAuxiliaryVariables,
599 EvaluateFlux,
600 EvaluateNonconservativeProduct,
601 EvaluateSource,
602 EvaluateMaximumEigenvalueAfterTimeStep,
605 "host, stateless, batched, AoS, spread-out",
606 launchingThreads,
608 localPatches
609 );
610 assessKernel(
613 FVRusanovSolver,
614 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
615 HaloSize,
616 FVRusanovSolver::NumberOfUnknowns,
617 FVRusanovSolver::NumberOfAuxiliaryVariables,
618 EvaluateFlux,
619 EvaluateNonconservativeProduct,
620 EvaluateSource,
621 EvaluateMaximumEigenvalueAfterTimeStep,
624 "host, stateless, batched, SoA, serial",
625 launchingThreads,
627 localPatches
628 );
629 assessKernel(
632 FVRusanovSolver,
633 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
634 HaloSize,
635 FVRusanovSolver::NumberOfUnknowns,
636 FVRusanovSolver::NumberOfAuxiliaryVariables,
637 EvaluateFlux,
638 EvaluateNonconservativeProduct,
639 EvaluateSource,
640 EvaluateMaximumEigenvalueAfterTimeStep,
643 "host, stateless, batched, SoA, nested",
644 launchingThreads,
646 localPatches
647 );
648 assessKernel(
651 FVRusanovSolver,
652 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
653 HaloSize,
654 FVRusanovSolver::NumberOfUnknowns,
655 FVRusanovSolver::NumberOfAuxiliaryVariables,
656 EvaluateFlux,
657 EvaluateNonconservativeProduct,
658 EvaluateSource,
659 EvaluateMaximumEigenvalueAfterTimeStep,
662 "host, stateless, batched, SoA, spread-out",
663 launchingThreads,
665 localPatches
666 );
667 assessKernel(
670 FVRusanovSolver,
671 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
672 HaloSize,
673 FVRusanovSolver::NumberOfUnknowns,
674 FVRusanovSolver::NumberOfAuxiliaryVariables,
675 EvaluateFlux,
676 EvaluateNonconservativeProduct,
677 EvaluateSource,
678 EvaluateMaximumEigenvalueAfterTimeStep,
681 "host, stateless, batched, AoSoA, serial",
682 launchingThreads,
684 localPatches
685 );
686 assessKernel(
689 FVRusanovSolver,
690 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
691 HaloSize,
692 FVRusanovSolver::NumberOfUnknowns,
693 FVRusanovSolver::NumberOfAuxiliaryVariables,
694 EvaluateFlux,
695 EvaluateNonconservativeProduct,
696 EvaluateSource,
697 EvaluateMaximumEigenvalueAfterTimeStep,
700 "host, stateless, batched, AoSoA, nested",
701 launchingThreads,
703 localPatches
704 );
705 assessKernel(
708 FVRusanovSolver,
709 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
710 HaloSize,
711 FVRusanovSolver::NumberOfUnknowns,
712 FVRusanovSolver::NumberOfAuxiliaryVariables,
713 EvaluateFlux,
714 EvaluateNonconservativeProduct,
715 EvaluateSource,
716 EvaluateMaximumEigenvalueAfterTimeStep,
719 "host, stateless, batched, AoSoA, spread-out",
720 launchingThreads,
722 localPatches
723 );
724 if constexpr (not Accuracy > 0.0) {
725 assessKernel(
728 FVRusanovSolver,
729 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
730 HaloSize,
731 FVRusanovSolver::NumberOfUnknowns,
732 FVRusanovSolver::NumberOfAuxiliaryVariables,
733 EvaluateFlux,
734 EvaluateNonconservativeProduct,
735 EvaluateSource,
736 EvaluateMaximumEigenvalueAfterTimeStep>,
738 "host, stateless, batched, insitu, serial",
739 launchingThreads,
741 localPatches
742 );
743 }
744
745 assessKernel(
748 FVRusanovSolver,
749 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
750 HaloSize,
751 FVRusanovSolver::NumberOfUnknowns,
752 FVRusanovSolver::NumberOfAuxiliaryVariables,
753 EvaluateFlux,
754 EvaluateNonconservativeProduct,
755 EvaluateSource,
756 EvaluateMaximumEigenvalueAfterTimeStep,
759 "host, stateless, patch-wise, AoS, serial",
760 launchingThreads,
762 localPatches
763 );
764 assessKernel(
767 FVRusanovSolver,
768 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
769 HaloSize,
770 FVRusanovSolver::NumberOfUnknowns,
771 FVRusanovSolver::NumberOfAuxiliaryVariables,
772 EvaluateFlux,
773 EvaluateNonconservativeProduct,
774 EvaluateSource,
775 EvaluateMaximumEigenvalueAfterTimeStep,
778 "host, stateless, patch-wise, AoS, nested",
779 launchingThreads,
781 localPatches
782 );
783 assessKernel(
786 FVRusanovSolver,
787 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
788 HaloSize,
789 FVRusanovSolver::NumberOfUnknowns,
790 FVRusanovSolver::NumberOfAuxiliaryVariables,
791 EvaluateFlux,
792 EvaluateNonconservativeProduct,
793 EvaluateSource,
794 EvaluateMaximumEigenvalueAfterTimeStep,
797 "host, stateless, patch-wise, AoS, spread-out",
798 launchingThreads,
800 localPatches
801 );
802 assessKernel(
805 FVRusanovSolver,
806 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
807 HaloSize,
808 FVRusanovSolver::NumberOfUnknowns,
809 FVRusanovSolver::NumberOfAuxiliaryVariables,
810 EvaluateFlux,
811 EvaluateNonconservativeProduct,
812 EvaluateSource,
813 EvaluateMaximumEigenvalueAfterTimeStep,
816 "host, stateless, patch-wise, SoA, serial",
817 launchingThreads,
819 localPatches
820 );
821 assessKernel(
824 FVRusanovSolver,
825 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
826 HaloSize,
827 FVRusanovSolver::NumberOfUnknowns,
828 FVRusanovSolver::NumberOfAuxiliaryVariables,
829 EvaluateFlux,
830 EvaluateNonconservativeProduct,
831 EvaluateSource,
832 EvaluateMaximumEigenvalueAfterTimeStep,
835 "host, stateless, patch-wise, SoA, nested",
836 launchingThreads,
838 localPatches
839 );
840 assessKernel(
843 FVRusanovSolver,
844 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
845 HaloSize,
846 FVRusanovSolver::NumberOfUnknowns,
847 FVRusanovSolver::NumberOfAuxiliaryVariables,
848 EvaluateFlux,
849 EvaluateNonconservativeProduct,
850 EvaluateSource,
851 EvaluateMaximumEigenvalueAfterTimeStep,
854 "host, stateless, patch-wise, SoA, spread-out",
855 launchingThreads,
857 localPatches
858 );
859 assessKernel(
862 FVRusanovSolver,
863 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
864 HaloSize,
865 FVRusanovSolver::NumberOfUnknowns,
866 FVRusanovSolver::NumberOfAuxiliaryVariables,
867 EvaluateFlux,
868 EvaluateNonconservativeProduct,
869 EvaluateSource,
870 EvaluateMaximumEigenvalueAfterTimeStep,
873 "host, stateless, patch-wise, AoSoA, serial",
874 launchingThreads,
876 localPatches
877 );
878 assessKernel(
881 FVRusanovSolver,
882 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
883 HaloSize,
884 FVRusanovSolver::NumberOfUnknowns,
885 FVRusanovSolver::NumberOfAuxiliaryVariables,
886 EvaluateFlux,
887 EvaluateNonconservativeProduct,
888 EvaluateSource,
889 EvaluateMaximumEigenvalueAfterTimeStep,
892 "host, stateless, patch-wise, AoSoA, nested",
893 launchingThreads,
895 localPatches
896 );
897 assessKernel(
900 FVRusanovSolver,
901 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
902 HaloSize,
903 FVRusanovSolver::NumberOfUnknowns,
904 FVRusanovSolver::NumberOfAuxiliaryVariables,
905 EvaluateFlux,
906 EvaluateNonconservativeProduct,
907 EvaluateSource,
908 EvaluateMaximumEigenvalueAfterTimeStep,
911 "host, stateless, patch-wise, AoSoA, spread-out",
912 launchingThreads,
914 localPatches
915 );
916 if constexpr (not Accuracy > 0.0) {
917 assessKernel(
920 FVRusanovSolver,
921 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
922 HaloSize,
923 FVRusanovSolver::NumberOfUnknowns,
924 FVRusanovSolver::NumberOfAuxiliaryVariables,
925 EvaluateFlux,
926 EvaluateNonconservativeProduct,
927 EvaluateSource,
928 EvaluateMaximumEigenvalueAfterTimeStep>,
930 "host, stateless, patch-wise, insitu, serial",
931 launchingThreads,
933 localPatches
934 );
935 }
936
937 assessKernel(
940 FVRusanovSolver,
941 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
942 HaloSize,
943 FVRusanovSolver::NumberOfUnknowns,
944 FVRusanovSolver::NumberOfAuxiliaryVariables,
945 EvaluateFlux,
946 EvaluateNonconservativeProduct,
947 EvaluateSource,
948 EvaluateMaximumEigenvalueAfterTimeStep,
951 "host, stateless, volume-wise, AoS, serial",
952 launchingThreads,
954 localPatches
955 );
956 assessKernel(
959 FVRusanovSolver,
960 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
961 HaloSize,
962 FVRusanovSolver::NumberOfUnknowns,
963 FVRusanovSolver::NumberOfAuxiliaryVariables,
964 EvaluateFlux,
965 EvaluateNonconservativeProduct,
966 EvaluateSource,
967 EvaluateMaximumEigenvalueAfterTimeStep,
970 "host, stateless, volume-wise, AoS, nested",
971 launchingThreads,
973 localPatches
974 );
975 assessKernel(
978 FVRusanovSolver,
979 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
980 HaloSize,
981 FVRusanovSolver::NumberOfUnknowns,
982 FVRusanovSolver::NumberOfAuxiliaryVariables,
983 EvaluateFlux,
984 EvaluateNonconservativeProduct,
985 EvaluateSource,
986 EvaluateMaximumEigenvalueAfterTimeStep,
989 "host, stateless, volume-wise, AoS, spread-out",
990 launchingThreads,
992 localPatches
993 );
994 assessKernel(
997 FVRusanovSolver,
998 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
999 HaloSize,
1000 FVRusanovSolver::NumberOfUnknowns,
1001 FVRusanovSolver::NumberOfAuxiliaryVariables,
1002 EvaluateFlux,
1003 EvaluateNonconservativeProduct,
1004 EvaluateSource,
1005 EvaluateMaximumEigenvalueAfterTimeStep,
1008 "host, stateless, volume-wise, SoA, serial",
1009 launchingThreads,
1011 localPatches
1012 );
1013 assessKernel(
1016 FVRusanovSolver,
1017 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1018 HaloSize,
1019 FVRusanovSolver::NumberOfUnknowns,
1020 FVRusanovSolver::NumberOfAuxiliaryVariables,
1021 EvaluateFlux,
1022 EvaluateNonconservativeProduct,
1023 EvaluateSource,
1024 EvaluateMaximumEigenvalueAfterTimeStep,
1027 "host, stateless, volume-wise, SoA, nested",
1028 launchingThreads,
1030 localPatches
1031 );
1032 assessKernel(
1035 FVRusanovSolver,
1036 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1037 HaloSize,
1038 FVRusanovSolver::NumberOfUnknowns,
1039 FVRusanovSolver::NumberOfAuxiliaryVariables,
1040 EvaluateFlux,
1041 EvaluateNonconservativeProduct,
1042 EvaluateSource,
1043 EvaluateMaximumEigenvalueAfterTimeStep,
1046 "host, stateless, volume-wise, SoA, spread-out",
1047 launchingThreads,
1049 localPatches
1050 );
1051 assessKernel(
1054 FVRusanovSolver,
1055 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1056 HaloSize,
1057 FVRusanovSolver::NumberOfUnknowns,
1058 FVRusanovSolver::NumberOfAuxiliaryVariables,
1059 EvaluateFlux,
1060 EvaluateNonconservativeProduct,
1061 EvaluateSource,
1062 EvaluateMaximumEigenvalueAfterTimeStep,
1065 "host, stateless, volume-wise, AoSoA, serial",
1066 launchingThreads,
1068 localPatches
1069 );
1070 assessKernel(
1073 FVRusanovSolver,
1074 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1075 HaloSize,
1076 FVRusanovSolver::NumberOfUnknowns,
1077 FVRusanovSolver::NumberOfAuxiliaryVariables,
1078 EvaluateFlux,
1079 EvaluateNonconservativeProduct,
1080 EvaluateSource,
1081 EvaluateMaximumEigenvalueAfterTimeStep,
1084 "host, stateless, volume-wise, AoSoA, nested",
1085 launchingThreads,
1087 localPatches
1088 );
1089 assessKernel(
1092 FVRusanovSolver,
1093 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1094 HaloSize,
1095 FVRusanovSolver::NumberOfUnknowns,
1096 FVRusanovSolver::NumberOfAuxiliaryVariables,
1097 EvaluateFlux,
1098 EvaluateNonconservativeProduct,
1099 EvaluateSource,
1100 EvaluateMaximumEigenvalueAfterTimeStep,
1103 "host, stateless, volume-wise, AoSoA, spread-out",
1104 launchingThreads,
1106 localPatches
1107 );
1108 }
1109
1110 if constexpr (EvaluateDeviceKernels) {
1111#if defined(GPUOffloadingSYCL)
1112 assessKernel(
1114 FVRusanovSolver,
1115 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1116 HaloSize,
1117 FVRusanovSolver::NumberOfUnknowns,
1118 FVRusanovSolver::NumberOfAuxiliaryVariables,
1119 EvaluateFlux,
1120 EvaluateNonconservativeProduct,
1121 EvaluateSource,
1122 EvaluateMaximumEigenvalueAfterTimeStep,
1124 "device(s) " + deviceString + ", stateless, batched, AoS, usm",
1125 launchingThreads,
1126 device,
1127 localPatches
1128 );
1129 assessKernel(
1131 FVRusanovSolver,
1132 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1133 HaloSize,
1134 FVRusanovSolver::NumberOfUnknowns,
1135 FVRusanovSolver::NumberOfAuxiliaryVariables,
1136 EvaluateFlux,
1137 EvaluateNonconservativeProduct,
1138 EvaluateSource,
1139 EvaluateMaximumEigenvalueAfterTimeStep,
1141 "device(s) " + deviceString + ", stateless, batched, SoA, usm",
1142 launchingThreads,
1143 device,
1144 localPatches
1145 );
1146 assessKernel(
1148 FVRusanovSolver,
1149 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1150 HaloSize,
1151 FVRusanovSolver::NumberOfUnknowns,
1152 FVRusanovSolver::NumberOfAuxiliaryVariables,
1153 EvaluateFlux,
1154 EvaluateNonconservativeProduct,
1155 EvaluateSource,
1156 EvaluateMaximumEigenvalueAfterTimeStep,
1158 "device(s) " + deviceString + ", stateless, batched, AoSoA, usm",
1159 launchingThreads,
1160 device,
1161 localPatches
1162 );
1163
1164 assessKernel(
1166 FVRusanovSolver,
1167 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1168 HaloSize,
1169 FVRusanovSolver::NumberOfUnknowns,
1170 FVRusanovSolver::NumberOfAuxiliaryVariables,
1171 EvaluateFlux,
1172 EvaluateNonconservativeProduct,
1173 EvaluateSource,
1174 EvaluateMaximumEigenvalueAfterTimeStep,
1176 "device(s) " + deviceString + ", stateless, patch-wise, AoS, usm",
1177 launchingThreads,
1178 device,
1179 localPatches
1180 );
1181 assessKernel(
1183 FVRusanovSolver,
1184 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1185 HaloSize,
1186 FVRusanovSolver::NumberOfUnknowns,
1187 FVRusanovSolver::NumberOfAuxiliaryVariables,
1188 EvaluateFlux,
1189 EvaluateNonconservativeProduct,
1190 EvaluateSource,
1191 EvaluateMaximumEigenvalueAfterTimeStep,
1193 "device(s) " + deviceString + ", stateless, patch-wise, SoA, usm",
1194 launchingThreads,
1195 device,
1196 localPatches
1197 );
1198 assessKernel(
1200 FVRusanovSolver,
1201 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1202 HaloSize,
1203 FVRusanovSolver::NumberOfUnknowns,
1204 FVRusanovSolver::NumberOfAuxiliaryVariables,
1205 EvaluateFlux,
1206 EvaluateNonconservativeProduct,
1207 EvaluateSource,
1208 EvaluateMaximumEigenvalueAfterTimeStep,
1210 "device(s) " + deviceString + ", stateless, patch-wise, AoSoA, usm",
1211 launchingThreads,
1212 device,
1213 localPatches
1214 );
1215
1216 assessKernel(
1218 FVRusanovSolver,
1219 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1220 HaloSize,
1221 FVRusanovSolver::NumberOfUnknowns,
1222 FVRusanovSolver::NumberOfAuxiliaryVariables,
1223 EvaluateFlux,
1224 EvaluateNonconservativeProduct,
1225 EvaluateSource,
1226 EvaluateMaximumEigenvalueAfterTimeStep,
1228 "device(s) " + deviceString + ", stateless, task-graph, AoS, usm",
1229 launchingThreads,
1230 device,
1231 localPatches
1232 );
1233 assessKernel(
1235 FVRusanovSolver,
1236 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1237 HaloSize,
1238 FVRusanovSolver::NumberOfUnknowns,
1239 FVRusanovSolver::NumberOfAuxiliaryVariables,
1240 EvaluateFlux,
1241 EvaluateNonconservativeProduct,
1242 EvaluateSource,
1243 EvaluateMaximumEigenvalueAfterTimeStep,
1245 "device(s) " + deviceString + ", stateless, task-graph, SoA, usm",
1246 launchingThreads,
1247 device,
1248 localPatches
1249 );
1250 assessKernel(
1252 FVRusanovSolver,
1253 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1254 HaloSize,
1255 FVRusanovSolver::NumberOfUnknowns,
1256 FVRusanovSolver::NumberOfAuxiliaryVariables,
1257 EvaluateFlux,
1258 EvaluateNonconservativeProduct,
1259 EvaluateSource,
1260 EvaluateMaximumEigenvalueAfterTimeStep,
1262 "device(s) " + deviceString + ", stateless, task-graph, AoSoA, usm",
1263 launchingThreads,
1264 device,
1265 localPatches
1266 );
1267
1268 assessKernel(
1270 FVRusanovSolver,
1271 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1272 HaloSize,
1273 FVRusanovSolver::NumberOfUnknowns,
1274 FVRusanovSolver::NumberOfAuxiliaryVariables,
1275 EvaluateFlux,
1276 EvaluateNonconservativeProduct,
1277 EvaluateSource,
1278 EvaluateMaximumEigenvalueAfterTimeStep,
1280 "device(s) " + deviceString + ", stateless, batched, AoS, copy",
1281 launchingThreads,
1282 device,
1283 localPatches
1284 );
1285 assessKernel(
1287 FVRusanovSolver,
1288 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1289 HaloSize,
1290 FVRusanovSolver::NumberOfUnknowns,
1291 FVRusanovSolver::NumberOfAuxiliaryVariables,
1292 EvaluateFlux,
1293 EvaluateNonconservativeProduct,
1294 EvaluateSource,
1295 EvaluateMaximumEigenvalueAfterTimeStep,
1297 "device(s) " + deviceString + ", stateless, batched, SoA, copy",
1298 launchingThreads,
1299 device,
1300 localPatches
1301 );
1302 assessKernel(
1304 FVRusanovSolver,
1305 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1306 HaloSize,
1307 FVRusanovSolver::NumberOfUnknowns,
1308 FVRusanovSolver::NumberOfAuxiliaryVariables,
1309 EvaluateFlux,
1310 EvaluateNonconservativeProduct,
1311 EvaluateSource,
1312 EvaluateMaximumEigenvalueAfterTimeStep,
1314 "device(s) " + deviceString + ", stateless, batched, AoSoA, copy",
1315 launchingThreads,
1316 device,
1317 localPatches
1318 );
1319
1320 assessKernel(
1322 FVRusanovSolver,
1323 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1324 HaloSize,
1325 FVRusanovSolver::NumberOfUnknowns,
1326 FVRusanovSolver::NumberOfAuxiliaryVariables,
1327 EvaluateFlux,
1328 EvaluateNonconservativeProduct,
1329 EvaluateSource,
1330 EvaluateMaximumEigenvalueAfterTimeStep,
1332 "device(s) " + deviceString + ", stateless, patch-wise, AoS, copy",
1333 launchingThreads,
1334 device,
1335 localPatches
1336 );
1337 assessKernel(
1339 FVRusanovSolver,
1340 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1341 HaloSize,
1342 FVRusanovSolver::NumberOfUnknowns,
1343 FVRusanovSolver::NumberOfAuxiliaryVariables,
1344 EvaluateFlux,
1345 EvaluateNonconservativeProduct,
1346 EvaluateSource,
1347 EvaluateMaximumEigenvalueAfterTimeStep,
1349 "device(s) " + deviceString + ", stateless, patch-wise, SoA, copy",
1350 launchingThreads,
1351 device,
1352 localPatches
1353 );
1354 assessKernel(
1356 FVRusanovSolver,
1357 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1358 HaloSize,
1359 FVRusanovSolver::NumberOfUnknowns,
1360 FVRusanovSolver::NumberOfAuxiliaryVariables,
1361 EvaluateFlux,
1362 EvaluateNonconservativeProduct,
1363 EvaluateSource,
1364 EvaluateMaximumEigenvalueAfterTimeStep,
1366 "device(s) " + deviceString + ", stateless, patch-wise, AoSoA, copy",
1367 launchingThreads,
1368 device,
1369 localPatches
1370 );
1371
1372 assessKernel(
1374 FVRusanovSolver,
1375 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1376 HaloSize,
1377 FVRusanovSolver::NumberOfUnknowns,
1378 FVRusanovSolver::NumberOfAuxiliaryVariables,
1379 EvaluateFlux,
1380 EvaluateNonconservativeProduct,
1381 EvaluateSource,
1382 EvaluateMaximumEigenvalueAfterTimeStep,
1384 "device(s) " + deviceString + ", stateless, task-graph, AoS, copy",
1385 launchingThreads,
1386 device,
1387 localPatches
1388 );
1389 assessKernel(
1391 FVRusanovSolver,
1392 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1393 HaloSize,
1394 FVRusanovSolver::NumberOfUnknowns,
1395 FVRusanovSolver::NumberOfAuxiliaryVariables,
1396 EvaluateFlux,
1397 EvaluateNonconservativeProduct,
1398 EvaluateSource,
1399 EvaluateMaximumEigenvalueAfterTimeStep,
1401 "device(s) " + deviceString + ", stateless, task-graph, SoA, copy",
1402 launchingThreads,
1403 device,
1404 localPatches
1405 );
1406 assessKernel(
1408 FVRusanovSolver,
1409 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1410 HaloSize,
1411 FVRusanovSolver::NumberOfUnknowns,
1412 FVRusanovSolver::NumberOfAuxiliaryVariables,
1413 EvaluateFlux,
1414 EvaluateNonconservativeProduct,
1415 EvaluateSource,
1416 EvaluateMaximumEigenvalueAfterTimeStep,
1418 "device(s) " + deviceString + ", stateless, task-graph, AoSoA, copy",
1419 launchingThreads,
1420 device,
1421 localPatches
1422 );
1423
1424 assessKernel(
1426 FVRusanovSolver,
1427 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1428 HaloSize,
1429 FVRusanovSolver::NumberOfUnknowns,
1430 FVRusanovSolver::NumberOfAuxiliaryVariables,
1431 EvaluateFlux,
1432 EvaluateNonconservativeProduct,
1433 EvaluateSource,
1434 EvaluateMaximumEigenvalueAfterTimeStep,
1436 "device(s) " + deviceString + ", stateless, batched, AoS, managed",
1437 launchingThreads,
1438 device,
1439 localPatches
1440 );
1441 assessKernel(
1443 FVRusanovSolver,
1444 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1445 HaloSize,
1446 FVRusanovSolver::NumberOfUnknowns,
1447 FVRusanovSolver::NumberOfAuxiliaryVariables,
1448 EvaluateFlux,
1449 EvaluateNonconservativeProduct,
1450 EvaluateSource,
1451 EvaluateMaximumEigenvalueAfterTimeStep,
1453 "device(s) " + deviceString + ", stateless, batched, SoA, managed",
1454 launchingThreads,
1455 device,
1456 localPatches
1457 );
1458 assessKernel(
1460 FVRusanovSolver,
1461 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1462 HaloSize,
1463 FVRusanovSolver::NumberOfUnknowns,
1464 FVRusanovSolver::NumberOfAuxiliaryVariables,
1465 EvaluateFlux,
1466 EvaluateNonconservativeProduct,
1467 EvaluateSource,
1468 EvaluateMaximumEigenvalueAfterTimeStep,
1470 "device(s) " + deviceString + ", stateless, batched, AoSoA, managed",
1471 launchingThreads,
1472 device,
1473 localPatches
1474 );
1475
1476 assessKernel(
1478 FVRusanovSolver,
1479 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1480 HaloSize,
1481 FVRusanovSolver::NumberOfUnknowns,
1482 FVRusanovSolver::NumberOfAuxiliaryVariables,
1483 EvaluateFlux,
1484 EvaluateNonconservativeProduct,
1485 EvaluateSource,
1486 EvaluateMaximumEigenvalueAfterTimeStep,
1488 "device(s) " + deviceString + ", stateless, patch-wise, AoS, managed",
1489 launchingThreads,
1490 device,
1491 localPatches
1492 );
1493 assessKernel(
1495 FVRusanovSolver,
1496 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1497 HaloSize,
1498 FVRusanovSolver::NumberOfUnknowns,
1499 FVRusanovSolver::NumberOfAuxiliaryVariables,
1500 EvaluateFlux,
1501 EvaluateNonconservativeProduct,
1502 EvaluateSource,
1503 EvaluateMaximumEigenvalueAfterTimeStep,
1505 "device(s) " + deviceString + ", stateless, patch-wise, SoA, managed",
1506 launchingThreads,
1507 device,
1508 localPatches
1509 );
1510 assessKernel(
1512 FVRusanovSolver,
1513 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1514 HaloSize,
1515 FVRusanovSolver::NumberOfUnknowns,
1516 FVRusanovSolver::NumberOfAuxiliaryVariables,
1517 EvaluateFlux,
1518 EvaluateNonconservativeProduct,
1519 EvaluateSource,
1520 EvaluateMaximumEigenvalueAfterTimeStep,
1522 "device(s) " + deviceString + ", stateless, patch-wise, AoSoA, managed",
1523 launchingThreads,
1524 device,
1525 localPatches
1526 );
1527
1528 assessKernel(
1530 FVRusanovSolver,
1531 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1532 HaloSize,
1533 FVRusanovSolver::NumberOfUnknowns,
1534 FVRusanovSolver::NumberOfAuxiliaryVariables,
1535 EvaluateFlux,
1536 EvaluateNonconservativeProduct,
1537 EvaluateSource,
1538 EvaluateMaximumEigenvalueAfterTimeStep,
1540 "device(s) " + deviceString + ", stateless, task-graph, AoS, managed",
1541 launchingThreads,
1542 device,
1543 localPatches
1544 );
1545 assessKernel(
1547 FVRusanovSolver,
1548 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1549 HaloSize,
1550 FVRusanovSolver::NumberOfUnknowns,
1551 FVRusanovSolver::NumberOfAuxiliaryVariables,
1552 EvaluateFlux,
1553 EvaluateNonconservativeProduct,
1554 EvaluateSource,
1555 EvaluateMaximumEigenvalueAfterTimeStep,
1557 "device(s) " + deviceString + ", stateless, task-graph, SoA, managed",
1558 launchingThreads,
1559 device,
1560 localPatches
1561 );
1562 assessKernel(
1564 FVRusanovSolver,
1565 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1566 HaloSize,
1567 FVRusanovSolver::NumberOfUnknowns,
1568 FVRusanovSolver::NumberOfAuxiliaryVariables,
1569 EvaluateFlux,
1570 EvaluateNonconservativeProduct,
1571 EvaluateSource,
1572 EvaluateMaximumEigenvalueAfterTimeStep,
1574 "device(s) " + deviceString + ", stateless, task-graph, AoSoA, managed",
1575 launchingThreads,
1576 device,
1577 localPatches
1578 );
1579#endif
1580
1581#if defined(GPUOffloadingOMP)
1582 assessKernel(
1584 FVRusanovSolver,
1585 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1586 HaloSize,
1587 FVRusanovSolver::NumberOfUnknowns,
1588 FVRusanovSolver::NumberOfAuxiliaryVariables,
1589 EvaluateFlux,
1590 EvaluateNonconservativeProduct,
1591 EvaluateSource,
1592 EvaluateMaximumEigenvalueAfterTimeStep,
1594 "device(s) " + deviceString + ", stateless, batched, AoS, copy",
1595 launchingThreads,
1596 device,
1597 localPatches
1598 );
1599 assessKernel(
1601 FVRusanovSolver,
1602 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1603 HaloSize,
1604 FVRusanovSolver::NumberOfUnknowns,
1605 FVRusanovSolver::NumberOfAuxiliaryVariables,
1606 EvaluateFlux,
1607 EvaluateNonconservativeProduct,
1608 EvaluateSource,
1609 EvaluateMaximumEigenvalueAfterTimeStep,
1611 "device(s) " + deviceString + ", stateless, batched, SoA, copy",
1612 launchingThreads,
1613 device,
1614 localPatches
1615 );
1616 assessKernel(
1618 FVRusanovSolver,
1619 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1620 HaloSize,
1621 FVRusanovSolver::NumberOfUnknowns,
1622 FVRusanovSolver::NumberOfAuxiliaryVariables,
1623 EvaluateFlux,
1624 EvaluateNonconservativeProduct,
1625 EvaluateSource,
1626 EvaluateMaximumEigenvalueAfterTimeStep,
1628 "device(s) " + deviceString + ", stateless, batched, AoSoA, copy",
1629 launchingThreads,
1630 device,
1631 localPatches
1632 );
1633 assessKernel(
1635 FVRusanovSolver,
1636 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1637 HaloSize,
1638 FVRusanovSolver::NumberOfUnknowns,
1639 FVRusanovSolver::NumberOfAuxiliaryVariables,
1640 EvaluateFlux,
1641 EvaluateNonconservativeProduct,
1642 EvaluateSource,
1643 EvaluateMaximumEigenvalueAfterTimeStep,
1645 "device(s) " + deviceString + ", stateless, batched, AoS, usm",
1646 launchingThreads,
1647 device,
1648 localPatches
1649 );
1650 assessKernel(
1652 FVRusanovSolver,
1653 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1654 HaloSize,
1655 FVRusanovSolver::NumberOfUnknowns,
1656 FVRusanovSolver::NumberOfAuxiliaryVariables,
1657 EvaluateFlux,
1658 EvaluateNonconservativeProduct,
1659 EvaluateSource,
1660 EvaluateMaximumEigenvalueAfterTimeStep,
1662 "device(s) " + deviceString + ", stateless, batched, SoA, usm",
1663 launchingThreads,
1664 device,
1665 localPatches
1666 );
1667 assessKernel(
1669 FVRusanovSolver,
1670 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1671 HaloSize,
1672 FVRusanovSolver::NumberOfUnknowns,
1673 FVRusanovSolver::NumberOfAuxiliaryVariables,
1674 EvaluateFlux,
1675 EvaluateNonconservativeProduct,
1676 EvaluateSource,
1677 EvaluateMaximumEigenvalueAfterTimeStep,
1679 "device(s) " + deviceString + ", stateless, batched, AoSoA, usm",
1680 launchingThreads,
1681 device,
1682 localPatches
1683 );
1684
1685 assessKernel(
1687 FVRusanovSolver,
1688 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1689 HaloSize,
1690 FVRusanovSolver::NumberOfUnknowns,
1691 FVRusanovSolver::NumberOfAuxiliaryVariables,
1692 EvaluateFlux,
1693 EvaluateNonconservativeProduct,
1694 EvaluateSource,
1695 EvaluateMaximumEigenvalueAfterTimeStep,
1697 "device(s) " + deviceString + ", stateless, patch-wise, AoS, copy",
1698 launchingThreads,
1699 device,
1700 localPatches
1701 );
1702 assessKernel(
1704 FVRusanovSolver,
1705 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1706 HaloSize,
1707 FVRusanovSolver::NumberOfUnknowns,
1708 FVRusanovSolver::NumberOfAuxiliaryVariables,
1709 EvaluateFlux,
1710 EvaluateNonconservativeProduct,
1711 EvaluateSource,
1712 EvaluateMaximumEigenvalueAfterTimeStep,
1714 "device(s) " + deviceString + ", stateless, patch-wise, SoA, copy",
1715 launchingThreads,
1716 device,
1717 localPatches
1718 );
1719 assessKernel(
1721 FVRusanovSolver,
1722 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1723 HaloSize,
1724 FVRusanovSolver::NumberOfUnknowns,
1725 FVRusanovSolver::NumberOfAuxiliaryVariables,
1726 EvaluateFlux,
1727 EvaluateNonconservativeProduct,
1728 EvaluateSource,
1729 EvaluateMaximumEigenvalueAfterTimeStep,
1731 "device(s) " + deviceString + ", stateless, patch-wise, AoSoA, copy",
1732 launchingThreads,
1733 device,
1734 localPatches
1735 );
1736 assessKernel(
1738 FVRusanovSolver,
1739 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1740 HaloSize,
1741 FVRusanovSolver::NumberOfUnknowns,
1742 FVRusanovSolver::NumberOfAuxiliaryVariables,
1743 EvaluateFlux,
1744 EvaluateNonconservativeProduct,
1745 EvaluateSource,
1746 EvaluateMaximumEigenvalueAfterTimeStep,
1748 "device(s) " + deviceString + ", stateless, patch-wise, AoS, usm",
1749 launchingThreads,
1750 device,
1751 localPatches
1752 );
1753 assessKernel(
1755 FVRusanovSolver,
1756 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1757 HaloSize,
1758 FVRusanovSolver::NumberOfUnknowns,
1759 FVRusanovSolver::NumberOfAuxiliaryVariables,
1760 EvaluateFlux,
1761 EvaluateNonconservativeProduct,
1762 EvaluateSource,
1763 EvaluateMaximumEigenvalueAfterTimeStep,
1765 "device(s) " + deviceString + ", stateless, patch-wise, SoA, usm",
1766 launchingThreads,
1767 device,
1768 localPatches
1769 );
1770 assessKernel(
1772 FVRusanovSolver,
1773 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1774 HaloSize,
1775 FVRusanovSolver::NumberOfUnknowns,
1776 FVRusanovSolver::NumberOfAuxiliaryVariables,
1777 EvaluateFlux,
1778 EvaluateNonconservativeProduct,
1779 EvaluateSource,
1780 EvaluateMaximumEigenvalueAfterTimeStep,
1782 "device(s) " + deviceString + ", stateless, patch-wise, AoSoA, usm",
1783 launchingThreads,
1784 device,
1785 localPatches
1786 );
1787
1788 assessKernel(
1790 FVRusanovSolver,
1791 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1792 HaloSize,
1793 FVRusanovSolver::NumberOfUnknowns,
1794 FVRusanovSolver::NumberOfAuxiliaryVariables,
1795 EvaluateFlux,
1796 EvaluateNonconservativeProduct,
1797 EvaluateSource,
1798 EvaluateMaximumEigenvalueAfterTimeStep,
1800 "device(s) " + deviceString + ", stateless, volume-wise, AoS, copy",
1801 launchingThreads,
1802 device,
1803 localPatches
1804 );
1805 assessKernel(
1807 FVRusanovSolver,
1808 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1809 HaloSize,
1810 FVRusanovSolver::NumberOfUnknowns,
1811 FVRusanovSolver::NumberOfAuxiliaryVariables,
1812 EvaluateFlux,
1813 EvaluateNonconservativeProduct,
1814 EvaluateSource,
1815 EvaluateMaximumEigenvalueAfterTimeStep,
1817 "device(s) " + deviceString + ", stateless, volume-wise, SoA, copy",
1818 launchingThreads,
1819 device,
1820 localPatches
1821 );
1822 assessKernel(
1824 FVRusanovSolver,
1825 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1826 HaloSize,
1827 FVRusanovSolver::NumberOfUnknowns,
1828 FVRusanovSolver::NumberOfAuxiliaryVariables,
1829 EvaluateFlux,
1830 EvaluateNonconservativeProduct,
1831 EvaluateSource,
1832 EvaluateMaximumEigenvalueAfterTimeStep,
1834 "device(s) " + deviceString + ", stateless, volume-wise, AoSoA, copy",
1835 launchingThreads,
1836 device,
1837 localPatches
1838 );
1839#endif
1840
1841#if defined(GPUOffloadingCPP)
1842 assessKernel(
1844 FVRusanovSolver,
1845 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1846 HaloSize,
1847 FVRusanovSolver::NumberOfUnknowns,
1848 FVRusanovSolver::NumberOfAuxiliaryVariables,
1849 EvaluateFlux,
1850 EvaluateNonconservativeProduct,
1851 EvaluateSource,
1852 EvaluateMaximumEigenvalueAfterTimeStep,
1854 "device(s) " + deviceString + ", stateless, batched, AoS, usm",
1855 launchingThreads,
1856 device,
1857 localPatches
1858 );
1859 assessKernel(
1861 FVRusanovSolver,
1862 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1863 HaloSize,
1864 FVRusanovSolver::NumberOfUnknowns,
1865 FVRusanovSolver::NumberOfAuxiliaryVariables,
1866 EvaluateFlux,
1867 EvaluateNonconservativeProduct,
1868 EvaluateSource,
1869 EvaluateMaximumEigenvalueAfterTimeStep,
1871 "device(s) " + deviceString + ", stateless, batched, SoA, usm",
1872 launchingThreads,
1873 device,
1874 localPatches
1875 );
1876 assessKernel(
1878 FVRusanovSolver,
1879 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1880 HaloSize,
1881 FVRusanovSolver::NumberOfUnknowns,
1882 FVRusanovSolver::NumberOfAuxiliaryVariables,
1883 EvaluateFlux,
1884 EvaluateNonconservativeProduct,
1885 EvaluateSource,
1886 EvaluateMaximumEigenvalueAfterTimeStep,
1888 "device(s) " + deviceString + ", stateless, batched, AoSoA, usm",
1889 launchingThreads,
1890 device,
1891 localPatches
1892 );
1893
1894 assessKernel(
1896 FVRusanovSolver,
1897 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1898 HaloSize,
1899 FVRusanovSolver::NumberOfUnknowns,
1900 FVRusanovSolver::NumberOfAuxiliaryVariables,
1901 EvaluateFlux,
1902 EvaluateNonconservativeProduct,
1903 EvaluateSource,
1904 EvaluateMaximumEigenvalueAfterTimeStep,
1906 "device(s) " + deviceString + ", stateless, patch-wise, AoS, usm",
1907 launchingThreads,
1908 device,
1909 localPatches
1910 );
1911 assessKernel(
1913 FVRusanovSolver,
1914 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1915 HaloSize,
1916 FVRusanovSolver::NumberOfUnknowns,
1917 FVRusanovSolver::NumberOfAuxiliaryVariables,
1918 EvaluateFlux,
1919 EvaluateNonconservativeProduct,
1920 EvaluateSource,
1921 EvaluateMaximumEigenvalueAfterTimeStep,
1923 "device(s) " + deviceString + ", stateless, patch-wise, SoA, usm",
1924 launchingThreads,
1925 device,
1926 localPatches
1927 );
1928 assessKernel(
1930 FVRusanovSolver,
1931 FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch,
1932 HaloSize,
1933 FVRusanovSolver::NumberOfUnknowns,
1934 FVRusanovSolver::NumberOfAuxiliaryVariables,
1935 EvaluateFlux,
1936 EvaluateNonconservativeProduct,
1937 EvaluateSource,
1938 EvaluateMaximumEigenvalueAfterTimeStep,
1940 "device(s) " + deviceString + ", stateless, patch-wise, AoSoA, usm",
1941 launchingThreads,
1942 device,
1943 localPatches
1944 );
1945#endif
1946 }
1947}
1948
// Entry point of the kernel benchmark executable. The lines visible here set
// up the log filters and the shared-memory/GPU environment, print the
// benchmark configuration, call runBenchmarks() once per entry of
// NumberOfPatchesToStudy, release validOutcome and return EXIT_SUCCESS.
// argc and argv are not referenced by any line visible in this listing.
// NOTE(review): the embedded listing numbers skip 1950, 1953-1955, 1957-1958,
// 1964, 1999 and 2001-2003, so some statements are presumably missing from
// this view -- confirm against the original file before editing.
1949int main(int argc, char** argv) {
1951 // Do this early, so people can use logInfo properly.
1952 repositories::initLogFilters();
1956 repositories::initSharedMemoryAndGPUEnvironment();
1959
// Enable trapping of division-by-zero, invalid-operation and overflow
// floating-point exceptions. EnableFPE is tested with a plain `if` here,
// whereas the logging further down tests it with `if constexpr`.
1960 if (EnableFPE) {
1961 feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
1962 }
1963
// Report the benchmark configuration through logInfo.
1965 logInfo("main()", "Dimensions: " << Dimensions);
1966 logInfo("main()", "Number of threads launching compute kernels: " << NumberOfLaunchingThreads);
1967 logInfo("main()", "Number of patches per thread/compute kernel launch to study: " << toString(NumberOfPatchesToStudy));
1968 logInfo("main()", "Number of compute threads: " << ::tarch::multicore::Core::getInstance().getNumberOfThreads());
1969 logInfo("main()", "Number of MPI ranks: " << ::tarch::mpi::Rank::getInstance().getNumberOfRanks());
1970 logInfo("main()", "Number of GPU devices: " << ::tarch::accelerator::Device::getInstance().getNumberOfDevices());
1971 logInfo("main()", "Number of finite volumes per axis per patch: " << FVRusanovSolver::NumberOfFiniteVolumesPerAxisPerPatch);
1972 logInfo("main()", "Number of samples per measurement: " << NumberOfSamples);
1973 logInfo("main()", "Evaluate max. eigenvalue (reduction step): " << std::boolalpha << EvaluateMaximumEigenvalueAfterTimeStep);
1974 if constexpr (EnableFPE) {
1975 logInfo("main()", "Floating-point exception handler enabled");
1976 }
// Accuracy == 0.0 means no accuracy checks; see the final report below.
1977 if constexpr (Accuracy > 0.0) {
1978 logInfo("main()", "Performing accuracy checks with precision: " << Accuracy);
1979 }
1980#if defined(GPUOffloadingSYCL)
1981 logInfo("main()", "Set SYCL_DEVICE_FILTER=gpu when using SYCL on the device");
1982 logInfo("main()", "Set SYCL_PI_TRACE=2 in case of runtime errors");
1983#endif
// NOTE(review): the brace below closes a block whose opening statement is not
// visible in this listing (presumably on the skipped line 1964) -- confirm.
1984 }
1985
// One benchmark run per configured patch count. The label handed to
// OTTER_PHASE_SWITCH combines the patch count with the number of compute threads.
1986 for (int n = 0; n < NumberOfPatchesToStudy.size(); n++) {
1987 std::stringstream msg;
1988 msg << "kernel-benchmarks-" << NumberOfPatchesToStudy[n] << "-patches-" << ::tarch::multicore::Core::getInstance().getNumberOfThreads() << "-threads";
1989 OTTER_PHASE_SWITCH(msg.str().c_str());
1990 runBenchmarks(NumberOfPatchesToStudy[n], NumberOfLaunchingThreads);
1991 }
1992
// Final summary: state whether accuracy checks were performed.
1993 if constexpr (Accuracy > 0.0) {
1994 logInfo("main()", "all kernels yield the same outcome up to a accuracy of " << Accuracy << " unless reported otherwise");
1995 } else {
1996 logInfo("main()", "no accuracy checks were performed");
1997 }
1998
// validOutcome is the file-level double* initialised to nullptr near the top of
// the file; delete[] on a null pointer is a no-op.
2000 delete[] validOutcome;
2004 return EXIT_SUCCESS;
2005}
#define assertionEquals(lhs, rhs)
#define assertion(expr)
And from this we can write down \f$ \nabla \phi_i \nabla \phi_i \, dx \f$, but since we are constructing a matrix, let's investigate the \f$ j ...
void wrapDeviceKernel(int device, ::exahype2::CellData &patchData)
int main(int argc, char **argv)
Main routine of the SPH code.
void wrapStatelessHostKernel(int device, ::exahype2::CellData &patchData)
Wrapper around stateless kernel invocations.
static constexpr int NumberOfInputEntries
std::tuple< double, int > validateOutcome(double *Q, int patchIndex, const double maxEigenvalue)
Validate data against pre-stored simulation outcome.
void wrapBatchedHeapFunctorHostKernels(int device, ::exahype2::CellData &patchData)
Another wrapper.
::tarch::timing::Measurement timingComputeKernel
void runBenchmarks(int numberOfPatches, int launchingThreads)
Run the benchmark for one particular number of patches.
void initInputData(double *Q)
Set input data.
static constexpr double CellOffset
void storeOutcome(const double *Q, const double maxEigenvalue)
Store outcome of one compute kernel.
void wrapPatchwiseHeapFunctorsHostKernel(int device, ::exahype2::CellData &patchData)
This is a wrapper around the kernel call with the functors.
::tarch::logging::Log _log("::")
static constexpr int NumberOfOutputEntries
static constexpr double TimeStamp
void reportRuntime(const std::string &kernelIdentificator, const ::tarch::timing::Measurement &timingKernelLaunch, int patches)
void wrapVolumewiseFunctorHostKernels(int device, ::exahype2::CellData &patchData)
Another wrapper.
static constexpr int HaloSize
static constexpr int NumberOfFiniteVolumesPerPatch
static constexpr double TimeStepSize
static constexpr double CellSize
::tarch::multicore::BooleanSemaphore validateOutcomeSemaphore
#define logError(methodName, logMacroMessageStream)
Wrapper macro around tarch::logging::Log to improve logging.
Definition Log.h:464
#define logInfo(methodName, logMacroMessageStream)
Wrapper macro around tarch::logging::Log to improve logging.
Definition Log.h:411
#define parallelFor(counter, max)
Definition Loop.h:441
static Device & getInstance()
static constexpr int HostDevice
Accelerator devices (GPUs) are enumerated starting from 0.
Definition Device.h:48
int getLocalDeviceId() const
Definition Device.cpp:34
Log Device.
Definition Log.h:516
static int getGlobalMasterRank()
Get the global master.
Definition Rank.cpp:414
int getNumberOfRanks() const
Definition Rank.cpp:551
static Rank & getInstance()
This operation returns the singleton instance.
Definition Rank.cpp:538
int getRank() const
Return rank of this node.
Definition Rank.cpp:528
static Core & getInstance()
Definition Core.cpp:55
int getNumberOfThreads() const
Returns the number of threads that is used.
Definition Core.cpp:66
Create a lock around a boolean semaphore region.
Definition Lock.h:19
std::string toString() const
void setValue(const double &value)
Set the value.
A simple class that has to be included to measure the clock ticks required for an operation.
Definition Watch.h:45
double getCalendarTime()
This method returns the elapsed calendar time between the start and stop command of the timer,...
Definition Watch.cpp:74
void stop()
Stop timer.
Definition Watch.cpp:55
std::string toString(Filter filter)
Definition convert.cpp:170
#define endParallelFor
Definition Loop.h:63
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseUSMStateless(int targetDevice, CellData &patchData, tarch::timing::Measurement &measurement) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedUSMStateless(int targetDevice, CellData &patchData, tarch::timing::Measurement &measurement) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovVolumewiseStateless(int targetDevice, CellData &patchData, tarch::timing::Measurement &measurement) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedUSMStateless(int targetDevice, CellData &patchData, tarch::timing::Measurement &measurement) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseUSMStateless(int targetDevice, CellData &patchData, tarch::timing::Measurement &measurement) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseHeapStateless(int targetDevice, CellData &patchData, tarch::timing::Measurement &measurement) InlineMethod
The name patchwise is a little bit irritating in a GPU context.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedHeapStateless(int targetDevice, CellData &patchData, tarch::timing::Measurement &measurement) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovTaskgraphUSMStateless(int targetDevice, CellData &patchData) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseManagedStateless(int targetDevice, CellData &patchData) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseHeapStateless(int targetDevice, CellData &patchData) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedUSMStateless(int targetDevice, CellData &patchData) InlineMethod
This version allocates the temporary data on the GPU via a device malloc, handing in the standard mem...
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseUSMStateless(int targetDevice, CellData &patchData) InlineMethod
1:1 translation of the numerical scheme.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedManagedStateless(int targetDevice, CellData &patchData) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovTaskgraphManagedStateless(int targetDevice, CellData &patchData) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovTaskgraphCopyStateless(int targetDevice, CellData &patchData) InlineMethod
All the memory management is discussed in the documentation of GPUCellData.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedHeapStateless(int targetDevice, CellData &patchData) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedHeapFunctors(CellData &patchData, const FluxFunctor &fluxFunctor, const NonconservativeProductFunctor &nonconservativeProductFunctor, const SourceFunctor &sourceFunctor, const MaxEigenvalueFunctor &maxEigenvalueFunctor, tarch::timing::Measurement &measurement, peano4::utils::LoopPlacement loopParallelism=peano4::utils::LoopPlacement::Serial) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedHeapStateless(::exahype2::CellData &patchData, tarch::timing::Measurement &measurement, peano4::utils::LoopPlacement loopParallelism=peano4::utils::LoopPlacement::Serial) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovVolumewiseStateless(::exahype2::CellData &patchData, tarch::timing::Measurement &measurement, peano4::utils::LoopPlacement loopParallelism=peano4::utils::LoopPlacement::Serial) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedInsituStateless(CellData &patchData, tarch::timing::Measurement &measurement, peano4::utils::LoopPlacement loopParallelism=peano4::utils::LoopPlacement::Serial) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseHeapStateless(CellData &patchData, tarch::timing::Measurement &measurement, peano4::utils::LoopPlacement loopParallelism=peano4::utils::LoopPlacement::Serial) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseInsituStateless(CellData &patchData, tarch::timing::Measurement &measurement, peano4::utils::LoopPlacement loopParallelism=peano4::utils::LoopPlacement::Serial) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovVolumewiseFunctors(CellData &patchData, const FluxFunctor &fluxFunctor, const NonconservativeProductFunctor &nonconservativeProductFunctor, const SourceFunctor &sourceFunctor, const MaxEigenvalueFunctor &maxEigenvalueFunctor, tarch::timing::Measurement &measurement, peano4::utils::LoopPlacement loopParallelism=peano4::utils::LoopPlacement::Serial) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseHeapFunctors(CellData &patchData, const FluxFunctor &fluxFunctor, const NonconservativeProductFunctor &nonconservativeProductFunctor, const SourceFunctor &sourceFunctor, const MaxEigenvalueFunctor &maxEigenvalueFunctor, tarch::timing::Measurement &measurement, peano4::utils::LoopPlacement loopParallelism=peano4::utils::LoopPlacement::Serial) InlineMethod
Apply the Rusanov Riemann solver over a set of patches.
void fillLookupTables()
Fill Lookup Tables.
Definition peano.cpp:92
void runTestsAndBenchmarks()
Runs GPU Offloading tests, GPU Bandwidth benchmarks and prints the benchmark results.
Definition peano.cpp:163
int initParallelEnvironment(int *argc, char ***argv)
Init Parallel Environment.
Definition peano.cpp:106
void shutdownParallelEnvironment()
Shutdown all the parallel environment, i.e.
Definition peano.cpp:132
constexpr double PI
Definition Scalar.h:12
bool equals(const Matrix< Rows, Cols, Scalar > &lhs, const Matrix< Rows, Cols, Scalar > &rhs, const Scalar &tolerance=NUMERICAL_ZERO_DIFFERENCE)
Compares two matrices for equality up to a numerical tolerance.
void shutdownSmartMPI()
Definition multicore.cpp:27
void initSmartMPI()
Switch on SmartMPI.
Definition multicore.cpp:11
void shutdownNonCriticalAssertionEnvironment()
peano4::shutdownParallelEnvironment().
void freeMemory(void *data, MemoryLocation location)
void initNonCriticalAssertionEnvironment()
Register the assertion tag from the global communicator.
@ ManagedSharedAcceleratorDeviceMemory
To be used on host only.
T * allocateMemory(int size, MemoryLocation location, int device=-1)
Definition accelerator.h:82
#define OTTER_FINALISE()
Definition otter.h:131
#define OTTER_INITIALISE()
Definition otter.h:130
#define OTTER_PHASE_SWITCH(...)
Definition otter.h:145
Representation of a number of cells which contains all information that's required to process the sto...
Definition CellData.h:79
double * maxEigenvalue
Out values.
Definition CellData.h:111
double ** QOut
Out values.
Definition CellData.h:106
double ** QIn
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
Definition CellData.h:84
tarch::la::Vector< Dimensions, double > * cellSize
Definition CellData.h:86
tarch::la::Vector< Dimensions, double > * cellCentre
Definition CellData.h:85
void setX(const tarch::la::Vector< Dimensions, double > &value)
Simple vector class.
Definition Vector.h:134