Peano
Loading...
Searching...
No Matches
Acoustic-main.cpp
Go to the documentation of this file.
1// This file is part of the ExaHyPE2 project. For conditions of distribution and
2// use, please see the copyright notice at www.peano-framework.org
3#include "Acoustic-main.h"
4
5#include <fenv.h>
6#pragma float_control(precise, on)
7#pragma STDC FENV_ACCESS ON
8
9#include "config.h"
10#include "Constants.h"
12#include "observers/CreateGrid.h"
13#include "observers/CreateGridAndConvergeLoadBalancing.h"
14#include "observers/CreateGridButPostponeRefinement.h"
15#include "observers/InitGrid.h"
16#include "observers/PlotSolution.h"
17#include "observers/TimeStep.h"
18#include "peano4/peano.h"
19#include "repositories/DataRepository.h"
20#include "repositories/SolverRepository.h"
21#include "repositories/StepRepository.h"
25#include "tarch/logging/Log.h"
32#include "tarch/timing/Watch.h"
33#include "tasks/AcousticEnclaveTask.h"
35
// Reference maximum eigenvalue that validateOutcome() prints next to the
// eigenvalue of the patch under test. Starts at 0.0.
38double validMaxEigenvalue = 0.0;
// Reference solution. Stays nullptr until the first storeOutcome() call
// allocates it and copies NumberOfOutputEntries values into it.
39double* validOutcome = nullptr;
// Time stamp written into patchData.t[i] for every patch in runBenchmarks().
40static constexpr double TimeStamp = 0.5;
// Time step size written into patchData.dt[i] for every patch in
// runBenchmarks().
41static constexpr double TimeStepSize = 1e-6;
// Cell size handed to event.setH() in runBenchmarks().
42static constexpr double CellSize = 0.1;
// Cell offset handed to event.setX() in runBenchmarks().
43static constexpr double CellOffset = 4.0;
// Halo width per patch face: every axis of the input patch is enlarged by
// 2 * HaloSize volumes (see NumberOfInputEntries).
44static constexpr int HaloSize = 1;
// Accuracy is declared outside this listing. It has to be either exactly 0.0
// or at least machine epsilon; runBenchmarks() only validates kernel outcomes
// if Accuracy > 0.0.
47static_assert(
48 Accuracy >= std::numeric_limits<double>::epsilon() || Accuracy == 0.0
49);
50
51#if Dimensions == 2
52static constexpr int NumberOfInputEntries
53 = (Acoustic::NumberOfFiniteVolumesPerAxisPerPatch + 2 * HaloSize)
54 * (Acoustic::NumberOfFiniteVolumesPerAxisPerPatch + 2 * HaloSize)
55 * (Acoustic::NumberOfUnknowns + Acoustic::NumberOfAuxiliaryVariables);
56static constexpr int NumberOfOutputEntries
57 = (Acoustic::NumberOfFiniteVolumesPerAxisPerPatch + 0)
58 * (Acoustic::NumberOfFiniteVolumesPerAxisPerPatch + 0)
59 * (Acoustic::NumberOfUnknowns + Acoustic::NumberOfAuxiliaryVariables);
60static constexpr int NumberOfFiniteVolumesPerPatch
61 = Acoustic::NumberOfFiniteVolumesPerAxisPerPatch
62 * Acoustic::NumberOfFiniteVolumesPerAxisPerPatch;
63#else
64static constexpr int NumberOfInputEntries
65 = (Acoustic::NumberOfFiniteVolumesPerAxisPerPatch + 2 * HaloSize)
66 * (Acoustic::NumberOfFiniteVolumesPerAxisPerPatch + 2 * HaloSize)
67 * (Acoustic::NumberOfFiniteVolumesPerAxisPerPatch + 2 * HaloSize)
68 * (Acoustic::NumberOfUnknowns + Acoustic::NumberOfAuxiliaryVariables);
69static constexpr int NumberOfOutputEntries
70 = (Acoustic::NumberOfFiniteVolumesPerAxisPerPatch + 0)
71 * (Acoustic::NumberOfFiniteVolumesPerAxisPerPatch + 0)
72 * (Acoustic::NumberOfFiniteVolumesPerAxisPerPatch + 0)
73 * (Acoustic::NumberOfUnknowns + Acoustic::NumberOfAuxiliaryVariables);
75 = Acoustic::NumberOfFiniteVolumesPerAxisPerPatch
76 * Acoustic::NumberOfFiniteVolumesPerAxisPerPatch
77 * Acoustic::NumberOfFiniteVolumesPerAxisPerPatch;
78#endif
79
80
89void initInputData(double* Q) {
90 for (int i = 0; i < NumberOfInputEntries; i++) {
91 Q[i] = std::sin(1.0 * i / (NumberOfInputEntries) * ::tarch::la::PI);
92 }
93}
94
95
103void storeOutcome(const double* Q, const double maxEigenvalue) {
105 if (validOutcome == nullptr) {
106 validOutcome = new double[NumberOfOutputEntries]{0.0};
107 std::memcpy(validOutcome, Q, sizeof(double) * NumberOfOutputEntries);
109 logInfo("storeOutcome()", "Bookmarked reference solution");
110 }
111}
112
113
121std::tuple<double, int> validateOutcome(
122 double* Q,
123 int patchIndex,
124 const double maxEigenvalue
125) {
127 int index = 0;
128 int errors = 0;
129 double maxDifference = 0.0;
130
131 for (int i = 0; i < NumberOfOutputEntries; i++) {
132 if (not ::tarch::la::equals(Q[i], validOutcome[i], Accuracy)) {
133 errors++;
134 std::cerr.precision(16);
135 logError(
136 "validateOutcome()",
137 std::fixed
138 << "Q[" << i << "]!=validOutcome[" << i << "]: " << Q[i]
139 << "!=" << validOutcome[i]
140 );
141 }
142 maxDifference = std::max(maxDifference, std::abs(Q[i] - validOutcome[i]));
143 index++;
144 }
145
147 std::cerr.precision(16);
148 logError(
149 "validateOutcome()",
150 std::fixed
151 << " maxEigenvalue[" << patchIndex << "]!=validMaxEigenvalue["
152 << patchIndex << "]: " << maxEigenvalue << "!=" << validMaxEigenvalue
153 );
154 errors++;
155 }
156
157 return {maxDifference, errors};
158}
159
160
162 const std::string& kernelIdentificator,
163 const ::tarch::timing::Measurement& timingKernelLaunch,
164 int patches
165) {
166 std::stringstream ss;
167 ss << "\n";
168 ss << kernelIdentificator << ":\n\t";
169
170 ss << timingComputeKernel.getValue() << " |\n\t";
171 ss
173 )
174 << " |\n\t";
175 ss << timingComputeKernel.toString() << " |\n\t";
176
177 ss << timingKernelLaunch.getValue() << " |\n\t";
178 ss
179 << (timingKernelLaunch.getValue() / patches / NumberOfFiniteVolumesPerPatch
180 );
181 ss << " |\n\t" << timingKernelLaunch.toString();
182
183 logInfo("reportRuntime()", ss.str());
184}
185
186
// First of three structurally identical kernel wrapper templates. It hands
// patchData, four solver callbacks and loopParallelism to a kernel template.
// NOTE(review): every line of this listing carries a line-number prefix and
// the numbering skips 204, 206, 208, 210 and 271. The function name, the
// second parameter, the first body statement, the name of the invoked kernel
// template and one call argument are therefore not visible here - recover
// them from the upstream file before changing this block.
201template <
202 class TempDataEnumerator,
203 ::peano4::utils::LoopPlacement loopParallelism>
205 int device,
207) {
209
// Template arguments of the invoked kernel: patch size per axis, halo size,
// number of unknowns and auxiliary variables, the four Evaluate* compile-time
// switches and the enumerator type.
211 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
212 HaloSize,
213 Acoustic::NumberOfUnknowns,
214 Acoustic::NumberOfAuxiliaryVariables,
215 EvaluateFlux,
216 EvaluateNonconservativeProduct,
217 EvaluateSource,
218 EvaluateMaximumEigenvalueAfterTimeStep,
219 TempDataEnumerator>(
220 patchData,
// Flux callback: forwards to the solver instance only if EvaluateFlux holds
// at compile time; otherwise the lambda body is empty and F is not written.
221 [&](
222 const double* __restrict__ Q,
223 const ::tarch::la::Vector<Dimensions, double>& x,
224 const ::tarch::la::Vector<Dimensions, double>& h,
225 double t,
226 double dt,
227 int normal,
228 double* __restrict__ F
229 ) -> void {
230 if constexpr (EvaluateFlux) {
231 repositories::instanceOfAcoustic.flux(Q, x, h, t, dt, normal, F);
232 }
233 },
// Non-conservative product callback: forwards only if
// EvaluateNonconservativeProduct holds at compile time.
234 [&](
235 const double* __restrict__ Q,
236 const double* __restrict__ deltaQ,
237 const ::tarch::la::Vector<Dimensions, double>& x,
238 const ::tarch::la::Vector<Dimensions, double>& h,
239 double t,
240 double dt,
241 int normal,
242 double* __restrict__ BTimesDeltaQ
243 ) -> void {
244 if constexpr (EvaluateNonconservativeProduct) {
245 repositories::instanceOfAcoustic
246 .nonconservativeProduct(Q, deltaQ, x, h, t, dt, normal, BTimesDeltaQ);
247 }
248 },
// Source term callback: forwards only if EvaluateSource holds at compile
// time.
249 [&](
250 const double* __restrict__ Q,
251 const ::tarch::la::Vector<Dimensions, double>& x,
252 const ::tarch::la::Vector<Dimensions, double>& h,
253 double t,
254 double dt,
255 double* __restrict__ S
256 ) -> void {
257 if constexpr (EvaluateSource) {
258 repositories::instanceOfAcoustic.sourceTerm(Q, x, h, t, dt, S);
259 }
260 },
// Eigenvalue callback: always forwards to the solver instance; there is no
// compile-time switch around this call.
261 [&](
262 const double* __restrict__ Q,
263 const ::tarch::la::Vector<Dimensions, double>& x,
264 const ::tarch::la::Vector<Dimensions, double>& h,
265 double t,
266 double dt,
267 int normal
268 ) -> double {
269 return repositories::instanceOfAcoustic.maxEigenvalue(Q, x, h, t, dt, normal);
270 },
272 loopParallelism
273 );
274}
275
276
// Second of three structurally identical kernel wrapper templates. It hands
// patchData, four solver callbacks and loopParallelism to a kernel template.
// NOTE(review): every line of this listing carries a line-number prefix and
// the numbering skips 285, 287, 289, 291 and 352. The function name, the
// second parameter, the first body statement, the name of the invoked kernel
// template and one call argument are therefore not visible here - recover
// them from the upstream file before changing this block.
282template <
283 class TempDataEnumerator,
284 ::peano4::utils::LoopPlacement loopParallelism>
286 int device,
288) {
290
// Template arguments of the invoked kernel: patch size per axis, halo size,
// number of unknowns and auxiliary variables, the four Evaluate* compile-time
// switches and the enumerator type.
292 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
293 HaloSize,
294 Acoustic::NumberOfUnknowns,
295 Acoustic::NumberOfAuxiliaryVariables,
296 EvaluateFlux,
297 EvaluateNonconservativeProduct,
298 EvaluateSource,
299 EvaluateMaximumEigenvalueAfterTimeStep,
300 TempDataEnumerator>(
301 patchData,
// Flux callback: forwards to the solver instance only if EvaluateFlux holds
// at compile time; otherwise the lambda body is empty and F is not written.
302 [&](
303 const double* __restrict__ Q,
304 const ::tarch::la::Vector<Dimensions, double>& x,
305 const ::tarch::la::Vector<Dimensions, double>& h,
306 double t,
307 double dt,
308 int normal,
309 double* __restrict__ F
310 ) -> void {
311 if constexpr (EvaluateFlux) {
312 repositories::instanceOfAcoustic.flux(Q, x, h, t, dt, normal, F);
313 }
314 },
// Non-conservative product callback: forwards only if
// EvaluateNonconservativeProduct holds at compile time.
315 [&](
316 const double* __restrict__ Q,
317 const double* __restrict__ deltaQ,
318 const ::tarch::la::Vector<Dimensions, double>& x,
319 const ::tarch::la::Vector<Dimensions, double>& h,
320 double t,
321 double dt,
322 int normal,
323 double* __restrict__ BTimesDeltaQ
324 ) -> void {
325 if constexpr (EvaluateNonconservativeProduct) {
326 repositories::instanceOfAcoustic
327 .nonconservativeProduct(Q, deltaQ, x, h, t, dt, normal, BTimesDeltaQ);
328 }
329 },
// Source term callback: forwards only if EvaluateSource holds at compile
// time.
330 [&](
331 const double* __restrict__ Q,
332 const ::tarch::la::Vector<Dimensions, double>& x,
333 const ::tarch::la::Vector<Dimensions, double>& h,
334 double t,
335 double dt,
336 double* __restrict__ S
337 ) -> void {
338 if constexpr (EvaluateSource) {
339 repositories::instanceOfAcoustic.sourceTerm(Q, x, h, t, dt, S);
340 }
341 },
// Eigenvalue callback: always forwards to the solver instance; there is no
// compile-time switch around this call.
342 [&](
343 const double* __restrict__ Q,
344 const ::tarch::la::Vector<Dimensions, double>& x,
345 const ::tarch::la::Vector<Dimensions, double>& h,
346 double t,
347 double dt,
348 int normal
349 ) -> double {
350 return repositories::instanceOfAcoustic.maxEigenvalue(Q, x, h, t, dt, normal);
351 },
353 loopParallelism
354 );
355}
356
357
// Third of three structurally identical kernel wrapper templates. It hands
// patchData, four solver callbacks and loopParallelism to a kernel template.
// NOTE(review): every line of this listing carries a line-number prefix and
// the numbering skips 366, 368, 370, 372 and 433. The function name, the
// second parameter, the first body statement, the name of the invoked kernel
// template and one call argument are therefore not visible here - recover
// them from the upstream file before changing this block.
363template <
364 class TempDataEnumerator,
365 ::peano4::utils::LoopPlacement loopParallelism>
367 int device,
369) {
371
// Template arguments of the invoked kernel: patch size per axis, halo size,
// number of unknowns and auxiliary variables, the four Evaluate* compile-time
// switches and the enumerator type.
373 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
374 HaloSize,
375 Acoustic::NumberOfUnknowns,
376 Acoustic::NumberOfAuxiliaryVariables,
377 EvaluateFlux,
378 EvaluateNonconservativeProduct,
379 EvaluateSource,
380 EvaluateMaximumEigenvalueAfterTimeStep,
381 TempDataEnumerator>(
382 patchData,
// Flux callback: forwards to the solver instance only if EvaluateFlux holds
// at compile time; otherwise the lambda body is empty and F is not written.
383 [&](
384 const double* __restrict__ Q,
385 const ::tarch::la::Vector<Dimensions, double>& x,
386 const ::tarch::la::Vector<Dimensions, double>& h,
387 double t,
388 double dt,
389 int normal,
390 double* __restrict__ F
391 ) -> void {
392 if constexpr (EvaluateFlux) {
393 repositories::instanceOfAcoustic.flux(Q, x, h, t, dt, normal, F);
394 }
395 },
// Non-conservative product callback: forwards only if
// EvaluateNonconservativeProduct holds at compile time.
396 [&](
397 const double* __restrict__ Q,
398 const double* __restrict__ deltaQ,
399 const ::tarch::la::Vector<Dimensions, double>& x,
400 const ::tarch::la::Vector<Dimensions, double>& h,
401 double t,
402 double dt,
403 int normal,
404 double* __restrict__ BTimesDeltaQ
405 ) -> void {
406 if constexpr (EvaluateNonconservativeProduct) {
407 repositories::instanceOfAcoustic
408 .nonconservativeProduct(Q, deltaQ, x, h, t, dt, normal, BTimesDeltaQ);
409 }
410 },
// Source term callback: forwards only if EvaluateSource holds at compile
// time.
411 [&](
412 const double* __restrict__ Q,
413 const ::tarch::la::Vector<Dimensions, double>& x,
414 const ::tarch::la::Vector<Dimensions, double>& h,
415 double t,
416 double dt,
417 double* __restrict__ S
418 ) -> void {
419 if constexpr (EvaluateSource) {
420 repositories::instanceOfAcoustic.sourceTerm(Q, x, h, t, dt, S);
421 }
422 },
// Eigenvalue callback: always forwards to the solver instance; there is no
// compile-time switch around this call.
423 [&](
424 const double* __restrict__ Q,
425 const ::tarch::la::Vector<Dimensions, double>& x,
426 const ::tarch::la::Vector<Dimensions, double>& h,
427 double t,
428 double dt,
429 int normal
430 ) -> double {
431 return repositories::instanceOfAcoustic.maxEigenvalue(Q, x, h, t, dt, normal);
432 },
434 loopParallelism
435 );
436}
437
438
// Adapter template that takes a function pointer `Function` and a loop
// placement as template parameters. Its visible body forwards patchData,
// timingComputeKernel and loopParallelism to Function.
// NOTE(review): the numbering of this listing skips 446-448 and 451-452, so
// the parameter list of Function and the adapter's own name and signature are
// not visible here - recover them from the upstream file before editing.
444template <
445 void (*Function)(
449 ),
450 ::peano4::utils::LoopPlacement loopParallelism>
453 Function(patchData, timingComputeKernel, loopParallelism);
454}
455
456
// Adapter template whose visible body forwards device, patchData and
// timingComputeKernel to `Function`.
// NOTE(review): the numbering of this listing skips 458-460, so the template
// parameter list and the adapter's own name and signature are not visible
// here - recover them from the upstream file before editing.
457template <
461 Function(device, patchData, timingComputeKernel);
462}
463
464
482void runBenchmarks(int numberOfPatches, int launchingThreads) {
484 event.setX(CellOffset);
485 event.setH(CellSize);
487
488 auto assessKernel =
489 [&](
490 std::function<void(int device, ::exahype2::CellData<double, double>& patchData)> kernel,
491 const std::string& markerName,
492 int launchingThreads,
493 int device,
494 int patches
495 ) -> void {
497 ::tarch::timing::Measurement timingKernelLaunch;
498
499 // TODO: Does the number of samples change the outcome of the solution?
500 for (int j = 0; j < NumberOfSamples; j++) {
501 parallelFor(launchingThread, launchingThreads) {
502 ::exahype2::CellData<double, double> patchData(patches);
503 for (int i = 0; i < patches; i++) {
504 patchData.QIn[i] = ::tarch::allocateMemory<double>(
507 );
508 patchData.t[i] = TimeStamp;
509 patchData.dt[i] = TimeStepSize;
510 patchData.QOut[i] = ::tarch::allocateMemory<double>(
513 );
515 CellOffset + 0.5 * CellSize
516 );
519 );
520 patchData.maxEigenvalue[i] = 0.0;
521 initInputData(patchData.QIn[i]);
522 std::memset(
523 patchData.QOut[i],
524 0.0,
525 NumberOfOutputEntries * sizeof(double)
526 );
527 }
528
530 watchKernelLaunch("::runBenchmarks", "assessKernel(...)", false);
531 kernel(device, patchData);
532 watchKernelLaunch.stop();
533 timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
534
535 if constexpr (Accuracy > 0.0) {
536 int errors = 0;
537 double maxDifference = 0.0;
538 for (int i = 0; i < patches; i++) {
539 storeOutcome(patchData.QOut[i], patchData.maxEigenvalue[i]);
540 auto [maxDifferencePerPatch, errorsPerPatch] = validateOutcome(
541 patchData.QOut[i],
542 i,
543 patchData.maxEigenvalue[i]
544 );
545 errors += errorsPerPatch;
546 maxDifference = std::max(maxDifference, maxDifferencePerPatch);
547 }
548
549 if (errors > 0) {
550 logError(
551 "runBenchmarks()",
552 "Max difference of outcome from \""
553 << markerName << "\" for all patches is " << maxDifference
554 << " (admissible accuracy=" << Accuracy << ")"
555 << " for " << errors << " entries"
556 );
557 std::abort();
558 }
559 }
560
561 for (int i = 0; i < patches; i++) {
563 patchData.QIn[i],
565 );
567 patchData.QOut[i],
569 );
570 }
571 }
573 }
574
576 reportRuntime(markerName, timingKernelLaunch, patches);
577 }
578 };
579
580 // Kernel launches
581 const int device = 0;
582 const int rank = ::tarch::mpi::Rank::getInstance().getRank();
583 const int numberOfRanks = ::tarch::mpi::Rank::getInstance().getNumberOfRanks(
584 );
585 const int patchesPerProcess = numberOfPatches / numberOfRanks;
586 const int remainder = numberOfPatches % numberOfRanks;
587 const int startPatch = rank * patchesPerProcess;
588 const int endPatch = startPatch + patchesPerProcess
589 + (rank == numberOfRanks - 1 ? remainder : 0);
590 const int localPatches = endPatch - startPatch;
591
592 // Headers
593 std::stringstream ss;
594 ss << std::left;
595 ss << "\n";
596 ss << "Kernel ID:\n\t";
597 ss << "Compute Kernel Time |\n\t";
598 ss << "Compute Kernel Time (Normalised) |\n\t";
599 ss << "Compute Kernel String |\n\t";
600 ss << "Kernel Launch Time |\n\t";
601 ss << "Kernel Launch Time (Normalised) |\n\t";
602 ss << "Kernel Launch String";
603
605 logInfo("runBenchmarks()", "Number of patches per rank: " << localPatches);
606 logInfo("runBenchmarks()", ss.str());
607 }
608
609 std::string deviceString;
610#ifdef Parallel
611 std::vector<int> devices(numberOfRanks);
612 MPI_Gather(&device, 1, MPI_INT, devices.data(), 1, MPI_INT, 0, MPI_COMM_WORLD);
613 for (int i = 0; i < numberOfRanks; i++) {
614 deviceString += std::to_string(devices[i]);
615 if (i < numberOfRanks - 1) {
616 deviceString += ",";
617 }
618 }
619#else
620 deviceString = std::to_string(device);
621#endif
622
623 if constexpr (AssessHostKernels) {
628 "host, functors, batched, AoS, serial",
629 launchingThreads,
631 localPatches
632 );
637 "host, functors, batched, AoS, nested",
638 launchingThreads,
640 localPatches
641 );
646 "host, functors, batched, AoS, spread-out",
647 launchingThreads,
649 localPatches
650 );
651
656 "host, functors, patch-wise, AoS, serial",
657 launchingThreads,
659 localPatches
660 );
665 "host, functors, patch-wise, AoS, nested",
666 launchingThreads,
668 localPatches
669 );
674 "host, functors, patch-wise, AoS, spread-out",
675 launchingThreads,
677 localPatches
678 );
679
684 "host, functors, volume-wise, AoS, serial",
685 launchingThreads,
687 localPatches
688 );
693 "host, functors, volume-wise, AoS, nested",
694 launchingThreads,
696 localPatches
697 );
702 "host, functors, volume-wise, AoS, spread-out",
703 launchingThreads,
705 localPatches
706 );
707
711 Acoustic,
712 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
713 HaloSize,
714 Acoustic::NumberOfUnknowns,
715 Acoustic::NumberOfAuxiliaryVariables,
716 EvaluateFlux,
717 EvaluateNonconservativeProduct,
718 EvaluateSource,
719 EvaluateMaximumEigenvalueAfterTimeStep,
722 "host, stateless, batched, AoS, serial",
723 launchingThreads,
725 localPatches
726 );
730 Acoustic,
731 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
732 HaloSize,
733 Acoustic::NumberOfUnknowns,
734 Acoustic::NumberOfAuxiliaryVariables,
735 EvaluateFlux,
736 EvaluateNonconservativeProduct,
737 EvaluateSource,
738 EvaluateMaximumEigenvalueAfterTimeStep,
741 "host, stateless, batched, AoS, nested",
742 launchingThreads,
744 localPatches
745 );
749 Acoustic,
750 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
751 HaloSize,
752 Acoustic::NumberOfUnknowns,
753 Acoustic::NumberOfAuxiliaryVariables,
754 EvaluateFlux,
755 EvaluateNonconservativeProduct,
756 EvaluateSource,
757 EvaluateMaximumEigenvalueAfterTimeStep,
760 "host, stateless, batched, AoS, spread-out",
761 launchingThreads,
763 localPatches
764 );
768 Acoustic,
769 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
770 HaloSize,
771 Acoustic::NumberOfUnknowns,
772 Acoustic::NumberOfAuxiliaryVariables,
773 EvaluateFlux,
774 EvaluateNonconservativeProduct,
775 EvaluateSource,
776 EvaluateMaximumEigenvalueAfterTimeStep,
779 "host, stateless, batched, SoA, serial",
780 launchingThreads,
782 localPatches
783 );
787 Acoustic,
788 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
789 HaloSize,
790 Acoustic::NumberOfUnknowns,
791 Acoustic::NumberOfAuxiliaryVariables,
792 EvaluateFlux,
793 EvaluateNonconservativeProduct,
794 EvaluateSource,
795 EvaluateMaximumEigenvalueAfterTimeStep,
798 "host, stateless, batched, SoA, nested",
799 launchingThreads,
801 localPatches
802 );
806 Acoustic,
807 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
808 HaloSize,
809 Acoustic::NumberOfUnknowns,
810 Acoustic::NumberOfAuxiliaryVariables,
811 EvaluateFlux,
812 EvaluateNonconservativeProduct,
813 EvaluateSource,
814 EvaluateMaximumEigenvalueAfterTimeStep,
817 "host, stateless, batched, SoA, spread-out",
818 launchingThreads,
820 localPatches
821 );
825 Acoustic,
826 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
827 HaloSize,
828 Acoustic::NumberOfUnknowns,
829 Acoustic::NumberOfAuxiliaryVariables,
830 EvaluateFlux,
831 EvaluateNonconservativeProduct,
832 EvaluateSource,
833 EvaluateMaximumEigenvalueAfterTimeStep,
836 "host, stateless, batched, AoSoA, serial",
837 launchingThreads,
839 localPatches
840 );
844 Acoustic,
845 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
846 HaloSize,
847 Acoustic::NumberOfUnknowns,
848 Acoustic::NumberOfAuxiliaryVariables,
849 EvaluateFlux,
850 EvaluateNonconservativeProduct,
851 EvaluateSource,
852 EvaluateMaximumEigenvalueAfterTimeStep,
855 "host, stateless, batched, AoSoA, nested",
856 launchingThreads,
858 localPatches
859 );
863 Acoustic,
864 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
865 HaloSize,
866 Acoustic::NumberOfUnknowns,
867 Acoustic::NumberOfAuxiliaryVariables,
868 EvaluateFlux,
869 EvaluateNonconservativeProduct,
870 EvaluateSource,
871 EvaluateMaximumEigenvalueAfterTimeStep,
874 "host, stateless, batched, AoSoA, spread-out",
875 launchingThreads,
877 localPatches
878 );
879 if constexpr (not Accuracy > 0.0) {
883 Acoustic,
884 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
885 HaloSize,
886 Acoustic::NumberOfUnknowns,
887 Acoustic::NumberOfAuxiliaryVariables,
888 EvaluateFlux,
889 EvaluateNonconservativeProduct,
890 EvaluateSource,
891 EvaluateMaximumEigenvalueAfterTimeStep>,
893 "host, stateless, batched, insitu, serial",
894 launchingThreads,
896 localPatches
897 );
898 }
899
903 Acoustic,
904 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
905 HaloSize,
906 Acoustic::NumberOfUnknowns,
907 Acoustic::NumberOfAuxiliaryVariables,
908 EvaluateFlux,
909 EvaluateNonconservativeProduct,
910 EvaluateSource,
911 EvaluateMaximumEigenvalueAfterTimeStep,
914 "host, stateless, patch-wise, AoS, serial",
915 launchingThreads,
917 localPatches
918 );
922 Acoustic,
923 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
924 HaloSize,
925 Acoustic::NumberOfUnknowns,
926 Acoustic::NumberOfAuxiliaryVariables,
927 EvaluateFlux,
928 EvaluateNonconservativeProduct,
929 EvaluateSource,
930 EvaluateMaximumEigenvalueAfterTimeStep,
933 "host, stateless, patch-wise, AoS, nested",
934 launchingThreads,
936 localPatches
937 );
941 Acoustic,
942 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
943 HaloSize,
944 Acoustic::NumberOfUnknowns,
945 Acoustic::NumberOfAuxiliaryVariables,
946 EvaluateFlux,
947 EvaluateNonconservativeProduct,
948 EvaluateSource,
949 EvaluateMaximumEigenvalueAfterTimeStep,
952 "host, stateless, patch-wise, AoS, spread-out",
953 launchingThreads,
955 localPatches
956 );
960 Acoustic,
961 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
962 HaloSize,
963 Acoustic::NumberOfUnknowns,
964 Acoustic::NumberOfAuxiliaryVariables,
965 EvaluateFlux,
966 EvaluateNonconservativeProduct,
967 EvaluateSource,
968 EvaluateMaximumEigenvalueAfterTimeStep,
971 "host, stateless, patch-wise, SoA, serial",
972 launchingThreads,
974 localPatches
975 );
979 Acoustic,
980 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
981 HaloSize,
982 Acoustic::NumberOfUnknowns,
983 Acoustic::NumberOfAuxiliaryVariables,
984 EvaluateFlux,
985 EvaluateNonconservativeProduct,
986 EvaluateSource,
987 EvaluateMaximumEigenvalueAfterTimeStep,
990 "host, stateless, patch-wise, SoA, nested",
991 launchingThreads,
993 localPatches
994 );
998 Acoustic,
999 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1000 HaloSize,
1001 Acoustic::NumberOfUnknowns,
1002 Acoustic::NumberOfAuxiliaryVariables,
1003 EvaluateFlux,
1004 EvaluateNonconservativeProduct,
1005 EvaluateSource,
1006 EvaluateMaximumEigenvalueAfterTimeStep,
1009 "host, stateless, patch-wise, SoA, spread-out",
1010 launchingThreads,
1012 localPatches
1013 );
1017 Acoustic,
1018 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1019 HaloSize,
1020 Acoustic::NumberOfUnknowns,
1021 Acoustic::NumberOfAuxiliaryVariables,
1022 EvaluateFlux,
1023 EvaluateNonconservativeProduct,
1024 EvaluateSource,
1025 EvaluateMaximumEigenvalueAfterTimeStep,
1028 "host, stateless, patch-wise, AoSoA, serial",
1029 launchingThreads,
1031 localPatches
1032 );
1036 Acoustic,
1037 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1038 HaloSize,
1039 Acoustic::NumberOfUnknowns,
1040 Acoustic::NumberOfAuxiliaryVariables,
1041 EvaluateFlux,
1042 EvaluateNonconservativeProduct,
1043 EvaluateSource,
1044 EvaluateMaximumEigenvalueAfterTimeStep,
1047 "host, stateless, patch-wise, AoSoA, nested",
1048 launchingThreads,
1050 localPatches
1051 );
1055 Acoustic,
1056 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1057 HaloSize,
1058 Acoustic::NumberOfUnknowns,
1059 Acoustic::NumberOfAuxiliaryVariables,
1060 EvaluateFlux,
1061 EvaluateNonconservativeProduct,
1062 EvaluateSource,
1063 EvaluateMaximumEigenvalueAfterTimeStep,
1066 "host, stateless, patch-wise, AoSoA, spread-out",
1067 launchingThreads,
1069 localPatches
1070 );
1071 if constexpr (not Accuracy > 0.0) {
1075 Acoustic,
1076 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1077 HaloSize,
1078 Acoustic::NumberOfUnknowns,
1079 Acoustic::NumberOfAuxiliaryVariables,
1080 EvaluateFlux,
1081 EvaluateNonconservativeProduct,
1082 EvaluateSource,
1083 EvaluateMaximumEigenvalueAfterTimeStep>,
1085 "host, stateless, patch-wise, insitu, serial",
1086 launchingThreads,
1088 localPatches
1089 );
1090 }
1091
1095 Acoustic,
1096 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1097 HaloSize,
1098 Acoustic::NumberOfUnknowns,
1099 Acoustic::NumberOfAuxiliaryVariables,
1100 EvaluateFlux,
1101 EvaluateNonconservativeProduct,
1102 EvaluateSource,
1103 EvaluateMaximumEigenvalueAfterTimeStep,
1106 "host, stateless, volume-wise, AoS, serial",
1107 launchingThreads,
1109 localPatches
1110 );
1114 Acoustic,
1115 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1116 HaloSize,
1117 Acoustic::NumberOfUnknowns,
1118 Acoustic::NumberOfAuxiliaryVariables,
1119 EvaluateFlux,
1120 EvaluateNonconservativeProduct,
1121 EvaluateSource,
1122 EvaluateMaximumEigenvalueAfterTimeStep,
1125 "host, stateless, volume-wise, AoS, nested",
1126 launchingThreads,
1128 localPatches
1129 );
1133 Acoustic,
1134 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1135 HaloSize,
1136 Acoustic::NumberOfUnknowns,
1137 Acoustic::NumberOfAuxiliaryVariables,
1138 EvaluateFlux,
1139 EvaluateNonconservativeProduct,
1140 EvaluateSource,
1141 EvaluateMaximumEigenvalueAfterTimeStep,
1144 "host, stateless, volume-wise, AoS, spread-out",
1145 launchingThreads,
1147 localPatches
1148 );
1152 Acoustic,
1153 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1154 HaloSize,
1155 Acoustic::NumberOfUnknowns,
1156 Acoustic::NumberOfAuxiliaryVariables,
1157 EvaluateFlux,
1158 EvaluateNonconservativeProduct,
1159 EvaluateSource,
1160 EvaluateMaximumEigenvalueAfterTimeStep,
1163 "host, stateless, volume-wise, SoA, serial",
1164 launchingThreads,
1166 localPatches
1167 );
1171 Acoustic,
1172 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1173 HaloSize,
1174 Acoustic::NumberOfUnknowns,
1175 Acoustic::NumberOfAuxiliaryVariables,
1176 EvaluateFlux,
1177 EvaluateNonconservativeProduct,
1178 EvaluateSource,
1179 EvaluateMaximumEigenvalueAfterTimeStep,
1182 "host, stateless, volume-wise, SoA, nested",
1183 launchingThreads,
1185 localPatches
1186 );
1190 Acoustic,
1191 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1192 HaloSize,
1193 Acoustic::NumberOfUnknowns,
1194 Acoustic::NumberOfAuxiliaryVariables,
1195 EvaluateFlux,
1196 EvaluateNonconservativeProduct,
1197 EvaluateSource,
1198 EvaluateMaximumEigenvalueAfterTimeStep,
1201 "host, stateless, volume-wise, SoA, spread-out",
1202 launchingThreads,
1204 localPatches
1205 );
1209 Acoustic,
1210 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1211 HaloSize,
1212 Acoustic::NumberOfUnknowns,
1213 Acoustic::NumberOfAuxiliaryVariables,
1214 EvaluateFlux,
1215 EvaluateNonconservativeProduct,
1216 EvaluateSource,
1217 EvaluateMaximumEigenvalueAfterTimeStep,
1220 "host, stateless, volume-wise, AoSoA, serial",
1221 launchingThreads,
1223 localPatches
1224 );
1228 Acoustic,
1229 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1230 HaloSize,
1231 Acoustic::NumberOfUnknowns,
1232 Acoustic::NumberOfAuxiliaryVariables,
1233 EvaluateFlux,
1234 EvaluateNonconservativeProduct,
1235 EvaluateSource,
1236 EvaluateMaximumEigenvalueAfterTimeStep,
1239 "host, stateless, volume-wise, AoSoA, nested",
1240 launchingThreads,
1242 localPatches
1243 );
1247 Acoustic,
1248 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1249 HaloSize,
1250 Acoustic::NumberOfUnknowns,
1251 Acoustic::NumberOfAuxiliaryVariables,
1252 EvaluateFlux,
1253 EvaluateNonconservativeProduct,
1254 EvaluateSource,
1255 EvaluateMaximumEigenvalueAfterTimeStep,
1258 "host, stateless, volume-wise, AoSoA, spread-out",
1259 launchingThreads,
1261 localPatches
1262 );
1263 }
1264
1265 if constexpr (AssessDeviceKernels) {
1266#if defined(GPUOffloadingSYCL)
1270 Acoustic,
1271 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1272 HaloSize,
1273 Acoustic::NumberOfUnknowns,
1274 Acoustic::NumberOfAuxiliaryVariables,
1275 EvaluateFlux,
1276 EvaluateNonconservativeProduct,
1277 EvaluateSource,
1278 EvaluateMaximumEigenvalueAfterTimeStep,
1280 "device(s) " + deviceString + ", stateless, batched, AoS, usm",
1281 launchingThreads,
1282 device,
1283 localPatches
1284 );
1288 Acoustic,
1289 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1290 HaloSize,
1291 Acoustic::NumberOfUnknowns,
1292 Acoustic::NumberOfAuxiliaryVariables,
1293 EvaluateFlux,
1294 EvaluateNonconservativeProduct,
1295 EvaluateSource,
1296 EvaluateMaximumEigenvalueAfterTimeStep,
1298 "device(s) " + deviceString + ", stateless, batched, SoA, usm",
1299 launchingThreads,
1300 device,
1301 localPatches
1302 );
1306 Acoustic,
1307 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1308 HaloSize,
1309 Acoustic::NumberOfUnknowns,
1310 Acoustic::NumberOfAuxiliaryVariables,
1311 EvaluateFlux,
1312 EvaluateNonconservativeProduct,
1313 EvaluateSource,
1314 EvaluateMaximumEigenvalueAfterTimeStep,
1316 "device(s) " + deviceString + ", stateless, batched, AoSoA, usm",
1317 launchingThreads,
1318 device,
1319 localPatches
1320 );
1321
1325 Acoustic,
1326 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1327 HaloSize,
1328 Acoustic::NumberOfUnknowns,
1329 Acoustic::NumberOfAuxiliaryVariables,
1330 EvaluateFlux,
1331 EvaluateNonconservativeProduct,
1332 EvaluateSource,
1333 EvaluateMaximumEigenvalueAfterTimeStep,
1335 "device(s) " + deviceString + ", stateless, patch-wise, AoS, usm",
1336 launchingThreads,
1337 device,
1338 localPatches
1339 );
1343 Acoustic,
1344 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1345 HaloSize,
1346 Acoustic::NumberOfUnknowns,
1347 Acoustic::NumberOfAuxiliaryVariables,
1348 EvaluateFlux,
1349 EvaluateNonconservativeProduct,
1350 EvaluateSource,
1351 EvaluateMaximumEigenvalueAfterTimeStep,
1353 "device(s) " + deviceString + ", stateless, patch-wise, SoA, usm",
1354 launchingThreads,
1355 device,
1356 localPatches
1357 );
1361 Acoustic,
1362 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1363 HaloSize,
1364 Acoustic::NumberOfUnknowns,
1365 Acoustic::NumberOfAuxiliaryVariables,
1366 EvaluateFlux,
1367 EvaluateNonconservativeProduct,
1368 EvaluateSource,
1369 EvaluateMaximumEigenvalueAfterTimeStep,
1371 "device(s) " + deviceString + ", stateless, patch-wise, AoSoA, usm",
1372 launchingThreads,
1373 device,
1374 localPatches
1375 );
1376
1380 Acoustic,
1381 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1382 HaloSize,
1383 Acoustic::NumberOfUnknowns,
1384 Acoustic::NumberOfAuxiliaryVariables,
1385 EvaluateFlux,
1386 EvaluateNonconservativeProduct,
1387 EvaluateSource,
1388 EvaluateMaximumEigenvalueAfterTimeStep,
1390 "device(s) " + deviceString + ", stateless, task-graph, AoS, usm",
1391 launchingThreads,
1392 device,
1393 localPatches
1394 );
1398 Acoustic,
1399 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1400 HaloSize,
1401 Acoustic::NumberOfUnknowns,
1402 Acoustic::NumberOfAuxiliaryVariables,
1403 EvaluateFlux,
1404 EvaluateNonconservativeProduct,
1405 EvaluateSource,
1406 EvaluateMaximumEigenvalueAfterTimeStep,
1408 "device(s) " + deviceString + ", stateless, task-graph, SoA, usm",
1409 launchingThreads,
1410 device,
1411 localPatches
1412 );
1416 Acoustic,
1417 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1418 HaloSize,
1419 Acoustic::NumberOfUnknowns,
1420 Acoustic::NumberOfAuxiliaryVariables,
1421 EvaluateFlux,
1422 EvaluateNonconservativeProduct,
1423 EvaluateSource,
1424 EvaluateMaximumEigenvalueAfterTimeStep,
1426 "device(s) " + deviceString + ", stateless, task-graph, AoSoA, usm",
1427 launchingThreads,
1428 device,
1429 localPatches
1430 );
1431
1435 Acoustic,
1436 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1437 HaloSize,
1438 Acoustic::NumberOfUnknowns,
1439 Acoustic::NumberOfAuxiliaryVariables,
1440 EvaluateFlux,
1441 EvaluateNonconservativeProduct,
1442 EvaluateSource,
1443 EvaluateMaximumEigenvalueAfterTimeStep,
1445 "device(s) " + deviceString + ", stateless, batched, AoS, copy",
1446 launchingThreads,
1447 device,
1448 localPatches
1449 );
1453 Acoustic,
1454 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1455 HaloSize,
1456 Acoustic::NumberOfUnknowns,
1457 Acoustic::NumberOfAuxiliaryVariables,
1458 EvaluateFlux,
1459 EvaluateNonconservativeProduct,
1460 EvaluateSource,
1461 EvaluateMaximumEigenvalueAfterTimeStep,
1463 "device(s) " + deviceString + ", stateless, batched, SoA, copy",
1464 launchingThreads,
1465 device,
1466 localPatches
1467 );
1471 Acoustic,
1472 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1473 HaloSize,
1474 Acoustic::NumberOfUnknowns,
1475 Acoustic::NumberOfAuxiliaryVariables,
1476 EvaluateFlux,
1477 EvaluateNonconservativeProduct,
1478 EvaluateSource,
1479 EvaluateMaximumEigenvalueAfterTimeStep,
1481 "device(s) " + deviceString + ", stateless, batched, AoSoA, copy",
1482 launchingThreads,
1483 device,
1484 localPatches
1485 );
1486
1490 Acoustic,
1491 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1492 HaloSize,
1493 Acoustic::NumberOfUnknowns,
1494 Acoustic::NumberOfAuxiliaryVariables,
1495 EvaluateFlux,
1496 EvaluateNonconservativeProduct,
1497 EvaluateSource,
1498 EvaluateMaximumEigenvalueAfterTimeStep,
1500 "device(s) " + deviceString + ", stateless, patch-wise, AoS, copy",
1501 launchingThreads,
1502 device,
1503 localPatches
1504 );
1508 Acoustic,
1509 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1510 HaloSize,
1511 Acoustic::NumberOfUnknowns,
1512 Acoustic::NumberOfAuxiliaryVariables,
1513 EvaluateFlux,
1514 EvaluateNonconservativeProduct,
1515 EvaluateSource,
1516 EvaluateMaximumEigenvalueAfterTimeStep,
1518 "device(s) " + deviceString + ", stateless, patch-wise, SoA, copy",
1519 launchingThreads,
1520 device,
1521 localPatches
1522 );
1526 Acoustic,
1527 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1528 HaloSize,
1529 Acoustic::NumberOfUnknowns,
1530 Acoustic::NumberOfAuxiliaryVariables,
1531 EvaluateFlux,
1532 EvaluateNonconservativeProduct,
1533 EvaluateSource,
1534 EvaluateMaximumEigenvalueAfterTimeStep,
1536 "device(s) " + deviceString + ", stateless, patch-wise, AoSoA, copy",
1537 launchingThreads,
1538 device,
1539 localPatches
1540 );
1541
1545 Acoustic,
1546 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1547 HaloSize,
1548 Acoustic::NumberOfUnknowns,
1549 Acoustic::NumberOfAuxiliaryVariables,
1550 EvaluateFlux,
1551 EvaluateNonconservativeProduct,
1552 EvaluateSource,
1553 EvaluateMaximumEigenvalueAfterTimeStep,
1555 "device(s) " + deviceString + ", stateless, task-graph, AoS, copy",
1556 launchingThreads,
1557 device,
1558 localPatches
1559 );
1563 Acoustic,
1564 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1565 HaloSize,
1566 Acoustic::NumberOfUnknowns,
1567 Acoustic::NumberOfAuxiliaryVariables,
1568 EvaluateFlux,
1569 EvaluateNonconservativeProduct,
1570 EvaluateSource,
1571 EvaluateMaximumEigenvalueAfterTimeStep,
1573 "device(s) " + deviceString + ", stateless, task-graph, SoA, copy",
1574 launchingThreads,
1575 device,
1576 localPatches
1577 );
1581 Acoustic,
1582 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1583 HaloSize,
1584 Acoustic::NumberOfUnknowns,
1585 Acoustic::NumberOfAuxiliaryVariables,
1586 EvaluateFlux,
1587 EvaluateNonconservativeProduct,
1588 EvaluateSource,
1589 EvaluateMaximumEigenvalueAfterTimeStep,
1591 "device(s) " + deviceString + ", stateless, task-graph, AoSoA, copy",
1592 launchingThreads,
1593 device,
1594 localPatches
1595 );
1596
1600 Acoustic,
1601 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1602 HaloSize,
1603 Acoustic::NumberOfUnknowns,
1604 Acoustic::NumberOfAuxiliaryVariables,
1605 EvaluateFlux,
1606 EvaluateNonconservativeProduct,
1607 EvaluateSource,
1608 EvaluateMaximumEigenvalueAfterTimeStep,
1610 "device(s) " + deviceString + ", stateless, batched, AoS, managed",
1611 launchingThreads,
1612 device,
1613 localPatches
1614 );
1618 Acoustic,
1619 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1620 HaloSize,
1621 Acoustic::NumberOfUnknowns,
1622 Acoustic::NumberOfAuxiliaryVariables,
1623 EvaluateFlux,
1624 EvaluateNonconservativeProduct,
1625 EvaluateSource,
1626 EvaluateMaximumEigenvalueAfterTimeStep,
1628 "device(s) " + deviceString + ", stateless, batched, SoA, managed",
1629 launchingThreads,
1630 device,
1631 localPatches
1632 );
1636 Acoustic,
1637 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1638 HaloSize,
1639 Acoustic::NumberOfUnknowns,
1640 Acoustic::NumberOfAuxiliaryVariables,
1641 EvaluateFlux,
1642 EvaluateNonconservativeProduct,
1643 EvaluateSource,
1644 EvaluateMaximumEigenvalueAfterTimeStep,
1646 "device(s) " + deviceString + ", stateless, batched, AoSoA, managed",
1647 launchingThreads,
1648 device,
1649 localPatches
1650 );
1651
1654 ::exahype2::fv::rusanov::sycl::
1655 timeStepWithRusanovPatchwiseManagedStateless<
1656 Acoustic,
1657 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1658 HaloSize,
1659 Acoustic::NumberOfUnknowns,
1660 Acoustic::NumberOfAuxiliaryVariables,
1661 EvaluateFlux,
1662 EvaluateNonconservativeProduct,
1663 EvaluateSource,
1664 EvaluateMaximumEigenvalueAfterTimeStep,
1666 "device(s) " + deviceString + ", stateless, patch-wise, AoS, managed",
1667 launchingThreads,
1668 device,
1669 localPatches
1670 );
1673 ::exahype2::fv::rusanov::sycl::
1674 timeStepWithRusanovPatchwiseManagedStateless<
1675 Acoustic,
1676 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1677 HaloSize,
1678 Acoustic::NumberOfUnknowns,
1679 Acoustic::NumberOfAuxiliaryVariables,
1680 EvaluateFlux,
1681 EvaluateNonconservativeProduct,
1682 EvaluateSource,
1683 EvaluateMaximumEigenvalueAfterTimeStep,
1685 "device(s) " + deviceString + ", stateless, patch-wise, SoA, managed",
1686 launchingThreads,
1687 device,
1688 localPatches
1689 );
1692 ::exahype2::fv::rusanov::sycl::
1693 timeStepWithRusanovPatchwiseManagedStateless<
1694 Acoustic,
1695 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1696 HaloSize,
1697 Acoustic::NumberOfUnknowns,
1698 Acoustic::NumberOfAuxiliaryVariables,
1699 EvaluateFlux,
1700 EvaluateNonconservativeProduct,
1701 EvaluateSource,
1702 EvaluateMaximumEigenvalueAfterTimeStep,
1704 "device(s) " + deviceString + ", stateless, patch-wise, AoSoA, managed",
1705 launchingThreads,
1706 device,
1707 localPatches
1708 );
1709
1712 ::exahype2::fv::rusanov::sycl::
1713 timeStepWithRusanovTaskgraphManagedStateless<
1714 Acoustic,
1715 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1716 HaloSize,
1717 Acoustic::NumberOfUnknowns,
1718 Acoustic::NumberOfAuxiliaryVariables,
1719 EvaluateFlux,
1720 EvaluateNonconservativeProduct,
1721 EvaluateSource,
1722 EvaluateMaximumEigenvalueAfterTimeStep,
1724 "device(s) " + deviceString + ", stateless, task-graph, AoS, managed",
1725 launchingThreads,
1726 device,
1727 localPatches
1728 );
1731 ::exahype2::fv::rusanov::sycl::
1732 timeStepWithRusanovTaskgraphManagedStateless<
1733 Acoustic,
1734 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1735 HaloSize,
1736 Acoustic::NumberOfUnknowns,
1737 Acoustic::NumberOfAuxiliaryVariables,
1738 EvaluateFlux,
1739 EvaluateNonconservativeProduct,
1740 EvaluateSource,
1741 EvaluateMaximumEigenvalueAfterTimeStep,
1743 "device(s) " + deviceString + ", stateless, task-graph, SoA, managed",
1744 launchingThreads,
1745 device,
1746 localPatches
1747 );
1750 ::exahype2::fv::rusanov::sycl::
1751 timeStepWithRusanovTaskgraphManagedStateless<
1752 Acoustic,
1753 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1754 HaloSize,
1755 Acoustic::NumberOfUnknowns,
1756 Acoustic::NumberOfAuxiliaryVariables,
1757 EvaluateFlux,
1758 EvaluateNonconservativeProduct,
1759 EvaluateSource,
1760 EvaluateMaximumEigenvalueAfterTimeStep,
1762 "device(s) " + deviceString + ", stateless, task-graph, AoSoA, managed",
1763 launchingThreads,
1764 device,
1765 localPatches
1766 );
1767#endif
1768
1769#if defined(GPUOffloadingOMP)
1773 Acoustic,
1774 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1775 HaloSize,
1776 Acoustic::NumberOfUnknowns,
1777 Acoustic::NumberOfAuxiliaryVariables,
1778 EvaluateFlux,
1779 EvaluateNonconservativeProduct,
1780 EvaluateSource,
1781 EvaluateMaximumEigenvalueAfterTimeStep,
1783 "device(s) " + deviceString + ", stateless, batched, AoS, copy",
1784 launchingThreads,
1785 device,
1786 localPatches
1787 );
1791 Acoustic,
1792 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1793 HaloSize,
1794 Acoustic::NumberOfUnknowns,
1795 Acoustic::NumberOfAuxiliaryVariables,
1796 EvaluateFlux,
1797 EvaluateNonconservativeProduct,
1798 EvaluateSource,
1799 EvaluateMaximumEigenvalueAfterTimeStep,
1801 "device(s) " + deviceString + ", stateless, batched, SoA, copy",
1802 launchingThreads,
1803 device,
1804 localPatches
1805 );
1809 Acoustic,
1810 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1811 HaloSize,
1812 Acoustic::NumberOfUnknowns,
1813 Acoustic::NumberOfAuxiliaryVariables,
1814 EvaluateFlux,
1815 EvaluateNonconservativeProduct,
1816 EvaluateSource,
1817 EvaluateMaximumEigenvalueAfterTimeStep,
1819 "device(s) " + deviceString + ", stateless, batched, AoSoA, copy",
1820 launchingThreads,
1821 device,
1822 localPatches
1823 );
1824#if defined(__NVCOMPILER_CUDA__) // __NVCC__
1828 Acoustic,
1829 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1830 HaloSize,
1831 Acoustic::NumberOfUnknowns,
1832 Acoustic::NumberOfAuxiliaryVariables,
1833 EvaluateFlux,
1834 EvaluateNonconservativeProduct,
1835 EvaluateSource,
1836 EvaluateMaximumEigenvalueAfterTimeStep,
1838 "device(s) " + deviceString + ", stateless, batched, AoS, usm",
1839 launchingThreads,
1840 device,
1841 localPatches
1842 );
1846 Acoustic,
1847 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1848 HaloSize,
1849 Acoustic::NumberOfUnknowns,
1850 Acoustic::NumberOfAuxiliaryVariables,
1851 EvaluateFlux,
1852 EvaluateNonconservativeProduct,
1853 EvaluateSource,
1854 EvaluateMaximumEigenvalueAfterTimeStep,
1856 "device(s) " + deviceString + ", stateless, batched, SoA, usm",
1857 launchingThreads,
1858 device,
1859 localPatches
1860 );
1864 Acoustic,
1865 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1866 HaloSize,
1867 Acoustic::NumberOfUnknowns,
1868 Acoustic::NumberOfAuxiliaryVariables,
1869 EvaluateFlux,
1870 EvaluateNonconservativeProduct,
1871 EvaluateSource,
1872 EvaluateMaximumEigenvalueAfterTimeStep,
1874 "device(s) " + deviceString + ", stateless, batched, AoSoA, usm",
1875 launchingThreads,
1876 device,
1877 localPatches
1878 );
1879#endif
1880
1884 Acoustic,
1885 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1886 HaloSize,
1887 Acoustic::NumberOfUnknowns,
1888 Acoustic::NumberOfAuxiliaryVariables,
1889 EvaluateFlux,
1890 EvaluateNonconservativeProduct,
1891 EvaluateSource,
1892 EvaluateMaximumEigenvalueAfterTimeStep,
1894 "device(s) " + deviceString + ", stateless, patch-wise, AoS, copy",
1895 launchingThreads,
1896 device,
1897 localPatches
1898 );
1902 Acoustic,
1903 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1904 HaloSize,
1905 Acoustic::NumberOfUnknowns,
1906 Acoustic::NumberOfAuxiliaryVariables,
1907 EvaluateFlux,
1908 EvaluateNonconservativeProduct,
1909 EvaluateSource,
1910 EvaluateMaximumEigenvalueAfterTimeStep,
1912 "device(s) " + deviceString + ", stateless, patch-wise, SoA, copy",
1913 launchingThreads,
1914 device,
1915 localPatches
1916 );
1920 Acoustic,
1921 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1922 HaloSize,
1923 Acoustic::NumberOfUnknowns,
1924 Acoustic::NumberOfAuxiliaryVariables,
1925 EvaluateFlux,
1926 EvaluateNonconservativeProduct,
1927 EvaluateSource,
1928 EvaluateMaximumEigenvalueAfterTimeStep,
1930 "device(s) " + deviceString + ", stateless, patch-wise, AoSoA, copy",
1931 launchingThreads,
1932 device,
1933 localPatches
1934 );
1935#if defined(__NVCOMPILER_CUDA__) // __NVCC__
1939 Acoustic,
1940 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1941 HaloSize,
1942 Acoustic::NumberOfUnknowns,
1943 Acoustic::NumberOfAuxiliaryVariables,
1944 EvaluateFlux,
1945 EvaluateNonconservativeProduct,
1946 EvaluateSource,
1947 EvaluateMaximumEigenvalueAfterTimeStep,
1949 "device(s) " + deviceString + ", stateless, patch-wise, AoS, usm",
1950 launchingThreads,
1951 device,
1952 localPatches
1953 );
1957 Acoustic,
1958 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1959 HaloSize,
1960 Acoustic::NumberOfUnknowns,
1961 Acoustic::NumberOfAuxiliaryVariables,
1962 EvaluateFlux,
1963 EvaluateNonconservativeProduct,
1964 EvaluateSource,
1965 EvaluateMaximumEigenvalueAfterTimeStep,
1967 "device(s) " + deviceString + ", stateless, patch-wise, SoA, usm",
1968 launchingThreads,
1969 device,
1970 localPatches
1971 );
1975 Acoustic,
1976 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1977 HaloSize,
1978 Acoustic::NumberOfUnknowns,
1979 Acoustic::NumberOfAuxiliaryVariables,
1980 EvaluateFlux,
1981 EvaluateNonconservativeProduct,
1982 EvaluateSource,
1983 EvaluateMaximumEigenvalueAfterTimeStep,
1985 "device(s) " + deviceString + ", stateless, patch-wise, AoSoA, usm",
1986 launchingThreads,
1987 device,
1988 localPatches
1989 );
1990#endif
1991
1995 Acoustic,
1996 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
1997 HaloSize,
1998 Acoustic::NumberOfUnknowns,
1999 Acoustic::NumberOfAuxiliaryVariables,
2000 EvaluateFlux,
2001 EvaluateNonconservativeProduct,
2002 EvaluateSource,
2003 EvaluateMaximumEigenvalueAfterTimeStep,
2005 "device(s) " + deviceString + ", stateless, volume-wise, AoS, copy",
2006 launchingThreads,
2007 device,
2008 localPatches
2009 );
2013 Acoustic,
2014 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
2015 HaloSize,
2016 Acoustic::NumberOfUnknowns,
2017 Acoustic::NumberOfAuxiliaryVariables,
2018 EvaluateFlux,
2019 EvaluateNonconservativeProduct,
2020 EvaluateSource,
2021 EvaluateMaximumEigenvalueAfterTimeStep,
2023 "device(s) " + deviceString + ", stateless, volume-wise, SoA, copy",
2024 launchingThreads,
2025 device,
2026 localPatches
2027 );
2031 Acoustic,
2032 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
2033 HaloSize,
2034 Acoustic::NumberOfUnknowns,
2035 Acoustic::NumberOfAuxiliaryVariables,
2036 EvaluateFlux,
2037 EvaluateNonconservativeProduct,
2038 EvaluateSource,
2039 EvaluateMaximumEigenvalueAfterTimeStep,
2041 "device(s) " + deviceString + ", stateless, volume-wise, AoSoA, copy",
2042 launchingThreads,
2043 device,
2044 localPatches
2045 );
2046#endif
2047
2048#if defined(GPUOffloadingCPP)
2052 Acoustic,
2053 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
2054 HaloSize,
2055 Acoustic::NumberOfUnknowns,
2056 Acoustic::NumberOfAuxiliaryVariables,
2057 EvaluateFlux,
2058 EvaluateNonconservativeProduct,
2059 EvaluateSource,
2060 EvaluateMaximumEigenvalueAfterTimeStep,
2062 "device(s) " + deviceString + ", stateless, batched, AoS, usm",
2063 launchingThreads,
2064 device,
2065 localPatches
2066 );
2070 Acoustic,
2071 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
2072 HaloSize,
2073 Acoustic::NumberOfUnknowns,
2074 Acoustic::NumberOfAuxiliaryVariables,
2075 EvaluateFlux,
2076 EvaluateNonconservativeProduct,
2077 EvaluateSource,
2078 EvaluateMaximumEigenvalueAfterTimeStep,
2080 "device(s) " + deviceString + ", stateless, batched, SoA, usm",
2081 launchingThreads,
2082 device,
2083 localPatches
2084 );
2088 Acoustic,
2089 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
2090 HaloSize,
2091 Acoustic::NumberOfUnknowns,
2092 Acoustic::NumberOfAuxiliaryVariables,
2093 EvaluateFlux,
2094 EvaluateNonconservativeProduct,
2095 EvaluateSource,
2096 EvaluateMaximumEigenvalueAfterTimeStep,
2098 "device(s) " + deviceString + ", stateless, batched, AoSoA, usm",
2099 launchingThreads,
2100 device,
2101 localPatches
2102 );
2103
2107 Acoustic,
2108 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
2109 HaloSize,
2110 Acoustic::NumberOfUnknowns,
2111 Acoustic::NumberOfAuxiliaryVariables,
2112 EvaluateFlux,
2113 EvaluateNonconservativeProduct,
2114 EvaluateSource,
2115 EvaluateMaximumEigenvalueAfterTimeStep,
2117 "device(s) " + deviceString + ", stateless, patch-wise, AoS, usm",
2118 launchingThreads,
2119 device,
2120 localPatches
2121 );
2125 Acoustic,
2126 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
2127 HaloSize,
2128 Acoustic::NumberOfUnknowns,
2129 Acoustic::NumberOfAuxiliaryVariables,
2130 EvaluateFlux,
2131 EvaluateNonconservativeProduct,
2132 EvaluateSource,
2133 EvaluateMaximumEigenvalueAfterTimeStep,
2135 "device(s) " + deviceString + ", stateless, patch-wise, SoA, usm",
2136 launchingThreads,
2137 device,
2138 localPatches
2139 );
2143 Acoustic,
2144 Acoustic::NumberOfFiniteVolumesPerAxisPerPatch,
2145 HaloSize,
2146 Acoustic::NumberOfUnknowns,
2147 Acoustic::NumberOfAuxiliaryVariables,
2148 EvaluateFlux,
2149 EvaluateNonconservativeProduct,
2150 EvaluateSource,
2151 EvaluateMaximumEigenvalueAfterTimeStep,
2153 "device(s) " + deviceString + ", stateless, patch-wise, AoSoA, usm",
2154 launchingThreads,
2155 device,
2156 localPatches
2157 );
2158#endif
2159 }
2160}
2161
/**
 * Entry point of the kernel benchmark driver.
 *
 * Visible flow: initialise log filters and the shared-memory/GPU environment,
 * optionally enable floating-point exception traps, log the benchmark
 * configuration, call runBenchmarks() once per entry of
 * NumberOfPatchesToStudy, log a closing remark on the accuracy checks,
 * release the global validOutcome buffer and return EXIT_SUCCESS.
 *
 * NOTE(review): this listing has gaps in its embedded line numbers (2163,
 * 2166-2168, 2175, 2256-2258), so statements are missing from this view --
 * presumably further environment setup/teardown calls; confirm against the
 * original source file before relying on this description being complete.
 *
 * @param argc Number of command-line arguments (not used by any visible statement).
 * @param argv Command-line arguments (not used by any visible statement).
 * @return EXIT_SUCCESS on the only visible return path.
 */
2162int main(int argc, char** argv) {
2164 // Do this early, so people can use logInfo properly.
2165 repositories::initLogFilters();
2169 repositories::initSharedMemoryAndGPUEnvironment();
2170
 // Trap division-by-zero, invalid-operation and overflow only if the
 // compile-time switch EnableFPE is set.
2171 if constexpr (EnableFPE) {
2172 feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
2173 }
2174
 // Report the benchmark configuration.
 // NOTE(review): the closing brace after this logging section has no visible
 // opening line (listing line 2175 is missing) -- presumably a guard so that
 // only one rank prints; confirm against the original file.
2176 logInfo("main()", "Dimensions: " << Dimensions);
2177 logInfo(
2178 "main()",
2179 "Number of threads launching compute kernels: "
2180 << NumberOfLaunchingThreads
2181 );
2182 logInfo(
2183 "main()",
2184 "Number of patches per thread/compute kernel launch to study: "
2185 << toString(NumberOfPatchesToStudy)
2186 );
2187 logInfo(
2188 "main()",
2189 "Number of compute threads: "
2190 << ::tarch::multicore::Core::getInstance().getNumberOfThreads()
2191 );
2192 logInfo(
2193 "main()",
2194 "Number of MPI ranks: "
2195 << ::tarch::mpi::Rank::getInstance().getNumberOfRanks()
2196 );
2197 logInfo(
2198 "main()",
2199 "Number of GPU devices: "
2200 << ::tarch::accelerator::Device::getInstance().getNumberOfDevices()
2201 );
2202 logInfo(
2203 "main()",
2204 "Number of finite volumes per axis per patch: "
2205 << Acoustic::NumberOfFiniteVolumesPerAxisPerPatch
2206 );
2207 logInfo("main()", "Number of samples per measurement: " << NumberOfSamples);
2208 logInfo(
2209 "main()",
2210 "Evaluate max. eigenvalue (reduction step): "
2211 << std::boolalpha << EvaluateMaximumEigenvalueAfterTimeStep
2212 );
2213 if constexpr (EnableFPE) {
2214 logInfo("main()", "Floating-point exception handler enabled");
2215 }
2216 if constexpr (Accuracy > 0.0) {
2217 logInfo(
2218 "main()",
2219 "Performing accuracy checks with precision: " << Accuracy
2220 );
2221 }
 // SYCL builds additionally print hints on the relevant runtime environment
 // variables.
2222#if defined(GPUOffloadingSYCL)
2223 logInfo(
2224 "main()",
2225 "Set SYCL_DEVICE_FILTER=gpu or ONEAPI_DEVICE_SELECTOR=cuda:0 when using SYCL on the device"
2226 );
2227 logInfo("main()", "Set SYCL_PI_TRACE=2 in case of runtime errors");
2228#endif
2229 }
2230
 // With SharedOMP the benchmark loop is placed inside an OpenMP parallel
 // region but executed by the master thread only; without SharedOMP the loop
 // runs as plain sequential code.
2231#if defined(SharedOMP)
2232#pragma omp parallel
2233 {
2234#pragma omp master
2235 {
2236#endif
 // One benchmark sweep per configured patch count.
 // NOTE(review): n is a signed int compared against .size(); if the container
 // returns an unsigned type this triggers a sign-compare warning -- verify.
2237 for (int n = 0; n < NumberOfPatchesToStudy.size(); n++) {
2238 runBenchmarks(NumberOfPatchesToStudy[n], NumberOfLaunchingThreads);
2239 }
2240#if defined(SharedOMP)
2241 }
2242 }
2243#endif
2244
 // Closing remark: Accuracy > 0 means accuracy checks were requested at
 // compile time; otherwise state that none were performed.
 // NOTE(review): the message below reads "a accuracy" (should be "an
 // accuracy"); left untouched here because it is runtime text.
2245 if constexpr (Accuracy > 0.0) {
2246 logInfo(
2247 "main()",
2248 "All kernels yield the same outcome up to a accuracy of "
2249 << Accuracy << " unless reported otherwise"
2250 );
2251 } else {
2252 logInfo("main()", "No accuracy checks were performed");
2253 }
2254
 // validOutcome is a file-scope double* initialised to nullptr; delete[] on a
 // null pointer is a no-op, so this is safe even if it was never allocated.
2255 delete[] validOutcome;
2259 return EXIT_SUCCESS;
2260}
void wrapPatchwiseHeapFunctorsHostKernel(int device, ::exahype2::CellData< double, double > &patchData)
This is a wrapper around the kernel call with the functors.
void wrapStatelessHostKernel(int device, ::exahype2::CellData< double, double > &patchData)
Wrapper around stateless kernel invocations.
double validMaxEigenvalue
static constexpr int NumberOfInputEntries
std::tuple< double, int > validateOutcome(double *Q, int patchIndex, const double maxEigenvalue)
Validate data against pre-stored simulation outcome.
::tarch::timing::Measurement timingComputeKernel
void runBenchmarks(int numberOfPatches, int launchingThreads)
Run the benchmark for one particular number of patches.
void initInputData(double *Q)
Set input data.
static constexpr double CellOffset
void storeOutcome(const double *Q, const double maxEigenvalue)
Store outcome of one compute kernel.
::tarch::logging::Log _log("::")
void wrapDeviceKernel(int device, ::exahype2::CellData< double, double > &patchData)
static constexpr int NumberOfOutputEntries
static constexpr double TimeStamp
void reportRuntime(const std::string &kernelIdentificator, const ::tarch::timing::Measurement &timingKernelLaunch, int patches)
static constexpr int HaloSize
double * validOutcome
void wrapVolumewiseFunctorHostKernels(int device, ::exahype2::CellData< double, double > &patchData)
Another wrapper.
static constexpr int NumberOfFiniteVolumesPerPatch
static constexpr double TimeStepSize
static constexpr double CellSize
::tarch::multicore::BooleanSemaphore validateOutcomeSemaphore
void wrapBatchedHeapFunctorHostKernels(int device, ::exahype2::CellData< double, double > &patchData)
Another wrapper.
#define assertionEquals(lhs, rhs)
#define assertion(expr)
void assessKernel(std::function< void(int) > kernelCallInLoop, const std::string &name, int numberOfParticles)
#define logError(methodName, logMacroMessageStream)
Wrapper macro around tarch::logging::Log to improve logging.
Definition Log.h:464
#define logInfo(methodName, logMacroMessageStream)
Wrapper macro around tarch::logging::Log to improve logging.
Definition Log.h:411
#define parallelFor(counter, max)
Definition Loop.h:446
int main()
Definition main.cpp:321
static Device & getInstance()
static constexpr int HostDevice
Accelerator devices (GPUs) are enumerated starting from 0.
Definition Device.h:48
Log Device.
Definition Log.h:516
static int getGlobalMasterRank()
Get the global master.
Definition Rank.cpp:419
int getNumberOfRanks() const
Definition Rank.cpp:556
static Rank & getInstance()
This operation returns the singleton instance.
Definition Rank.cpp:543
int getRank() const
Return rank of this node.
Definition Rank.cpp:533
static Core & getInstance()
Definition Core.cpp:56
Create a lock around a boolean semaphore region.
Definition Lock.h:19
std::string toString() const
void setValue(const double &value)
Set the value.
A simple class that has to be included to measure the clock ticks required for an operation.
Definition Watch.h:45
double getCalendarTime()
This method returns the elapsed calendar time between the start and stop command of the timer,...
Definition Watch.cpp:74
void stop()
Stop timer.
Definition Watch.cpp:55
std::string toString(Filter filter)
Definition convert.cpp:170
#define endParallelFor
Definition Loop.h:63
static GPUCallableMethod double maxEigenvalue(const double *const __restrict__ Q, const tarch::la::Vector< Dimensions, double > &x, const tarch::la::Vector< Dimensions, double > &h, const double t, const double dt, const int normal) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseUSMStateless(int targetDevice, CellData< double, double > &patchData, tarch::timing::Measurement &measurement) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedUSMStateless(int targetDevice, CellData< double, double > &patchData, tarch::timing::Measurement &measurement) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedHeapStateless(int targetDevice, CellData< double, double > &patchData, tarch::timing::Measurement &measurement) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovVolumewiseStateless(int targetDevice, CellData< double, double > &patchData, tarch::timing::Measurement &measurement) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseUSMStateless(int targetDevice, CellData< double, double > &patchData, tarch::timing::Measurement &measurement) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseHeapStateless(int targetDevice, CellData< double, double > &patchData, tarch::timing::Measurement &measurement) InlineMethod
The name patchwise is a little bit irritating in a GPU context.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedUSMStateless(int targetDevice, CellData< double, double > &patchData, tarch::timing::Measurement &measurement) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovTaskgraphCopyStateless(int targetDevice, CellData< double, double > &patchData) InlineMethod
All the memory management is discussed in the documentation of GPUCellData.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedUSMStateless(int targetDevice, CellData< double, double > &patchData) InlineMethod
This version allocates the temporary data on the GPU via a device malloc, handing in the standard mem...
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovTaskgraphUSMStateless(int targetDevice, CellData< double, double > &patchData) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedManagedStateless(int targetDevice, CellData< double, double > &patchData) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseUSMStateless(int targetDevice, CellData< double, double > &patchData) InlineMethod
1:1 translation of the numerical scheme.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedHeapStateless(int targetDevice, CellData< double, double > &patchData) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseHeapStateless(int targetDevice, CellData< double, double > &patchData) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovVolumewiseFunctors(CellData< double, double > &patchData, const FluxFunctor &fluxFunctor, const NonconservativeProductFunctor &nonconservativeProductFunctor, const SourceFunctor &sourceFunctor, const MaxEigenvalueFunctor &maxEigenvalueFunctor, tarch::timing::Measurement &measurement, peano4::utils::LoopPlacement loopParallelism=peano4::utils::LoopPlacement::Serial) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovVolumewiseStateless(::exahype2::CellData< double, double > &patchData, tarch::timing::Measurement &measurement, peano4::utils::LoopPlacement loopParallelism=peano4::utils::LoopPlacement::Serial) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseHeapFunctors(CellData< double, double > &patchData, const FluxFunctor &fluxFunctor, const NonconservativeProductFunctor &nonconservativeProductFunctor, const SourceFunctor &sourceFunctor, const MaxEigenvalueFunctor &maxEigenvalueFunctor, tarch::timing::Measurement &measurement, peano4::utils::LoopPlacement loopParallelism=peano4::utils::LoopPlacement::Serial) InlineMethod
Apply the Rusanov Riemann solver over a set of patches.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedHeapStateless(::exahype2::CellData< double, double > &patchData, tarch::timing::Measurement &measurement, peano4::utils::LoopPlacement loopParallelism=peano4::utils::LoopPlacement::Serial) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseInsituStateless(CellData< double, double > &patchData, tarch::timing::Measurement &measurement, peano4::utils::LoopPlacement loopParallelism=peano4::utils::LoopPlacement::Serial) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedInsituStateless(CellData< double, double > &patchData, tarch::timing::Measurement &measurement, peano4::utils::LoopPlacement loopParallelism=peano4::utils::LoopPlacement::Serial) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedHeapFunctors(CellData< double, double > &patchData, const FluxFunctor &fluxFunctor, const NonconservativeProductFunctor &nonconservativeProductFunctor, const SourceFunctor &sourceFunctor, const MaxEigenvalueFunctor &maxEigenvalueFunctor, tarch::timing::Measurement &measurement, peano4::utils::LoopPlacement loopParallelism=peano4::utils::LoopPlacement::Serial) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseHeapStateless(CellData< double, double > &patchData, tarch::timing::Measurement &measurement, peano4::utils::LoopPlacement loopParallelism=peano4::utils::LoopPlacement::Serial) InlineMethod
LoopPlacement
Guide loop-level parallelism.
Definition Loop.h:65
void fillLookupTables()
Fill Lookup Tables.
Definition peano.cpp:87
int initParallelEnvironment(int *argc, char ***argv)
Init Parallel Environment.
Definition peano.cpp:101
void shutdownParallelEnvironment()
Shutdown all the parallel environment, i.e.
Definition peano.cpp:127
CF abs(const CF &cf)
constexpr double PI
Definition Scalar.h:12
bool equals(const Matrix< Rows, Cols, Scalar > &lhs, const Matrix< Rows, Cols, Scalar > &rhs, const Scalar &tolerance=NUMERICAL_ZERO_DIFFERENCE)
Compares two matrices for equality up to a numerical tolerance.
void shutdownSmartMPI()
Definition multicore.cpp:49
void initSmartMPI()
Switch on SmartMPI.
Definition multicore.cpp:33
void freeMemory(void *data, MemoryLocation location, int device=accelerator::Device::HostDevice)
Free memory.
void shutdownNonCriticalAssertionEnvironment()
peano4::shutdownParallelEnvironment().
void initNonCriticalAssertionEnvironment()
Register the assertion tag from the global communicator.
@ ManagedSharedAcceleratorDeviceMemory
To be used on host only.
Representation of a number of cells which contains all information that's required to process the sto...
Definition CellData.h:77
outType ** QOut
Out values.
Definition CellData.h:116
inType ** QIn
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
Definition CellData.h:82
double * maxEigenvalue
Out values.
Definition CellData.h:121
tarch::la::Vector< Dimensions, double > * cellCentre
Definition CellData.h:83
tarch::la::Vector< Dimensions, double > * cellSize
Definition CellData.h:84
void setX(const tarch::la::Vector< Dimensions, double > &value)
void setH(const tarch::la::Vector< Dimensions, double > &value)
Simple vector class.
Definition Vector.h:134