7 int NumberOfVolumesPerAxisInPatch,
10 int NumberOfAuxiliaryVariables,
12 bool EvaluateNonconservativeProduct,
14 bool EvaluateMaximumEigenvalueAfterTimeStep,
15 class TempDataEnumeratorType>
19 double** mappedPointersToQIn,
20 const double* rawPointerToCellCentre,
21 const double* rawPointerToCellSize,
24 double* maxEigenvalue,
25 double** mappedPointersToQOut,
29 double* tempNonconservativeProductX,
30 double* tempNonconservativeProductY,
31 double* tempNonconservativeProductZ,
32 double* tempEigenvalueX,
33 double* tempEigenvalueY,
34 double* tempEigenvalueZ
38 const TempDataEnumeratorType fluxEnumerator(numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, NumberOfUnknowns, 0);
39 const TempDataEnumeratorType ncpEnumerator(numberOfCells, NumberOfVolumesPerAxisInPatch + 1,
HaloSize, NumberOfUnknowns, 0);
40 const TempDataEnumeratorType eigenvalueEnumerator(numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, 1, 0);
45 if constexpr (EvaluateSource) {
47#pragma omp target teams distribute parallel for simd collapse(3) device(targetDevice)
48 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
49 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
50 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
51 loopbodies::copySolutionAndAddSourceTerm<SolverType>(
52 mappedPointersToQIn[patchIndex],
54 rawPointerToCellCentre[patchIndex],
55 rawPointerToCellSize[patchIndex],
60 mappedPointersToQOut[patchIndex],
67#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice)
68 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
69 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
70 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
71 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
72 loopbodies::copySolutionAndAddSourceTerm<SolverType>(
73 mappedPointersToQIn[patchIndex],
75 rawPointerToCellCentre[patchIndex],
76 rawPointerToCellSize[patchIndex],
81 mappedPointersToQOut[patchIndex],
91#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice)
92 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
93 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
94 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
95 for (
int unknown = 0; unknown < NumberOfUnknowns + NumberOfAuxiliaryVariables; unknown++) {
102#pragma omp target teams distribute parallel for simd collapse(5) device(targetDevice)
103 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
104 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
105 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
106 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
107 for (
int unknown = 0; unknown < NumberOfUnknowns + NumberOfAuxiliaryVariables; unknown++) {
109 mappedPointersToQIn[patchIndex], QInEnumerator, patchIndex,
volumeIndex(x, y, z), unknown, mappedPointersToQOut[patchIndex], QOutEnumerator
123#pragma omp target teams distribute parallel for simd collapse(3) device(targetDevice)
124 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
125 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
126 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; x++) {
127 loopbodies::computeMaxEigenvalue<SolverType>(
128 mappedPointersToQIn[patchIndex],
130 rawPointerToCellCentre[patchIndex],
131 rawPointerToCellSize[patchIndex],
144#pragma omp target teams distribute parallel for simd collapse(3) device(targetDevice)
145 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
146 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; y++) {
147 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
148 loopbodies::computeMaxEigenvalue<SolverType>(
149 mappedPointersToQIn[patchIndex],
151 rawPointerToCellCentre[patchIndex],
152 rawPointerToCellSize[patchIndex],
165#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice)
166 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
167 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
168 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
169 for (
int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
171 mappedPointersToQIn[patchIndex],
176 eigenvalueEnumerator,
177 rawPointerToCellCentre[patchIndex],
178 rawPointerToCellSize[patchIndex],
183 mappedPointersToQOut[patchIndex],
191#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice)
192 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
193 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
194 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
195 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; x++) {
196 loopbodies::computeMaxEigenvalue<SolverType>(
197 mappedPointersToQIn[patchIndex],
199 rawPointerToCellCentre[patchIndex],
200 rawPointerToCellSize[patchIndex],
214#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice)
215 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
216 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
217 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; y++) {
218 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
219 loopbodies::computeMaxEigenvalue<SolverType>(
220 mappedPointersToQIn[patchIndex],
222 rawPointerToCellCentre[patchIndex],
223 rawPointerToCellSize[patchIndex],
237#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice)
238 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
239 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; z++) {
240 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
241 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
242 loopbodies::computeMaxEigenvalue<SolverType>(
243 mappedPointersToQIn[patchIndex],
245 rawPointerToCellCentre[patchIndex],
246 rawPointerToCellSize[patchIndex],
260#pragma omp target teams distribute parallel for simd collapse(5) device(targetDevice)
261 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
262 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
263 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
264 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
265 for (
int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
267 mappedPointersToQIn[patchIndex],
272 eigenvalueEnumerator,
273 rawPointerToCellCentre[patchIndex],
274 rawPointerToCellSize[patchIndex],
279 mappedPointersToQOut[patchIndex],
292 if constexpr (EvaluateFlux) {
294#pragma omp target teams distribute parallel for simd collapse(3) device(targetDevice)
295 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
296 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
297 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; x++) {
298 loopbodies::computeFlux<SolverType>(
299 mappedPointersToQIn[patchIndex],
301 rawPointerToCellCentre[patchIndex],
302 rawPointerToCellSize[patchIndex],
315#pragma omp target teams distribute parallel for simd collapse(3) device(targetDevice)
316 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
317 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; y++) {
318 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
319 loopbodies::computeFlux<SolverType>(
320 mappedPointersToQIn[patchIndex],
322 rawPointerToCellCentre[patchIndex],
323 rawPointerToCellSize[patchIndex],
336#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice)
337 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
338 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
339 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
340 for (
int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
346 rawPointerToCellCentre[patchIndex],
347 rawPointerToCellSize[patchIndex],
352 mappedPointersToQOut[patchIndex],
360#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice)
361 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
362 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
363 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
364 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; x++) {
365 loopbodies::computeFlux<SolverType>(
366 mappedPointersToQIn[patchIndex],
368 rawPointerToCellCentre[patchIndex],
369 rawPointerToCellSize[patchIndex],
383#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice)
384 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
385 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
386 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; y++) {
387 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
388 loopbodies::computeFlux<SolverType>(
389 mappedPointersToQIn[patchIndex],
391 rawPointerToCellCentre[patchIndex],
392 rawPointerToCellSize[patchIndex],
406#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice)
407 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
408 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; z++) {
409 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
410 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
411 loopbodies::computeFlux<SolverType>(
412 mappedPointersToQIn[patchIndex],
414 rawPointerToCellCentre[patchIndex],
415 rawPointerToCellSize[patchIndex],
429#pragma omp target teams distribute parallel for simd collapse(5) device(targetDevice)
430 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
431 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
432 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
433 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
434 for (
int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
440 rawPointerToCellCentre[patchIndex],
441 rawPointerToCellSize[patchIndex],
446 mappedPointersToQOut[patchIndex],
457 if constexpr (EvaluateNonconservativeProduct) {
459#pragma omp target teams distribute parallel for simd collapse(3) device(targetDevice)
460 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
461 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
462 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize - 1; x++) {
463 loopbodies::computeNonconservativeFlux<SolverType>(
464 mappedPointersToQIn[patchIndex],
466 rawPointerToCellCentre[patchIndex],
467 rawPointerToCellSize[patchIndex],
473 tempNonconservativeProductX,
480#pragma omp target teams distribute parallel for simd collapse(3) device(targetDevice)
481 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
482 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize - 1; y++) {
483 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
484 loopbodies::computeNonconservativeFlux<SolverType>(
485 mappedPointersToQIn[patchIndex],
487 rawPointerToCellCentre[patchIndex],
488 rawPointerToCellSize[patchIndex],
494 tempNonconservativeProductY,
501#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice)
502 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
503 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
504 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
505 for (
int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
507 tempNonconservativeProductX,
508 tempNonconservativeProductY,
509 tempNonconservativeProductZ,
511 rawPointerToCellCentre[patchIndex],
512 rawPointerToCellSize[patchIndex],
517 mappedPointersToQOut[patchIndex],
525#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice)
526 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
527 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
528 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
529 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize - 1; x++) {
530 loopbodies::computeNonconservativeFlux<SolverType>(
531 mappedPointersToQIn[patchIndex],
533 rawPointerToCellCentre[patchIndex],
534 rawPointerToCellSize[patchIndex],
540 tempNonconservativeProductX,
548#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice)
549 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
550 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
551 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize - 1; y++) {
552 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
553 loopbodies::computeNonconservativeFlux<SolverType>(
554 mappedPointersToQIn[patchIndex],
556 rawPointerToCellCentre[patchIndex],
557 rawPointerToCellSize[patchIndex],
563 tempNonconservativeProductY,
571#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice)
572 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
573 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize - 1; z++) {
574 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
575 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
576 loopbodies::computeNonconservativeFlux<SolverType>(
577 mappedPointersToQIn[patchIndex],
579 rawPointerToCellCentre[patchIndex],
580 rawPointerToCellSize[patchIndex],
586 tempNonconservativeProductZ,
594#pragma omp target teams distribute parallel for simd collapse(5) device(targetDevice)
595 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
596 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
597 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
598 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
599 for (
int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
601 tempNonconservativeProductX,
602 tempNonconservativeProductY,
603 tempNonconservativeProductZ,
605 rawPointerToCellCentre[patchIndex],
606 rawPointerToCellSize[patchIndex],
611 mappedPointersToQOut[patchIndex],
622 if constexpr (EvaluateMaximumEigenvalueAfterTimeStep) {
624#pragma omp target teams distribute device(targetDevice)
625 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
626 double newMaxEigenvalue = 0.0;
627#pragma omp parallel for simd collapse(2) reduction(max : newMaxEigenvalue)
628 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
629 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
630 newMaxEigenvalue = std::max(
632 loopbodies::reduceMaxEigenvalue<SolverType>(
633 mappedPointersToQOut[patchIndex],
635 rawPointerToCellCentre[patchIndex],
636 rawPointerToCellSize[patchIndex],
645 maxEigenvalue[patchIndex] = newMaxEigenvalue;
648#pragma omp target teams distribute device(targetDevice)
649 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
650 double newMaxEigenvalue = 0.0;
651#pragma omp parallel for simd collapse(3) reduction(max : newMaxEigenvalue)
652 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
653 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
654 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
655 newMaxEigenvalue = std::max(
657 loopbodies::reduceMaxEigenvalue<SolverType>(
658 mappedPointersToQOut[patchIndex],
660 rawPointerToCellCentre[patchIndex],
661 rawPointerToCellSize[patchIndex],
671 maxEigenvalue[patchIndex] = newMaxEigenvalue;
// Fragment of the routine traced as "timeStepWithRusanovBatchedUSMStateless()"
// (the variant that also stores a timing value in `measurement`).
// NOTE(review): the `template <` opener, the function signature and several
// statements are not visible in this excerpt; the comments below describe
// only what the visible lines show.
//
// Tail of the template parameter list.
681 int NumberOfVolumesPerAxisInPatch,
683 int NumberOfUnknowns,
684 int NumberOfAuxiliaryVariables,
686 bool EvaluateNonconservativeProduct,
688 bool EvaluateMaximumEigenvalueAfterTimeStep,
689 class TempDataEnumeratorType>
694 logTraceIn(
"timeStepWithRusanovBatchedUSMStateless()");
// Enumerators used to size the temporary arrays; all three are constructed
// for patchData.numberOfCells patches. The ncp enumerator is built with
// NumberOfVolumesPerAxisInPatch + 1, and the eigenvalue enumerator is built
// with 1 where the other two pass NumberOfUnknowns.
698 const TempDataEnumeratorType fluxEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, NumberOfUnknowns, 0);
699 const TempDataEnumeratorType ncpEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch + 1,
HaloSize, NumberOfUnknowns, 0);
700 const TempDataEnumeratorType eigenvalueEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, 1, 0);
// Temporary flux, non-conservative-product and eigenvalue buffers requested
// from the GPUMemoryManager singleton for targetDevice. Each Z-direction
// buffer is requested with a single element unless Dimensions == 3.
702 double* tempFluxX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(fluxEnumerator.size(), targetDevice);
703 double* tempFluxY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(fluxEnumerator.size(), targetDevice);
704 double* tempFluxZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(Dimensions == 3 ? fluxEnumerator.size() : 1, targetDevice);
705 double* tempNonconservativeProductX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(ncpEnumerator.size(), targetDevice);
706 double* tempNonconservativeProductY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(ncpEnumerator.size(), targetDevice);
707 double* tempNonconservativeProductZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(Dimensions == 3 ? ncpEnumerator.size() : 1, targetDevice);
708 double* tempEigenvalueX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(eigenvalueEnumerator.size(), targetDevice);
709 double* tempEigenvalueY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(eigenvalueEnumerator.size(), targetDevice);
710 double* tempEigenvalueZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(Dimensions == 3 ? eigenvalueEnumerator.size() : 1, targetDevice);
// The QIn/QOut pointer tables are taken from patchData as they are; this
// variant contains no per-patch `target enter data` mapping.
713 double** mappedPointersToQIn = patchData.
QIn;
714 double** mappedPointersToQOut = patchData.
QOut;
// Raw views of the per-patch metadata. rawPointerToCellSize points at the
// storage of the first cellSize entry.
// NOTE(review): the declaration of rawPointerToCellCentre, which is passed
// to the kernel below, is not visible in this excerpt.
716 double* rawPointerToCellSize = patchData.
cellSize[0].
data();
717 double* t = patchData.
t;
718 double* dt = patchData.
dt;
// Stopwatch constructed directly before the kernel call; its calendar time
// is written into `measurement` once the call has returned.
721 tarch::timing::Watch watch(
"exahype2::fv::rusanov::omp",
"timeStepWithRusanovBatchedUSMStateless",
false,
true);
// Delegate the time step to internal::timeStepWithRusanovBatchedStateless,
// handing over the pointer tables, metadata pointers and scratch buffers.
722 internal::timeStepWithRusanovBatchedStateless<
724 NumberOfVolumesPerAxisInPatch,
727 NumberOfAuxiliaryVariables,
729 EvaluateNonconservativeProduct,
731 EvaluateMaximumEigenvalueAfterTimeStep,
732 TempDataEnumeratorType>(
736 rawPointerToCellCentre,
737 rawPointerToCellSize,
741 mappedPointersToQOut,
745 tempNonconservativeProductX,
746 tempNonconservativeProductY,
747 tempNonconservativeProductZ,
753 measurement.
setValue(watch.getCalendarTime());
// Hand every scratch buffer back to the memory manager, one free() per
// allocate() above.
755 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxX, targetDevice);
756 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxY, targetDevice);
757 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxZ, targetDevice);
758 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductX, targetDevice);
759 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductY, targetDevice);
760 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductZ, targetDevice);
761 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueX, targetDevice);
762 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueY, targetDevice);
763 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueZ, targetDevice);
765 logTraceOut(
"timeStepWithRusanovBatchedUSMStateless()");
// Fragment of a second routine traced as
// "timeStepWithRusanovBatchedUSMStateless()". Unlike the block before it,
// the visible lines contain no stopwatch and no `measurement` update.
// NOTE(review): the `template <` opener, the function signature and several
// statements are not visible in this excerpt; the comments below describe
// only what the visible lines show.
//
// Tail of the template parameter list.
771 int NumberOfVolumesPerAxisInPatch,
773 int NumberOfUnknowns,
774 int NumberOfAuxiliaryVariables,
776 bool EvaluateNonconservativeProduct,
778 bool EvaluateMaximumEigenvalueAfterTimeStep,
779 class TempDataEnumeratorType>
784 logTraceIn(
"timeStepWithRusanovBatchedUSMStateless()");
// Enumerators that determine the sizes of the temporary arrays. The ncp
// enumerator receives NumberOfVolumesPerAxisInPatch + 1; the eigenvalue
// enumerator receives 1 in the position where the others get
// NumberOfUnknowns.
788 const TempDataEnumeratorType fluxEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, NumberOfUnknowns, 0);
789 const TempDataEnumeratorType ncpEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch + 1,
HaloSize, NumberOfUnknowns, 0);
790 const TempDataEnumeratorType eigenvalueEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, 1, 0);
// Scratch buffers obtained through GPUMemoryManager for targetDevice. The
// Z-direction buffers collapse to one element when Dimensions is not 3.
792 double* tempFluxX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(fluxEnumerator.size(), targetDevice);
793 double* tempFluxY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(fluxEnumerator.size(), targetDevice);
794 double* tempFluxZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(Dimensions == 3 ? fluxEnumerator.size() : 1, targetDevice);
795 double* tempNonconservativeProductX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(ncpEnumerator.size(), targetDevice);
796 double* tempNonconservativeProductY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(ncpEnumerator.size(), targetDevice);
797 double* tempNonconservativeProductZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(Dimensions == 3 ? ncpEnumerator.size() : 1, targetDevice);
798 double* tempEigenvalueX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(eigenvalueEnumerator.size(), targetDevice);
799 double* tempEigenvalueY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(eigenvalueEnumerator.size(), targetDevice);
800 double* tempEigenvalueZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(Dimensions == 3 ? eigenvalueEnumerator.size() : 1, targetDevice);
// patchData's own QIn/QOut pointer tables are used directly.
803 double** mappedPointersToQIn = patchData.
QIn;
804 double** mappedPointersToQOut = patchData.
QOut;
// Raw pointers to the per-patch metadata held by patchData.
// NOTE(review): rawPointerToCellCentre is used in the call below but its
// declaration is not visible in this excerpt.
806 double* rawPointerToCellSize = patchData.
cellSize[0].
data();
807 double* t = patchData.
t;
808 double* dt = patchData.
dt;
// Run the shared implementation in the `internal` namespace with the
// pointers and scratch buffers prepared above.
811 internal::timeStepWithRusanovBatchedStateless<
813 NumberOfVolumesPerAxisInPatch,
816 NumberOfAuxiliaryVariables,
818 EvaluateNonconservativeProduct,
820 EvaluateMaximumEigenvalueAfterTimeStep,
821 TempDataEnumeratorType>(
825 rawPointerToCellCentre,
826 rawPointerToCellSize,
830 mappedPointersToQOut,
834 tempNonconservativeProductX,
835 tempNonconservativeProductY,
836 tempNonconservativeProductZ,
// Release all nine scratch buffers through the same memory manager.
842 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxX, targetDevice);
843 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxY, targetDevice);
844 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxZ, targetDevice);
845 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductX, targetDevice);
846 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductY, targetDevice);
847 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductZ, targetDevice);
848 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueX, targetDevice);
849 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueY, targetDevice);
850 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueZ, targetDevice);
852 logTraceOut(
"timeStepWithRusanovBatchedUSMStateless()");
// Fragment of the routine traced as
// "timeStepWithRusanovBatchedHeapStateless()" (the variant that also stores
// a timing value in `measurement`). Scratch arrays come from `new[]` and are
// made available on targetDevice through explicit OpenMP
// `target enter data` / `target exit data` mappings.
// NOTE(review): the `template <` opener, the function signature, the
// declarations of numberOfCells, rawPointerToCellCentre, maxEigenvalue,
// QInEnumerator and QOutEnumerator, and several closing braces are not
// visible in this excerpt; the comments below describe only what the
// visible lines show.
//
// Tail of the template parameter list.
858 int NumberOfVolumesPerAxisInPatch,
860 int NumberOfUnknowns,
861 int NumberOfAuxiliaryVariables,
863 bool EvaluateNonconservativeProduct,
865 bool EvaluateMaximumEigenvalueAfterTimeStep,
866 class TempDataEnumeratorType>
871 logTraceIn(
"timeStepWithRusanovBatchedHeapStateless()");
// Enumerators that determine the sizes of the temporary arrays. The ncp
// enumerator receives NumberOfVolumesPerAxisInPatch + 1; the eigenvalue
// enumerator receives 1 in the position where the others get
// NumberOfUnknowns.
875 const TempDataEnumeratorType fluxEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, NumberOfUnknowns, 0);
876 const TempDataEnumeratorType ncpEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch + 1,
HaloSize, NumberOfUnknowns, 0);
877 const TempDataEnumeratorType eigenvalueEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, 1, 0);
// Host-side scratch arrays. In this variant the Z arrays are always
// allocated at full enumerator size; there is no Dimensions-dependent
// sizing here.
879 double* tempFluxX =
new double[fluxEnumerator.size()];
880 double* tempFluxY =
new double[fluxEnumerator.size()];
881 double* tempFluxZ =
new double[fluxEnumerator.size()];
882 double* tempNonconservativeProductX =
new double[ncpEnumerator.size()];
883 double* tempNonconservativeProductY =
new double[ncpEnumerator.size()];
884 double* tempNonconservativeProductZ =
new double[ncpEnumerator.size()];
885 double* tempEigenvalueX =
new double[eigenvalueEnumerator.size()];
886 double* tempEigenvalueY =
new double[eigenvalueEnumerator.size()];
887 double* tempEigenvalueZ =
new double[eigenvalueEnumerator.size()];
// Map the scratch arrays onto targetDevice with map(alloc : ...), i.e.
// without requesting a host-to-device copy of their contents.
889#pragma omp target enter data map(alloc : tempFluxX[0 : fluxEnumerator.size()]) device(targetDevice)
890#pragma omp target enter data map(alloc : tempFluxY[0 : fluxEnumerator.size()]) device(targetDevice)
891#pragma omp target enter data map(alloc : tempFluxZ[0 : fluxEnumerator.size()]) device(targetDevice)
892#pragma omp target enter data map(alloc : tempNonconservativeProductX[0 : ncpEnumerator.size()]) device(targetDevice)
893#pragma omp target enter data map(alloc : tempNonconservativeProductY[0 : ncpEnumerator.size()]) device(targetDevice)
894#pragma omp target enter data map(alloc : tempNonconservativeProductZ[0 : ncpEnumerator.size()]) device(targetDevice)
895#pragma omp target enter data map(alloc : tempEigenvalueX[0 : eigenvalueEnumerator.size()]) device(targetDevice)
896#pragma omp target enter data map(alloc : tempEigenvalueY[0 : eigenvalueEnumerator.size()]) device(targetDevice)
897#pragma omp target enter data map(alloc : tempEigenvalueZ[0 : eigenvalueEnumerator.size()]) device(targetDevice)
// Host arrays with one pointer slot per patch; the loop below fills them
// with the addresses returned by omp_get_mapped_ptr.
899 double** mappedPointersToQIn =
new double*[patchData.
numberOfCells];
900 double** mappedPointersToQOut =
new double*[patchData.
numberOfCells];
// Per patch: map QIn with `to` and QOut with `alloc`, then record the
// mapped pointer of each on targetDevice.
902 for (
int patchIndex = 0; patchIndex < patchData.
numberOfCells; patchIndex++) {
903 const double* currentQIn = patchData.
QIn[patchIndex];
904 double* currentQOut = patchData.
QOut[patchIndex];
905#pragma omp target enter data map(to : currentQIn[0 : QInEnumerator.size()]) device(targetDevice)
906#pragma omp target enter data map(alloc : currentQOut[0 : QOutEnumerator.size()]) device(targetDevice)
907 mappedPointersToQIn[patchIndex] =
static_cast<double*
>(omp_get_mapped_ptr(currentQIn, targetDevice));
908 mappedPointersToQOut[patchIndex] =
static_cast<double*
>(omp_get_mapped_ptr(currentQOut, targetDevice));
// Raw pointers to the per-patch metadata held by patchData.
913 double* rawPointerToCellSize = patchData.
cellSize[0].
data();
914 double* t = patchData.
t;
915 double* dt = patchData.
dt;
// Map the metadata arrays and the two pointer tables with `to`;
// maxEigenvalue is mapped with `alloc` here and with `from` on exit below.
918#pragma omp target enter data map(to : rawPointerToCellCentre[0 : numberOfCells * Dimensions]) device(targetDevice)
919#pragma omp target enter data map(to : rawPointerToCellSize[0 : numberOfCells * Dimensions]) device(targetDevice)
920#pragma omp target enter data map(to : mappedPointersToQIn[0 : numberOfCells]) device(targetDevice)
921#pragma omp target enter data map(to : mappedPointersToQOut[0 : numberOfCells]) device(targetDevice)
922#pragma omp target enter data map(to : t[0 : numberOfCells]) device(targetDevice)
923#pragma omp target enter data map(to : dt[0 : numberOfCells]) device(targetDevice)
924#pragma omp target enter data map(alloc : maxEigenvalue[0 : numberOfCells]) device(targetDevice)
// Stopwatch constructed after the enter-data mappings and read right after
// the kernel call, before any exit-data mapping.
926 tarch::timing::Watch watch(
"exahype2::fv::rusanov::omp",
"timeStepWithRusanovBatchedHeapStateless",
false,
true);
// Run the shared implementation in the `internal` namespace.
927 internal::timeStepWithRusanovBatchedStateless<
929 NumberOfVolumesPerAxisInPatch,
932 NumberOfAuxiliaryVariables,
934 EvaluateNonconservativeProduct,
936 EvaluateMaximumEigenvalueAfterTimeStep,
937 TempDataEnumeratorType>(
941 rawPointerToCellCentre,
942 rawPointerToCellSize,
946 mappedPointersToQOut,
950 tempNonconservativeProductX,
951 tempNonconservativeProductY,
952 tempNonconservativeProductZ,
958 measurement.
setValue(watch.getCalendarTime());
// Undo the mappings. Inputs and pointer tables use map(delete : ...);
// maxEigenvalue uses map(from : ...), which requests a copy back to the host.
960#pragma omp target exit data map(delete : rawPointerToCellCentre[0 : numberOfCells * Dimensions]) device(targetDevice)
961#pragma omp target exit data map(delete : rawPointerToCellSize[0 : numberOfCells * Dimensions]) device(targetDevice)
962#pragma omp target exit data map(delete : mappedPointersToQIn[0 : numberOfCells]) device(targetDevice)
963#pragma omp target exit data map(delete : mappedPointersToQOut[0 : numberOfCells]) device(targetDevice)
964#pragma omp target exit data map(delete : t[0 : numberOfCells]) device(targetDevice)
965#pragma omp target exit data map(delete : dt[0 : numberOfCells]) device(targetDevice)
966#pragma omp target exit data map(from : maxEigenvalue[0 : numberOfCells]) device(targetDevice)
// Scratch arrays are unmapped with map(delete : ...).
968#pragma omp target exit data map(delete : tempFluxX[0 : fluxEnumerator.size()]) device(targetDevice)
969#pragma omp target exit data map(delete : tempFluxY[0 : fluxEnumerator.size()]) device(targetDevice)
970#pragma omp target exit data map(delete : tempFluxZ[0 : fluxEnumerator.size()]) device(targetDevice)
971#pragma omp target exit data map(delete : tempNonconservativeProductX[0 : ncpEnumerator.size()]) device(targetDevice)
972#pragma omp target exit data map(delete : tempNonconservativeProductY[0 : ncpEnumerator.size()]) device(targetDevice)
973#pragma omp target exit data map(delete : tempNonconservativeProductZ[0 : ncpEnumerator.size()]) device(targetDevice)
974#pragma omp target exit data map(delete : tempEigenvalueX[0 : eigenvalueEnumerator.size()]) device(targetDevice)
975#pragma omp target exit data map(delete : tempEigenvalueY[0 : eigenvalueEnumerator.size()]) device(targetDevice)
976#pragma omp target exit data map(delete : tempEigenvalueZ[0 : eigenvalueEnumerator.size()]) device(targetDevice)
// Per patch: QIn is unmapped with `delete`, QOut with `from` (copy back to
// the host requested).
978 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
979 const double* currentQIn = patchData.
QIn[patchIndex];
980 double* currentQOut = patchData.
QOut[patchIndex];
981#pragma omp target exit data map(delete : currentQIn[0 : QInEnumerator.size()]) device(targetDevice)
982#pragma omp target exit data map(from : currentQOut[0 : QOutEnumerator.size()]) device(targetDevice)
// Free the host-side pointer tables and scratch arrays.
985 delete[] mappedPointersToQIn;
986 delete[] mappedPointersToQOut;
// NOTE(review): the bodies of the three tempFlux null checks are not
// visible in this excerpt; presumably they mirror the delete[] calls shown
// for the other arrays — confirm against the full file.
988 if (tempFluxX !=
nullptr) {
991 if (tempFluxY !=
nullptr) {
994 if (tempFluxZ !=
nullptr) {
997 if (tempNonconservativeProductX !=
nullptr) {
998 delete[] tempNonconservativeProductX;
1000 if (tempNonconservativeProductY !=
nullptr) {
1001 delete[] tempNonconservativeProductY;
1003 if (tempNonconservativeProductZ !=
nullptr) {
1004 delete[] tempNonconservativeProductZ;
1006 if (tempEigenvalueX !=
nullptr) {
1007 delete[] tempEigenvalueX;
1009 if (tempEigenvalueY !=
nullptr) {
1010 delete[] tempEigenvalueY;
1012 if (tempEigenvalueZ !=
nullptr) {
1013 delete[] tempEigenvalueZ;
1016 logTraceOut(
"timeStepWithRusanovBatchedHeapStateless()");
1022 int NumberOfVolumesPerAxisInPatch,
1024 int NumberOfUnknowns,
1025 int NumberOfAuxiliaryVariables,
1027 bool EvaluateNonconservativeProduct,
1028 bool EvaluateSource,
1029 bool EvaluateMaximumEigenvalueAfterTimeStep,
1030 class TempDataEnumeratorType>
1035 logTraceIn(
"timeStepWithRusanovBatchedHeapStateless()");
1037 const enumerator::AoSLexicographicEnumerator QInEnumerator(1, NumberOfVolumesPerAxisInPatch,
HaloSize, NumberOfUnknowns, NumberOfAuxiliaryVariables);
1038 const enumerator::AoSLexicographicEnumerator QOutEnumerator(1, NumberOfVolumesPerAxisInPatch, 0, NumberOfUnknowns, NumberOfAuxiliaryVariables);
1039 const TempDataEnumeratorType fluxEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, NumberOfUnknowns, 0);
1040 const TempDataEnumeratorType ncpEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch + 1,
HaloSize, NumberOfUnknowns, 0);
1041 const TempDataEnumeratorType eigenvalueEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, 1, 0);
1043 double* tempFluxX =
new double[fluxEnumerator.size()];
1044 double* tempFluxY =
new double[fluxEnumerator.size()];
1045 double* tempFluxZ =
new double[fluxEnumerator.size()];
1046 double* tempNonconservativeProductX =
new double[ncpEnumerator.size()];
1047 double* tempNonconservativeProductY =
new double[ncpEnumerator.size()];
1048 double* tempNonconservativeProductZ =
new double[ncpEnumerator.size()];
1049 double* tempEigenvalueX =
new double[eigenvalueEnumerator.size()];
1050 double* tempEigenvalueY =
new double[eigenvalueEnumerator.size()];
1051 double* tempEigenvalueZ =
new double[eigenvalueEnumerator.size()];
1053#pragma omp target enter data map(alloc : tempFluxX[0 : fluxEnumerator.size()]) device(targetDevice)
1054#pragma omp target enter data map(alloc : tempFluxY[0 : fluxEnumerator.size()]) device(targetDevice)
1055#pragma omp target enter data map(alloc : tempFluxZ[0 : fluxEnumerator.size()]) device(targetDevice)
1056#pragma omp target enter data map(alloc : tempNonconservativeProductX[0 : ncpEnumerator.size()]) device(targetDevice)
1057#pragma omp target enter data map(alloc : tempNonconservativeProductY[0 : ncpEnumerator.size()]) device(targetDevice)
1058#pragma omp target enter data map(alloc : tempNonconservativeProductZ[0 : ncpEnumerator.size()]) device(targetDevice)
1059#pragma omp target enter data map(alloc : tempEigenvalueX[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1060#pragma omp target enter data map(alloc : tempEigenvalueY[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1061#pragma omp target enter data map(alloc : tempEigenvalueZ[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1063 double** mappedPointersToQIn =
new double*[patchData.numberOfCells];
1064 double** mappedPointersToQOut =
new double*[patchData.numberOfCells];
1066 for (
int patchIndex = 0; patchIndex < patchData.numberOfCells; patchIndex++) {
1067 const double* currentQIn = patchData.QIn[patchIndex];
1068 double* currentQOut = patchData.QOut[patchIndex];
1069#pragma omp target enter data map(to : currentQIn[0 : QInEnumerator.size()]) device(targetDevice)
1070#pragma omp target enter data map(alloc : currentQOut[0 : QOutEnumerator.size()]) device(targetDevice)
1071 mappedPointersToQIn[patchIndex] =
static_cast<double*
>(omp_get_mapped_ptr(currentQIn, targetDevice));
1072 mappedPointersToQOut[patchIndex] =
static_cast<double*
>(omp_get_mapped_ptr(currentQOut, targetDevice));
1075 const int numberOfCells = patchData.numberOfCells;
1076 double* rawPointerToCellCentre = patchData.cellCentre[0].data();
1077 double* rawPointerToCellSize = patchData.cellSize[0].data();
1078 double*
t = patchData.t;
1079 double*
dt = patchData.dt;
1082#pragma omp target enter data map(to : rawPointerToCellCentre[0 : numberOfCells * Dimensions]) device(targetDevice)
1083#pragma omp target enter data map(to : rawPointerToCellSize[0 : numberOfCells * Dimensions]) device(targetDevice)
1084#pragma omp target enter data map(to : mappedPointersToQIn[0 : numberOfCells]) device(targetDevice)
1085#pragma omp target enter data map(to : mappedPointersToQOut[0 : numberOfCells]) device(targetDevice)
1086#pragma omp target enter data map(to : t[0 : numberOfCells]) device(targetDevice)
1087#pragma omp target enter data map(to : dt[0 : numberOfCells]) device(targetDevice)
1088#pragma omp target enter data map(alloc : maxEigenvalue[0 : numberOfCells]) device(targetDevice)
1090 internal::timeStepWithRusanovBatchedStateless<
1092 NumberOfVolumesPerAxisInPatch,
1095 NumberOfAuxiliaryVariables,
1097 EvaluateNonconservativeProduct,
1099 EvaluateMaximumEigenvalueAfterTimeStep,
1100 TempDataEnumeratorType>(
1103 mappedPointersToQIn,
1104 rawPointerToCellCentre,
1105 rawPointerToCellSize,
1109 mappedPointersToQOut,
1113 tempNonconservativeProductX,
1114 tempNonconservativeProductY,
1115 tempNonconservativeProductZ,
1121#pragma omp target exit data map(delete : rawPointerToCellCentre[0 : numberOfCells * Dimensions]) device(targetDevice)
1122#pragma omp target exit data map(delete : rawPointerToCellSize[0 : numberOfCells * Dimensions]) device(targetDevice)
1123#pragma omp target exit data map(delete : mappedPointersToQIn[0 : numberOfCells]) device(targetDevice)
1124#pragma omp target exit data map(delete : mappedPointersToQOut[0 : numberOfCells]) device(targetDevice)
1125#pragma omp target exit data map(delete : t[0 : numberOfCells]) device(targetDevice)
1126#pragma omp target exit data map(delete : dt[0 : numberOfCells]) device(targetDevice)
1127#pragma omp target exit data map(from : maxEigenvalue[0 : numberOfCells]) device(targetDevice)
1129#pragma omp target exit data map(delete : tempFluxX[0 : fluxEnumerator.size()]) device(targetDevice)
1130#pragma omp target exit data map(delete : tempFluxY[0 : fluxEnumerator.size()]) device(targetDevice)
1131#pragma omp target exit data map(delete : tempFluxZ[0 : fluxEnumerator.size()]) device(targetDevice)
1132#pragma omp target exit data map(delete : tempNonconservativeProductX[0 : ncpEnumerator.size()]) device(targetDevice)
1133#pragma omp target exit data map(delete : tempNonconservativeProductY[0 : ncpEnumerator.size()]) device(targetDevice)
1134#pragma omp target exit data map(delete : tempNonconservativeProductZ[0 : ncpEnumerator.size()]) device(targetDevice)
1135#pragma omp target exit data map(delete : tempEigenvalueX[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1136#pragma omp target exit data map(delete : tempEigenvalueY[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1137#pragma omp target exit data map(delete : tempEigenvalueZ[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1139 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
1140 const double* currentQIn = patchData.QIn[patchIndex];
1141 double* currentQOut = patchData.QOut[patchIndex];
1142#pragma omp target exit data map(delete : currentQIn[0 : QInEnumerator.size()]) device(targetDevice)
1143#pragma omp target exit data map(from : currentQOut[0 : QOutEnumerator.size()]) device(targetDevice)
1146 delete[] mappedPointersToQIn;
1147 delete[] mappedPointersToQOut;
1149 if (tempFluxX !=
nullptr) {
1152 if (tempFluxY !=
nullptr) {
1155 if (tempFluxZ !=
nullptr) {
1158 if (tempNonconservativeProductX !=
nullptr) {
1159 delete[] tempNonconservativeProductX;
1161 if (tempNonconservativeProductY !=
nullptr) {
1162 delete[] tempNonconservativeProductY;
1164 if (tempNonconservativeProductZ !=
nullptr) {
1165 delete[] tempNonconservativeProductZ;
1167 if (tempEigenvalueX !=
nullptr) {
1168 delete[] tempEigenvalueX;
1170 if (tempEigenvalueY !=
nullptr) {
1171 delete[] tempEigenvalueY;
1173 if (tempEigenvalueZ !=
nullptr) {
1174 delete[] tempEigenvalueZ;
1177 logTraceOut(
"timeStepWithRusanovBatchedHeapStateless()");
static constexpr int HaloSize
#define KeywordToAvoidDuplicateSymbolsForInlinedFunctions
#define logTraceOut(methodName)
#define logTraceIn(methodName)
tarch::logging::Log _log("::")
void setValue(const double &value)
Set the value.
A simple class that has to be included to measure the clock ticks required for an operation.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions double maxEigenvalue(const double *const Q, int normal, const double CCZ4e, const double CCZ4ds, const double CCZ4GLMc, const double CCZ4GLMd) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions GPUCallableInlineMethod void updateSolutionWithNonconservativeFlux(const double *__restrict__ ncpX, const double *__restrict__ ncpY, const double *__restrict__ ncpZ, const NCPFaceEnumeratorType &ncpEnumerator, const ::tarch::la::Vector< Dimensions, double > &patchCentre, const ::tarch::la::Vector< Dimensions, double > &patchSize, int patchIndex, const ::tarch::la::Vector< Dimensions, int > &volumeIndex, int unknown, double dt, double *__restrict__ QOut, const QOutEnumeratorType &QOutEnumerator) InlineMethod
Add the non-conservative flux contributions to one volume.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions GPUCallableInlineMethod void copySolution(const double *__restrict__ QIn, const QInEnumeratorType &QInEnumerator, int patchIndex, const ::tarch::la::Vector< Dimensions, int > &volumeIndex, int unknown, double *__restrict__ QOut, const QOutEnumeratorType &QOutEnumerator) InlineMethod
Copy solution from QIn to QOut.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions GPUCallableInlineMethod void updateSolutionWithFlux(const double *__restrict__ tempFluxX, const double *__restrict__ tempFluxY, const double *__restrict__ tempFluxZ, const FluxEnumeratorType &fluxEnumerator, const ::tarch::la::Vector< Dimensions, double > &patchCentre, const ::tarch::la::Vector< Dimensions, double > &patchSize, int patchIndex, const ::tarch::la::Vector< Dimensions, int > &volumeIndex, int unknown, double dt, double *__restrict__ QOut, const QOutEnumeratorType &QOutEnumerator) InlineMethod
Update one volume with the flux contribution.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions GPUCallableInlineMethod void updateSolutionWithEigenvalueDamping(const double *__restrict__ QIn, const QInEnumeratorType &QInEnumerator, const double *__restrict__ tempMaxEigenvalueX, const double *__restrict__ tempMaxEigenvalueY, const double *__restrict__ tempMaxEigenvalueZ, const MaxEigenvalueEnumeratorType &eigenvalueEnumerator, const ::tarch::la::Vector< Dimensions, double > &patchCentre, const ::tarch::la::Vector< Dimensions, double > &patchSize, int patchIndex, const ::tarch::la::Vector< Dimensions, int > &volumeIndex, int unknown, double dt, double *__restrict__ QOut, const QOutEnumeratorType &QOutEnumerator) InlineMethod
Uses the eigenvalues to damp the solution update.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedStateless(int targetDevice, int numberOfCells, double **mappedPointersToQIn, const double *rawPointerToCellCentre, const double *rawPointerToCellSize, const double *t, const double *dt, double *maxEigenvalue, double **mappedPointersToQOut, double *tempFluxX, double *tempFluxY, double *tempFluxZ, double *tempNonconservativeProductX, double *tempNonconservativeProductY, double *tempNonconservativeProductZ, double *tempEigenvalueX, double *tempEigenvalueY, double *tempEigenvalueZ) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedHeapStateless(int targetDevice, CellData< double, double > &patchData, tarch::timing::Measurement &measurement) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedUSMStateless(int targetDevice, CellData< double, double > &patchData, tarch::timing::Measurement &measurement) InlineMethod
auto volumeIndex(Args... args)
Representation of a number of cells which contains all information that's required to process the sto...
outType ** QOut
Out values.
inType ** QIn
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
const int numberOfCells
As we store data as SoA, we have to know how big the actual arrays are.
double * maxEigenvalue
Out values: the maximum eigenvalue, one entry per cell.
tarch::la::Vector< Dimensions, double > * cellCentre
tarch::la::Vector< Dimensions, double > * cellSize
Array-of-structs (AoS) enumerator.
Scalar * data()
This routine returns a pointer to the first data element.
#define InlineMethod
This is the marker that is to be used after the argument list of a function declaration.