7 int NumberOfVolumesPerAxisInPatch,
10 int NumberOfAuxiliaryVariables,
12 bool EvaluateNonconservativeProduct,
14 bool EvaluateMaximumEigenvalueAfterTimeStep,
15 class TempDataEnumeratorType>
19 double** mappedPointersToQIn,
20 const double* rawPointerToCellCentre,
21 const double* rawPointerToCellSize,
24 double* maxEigenvalue,
25 double** mappedPointersToQOut,
29 double* tempNonconservativeProductX,
30 double* tempNonconservativeProductY,
31 double* tempNonconservativeProductZ,
32 double* tempEigenvalueX,
33 double* tempEigenvalueY,
34 double* tempEigenvalueZ
38 const TempDataEnumeratorType fluxEnumerator(numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, NumberOfUnknowns, 0);
39 const TempDataEnumeratorType ncpEnumerator(numberOfCells, NumberOfVolumesPerAxisInPatch + 1,
HaloSize, NumberOfUnknowns, 0);
40 const TempDataEnumeratorType eigenvalueEnumerator(numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, 1, 0);
42#pragma omp target teams distribute device(targetDevice)
43 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
47 if constexpr (EvaluateSource) {
49#pragma omp parallel for simd collapse(2)
50 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
51 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
52 loopbodies::copySolutionAndAddSourceTerm<SolverType>(
53 mappedPointersToQIn[patchIndex],
55 rawPointerToCellCentre[patchIndex],
56 rawPointerToCellSize[patchIndex],
61 mappedPointersToQOut[patchIndex],
67#pragma omp parallel for simd collapse(3)
68 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
69 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
70 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
71 loopbodies::copySolutionAndAddSourceTerm<SolverType>(
72 mappedPointersToQIn[patchIndex],
74 rawPointerToCellCentre[patchIndex],
75 rawPointerToCellSize[patchIndex],
80 mappedPointersToQOut[patchIndex],
89#pragma omp parallel for simd collapse(3)
90 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
91 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
92 for (
int unknown = 0; unknown < NumberOfUnknowns + NumberOfAuxiliaryVariables; unknown++) {
98#pragma omp parallel for simd collapse(4)
99 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
100 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
101 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
102 for (
int unknown = 0; unknown < NumberOfUnknowns + NumberOfAuxiliaryVariables; unknown++) {
104 mappedPointersToQIn[patchIndex], QInEnumerator, patchIndex,
volumeIndex(x, y, z), unknown, mappedPointersToQOut[patchIndex], QOutEnumerator
117#pragma omp parallel for simd collapse(2)
118 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
119 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; x++) {
120 loopbodies::computeMaxEigenvalue<SolverType>(
121 mappedPointersToQIn[patchIndex],
123 rawPointerToCellCentre[patchIndex],
124 rawPointerToCellSize[patchIndex],
136#pragma omp parallel for simd collapse(2)
137 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; y++) {
138 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
139 loopbodies::computeMaxEigenvalue<SolverType>(
140 mappedPointersToQIn[patchIndex],
142 rawPointerToCellCentre[patchIndex],
143 rawPointerToCellSize[patchIndex],
155#pragma omp parallel for simd collapse(3)
156 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
157 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
158 for (
int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
160 mappedPointersToQIn[patchIndex],
165 eigenvalueEnumerator,
166 rawPointerToCellCentre[patchIndex],
167 rawPointerToCellSize[patchIndex],
172 mappedPointersToQOut[patchIndex],
179#pragma omp parallel for simd collapse(3)
180 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
181 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
182 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; x++) {
183 loopbodies::computeMaxEigenvalue<SolverType>(
184 mappedPointersToQIn[patchIndex],
186 rawPointerToCellCentre[patchIndex],
187 rawPointerToCellSize[patchIndex],
200#pragma omp parallel for simd collapse(3)
201 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
202 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; y++) {
203 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
204 loopbodies::computeMaxEigenvalue<SolverType>(
205 mappedPointersToQIn[patchIndex],
207 rawPointerToCellCentre[patchIndex],
208 rawPointerToCellSize[patchIndex],
221#pragma omp parallel for simd collapse(3)
222 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; z++) {
223 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
224 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
225 loopbodies::computeMaxEigenvalue<SolverType>(
226 mappedPointersToQIn[patchIndex],
228 rawPointerToCellCentre[patchIndex],
229 rawPointerToCellSize[patchIndex],
242#pragma omp parallel for simd collapse(4)
243 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
244 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
245 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
246 for (
int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
248 mappedPointersToQIn[patchIndex],
253 eigenvalueEnumerator,
254 rawPointerToCellCentre[patchIndex],
255 rawPointerToCellSize[patchIndex],
260 mappedPointersToQOut[patchIndex],
273 if constexpr (EvaluateFlux) {
275#pragma omp parallel for simd collapse(2)
276 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
277 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; x++) {
278 loopbodies::computeFlux<SolverType>(
279 mappedPointersToQIn[patchIndex],
281 rawPointerToCellCentre[patchIndex],
282 rawPointerToCellSize[patchIndex],
294#pragma omp parallel for simd collapse(2)
295 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; y++) {
296 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
297 loopbodies::computeFlux<SolverType>(
298 mappedPointersToQIn[patchIndex],
300 rawPointerToCellCentre[patchIndex],
301 rawPointerToCellSize[patchIndex],
313#pragma omp parallel for simd collapse(3)
314 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
315 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
316 for (
int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
322 rawPointerToCellCentre[patchIndex],
323 rawPointerToCellSize[patchIndex],
328 mappedPointersToQOut[patchIndex],
335#pragma omp parallel for simd collapse(3)
336 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
337 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
338 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; x++) {
339 loopbodies::computeFlux<SolverType>(
340 mappedPointersToQIn[patchIndex],
342 rawPointerToCellCentre[patchIndex],
343 rawPointerToCellSize[patchIndex],
356#pragma omp parallel for simd collapse(3)
357 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
358 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; y++) {
359 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
360 loopbodies::computeFlux<SolverType>(
361 mappedPointersToQIn[patchIndex],
363 rawPointerToCellCentre[patchIndex],
364 rawPointerToCellSize[patchIndex],
377#pragma omp parallel for simd collapse(3)
378 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize; z++) {
379 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
380 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
381 loopbodies::computeFlux<SolverType>(
382 mappedPointersToQIn[patchIndex],
384 rawPointerToCellCentre[patchIndex],
385 rawPointerToCellSize[patchIndex],
398#pragma omp parallel for simd collapse(4)
399 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
400 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
401 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
402 for (
int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
408 rawPointerToCellCentre[patchIndex],
409 rawPointerToCellSize[patchIndex],
414 mappedPointersToQOut[patchIndex],
424 if constexpr (EvaluateNonconservativeProduct) {
426#pragma omp parallel for simd collapse(2)
427 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
428 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize - 1; x++) {
429 loopbodies::computeNonconservativeFlux<SolverType>(
430 mappedPointersToQIn[patchIndex],
432 rawPointerToCellCentre[patchIndex],
433 rawPointerToCellSize[patchIndex],
439 tempNonconservativeProductX,
445#pragma omp parallel for simd collapse(2)
446 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize - 1; y++) {
447 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
448 loopbodies::computeNonconservativeFlux<SolverType>(
449 mappedPointersToQIn[patchIndex],
451 rawPointerToCellCentre[patchIndex],
452 rawPointerToCellSize[patchIndex],
458 tempNonconservativeProductY,
464#pragma omp parallel for simd collapse(3)
465 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
466 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
467 for (
int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
469 tempNonconservativeProductX,
470 tempNonconservativeProductY,
471 tempNonconservativeProductZ,
473 rawPointerToCellCentre[patchIndex],
474 rawPointerToCellSize[patchIndex],
479 mappedPointersToQOut[patchIndex],
486#pragma omp parallel for simd collapse(3)
487 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
488 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
489 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize - 1; x++) {
490 loopbodies::computeNonconservativeFlux<SolverType>(
491 mappedPointersToQIn[patchIndex],
493 rawPointerToCellCentre[patchIndex],
494 rawPointerToCellSize[patchIndex],
500 tempNonconservativeProductX,
507#pragma omp parallel for simd collapse(3)
508 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
509 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize - 1; y++) {
510 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
511 loopbodies::computeNonconservativeFlux<SolverType>(
512 mappedPointersToQIn[patchIndex],
514 rawPointerToCellCentre[patchIndex],
515 rawPointerToCellSize[patchIndex],
521 tempNonconservativeProductY,
528#pragma omp parallel for simd collapse(3)
529 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch + 2 *
HaloSize - 1; z++) {
530 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
531 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
532 loopbodies::computeNonconservativeFlux<SolverType>(
533 mappedPointersToQIn[patchIndex],
535 rawPointerToCellCentre[patchIndex],
536 rawPointerToCellSize[patchIndex],
542 tempNonconservativeProductZ,
549#pragma omp parallel for simd collapse(4)
550 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
551 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
552 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
553 for (
int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
555 tempNonconservativeProductX,
556 tempNonconservativeProductY,
557 tempNonconservativeProductZ,
559 rawPointerToCellCentre[patchIndex],
560 rawPointerToCellSize[patchIndex],
565 mappedPointersToQOut[patchIndex],
575 if constexpr (EvaluateMaximumEigenvalueAfterTimeStep) {
576 double newMaxEigenvalue = 0.0;
578#pragma omp parallel for simd collapse(2) reduction(max : newMaxEigenvalue)
579 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
580 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
581 newMaxEigenvalue = std::max(
583 loopbodies::reduceMaxEigenvalue<SolverType>(
584 mappedPointersToQOut[patchIndex],
586 rawPointerToCellCentre[patchIndex],
587 rawPointerToCellSize[patchIndex],
597#pragma omp parallel for simd collapse(3) reduction(max : newMaxEigenvalue)
598 for (
int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
599 for (
int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
600 for (
int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
601 newMaxEigenvalue = std::max(
603 loopbodies::reduceMaxEigenvalue<SolverType>(
604 mappedPointersToQOut[patchIndex],
606 rawPointerToCellCentre[patchIndex],
607 rawPointerToCellSize[patchIndex],
618 maxEigenvalue[patchIndex] = newMaxEigenvalue;
627 int NumberOfVolumesPerAxisInPatch,
629 int NumberOfUnknowns,
630 int NumberOfAuxiliaryVariables,
632 bool EvaluateNonconservativeProduct,
634 bool EvaluateMaximumEigenvalueAfterTimeStep,
635 class TempDataEnumeratorType>
640 logTraceIn(
"timeStepWithRusanovPatchwiseUSMStateless()");
644 const TempDataEnumeratorType fluxEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, NumberOfUnknowns, 0);
645 const TempDataEnumeratorType ncpEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch + 1,
HaloSize, NumberOfUnknowns, 0);
646 const TempDataEnumeratorType eigenvalueEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, 1, 0);
648 double* tempFluxX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(fluxEnumerator.size(), targetDevice);
649 double* tempFluxY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(fluxEnumerator.size(), targetDevice);
650 double* tempFluxZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(Dimensions == 3 ? fluxEnumerator.size() : 1, targetDevice);
651 double* tempNonconservativeProductX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(ncpEnumerator.size(), targetDevice);
652 double* tempNonconservativeProductY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(ncpEnumerator.size(), targetDevice);
653 double* tempNonconservativeProductZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(Dimensions == 3 ? ncpEnumerator.size() : 1, targetDevice);
654 double* tempEigenvalueX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(eigenvalueEnumerator.size(), targetDevice);
655 double* tempEigenvalueY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(eigenvalueEnumerator.size(), targetDevice);
656 double* tempEigenvalueZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(Dimensions == 3 ? eigenvalueEnumerator.size() : 1, targetDevice);
659 double** mappedPointersToQIn = patchData.
QIn;
660 double** mappedPointersToQOut = patchData.
QOut;
662 double* rawPointerToCellSize = patchData.
cellSize[0].
data();
663 double* t = patchData.
t;
664 double* dt = patchData.
dt;
667 tarch::timing::Watch watch(
"exahype2::fv::rusanov::omp",
"timeStepWithRusanovPatchwiseUSMStateless",
false,
true);
668 internal::timeStepWithRusanovPatchwiseStateless<
670 NumberOfVolumesPerAxisInPatch,
673 NumberOfAuxiliaryVariables,
675 EvaluateNonconservativeProduct,
677 EvaluateMaximumEigenvalueAfterTimeStep,
678 TempDataEnumeratorType>(
682 rawPointerToCellCentre,
683 rawPointerToCellSize,
687 mappedPointersToQOut,
691 tempNonconservativeProductX,
692 tempNonconservativeProductY,
693 tempNonconservativeProductZ,
699 measurement.
setValue(watch.getCalendarTime());
701 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxX, targetDevice);
702 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxY, targetDevice);
703 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxZ, targetDevice);
704 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductX, targetDevice);
705 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductY, targetDevice);
706 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductZ, targetDevice);
707 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueX, targetDevice);
708 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueY, targetDevice);
709 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueZ, targetDevice);
711 logTraceOut(
"timeStepWithRusanovPatchwiseUSMStateless()");
717 int NumberOfVolumesPerAxisInPatch,
719 int NumberOfUnknowns,
720 int NumberOfAuxiliaryVariables,
722 bool EvaluateNonconservativeProduct,
724 bool EvaluateMaximumEigenvalueAfterTimeStep,
725 class TempDataEnumeratorType>
730 logTraceIn(
"timeStepWithRusanovPatchwiseUSMStateless()");
734 const TempDataEnumeratorType fluxEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, NumberOfUnknowns, 0);
735 const TempDataEnumeratorType ncpEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch + 1,
HaloSize, NumberOfUnknowns, 0);
736 const TempDataEnumeratorType eigenvalueEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, 1, 0);
738 double* tempFluxX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(fluxEnumerator.size(), targetDevice);
739 double* tempFluxY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(fluxEnumerator.size(), targetDevice);
740 double* tempFluxZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(Dimensions == 3 ? fluxEnumerator.size() : 1, targetDevice);
741 double* tempNonconservativeProductX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(ncpEnumerator.size(), targetDevice);
742 double* tempNonconservativeProductY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(ncpEnumerator.size(), targetDevice);
743 double* tempNonconservativeProductZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(Dimensions == 3 ? ncpEnumerator.size() : 1, targetDevice);
744 double* tempEigenvalueX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(eigenvalueEnumerator.size(), targetDevice);
745 double* tempEigenvalueY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(eigenvalueEnumerator.size(), targetDevice);
746 double* tempEigenvalueZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<
double>(Dimensions == 3 ? eigenvalueEnumerator.size() : 1, targetDevice);
749 double** mappedPointersToQIn = patchData.
QIn;
750 double** mappedPointersToQOut = patchData.
QOut;
752 double* rawPointerToCellSize = patchData.
cellSize[0].
data();
753 double* t = patchData.
t;
754 double* dt = patchData.
dt;
757 internal::timeStepWithRusanovPatchwiseStateless<
759 NumberOfVolumesPerAxisInPatch,
762 NumberOfAuxiliaryVariables,
764 EvaluateNonconservativeProduct,
766 EvaluateMaximumEigenvalueAfterTimeStep,
767 TempDataEnumeratorType>(
771 rawPointerToCellCentre,
772 rawPointerToCellSize,
776 mappedPointersToQOut,
780 tempNonconservativeProductX,
781 tempNonconservativeProductY,
782 tempNonconservativeProductZ,
788 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxX, targetDevice);
789 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxY, targetDevice);
790 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxZ, targetDevice);
791 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductX, targetDevice);
792 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductY, targetDevice);
793 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductZ, targetDevice);
794 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueX, targetDevice);
795 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueY, targetDevice);
796 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueZ, targetDevice);
798 logTraceOut(
"timeStepWithRusanovPatchwiseUSMStateless()");
804 int NumberOfVolumesPerAxisInPatch,
806 int NumberOfUnknowns,
807 int NumberOfAuxiliaryVariables,
809 bool EvaluateNonconservativeProduct,
811 bool EvaluateMaximumEigenvalueAfterTimeStep,
812 class TempDataEnumeratorType>
817 logTraceIn(
"timeStepWithRusanovPatchwiseHeapStateless()");
821 const TempDataEnumeratorType fluxEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, NumberOfUnknowns, 0);
822 const TempDataEnumeratorType ncpEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch + 1,
HaloSize, NumberOfUnknowns, 0);
823 const TempDataEnumeratorType eigenvalueEnumerator(patchData.
numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, 1, 0);
825 double* tempFluxX =
new double[fluxEnumerator.size()];
826 double* tempFluxY =
new double[fluxEnumerator.size()];
827 double* tempFluxZ =
new double[fluxEnumerator.size()];
828 double* tempNonconservativeProductX =
new double[ncpEnumerator.size()];
829 double* tempNonconservativeProductY =
new double[ncpEnumerator.size()];
830 double* tempNonconservativeProductZ =
new double[ncpEnumerator.size()];
831 double* tempEigenvalueX =
new double[eigenvalueEnumerator.size()];
832 double* tempEigenvalueY =
new double[eigenvalueEnumerator.size()];
833 double* tempEigenvalueZ =
new double[eigenvalueEnumerator.size()];
835#pragma omp target enter data map(alloc : tempFluxX[0 : fluxEnumerator.size()]) device(targetDevice)
836#pragma omp target enter data map(alloc : tempFluxY[0 : fluxEnumerator.size()]) device(targetDevice)
837#pragma omp target enter data map(alloc : tempFluxZ[0 : fluxEnumerator.size()]) device(targetDevice)
838#pragma omp target enter data map(alloc : tempNonconservativeProductX[0 : ncpEnumerator.size()]) device(targetDevice)
839#pragma omp target enter data map(alloc : tempNonconservativeProductY[0 : ncpEnumerator.size()]) device(targetDevice)
840#pragma omp target enter data map(alloc : tempNonconservativeProductZ[0 : ncpEnumerator.size()]) device(targetDevice)
841#pragma omp target enter data map(alloc : tempEigenvalueX[0 : eigenvalueEnumerator.size()]) device(targetDevice)
842#pragma omp target enter data map(alloc : tempEigenvalueY[0 : eigenvalueEnumerator.size()]) device(targetDevice)
843#pragma omp target enter data map(alloc : tempEigenvalueZ[0 : eigenvalueEnumerator.size()]) device(targetDevice)
845 double** mappedPointersToQIn =
new double*[patchData.
numberOfCells];
846 double** mappedPointersToQOut =
new double*[patchData.
numberOfCells];
848 for (
int patchIndex = 0; patchIndex < patchData.
numberOfCells; patchIndex++) {
849 const double* currentQIn = patchData.
QIn[patchIndex];
850 double* currentQOut = patchData.
QOut[patchIndex];
851#pragma omp target enter data map(to : currentQIn[0 : QInEnumerator.size()]) device(targetDevice)
852#pragma omp target enter data map(alloc : currentQOut[0 : QOutEnumerator.size()]) device(targetDevice)
853 mappedPointersToQIn[patchIndex] =
static_cast<double*
>(omp_get_mapped_ptr(currentQIn, targetDevice));
854 mappedPointersToQOut[patchIndex] =
static_cast<double*
>(omp_get_mapped_ptr(currentQOut, targetDevice));
859 double* rawPointerToCellSize = patchData.
cellSize[0].
data();
860 double* t = patchData.
t;
861 double* dt = patchData.
dt;
864#pragma omp target enter data map(to : rawPointerToCellCentre[0 : numberOfCells * Dimensions]) device(targetDevice)
865#pragma omp target enter data map(to : rawPointerToCellSize[0 : numberOfCells * Dimensions]) device(targetDevice)
866#pragma omp target enter data map(to : mappedPointersToQIn[0 : numberOfCells]) device(targetDevice)
867#pragma omp target enter data map(to : mappedPointersToQOut[0 : numberOfCells]) device(targetDevice)
868#pragma omp target enter data map(to : t[0 : numberOfCells]) device(targetDevice)
869#pragma omp target enter data map(to : dt[0 : numberOfCells]) device(targetDevice)
870#pragma omp target enter data map(alloc : maxEigenvalue[0 : numberOfCells]) device(targetDevice)
872 tarch::timing::Watch watch(
"exahype2::fv::rusanov::omp",
"timeStepWithRusanovPatchwiseHeapStateless",
false,
true);
873 internal::timeStepWithRusanovPatchwiseStateless<
875 NumberOfVolumesPerAxisInPatch,
878 NumberOfAuxiliaryVariables,
880 EvaluateNonconservativeProduct,
882 EvaluateMaximumEigenvalueAfterTimeStep,
883 TempDataEnumeratorType>(
887 rawPointerToCellCentre,
888 rawPointerToCellSize,
892 mappedPointersToQOut,
896 tempNonconservativeProductX,
897 tempNonconservativeProductY,
898 tempNonconservativeProductZ,
904 measurement.
setValue(watch.getCalendarTime());
906#pragma omp target exit data map(delete : rawPointerToCellCentre[0 : numberOfCells * Dimensions]) device(targetDevice)
907#pragma omp target exit data map(delete : rawPointerToCellSize[0 : numberOfCells * Dimensions]) device(targetDevice)
908#pragma omp target exit data map(delete : mappedPointersToQIn[0 : numberOfCells]) device(targetDevice)
909#pragma omp target exit data map(delete : mappedPointersToQOut[0 : numberOfCells]) device(targetDevice)
910#pragma omp target exit data map(delete : t[0 : numberOfCells]) device(targetDevice)
911#pragma omp target exit data map(delete : dt[0 : numberOfCells]) device(targetDevice)
912#pragma omp target exit data map(from : maxEigenvalue[0 : numberOfCells]) device(targetDevice)
914#pragma omp target exit data map(delete : tempFluxX[0 : fluxEnumerator.size()]) device(targetDevice)
915#pragma omp target exit data map(delete : tempFluxY[0 : fluxEnumerator.size()]) device(targetDevice)
916#pragma omp target exit data map(delete : tempFluxZ[0 : fluxEnumerator.size()]) device(targetDevice)
917#pragma omp target exit data map(delete : tempNonconservativeProductX[0 : ncpEnumerator.size()]) device(targetDevice)
918#pragma omp target exit data map(delete : tempNonconservativeProductY[0 : ncpEnumerator.size()]) device(targetDevice)
919#pragma omp target exit data map(delete : tempNonconservativeProductZ[0 : ncpEnumerator.size()]) device(targetDevice)
920#pragma omp target exit data map(delete : tempEigenvalueX[0 : eigenvalueEnumerator.size()]) device(targetDevice)
921#pragma omp target exit data map(delete : tempEigenvalueY[0 : eigenvalueEnumerator.size()]) device(targetDevice)
922#pragma omp target exit data map(delete : tempEigenvalueZ[0 : eigenvalueEnumerator.size()]) device(targetDevice)
924 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
925 const double* currentQIn = patchData.
QIn[patchIndex];
926 double* currentQOut = patchData.
QOut[patchIndex];
927#pragma omp target exit data map(delete : currentQIn[0 : QInEnumerator.size()]) device(targetDevice)
928#pragma omp target exit data map(from : currentQOut[0 : QOutEnumerator.size()]) device(targetDevice)
931 delete[] mappedPointersToQIn;
932 delete[] mappedPointersToQOut;
934 if (tempFluxX !=
nullptr) {
937 if (tempFluxY !=
nullptr) {
940 if (tempFluxZ !=
nullptr) {
943 if (tempNonconservativeProductX !=
nullptr) {
944 delete[] tempNonconservativeProductX;
946 if (tempNonconservativeProductY !=
nullptr) {
947 delete[] tempNonconservativeProductY;
949 if (tempNonconservativeProductZ !=
nullptr) {
950 delete[] tempNonconservativeProductZ;
952 if (tempEigenvalueX !=
nullptr) {
953 delete[] tempEigenvalueX;
955 if (tempEigenvalueY !=
nullptr) {
956 delete[] tempEigenvalueY;
958 if (tempEigenvalueZ !=
nullptr) {
959 delete[] tempEigenvalueZ;
962 logTraceOut(
"timeStepWithRusanovPatchwiseHeapStateless()");
968 int NumberOfVolumesPerAxisInPatch,
970 int NumberOfUnknowns,
971 int NumberOfAuxiliaryVariables,
973 bool EvaluateNonconservativeProduct,
975 bool EvaluateMaximumEigenvalueAfterTimeStep,
976 class TempDataEnumeratorType>
981 logTraceIn(
"timeStepWithRusanovPatchwiseHeapStateless()");
983 const enumerator::AoSLexicographicEnumerator QInEnumerator(1, NumberOfVolumesPerAxisInPatch,
HaloSize, NumberOfUnknowns, NumberOfAuxiliaryVariables);
984 const enumerator::AoSLexicographicEnumerator QOutEnumerator(1, NumberOfVolumesPerAxisInPatch, 0, NumberOfUnknowns, NumberOfAuxiliaryVariables);
985 const TempDataEnumeratorType fluxEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, NumberOfUnknowns, 0);
986 const TempDataEnumeratorType ncpEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch + 1,
HaloSize, NumberOfUnknowns, 0);
987 const TempDataEnumeratorType eigenvalueEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch,
HaloSize, 1, 0);
989 double* tempFluxX =
new double[fluxEnumerator.size()];
990 double* tempFluxY =
new double[fluxEnumerator.size()];
991 double* tempFluxZ =
new double[fluxEnumerator.size()];
992 double* tempNonconservativeProductX =
new double[ncpEnumerator.size()];
993 double* tempNonconservativeProductY =
new double[ncpEnumerator.size()];
994 double* tempNonconservativeProductZ =
new double[ncpEnumerator.size()];
995 double* tempEigenvalueX =
new double[eigenvalueEnumerator.size()];
996 double* tempEigenvalueY =
new double[eigenvalueEnumerator.size()];
997 double* tempEigenvalueZ =
new double[eigenvalueEnumerator.size()];
999#pragma omp target enter data map(alloc : tempFluxX[0 : fluxEnumerator.size()]) device(targetDevice)
1000#pragma omp target enter data map(alloc : tempFluxY[0 : fluxEnumerator.size()]) device(targetDevice)
1001#pragma omp target enter data map(alloc : tempFluxZ[0 : fluxEnumerator.size()]) device(targetDevice)
1002#pragma omp target enter data map(alloc : tempNonconservativeProductX[0 : ncpEnumerator.size()]) device(targetDevice)
1003#pragma omp target enter data map(alloc : tempNonconservativeProductY[0 : ncpEnumerator.size()]) device(targetDevice)
1004#pragma omp target enter data map(alloc : tempNonconservativeProductZ[0 : ncpEnumerator.size()]) device(targetDevice)
1005#pragma omp target enter data map(alloc : tempEigenvalueX[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1006#pragma omp target enter data map(alloc : tempEigenvalueY[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1007#pragma omp target enter data map(alloc : tempEigenvalueZ[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1009 double** mappedPointersToQIn =
new double*[patchData.numberOfCells];
1010 double** mappedPointersToQOut =
new double*[patchData.numberOfCells];
1012 for (
int patchIndex = 0; patchIndex < patchData.numberOfCells; patchIndex++) {
1013 const double* currentQIn = patchData.QIn[patchIndex];
1014 double* currentQOut = patchData.QOut[patchIndex];
1015#pragma omp target enter data map(to : currentQIn[0 : QInEnumerator.size()]) device(targetDevice)
1016#pragma omp target enter data map(alloc : currentQOut[0 : QOutEnumerator.size()]) device(targetDevice)
1017 mappedPointersToQIn[patchIndex] =
static_cast<double*
>(omp_get_mapped_ptr(currentQIn, targetDevice));
1018 mappedPointersToQOut[patchIndex] =
static_cast<double*
>(omp_get_mapped_ptr(currentQOut, targetDevice));
1021 const int numberOfCells = patchData.numberOfCells;
1022 double* rawPointerToCellCentre = patchData.cellCentre[0].data();
1023 double* rawPointerToCellSize = patchData.cellSize[0].data();
1024 double*
t = patchData.t;
1025 double*
dt = patchData.dt;
1028#pragma omp target enter data map(to : rawPointerToCellCentre[0 : numberOfCells * Dimensions]) device(targetDevice)
1029#pragma omp target enter data map(to : rawPointerToCellSize[0 : numberOfCells * Dimensions]) device(targetDevice)
1030#pragma omp target enter data map(to : mappedPointersToQIn[0 : numberOfCells]) device(targetDevice)
1031#pragma omp target enter data map(to : mappedPointersToQOut[0 : numberOfCells]) device(targetDevice)
1032#pragma omp target enter data map(to : t[0 : numberOfCells]) device(targetDevice)
1033#pragma omp target enter data map(to : dt[0 : numberOfCells]) device(targetDevice)
1034#pragma omp target enter data map(alloc : maxEigenvalue[0 : numberOfCells]) device(targetDevice)
1036 internal::timeStepWithRusanovPatchwiseStateless<
1038 NumberOfVolumesPerAxisInPatch,
1041 NumberOfAuxiliaryVariables,
1043 EvaluateNonconservativeProduct,
1045 EvaluateMaximumEigenvalueAfterTimeStep,
1046 TempDataEnumeratorType>(
1049 mappedPointersToQIn,
1050 rawPointerToCellCentre,
1051 rawPointerToCellSize,
1055 mappedPointersToQOut,
1059 tempNonconservativeProductX,
1060 tempNonconservativeProductY,
1061 tempNonconservativeProductZ,
1067#pragma omp target exit data map(delete : rawPointerToCellCentre[0 : numberOfCells * Dimensions]) device(targetDevice)
1068#pragma omp target exit data map(delete : rawPointerToCellSize[0 : numberOfCells * Dimensions]) device(targetDevice)
1069#pragma omp target exit data map(delete : mappedPointersToQIn[0 : numberOfCells]) device(targetDevice)
1070#pragma omp target exit data map(delete : mappedPointersToQOut[0 : numberOfCells]) device(targetDevice)
1071#pragma omp target exit data map(delete : t[0 : numberOfCells]) device(targetDevice)
1072#pragma omp target exit data map(delete : dt[0 : numberOfCells]) device(targetDevice)
1073#pragma omp target exit data map(from : maxEigenvalue[0 : numberOfCells]) device(targetDevice)
1075#pragma omp target exit data map(delete : tempFluxX[0 : fluxEnumerator.size()]) device(targetDevice)
1076#pragma omp target exit data map(delete : tempFluxY[0 : fluxEnumerator.size()]) device(targetDevice)
1077#pragma omp target exit data map(delete : tempFluxZ[0 : fluxEnumerator.size()]) device(targetDevice)
1078#pragma omp target exit data map(delete : tempNonconservativeProductX[0 : ncpEnumerator.size()]) device(targetDevice)
1079#pragma omp target exit data map(delete : tempNonconservativeProductY[0 : ncpEnumerator.size()]) device(targetDevice)
1080#pragma omp target exit data map(delete : tempNonconservativeProductZ[0 : ncpEnumerator.size()]) device(targetDevice)
1081#pragma omp target exit data map(delete : tempEigenvalueX[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1082#pragma omp target exit data map(delete : tempEigenvalueY[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1083#pragma omp target exit data map(delete : tempEigenvalueZ[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1085 for (
int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
1086 const double* currentQIn = patchData.QIn[patchIndex];
1087 double* currentQOut = patchData.QOut[patchIndex];
1088#pragma omp target exit data map(delete : currentQIn[0 : QInEnumerator.size()]) device(targetDevice)
1089#pragma omp target exit data map(from : currentQOut[0 : QOutEnumerator.size()]) device(targetDevice)
1092 delete[] mappedPointersToQIn;
1093 delete[] mappedPointersToQOut;
1095 if (tempFluxX !=
nullptr) {
1098 if (tempFluxY !=
nullptr) {
1101 if (tempFluxZ !=
nullptr) {
1104 if (tempNonconservativeProductX !=
nullptr) {
1105 delete[] tempNonconservativeProductX;
1107 if (tempNonconservativeProductY !=
nullptr) {
1108 delete[] tempNonconservativeProductY;
1110 if (tempNonconservativeProductZ !=
nullptr) {
1111 delete[] tempNonconservativeProductZ;
1113 if (tempEigenvalueX !=
nullptr) {
1114 delete[] tempEigenvalueX;
1116 if (tempEigenvalueY !=
nullptr) {
1117 delete[] tempEigenvalueY;
1119 if (tempEigenvalueZ !=
nullptr) {
1120 delete[] tempEigenvalueZ;
1123 logTraceOut(
"timeStepWithRusanovPatchwiseHeapStateless()");
static constexpr int HaloSize
#define KeywordToAvoidDuplicateSymbolsForInlinedFunctions
#define logTraceOut(methodName)
#define logTraceIn(methodName)
tarch::logging::Log _log("::")
void setValue(const double &value)
Set the value.
A simple class that has to be included to measure the clock ticks required for an operation.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions double maxEigenvalue(const double *const Q, int normal, const double CCZ4e, const double CCZ4ds, const double CCZ4GLMc, const double CCZ4GLMd) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions GPUCallableInlineMethod void updateSolutionWithNonconservativeFlux(const double *__restrict__ ncpX, const double *__restrict__ ncpY, const double *__restrict__ ncpZ, const NCPFaceEnumeratorType &ncpEnumerator, const ::tarch::la::Vector< Dimensions, double > &patchCentre, const ::tarch::la::Vector< Dimensions, double > &patchSize, int patchIndex, const ::tarch::la::Vector< Dimensions, int > &volumeIndex, int unknown, double dt, double *__restrict__ QOut, const QOutEnumeratorType &QOutEnumerator) InlineMethod
Add the non-conservative flux contributions to one volume.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions GPUCallableInlineMethod void copySolution(const double *__restrict__ QIn, const QInEnumeratorType &QInEnumerator, int patchIndex, const ::tarch::la::Vector< Dimensions, int > &volumeIndex, int unknown, double *__restrict__ QOut, const QOutEnumeratorType &QOutEnumerator) InlineMethod
Copy solution from QIn to QOut.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions GPUCallableInlineMethod void updateSolutionWithFlux(const double *__restrict__ tempFluxX, const double *__restrict__ tempFluxY, const double *__restrict__ tempFluxZ, const FluxEnumeratorType &fluxEnumerator, const ::tarch::la::Vector< Dimensions, double > &patchCentre, const ::tarch::la::Vector< Dimensions, double > &patchSize, int patchIndex, const ::tarch::la::Vector< Dimensions, int > &volumeIndex, int unknown, double dt, double *__restrict__ QOut, const QOutEnumeratorType &QOutEnumerator) InlineMethod
Update one volume with the flux contribution.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions GPUCallableInlineMethod void updateSolutionWithEigenvalueDamping(const double *__restrict__ QIn, const QInEnumeratorType &QInEnumerator, const double *__restrict__ tempMaxEigenvalueX, const double *__restrict__ tempMaxEigenvalueY, const double *__restrict__ tempMaxEigenvalueZ, const MaxEigenvalueEnumeratorType &eigenvalueEnumerator, const ::tarch::la::Vector< Dimensions, double > &patchCentre, const ::tarch::la::Vector< Dimensions, double > &patchSize, int patchIndex, const ::tarch::la::Vector< Dimensions, int > &volumeIndex, int unknown, double dt, double *__restrict__ QOut, const QOutEnumeratorType &QOutEnumerator) InlineMethod
Uses the eigenvalues to damp the solution update.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseStateless(int targetDevice, int numberOfCells, double **mappedPointersToQIn, const double *rawPointerToCellCentre, const double *rawPointerToCellSize, const double *t, const double *dt, double *maxEigenvalue, double **mappedPointersToQOut, double *tempFluxX, double *tempFluxY, double *tempFluxZ, double *tempNonconservativeProductX, double *tempNonconservativeProductY, double *tempNonconservativeProductZ, double *tempEigenvalueX, double *tempEigenvalueY, double *tempEigenvalueZ) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseUSMStateless(int targetDevice, CellData< double, double > &patchData, tarch::timing::Measurement &measurement) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseHeapStateless(int targetDevice, CellData< double, double > &patchData, tarch::timing::Measurement &measurement) InlineMethod
The name "patchwise" is somewhat misleading in a GPU context.
auto volumeIndex(Args... args)
Representation of a number of cells which contains all information that's required to process the sto...
outType ** QOut
Out values.
inType ** QIn
QIn cannot be const, as some kernels delete it straight away once the input data has been handled.
const int numberOfCells
As we store data as SoA, we have to know how big the actual arrays are.
double * maxEigenvalue
Out values.
tarch::la::Vector< Dimensions, double > * cellCentre
tarch::la::Vector< Dimensions, double > * cellSize
Array-of-structs (AoS) enumerator.
Scalar * data()
This routine returns a pointer to the first data element.
#define InlineMethod
This is the marker that is to be used after the argument list of a function declaration.