Peano
Loading...
Searching...
No Matches
PatchwiseStateless.cpph
Go to the documentation of this file.
1// This file is part of the ExaHyPE2 project. For conditions of distribution and
2// use, please see the copyright notice at www.peano-framework.org
3
// Device kernel shared by the patch-wise stateless Rusanov wrappers in this file.
// NOTE(review): the line carrying the function name (listing line 16) is missing
// from this listing; the wrappers further down call this routine as
// internal::timeStepWithRusanovPatchwiseStateless.
//
// One iteration of the `omp target teams distribute` loop handles one patch
// (patchIndex); the volumes of that patch are traversed by nested
// `omp parallel for simd` loops. Per patch the steps are, in order:
//   1. loopbodies::copySolutionAndAddSourceTerm (EvaluateSource) or
//      loopbodies::copySolution, both taking the patch's QIn and QOut pointers;
//   2. loopbodies::computeMaxEigenvalue per direction into tempEigenvalueX/Y/Z,
//      followed by a call (name line missing from this listing) that receives
//      those buffers together with QIn and QOut;
//   3. if EvaluateFlux: loopbodies::computeFlux per direction into tempFluxX/Y/Z,
//      followed by a call (name line missing) that receives them and QOut;
//   4. if EvaluateNonconservativeProduct: loopbodies::computeNonconservativeFlux
//      per direction into tempNonconservativeProductX/Y/Z, followed by a call
//      (name line missing) that receives them and QOut;
//   5. if EvaluateMaximumEigenvalueAfterTimeStep: max-reduction of
//      loopbodies::reduceMaxEigenvalue over the patch, stored in
//      maxEigenvalue[patchIndex].
//
// Parameters:
//   targetDevice             value handed to the device() clause
//   numberOfCells            trip count of the distribute loop (number of patches)
//   mappedPointersToQIn/Out  one pointer per patch, indexed by patchIndex
//   rawPointerToCellCentre, rawPointerToCellSize, t, dt
//                            indexed by patchIndex
//   maxEigenvalue            written per patch only in step 5
//   temp*                    scratch buffers addressed via TempDataEnumeratorType
// NOTE(review): all pointers are presumably device-accessible (mapped or USM);
// that is set up by the callers, not here — confirm.
5 template <
6 class SolverType,
7 int NumberOfVolumesPerAxisInPatch,
8 int HaloSize,
9 int NumberOfUnknowns,
10 int NumberOfAuxiliaryVariables,
11 bool EvaluateFlux,
12 bool EvaluateNonconservativeProduct,
13 bool EvaluateSource,
14 bool EvaluateMaximumEigenvalueAfterTimeStep,
15 class TempDataEnumeratorType>
17 int targetDevice,
18 int numberOfCells,
19 double** mappedPointersToQIn,
20 const double* rawPointerToCellCentre,
21 const double* rawPointerToCellSize,
22 const double* t,
23 const double* dt,
24 double* maxEigenvalue,
25 double** mappedPointersToQOut,
26 double* tempFluxX,
27 double* tempFluxY,
28 double* tempFluxZ,
29 double* tempNonconservativeProductX,
30 double* tempNonconservativeProductY,
31 double* tempNonconservativeProductZ,
32 double* tempEigenvalueX,
33 double* tempEigenvalueY,
34 double* tempEigenvalueZ
35 ) InlineMethod {
  // The QIn/QOut enumerators are constructed with a leading 1 whereas the
  // temporary-data enumerators are constructed with numberOfCells; QIn/QOut
  // are reached through one pointer per patch, the temp buffers are single
  // arrays indexed with patchIndex. QOut uses halo size 0.
36 const enumerator::AoSLexicographicEnumerator QInEnumerator(1, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, NumberOfAuxiliaryVariables);
37 const enumerator::AoSLexicographicEnumerator QOutEnumerator(1, NumberOfVolumesPerAxisInPatch, 0, NumberOfUnknowns, NumberOfAuxiliaryVariables);
38 const TempDataEnumeratorType fluxEnumerator(numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, 0);
39 const TempDataEnumeratorType ncpEnumerator(numberOfCells, NumberOfVolumesPerAxisInPatch + 1, HaloSize, NumberOfUnknowns, 0);
40 const TempDataEnumeratorType eigenvalueEnumerator(numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, 1, 0);
41
  // The nowait clause is commented out on the directive below.
42#pragma omp target teams distribute device(targetDevice) // nowait
43 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
44 // ====================================================
45 // Copy solution over and evaluate source (if required)
46 // ====================================================
  // With a source term the loops run per volume; without it the unknown index
  // (NumberOfUnknowns + NumberOfAuxiliaryVariables entries) is an extra
  // collapsed loop dimension.
47 if constexpr (EvaluateSource) {
48#if Dimensions == 2
49#pragma omp parallel for simd collapse(2)
50 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
51 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
52 loopbodies::copySolutionAndAddSourceTerm<SolverType>(
53 mappedPointersToQIn[patchIndex],
54 QInEnumerator,
55 rawPointerToCellCentre[patchIndex],
56 rawPointerToCellSize[patchIndex],
57 patchIndex,
58 volumeIndex(x, y),
59 t[patchIndex],
60 dt[patchIndex],
61 mappedPointersToQOut[patchIndex],
62 QOutEnumerator
63 );
64 }
65 }
66#else
67#pragma omp parallel for simd collapse(3)
68 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
69 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
70 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
71 loopbodies::copySolutionAndAddSourceTerm<SolverType>(
72 mappedPointersToQIn[patchIndex],
73 QInEnumerator,
74 rawPointerToCellCentre[patchIndex],
75 rawPointerToCellSize[patchIndex],
76 patchIndex,
77 volumeIndex(x, y, z),
78 t[patchIndex],
79 dt[patchIndex],
80 mappedPointersToQOut[patchIndex],
81 QOutEnumerator
82 );
83 }
84 }
85 }
86#endif
87 } else {
88#if Dimensions == 2
89#pragma omp parallel for simd collapse(3)
90 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
91 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
92 for (int unknown = 0; unknown < NumberOfUnknowns + NumberOfAuxiliaryVariables; unknown++) {
93 loopbodies::copySolution(mappedPointersToQIn[patchIndex], QInEnumerator, patchIndex, volumeIndex(x, y), unknown, mappedPointersToQOut[patchIndex], QOutEnumerator);
94 }
95 }
96 }
97#else
98#pragma omp parallel for simd collapse(4)
99 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
100 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
101 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
102 for (int unknown = 0; unknown < NumberOfUnknowns + NumberOfAuxiliaryVariables; unknown++) {
  // NOTE(review): listing line 103 (the callee; presumably
  // loopbodies::copySolution as in the 2D branch above) is missing here.
104 mappedPointersToQIn[patchIndex], QInEnumerator, patchIndex, volumeIndex(x, y, z), unknown, mappedPointersToQOut[patchIndex], QOutEnumerator
105 );
106 }
107 }
108 }
109 }
110#endif
111 }
112
113 // ====================================================
114 // Compute damping due to max eigenvalue
115 // ====================================================
  // One loop nest per direction (normal argument 0, 1, 2). Along the normal
  // the loop covers NumberOfVolumesPerAxisInPatch + 2 * HaloSize entries and
  // the index is shifted by -HaloSize, so it starts at -HaloSize.
116#if Dimensions == 2
117#pragma omp parallel for simd collapse(2)
118 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
119 for (int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; x++) {
120 loopbodies::computeMaxEigenvalue<SolverType>(
121 mappedPointersToQIn[patchIndex],
122 QInEnumerator,
123 rawPointerToCellCentre[patchIndex],
124 rawPointerToCellSize[patchIndex],
125 patchIndex,
126 volumeIndex(x - HaloSize, y),
127 t[patchIndex],
128 dt[patchIndex],
129 0,
130 tempEigenvalueX,
131 eigenvalueEnumerator
132 );
133 }
134 }
135
136#pragma omp parallel for simd collapse(2)
137 for (int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; y++) {
138 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
139 loopbodies::computeMaxEigenvalue<SolverType>(
140 mappedPointersToQIn[patchIndex],
141 QInEnumerator,
142 rawPointerToCellCentre[patchIndex],
143 rawPointerToCellSize[patchIndex],
144 patchIndex,
145 volumeIndex(x, y - HaloSize),
146 t[patchIndex],
147 dt[patchIndex],
148 1,
149 tempEigenvalueY,
150 eigenvalueEnumerator
151 );
152 }
153 }
154
155#pragma omp parallel for simd collapse(3)
156 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
157 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
158 for (int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
  // NOTE(review): listing line 159 (the callee name) is missing here; the call
  // receives QIn, the three eigenvalue buffers and QOut.
160 mappedPointersToQIn[patchIndex],
161 QInEnumerator,
162 tempEigenvalueX,
163 tempEigenvalueY,
164 tempEigenvalueZ,
165 eigenvalueEnumerator,
166 rawPointerToCellCentre[patchIndex],
167 rawPointerToCellSize[patchIndex],
168 patchIndex,
169 volumeIndex(x, y),
170 unknown,
171 dt[patchIndex],
172 mappedPointersToQOut[patchIndex],
173 QOutEnumerator
174 );
175 }
176 }
177 }
178#else
179#pragma omp parallel for simd collapse(3)
180 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
181 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
182 for (int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; x++) {
183 loopbodies::computeMaxEigenvalue<SolverType>(
184 mappedPointersToQIn[patchIndex],
185 QInEnumerator,
186 rawPointerToCellCentre[patchIndex],
187 rawPointerToCellSize[patchIndex],
188 patchIndex,
189 volumeIndex(x - HaloSize, y, z),
190 t[patchIndex],
191 dt[patchIndex],
192 0,
193 tempEigenvalueX,
194 eigenvalueEnumerator
195 );
196 }
197 }
198 }
199
200#pragma omp parallel for simd collapse(3)
201 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
202 for (int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; y++) {
203 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
204 loopbodies::computeMaxEigenvalue<SolverType>(
205 mappedPointersToQIn[patchIndex],
206 QInEnumerator,
207 rawPointerToCellCentre[patchIndex],
208 rawPointerToCellSize[patchIndex],
209 patchIndex,
210 volumeIndex(x, y - HaloSize, z),
211 t[patchIndex],
212 dt[patchIndex],
213 1,
214 tempEigenvalueY,
215 eigenvalueEnumerator
216 );
217 }
218 }
219 }
220
221#pragma omp parallel for simd collapse(3)
222 for (int z = 0; z < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; z++) {
223 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
224 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
225 loopbodies::computeMaxEigenvalue<SolverType>(
226 mappedPointersToQIn[patchIndex],
227 QInEnumerator,
228 rawPointerToCellCentre[patchIndex],
229 rawPointerToCellSize[patchIndex],
230 patchIndex,
231 volumeIndex(x, y, z - HaloSize),
232 t[patchIndex],
233 dt[patchIndex],
234 2,
235 tempEigenvalueZ,
236 eigenvalueEnumerator
237 );
238 }
239 }
240 }
241
242#pragma omp parallel for simd collapse(4)
243 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
244 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
245 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
246 for (int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
  // NOTE(review): listing line 247 (the callee name) is missing here; same
  // argument list as the 2D variant above.
248 mappedPointersToQIn[patchIndex],
249 QInEnumerator,
250 tempEigenvalueX,
251 tempEigenvalueY,
252 tempEigenvalueZ,
253 eigenvalueEnumerator,
254 rawPointerToCellCentre[patchIndex],
255 rawPointerToCellSize[patchIndex],
256 patchIndex,
257 volumeIndex(x, y, z),
258 unknown,
259 dt[patchIndex],
260 mappedPointersToQOut[patchIndex],
261 QOutEnumerator
262 );
263 }
264 }
265 }
266 }
267#endif
268
269
270 // ====================================================
271 // Normal (conservative) flux
272 // ====================================================
  // Same loop layout as the eigenvalue section: one nest per direction with
  // the normal axis extended by 2 * HaloSize and shifted by -HaloSize.
273 if constexpr (EvaluateFlux) {
274#if Dimensions == 2
275#pragma omp parallel for simd collapse(2)
276 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
277 for (int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; x++) {
278 loopbodies::computeFlux<SolverType>(
279 mappedPointersToQIn[patchIndex],
280 QInEnumerator,
281 rawPointerToCellCentre[patchIndex],
282 rawPointerToCellSize[patchIndex],
283 patchIndex,
284 volumeIndex(x - HaloSize, y),
285 t[patchIndex],
286 dt[patchIndex],
287 0, // normal
288 tempFluxX,
289 fluxEnumerator
290 );
291 }
292 }
293
294#pragma omp parallel for simd collapse(2)
295 for (int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; y++) {
296 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
297 loopbodies::computeFlux<SolverType>(
298 mappedPointersToQIn[patchIndex],
299 QInEnumerator,
300 rawPointerToCellCentre[patchIndex],
301 rawPointerToCellSize[patchIndex],
302 patchIndex,
303 volumeIndex(x, y - HaloSize),
304 t[patchIndex],
305 dt[patchIndex],
306 1, // normal
307 tempFluxY,
308 fluxEnumerator
309 );
310 }
311 }
312
313#pragma omp parallel for simd collapse(3)
314 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
315 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
316 for (int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
  // NOTE(review): listing line 317 (the callee name) is missing here; the call
  // receives the three flux buffers and QOut.
318 tempFluxX,
319 tempFluxY,
320 tempFluxZ,
321 fluxEnumerator,
322 rawPointerToCellCentre[patchIndex],
323 rawPointerToCellSize[patchIndex],
324 patchIndex,
325 volumeIndex(x, y),
326 unknown,
327 dt[patchIndex],
328 mappedPointersToQOut[patchIndex],
329 QOutEnumerator
330 );
331 }
332 }
333 }
334#else
335#pragma omp parallel for simd collapse(3)
336 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
337 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
338 for (int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; x++) {
339 loopbodies::computeFlux<SolverType>(
340 mappedPointersToQIn[patchIndex],
341 QInEnumerator,
342 rawPointerToCellCentre[patchIndex],
343 rawPointerToCellSize[patchIndex],
344 patchIndex,
345 volumeIndex(x - HaloSize, y, z),
346 t[patchIndex],
347 dt[patchIndex],
348 0, // normal
349 tempFluxX,
350 fluxEnumerator
351 );
352 }
353 }
354 }
355
356#pragma omp parallel for simd collapse(3)
357 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
358 for (int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; y++) {
359 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
360 loopbodies::computeFlux<SolverType>(
361 mappedPointersToQIn[patchIndex],
362 QInEnumerator,
363 rawPointerToCellCentre[patchIndex],
364 rawPointerToCellSize[patchIndex],
365 patchIndex,
366 volumeIndex(x, y - HaloSize, z),
367 t[patchIndex],
368 dt[patchIndex],
369 1, // normal
370 tempFluxY,
371 fluxEnumerator
372 );
373 }
374 }
375 }
376
377#pragma omp parallel for simd collapse(3)
378 for (int z = 0; z < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; z++) {
379 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
380 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
381 loopbodies::computeFlux<SolverType>(
382 mappedPointersToQIn[patchIndex],
383 QInEnumerator,
384 rawPointerToCellCentre[patchIndex],
385 rawPointerToCellSize[patchIndex],
386 patchIndex,
387 volumeIndex(x, y, z - HaloSize),
388 t[patchIndex],
389 dt[patchIndex],
390 2, // normal
391 tempFluxZ,
392 fluxEnumerator
393 );
394 }
395 }
396 }
397
398#pragma omp parallel for simd collapse(4)
399 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
400 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
401 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
402 for (int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
  // NOTE(review): listing line 403 (the callee name) is missing here; same
  // argument list as the 2D variant above.
404 tempFluxX,
405 tempFluxY,
406 tempFluxZ,
407 fluxEnumerator,
408 rawPointerToCellCentre[patchIndex],
409 rawPointerToCellSize[patchIndex],
410 patchIndex,
411 volumeIndex(x, y, z),
412 unknown,
413 dt[patchIndex],
414 mappedPointersToQOut[patchIndex],
415 QOutEnumerator
416 );
417 }
418 }
419 }
420 }
421#endif
422 }
423
  // Non-conservative product. Unlike the flux loops, the normal axis covers
  // NumberOfVolumesPerAxisInPatch + 2 * HaloSize - 1 entries, i.e. one fewer,
  // and ncpEnumerator was built with NumberOfVolumesPerAxisInPatch + 1.
  // NOTE(review): presumably these are face-centred quantities — confirm.
424 if constexpr (EvaluateNonconservativeProduct) {
425#if Dimensions == 2
426#pragma omp parallel for simd collapse(2)
427 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
428 for (int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 * HaloSize - 1; x++) {
429 loopbodies::computeNonconservativeFlux<SolverType>(
430 mappedPointersToQIn[patchIndex],
431 QInEnumerator,
432 rawPointerToCellCentre[patchIndex],
433 rawPointerToCellSize[patchIndex],
434 patchIndex,
435 volumeIndex(x - HaloSize, y),
436 t[patchIndex],
437 dt[patchIndex],
438 0, // normal
439 tempNonconservativeProductX,
440 ncpEnumerator
441 );
442 }
443 }
444
445#pragma omp parallel for simd collapse(2)
446 for (int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 * HaloSize - 1; y++) {
447 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
448 loopbodies::computeNonconservativeFlux<SolverType>(
449 mappedPointersToQIn[patchIndex],
450 QInEnumerator,
451 rawPointerToCellCentre[patchIndex],
452 rawPointerToCellSize[patchIndex],
453 patchIndex,
454 volumeIndex(x, y - HaloSize),
455 t[patchIndex],
456 dt[patchIndex],
457 1, // normal
458 tempNonconservativeProductY,
459 ncpEnumerator
460 );
461 }
462 }
463
464#pragma omp parallel for simd collapse(3)
465 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
466 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
467 for (int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
  // NOTE(review): listing line 468 (the callee name) is missing here; the call
  // receives the three non-conservative-product buffers and QOut.
469 tempNonconservativeProductX,
470 tempNonconservativeProductY,
471 tempNonconservativeProductZ,
472 ncpEnumerator,
473 rawPointerToCellCentre[patchIndex],
474 rawPointerToCellSize[patchIndex],
475 patchIndex,
476 volumeIndex(x, y),
477 unknown,
478 dt[patchIndex],
479 mappedPointersToQOut[patchIndex],
480 QOutEnumerator
481 );
482 }
483 }
484 }
485#else
486#pragma omp parallel for simd collapse(3)
487 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
488 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
489 for (int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 * HaloSize - 1; x++) {
490 loopbodies::computeNonconservativeFlux<SolverType>(
491 mappedPointersToQIn[patchIndex],
492 QInEnumerator,
493 rawPointerToCellCentre[patchIndex],
494 rawPointerToCellSize[patchIndex],
495 patchIndex,
496 volumeIndex(x - HaloSize, y, z),
497 t[patchIndex],
498 dt[patchIndex],
499 0, // normal
500 tempNonconservativeProductX,
501 ncpEnumerator
502 );
503 }
504 }
505 }
506
507#pragma omp parallel for simd collapse(3)
508 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
509 for (int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 * HaloSize - 1; y++) {
510 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
511 loopbodies::computeNonconservativeFlux<SolverType>(
512 mappedPointersToQIn[patchIndex],
513 QInEnumerator,
514 rawPointerToCellCentre[patchIndex],
515 rawPointerToCellSize[patchIndex],
516 patchIndex,
517 volumeIndex(x, y - HaloSize, z),
518 t[patchIndex],
519 dt[patchIndex],
520 1, // normal
521 tempNonconservativeProductY,
522 ncpEnumerator
523 );
524 }
525 }
526 }
527
528#pragma omp parallel for simd collapse(3)
529 for (int z = 0; z < NumberOfVolumesPerAxisInPatch + 2 * HaloSize - 1; z++) {
530 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
531 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
532 loopbodies::computeNonconservativeFlux<SolverType>(
533 mappedPointersToQIn[patchIndex],
534 QInEnumerator,
535 rawPointerToCellCentre[patchIndex],
536 rawPointerToCellSize[patchIndex],
537 patchIndex,
538 volumeIndex(x, y, z - HaloSize),
539 t[patchIndex],
540 dt[patchIndex],
541 2, // normal
542 tempNonconservativeProductZ,
543 ncpEnumerator
544 );
545 }
546 }
547 }
548
549#pragma omp parallel for simd collapse(4)
550 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
551 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
552 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
553 for (int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
  // NOTE(review): listing line 554 (the callee name) is missing here; same
  // argument list as the 2D variant above.
555 tempNonconservativeProductX,
556 tempNonconservativeProductY,
557 tempNonconservativeProductZ,
558 ncpEnumerator,
559 rawPointerToCellCentre[patchIndex],
560 rawPointerToCellSize[patchIndex],
561 patchIndex,
562 volumeIndex(x, y, z),
563 unknown,
564 dt[patchIndex],
565 mappedPointersToQOut[patchIndex],
566 QOutEnumerator
567 );
568 }
569 }
570 }
571 }
572#endif
573 }
574
  // Final reduction: the per-patch maximum starts at 0.0, is max-reduced over
  // all interior volumes of QOut (the already updated solution) and stored in
  // maxEigenvalue[patchIndex].
575 if constexpr (EvaluateMaximumEigenvalueAfterTimeStep) {
576 double newMaxEigenvalue = 0.0;
577#if Dimensions == 2
578#pragma omp parallel for simd collapse(2) reduction(max : newMaxEigenvalue)
579 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
580 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
581 newMaxEigenvalue = std::max(
582 newMaxEigenvalue,
583 loopbodies::reduceMaxEigenvalue<SolverType>(
584 mappedPointersToQOut[patchIndex],
585 QOutEnumerator,
586 rawPointerToCellCentre[patchIndex],
587 rawPointerToCellSize[patchIndex],
588 patchIndex,
589 volumeIndex(x, y),
590 t[patchIndex],
591 dt[patchIndex]
592 )
593 );
594 }
595 }
596#else
597#pragma omp parallel for simd collapse(3) reduction(max : newMaxEigenvalue)
598 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
599 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
600 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
601 newMaxEigenvalue = std::max(
602 newMaxEigenvalue,
603 loopbodies::reduceMaxEigenvalue<SolverType>(
604 mappedPointersToQOut[patchIndex],
605 QOutEnumerator,
606 rawPointerToCellCentre[patchIndex],
607 rawPointerToCellSize[patchIndex],
608 patchIndex,
609 volumeIndex(x, y, z),
610 t[patchIndex],
611 dt[patchIndex]
612 )
613 );
614 }
615 }
616 }
617#endif
618 maxEigenvalue[patchIndex] = newMaxEigenvalue;
619 }
620 }
621 }
622} // namespace exahype2::fv::rusanov::omp::internal
623
624
// USM wrapper with timing: allocates the nine temporary buffers on
// targetDevice through GPUMemoryManager, forwards the pointers held by
// patchData unchanged to internal::timeStepWithRusanovPatchwiseStateless,
// stores the watch's calendar time in `measurement`, and frees the buffers.
// NOTE(review): the signature line (listing line 636) is missing from this
// listing; the body uses the names targetDevice, patchData and measurement.
// The trace messages name the routine timeStepWithRusanovPatchwiseUSMStateless.
// Only HaloSize == 1 is accepted (static_assert).
625template <
626 class SolverType,
627 int NumberOfVolumesPerAxisInPatch,
628 int HaloSize,
629 int NumberOfUnknowns,
630 int NumberOfAuxiliaryVariables,
631 bool EvaluateFlux,
632 bool EvaluateNonconservativeProduct,
633 bool EvaluateSource,
634 bool EvaluateMaximumEigenvalueAfterTimeStep,
635 class TempDataEnumeratorType>
637 static_assert(HaloSize == 1);
638
639 static tarch::logging::Log _log("exahype2::fv::rusanov::omp");
640 logTraceIn("timeStepWithRusanovPatchwiseUSMStateless()");
641
  // NOTE(review): QInEnumerator and QOutEnumerator are not referenced again in
  // the visible body of this wrapper; the three temp enumerators are only used
  // for their size().
642 const enumerator::AoSLexicographicEnumerator QInEnumerator(1, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, NumberOfAuxiliaryVariables);
643 const enumerator::AoSLexicographicEnumerator QOutEnumerator(1, NumberOfVolumesPerAxisInPatch, 0, NumberOfUnknowns, NumberOfAuxiliaryVariables);
644 const TempDataEnumeratorType fluxEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, 0);
645 const TempDataEnumeratorType ncpEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch + 1, HaloSize, NumberOfUnknowns, 0);
646 const TempDataEnumeratorType eigenvalueEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, 1, 0);
647
  // All nine buffers are allocated regardless of the Evaluate* flags. The
  // Z-direction buffers get a single element unless Dimensions == 3.
648 double* tempFluxX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(fluxEnumerator.size(), targetDevice);
649 double* tempFluxY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(fluxEnumerator.size(), targetDevice);
650 double* tempFluxZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(Dimensions == 3 ? fluxEnumerator.size() : 1, targetDevice);
651 double* tempNonconservativeProductX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(ncpEnumerator.size(), targetDevice);
652 double* tempNonconservativeProductY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(ncpEnumerator.size(), targetDevice);
653 double* tempNonconservativeProductZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(Dimensions == 3 ? ncpEnumerator.size() : 1, targetDevice);
654 double* tempEigenvalueX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(eigenvalueEnumerator.size(), targetDevice);
655 double* tempEigenvalueY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(eigenvalueEnumerator.size(), targetDevice);
656 double* tempEigenvalueZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(Dimensions == 3 ? eigenvalueEnumerator.size() : 1, targetDevice);
657
  // The patchData pointers are passed on as they are; no explicit mapping is
  // done in this wrapper.
  // NOTE(review): taking .data() of entry 0 presumably relies on all cell
  // centres / sizes being stored contiguously — verify against CellData.
658 const int numberOfCells = patchData.numberOfCells;
659 double** mappedPointersToQIn = patchData.QIn;
660 double** mappedPointersToQOut = patchData.QOut;
661 double* rawPointerToCellCentre = patchData.cellCentre[0].data();
662 double* rawPointerToCellSize = patchData.cellSize[0].data();
663 double* t = patchData.t;
664 double* dt = patchData.dt;
665 double* maxEigenvalue = patchData.maxEigenvalue;
666
  // The watch is created after the allocations and stopped before the frees,
  // so it brackets only the kernel call.
667 tarch::timing::Watch watch("exahype2::fv::rusanov::omp", "timeStepWithRusanovPatchwiseUSMStateless", false, true);
668 internal::timeStepWithRusanovPatchwiseStateless<
669 SolverType,
670 NumberOfVolumesPerAxisInPatch,
671 HaloSize,
672 NumberOfUnknowns,
673 NumberOfAuxiliaryVariables,
674 EvaluateFlux,
675 EvaluateNonconservativeProduct,
676 EvaluateSource,
677 EvaluateMaximumEigenvalueAfterTimeStep,
678 TempDataEnumeratorType>(
679 targetDevice,
680 numberOfCells,
681 mappedPointersToQIn,
682 rawPointerToCellCentre,
683 rawPointerToCellSize,
684 t,
685 dt,
686 maxEigenvalue,
687 mappedPointersToQOut,
688 tempFluxX,
689 tempFluxY,
690 tempFluxZ,
691 tempNonconservativeProductX,
692 tempNonconservativeProductY,
693 tempNonconservativeProductZ,
694 tempEigenvalueX,
695 tempEigenvalueY,
696 tempEigenvalueZ
697 );
698 watch.stop();
699 measurement.setValue(watch.getCalendarTime());
700
  // Release every temporary buffer allocated above.
701 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxX, targetDevice);
702 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxY, targetDevice);
703 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxZ, targetDevice);
704 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductX, targetDevice);
705 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductY, targetDevice);
706 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductZ, targetDevice);
707 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueX, targetDevice);
708 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueY, targetDevice);
709 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueZ, targetDevice);
710
711 logTraceOut("timeStepWithRusanovPatchwiseUSMStateless()");
712}
713
714
// USM wrapper without timing: allocates the nine temporary buffers on
// targetDevice through GPUMemoryManager, forwards the pointers held by
// patchData unchanged to internal::timeStepWithRusanovPatchwiseStateless,
// and frees the buffers afterwards. No Watch or measurement is used here.
// NOTE(review): the signature line (listing line 726) is missing from this
// listing; the body uses the names targetDevice and patchData.
// The trace messages name the routine timeStepWithRusanovPatchwiseUSMStateless.
// Only HaloSize == 1 is accepted (static_assert).
715template <
716 class SolverType,
717 int NumberOfVolumesPerAxisInPatch,
718 int HaloSize,
719 int NumberOfUnknowns,
720 int NumberOfAuxiliaryVariables,
721 bool EvaluateFlux,
722 bool EvaluateNonconservativeProduct,
723 bool EvaluateSource,
724 bool EvaluateMaximumEigenvalueAfterTimeStep,
725 class TempDataEnumeratorType>
727 static_assert(HaloSize == 1);
728
729 static tarch::logging::Log _log("exahype2::fv::rusanov::omp");
730 logTraceIn("timeStepWithRusanovPatchwiseUSMStateless()");
731
  // NOTE(review): QInEnumerator and QOutEnumerator are not referenced again in
  // the visible body of this wrapper; the three temp enumerators are only used
  // for their size().
732 const enumerator::AoSLexicographicEnumerator QInEnumerator(1, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, NumberOfAuxiliaryVariables);
733 const enumerator::AoSLexicographicEnumerator QOutEnumerator(1, NumberOfVolumesPerAxisInPatch, 0, NumberOfUnknowns, NumberOfAuxiliaryVariables);
734 const TempDataEnumeratorType fluxEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, 0);
735 const TempDataEnumeratorType ncpEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch + 1, HaloSize, NumberOfUnknowns, 0);
736 const TempDataEnumeratorType eigenvalueEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, 1, 0);
737
  // All nine buffers are allocated regardless of the Evaluate* flags. The
  // Z-direction buffers get a single element unless Dimensions == 3.
738 double* tempFluxX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(fluxEnumerator.size(), targetDevice);
739 double* tempFluxY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(fluxEnumerator.size(), targetDevice);
740 double* tempFluxZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(Dimensions == 3 ? fluxEnumerator.size() : 1, targetDevice);
741 double* tempNonconservativeProductX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(ncpEnumerator.size(), targetDevice);
742 double* tempNonconservativeProductY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(ncpEnumerator.size(), targetDevice);
743 double* tempNonconservativeProductZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(Dimensions == 3 ? ncpEnumerator.size() : 1, targetDevice);
744 double* tempEigenvalueX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(eigenvalueEnumerator.size(), targetDevice);
745 double* tempEigenvalueY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(eigenvalueEnumerator.size(), targetDevice);
746 double* tempEigenvalueZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(Dimensions == 3 ? eigenvalueEnumerator.size() : 1, targetDevice);
747
  // The patchData pointers are passed on as they are; no explicit mapping is
  // done in this wrapper.
  // NOTE(review): taking .data() of entry 0 presumably relies on all cell
  // centres / sizes being stored contiguously — verify against CellData.
748 const int numberOfCells = patchData.numberOfCells;
749 double** mappedPointersToQIn = patchData.QIn;
750 double** mappedPointersToQOut = patchData.QOut;
751 double* rawPointerToCellCentre = patchData.cellCentre[0].data();
752 double* rawPointerToCellSize = patchData.cellSize[0].data();
753 double* t = patchData.t;
754 double* dt = patchData.dt;
755 double* maxEigenvalue = patchData.maxEigenvalue;
756
757 internal::timeStepWithRusanovPatchwiseStateless<
758 SolverType,
759 NumberOfVolumesPerAxisInPatch,
760 HaloSize,
761 NumberOfUnknowns,
762 NumberOfAuxiliaryVariables,
763 EvaluateFlux,
764 EvaluateNonconservativeProduct,
765 EvaluateSource,
766 EvaluateMaximumEigenvalueAfterTimeStep,
767 TempDataEnumeratorType>(
768 targetDevice,
769 numberOfCells,
770 mappedPointersToQIn,
771 rawPointerToCellCentre,
772 rawPointerToCellSize,
773 t,
774 dt,
775 maxEigenvalue,
776 mappedPointersToQOut,
777 tempFluxX,
778 tempFluxY,
779 tempFluxZ,
780 tempNonconservativeProductX,
781 tempNonconservativeProductY,
782 tempNonconservativeProductZ,
783 tempEigenvalueX,
784 tempEigenvalueY,
785 tempEigenvalueZ
786 );
787
  // Release every temporary buffer allocated above.
788 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxX, targetDevice);
789 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxY, targetDevice);
790 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxZ, targetDevice);
791 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductX, targetDevice);
792 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductY, targetDevice);
793 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductZ, targetDevice);
794 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueX, targetDevice);
795 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueY, targetDevice);
796 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueZ, targetDevice);
797
798 logTraceOut("timeStepWithRusanovPatchwiseUSMStateless()");
799}
800
801
802template <
803 class SolverType,
804 int NumberOfVolumesPerAxisInPatch,
805 int HaloSize,
806 int NumberOfUnknowns,
807 int NumberOfAuxiliaryVariables,
808 bool EvaluateFlux,
809 bool EvaluateNonconservativeProduct,
810 bool EvaluateSource,
811 bool EvaluateMaximumEigenvalueAfterTimeStep,
812 class TempDataEnumeratorType>
814 static_assert(HaloSize == 1);
815
816 static tarch::logging::Log _log("exahype2::fv::rusanov::omp");
817 logTraceIn("timeStepWithRusanovPatchwiseHeapStateless()");
818
819 const enumerator::AoSLexicographicEnumerator QInEnumerator(1, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, NumberOfAuxiliaryVariables);
820 const enumerator::AoSLexicographicEnumerator QOutEnumerator(1, NumberOfVolumesPerAxisInPatch, 0, NumberOfUnknowns, NumberOfAuxiliaryVariables);
821 const TempDataEnumeratorType fluxEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, 0);
822 const TempDataEnumeratorType ncpEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch + 1, HaloSize, NumberOfUnknowns, 0);
823 const TempDataEnumeratorType eigenvalueEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, 1, 0);
824
825 double* tempFluxX = new double[fluxEnumerator.size()];
826 double* tempFluxY = new double[fluxEnumerator.size()];
827 double* tempFluxZ = new double[fluxEnumerator.size()];
828 double* tempNonconservativeProductX = new double[ncpEnumerator.size()];
829 double* tempNonconservativeProductY = new double[ncpEnumerator.size()];
830 double* tempNonconservativeProductZ = new double[ncpEnumerator.size()];
831 double* tempEigenvalueX = new double[eigenvalueEnumerator.size()];
832 double* tempEigenvalueY = new double[eigenvalueEnumerator.size()];
833 double* tempEigenvalueZ = new double[eigenvalueEnumerator.size()];
834
835#pragma omp target enter data map(alloc : tempFluxX[0 : fluxEnumerator.size()]) device(targetDevice)
836#pragma omp target enter data map(alloc : tempFluxY[0 : fluxEnumerator.size()]) device(targetDevice)
837#pragma omp target enter data map(alloc : tempFluxZ[0 : fluxEnumerator.size()]) device(targetDevice)
838#pragma omp target enter data map(alloc : tempNonconservativeProductX[0 : ncpEnumerator.size()]) device(targetDevice)
839#pragma omp target enter data map(alloc : tempNonconservativeProductY[0 : ncpEnumerator.size()]) device(targetDevice)
840#pragma omp target enter data map(alloc : tempNonconservativeProductZ[0 : ncpEnumerator.size()]) device(targetDevice)
841#pragma omp target enter data map(alloc : tempEigenvalueX[0 : eigenvalueEnumerator.size()]) device(targetDevice)
842#pragma omp target enter data map(alloc : tempEigenvalueY[0 : eigenvalueEnumerator.size()]) device(targetDevice)
843#pragma omp target enter data map(alloc : tempEigenvalueZ[0 : eigenvalueEnumerator.size()]) device(targetDevice)
844
845 double** mappedPointersToQIn = new double*[patchData.numberOfCells];
846 double** mappedPointersToQOut = new double*[patchData.numberOfCells];
847
848 for (int patchIndex = 0; patchIndex < patchData.numberOfCells; patchIndex++) {
849 const double* currentQIn = patchData.QIn[patchIndex];
850 double* currentQOut = patchData.QOut[patchIndex];
851#pragma omp target enter data map(to : currentQIn[0 : QInEnumerator.size()]) device(targetDevice)
852#pragma omp target enter data map(alloc : currentQOut[0 : QOutEnumerator.size()]) device(targetDevice)
853 mappedPointersToQIn[patchIndex] = static_cast<double*>(omp_get_mapped_ptr(currentQIn, targetDevice));
854 mappedPointersToQOut[patchIndex] = static_cast<double*>(omp_get_mapped_ptr(currentQOut, targetDevice));
855 }
856
857 const int numberOfCells = patchData.numberOfCells;
858 double* rawPointerToCellCentre = patchData.cellCentre[0].data();
859 double* rawPointerToCellSize = patchData.cellSize[0].data();
860 double* t = patchData.t;
861 double* dt = patchData.dt;
862 double* maxEigenvalue = patchData.maxEigenvalue;
863
864#pragma omp target enter data map(to : rawPointerToCellCentre[0 : numberOfCells * Dimensions]) device(targetDevice)
865#pragma omp target enter data map(to : rawPointerToCellSize[0 : numberOfCells * Dimensions]) device(targetDevice)
866#pragma omp target enter data map(to : mappedPointersToQIn[0 : numberOfCells]) device(targetDevice)
867#pragma omp target enter data map(to : mappedPointersToQOut[0 : numberOfCells]) device(targetDevice)
868#pragma omp target enter data map(to : t[0 : numberOfCells]) device(targetDevice)
869#pragma omp target enter data map(to : dt[0 : numberOfCells]) device(targetDevice)
870#pragma omp target enter data map(alloc : maxEigenvalue[0 : numberOfCells]) device(targetDevice)
871
872 tarch::timing::Watch watch("exahype2::fv::rusanov::omp", "timeStepWithRusanovPatchwiseHeapStateless", false, true);
873 internal::timeStepWithRusanovPatchwiseStateless<
874 SolverType,
875 NumberOfVolumesPerAxisInPatch,
876 HaloSize,
877 NumberOfUnknowns,
878 NumberOfAuxiliaryVariables,
879 EvaluateFlux,
880 EvaluateNonconservativeProduct,
881 EvaluateSource,
882 EvaluateMaximumEigenvalueAfterTimeStep,
883 TempDataEnumeratorType>(
884 targetDevice,
885 numberOfCells,
886 mappedPointersToQIn,
887 rawPointerToCellCentre,
888 rawPointerToCellSize,
889 t,
890 dt,
891 maxEigenvalue,
892 mappedPointersToQOut,
893 tempFluxX,
894 tempFluxY,
895 tempFluxZ,
896 tempNonconservativeProductX,
897 tempNonconservativeProductY,
898 tempNonconservativeProductZ,
899 tempEigenvalueX,
900 tempEigenvalueY,
901 tempEigenvalueZ
902 );
903 watch.stop();
904 measurement.setValue(watch.getCalendarTime());
905
906#pragma omp target exit data map(delete : rawPointerToCellCentre[0 : numberOfCells * Dimensions]) device(targetDevice)
907#pragma omp target exit data map(delete : rawPointerToCellSize[0 : numberOfCells * Dimensions]) device(targetDevice)
908#pragma omp target exit data map(delete : mappedPointersToQIn[0 : numberOfCells]) device(targetDevice)
909#pragma omp target exit data map(delete : mappedPointersToQOut[0 : numberOfCells]) device(targetDevice)
910#pragma omp target exit data map(delete : t[0 : numberOfCells]) device(targetDevice)
911#pragma omp target exit data map(delete : dt[0 : numberOfCells]) device(targetDevice)
912#pragma omp target exit data map(from : maxEigenvalue[0 : numberOfCells]) device(targetDevice)
913
914#pragma omp target exit data map(delete : tempFluxX[0 : fluxEnumerator.size()]) device(targetDevice)
915#pragma omp target exit data map(delete : tempFluxY[0 : fluxEnumerator.size()]) device(targetDevice)
916#pragma omp target exit data map(delete : tempFluxZ[0 : fluxEnumerator.size()]) device(targetDevice)
917#pragma omp target exit data map(delete : tempNonconservativeProductX[0 : ncpEnumerator.size()]) device(targetDevice)
918#pragma omp target exit data map(delete : tempNonconservativeProductY[0 : ncpEnumerator.size()]) device(targetDevice)
919#pragma omp target exit data map(delete : tempNonconservativeProductZ[0 : ncpEnumerator.size()]) device(targetDevice)
920#pragma omp target exit data map(delete : tempEigenvalueX[0 : eigenvalueEnumerator.size()]) device(targetDevice)
921#pragma omp target exit data map(delete : tempEigenvalueY[0 : eigenvalueEnumerator.size()]) device(targetDevice)
922#pragma omp target exit data map(delete : tempEigenvalueZ[0 : eigenvalueEnumerator.size()]) device(targetDevice)
923
924 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
925 const double* currentQIn = patchData.QIn[patchIndex];
926 double* currentQOut = patchData.QOut[patchIndex];
927#pragma omp target exit data map(delete : currentQIn[0 : QInEnumerator.size()]) device(targetDevice)
928#pragma omp target exit data map(from : currentQOut[0 : QOutEnumerator.size()]) device(targetDevice)
929 }
930
931 delete[] mappedPointersToQIn;
932 delete[] mappedPointersToQOut;
933
934 if (tempFluxX != nullptr) {
935 delete[] tempFluxX;
936 }
937 if (tempFluxY != nullptr) {
938 delete[] tempFluxY;
939 }
940 if (tempFluxZ != nullptr) {
941 delete[] tempFluxZ;
942 }
943 if (tempNonconservativeProductX != nullptr) {
944 delete[] tempNonconservativeProductX;
945 }
946 if (tempNonconservativeProductY != nullptr) {
947 delete[] tempNonconservativeProductY;
948 }
949 if (tempNonconservativeProductZ != nullptr) {
950 delete[] tempNonconservativeProductZ;
951 }
952 if (tempEigenvalueX != nullptr) {
953 delete[] tempEigenvalueX;
954 }
955 if (tempEigenvalueY != nullptr) {
956 delete[] tempEigenvalueY;
957 }
958 if (tempEigenvalueZ != nullptr) {
959 delete[] tempEigenvalueZ;
960 }
961
962 logTraceOut("timeStepWithRusanovPatchwiseHeapStateless()");
963}
964
965
966template <
967 class SolverType,
968 int NumberOfVolumesPerAxisInPatch,
969 int HaloSize,
970 int NumberOfUnknowns,
971 int NumberOfAuxiliaryVariables,
972 bool EvaluateFlux,
973 bool EvaluateNonconservativeProduct,
974 bool EvaluateSource,
975 bool EvaluateMaximumEigenvalueAfterTimeStep,
976 class TempDataEnumeratorType>
977void exahype2::fv::rusanov::omp::timeStepWithRusanovPatchwiseHeapStateless(int targetDevice, CellData<double, double>& patchData) {
978 static_assert(HaloSize == 1);
979
980 static tarch::logging::Log _log("exahype2::fv::rusanov::omp");
981 logTraceIn("timeStepWithRusanovPatchwiseHeapStateless()");
982
983 const enumerator::AoSLexicographicEnumerator QInEnumerator(1, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, NumberOfAuxiliaryVariables);
984 const enumerator::AoSLexicographicEnumerator QOutEnumerator(1, NumberOfVolumesPerAxisInPatch, 0, NumberOfUnknowns, NumberOfAuxiliaryVariables);
985 const TempDataEnumeratorType fluxEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, 0);
986 const TempDataEnumeratorType ncpEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch + 1, HaloSize, NumberOfUnknowns, 0);
987 const TempDataEnumeratorType eigenvalueEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, 1, 0);
988
989 double* tempFluxX = new double[fluxEnumerator.size()];
990 double* tempFluxY = new double[fluxEnumerator.size()];
991 double* tempFluxZ = new double[fluxEnumerator.size()];
992 double* tempNonconservativeProductX = new double[ncpEnumerator.size()];
993 double* tempNonconservativeProductY = new double[ncpEnumerator.size()];
994 double* tempNonconservativeProductZ = new double[ncpEnumerator.size()];
995 double* tempEigenvalueX = new double[eigenvalueEnumerator.size()];
996 double* tempEigenvalueY = new double[eigenvalueEnumerator.size()];
997 double* tempEigenvalueZ = new double[eigenvalueEnumerator.size()];
998
999#pragma omp target enter data map(alloc : tempFluxX[0 : fluxEnumerator.size()]) device(targetDevice)
1000#pragma omp target enter data map(alloc : tempFluxY[0 : fluxEnumerator.size()]) device(targetDevice)
1001#pragma omp target enter data map(alloc : tempFluxZ[0 : fluxEnumerator.size()]) device(targetDevice)
1002#pragma omp target enter data map(alloc : tempNonconservativeProductX[0 : ncpEnumerator.size()]) device(targetDevice)
1003#pragma omp target enter data map(alloc : tempNonconservativeProductY[0 : ncpEnumerator.size()]) device(targetDevice)
1004#pragma omp target enter data map(alloc : tempNonconservativeProductZ[0 : ncpEnumerator.size()]) device(targetDevice)
1005#pragma omp target enter data map(alloc : tempEigenvalueX[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1006#pragma omp target enter data map(alloc : tempEigenvalueY[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1007#pragma omp target enter data map(alloc : tempEigenvalueZ[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1008
1009 double** mappedPointersToQIn = new double*[patchData.numberOfCells];
1010 double** mappedPointersToQOut = new double*[patchData.numberOfCells];
1011
1012 for (int patchIndex = 0; patchIndex < patchData.numberOfCells; patchIndex++) {
1013 const double* currentQIn = patchData.QIn[patchIndex];
1014 double* currentQOut = patchData.QOut[patchIndex];
1015#pragma omp target enter data map(to : currentQIn[0 : QInEnumerator.size()]) device(targetDevice)
1016#pragma omp target enter data map(alloc : currentQOut[0 : QOutEnumerator.size()]) device(targetDevice)
1017 mappedPointersToQIn[patchIndex] = static_cast<double*>(omp_get_mapped_ptr(currentQIn, targetDevice));
1018 mappedPointersToQOut[patchIndex] = static_cast<double*>(omp_get_mapped_ptr(currentQOut, targetDevice));
1019 }
1020
1021 const int numberOfCells = patchData.numberOfCells;
1022 double* rawPointerToCellCentre = patchData.cellCentre[0].data();
1023 double* rawPointerToCellSize = patchData.cellSize[0].data();
1024 double* t = patchData.t;
1025 double* dt = patchData.dt;
1026 double* maxEigenvalue = patchData.maxEigenvalue;
1027
1028#pragma omp target enter data map(to : rawPointerToCellCentre[0 : numberOfCells * Dimensions]) device(targetDevice)
1029#pragma omp target enter data map(to : rawPointerToCellSize[0 : numberOfCells * Dimensions]) device(targetDevice)
1030#pragma omp target enter data map(to : mappedPointersToQIn[0 : numberOfCells]) device(targetDevice)
1031#pragma omp target enter data map(to : mappedPointersToQOut[0 : numberOfCells]) device(targetDevice)
1032#pragma omp target enter data map(to : t[0 : numberOfCells]) device(targetDevice)
1033#pragma omp target enter data map(to : dt[0 : numberOfCells]) device(targetDevice)
1034#pragma omp target enter data map(alloc : maxEigenvalue[0 : numberOfCells]) device(targetDevice)
1035
1036 internal::timeStepWithRusanovPatchwiseStateless<
1037 SolverType,
1038 NumberOfVolumesPerAxisInPatch,
1039 HaloSize,
1040 NumberOfUnknowns,
1041 NumberOfAuxiliaryVariables,
1042 EvaluateFlux,
1043 EvaluateNonconservativeProduct,
1044 EvaluateSource,
1045 EvaluateMaximumEigenvalueAfterTimeStep,
1046 TempDataEnumeratorType>(
1047 targetDevice,
1048 numberOfCells,
1049 mappedPointersToQIn,
1050 rawPointerToCellCentre,
1051 rawPointerToCellSize,
1052 t,
1053 dt,
1055 mappedPointersToQOut,
1056 tempFluxX,
1057 tempFluxY,
1058 tempFluxZ,
1059 tempNonconservativeProductX,
1060 tempNonconservativeProductY,
1061 tempNonconservativeProductZ,
1062 tempEigenvalueX,
1063 tempEigenvalueY,
1064 tempEigenvalueZ
1065 );
1066
1067#pragma omp target exit data map(delete : rawPointerToCellCentre[0 : numberOfCells * Dimensions]) device(targetDevice)
1068#pragma omp target exit data map(delete : rawPointerToCellSize[0 : numberOfCells * Dimensions]) device(targetDevice)
1069#pragma omp target exit data map(delete : mappedPointersToQIn[0 : numberOfCells]) device(targetDevice)
1070#pragma omp target exit data map(delete : mappedPointersToQOut[0 : numberOfCells]) device(targetDevice)
1071#pragma omp target exit data map(delete : t[0 : numberOfCells]) device(targetDevice)
1072#pragma omp target exit data map(delete : dt[0 : numberOfCells]) device(targetDevice)
1073#pragma omp target exit data map(from : maxEigenvalue[0 : numberOfCells]) device(targetDevice)
1074
1075#pragma omp target exit data map(delete : tempFluxX[0 : fluxEnumerator.size()]) device(targetDevice)
1076#pragma omp target exit data map(delete : tempFluxY[0 : fluxEnumerator.size()]) device(targetDevice)
1077#pragma omp target exit data map(delete : tempFluxZ[0 : fluxEnumerator.size()]) device(targetDevice)
1078#pragma omp target exit data map(delete : tempNonconservativeProductX[0 : ncpEnumerator.size()]) device(targetDevice)
1079#pragma omp target exit data map(delete : tempNonconservativeProductY[0 : ncpEnumerator.size()]) device(targetDevice)
1080#pragma omp target exit data map(delete : tempNonconservativeProductZ[0 : ncpEnumerator.size()]) device(targetDevice)
1081#pragma omp target exit data map(delete : tempEigenvalueX[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1082#pragma omp target exit data map(delete : tempEigenvalueY[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1083#pragma omp target exit data map(delete : tempEigenvalueZ[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1084
1085 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
1086 const double* currentQIn = patchData.QIn[patchIndex];
1087 double* currentQOut = patchData.QOut[patchIndex];
1088#pragma omp target exit data map(delete : currentQIn[0 : QInEnumerator.size()]) device(targetDevice)
1089#pragma omp target exit data map(from : currentQOut[0 : QOutEnumerator.size()]) device(targetDevice)
1090 }
1091
1092 delete[] mappedPointersToQIn;
1093 delete[] mappedPointersToQOut;
1094
1095 if (tempFluxX != nullptr) {
1096 delete[] tempFluxX;
1097 }
1098 if (tempFluxY != nullptr) {
1099 delete[] tempFluxY;
1100 }
1101 if (tempFluxZ != nullptr) {
1102 delete[] tempFluxZ;
1103 }
1104 if (tempNonconservativeProductX != nullptr) {
1105 delete[] tempNonconservativeProductX;
1106 }
1107 if (tempNonconservativeProductY != nullptr) {
1108 delete[] tempNonconservativeProductY;
1109 }
1110 if (tempNonconservativeProductZ != nullptr) {
1111 delete[] tempNonconservativeProductZ;
1112 }
1113 if (tempEigenvalueX != nullptr) {
1114 delete[] tempEigenvalueX;
1115 }
1116 if (tempEigenvalueY != nullptr) {
1117 delete[] tempEigenvalueY;
1118 }
1119 if (tempEigenvalueZ != nullptr) {
1120 delete[] tempEigenvalueZ;
1121 }
1122
1123 logTraceOut("timeStepWithRusanovPatchwiseHeapStateless()");
1124}
static constexpr int HaloSize
#define KeywordToAvoidDuplicateSymbolsForInlinedFunctions
Definition LinuxAMD.h:31
#define logTraceOut(methodName)
Definition Log.h:379
#define logTraceIn(methodName)
Definition Log.h:369
tarch::logging::Log _log("::")
Log Device.
Definition Log.h:516
void setValue(const double &value)
Set the value.
A simple class that has to be included to measure the clock ticks required for an operation.
Definition Watch.h:45
float dt
Definition DSL_test.py:5
KeywordToAvoidDuplicateSymbolsForInlinedFunctions double maxEigenvalue(const double *const Q, int normal, const double CCZ4e, const double CCZ4ds, const double CCZ4GLMc, const double CCZ4GLMd) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions GPUCallableInlineMethod void updateSolutionWithNonconservativeFlux(const double *__restrict__ ncpX, const double *__restrict__ ncpY, const double *__restrict__ ncpZ, const NCPFaceEnumeratorType &ncpEnumerator, const ::tarch::la::Vector< Dimensions, double > &patchCentre, const ::tarch::la::Vector< Dimensions, double > &patchSize, int patchIndex, const ::tarch::la::Vector< Dimensions, int > &volumeIndex, int unknown, double dt, double *__restrict__ QOut, const QOutEnumeratorType &QOutEnumerator) InlineMethod
Add the non-conservative flux contributions to one volume.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions GPUCallableInlineMethod void copySolution(const double *__restrict__ QIn, const QInEnumeratorType &QInEnumerator, int patchIndex, const ::tarch::la::Vector< Dimensions, int > &volumeIndex, int unknown, double *__restrict__ QOut, const QOutEnumeratorType &QOutEnumerator) InlineMethod
Copy solution from QIn to QOut.
Definition LoopBodies.h:75
KeywordToAvoidDuplicateSymbolsForInlinedFunctions GPUCallableInlineMethod void updateSolutionWithFlux(const double *__restrict__ tempFluxX, const double *__restrict__ tempFluxY, const double *__restrict__ tempFluxZ, const FluxEnumeratorType &fluxEnumerator, const ::tarch::la::Vector< Dimensions, double > &patchCentre, const ::tarch::la::Vector< Dimensions, double > &patchSize, int patchIndex, const ::tarch::la::Vector< Dimensions, int > &volumeIndex, int unknown, double dt, double *__restrict__ QOut, const QOutEnumeratorType &QOutEnumerator) InlineMethod
Update one volume with the flux contribution.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions GPUCallableInlineMethod void updateSolutionWithEigenvalueDamping(const double *__restrict__ QIn, const QInEnumeratorType &QInEnumerator, const double *__restrict__ tempMaxEigenvalueX, const double *__restrict__ tempMaxEigenvalueY, const double *__restrict__ tempMaxEigenvalueZ, const MaxEigenvalueEnumeratorType &eigenvalueEnumerator, const ::tarch::la::Vector< Dimensions, double > &patchCentre, const ::tarch::la::Vector< Dimensions, double > &patchSize, int patchIndex, const ::tarch::la::Vector< Dimensions, int > &volumeIndex, int unknown, double dt, double *__restrict__ QOut, const QOutEnumeratorType &QOutEnumerator) InlineMethod
Uses the eigenvalues to damp the solution update.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseStateless(int targetDevice, int numberOfCells, double **mappedPointersToQIn, const double *rawPointerToCellCentre, const double *rawPointerToCellSize, const double *t, const double *dt, double *maxEigenvalue, double **mappedPointersToQOut, double *tempFluxX, double *tempFluxY, double *tempFluxZ, double *tempNonconservativeProductX, double *tempNonconservativeProductY, double *tempNonconservativeProductZ, double *tempEigenvalueX, double *tempEigenvalueY, double *tempEigenvalueZ) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseUSMStateless(int targetDevice, CellData< double, double > &patchData, tarch::timing::Measurement &measurement) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovPatchwiseHeapStateless(int targetDevice, CellData< double, double > &patchData, tarch::timing::Measurement &measurement) InlineMethod
The name patchwise is a little bit misleading in a GPU context.
auto volumeIndex(Args... args)
Definition VolumeIndex.h:54
Representation of a number of cells which contains all information that's required to process the sto...
Definition CellData.h:77
outType ** QOut
Out values.
Definition CellData.h:116
inType ** QIn
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
Definition CellData.h:82
const int numberOfCells
As we store data as SoA, we have to know how big the actual arrays are.
Definition CellData.h:99
double * maxEigenvalue
Out values.
Definition CellData.h:121
tarch::la::Vector< Dimensions, double > * cellCentre
Definition CellData.h:83
tarch::la::Vector< Dimensions, double > * cellSize
Definition CellData.h:84
Scalar * data()
This routine returns a pointer to the first data element.
Definition Vector.h:254
#define InlineMethod
This is the marker that is to be used after the argument list of a function declaration.
Definition tarch.h:58