Peano
Loading...
Searching...
No Matches
BatchedStateless.cpph
Go to the documentation of this file.
1// This file is part of the ExaHyPE2 project. For conditions of distribution and
2// use, please see the copyright notice at www.peano-framework.org
3
// Batched Rusanov finite-volume update over numberOfCells patches. Every compute
// stage below is issued as its own OpenMP target region with device(targetDevice).
//
// NOTE(review): the listing line that carries the return type and the function
// name (listing line 16) is missing from this extraction. The wrappers further
// down in this file call this routine as
// internal::timeStepWithRusanovBatchedStateless<...>(...).
//
// Stages, in the order they appear:
//   1. Copy QIn into QOut: copySolutionAndAddSourceTerm if EvaluateSource,
//      otherwise copySolution per unknown/auxiliary variable.
//   2. computeMaxEigenvalue per axis into tempEigenvalueX/Y/Z, followed by a
//      per-unknown kernel that receives these buffers together with QIn and QOut.
//   3. If EvaluateFlux: computeFlux per axis into tempFluxX/Y/Z, followed by a
//      per-unknown kernel that receives these buffers and QOut.
//   4. If EvaluateNonconservativeProduct: computeNonconservativeFlux per axis into
//      tempNonconservativeProductX/Y/Z, followed by a per-unknown kernel.
//   5. If EvaluateMaximumEigenvalueAfterTimeStep: per patch, the maximum of
//      reduceMaxEigenvalue over all volumes of QOut is stored in
//      maxEigenvalue[patchIndex].
//
// Parameters (as used in the body):
//   targetDevice            value handed to every device() clause.
//   numberOfCells           upper bound of every patchIndex loop.
//   mappedPointersToQIn,
//   mappedPointersToQOut    arrays of per-patch pointers, indexed by patchIndex.
//   rawPointerToCellCentre,
//   rawPointerToCellSize,
//   t, dt                   arrays indexed by patchIndex.
//   maxEigenvalue           written only in stage 5, one entry per patch.
//   tempFlux*, tempNonconservativeProduct*, tempEigenvalue*
//                           scratch buffers addressed through fluxEnumerator,
//                           ncpEnumerator and eigenvalueEnumerator respectively.
//
// The nowait clause is commented out on every target pragma, so none of the
// target regions carries it.
//
// NOTE(review): rawPointerToCellCentre[patchIndex] and
// rawPointerToCellSize[patchIndex] yield one double per patch here - confirm
// that this is what the loopbodies:: routines expect for centre and size.
5 template <
6 class SolverType,
7 int NumberOfVolumesPerAxisInPatch,
8 int HaloSize,
9 int NumberOfUnknowns,
10 int NumberOfAuxiliaryVariables,
11 bool EvaluateFlux,
12 bool EvaluateNonconservativeProduct,
13 bool EvaluateSource,
14 bool EvaluateMaximumEigenvalueAfterTimeStep,
15 class TempDataEnumeratorType>
17 int targetDevice,
18 int numberOfCells,
19 double** mappedPointersToQIn,
20 const double* rawPointerToCellCentre,
21 const double* rawPointerToCellSize,
22 const double* t,
23 const double* dt,
24 double* maxEigenvalue,
25 double** mappedPointersToQOut,
26 double* tempFluxX,
27 double* tempFluxY,
28 double* tempFluxZ,
29 double* tempNonconservativeProductX,
30 double* tempNonconservativeProductY,
31 double* tempNonconservativeProductZ,
32 double* tempEigenvalueX,
33 double* tempEigenvalueY,
34 double* tempEigenvalueZ
35 ) InlineMethod {
// QIn/QOut enumerators are built for a single patch (first argument 1) because
// each patch is reached through its own pointer; QOut is built with halo 0.
// The temporary-data enumerators span all numberOfCells patches. The ncp
// enumerator is constructed with NumberOfVolumesPerAxisInPatch + 1.
36 const enumerator::AoSLexicographicEnumerator QInEnumerator(1, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, NumberOfAuxiliaryVariables);
37 const enumerator::AoSLexicographicEnumerator QOutEnumerator(1, NumberOfVolumesPerAxisInPatch, 0, NumberOfUnknowns, NumberOfAuxiliaryVariables);
38 const TempDataEnumeratorType fluxEnumerator(numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, 0);
39 const TempDataEnumeratorType ncpEnumerator(numberOfCells, NumberOfVolumesPerAxisInPatch + 1, HaloSize, NumberOfUnknowns, 0);
40 const TempDataEnumeratorType eigenvalueEnumerator(numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, 1, 0);
41
42 // ====================================================
43 // Copy solution over and evaluate source (if required)
44 // ====================================================
// With EvaluateSource the copy is done once per volume; without it the copy
// is done once per volume and per unknown/auxiliary variable, hence the extra
// loop level and the larger collapse() count in the else branch.
45 if constexpr (EvaluateSource) {
46#if Dimensions == 2
47#pragma omp target teams distribute parallel for simd collapse(3) device(targetDevice) // nowait
48 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
49 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
50 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
51 loopbodies::copySolutionAndAddSourceTerm<SolverType>(
52 mappedPointersToQIn[patchIndex],
53 QInEnumerator,
54 rawPointerToCellCentre[patchIndex],
55 rawPointerToCellSize[patchIndex],
56 patchIndex,
57 volumeIndex(x, y),
58 t[patchIndex],
59 dt[patchIndex],
60 mappedPointersToQOut[patchIndex],
61 QOutEnumerator
62 );
63 }
64 }
65 }
66#else
67#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice) // nowait
68 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
69 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
70 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
71 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
72 loopbodies::copySolutionAndAddSourceTerm<SolverType>(
73 mappedPointersToQIn[patchIndex],
74 QInEnumerator,
75 rawPointerToCellCentre[patchIndex],
76 rawPointerToCellSize[patchIndex],
77 patchIndex,
78 volumeIndex(x, y, z),
79 t[patchIndex],
80 dt[patchIndex],
81 mappedPointersToQOut[patchIndex],
82 QOutEnumerator
83 );
84 }
85 }
86 }
87 }
88#endif
89 } else {
90#if Dimensions == 2
91#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice) // nowait
92 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
93 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
94 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
95 for (int unknown = 0; unknown < NumberOfUnknowns + NumberOfAuxiliaryVariables; unknown++) {
96 loopbodies::copySolution(mappedPointersToQIn[patchIndex], QInEnumerator, patchIndex, volumeIndex(x, y), unknown, mappedPointersToQOut[patchIndex], QOutEnumerator);
97 }
98 }
99 }
100 }
101#else
102#pragma omp target teams distribute parallel for simd collapse(5) device(targetDevice) // nowait
103 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
104 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
105 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
106 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
107 for (int unknown = 0; unknown < NumberOfUnknowns + NumberOfAuxiliaryVariables; unknown++) {
// NOTE(review): the line opening this call (listing line 108) is missing from
// this extraction; presumably loopbodies::copySolution( as in the 2D branch -
// confirm against the repository.
109 mappedPointersToQIn[patchIndex], QInEnumerator, patchIndex, volumeIndex(x, y, z), unknown, mappedPointersToQOut[patchIndex], QOutEnumerator
110 );
111 }
112 }
113 }
114 }
115 }
116#endif
117 }
118
119 // ====================================================
120 // Compute damping due to max eigenvalue
121 // ====================================================
// One kernel per axis. The loop along the respective normal runs over
// NumberOfVolumesPerAxisInPatch + 2 * HaloSize entries and the index is shifted
// by -HaloSize, so the halo volumes along that axis are visited as well. The
// integer literal 0/1/2 passed to computeMaxEigenvalue selects the axis. In
// the 2D branch no kernel fills tempEigenvalueZ; it is only passed through to
// the final per-unknown kernel.
122#if Dimensions == 2
123#pragma omp target teams distribute parallel for simd collapse(3) device(targetDevice) // nowait
124 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
125 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
126 for (int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; x++) {
127 loopbodies::computeMaxEigenvalue<SolverType>(
128 mappedPointersToQIn[patchIndex],
129 QInEnumerator,
130 rawPointerToCellCentre[patchIndex],
131 rawPointerToCellSize[patchIndex],
132 patchIndex,
133 volumeIndex(x - HaloSize, y),
134 t[patchIndex],
135 dt[patchIndex],
136 0,
137 tempEigenvalueX,
138 eigenvalueEnumerator
139 );
140 }
141 }
142 }
143
144#pragma omp target teams distribute parallel for simd collapse(3) device(targetDevice) // nowait
145 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
146 for (int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; y++) {
147 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
148 loopbodies::computeMaxEigenvalue<SolverType>(
149 mappedPointersToQIn[patchIndex],
150 QInEnumerator,
151 rawPointerToCellCentre[patchIndex],
152 rawPointerToCellSize[patchIndex],
153 patchIndex,
154 volumeIndex(x, y - HaloSize),
155 t[patchIndex],
156 dt[patchIndex],
157 1,
158 tempEigenvalueY,
159 eigenvalueEnumerator
160 );
161 }
162 }
163 }
164
165#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice) // nowait
166 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
167 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
168 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
169 for (int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
// NOTE(review): the line opening this call (listing line 170) is missing from
// this extraction. The callee receives QIn, the three eigenvalue buffers and
// QOut; its name cannot be read from here.
171 mappedPointersToQIn[patchIndex],
172 QInEnumerator,
173 tempEigenvalueX,
174 tempEigenvalueY,
175 tempEigenvalueZ,
176 eigenvalueEnumerator,
177 rawPointerToCellCentre[patchIndex],
178 rawPointerToCellSize[patchIndex],
179 patchIndex,
180 volumeIndex(x, y),
181 unknown,
182 dt[patchIndex],
183 mappedPointersToQOut[patchIndex],
184 QOutEnumerator
185 );
186 }
187 }
188 }
189 }
190#else
191#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice) // nowait
192 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
193 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
194 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
195 for (int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; x++) {
196 loopbodies::computeMaxEigenvalue<SolverType>(
197 mappedPointersToQIn[patchIndex],
198 QInEnumerator,
199 rawPointerToCellCentre[patchIndex],
200 rawPointerToCellSize[patchIndex],
201 patchIndex,
202 volumeIndex(x - HaloSize, y, z),
203 t[patchIndex],
204 dt[patchIndex],
205 0,
206 tempEigenvalueX,
207 eigenvalueEnumerator
208 );
209 }
210 }
211 }
212 }
213
214#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice) // nowait
215 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
216 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
217 for (int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; y++) {
218 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
219 loopbodies::computeMaxEigenvalue<SolverType>(
220 mappedPointersToQIn[patchIndex],
221 QInEnumerator,
222 rawPointerToCellCentre[patchIndex],
223 rawPointerToCellSize[patchIndex],
224 patchIndex,
225 volumeIndex(x, y - HaloSize, z),
226 t[patchIndex],
227 dt[patchIndex],
228 1,
229 tempEigenvalueY,
230 eigenvalueEnumerator
231 );
232 }
233 }
234 }
235 }
236
237#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice) // nowait
238 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
239 for (int z = 0; z < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; z++) {
240 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
241 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
242 loopbodies::computeMaxEigenvalue<SolverType>(
243 mappedPointersToQIn[patchIndex],
244 QInEnumerator,
245 rawPointerToCellCentre[patchIndex],
246 rawPointerToCellSize[patchIndex],
247 patchIndex,
248 volumeIndex(x, y, z - HaloSize),
249 t[patchIndex],
250 dt[patchIndex],
251 2,
252 tempEigenvalueZ,
253 eigenvalueEnumerator
254 );
255 }
256 }
257 }
258 }
259
260#pragma omp target teams distribute parallel for simd collapse(5) device(targetDevice) // nowait
261 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
262 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
263 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
264 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
265 for (int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
// NOTE(review): the line opening this call (listing line 266) is missing from
// this extraction; the argument list mirrors the 2D per-unknown kernel above.
267 mappedPointersToQIn[patchIndex],
268 QInEnumerator,
269 tempEigenvalueX,
270 tempEigenvalueY,
271 tempEigenvalueZ,
272 eigenvalueEnumerator,
273 rawPointerToCellCentre[patchIndex],
274 rawPointerToCellSize[patchIndex],
275 patchIndex,
276 volumeIndex(x, y, z),
277 unknown,
278 dt[patchIndex],
279 mappedPointersToQOut[patchIndex],
280 QOutEnumerator
281 );
282 }
283 }
284 }
285 }
286 }
287#endif
288
289 // ====================================================
290 // Normal (conservative) flux
291 // ====================================================
// Same loop layout as the eigenvalue stage: one computeFlux kernel per axis
// that includes the halo volumes along its normal, then one per-unknown kernel
// over the interior volumes that receives all three flux buffers and QOut.
292 if constexpr (EvaluateFlux) {
293#if Dimensions == 2
294#pragma omp target teams distribute parallel for simd collapse(3) device(targetDevice) // nowait
295 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
296 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
297 for (int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; x++) {
298 loopbodies::computeFlux<SolverType>(
299 mappedPointersToQIn[patchIndex],
300 QInEnumerator,
301 rawPointerToCellCentre[patchIndex],
302 rawPointerToCellSize[patchIndex],
303 patchIndex,
304 volumeIndex(x - HaloSize, y),
305 t[patchIndex],
306 dt[patchIndex],
307 0, // normal
308 tempFluxX,
309 fluxEnumerator
310 );
311 }
312 }
313 }
314
315#pragma omp target teams distribute parallel for simd collapse(3) device(targetDevice) // nowait
316 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
317 for (int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; y++) {
318 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
319 loopbodies::computeFlux<SolverType>(
320 mappedPointersToQIn[patchIndex],
321 QInEnumerator,
322 rawPointerToCellCentre[patchIndex],
323 rawPointerToCellSize[patchIndex],
324 patchIndex,
325 volumeIndex(x, y - HaloSize),
326 t[patchIndex],
327 dt[patchIndex],
328 1, // normal
329 tempFluxY,
330 fluxEnumerator
331 );
332 }
333 }
334 }
335
336#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice) // nowait
337 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
338 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
339 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
340 for (int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
// NOTE(review): the line opening this call (listing line 341) is missing from
// this extraction. The callee receives the three flux buffers and QOut; its
// name cannot be read from here.
342 tempFluxX,
343 tempFluxY,
344 tempFluxZ,
345 fluxEnumerator,
346 rawPointerToCellCentre[patchIndex],
347 rawPointerToCellSize[patchIndex],
348 patchIndex,
349 volumeIndex(x, y),
350 unknown,
351 dt[patchIndex],
352 mappedPointersToQOut[patchIndex],
353 QOutEnumerator
354 );
355 }
356 }
357 }
358 }
359#else
360#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice) // nowait
361 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
362 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
363 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
364 for (int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; x++) {
365 loopbodies::computeFlux<SolverType>(
366 mappedPointersToQIn[patchIndex],
367 QInEnumerator,
368 rawPointerToCellCentre[patchIndex],
369 rawPointerToCellSize[patchIndex],
370 patchIndex,
371 volumeIndex(x - HaloSize, y, z),
372 t[patchIndex],
373 dt[patchIndex],
374 0, // normal
375 tempFluxX,
376 fluxEnumerator
377 );
378 }
379 }
380 }
381 }
382
383#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice) // nowait
384 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
385 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
386 for (int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; y++) {
387 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
388 loopbodies::computeFlux<SolverType>(
389 mappedPointersToQIn[patchIndex],
390 QInEnumerator,
391 rawPointerToCellCentre[patchIndex],
392 rawPointerToCellSize[patchIndex],
393 patchIndex,
394 volumeIndex(x, y - HaloSize, z),
395 t[patchIndex],
396 dt[patchIndex],
397 1, // normal
398 tempFluxY,
399 fluxEnumerator
400 );
401 }
402 }
403 }
404 }
405
406#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice) // nowait
407 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
408 for (int z = 0; z < NumberOfVolumesPerAxisInPatch + 2 * HaloSize; z++) {
409 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
410 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
411 loopbodies::computeFlux<SolverType>(
412 mappedPointersToQIn[patchIndex],
413 QInEnumerator,
414 rawPointerToCellCentre[patchIndex],
415 rawPointerToCellSize[patchIndex],
416 patchIndex,
417 volumeIndex(x, y, z - HaloSize),
418 t[patchIndex],
419 dt[patchIndex],
420 2, // normal
421 tempFluxZ,
422 fluxEnumerator
423 );
424 }
425 }
426 }
427 }
428
429#pragma omp target teams distribute parallel for simd collapse(5) device(targetDevice) // nowait
430 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
431 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
432 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
433 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
434 for (int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
// NOTE(review): the line opening this call (listing line 435) is missing from
// this extraction; the argument list mirrors the 2D flux update kernel above.
436 tempFluxX,
437 tempFluxY,
438 tempFluxZ,
439 fluxEnumerator,
440 rawPointerToCellCentre[patchIndex],
441 rawPointerToCellSize[patchIndex],
442 patchIndex,
443 volumeIndex(x, y, z),
444 unknown,
445 dt[patchIndex],
446 mappedPointersToQOut[patchIndex],
447 QOutEnumerator
448 );
449 }
450 }
451 }
452 }
453 }
454#endif
455 }
456
// Non-conservative product. Compared with the flux stage, the loop along the
// normal has the upper bound NumberOfVolumesPerAxisInPatch + 2 * HaloSize - 1,
// i.e. it visits one volume fewer along that axis. Results are addressed via
// ncpEnumerator.
457 if constexpr (EvaluateNonconservativeProduct) {
458#if Dimensions == 2
459#pragma omp target teams distribute parallel for simd collapse(3) device(targetDevice) // nowait
460 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
461 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
462 for (int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 * HaloSize - 1; x++) {
463 loopbodies::computeNonconservativeFlux<SolverType>(
464 mappedPointersToQIn[patchIndex],
465 QInEnumerator,
466 rawPointerToCellCentre[patchIndex],
467 rawPointerToCellSize[patchIndex],
468 patchIndex,
469 volumeIndex(x - HaloSize, y),
470 t[patchIndex],
471 dt[patchIndex],
472 0, // normal
473 tempNonconservativeProductX,
474 ncpEnumerator
475 );
476 }
477 }
478 }
479
480#pragma omp target teams distribute parallel for simd collapse(3) device(targetDevice) // nowait
481 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
482 for (int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 * HaloSize - 1; y++) {
483 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
484 loopbodies::computeNonconservativeFlux<SolverType>(
485 mappedPointersToQIn[patchIndex],
486 QInEnumerator,
487 rawPointerToCellCentre[patchIndex],
488 rawPointerToCellSize[patchIndex],
489 patchIndex,
490 volumeIndex(x, y - HaloSize),
491 t[patchIndex],
492 dt[patchIndex],
493 1, // normal
494 tempNonconservativeProductY,
495 ncpEnumerator
496 );
497 }
498 }
499 }
500
501#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice) // nowait
502 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
503 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
504 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
505 for (int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
// NOTE(review): the line opening this call (listing line 506) is missing from
// this extraction. The callee receives the three non-conservative product
// buffers and QOut; its name cannot be read from here.
507 tempNonconservativeProductX,
508 tempNonconservativeProductY,
509 tempNonconservativeProductZ,
510 ncpEnumerator,
511 rawPointerToCellCentre[patchIndex],
512 rawPointerToCellSize[patchIndex],
513 patchIndex,
514 volumeIndex(x, y),
515 unknown,
516 dt[patchIndex],
517 mappedPointersToQOut[patchIndex],
518 QOutEnumerator
519 );
520 }
521 }
522 }
523 }
524#else
525#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice) // nowait
526 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
527 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
528 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
529 for (int x = 0; x < NumberOfVolumesPerAxisInPatch + 2 * HaloSize - 1; x++) {
530 loopbodies::computeNonconservativeFlux<SolverType>(
531 mappedPointersToQIn[patchIndex],
532 QInEnumerator,
533 rawPointerToCellCentre[patchIndex],
534 rawPointerToCellSize[patchIndex],
535 patchIndex,
536 volumeIndex(x - HaloSize, y, z),
537 t[patchIndex],
538 dt[patchIndex],
539 0, // normal
540 tempNonconservativeProductX,
541 ncpEnumerator
542 );
543 }
544 }
545 }
546 }
547
548#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice) // nowait
549 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
550 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
551 for (int y = 0; y < NumberOfVolumesPerAxisInPatch + 2 * HaloSize - 1; y++) {
552 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
553 loopbodies::computeNonconservativeFlux<SolverType>(
554 mappedPointersToQIn[patchIndex],
555 QInEnumerator,
556 rawPointerToCellCentre[patchIndex],
557 rawPointerToCellSize[patchIndex],
558 patchIndex,
559 volumeIndex(x, y - HaloSize, z),
560 t[patchIndex],
561 dt[patchIndex],
562 1, // normal
563 tempNonconservativeProductY,
564 ncpEnumerator
565 );
566 }
567 }
568 }
569 }
570
571#pragma omp target teams distribute parallel for simd collapse(4) device(targetDevice) // nowait
572 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
573 for (int z = 0; z < NumberOfVolumesPerAxisInPatch + 2 * HaloSize - 1; z++) {
574 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
575 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
576 loopbodies::computeNonconservativeFlux<SolverType>(
577 mappedPointersToQIn[patchIndex],
578 QInEnumerator,
579 rawPointerToCellCentre[patchIndex],
580 rawPointerToCellSize[patchIndex],
581 patchIndex,
582 volumeIndex(x, y, z - HaloSize),
583 t[patchIndex],
584 dt[patchIndex],
585 2, // normal
586 tempNonconservativeProductZ,
587 ncpEnumerator
588 );
589 }
590 }
591 }
592 }
593
594#pragma omp target teams distribute parallel for simd collapse(5) device(targetDevice) // nowait
595 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
596 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
597 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
598 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
599 for (int unknown = 0; unknown < NumberOfUnknowns; unknown++) {
// NOTE(review): the line opening this call (listing line 600) is missing from
// this extraction; the argument list mirrors the 2D update kernel above.
601 tempNonconservativeProductX,
602 tempNonconservativeProductY,
603 tempNonconservativeProductZ,
604 ncpEnumerator,
605 rawPointerToCellCentre[patchIndex],
606 rawPointerToCellSize[patchIndex],
607 patchIndex,
608 volumeIndex(x, y, z),
609 unknown,
610 dt[patchIndex],
611 mappedPointersToQOut[patchIndex],
612 QOutEnumerator
613 );
614 }
615 }
616 }
617 }
618 }
619#endif
620 }
621
// Final reduction: the patches are distributed over teams, and within each
// patch a nested parallel loop with a max-reduction folds reduceMaxEigenvalue
// over all volumes of QOut. The accumulator starts at 0.0, so the stored value
// is never below zero.
622 if constexpr (EvaluateMaximumEigenvalueAfterTimeStep) {
623#if Dimensions == 2
624#pragma omp target teams distribute device(targetDevice)
625 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
626 double newMaxEigenvalue = 0.0;
627#pragma omp parallel for simd collapse(2) reduction(max : newMaxEigenvalue)
628 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
629 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
630 newMaxEigenvalue = std::max(
631 newMaxEigenvalue,
632 loopbodies::reduceMaxEigenvalue<SolverType>(
633 mappedPointersToQOut[patchIndex],
634 QOutEnumerator,
635 rawPointerToCellCentre[patchIndex],
636 rawPointerToCellSize[patchIndex],
637 patchIndex,
638 volumeIndex(x, y),
639 t[patchIndex],
640 dt[patchIndex]
641 )
642 );
643 }
644 }
645 maxEigenvalue[patchIndex] = newMaxEigenvalue;
646 }
647#else
648#pragma omp target teams distribute device(targetDevice)
649 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
650 double newMaxEigenvalue = 0.0;
651#pragma omp parallel for simd collapse(3) reduction(max : newMaxEigenvalue)
652 for (int z = 0; z < NumberOfVolumesPerAxisInPatch; z++) {
653 for (int y = 0; y < NumberOfVolumesPerAxisInPatch; y++) {
654 for (int x = 0; x < NumberOfVolumesPerAxisInPatch; x++) {
655 newMaxEigenvalue = std::max(
656 newMaxEigenvalue,
657 loopbodies::reduceMaxEigenvalue<SolverType>(
658 mappedPointersToQOut[patchIndex],
659 QOutEnumerator,
660 rawPointerToCellCentre[patchIndex],
661 rawPointerToCellSize[patchIndex],
662 patchIndex,
663 volumeIndex(x, y, z),
664 t[patchIndex],
665 dt[patchIndex]
666 )
667 );
668 }
669 }
670 }
671 maxEigenvalue[patchIndex] = newMaxEigenvalue;
672 }
673#endif
674 }
675 }
676} // namespace exahype2::fv::rusanov::omp::internal
677
678
// Host-side entry point that allocates the nine temporary buffers through
// tarch::accelerator::omp::GPUMemoryManager for targetDevice, unpacks patchData
// into raw pointers, runs internal::timeStepWithRusanovBatchedStateless, and
// releases the buffers again. This overload times the kernel call with a
// tarch::timing::Watch and stores watch.getCalendarTime() in measurement.
//
// NOTE(review): the listing line with the function name and parameter list
// (listing line 690) is missing from this extraction; targetDevice, patchData
// and measurement are used below without a visible declaration. The trace label
// suggests the name timeStepWithRusanovBatchedUSMStateless - confirm against
// the repository.
679template <
680 class SolverType,
681 int NumberOfVolumesPerAxisInPatch,
682 int HaloSize,
683 int NumberOfUnknowns,
684 int NumberOfAuxiliaryVariables,
685 bool EvaluateFlux,
686 bool EvaluateNonconservativeProduct,
687 bool EvaluateSource,
688 bool EvaluateMaximumEigenvalueAfterTimeStep,
689 class TempDataEnumeratorType>
// Only a halo of exactly one volume is accepted at compile time.
691 static_assert(HaloSize == 1);
692
693 static tarch::logging::Log _log("exahype2::fv::rusanov::omp");
694 logTraceIn("timeStepWithRusanovBatchedUSMStateless()");
695
// The flux/ncp/eigenvalue enumerators are needed here only for their size();
// QInEnumerator and QOutEnumerator are not referenced again in this function.
696 const enumerator::AoSLexicographicEnumerator QInEnumerator(1, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, NumberOfAuxiliaryVariables);
697 const enumerator::AoSLexicographicEnumerator QOutEnumerator(1, NumberOfVolumesPerAxisInPatch, 0, NumberOfUnknowns, NumberOfAuxiliaryVariables);
698 const TempDataEnumeratorType fluxEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, 0);
699 const TempDataEnumeratorType ncpEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch + 1, HaloSize, NumberOfUnknowns, 0);
700 const TempDataEnumeratorType eigenvalueEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, 1, 0);
701
// X and Y buffers always get the full enumerator size. The Z buffers get the
// full size only when Dimensions == 3 and a single element otherwise.
702 double* tempFluxX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(fluxEnumerator.size(), targetDevice);
703 double* tempFluxY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(fluxEnumerator.size(), targetDevice);
704 double* tempFluxZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(Dimensions == 3 ? fluxEnumerator.size() : 1, targetDevice);
705 double* tempNonconservativeProductX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(ncpEnumerator.size(), targetDevice);
706 double* tempNonconservativeProductY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(ncpEnumerator.size(), targetDevice);
707 double* tempNonconservativeProductZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(Dimensions == 3 ? ncpEnumerator.size() : 1, targetDevice);
708 double* tempEigenvalueX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(eigenvalueEnumerator.size(), targetDevice);
709 double* tempEigenvalueY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(eigenvalueEnumerator.size(), targetDevice);
710 double* tempEigenvalueZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(Dimensions == 3 ? eigenvalueEnumerator.size() : 1, targetDevice);
711
// Unpack patchData into plain locals before handing them to the kernel.
// Cell centres and sizes are passed as a double pointer to the data of the
// first entry.
// NOTE(review): presumably this relies on patchData.cellCentre / cellSize
// being stored contiguously - confirm.
712 const int numberOfCells = patchData.numberOfCells;
713 double** mappedPointersToQIn = patchData.QIn;
714 double** mappedPointersToQOut = patchData.QOut;
715 double* rawPointerToCellCentre = patchData.cellCentre[0].data();
716 double* rawPointerToCellSize = patchData.cellSize[0].data();
717 double* t = patchData.t;
718 double* dt = patchData.dt;
719 double* maxEigenvalue = patchData.maxEigenvalue;
720
// The watch is created after the allocations above and stopped before the
// frees below, so it brackets only the kernel call.
721 tarch::timing::Watch watch("exahype2::fv::rusanov::omp", "timeStepWithRusanovBatchedUSMStateless", false, true);
722 internal::timeStepWithRusanovBatchedStateless<
723 SolverType,
724 NumberOfVolumesPerAxisInPatch,
725 HaloSize,
726 NumberOfUnknowns,
727 NumberOfAuxiliaryVariables,
728 EvaluateFlux,
729 EvaluateNonconservativeProduct,
730 EvaluateSource,
731 EvaluateMaximumEigenvalueAfterTimeStep,
732 TempDataEnumeratorType>(
733 targetDevice,
734 numberOfCells,
735 mappedPointersToQIn,
736 rawPointerToCellCentre,
737 rawPointerToCellSize,
738 t,
739 dt,
740 maxEigenvalue,
741 mappedPointersToQOut,
742 tempFluxX,
743 tempFluxY,
744 tempFluxZ,
745 tempNonconservativeProductX,
746 tempNonconservativeProductY,
747 tempNonconservativeProductZ,
748 tempEigenvalueX,
749 tempEigenvalueY,
750 tempEigenvalueZ
751 );
752 watch.stop();
753 measurement.setValue(watch.getCalendarTime());
754
// Release every temporary buffer on the same device it was allocated for.
755 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxX, targetDevice);
756 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxY, targetDevice);
757 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxZ, targetDevice);
758 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductX, targetDevice);
759 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductY, targetDevice);
760 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductZ, targetDevice);
761 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueX, targetDevice);
762 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueY, targetDevice);
763 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueZ, targetDevice);
764
765 logTraceOut("timeStepWithRusanovBatchedUSMStateless()");
766}
767
768
// Host-side entry point without timing: allocates the nine temporary buffers
// through tarch::accelerator::omp::GPUMemoryManager for targetDevice, unpacks
// patchData into raw pointers, runs internal::timeStepWithRusanovBatchedStateless,
// and frees the buffers afterwards. No Watch or measurement is involved here.
//
// NOTE(review): the listing line with the function name and parameter list
// (listing line 780) is missing from this extraction; targetDevice and
// patchData are used below without a visible declaration. The trace label
// suggests the name timeStepWithRusanovBatchedUSMStateless - confirm against
// the repository.
769template <
770 class SolverType,
771 int NumberOfVolumesPerAxisInPatch,
772 int HaloSize,
773 int NumberOfUnknowns,
774 int NumberOfAuxiliaryVariables,
775 bool EvaluateFlux,
776 bool EvaluateNonconservativeProduct,
777 bool EvaluateSource,
778 bool EvaluateMaximumEigenvalueAfterTimeStep,
779 class TempDataEnumeratorType>
// Only a halo of exactly one volume is accepted at compile time.
781 static_assert(HaloSize == 1);
782
783 static tarch::logging::Log _log("exahype2::fv::rusanov::omp");
784 logTraceIn("timeStepWithRusanovBatchedUSMStateless()");
785
// The flux/ncp/eigenvalue enumerators are needed here only for their size();
// QInEnumerator and QOutEnumerator are not referenced again in this function.
786 const enumerator::AoSLexicographicEnumerator QInEnumerator(1, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, NumberOfAuxiliaryVariables);
787 const enumerator::AoSLexicographicEnumerator QOutEnumerator(1, NumberOfVolumesPerAxisInPatch, 0, NumberOfUnknowns, NumberOfAuxiliaryVariables);
788 const TempDataEnumeratorType fluxEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, 0);
789 const TempDataEnumeratorType ncpEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch + 1, HaloSize, NumberOfUnknowns, 0);
790 const TempDataEnumeratorType eigenvalueEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, 1, 0);
791
// X and Y buffers always get the full enumerator size. The Z buffers get the
// full size only when Dimensions == 3 and a single element otherwise.
792 double* tempFluxX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(fluxEnumerator.size(), targetDevice);
793 double* tempFluxY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(fluxEnumerator.size(), targetDevice);
794 double* tempFluxZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(Dimensions == 3 ? fluxEnumerator.size() : 1, targetDevice);
795 double* tempNonconservativeProductX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(ncpEnumerator.size(), targetDevice);
796 double* tempNonconservativeProductY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(ncpEnumerator.size(), targetDevice);
797 double* tempNonconservativeProductZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(Dimensions == 3 ? ncpEnumerator.size() : 1, targetDevice);
798 double* tempEigenvalueX = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(eigenvalueEnumerator.size(), targetDevice);
799 double* tempEigenvalueY = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(eigenvalueEnumerator.size(), targetDevice);
800 double* tempEigenvalueZ = ::tarch::accelerator::omp::GPUMemoryManager::getInstance().allocate<double>(Dimensions == 3 ? eigenvalueEnumerator.size() : 1, targetDevice);
801
// Unpack patchData into plain locals before handing them to the kernel.
// Cell centres and sizes are passed as a double pointer to the data of the
// first entry.
// NOTE(review): presumably this relies on patchData.cellCentre / cellSize
// being stored contiguously - confirm.
802 const int numberOfCells = patchData.numberOfCells;
803 double** mappedPointersToQIn = patchData.QIn;
804 double** mappedPointersToQOut = patchData.QOut;
805 double* rawPointerToCellCentre = patchData.cellCentre[0].data();
806 double* rawPointerToCellSize = patchData.cellSize[0].data();
807 double* t = patchData.t;
808 double* dt = patchData.dt;
809 double* maxEigenvalue = patchData.maxEigenvalue;
810
811 internal::timeStepWithRusanovBatchedStateless<
812 SolverType,
813 NumberOfVolumesPerAxisInPatch,
814 HaloSize,
815 NumberOfUnknowns,
816 NumberOfAuxiliaryVariables,
817 EvaluateFlux,
818 EvaluateNonconservativeProduct,
819 EvaluateSource,
820 EvaluateMaximumEigenvalueAfterTimeStep,
821 TempDataEnumeratorType>(
822 targetDevice,
823 numberOfCells,
824 mappedPointersToQIn,
825 rawPointerToCellCentre,
826 rawPointerToCellSize,
827 t,
828 dt,
829 maxEigenvalue,
830 mappedPointersToQOut,
831 tempFluxX,
832 tempFluxY,
833 tempFluxZ,
834 tempNonconservativeProductX,
835 tempNonconservativeProductY,
836 tempNonconservativeProductZ,
837 tempEigenvalueX,
838 tempEigenvalueY,
839 tempEigenvalueZ
840 );
841
// Release every temporary buffer on the same device it was allocated for.
842 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxX, targetDevice);
843 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxY, targetDevice);
844 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempFluxZ, targetDevice);
845 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductX, targetDevice);
846 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductY, targetDevice);
847 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempNonconservativeProductZ, targetDevice);
848 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueX, targetDevice);
849 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueY, targetDevice);
850 ::tarch::accelerator::omp::GPUMemoryManager::getInstance().free(tempEigenvalueZ, targetDevice);
851
852 logTraceOut("timeStepWithRusanovBatchedUSMStateless()");
853}
854
855
856template <
857 class SolverType,
858 int NumberOfVolumesPerAxisInPatch,
859 int HaloSize,
860 int NumberOfUnknowns,
861 int NumberOfAuxiliaryVariables,
862 bool EvaluateFlux,
863 bool EvaluateNonconservativeProduct,
864 bool EvaluateSource,
865 bool EvaluateMaximumEigenvalueAfterTimeStep,
866 class TempDataEnumeratorType>
868 static_assert(HaloSize == 1);
869
870 static tarch::logging::Log _log("exahype2::fv::rusanov::omp");
871 logTraceIn("timeStepWithRusanovBatchedHeapStateless()");
872
873 const enumerator::AoSLexicographicEnumerator QInEnumerator(1, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, NumberOfAuxiliaryVariables);
874 const enumerator::AoSLexicographicEnumerator QOutEnumerator(1, NumberOfVolumesPerAxisInPatch, 0, NumberOfUnknowns, NumberOfAuxiliaryVariables);
875 const TempDataEnumeratorType fluxEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, 0);
876 const TempDataEnumeratorType ncpEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch + 1, HaloSize, NumberOfUnknowns, 0);
877 const TempDataEnumeratorType eigenvalueEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, 1, 0);
878
879 double* tempFluxX = new double[fluxEnumerator.size()];
880 double* tempFluxY = new double[fluxEnumerator.size()];
881 double* tempFluxZ = new double[fluxEnumerator.size()];
882 double* tempNonconservativeProductX = new double[ncpEnumerator.size()];
883 double* tempNonconservativeProductY = new double[ncpEnumerator.size()];
884 double* tempNonconservativeProductZ = new double[ncpEnumerator.size()];
885 double* tempEigenvalueX = new double[eigenvalueEnumerator.size()];
886 double* tempEigenvalueY = new double[eigenvalueEnumerator.size()];
887 double* tempEigenvalueZ = new double[eigenvalueEnumerator.size()];
888
889#pragma omp target enter data map(alloc : tempFluxX[0 : fluxEnumerator.size()]) device(targetDevice)
890#pragma omp target enter data map(alloc : tempFluxY[0 : fluxEnumerator.size()]) device(targetDevice)
891#pragma omp target enter data map(alloc : tempFluxZ[0 : fluxEnumerator.size()]) device(targetDevice)
892#pragma omp target enter data map(alloc : tempNonconservativeProductX[0 : ncpEnumerator.size()]) device(targetDevice)
893#pragma omp target enter data map(alloc : tempNonconservativeProductY[0 : ncpEnumerator.size()]) device(targetDevice)
894#pragma omp target enter data map(alloc : tempNonconservativeProductZ[0 : ncpEnumerator.size()]) device(targetDevice)
895#pragma omp target enter data map(alloc : tempEigenvalueX[0 : eigenvalueEnumerator.size()]) device(targetDevice)
896#pragma omp target enter data map(alloc : tempEigenvalueY[0 : eigenvalueEnumerator.size()]) device(targetDevice)
897#pragma omp target enter data map(alloc : tempEigenvalueZ[0 : eigenvalueEnumerator.size()]) device(targetDevice)
898
899 double** mappedPointersToQIn = new double*[patchData.numberOfCells];
900 double** mappedPointersToQOut = new double*[patchData.numberOfCells];
901
902 for (int patchIndex = 0; patchIndex < patchData.numberOfCells; patchIndex++) {
903 const double* currentQIn = patchData.QIn[patchIndex];
904 double* currentQOut = patchData.QOut[patchIndex];
905#pragma omp target enter data map(to : currentQIn[0 : QInEnumerator.size()]) device(targetDevice)
906#pragma omp target enter data map(alloc : currentQOut[0 : QOutEnumerator.size()]) device(targetDevice)
907 mappedPointersToQIn[patchIndex] = static_cast<double*>(omp_get_mapped_ptr(currentQIn, targetDevice));
908 mappedPointersToQOut[patchIndex] = static_cast<double*>(omp_get_mapped_ptr(currentQOut, targetDevice));
909 }
910
911 const int numberOfCells = patchData.numberOfCells;
912 double* rawPointerToCellCentre = patchData.cellCentre[0].data();
913 double* rawPointerToCellSize = patchData.cellSize[0].data();
914 double* t = patchData.t;
915 double* dt = patchData.dt;
916 double* maxEigenvalue = patchData.maxEigenvalue;
917
918#pragma omp target enter data map(to : rawPointerToCellCentre[0 : numberOfCells * Dimensions]) device(targetDevice)
919#pragma omp target enter data map(to : rawPointerToCellSize[0 : numberOfCells * Dimensions]) device(targetDevice)
920#pragma omp target enter data map(to : mappedPointersToQIn[0 : numberOfCells]) device(targetDevice)
921#pragma omp target enter data map(to : mappedPointersToQOut[0 : numberOfCells]) device(targetDevice)
922#pragma omp target enter data map(to : t[0 : numberOfCells]) device(targetDevice)
923#pragma omp target enter data map(to : dt[0 : numberOfCells]) device(targetDevice)
924#pragma omp target enter data map(alloc : maxEigenvalue[0 : numberOfCells]) device(targetDevice)
925
926 tarch::timing::Watch watch("exahype2::fv::rusanov::omp", "timeStepWithRusanovBatchedHeapStateless", false, true);
927 internal::timeStepWithRusanovBatchedStateless<
928 SolverType,
929 NumberOfVolumesPerAxisInPatch,
930 HaloSize,
931 NumberOfUnknowns,
932 NumberOfAuxiliaryVariables,
933 EvaluateFlux,
934 EvaluateNonconservativeProduct,
935 EvaluateSource,
936 EvaluateMaximumEigenvalueAfterTimeStep,
937 TempDataEnumeratorType>(
938 targetDevice,
939 numberOfCells,
940 mappedPointersToQIn,
941 rawPointerToCellCentre,
942 rawPointerToCellSize,
943 t,
944 dt,
945 maxEigenvalue,
946 mappedPointersToQOut,
947 tempFluxX,
948 tempFluxY,
949 tempFluxZ,
950 tempNonconservativeProductX,
951 tempNonconservativeProductY,
952 tempNonconservativeProductZ,
953 tempEigenvalueX,
954 tempEigenvalueY,
955 tempEigenvalueZ
956 );
957 watch.stop();
958 measurement.setValue(watch.getCalendarTime());
959
960#pragma omp target exit data map(delete : rawPointerToCellCentre[0 : numberOfCells * Dimensions]) device(targetDevice)
961#pragma omp target exit data map(delete : rawPointerToCellSize[0 : numberOfCells * Dimensions]) device(targetDevice)
962#pragma omp target exit data map(delete : mappedPointersToQIn[0 : numberOfCells]) device(targetDevice)
963#pragma omp target exit data map(delete : mappedPointersToQOut[0 : numberOfCells]) device(targetDevice)
964#pragma omp target exit data map(delete : t[0 : numberOfCells]) device(targetDevice)
965#pragma omp target exit data map(delete : dt[0 : numberOfCells]) device(targetDevice)
966#pragma omp target exit data map(from : maxEigenvalue[0 : numberOfCells]) device(targetDevice)
967
968#pragma omp target exit data map(delete : tempFluxX[0 : fluxEnumerator.size()]) device(targetDevice)
969#pragma omp target exit data map(delete : tempFluxY[0 : fluxEnumerator.size()]) device(targetDevice)
970#pragma omp target exit data map(delete : tempFluxZ[0 : fluxEnumerator.size()]) device(targetDevice)
971#pragma omp target exit data map(delete : tempNonconservativeProductX[0 : ncpEnumerator.size()]) device(targetDevice)
972#pragma omp target exit data map(delete : tempNonconservativeProductY[0 : ncpEnumerator.size()]) device(targetDevice)
973#pragma omp target exit data map(delete : tempNonconservativeProductZ[0 : ncpEnumerator.size()]) device(targetDevice)
974#pragma omp target exit data map(delete : tempEigenvalueX[0 : eigenvalueEnumerator.size()]) device(targetDevice)
975#pragma omp target exit data map(delete : tempEigenvalueY[0 : eigenvalueEnumerator.size()]) device(targetDevice)
976#pragma omp target exit data map(delete : tempEigenvalueZ[0 : eigenvalueEnumerator.size()]) device(targetDevice)
977
978 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
979 const double* currentQIn = patchData.QIn[patchIndex];
980 double* currentQOut = patchData.QOut[patchIndex];
981#pragma omp target exit data map(delete : currentQIn[0 : QInEnumerator.size()]) device(targetDevice)
982#pragma omp target exit data map(from : currentQOut[0 : QOutEnumerator.size()]) device(targetDevice)
983 }
984
985 delete[] mappedPointersToQIn;
986 delete[] mappedPointersToQOut;
987
988 if (tempFluxX != nullptr) {
989 delete[] tempFluxX;
990 }
991 if (tempFluxY != nullptr) {
992 delete[] tempFluxY;
993 }
994 if (tempFluxZ != nullptr) {
995 delete[] tempFluxZ;
996 }
997 if (tempNonconservativeProductX != nullptr) {
998 delete[] tempNonconservativeProductX;
999 }
1000 if (tempNonconservativeProductY != nullptr) {
1001 delete[] tempNonconservativeProductY;
1002 }
1003 if (tempNonconservativeProductZ != nullptr) {
1004 delete[] tempNonconservativeProductZ;
1005 }
1006 if (tempEigenvalueX != nullptr) {
1007 delete[] tempEigenvalueX;
1008 }
1009 if (tempEigenvalueY != nullptr) {
1010 delete[] tempEigenvalueY;
1011 }
1012 if (tempEigenvalueZ != nullptr) {
1013 delete[] tempEigenvalueZ;
1014 }
1015
1016 logTraceOut("timeStepWithRusanovBatchedHeapStateless()");
1017}
1018
1019
1020template <
1021 class SolverType,
1022 int NumberOfVolumesPerAxisInPatch,
1023 int HaloSize,
1024 int NumberOfUnknowns,
1025 int NumberOfAuxiliaryVariables,
1026 bool EvaluateFlux,
1027 bool EvaluateNonconservativeProduct,
1028 bool EvaluateSource,
1029 bool EvaluateMaximumEigenvalueAfterTimeStep,
1030 class TempDataEnumeratorType>
1031void exahype2::fv::rusanov::omp::timeStepWithRusanovBatchedHeapStateless(int targetDevice, CellData<double, double>& patchData) {
1032 static_assert(HaloSize == 1);
1033
1034 static tarch::logging::Log _log("exahype2::fv::rusanov::omp");
1035 logTraceIn("timeStepWithRusanovBatchedHeapStateless()");
1036
1037 const enumerator::AoSLexicographicEnumerator QInEnumerator(1, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, NumberOfAuxiliaryVariables);
1038 const enumerator::AoSLexicographicEnumerator QOutEnumerator(1, NumberOfVolumesPerAxisInPatch, 0, NumberOfUnknowns, NumberOfAuxiliaryVariables);
1039 const TempDataEnumeratorType fluxEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, NumberOfUnknowns, 0);
1040 const TempDataEnumeratorType ncpEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch + 1, HaloSize, NumberOfUnknowns, 0);
1041 const TempDataEnumeratorType eigenvalueEnumerator(patchData.numberOfCells, NumberOfVolumesPerAxisInPatch, HaloSize, 1, 0);
1042
1043 double* tempFluxX = new double[fluxEnumerator.size()];
1044 double* tempFluxY = new double[fluxEnumerator.size()];
1045 double* tempFluxZ = new double[fluxEnumerator.size()];
1046 double* tempNonconservativeProductX = new double[ncpEnumerator.size()];
1047 double* tempNonconservativeProductY = new double[ncpEnumerator.size()];
1048 double* tempNonconservativeProductZ = new double[ncpEnumerator.size()];
1049 double* tempEigenvalueX = new double[eigenvalueEnumerator.size()];
1050 double* tempEigenvalueY = new double[eigenvalueEnumerator.size()];
1051 double* tempEigenvalueZ = new double[eigenvalueEnumerator.size()];
1052
1053#pragma omp target enter data map(alloc : tempFluxX[0 : fluxEnumerator.size()]) device(targetDevice)
1054#pragma omp target enter data map(alloc : tempFluxY[0 : fluxEnumerator.size()]) device(targetDevice)
1055#pragma omp target enter data map(alloc : tempFluxZ[0 : fluxEnumerator.size()]) device(targetDevice)
1056#pragma omp target enter data map(alloc : tempNonconservativeProductX[0 : ncpEnumerator.size()]) device(targetDevice)
1057#pragma omp target enter data map(alloc : tempNonconservativeProductY[0 : ncpEnumerator.size()]) device(targetDevice)
1058#pragma omp target enter data map(alloc : tempNonconservativeProductZ[0 : ncpEnumerator.size()]) device(targetDevice)
1059#pragma omp target enter data map(alloc : tempEigenvalueX[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1060#pragma omp target enter data map(alloc : tempEigenvalueY[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1061#pragma omp target enter data map(alloc : tempEigenvalueZ[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1062
1063 double** mappedPointersToQIn = new double*[patchData.numberOfCells];
1064 double** mappedPointersToQOut = new double*[patchData.numberOfCells];
1065
1066 for (int patchIndex = 0; patchIndex < patchData.numberOfCells; patchIndex++) {
1067 const double* currentQIn = patchData.QIn[patchIndex];
1068 double* currentQOut = patchData.QOut[patchIndex];
1069#pragma omp target enter data map(to : currentQIn[0 : QInEnumerator.size()]) device(targetDevice)
1070#pragma omp target enter data map(alloc : currentQOut[0 : QOutEnumerator.size()]) device(targetDevice)
1071 mappedPointersToQIn[patchIndex] = static_cast<double*>(omp_get_mapped_ptr(currentQIn, targetDevice));
1072 mappedPointersToQOut[patchIndex] = static_cast<double*>(omp_get_mapped_ptr(currentQOut, targetDevice));
1073 }
1074
1075 const int numberOfCells = patchData.numberOfCells;
1076 double* rawPointerToCellCentre = patchData.cellCentre[0].data();
1077 double* rawPointerToCellSize = patchData.cellSize[0].data();
1078 double* t = patchData.t;
1079 double* dt = patchData.dt;
1080 double* maxEigenvalue = patchData.maxEigenvalue;
1081
1082#pragma omp target enter data map(to : rawPointerToCellCentre[0 : numberOfCells * Dimensions]) device(targetDevice)
1083#pragma omp target enter data map(to : rawPointerToCellSize[0 : numberOfCells * Dimensions]) device(targetDevice)
1084#pragma omp target enter data map(to : mappedPointersToQIn[0 : numberOfCells]) device(targetDevice)
1085#pragma omp target enter data map(to : mappedPointersToQOut[0 : numberOfCells]) device(targetDevice)
1086#pragma omp target enter data map(to : t[0 : numberOfCells]) device(targetDevice)
1087#pragma omp target enter data map(to : dt[0 : numberOfCells]) device(targetDevice)
1088#pragma omp target enter data map(alloc : maxEigenvalue[0 : numberOfCells]) device(targetDevice)
1089
1090 internal::timeStepWithRusanovBatchedStateless<
1091 SolverType,
1092 NumberOfVolumesPerAxisInPatch,
1093 HaloSize,
1094 NumberOfUnknowns,
1095 NumberOfAuxiliaryVariables,
1096 EvaluateFlux,
1097 EvaluateNonconservativeProduct,
1098 EvaluateSource,
1099 EvaluateMaximumEigenvalueAfterTimeStep,
1100 TempDataEnumeratorType>(
1101 targetDevice,
1102 numberOfCells,
1103 mappedPointersToQIn,
1104 rawPointerToCellCentre,
1105 rawPointerToCellSize,
1106 t,
1107 dt,
1109 mappedPointersToQOut,
1110 tempFluxX,
1111 tempFluxY,
1112 tempFluxZ,
1113 tempNonconservativeProductX,
1114 tempNonconservativeProductY,
1115 tempNonconservativeProductZ,
1116 tempEigenvalueX,
1117 tempEigenvalueY,
1118 tempEigenvalueZ
1119 );
1120
1121#pragma omp target exit data map(delete : rawPointerToCellCentre[0 : numberOfCells * Dimensions]) device(targetDevice)
1122#pragma omp target exit data map(delete : rawPointerToCellSize[0 : numberOfCells * Dimensions]) device(targetDevice)
1123#pragma omp target exit data map(delete : mappedPointersToQIn[0 : numberOfCells]) device(targetDevice)
1124#pragma omp target exit data map(delete : mappedPointersToQOut[0 : numberOfCells]) device(targetDevice)
1125#pragma omp target exit data map(delete : t[0 : numberOfCells]) device(targetDevice)
1126#pragma omp target exit data map(delete : dt[0 : numberOfCells]) device(targetDevice)
1127#pragma omp target exit data map(from : maxEigenvalue[0 : numberOfCells]) device(targetDevice)
1128
1129#pragma omp target exit data map(delete : tempFluxX[0 : fluxEnumerator.size()]) device(targetDevice)
1130#pragma omp target exit data map(delete : tempFluxY[0 : fluxEnumerator.size()]) device(targetDevice)
1131#pragma omp target exit data map(delete : tempFluxZ[0 : fluxEnumerator.size()]) device(targetDevice)
1132#pragma omp target exit data map(delete : tempNonconservativeProductX[0 : ncpEnumerator.size()]) device(targetDevice)
1133#pragma omp target exit data map(delete : tempNonconservativeProductY[0 : ncpEnumerator.size()]) device(targetDevice)
1134#pragma omp target exit data map(delete : tempNonconservativeProductZ[0 : ncpEnumerator.size()]) device(targetDevice)
1135#pragma omp target exit data map(delete : tempEigenvalueX[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1136#pragma omp target exit data map(delete : tempEigenvalueY[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1137#pragma omp target exit data map(delete : tempEigenvalueZ[0 : eigenvalueEnumerator.size()]) device(targetDevice)
1138
1139 for (int patchIndex = 0; patchIndex < numberOfCells; patchIndex++) {
1140 const double* currentQIn = patchData.QIn[patchIndex];
1141 double* currentQOut = patchData.QOut[patchIndex];
1142#pragma omp target exit data map(delete : currentQIn[0 : QInEnumerator.size()]) device(targetDevice)
1143#pragma omp target exit data map(from : currentQOut[0 : QOutEnumerator.size()]) device(targetDevice)
1144 }
1145
1146 delete[] mappedPointersToQIn;
1147 delete[] mappedPointersToQOut;
1148
1149 if (tempFluxX != nullptr) {
1150 delete[] tempFluxX;
1151 }
1152 if (tempFluxY != nullptr) {
1153 delete[] tempFluxY;
1154 }
1155 if (tempFluxZ != nullptr) {
1156 delete[] tempFluxZ;
1157 }
1158 if (tempNonconservativeProductX != nullptr) {
1159 delete[] tempNonconservativeProductX;
1160 }
1161 if (tempNonconservativeProductY != nullptr) {
1162 delete[] tempNonconservativeProductY;
1163 }
1164 if (tempNonconservativeProductZ != nullptr) {
1165 delete[] tempNonconservativeProductZ;
1166 }
1167 if (tempEigenvalueX != nullptr) {
1168 delete[] tempEigenvalueX;
1169 }
1170 if (tempEigenvalueY != nullptr) {
1171 delete[] tempEigenvalueY;
1172 }
1173 if (tempEigenvalueZ != nullptr) {
1174 delete[] tempEigenvalueZ;
1175 }
1176
1177 logTraceOut("timeStepWithRusanovBatchedHeapStateless()");
1178}
static constexpr int HaloSize
#define KeywordToAvoidDuplicateSymbolsForInlinedFunctions
Definition LinuxAMD.h:31
#define logTraceOut(methodName)
Definition Log.h:379
#define logTraceIn(methodName)
Definition Log.h:369
tarch::logging::Log _log("::")
Log Device.
Definition Log.h:516
void setValue(const double &value)
Set the value.
A simple class that has to be included to measure the clock ticks required for an operation.
Definition Watch.h:45
float dt
Definition DSL_test.py:5
KeywordToAvoidDuplicateSymbolsForInlinedFunctions double maxEigenvalue(const double *const Q, int normal, const double CCZ4e, const double CCZ4ds, const double CCZ4GLMc, const double CCZ4GLMd) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions GPUCallableInlineMethod void updateSolutionWithNonconservativeFlux(const double *__restrict__ ncpX, const double *__restrict__ ncpY, const double *__restrict__ ncpZ, const NCPFaceEnumeratorType &ncpEnumerator, const ::tarch::la::Vector< Dimensions, double > &patchCentre, const ::tarch::la::Vector< Dimensions, double > &patchSize, int patchIndex, const ::tarch::la::Vector< Dimensions, int > &volumeIndex, int unknown, double dt, double *__restrict__ QOut, const QOutEnumeratorType &QOutEnumerator) InlineMethod
Add the non-conservative flux contributions to one volume.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions GPUCallableInlineMethod void copySolution(const double *__restrict__ QIn, const QInEnumeratorType &QInEnumerator, int patchIndex, const ::tarch::la::Vector< Dimensions, int > &volumeIndex, int unknown, double *__restrict__ QOut, const QOutEnumeratorType &QOutEnumerator) InlineMethod
Copy solution from QIn to QOut.
Definition LoopBodies.h:75
KeywordToAvoidDuplicateSymbolsForInlinedFunctions GPUCallableInlineMethod void updateSolutionWithFlux(const double *__restrict__ tempFluxX, const double *__restrict__ tempFluxY, const double *__restrict__ tempFluxZ, const FluxEnumeratorType &fluxEnumerator, const ::tarch::la::Vector< Dimensions, double > &patchCentre, const ::tarch::la::Vector< Dimensions, double > &patchSize, int patchIndex, const ::tarch::la::Vector< Dimensions, int > &volumeIndex, int unknown, double dt, double *__restrict__ QOut, const QOutEnumeratorType &QOutEnumerator) InlineMethod
Update one volume with the flux contribution.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions GPUCallableInlineMethod void updateSolutionWithEigenvalueDamping(const double *__restrict__ QIn, const QInEnumeratorType &QInEnumerator, const double *__restrict__ tempMaxEigenvalueX, const double *__restrict__ tempMaxEigenvalueY, const double *__restrict__ tempMaxEigenvalueZ, const MaxEigenvalueEnumeratorType &eigenvalueEnumerator, const ::tarch::la::Vector< Dimensions, double > &patchCentre, const ::tarch::la::Vector< Dimensions, double > &patchSize, int patchIndex, const ::tarch::la::Vector< Dimensions, int > &volumeIndex, int unknown, double dt, double *__restrict__ QOut, const QOutEnumeratorType &QOutEnumerator) InlineMethod
Uses the eigenvalues to damp the solution update.
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedStateless(int targetDevice, int numberOfCells, double **mappedPointersToQIn, const double *rawPointerToCellCentre, const double *rawPointerToCellSize, const double *t, const double *dt, double *maxEigenvalue, double **mappedPointersToQOut, double *tempFluxX, double *tempFluxY, double *tempFluxZ, double *tempNonconservativeProductX, double *tempNonconservativeProductY, double *tempNonconservativeProductZ, double *tempEigenvalueX, double *tempEigenvalueY, double *tempEigenvalueZ) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedHeapStateless(int targetDevice, CellData< double, double > &patchData, tarch::timing::Measurement &measurement) InlineMethod
KeywordToAvoidDuplicateSymbolsForInlinedFunctions void timeStepWithRusanovBatchedUSMStateless(int targetDevice, CellData< double, double > &patchData, tarch::timing::Measurement &measurement) InlineMethod
auto volumeIndex(Args... args)
Definition VolumeIndex.h:54
Representation of a number of cells which contains all information that's required to process the sto...
Definition CellData.h:77
outType ** QOut
Out values.
Definition CellData.h:116
inType ** QIn
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
Definition CellData.h:82
const int numberOfCells
As we store data as SoA, we have to know how big the actual arrays are.
Definition CellData.h:99
double * maxEigenvalue
Out values.
Definition CellData.h:121
tarch::la::Vector< Dimensions, double > * cellCentre
Definition CellData.h:83
tarch::la::Vector< Dimensions, double > * cellSize
Definition CellData.h:84
Scalar * data()
This routine returns a pointer to the first data element.
Definition Vector.h:254
#define InlineMethod
This is the marker that is to be used after the argument list of a function declaration.
Definition tarch.h:58