PredictorAoS.cpp
#include "PredictorAoS.h"

#include "KernelUtils.h"

#include "Generic.h"

#include "tarch/la/Vector.h"

#include "tarch/logging/Log.h"

/*
#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
*/
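// Initial guess for the Picard iterations: the current cell solution UIn is copied
// to every temporal quadrature node of the space-time DOF array QOut. Since the
// time index is the fastest-running index of the space-time enumeration (cf. the
// scalarIndexCell*nodesPerAxis + it addressing in the extrapolation-in-time
// routines below), scalarIndex/nodesPerAxis recovers the spatial node.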
void exahype2::aderdg::spaceTimePredictor_initialGuess_body_AoS(
  double * __restrict__ QOut,
  const double * const __restrict__ UIn,
  const int nodesPerAxis,
  const int strideQ,
  const int scalarIndex) {
  for (int var = 0; var < strideQ; var++) {
    QOut[ scalarIndex*strideQ + var ] = UIn[ ( scalarIndex / nodesPerAxis )*strideQ + var ];
  }
}

/*
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif

#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
*/
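// Initialises the Picard right-hand side with the contribution of the initial data:
// the solution at the spatial node is weighted with FLCoeff[it], i.e. the time basis
// evaluated at the left end of the reference time interval (FRCoeff, its counterpart
// at the right end, is used for the extrapolation to t+dt further below).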
void exahype2::aderdg::spaceTimePredictor_PicardLoop_initialiseRhs_AoS(
  double * __restrict__ rhsOut,
  const double * const __restrict__ UIn,
  const double * const __restrict__ FLCoeff,
  const int nodesPerAxis,
  const int strideQ,
  const int strideRhs,
  const int scalarIndex) {
  const int it = delineariseIndex(scalarIndex,getStrides(nodesPerAxis))[0];

  const double coeff = FLCoeff[ it ];
  for (int var = 0; var < strideRhs; var++) {
    rhsOut[ scalarIndex*strideRhs + var ] = coeff * UIn[ ( scalarIndex / nodesPerAxis ) * strideQ + var ];
  }
}
/*
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif

#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
*/
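// Fused assembly routine: for one space-time node, adds the flux, nonconservative
// product (NCP), and algebraic source contributions to the Picard right-hand side
// in a single pass. The three specialised *_body_AoS routines further below perform
// the same updates individually.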
void exahype2::aderdg::spaceTimePredictor_PicardLoop_addContributions_body_AoS(
  std::function< void(
    const double * const __restrict__ Q,
    const tarch::la::Vector<Dimensions,double>& x,
    double t,
    int normal,
    double * __restrict__ F
  ) > flux,
  std::function< void(
    const double * const __restrict__ Q,
    const tarch::la::Vector<Dimensions,double>& x,
    double t,
    double * __restrict__ S
  ) > algebraicSource,
  std::function< void(
    const double * const __restrict__ Q,
    double * __restrict__ dQ_or_deltaQ,
    const tarch::la::Vector<Dimensions,double>& x,
    double t,
    int normal,
    double * __restrict__ BgradQ
  ) > nonconservativeProduct,
  double * __restrict__ rhsOut,
  double * __restrict__ FAux,
  double * __restrict__ gradQAux,
  double * __restrict__ SAux,
  const double* __restrict__ QIn,
  const double* __restrict__ nodes,
  const double* __restrict__ weights,
  const double* __restrict__ Kxi,
  const double* __restrict__ dudx,
  const tarch::la::Vector<Dimensions,double>& cellCentre,
  const double dx,
  const double t,
  const double dt,
  const int nodesPerAxis,
  const int unknowns,
  const int strideQ,
  const int strideRhs,
  const bool callFlux,
  const bool callSource,
  const bool callNonconservativeProduct,
  const int scalarIndex) {
  const tarch::la::Vector<Dimensions+1,int> strides = getStrides(nodesPerAxis);
  const tarch::la::Vector<Dimensions+1,int> index = delineariseIndex(scalarIndex,strides);
  const tarch::la::Vector<Dimensions+1,double> coords = getCoordinates(index,cellCentre,dx,t,dt,nodes);
  const tarch::la::Vector<Dimensions,double> x(&coords[1]);
  const double time = coords[0];

  const double invDx = 1.0/dx;
  const double coeff0 = dt * weights[index[0]];

  // flux contributions
  if ( callFlux ) {
    for ( int d = 1; d < Dimensions+1; d++) {
      const double coeff1 = coeff0 * invDx/*[d-1]*/ / weights[index[d]];

      for ( int a = 0; a < nodesPerAxis; a++ ) { // further collapsing causes data races; synchronisation or GPU shared memory usage required
        const double coeff = coeff1 * Kxi[ a*nodesPerAxis + index[d] ]; // @todo: provide transposed variant
        const double * const Q = &QIn[ ( scalarIndex + (a - index[d])*strides[d] )*strideQ ];
        flux(Q, x, time, d-1, FAux);
        for (int var=0; var < unknowns; var++) {
          rhsOut[ scalarIndex*strideRhs+var ] -= coeff * FAux[ var ];
        }
      }
    }
  }
  // NCP contributions; NOTE: the gradient has the same Q access pattern as the flux
  if ( callNonconservativeProduct ) {
    gradient_AoS(QIn,dudx,invDx,nodesPerAxis,strideQ,scalarIndex,gradQAux);

    const double* Q = &QIn[ scalarIndex*strideQ ];
    for ( int direction = 0; direction < Dimensions; direction++ ) {
      nonconservativeProduct(Q, &gradQAux[ direction*strideQ ], x, time, direction, SAux );
      for (int var=0; var < unknowns; var++) {
        rhsOut[ scalarIndex*strideRhs + var ] += coeff0 * SAux[var];
      }
    }
  }
  // source contributions
  if ( callSource ) {
    const double* Q = &QIn[ scalarIndex*strideQ ];

    algebraicSource(Q, x, time, SAux);
    for (int var = 0; var < unknowns; var++) {
      rhsOut[ scalarIndex*strideRhs + var ] += coeff0 * SAux[var];
    }
  }
}
/*
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif

#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
*/
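// Flux-only variant of the right-hand side assembly: for every coordinate direction d,
// the user flux is evaluated along the line of nodes running through the current node
// and contracted with the entries of Kxi, weighted with dt and the quadrature weights.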
void exahype2::aderdg::spaceTimePredictor_PicardLoop_addFluxContributionsToRhs_body_AoS(
  std::function< void(
    const double * const __restrict__ Q,
    const tarch::la::Vector<Dimensions,double>& x,
    double t,
    int normal,
    double * __restrict__ F
  ) > flux,
  double* __restrict__ rhsOut,
  double* __restrict__ FAux,
  const double* __restrict__ QIn,
  const double* __restrict__ nodes,
  const double* __restrict__ weights,
  const double* __restrict__ Kxi,
  const tarch::la::Vector<Dimensions,double>& cellCentre,
  const double dx,
  const double t,
  const double dt,
  const int nodesPerAxis,
  const int unknowns,
  const int strideQ,
  const int strideRhs,
  const int scalarIndex) {
  const tarch::la::Vector<Dimensions+1,int> strides = getStrides(nodesPerAxis);
  const tarch::la::Vector<Dimensions+1,int> index = delineariseIndex(scalarIndex,strides);
  const tarch::la::Vector<Dimensions+1,double> coords = getCoordinates(index,cellCentre,dx,t,dt,nodes);
  const tarch::la::Vector<Dimensions,double> x(&coords[1]);
  const double time = coords[0];

  const double invDx = 1.0/dx;

  for ( int d = 1; d < Dimensions+1; d++) {
    const double coeff0 = dt * invDx/*[d-1]*/ * weights[index[0]] / weights[index[d]];

    for ( int a = 0; a < nodesPerAxis; a++ ) { // further collapsing causes data races; synchronisation or GPU shared memory usage required
      const double coeff = coeff0 * Kxi[ a*nodesPerAxis + index[d] ]; // @todo: provide transposed variant
      const double * const Q = &QIn[ ( scalarIndex + (a - index[d])*strides[d] )*strideQ ];
      flux(Q, x, time, d-1, FAux);
      for (int var=0; var < unknowns; var++) {
        rhsOut[ scalarIndex*strideRhs+var ] -= coeff * FAux[ var ];
      }
    }
  }
}
/*
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif

#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
*/
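// Source-only variant: evaluates the algebraic source at the node's space-time
// coordinates and adds it to the right-hand side, weighted with dt and the temporal
// quadrature weight weights[index[0]].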
void exahype2::aderdg::spaceTimePredictor_PicardLoop_addSourceContributionToRhs_body_AoS(
  std::function< void(
    const double * const __restrict__ Q,
    const tarch::la::Vector<Dimensions,double>& x,
    double t,
    double * __restrict__ S
  ) > algebraicSource,
  double* __restrict__ rhsOut,
  double* __restrict__ SAux,
  const double* __restrict__ QIn,
  const double* __restrict__ nodes,
  const double* __restrict__ weights,
  const tarch::la::Vector<Dimensions,double>& cellCentre,
  const double dx,
  const double t,
  const double dt,
  const int nodesPerAxis,
  const int unknowns,
  const int strideQ,
  const int strideRhs,
  const int scalarIndex) {
  const tarch::la::Vector<Dimensions+1,int> index = delineariseIndex(scalarIndex, getStrides(nodesPerAxis));
  const tarch::la::Vector<Dimensions+1,double> coords = getCoordinates(index,cellCentre,dx,t,dt,nodes);
  const tarch::la::Vector<Dimensions,double> x( coords.data() + 1 );
  const double time = coords[0];

  const double coeff = dt * weights[index[0]];

  const double* Q = &QIn[ scalarIndex*strideQ ];

  algebraicSource(Q, x, time, SAux);
  for (int var = 0; var < unknowns; var++) {
    rhsOut[ scalarIndex*strideRhs + var ] += coeff * SAux[var];
  }
}
/*
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif

#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
*/
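// NCP-only variant: computes the spatial gradient of Q at the node via gradient_AoS
// and adds the nonconservative product B(Q)*gradQ, direction by direction, to the
// right-hand side, weighted with dt and the temporal quadrature weight.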
void exahype2::aderdg::spaceTimePredictor_PicardLoop_addNcpContributionToRhs_body_AoS(
  std::function< void(
    const double * const __restrict__ Q,
    double * __restrict__ dQ_or_deltaQ,
    const tarch::la::Vector<Dimensions,double>& x,
    double t,
    int normal,
    double * __restrict__ BgradQ
  ) > nonconservativeProduct,
  double * __restrict__ rhsOut,
  double * __restrict__ gradQAux,
  double * __restrict__ SAux,
  const double * const __restrict__ QIn,
  const double * const __restrict__ nodes,
  const double * const __restrict__ weights,
  const double * const __restrict__ dudx,
  const tarch::la::Vector<Dimensions,double>& cellCentre,
  const double dx,
  const double t,
  const double dt,
  const int nodesPerAxis,
  const int unknowns,
  const int strideQ,
  const int strideRhs,
  const int scalarIndex) {
  tarch::la::Vector<Dimensions+1,int> strides = getStrides(nodesPerAxis);
  tarch::la::Vector<Dimensions+1,int> index = delineariseIndex(scalarIndex,strides);
  const tarch::la::Vector<Dimensions+1,double> coords = getCoordinates(index,cellCentre,dx,t,dt,nodes);
  const tarch::la::Vector<Dimensions,double> x( coords.data() + 1 );
  const double time = coords[0];

  const double invDx = 1.0/dx;

  const double coeff = dt * weights[index[0]];

  gradient_AoS(QIn,dudx,invDx,nodesPerAxis,strideQ,scalarIndex,gradQAux);

  const double* Q = &QIn[ scalarIndex*strideQ ];
  for ( int direction = 0; direction < Dimensions; direction++ ) {
    nonconservativeProduct(Q, &gradQAux[ direction*strideQ ], x, time, direction, SAux );
    for (int var=0; var < unknowns; var++) {
      rhsOut[ scalarIndex*strideRhs + var ] += coeff * SAux[var];
    }
  }
}
/*
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif

#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
*/
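// Picard update step: contracts the assembled right-hand side along the time axis
// with iK1 (the transposed inverse of the operator K1, cf. the comment in the loop),
// writes the new space-time DOF and reports the squared change as this node's
// contribution to the iteration residual.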
void exahype2::aderdg::spaceTimePredictor_PicardLoop_invert_body_AoS(
  double * __restrict__ QOut,
  double& squaredResiduumOut,
  const double * const __restrict__ rhsIn,
  const double * const __restrict__ iK1,
  const int nodesPerAxis,
  const int unknowns,
  const int strideQ,
  const int strideRhs,
  const int scalarIndex) {
  const int it = delineariseIndex(scalarIndex,getStrides(nodesPerAxis))[0];

  squaredResiduumOut = 0.0;
  for (int var = 0; var < unknowns; var++) {
    double Q_new = 0;
    for (int a = 0; a < nodesPerAxis; a++) { // matmul in time direction
      const double rhsVal = rhsIn[ (scalarIndex + (a-it)*1) * strideRhs + var ];
      Q_new += rhsVal * iK1[ it*nodesPerAxis + a ]; // note: iK1 is already the transposed inverse of K1
    }

    const int indexQ = scalarIndex*strideQ + var;
    const double difference = Q_new - QOut[ indexQ ];
    squaredResiduumOut += difference * difference;
    QOut[ indexQ ] = Q_new;

    // @todo: enable in CPU version
    //assertion3( !std::isnan( Q[ scalarIndex ] ), scalarIndex, dt, invDx );
    //assertion3( !std::isnan(Q_new), scalarIndex, dt, invDx );
  }

  // @todo: Get feedback from M.D.
  // Qt is fundamental for debugging, do not remove this.
  /*
  double lQt[nodesPerAxis * unknowns];
  idx2 idx_lQt(nodesPerAxis, unknowns);
  for (int j = 0; j < nodesPerAxis; j++) {
    for (int k = 0; k < nodesPerAxis; k++) {
      const double weight = weights[j] *
                            weights[k];
      const double iweight = 1.0 / weight;

      std::memset(lQt, 0, nodesPerAxis * unknowns * sizeof(double));
      for (int l = 0; l < nodesPerAxis; l++) {
        for (int m = 0; m < unknowns; m++) {
          for (int n = 0; n < nodesPerAxis; n++) { // t == n
            lQt[idx_lQt(l, m)] += 1./dt * QOut[idx_lQi(j, k, n, m)] *
                                  dudx[l][n];
          }

          printf("Qt[%d,%d] = %f\n", l, m, lQt[idx_lQt(l,m)]);
        }
      }
    }
  }
  */
}

/*
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif

#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
*/
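// Extrapolates the space-time predictor onto one node of the cell hull. The face is
// identified through faceIndex = lr*Dimensions + d (faces 0..Dimensions-1 carry lr=0,
// the remaining ones lr=1); the face value is obtained by contracting the line of cell
// nodes orthogonal to the face with FLRCoeff[lr].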
void exahype2::aderdg::spaceTimePredictor_extrapolate_body_AoS(
  double * __restrict__ QHullOut[Dimensions*2],
  const double * const __restrict__ QIn,
  const double * const __restrict__ FLRCoeff[2],
  const int nodesPerAxis,
  const int strideQLR,
  const int strideQ,
  const int scalarIndexHull) {
  const tarch::la::Vector<Dimensions+1,int> strides = getStrides(nodesPerAxis);
  const tarch::la::Vector<Dimensions+1,int> indexQHull = delineariseIndex(scalarIndexHull,strides); // (t,y,z,face), (t,x,z,face), (t,x,y,face)

  const int faceIndex = indexQHull[Dimensions];
  const int lr = faceIndex / Dimensions;
  const int d = faceIndex - Dimensions*lr;
  //const int d = faceIndex / 2;
  //const int lr = faceIndex - 2*d;
  const int scalarIndexFace = scalarIndexHull - faceIndex*strideQLR;

  // zero out
  const int writeIndex = (strideQLR*(1-lr) + scalarIndexFace)*strideQ;
  for (int var=0; var < strideQ; var++) {
    QHullOut[ faceIndex ][ writeIndex + var ] = 0.0;
  }
  // compute
  for (int id=0; id<nodesPerAxis; id++) {
    const int scalarIndexCell = mapSpaceTimeFaceIndexToScalarCellIndex(indexQHull,d,id,nodesPerAxis);

    const double coeff = FLRCoeff[lr][id];
    for (int var=0; var < strideQ; var++) {
      QHullOut[ faceIndex ][ writeIndex + var ] += coeff * QIn[ scalarIndexCell*strideQ + var ];
    }
  }
}

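// Lobatto variant: with Gauss-Lobatto nodes the first/last node per axis lies directly
// on the face, so no interpolation is required and the face value is copied from the
// corresponding cell node.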
void exahype2::aderdg::spaceTimePredictor_extrapolate_Lobatto_body_AoS(
  double * __restrict__ QHullOut[Dimensions*2],
  const double * const __restrict__ QIn,
  const int nodesPerAxis,
  const int strideQLR,
  const int strideQ,
  const int scalarIndexHull) {
  const tarch::la::Vector<Dimensions+1,int> strides = getStrides(nodesPerAxis);
  const tarch::la::Vector<Dimensions+1,int> indexQHull = delineariseIndex(scalarIndexHull,strides); // (t,y,z,face), (t,x,z,face), (t,x,y,face)

  const int faceIndex = indexQHull[Dimensions];
  const int lr = faceIndex / Dimensions;
  const int d = faceIndex - Dimensions*lr;
  //const int d = faceIndex / 2;
  //const int lr = faceIndex - 2*d;
  const int scalarIndexFace = scalarIndexHull - faceIndex*strideQLR;

  const int id = ( lr == 0 ) ? 0 : (nodesPerAxis-1);
  const int scalarIndexCell = mapSpaceTimeFaceIndexToScalarCellIndex(indexQHull,d,id,nodesPerAxis);

  for (int var=0; var < strideQ; var++) {
    QHullOut[ faceIndex ][ (strideQLR*(1-lr) + scalarIndexFace)*strideQ + var ] = QIn[ scalarIndexCell*strideQ + var ];
  }
}
/*
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif

#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
*/
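// Evaluates the space-time predictor at t+dt for one spatial node by contracting its
// nodesPerAxis temporal DOF with FRCoeff, the time basis evaluated at the right end
// of the reference time interval.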
void exahype2::aderdg::spaceTimePredictor_extrapolateInTime_body_AoS(
  double * __restrict__ UOut,
  const double * const __restrict__ QIn,
  const double * const __restrict__ FRCoeff,
  const double nodesPerAxis,
  const int strideQ,
  const int scalarIndexCell) {
  // clear
  for (int var = 0; var < strideQ; var++ ) {
    UOut[ scalarIndexCell*strideQ + var ] = 0;
  }
  // compute
  for (int it = 0; it < nodesPerAxis; it++ ) {
    const int indexQ = (scalarIndexCell*nodesPerAxis + it)*strideQ;
    const double coeff = FRCoeff[it];
    for (int var = 0; var < strideQ; var++ ) {
      UOut[ scalarIndexCell*strideQ + var ] += coeff * QIn[ indexQ + var ];
    }
  }
}
/*
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif

#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
*/
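// Lobatto variant: the last temporal quadrature node coincides with t+dt, so the value
// is copied directly and FRCoeff is not needed.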
void exahype2::aderdg::spaceTimePredictor_extrapolateInTime_Lobatto_body_AoS(
  double * __restrict__ UOut,
  const double * const __restrict__ QIn,
  const double * const __restrict__ FRCoeff,
  const double nodesPerAxis,
  const int strideQ,
  const int scalarIndexCell) {
  const int it = nodesPerAxis-1;
  const int indexQ = (scalarIndexCell*nodesPerAxis + it)*strideQ;
  for (int var = 0; var < strideQ; var++ ) {
    UOut[ scalarIndexCell*strideQ + var ] = QIn[ indexQ + var ];
  }
}
/*
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif
*/

//#if defined(__HIPCC__) or defined(__NVCC__)
//#error HIP / CUDA kernels are currently not maintained
#if 0

// kernels
__global__ void exahype2::aderdg::spaceTimePredictor_initialGuess_krnl_AoS(
  const double * const __restrict__ QOut,
  const double * const __restrict__ UIn,
  const int nodesPerAxis,
  const int strideQ) {
  const int spaceTimeNodesPerCell = getSpaceTimeNodesPerCell(nodesPerAxis);
  const int scalarIndexCell = threadIdx.x + blockDim.x * blockIdx.x;

  if ( scalarIndexCell < spaceTimeNodesPerCell ) {
    spaceTimePredictor_initialGuess_body_AoS(
      QOut,
      UIn,
      nodesPerAxis,
      strideQ,
      scalarIndexCell);
  }
}

__global__ void exahype2::aderdg::spaceTimePredictor_PicardLoop_assembleRhs_krnl_AoS(
  std::function< void(
    const double * const __restrict__ Q,
    const tarch::la::Vector<Dimensions,double>& x,
    double t,
    int normal,
    double * __restrict__ F
  ) > flux,
  std::function< void(
    const double * const __restrict__ Q,
    const tarch::la::Vector<Dimensions,double>& x,
    double t,
    double * __restrict__ S
  ) > algebraicSource,
  std::function< void(
    const double * const __restrict__ Q,
    double * __restrict__ dQ_or_deltaQ,
    const tarch::la::Vector<Dimensions,double>& x,
    double t,
    int normal,
    double * __restrict__ BgradQ
  ) > nonconservativeProduct,
  double * __restrict__ rhsOut,
  double * __restrict__ SAux,
  double * __restrict__ gradQAux,
  double * __restrict__ FAux,
  const double * const __restrict__ QIn,
  const double * const __restrict__ weights,
  const double * const __restrict__ nodes,
  const double * const __restrict__ Kxi,
  const double * const __restrict__ FLCoeff,
  const double * const __restrict__ dudx,
  const tarch::la::Vector<Dimensions,double>& cellCentre,
  const double dx,
  const double t,
  const double dt,
  const int nodesPerAxis,
  const int unknowns,
  const int strideQ,
  const int strideS,
  const int strideGradQ,
  const int strideRhs,
  const bool callFlux,
  const bool callSource,
  const bool callNonconservativeProduct) {
  const int spaceTimeNodesPerCell = getSpaceTimeNodesPerCell(nodesPerAxis);
  const int scalarIndexCell = threadIdx.x + blockDim.x * blockIdx.x;

  if ( scalarIndexCell < spaceTimeNodesPerCell ) {
    spaceTimePredictor_PicardLoop_initialiseRhs_AoS(
      rhsOut,
      UIn,
      FLCoeff,
      nodesPerAxis,
      strideQ,
      strideRhs,
      scalarIndex);

    if ( callFlux ) {
      spaceTimePredictor_PicardLoop_addFluxContributionsToRhs_body_AoS(
        flux,
        rhsOut,
        FAux,
        QIn,
        nodes,
        weights,
        Kxi,
        cellCentre,
        dx,
        t,
        dt,
        nodesPerAxis,
        unknowns,
        strideQ,
        strideRhs,
        scalarIndex);
    }
    if ( callSource ) {
      spaceTimePredictor_PicardLoop_addSourceContributionToRhs_body_AoS(
        algebraicSource,
        rhsOut,
        SAux,
        QIn,
        nodes,
        weights,
        cellCentre,
        dx,
        t,
        dt,
        nodesPerAxis,
        unknowns,
        strideQ,
        strideRhs,
        scalarIndex);
    }
    if ( callNonconservativeProduct ) {
      spaceTimePredictor_PicardLoop_addNcpContributionToRhs_body_AoS(
        nonconservativeProduct,
        rhsOut,
        gradQAux,
        SAux,
        QIn,
        nodes,
        weights,
        dudx,
        cellCentre,
        dx,
        t,
        dt,
        nodesPerAxis,
        unknowns,
        strideQ,
        strideRhs,
        scalarIndex);
    }
  }
}

__global__ void exahype2::aderdg::spaceTimePredictor_PicardLoop_invert_krnl_AoS(
  double * __restrict__ QOut,
  double * __restrict__ partial_squaredResiduumOut,
  const double * const __restrict__ rhsIn,
  const double * const __restrict__ iK1,
  const int nodesPerAxis,
  const int unknowns,
  const int strideQ,
  const int strideRhs,
  const int scalarIndex) {
  HIP_DYNAMIC_SHARED( double, sdata)
  const int threadIdxInBlock = threadIdx.x;
  const int scalarIndexCell = threadIdxInBlock + blockDim.x * blockIdx.x;
  const int spaceTimeNodesPerCell = getSpaceTimeNodesPerCell(nodesPerAxis);

  double squaredResiduumOut = 0.0;
  if ( scalarIndexCell < spaceTimeNodesPerCell ) {
    spaceTimePredictor_PicardLoop_invert_body_AoS(
      QOut,
      squaredResiduumOut,
      rhsIn,
      iK1,
      nodesPerAxis,
      unknowns,
      strideQ,
      strideRhs,
      scalarIndex);
  }

  // below: (unoptimised) block-wide tree reduction; the final sum over blocks is computed on the host (as the result is needed on the host side)
  sdata[ threadIdxInBlock ] = squaredResiduumOut;
  __syncthreads();

  for (int s = 1; s < blockDim.x; s *= 2) {
    int index = 2 * s * threadIdxInBlock;
    if (index < blockDim.x) {
      sdata[index] += sdata[index + s];
    }
    __syncthreads();
  }

  if (threadIdxInBlock == 0) { // write this block's partial result (accumulated in sdata[0]) to global memory
    partial_squaredResiduumOut[ blockIdx.x ] = sdata[0];
  }
}

__global__ void exahype2::aderdg::spaceTimePredictor_extrapolate_krnl_AoS(
  double * __restrict__ QHullOut,
  const double * const __restrict__ QIn,
  const double * const __restrict__ FLCoeff,
  const double * const __restrict__ FRCoeff,
  const int nodesPerAxis,
  const int unknowns,
  const int strideQ) {
  const int spaceTimeNodesOnCellHull = getNodesPerCell(nodesPerAxis) * Dimensions*2; // nodesPerAxis^d

  const double* FLRCoeff[2] = {FLCoeff, FRCoeff};

  const int scalarIndexHull = threadIdx.x + blockDim.x * blockIdx.x;
  if ( scalarIndexHull < spaceTimeNodesOnCellHull ) {
    spaceTimePredictor_extrapolate_body_AoS(
      QHullOut,
      QIn,
      FLRCoeff,
      nodesPerAxis,
      unknowns,
      strideQ,
      scalarIndexHull);
  }
}
// launcher

#endif

// CPU launchers
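// Runs the Picard fixed-point iteration for a single cell: after seeding the initial
// guess, each of the (at most nodesPerAxis) sweeps re-assembles the right-hand side
// per space-time node, applies iK1, and accumulates the squared update; the loop
// leaves early once the accumulated residual drops below atol^2 (for atol > 0).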
void exahype2::aderdg::spaceTimePredictor_PicardLoop_loop_AoS(
  std::function< void(
    const double * const __restrict__ Q,
    const tarch::la::Vector<Dimensions,double>& x,
    double t,
    int normal,
    double * __restrict__ F
  ) > flux,
  std::function< void(
    const double * const __restrict__ Q,
    const tarch::la::Vector<Dimensions,double>& x,
    double t,
    double * __restrict__ S
  ) > algebraicSource,
  std::function< void(
    const double * const __restrict__ Q,
    const double * const __restrict__ dQ_or_deltaQ,
    const tarch::la::Vector<Dimensions,double>& x,
    double t,
    int normal,
    double * __restrict__ BgradQ
  ) > nonconservativeProduct,
  double * __restrict__ QOut,
  const double * const __restrict__ UIn,
  const double * const __restrict__ nodes,
  const double * const __restrict__ weights,
  const double * const __restrict__ Kxi,
  const double * const __restrict__ iK1,
  const double * const __restrict__ FLCoeff,
  const double * const __restrict__ dudx,
  const tarch::la::Vector<Dimensions,double>& cellCentre,
  const double dx,
  const double t,
  const double dt,
  const int order,
  const int unknowns,
  const int auxiliaryVariables,
  const double atol,
  const bool callFlux,
  const bool callSource,
  const bool callNonconservativeProduct) {
  const int nodesPerAxis = order+1;

  const int spaceTimeNodesPerCell = getSpaceTimeNodesPerCell(nodesPerAxis);

  const int strideQ = unknowns+auxiliaryVariables;
  const int strideRhs = unknowns;
  const int strideS = unknowns;
  const int strideF = unknowns;
  const int strideGradQ = strideQ*Dimensions; // gradient of auxiliary variables needed for some apps

  double* rhs = new double[spaceTimeNodesPerCell*strideRhs]{0.0};

  double* FAux = nullptr;
  double* SAux = nullptr;
  double* gradQAux = nullptr;
  if ( callFlux ) {
    FAux = new double[spaceTimeNodesPerCell*strideF]{0.0};
  }
  if ( callSource || callNonconservativeProduct ) {
    SAux = new double[spaceTimeNodesPerCell*strideS]{0.0};
  }
  if ( callNonconservativeProduct ) {
    gradQAux = new double[spaceTimeNodesPerCell*strideGradQ]{0.0};
  }

  // initial guess
  for ( int scalarIndexCell = 0; scalarIndexCell < spaceTimeNodesPerCell; scalarIndexCell++ ) {
    spaceTimePredictor_initialGuess_body_AoS(
      QOut,
      UIn,
      nodesPerAxis,
      strideQ,
      scalarIndexCell);
  }

  int iter = 0;
  for ( ; iter < nodesPerAxis; iter++ ) {
    for ( int scalarIndexCell = 0; scalarIndexCell < spaceTimeNodesPerCell; scalarIndexCell++ ) {
      spaceTimePredictor_PicardLoop_initialiseRhs_AoS(
        rhs,
        UIn,
        FLCoeff,
        nodesPerAxis,
        strideQ,
        strideRhs,
        scalarIndexCell);

      spaceTimePredictor_PicardLoop_addContributions_body_AoS(
        flux,
        algebraicSource,
        nonconservativeProduct,
        rhs,
        FAux + scalarIndexCell*strideF,
        gradQAux + scalarIndexCell*strideGradQ,
        SAux + scalarIndexCell*strideS,
        QOut,
        nodes,
        weights,
        Kxi,
        dudx,
        cellCentre,
        dx,
        t,
        dt,
        nodesPerAxis,
        unknowns,
        strideQ,
        strideRhs,
        callFlux,
        callSource,
        callNonconservativeProduct,
        scalarIndexCell);
    } // scalarIndexCell

    // 3. Multiply with (K1)^(-1) to get the discrete time integral of the discrete Picard iteration
    double sq_res = 0.0;
    for ( int scalarIndexCell = 0; scalarIndexCell < spaceTimeNodesPerCell; scalarIndexCell++ ) {
      double squaredResiduum = 0.0;
      spaceTimePredictor_PicardLoop_invert_body_AoS(
        QOut,
        squaredResiduum,
        rhs,
        iK1,
        nodesPerAxis,
        unknowns,
        strideQ,
        strideRhs,
        scalarIndexCell);
      sq_res += squaredResiduum;
    }

    // 4. Exit condition
    if ( atol > 0 && sq_res < atol * atol ) {
      iter = nodesPerAxis + 1; // break
    }
    if ( iter == nodesPerAxis ) { // no convergence after the last iteration
      static tarch::logging::Log _log("kernels::aderdg::generic::c");
      logWarning("aderPicardLoopNonlinear(...)","|res|^2=" << sq_res << " > |atol|^2=" << atol * atol << " after "
                 << iter << " iterations. The solver does not seem to have converged properly within the maximum number of iteration steps");
    }
  } // iter

  delete [] rhs;

  if ( callFlux ) {
    delete [] FAux;
  }
  if ( callSource || callNonconservativeProduct ) {
    delete [] SAux;
  }
  if ( callNonconservativeProduct ) {
    delete [] gradQAux;
  }
}

void exahype2::aderdg::spaceTimePredictor_extrapolate_loop_AoS(
  double * __restrict__ QHullOut[Dimensions*2],
  const double * const __restrict__ QIn,
  const double * const __restrict__ FLCoeff,
  const double * const __restrict__ FRCoeff,
  const int order,
  const int unknowns,
  const int auxiliaryVariables) {
  const int nodesPerAxis = order + 1;

  const int strideQ = unknowns+auxiliaryVariables;

  // Each face stores first the degrees of freedom of QL ("left") and then those of QR ("right").
  // The cell is always positioned "right" of a face with lr=0 and
  // "left" of a face with lr=1.
  const int strideQLR = getNodesPerCell(nodesPerAxis);

  const int spaceTimeNodesOnCellHull = getNodesPerCell(nodesPerAxis)/* nodesPerAxis^d */ * Dimensions*2;

  const double* FLRCoeff[2] = {FLCoeff, FRCoeff};

  for ( int scalarIndexHull = 0; scalarIndexHull < spaceTimeNodesOnCellHull; scalarIndexHull++ ) {
    spaceTimePredictor_extrapolate_body_AoS(
      QHullOut,
      QIn,
      FLRCoeff,
      nodesPerAxis,
      strideQLR,
      strideQ,
      scalarIndexHull);
  }
}

void exahype2::aderdg::spaceTimePredictor_extrapolate_Lobatto_loop_AoS(
  double * __restrict__ QHullOut[Dimensions*2],
  const double * const __restrict__ QIn,
  const double * const __restrict__ FLCoeff, // just to have the same signature as the other routine
  const double * const __restrict__ FRCoeff, // just to have the same signature as the other routine
  const int order,
  const int unknowns,
  const int auxiliaryVariables) {
  const int nodesPerAxis = order + 1;

  const int strideQ = unknowns+auxiliaryVariables;

  const int spaceTimeNodesOnCellHull = getNodesPerCell(nodesPerAxis)/* nodesPerAxis^d */ * Dimensions*2;

  // Each face stores first the degrees of freedom of QL ("left") and then those of QR ("right").
  // The cell is always positioned "right" of a face with lr=0 and
  // "left" of a face with lr=1.
  const int strideQLR = getNodesPerCell(nodesPerAxis);

  for ( int scalarIndexHull = 0; scalarIndexHull < spaceTimeNodesOnCellHull; scalarIndexHull++ ) {
    spaceTimePredictor_extrapolate_Lobatto_body_AoS(
      QHullOut,
      QIn,
      nodesPerAxis,
      strideQLR,
      strideQ,
      scalarIndexHull);
  }
}

void exahype2::aderdg::spaceTimePredictor_extrapolateInTime_loop_AoS(
  double * __restrict__ UOut,
  const double * const __restrict__ QIn,
  const double * const __restrict__ FRCoeff,
  const int order,
  const int unknowns,
  const int auxiliaryVariables) {
  const int nodesPerAxis = order + 1;

  const int strideQ = unknowns+auxiliaryVariables;

  const int nodesPerCell = getNodesPerCell(nodesPerAxis);

  for ( int scalarIndexCell = 0; scalarIndexCell < nodesPerCell; scalarIndexCell++ ) {
    spaceTimePredictor_extrapolateInTime_body_AoS(
      UOut,
      QIn,
      FRCoeff,
      nodesPerAxis,
      strideQ,
      scalarIndexCell);
  }
}

void exahype2::aderdg::spaceTimePredictor_extrapolateInTime_Lobatto_loop_AoS(
  double * __restrict__ UOut,
  const double * const __restrict__ QIn,
  const double * const __restrict__ FRCoeff,
  const int order,
  const int unknowns,
  const int auxiliaryVariables) {
  const int nodesPerAxis = order + 1;

  const int strideQ = unknowns+auxiliaryVariables;

  const int nodesPerCell = getNodesPerCell(nodesPerAxis);

  for ( int scalarIndexCell = 0; scalarIndexCell < nodesPerCell; scalarIndexCell++ ) {
    spaceTimePredictor_extrapolateInTime_Lobatto_body_AoS(
      UOut,
      QIn,
      FRCoeff,
      nodesPerAxis,
      strideQ,
      scalarIndexCell);
  }
}
#define logWarning(methodName, logMacroMessageStream)
Wrapper macro around tarch::logging::Log to improve logging.
Definition Log.h:440
#define FLCoeff(i)
Definition KernelUtils.h:8
#define FRCoeff(i)
Definition KernelUtils.h:9
Log Device.
Definition Log.h:516
void spaceTimePredictor_extrapolateInTime_body_AoS(double *__restrict__ UOut, const double *const __restrict__ QIn, const double *const __restrict__ FRCoeff, const double nodesPerAxis, const int strideQ, const int scalarIndexCell)
Extrapolates the predictor to time t+dt.
GPUCallableMethod tarch::la::Vector< Dimensions+1, int > delineariseIndex(int scalarIndex, const tarch::la::Vector< Dimensions+1, int > &strides)
GPUCallableMethod int mapSpaceTimeFaceIndexToScalarCellIndex(const tarch::la::Vector< Dimensions+1, int > &indexFace, const int direction, const int id, const int nodesPerAxis)
void spaceTimePredictor_extrapolate_Lobatto_loop_AoS(double *__restrict__ QHullOut[Dimensions *2], const double *const __restrict__ QIn, const double *const __restrict__ FLCoeff, const double *const __restrict__ FRCoeff, const int order, const int unknowns, const int auxiliaryVariables)
Extrapolate all of cell's predictor DOF to the hull of the element.
GPUCallableMethod void gradient_AoS(const double *__restrict__ const QIn, const double *__restrict__ const dudx, const double invDx, const int nodesPerAxis, const int strideQ, const int scalarIndex, double *__restrict__ gradQAux)
Computes the gradient at a given (space-time) quadrature point.
void spaceTimePredictor_PicardLoop_addNcpContributionToRhs_body_AoS(std::function< void(const double *const __restrict__ Q, double *__restrict__ dQ_or_deltaQ, const tarch::la::Vector< Dimensions, double > &x, double t, int normal, double *__restrict__ BgradQ) > nonconservativeProduct, double *__restrict__ rhsOut, double *__restrict__ gradQAux, double *__restrict__ SAux, const double *const __restrict__ QIn, const double *const __restrict__ nodes, const double *const __restrict__ weights, const double *const __restrict__ dudx, const tarch::la::Vector< Dimensions, double > &cellCentre, const double dx, const double t, const double dt, const int nodesPerAxis, const int unknowns, const int strideQ, const int strideRhs, const int scalarIndex)
void spaceTimePredictor_PicardLoop_loop_AoS(std::function< void(const double *const __restrict__ Q, const tarch::la::Vector< Dimensions, double > &x, double t, int normal, double *__restrict__ F) > flux, std::function< void(const double *const __restrict__ Q, const tarch::la::Vector< Dimensions, double > &x, double t, double *__restrict__ S) > algebraicSource, std::function< void(const double *const __restrict__ Q, const double *const __restrict__ dQ_or_deltaQ, const tarch::la::Vector< Dimensions, double > &x, double t, int normal, double *__restrict__ BgradQ) > nonconservativeProduct, double *__restrict__ QOut, const double *const __restrict__ UIn, const double *const __restrict__ nodes, const double *const __restrict__ weights, const double *const __restrict__ Kxi, const double *const __restrict__ iK1, const double *const __restrict__ FLCoeff, const double *const __restrict__ dudx, const tarch::la::Vector< Dimensions, double > &cellCentre, const double dx, const double t, const double dt, const int order, const int unknowns, const int auxiliaryVariables, const double atol, const bool callFlux, const bool callSource, const bool callNonconservativeProduct)
Compute the space-time predictor (Qout) from the current solution (UIn).
void spaceTimePredictor_PicardLoop_initialiseRhs_AoS(double *__restrict__ rhsOut, const double *const __restrict__ UIn, const double *const __restrict__ FLCoeff, const int nodesPerAxis, const int strideQ, const int strideRhs, const int scalarIndex)
Initialise RHS of Picard iterations.
void spaceTimePredictor_extrapolate_Lobatto_body_AoS(double *__restrict__ QHullOut[Dimensions *2], const double *const __restrict__ QIn, const int nodesPerAxis, const int strideQLR, const int strideQ, const int scalarIndexHull)
Extrapolate the predictor onto DoF associated with the faces of a cell (cell hull).
void spaceTimePredictor_extrapolate_loop_AoS(double *__restrict__ QHullOut[Dimensions *2], const double *const __restrict__ QIn, const double *const __restrict__ FLCoeff, const double *const __restrict__ FRCoeff, const int order, const int unknowns, const int auxiliaryVariables)
Extrapolate all of cell's predictor DOF to the hull of the element.
void spaceTimePredictor_extrapolateInTime_loop_AoS(double *__restrict__ UOut, const double *const __restrict__ QIn, const double *const __restrict__ FRCoeff, const int order, const int unknowns, const int auxiliaryVariables)
Extrapolates the predictor to t+dt.
GPUCallableMethod tarch::la::Vector< Dimensions+1, int > getStrides(int nodesPerAxis, bool strides4SpaceTimeQuantity=true)
If the stride is used for a space-time quantity, then this function generates:
void spaceTimePredictor_extrapolateInTime_Lobatto_loop_AoS(double *__restrict__ UOut, const double *const __restrict__ QIn, const double *const __restrict__ FRCoeff, const int order, const int unknowns, const int auxiliaryVariables)
Extrapolates the predictor to t+dt.
void spaceTimePredictor_PicardLoop_addContributions_body_AoS(std::function< void(const double *const __restrict__ Q, const tarch::la::Vector< Dimensions, double > &x, double t, int normal, double *__restrict__ F) > flux, std::function< void(const double *const __restrict__ Q, const tarch::la::Vector< Dimensions, double > &x, double t, double *__restrict__ S) > algebraicSource, std::function< void(const double *const __restrict__ Q, double *__restrict__ dQ_or_deltaQ, const tarch::la::Vector< Dimensions, double > &x, double t, int normal, double *__restrict__ BgradQ) > nonconservativeProduct, double *__restrict__ rhsOut, double *__restrict__ FAux, double *__restrict__ gradQAux, double *__restrict__ SAux, const double *__restrict__ QIn, const double *__restrict__ nodes, const double *__restrict__ weights, const double *__restrict__ Kxi, const double *__restrict__ dudx, const tarch::la::Vector< Dimensions, double > &cellCentre, const double dx, const double t, const double dt, const int nodesPerAxis, const int unknowns, const int strideQ, const int strideRhs, const bool callFlux, const bool callSource, const bool callNonconservativeProduct, const int scalarIndex)
Add flux, NCP, and source contributions to RHS during STP Picard loop.
void spaceTimePredictor_extrapolate_body_AoS(double *__restrict__ QHullOut[Dimensions *2], const double *const __restrict__ QIn, const double *const __restrict__ FLRCoeff[2], const int nodesPerAxis, const int strideQLR, const int strideQ, const int scalarIndexHull)
Extrapolate the predictor onto DoF associated with the faces of a cell (cell hull).
void spaceTimePredictor_initialGuess_body_AoS(double *__restrict__ Qout, const double *__restrict__ UIn, const int nodesPerAxis, const int strideQ, const int scalarIndex)
Initial guess for Q before STP Picard loop.
void spaceTimePredictor_PicardLoop_addSourceContributionToRhs_body_AoS(std::function< void(const double *const __restrict__ Q, const tarch::la::Vector< Dimensions, double > &x, double t, double *__restrict__ S) > algebraicSource, double *__restrict__ rhsOut, double *__restrict__ SAux, const double *__restrict__ QIn, const double *__restrict__ nodes, const double *__restrict__ weights, const tarch::la::Vector< Dimensions, double > &cellCentre, const double dx, const double t, const double dt, const int nodesPerAxis, const int unknowns, const int strideQ, const int strideRhs, const int scalarIndex)
Add source contributions to RHS during STP Picard loop.
void spaceTimePredictor_PicardLoop_invert_body_AoS(double *__restrict__ QOut, double &squaredResiduumOut, const double *const __restrict__ rhsIn, const double *const __restrict__ iK1, const int nodesPerAxis, const int unknowns, const int strideQ, const int strideRhs, const int scalarIndex)
Invert the Picard space-time system to compute the next solution.
void spaceTimePredictor_PicardLoop_addFluxContributionsToRhs_body_AoS(std::function< void(const double *const __restrict__ Q, const tarch::la::Vector< Dimensions, double > &x, double t, int normal, double *__restrict__ F) > flux, double *__restrict__ rhsOut, double *__restrict__ FAux, const double *__restrict__ QIn, const double *__restrict__ nodes, const double *__restrict__ weights, const double *__restrict__ Kxi, const tarch::la::Vector< Dimensions, double > &cellCentre, const double dx, const double t, const double dt, const int nodesPerAxis, const int unknowns, const int strideQ, const int strideRhs, const int scalarIndex)
Add flux contributions to RHS during STP Picard loop.
GPUCallableMethod int getNodesPerCell(int nodesPerAxis)
GPUCallableMethod tarch::la::Vector< Dimensions+1, double > getCoordinates(const tarch::la::Vector< Dimensions+1, int > &index, const tarch::la::Vector< Dimensions, double > &centre, const double dx, const double t, const double dt, const double *__restrict__ nodes)
Compute coordinates from cell geometry and quadrature index.
GPUCallableMethod int getSpaceTimeNodesPerCell(int nodesPerAxis)
void spaceTimePredictor_extrapolateInTime_Lobatto_body_AoS(double *__restrict__ UOut, const double *const __restrict__ QIn, const double *const __restrict__ FRCoeff, const double nodesPerAxis, const int strideQ, const int scalarIndexCell)
Extrapolates the predictor to time t+dt.
Simple vector class.
Definition Vector.h:134