Peano
Loading...
Searching...
No Matches
kernels.py
Go to the documentation of this file.
1# This file is part of the ExaHyPE2 project. For conditions of distribution and
2# use, please see the copyright notice at www.peano-framework.org
3from exahype2.solvers.PDETerms import PDETerms
4
5import jinja2
6from enum import Enum
7
8
class SolverVariant(Enum):
    """
    Enumerates the flavours in which a Finite-Volume solver can be generated.

    The numeric values are stable identifiers used by the code generator;
    do not renumber them.
    """

    # Classic solver: PDE terms are virtual member functions on the solver object.
    WithVirtualFunctions = 0
    # PDE terms are static/stateless functions (enables GPU offloading).
    Stateless = 1
    # Stateless variant dispatched over multiple cores.
    Multicore = 2
    # Accelerator (GPU) variant where data is copied to/from the device explicitly.
    AcceleratorWithExplicitCopy = 3
14
15
class KernelVariant(Enum):
    """
    Enumerates the available compute-kernel realisations.

    The first part of each name selects the traversal scheme (patch-wise,
    batched, task-graph), the second part the data layout (AoS, AoSoA, SoA)
    on the heap. The numeric values are stable identifiers grouped by
    traversal scheme (3x = patch-wise, 4x = batched, 6x = task graph);
    do not renumber them.
    """

    # Patch-wise traversal.
    PatchWiseAoSHeap = 30
    PatchWiseAoSoAHeap = 31
    PatchWiseSoAHeap = 32
    # Batched traversal.
    BatchedAoSHeap = 40
    BatchedAoSoAHeap = 41
    BatchedSoAHeap = 42
    # Task-graph traversal.
    TaskGraphAoSHeap = 60
    TaskGraphAoSoAHeap = 61
    TaskGraphSoAHeap = 62
26
27
def create_abstract_solver_declarations(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state):
    """
    Render the C++ declarations for the abstract solver base class.

    For each PDE term (max eigenvalue, flux, non-conservative product, source
    term) the template emits

    - a virtual member function, pure virtual (``= 0``) when the user provides
      the implementation, otherwise ``final`` with the implementation injected
      into the abstract class; and
    - additionally, when ``pde_terms_without_state`` is set, a static
      ``GPUCallableMethod`` overload carrying the trailing ``Offloadable``
      tag parameter, wrapped in OpenMP ``declare target`` guards.

    :param flux_implementation: C++ body for the flux, or one of the
        ``"<user-defined>"`` / ``"<none>"`` / ``"<empty>"`` markers.
    :param ncp_implementation: same convention for the non-conservative product.
    :param eigenvalues_implementation: same convention for the max eigenvalue.
    :param source_term_implementation: same convention for the source term.
    :param pde_terms_without_state: bool; emit the additional static
        (stateless/GPU) variants.
    :returns: the rendered C++ snippet as a string. Placeholders that are not
        supplied here (e.g. ``{{NUMBER_OF_UNKNOWNS}}``) survive verbatim thanks
        to ``jinja2.DebugUndefined``, so a later render pass can fill them in.
    """
    template = jinja2.Template("""
  public:
    {% if EIGENVALUES_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
    /**
     * Depending on the implementation, this variant might be slow as it
     * lacks an inline define. Also, if you don't want to use ipo aggressively,
     * it might be clever to put the implementation into the header.
     *
     * ## SYCL
     *
     * At the moment, SYCL seems to struggle with ipo, even if a function is
     * never called. So I embed the (empty) implementation directly into the
     * header.
     */
    #if defined(GPUOffloadingOMP)
    #pragma omp declare target
    #endif
    static GPUCallableMethod void maxEigenvalue(
      const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
      const tarch::la::Vector<Dimensions,double>& faceCentre,
      const tarch::la::Vector<Dimensions,double>& gridCellH,
      double t,
      double dt,
      int normal,
      double* __restrict__ maxEigenvalue,
      Offloadable
    )
    //#if defined(GPUOffloadingSYCL)
    //{}
    //#else
    ;
    //#endif
    #if defined(GPUOffloadingOMP)
    #pragma omp end declare target
    #endif
    {% endif %}


    {% if EIGENVALUES_IMPLEMENTATION!="<none>" %}
    /**
     * Determine max eigenvalue over Jacobian in a given point with solution values
     * (states) Q. All parameters are in.
     *
     * @return Max eigenvalue. Result has to be positive, so we are actually speaking
     *   about the maximum absolute eigenvalue.
     */
    virtual void maxEigenvalue(
      const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
      const tarch::la::Vector<Dimensions,double>& faceCentre,
      const tarch::la::Vector<Dimensions,double>& gridCellH,
      double t,
      double dt,
      int normal,
      double* __restrict__ maxEigenvalue
    ) {% if EIGENVALUES_IMPLEMENTATION=="<user-defined>" %}= 0{% else %} final{% endif %};
    {% endif %}


    {% if FLUX_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
    /**
     * Depending on the implementation, this variant might be slow as it
     * lacks an inline define. Also, if you don't want to use ipo aggressively,
     * it might be clever to put the implementation into the header.
     *
     * ## SYCL
     *
     * At the moment, SYCL seems to struggle with ipo, even if a function is
     * never called. So I embed the (empty) implementation directly into the
     * header.
     */
    #if defined(GPUOffloadingOMP)
    #pragma omp declare target
    #endif
    static GPUCallableMethod void flux(
      const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
      const tarch::la::Vector<Dimensions,double>& faceCentre,
      const tarch::la::Vector<Dimensions,double>& gridCellH,
      double t,
      double dt,
      int normal,
      double * __restrict__ F, // F[{{NUMBER_OF_UNKNOWNS}}]
      Offloadable
    )
    //#if defined(GPUOffloadingSYCL)
    //{}
    //#else
    ;
    //#endif
    #if defined(GPUOffloadingOMP)
    #pragma omp end declare target
    #endif
    {% endif %}

    {% if FLUX_IMPLEMENTATION!="<none>" %}
    virtual void flux(
      const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
      const tarch::la::Vector<Dimensions,double>& faceCentre,
      const tarch::la::Vector<Dimensions,double>& gridCellH,
      double t,
      double dt,
      int normal,
      double * __restrict__ F // F[{{NUMBER_OF_UNKNOWNS}}]
    ) {% if FLUX_IMPLEMENTATION=="<user-defined>" %}=0{% else %} final {% endif %};
    {% endif %}


    {% if NCP_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
    /**
     * Depending on the implementation, this variant might be slow as it
     * lacks an inline define. Also, if you don't want to use ipo aggressively,
     * it might be clever to put the implementation into the header.
     *
     * ## SYCL
     *
     * At the moment, SYCL seems to struggle with ipo, even if a function is
     * never called. So I embed the (empty) implementation directly into the
     * header.
     */
    #if defined(GPUOffloadingOMP)
    #pragma omp declare target
    #endif
    static GPUCallableMethod void nonconservativeProduct(
      const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
      const double * __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
      const tarch::la::Vector<Dimensions,double>& faceCentre,
      const tarch::la::Vector<Dimensions,double>& gridCellH,
      double t,
      double dt,
      int normal,
      double * __restrict__ BTimesDeltaQ, // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
      Offloadable
    )
    //#if defined(GPUOffloadingSYCL)
    //{}
    //#else
    ;
    //#endif
    #if defined(GPUOffloadingOMP)
    #pragma omp end declare target
    #endif
    {% endif %}

    {% if NCP_IMPLEMENTATION!="<none>" %}
    virtual void nonconservativeProduct(
      const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
      const double * __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
      const tarch::la::Vector<Dimensions,double>& faceCentre,
      const tarch::la::Vector<Dimensions,double>& gridCellH,
      double t,
      double dt,
      int normal,
      double * __restrict__ BTimesDeltaQ // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
    ) {% if NCP_IMPLEMENTATION=="<user-defined>" %}=0{% endif %};
    {% endif %}


    {% if SOURCE_TERM_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
    /**
     * Depending on the implementation, this variant might be slow as it
     * lacks an inline define. Also, if you don't want to use ipo aggressively,
     * it might be clever to put the implementation into the header.
     *
     * ## SYCL
     *
     * At the moment, SYCL seems to struggle with ipo, even if a function is
     * never called. So I embed the (empty) implementation directly into the
     * header.
     */
    #if defined(GPUOffloadingOMP)
    #pragma omp declare target
    #endif
    static GPUCallableMethod void sourceTerm(
      const double * __restrict__ Q,
      const tarch::la::Vector<Dimensions,double>& gridCellCentre,
      const tarch::la::Vector<Dimensions,double>& gridCellH,
      double t,
      double dt,
      double * __restrict__ S,
      Offloadable
    )
    //#if defined(GPUOffloadingSYCL)
    //{}
    //#else
    ;
    //#endif
    #if defined(GPUOffloadingOMP)
    #pragma omp end declare target
    #endif
    {% endif %}

    {% if SOURCE_TERM_IMPLEMENTATION!="<none>" %}
    virtual void sourceTerm(
      const double * __restrict__ Q,
      const tarch::la::Vector<Dimensions,double>& gridCellCentre,
      const tarch::la::Vector<Dimensions,double>& gridCellH,
      double t,
      double dt,
      double * __restrict__ S
    ) {% if SOURCE_TERM_IMPLEMENTATION=="<user-defined>" %}= 0{% else %} final {% endif %};
    {% endif %}

""", undefined=jinja2.DebugUndefined)
    return template.render(
        FLUX_IMPLEMENTATION=flux_implementation,
        NCP_IMPLEMENTATION=ncp_implementation,
        EIGENVALUES_IMPLEMENTATION=eigenvalues_implementation,
        SOURCE_TERM_IMPLEMENTATION=source_term_implementation,
        STATELESS_PDE_TERMS=pde_terms_without_state,
    )
239
240
def create_abstract_solver_definitions(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state):
    """
    Render the C++ definitions (implementation file) for the abstract solver.

    Emits the bodies of the non-user-defined PDE-term member functions by
    splicing the supplied implementation snippets in, plus — when
    ``pde_terms_without_state`` is set — the static ``GPUCallableMethod``
    variants carrying the trailing ``Offloadable`` tag. Static variants call
    ``tarch::gpuAbort()`` when the corresponding term is ``"<none>"``.

    :param flux_implementation: C++ body for the flux, or one of the
        ``"<user-defined>"`` / ``"<none>"`` / ``"<empty>"`` markers.
    :param ncp_implementation: same convention for the non-conservative product.
    :param eigenvalues_implementation: same convention for the max eigenvalue.
    :param source_term_implementation: same convention for the source term
        (``"<empty>"`` yields a zero-fill of S).
    :param pde_terms_without_state: bool; emit the additional static
        (stateless/GPU) variants.
    :returns: the rendered C++ snippet. Unsupplied placeholders such as
        ``{{CLASSNAME}}`` survive verbatim (``jinja2.DebugUndefined``) for a
        later render pass.
    """
    template = jinja2.Template("""
{% if EIGENVALUES_IMPLEMENTATION!="<user-defined>" and EIGENVALUES_IMPLEMENTATION!="<none>" %}
void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::maxEigenvalue(
  const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
  const tarch::la::Vector<Dimensions,double>& faceCentre,
  const tarch::la::Vector<Dimensions,double>& gridCellH,
  double t,
  double dt,
  int normal,
  double* __restrict__ maxEigenvalue
) {
  {{EIGENVALUES_IMPLEMENTATION}}
}
{% endif %}


{% if FLUX_IMPLEMENTATION!="<none>" and FLUX_IMPLEMENTATION!="<user-defined>" %}
void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::flux(
  const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
  const tarch::la::Vector<Dimensions,double>& faceCentre,
  const tarch::la::Vector<Dimensions,double>& gridCellH,
  double t,
  double dt,
  int normal,
  double * __restrict__ F // F[{{NUMBER_OF_UNKNOWNS}}]
) {
  {{FLUX_IMPLEMENTATION}}
}
{% endif %}


{% if NCP_IMPLEMENTATION!="<none>" and NCP_IMPLEMENTATION!="<user-defined>" %}
void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::nonconservativeProduct(
  const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
  const double * __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
  const tarch::la::Vector<Dimensions,double>& faceCentre,
  const tarch::la::Vector<Dimensions,double>& gridCellH,
  double t,
  double dt,
  int normal,
  double * __restrict__ BTimesDeltaQ // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
) {
  {{NCP_IMPLEMENTATION}}
}
{% endif %}


{% if SOURCE_TERM_IMPLEMENTATION!="<user-defined>" and SOURCE_TERM_IMPLEMENTATION!="<none>" %}
//#if !defined(GPUOffloadingSYCL)
void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::sourceTerm(
  const double * __restrict__ Q,
  const tarch::la::Vector<Dimensions,double>& gridCellCentre,
  const tarch::la::Vector<Dimensions,double>& gridCellH,
  double t,
  double dt,
  double * __restrict__ S
) {
  {% if SOURCE_TERM_IMPLEMENTATION!="<empty>" %}
  {{SOURCE_TERM_IMPLEMENTATION}}
  {% else %}
  std::fill_n(S,{{NUMBER_OF_UNKNOWNS}},0.0);
  {% endif %}
}
//#endif
{% endif %}



{% if EIGENVALUES_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
//#if !defined(GPUOffloadingSYCL)
GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::maxEigenvalue(
  const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
  const tarch::la::Vector<Dimensions,double>& faceCentre,
  const tarch::la::Vector<Dimensions,double>& gridCellH,
  double t,
  double dt,
  int normal,
  double* __restrict__ maxEigenvalue,
  Offloadable
) {
  {{EIGENVALUES_IMPLEMENTATION}};
}
//#endif
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif
{% endif %}


{% if FLUX_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
//#if !defined(GPUOffloadingSYCL)
GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::flux(
  const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
  const tarch::la::Vector<Dimensions,double>& faceCentre,
  const tarch::la::Vector<Dimensions,double>& gridCellH,
  double t,
  double dt,
  int normal,
  double * __restrict__ F, // F[{{NUMBER_OF_UNKNOWNS}}]
  Offloadable
) {
  {% if FLUX_IMPLEMENTATION=="<none>" %}
  tarch::gpuAbort();
  {% else %}
  {{FLUX_IMPLEMENTATION}}
  {% endif %}
}
//#endif
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif
{% endif %}


{% if NCP_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
//#if !defined(GPUOffloadingSYCL)
GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::nonconservativeProduct(
  const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
  const double * __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
  const tarch::la::Vector<Dimensions,double>& faceCentre,
  const tarch::la::Vector<Dimensions,double>& gridCellH,
  double t,
  double dt,
  int normal,
  double * __restrict__ BTimesDeltaQ, // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
  Offloadable
) {
  {% if NCP_IMPLEMENTATION=="<none>" %}
  tarch::gpuAbort();
  {% else %}
  {{NCP_IMPLEMENTATION}}
  {% endif %}
}
//#endif
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif
{% endif %}


{% if SOURCE_TERM_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
//#if !defined(GPUOffloadingSYCL)
GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::sourceTerm(
  const double * __restrict__ Q,
  const tarch::la::Vector<Dimensions,double>& gridCellCentre,
  const tarch::la::Vector<Dimensions,double>& gridCellH,
  double t,
  double dt,
  double * __restrict__ S,
  Offloadable
) {
  {% if SOURCE_TERM_IMPLEMENTATION=="<none>" %}
  tarch::gpuAbort();
  {% else %}
  {{SOURCE_TERM_IMPLEMENTATION}}
  {% endif %}
}
//#endif
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif
{% endif %}
""", undefined=jinja2.DebugUndefined)
    return template.render(
        FLUX_IMPLEMENTATION=flux_implementation,
        NCP_IMPLEMENTATION=ncp_implementation,
        EIGENVALUES_IMPLEMENTATION=eigenvalues_implementation,
        SOURCE_TERM_IMPLEMENTATION=source_term_implementation,
        STATELESS_PDE_TERMS=pde_terms_without_state,
    )
425
426
def create_solver_declarations(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state):
    """
    Render the C++ declarations for the concrete (user) solver class.

    For every PDE term marked ``"<user-defined>"`` the template declares the
    virtual override the user has to implement; when
    ``pde_terms_without_state`` is additionally set, it also declares the
    static ``GPUCallableMethod`` variant carrying the trailing ``Offloadable``
    tag parameter (the "TBB trick" described in the generated comments),
    wrapped in OpenMP ``declare target`` guards.

    Fix over the previous revision: the static ``maxEigenvalue`` declaration
    was missing the comma between ``maxEigenvalue`` and ``Offloadable``, so
    the generated header did not compile for stateless user-defined
    eigenvalues.

    NOTE(review): unlike the abstract solver's static ``maxEigenvalue``, this
    static variant takes no ``double dt`` parameter. It matches the definition
    emitted by ``create_solver_definitions``, so it is kept as-is here —
    TODO confirm against the kernel call sites whether ``dt`` should be added
    to both.

    :param flux_implementation: C++ body marker for the flux
        (``"<user-defined>"`` / ``"<none>"`` / actual code).
    :param ncp_implementation: same convention for the non-conservative product.
    :param eigenvalues_implementation: same convention for the max eigenvalue.
    :param source_term_implementation: same convention for the source term.
    :param pde_terms_without_state: bool; declare the additional static
        (stateless/GPU) variants.
    :returns: the rendered C++ snippet. Unsupplied placeholders survive
        verbatim (``jinja2.DebugUndefined``) for a later render pass.
    """
    template = jinja2.Template("""
  public:
    {% if SOURCE_TERM_IMPLEMENTATION=="<user-defined>" %}
    virtual void sourceTerm(
      const double * __restrict__ Q,
      const tarch::la::Vector<Dimensions,double>& gridCellCentre,
      const tarch::la::Vector<Dimensions,double>& gridCellH,
      double t,
      double dt,
      double * __restrict__ S
    ) override;
    {% endif %}

    {% if EIGENVALUES_IMPLEMENTATION=="<user-defined>" %}
    /**
     * Determine max eigenvalue over Jacobian in a given point with solution values
     * (states) Q. All parameters are in.
     *
     * @return Max eigenvalue. Result has to be positive, so we are actually speaking
     *   about the maximum absolute eigenvalue.
     */
    virtual void maxEigenvalue(
      const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
      const tarch::la::Vector<Dimensions,double>& faceCentre,
      const tarch::la::Vector<Dimensions,double>& gridCellH,
      double t,
      double dt,
      int normal,
      double* __restrict__ maxEigenvalue
    ) override;
    {% endif %}


    {% if FLUX_IMPLEMENTATION=="<user-defined>" %}
    virtual void flux(
      const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
      const tarch::la::Vector<Dimensions,double>& faceCentre,
      const tarch::la::Vector<Dimensions,double>& gridCellH,
      double t,
      double dt,
      int normal,
      double * __restrict__ F // F[{{NUMBER_OF_UNKNOWNS}}]
    ) override;
    {% endif %}


    {% if NCP_IMPLEMENTATION=="<user-defined>" %}
    virtual void nonconservativeProduct(
      const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
      const double * __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
      const tarch::la::Vector<Dimensions,double>& faceCentre,
      const tarch::la::Vector<Dimensions,double>& gridCellH,
      double t,
      double dt,
      int normal,
      double * __restrict__ BTimesDeltaQ // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
    ) override;
    {% endif %}


    {% if STATELESS_PDE_TERMS and SOURCE_TERM_IMPLEMENTATION=="<user-defined>" %}
    /**
     * To obtain the best performance, I recommend to man inline command to
     * this signature and to copy the implementation into the header. So it would
     * read
     *
     * static inline void sourceTerm( ... ) {
     *   code here
     * }
     *
     * The GPU offloading requires static functions. As we cannot overload the
     * original (virtual) function with a static alternative, we do the
     * TBB trick and overload by adding an additional enum. It has no semantics
     * but helps the compiler to distinguish the different function variants.
     */
    #if defined(GPUOffloadingOMP)
    #pragma omp declare target
    #endif
    static GPUCallableMethod void sourceTerm(
      const double * __restrict__ Q,
      const tarch::la::Vector<Dimensions,double>& gridCellCentre,
      const tarch::la::Vector<Dimensions,double>& gridCellH,
      double t,
      double dt,
      double * __restrict__ S,
      Offloadable
    );
    #if defined(GPUOffloadingOMP)
    #pragma omp end declare target
    #endif
    {% endif %}


    {% if EIGENVALUES_IMPLEMENTATION=="<user-defined>" and STATELESS_PDE_TERMS %}
    /**
     * To obtain the best performance, I recommend to man inline command to
     * this signature and to copy the implementation into the header. So it would
     * read
     *
     * static inline double maxEigenvalue( ... ) {
     *   code here
     * }
     *
     * The GPU offloading requires static functions. As we cannot overload the
     * original (virtual) function with a static alternative, we do the
     * TBB trick and overload by adding an additional enum. It has no semantics
     * but helps the compiler to distinguish the different function variants.
     */
    #if defined(GPUOffloadingOMP)
    #pragma omp declare target
    #endif
    static GPUCallableMethod void maxEigenvalue(
      const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
      const tarch::la::Vector<Dimensions,double>& faceCentre,
      const tarch::la::Vector<Dimensions,double>& gridCellH,
      double t,
      int normal,
      double* __restrict__ maxEigenvalue,
      Offloadable
    );
    #if defined(GPUOffloadingOMP)
    #pragma omp end declare target
    #endif
    {% endif %}


    {% if STATELESS_PDE_TERMS and FLUX_IMPLEMENTATION=="<user-defined>" %}
    /**
     * To obtain the best performance, I recommend to man inline command to
     * this signature and to copy the implementation into the header. So it would
     * read
     *
     * static inline void flux( ... ) {
     *   code here
     * }
     *
     * The GPU offloading requires static functions. As we cannot overload the
     * original (virtual) function with a static alternative, we do the
     * TBB trick and overload by adding an additional enum. It has no semantics
     * but helps the compiler to distinguish the different function variants.
     */
    #if defined(GPUOffloadingOMP)
    #pragma omp declare target
    #endif
    static GPUCallableMethod void flux(
      const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
      const tarch::la::Vector<Dimensions,double>& faceCentre,
      const tarch::la::Vector<Dimensions,double>& gridCellH,
      double t,
      double dt,
      int normal,
      double * __restrict__ F, // F[{{NUMBER_OF_UNKNOWNS}}]
      Offloadable
    );
    #if defined(GPUOffloadingOMP)
    #pragma omp end declare target
    #endif
    {% endif %}


    {% if STATELESS_PDE_TERMS and NCP_IMPLEMENTATION=="<user-defined>" %}
    /**
     * To obtain the best performance, I recommend to man inline command to
     * this signature and to copy the implementation into the header. So it would
     * read
     *
     * static inline void nonconservativeProduct( ... ) {
     *   code here
     * }
     *
     * The GPU offloading requires static functions. As we cannot overload the
     * original (virtual) function with a static alternative, we do the
     * TBB trick and overload by adding an additional enum. It has no semantics
     * but helps the compiler to distinguish the different function variants.
     */
    #if defined(GPUOffloadingOMP)
    #pragma omp declare target
    #endif
    static GPUCallableMethod void nonconservativeProduct(
      const double * __restrict__ Q,
      const double * __restrict__ deltaQ,
      const tarch::la::Vector<Dimensions,double>& faceCentre,
      const tarch::la::Vector<Dimensions,double>& gridCellH,
      double t,
      double dt,
      int normal,
      double * __restrict__ BTimesDeltaQ,
      Offloadable
    );
    #if defined(GPUOffloadingOMP)
    #pragma omp end declare target
    #endif
    {% endif %}
""", undefined=jinja2.DebugUndefined)
    return template.render(
        FLUX_IMPLEMENTATION=flux_implementation,
        NCP_IMPLEMENTATION=ncp_implementation,
        EIGENVALUES_IMPLEMENTATION=eigenvalues_implementation,
        SOURCE_TERM_IMPLEMENTATION=source_term_implementation,
        STATELESS_PDE_TERMS=pde_terms_without_state,
    )
629
630
def create_solver_definitions(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state):
    """
    Render the C++ definition stubs for the concrete (user) solver class.

    For every PDE term marked ``"<user-defined>"`` the template emits an empty
    implementation skeleton (``// @todo implement`` plus trace-logging calls)
    that the user fills in; when ``pde_terms_without_state`` is additionally
    set, it also emits the skeleton of the static ``GPUCallableMethod``
    variant carrying the trailing ``Offloadable`` tag, wrapped in OpenMP
    ``declare target`` guards.

    Fixes over the previous revision (generated-code cosmetics only):
    the ``BTimesDeltaQ`` comment typo (``BTimesDeltaQQ``), and the
    ``sourceTerm`` parameter is now named ``gridCellCentre`` to match the
    declaration emitted by ``create_solver_declarations`` (the trace-log call
    was renamed accordingly).

    NOTE(review): the static ``maxEigenvalue`` skeleton takes no ``double dt``
    parameter; it matches ``create_solver_declarations`` and is kept as-is —
    TODO confirm against the kernel call sites.

    :param flux_implementation: C++ body marker for the flux
        (``"<user-defined>"`` / ``"<none>"`` / actual code).
    :param ncp_implementation: same convention for the non-conservative product.
    :param eigenvalues_implementation: same convention for the max eigenvalue.
    :param source_term_implementation: same convention for the source term.
    :param pde_terms_without_state: bool; emit the additional static
        (stateless/GPU) skeletons.
    :returns: the rendered C++ snippet. Unsupplied placeholders survive
        verbatim (``jinja2.DebugUndefined``) for a later render pass.
    """
    template = jinja2.Template("""
{% if EIGENVALUES_IMPLEMENTATION=="<user-defined>" %}
void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::maxEigenvalue(
  const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
  const tarch::la::Vector<Dimensions,double>& faceCentre,
  const tarch::la::Vector<Dimensions,double>& gridCellH,
  double t,
  double dt,
  int normal,
  double* __restrict__ maxEigenvalue
) {
  logTraceInWith4Arguments( "maxEigenvalue(...)", faceCentre, gridCellH, t, normal );
  // @todo implement
  logTraceOut( "maxEigenvalue(...)" );
}
{% endif %}


{% if FLUX_IMPLEMENTATION=="<user-defined>" %}
void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::flux(
  const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
  const tarch::la::Vector<Dimensions,double>& faceCentre,
  const tarch::la::Vector<Dimensions,double>& gridCellH,
  double t,
  double dt,
  int normal,
  double * __restrict__ F // F[{{NUMBER_OF_UNKNOWNS}}]
) {
  logTraceInWith4Arguments( "flux(...)", faceCentre, gridCellH, t, normal );
  // @todo implement
  logTraceOut( "flux(...)" );
}
{% endif %}


{% if NCP_IMPLEMENTATION=="<user-defined>" %}
void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::nonconservativeProduct(
  const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
  const double * __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
  const tarch::la::Vector<Dimensions,double>& faceCentre,
  const tarch::la::Vector<Dimensions,double>& gridCellH,
  double t,
  double dt,
  int normal,
  double * __restrict__ BTimesDeltaQ // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
) {
  logTraceInWith4Arguments( "nonconservativeProduct(...)", faceCentre, gridCellH, t, normal );
  // @todo implement
  logTraceOut( "nonconservativeProduct(...)" );
}
{% endif %}


{% if SOURCE_TERM_IMPLEMENTATION=="<user-defined>" %}
void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::sourceTerm(
  const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
  const tarch::la::Vector<Dimensions,double>& gridCellCentre,
  const tarch::la::Vector<Dimensions,double>& gridCellH,
  double t,
  double dt,
  double * __restrict__ S // S[{{NUMBER_OF_UNKNOWNS}}]
) {
  logTraceInWith4Arguments( "sourceTerm(...)", gridCellCentre, gridCellH, t, dt );

  // @todo implement and ensure that all entries of S are properly set
  for (int i=0; i<NumberOfUnknowns; i++) {
    S[i] = 0.0;
  }

  logTraceOut( "sourceTerm(...)" );
}
{% endif %}




{% if EIGENVALUES_IMPLEMENTATION=="<user-defined>" and STATELESS_PDE_TERMS %}
#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::maxEigenvalue(
  const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
  const tarch::la::Vector<Dimensions,double>& faceCentre,
  const tarch::la::Vector<Dimensions,double>& gridCellH,
  double t,
  int normal,
  double* __restrict__ maxEigenvalue,
  Offloadable
) {
  // @todo implement
}
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif
{% endif %}


{% if FLUX_IMPLEMENTATION=="<user-defined>" and STATELESS_PDE_TERMS %}
#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::flux(
  const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
  const tarch::la::Vector<Dimensions,double>& faceCentre,
  const tarch::la::Vector<Dimensions,double>& gridCellH,
  double t,
  double dt,
  int normal,
  double * __restrict__ F,
  Offloadable
) {
  // @todo implement
}
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif
{% endif %}


{% if NCP_IMPLEMENTATION=="<user-defined>" and STATELESS_PDE_TERMS %}
#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::nonconservativeProduct(
  const double * __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}],
  const double * __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
  const tarch::la::Vector<Dimensions,double>& faceCentre,
  const tarch::la::Vector<Dimensions,double>& gridCellH,
  double t,
  double dt,
  int normal,
  double * __restrict__ BTimesDeltaQ,
  Offloadable
) {
  // @todo implement
}
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif
{% endif %}



{% if SOURCE_TERM_IMPLEMENTATION=="<user-defined>" and STATELESS_PDE_TERMS %}
#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::sourceTerm(
  const double * __restrict__ Q,
  const tarch::la::Vector<Dimensions,double>& gridCellCentre,
  const tarch::la::Vector<Dimensions,double>& gridCellH,
  double t,
  double dt,
  double * __restrict__ S,
  Offloadable
) {
  // @todo implement
}
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif
{% endif %}


""", undefined=jinja2.DebugUndefined)
    return template.render(
        FLUX_IMPLEMENTATION=flux_implementation,
        NCP_IMPLEMENTATION=ncp_implementation,
        EIGENVALUES_IMPLEMENTATION=eigenvalues_implementation,
        SOURCE_TERM_IMPLEMENTATION=source_term_implementation,
        STATELESS_PDE_TERMS=pde_terms_without_state,
    )
805
806
create_abstract_solver_declarations(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state)
Definition kernels.py:28
create_solver_declarations(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state)
Definition kernels.py:427
create_abstract_solver_definitions(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state)
Definition kernels.py:241
create_solver_definitions(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state)
Definition kernels.py:631