41 source_implementation,
42 compute_max_eigenvalue_of_next_time_step,
43 solver_variant: SolverVariant,
44 kernel_variant: KernelVariant):
46 Return only the unqualified function call, i.e., without any namespaces.
47 So by setting the right namespace as prefix, you can direct it to particular
51 KernelVariant.PatchWiseAoS:
"timeStepWithRusanovPatchwiseHeap",
52 KernelVariant.PatchWiseAoSoA:
"timeStepWithRusanovPatchwiseHeap",
53 KernelVariant.PatchWiseSoA:
"timeStepWithRusanovPatchwiseHeap",
54 KernelVariant.BatchedAoS:
"timeStepWithRusanovBatchedHeap",
55 KernelVariant.BatchedAoSoA:
"timeStepWithRusanovBatchedHeap",
56 KernelVariant.BatchedSoA:
"timeStepWithRusanovBatchedHeap",
57 KernelVariant.TaskGraphAoS:
"timeStepWithRusanovTaskgraphHeap",
58 KernelVariant.TaskGraphAoSoA:
"timeStepWithRusanovTaskgraphHeap",
59 KernelVariant.TaskGraphSoA:
"timeStepWithRusanovTaskgraphHeap",
60 KernelVariant.VolumeWiseAoS:
"timeStepWithRusanovVolumewise",
61 KernelVariant.VolumeWiseAoSoA:
"timeStepWithRusanovVolumewise",
62 KernelVariant.VolumeWiseSoA:
"timeStepWithRusanovVolumewise",
65 EnumeratorTemplateTypes = {
66 KernelVariant.PatchWiseAoS:
"::exahype2::enumerator::AoSLexicographicEnumerator",
67 KernelVariant.PatchWiseAoSoA:
"::exahype2::enumerator::AoSoALexicographicEnumerator",
68 KernelVariant.PatchWiseSoA:
"::exahype2::enumerator::SoALexicographicEnumerator",
69 KernelVariant.BatchedAoS:
"::exahype2::enumerator::AoSLexicographicEnumerator",
70 KernelVariant.BatchedAoSoA:
"::exahype2::enumerator::AoSoALexicographicEnumerator",
71 KernelVariant.BatchedSoA:
"::exahype2::enumerator::SoALexicographicEnumerator",
72 KernelVariant.TaskGraphAoS:
"::exahype2::enumerator::AoSLexicographicEnumerator",
73 KernelVariant.TaskGraphAoSoA:
"::exahype2::enumerator::AoSoALexicographicEnumerator",
74 KernelVariant.TaskGraphSoA:
"::exahype2::enumerator::SoALexicographicEnumerator",
75 KernelVariant.VolumeWiseAoS:
"::exahype2::enumerator::AoSLexicographicEnumerator",
76 KernelVariant.VolumeWiseAoSoA:
"::exahype2::enumerator::AoSoALexicographicEnumerator",
77 KernelVariant.VolumeWiseSoA:
"::exahype2::enumerator::SoALexicographicEnumerator",
80 template = KernelCalls[kernel_variant]
82 if solver_variant == SolverVariant.WithVirtualFunctions:
83 template +=
"""Functors<
84 {{NUMBER_OF_VOLUMES_PER_AXIS}},
86 {{NUMBER_OF_UNKNOWNS}},
87 {{NUMBER_OF_AUXILIARY_VARIABLES}},
88 {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
89 {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
90 {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
91 {% if COMPUTE_MAX_EIGENVALUE==False %} false {% else %} true {% endif %},
92 {{TEMP_DATA_ENUMERATOR}}
95 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
96 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
97 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
98 [[maybe_unused]] double t,
99 [[maybe_unused]] double dt,
100 [[maybe_unused]] int normal,
101 [[maybe_unused]] double* __restrict__ F // F[{{NUMBER_OF_UNKNOWNS}}]
103 {% if FLUX_IMPLEMENTATION!="<none>" %}
104 repositories::{{SOLVER_INSTANCE}}.flux(Q, faceCentre, volumeH, t, dt, normal, F);
108 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
109 [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
110 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
111 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
112 [[maybe_unused]] double t,
113 [[maybe_unused]] double dt,
114 [[maybe_unused]] int normal,
115 [[maybe_unused]] double* __restrict__ BTimesDeltaQ // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
117 {% if NCP_IMPLEMENTATION!="<none>" %}
118 repositories::{{SOLVER_INSTANCE}}.nonconservativeProduct(Q, deltaQ, faceCentre, volumeH, t, dt, normal, BTimesDeltaQ);
122 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
123 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeX,
124 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
125 [[maybe_unused]] double t,
126 [[maybe_unused]] double dt,
127 [[maybe_unused]] double* __restrict__ S // S[{{NUMBER_OF_UNKNOWNS}}]
129 {% if SOURCE_IMPLEMENTATION!="<none>" %}
130 repositories::{{SOLVER_INSTANCE}}.sourceTerm(Q, volumeX, volumeH, t, dt, S);
134 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
135 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
136 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
137 [[maybe_unused]] double t,
138 [[maybe_unused]] double dt,
139 [[maybe_unused]] int normal
141 return repositories::{{SOLVER_INSTANCE}}.maxEigenvalue(Q, faceCentre, volumeH, t, dt, normal);
145 elif solver_variant == SolverVariant.Stateless:
146 template +=
"""Stateless<
148 {{NUMBER_OF_VOLUMES_PER_AXIS}},
150 {{NUMBER_OF_UNKNOWNS}},
151 {{NUMBER_OF_AUXILIARY_VARIABLES}},
152 {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
153 {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
154 {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
155 {% if COMPUTE_MAX_EIGENVALUE==False %} false {% else %} true {% endif %},
156 {{TEMP_DATA_ENUMERATOR}}
159 elif solver_variant == SolverVariant.Multicore:
160 template +=
"""Stateless<
162 {{NUMBER_OF_VOLUMES_PER_AXIS}},
164 {{NUMBER_OF_UNKNOWNS}},
165 {{NUMBER_OF_AUXILIARY_VARIABLES}},
166 {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
167 {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
168 {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
169 {% if COMPUTE_MAX_EIGENVALUE==False %} false {% else %} true {% endif %},
170 {{TEMP_DATA_ENUMERATOR}}
171 >(patchData, peano4::utils::LoopPlacement::SpreadOut);
173 elif solver_variant == SolverVariant.Accelerator:
174 template +=
"""Stateless<
176 {{NUMBER_OF_VOLUMES_PER_AXIS}},
178 {{NUMBER_OF_UNKNOWNS}},
179 {{NUMBER_OF_AUXILIARY_VARIABLES}},
180 {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
181 {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
182 {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
183 {% if COMPUTE_MAX_EIGENVALUE==False %} false {% else %} true {% endif %},
184 {{TEMP_DATA_ENUMERATOR}}
185 >(targetDevice, patchData);
188 assert False,
"Not supported combination: {} x {}".format(solver_variant, kernel_variant)
190 result = jinja2.Template( template, undefined=jinja2.DebugUndefined)
192 d[
"FLUX_IMPLEMENTATION"] = flux_implementation
193 d[
"NCP_IMPLEMENTATION"] = ncp_implementation
194 d[
"SOURCE_IMPLEMENTATION"] = source_implementation
195 d[
"COMPUTE_MAX_EIGENVALUE"] = compute_max_eigenvalue_of_next_time_step
196 d[
"TEMP_DATA_ENUMERATOR"] = EnumeratorTemplateTypes[kernel_variant]
197 return result.render(**d)
201 eigenvalues_implementation,
202 source_term_implementation,
203 pde_terms_without_state):
204 Template = jinja2.Template(
"""
206 {% if EIGENVALUES_IMPLEMENTATION=="<none>" %}
207 #error eigenvalue implementation cannot be none
210 {% if EIGENVALUES_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
212 * Depending on the implementation, this variant might be slow as it
213 * lacks an inline define. Also, if you don't want to use ipo aggressively,
214 * it might be clever to put the implementation into the header.
218 * At the moment, SYCL seems to struggle with ipo, even if a function is
219 * never called. So I embed the (empty) implementation directly into the
222 #if defined(GPUOffloadingOMP)
223 #pragma omp declare target
225 static GPUCallableMethod double maxEigenvalue(
226 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
227 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
228 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
229 [[maybe_unused]] double t,
230 [[maybe_unused]] double dt,
231 [[maybe_unused]] int normal,
234 //#if defined(GPUOffloadingSYCL)
239 #if defined(GPUOffloadingOMP)
240 #pragma omp end declare target
245 * Determine max eigenvalue over Jacobian in a given point with solution values
246 * (states) Q. All parameters are in.
248 * @return Max eigenvalue. Result has to be positive, so we are actually speaking
249 * about the maximum absolute eigenvalue.
251 virtual double maxEigenvalue(
252 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
253 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
254 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
255 [[maybe_unused]] double t,
256 [[maybe_unused]] double dt,
257 [[maybe_unused]] int normal
258 ) {% if EIGENVALUES_IMPLEMENTATION=="<user-defined>" %}= 0{% else %} final{% endif %};
260 {% if FLUX_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
262 * Depending on the implementation, this variant might be slow as it
263 * lacks an inline define. Also, if you don't want to use ipo aggressively,
264 * it might be clever to put the implementation into the header.
268 * At the moment, SYCL seems to struggle with ipo, even if a function is
269 * never called. So I embed the (empty) implementation directly into the
272 #if defined(GPUOffloadingOMP)
273 #pragma omp declare target
275 static GPUCallableMethod void flux(
276 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
277 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
278 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
279 [[maybe_unused]] double t,
280 [[maybe_unused]] double dt,
281 [[maybe_unused]] int normal,
282 [[maybe_unused]] double* __restrict__ F, // F[{{NUMBER_OF_UNKNOWNS}}]
285 #if defined(GPUOffloadingOMP)
286 #pragma omp end declare target
290 {% if FLUX_IMPLEMENTATION!="<none>" %}
292 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
293 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
294 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
295 [[maybe_unused]] double t,
296 [[maybe_unused]] double dt,
297 [[maybe_unused]] int normal,
298 [[maybe_unused]] double* __restrict__ F // F[{{NUMBER_OF_UNKNOWNS}}]
299 ) {% if FLUX_IMPLEMENTATION=="<user-defined>" %}=0{% else %} final {% endif %};
302 {% if NCP_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
304 * Depending on the implementation, this variant might be slow as it
305 * lacks an inline define. Also, if you don't want to use ipo aggressively,
306 * it might be clever to put the implementation into the header.
310 * At the moment, SYCL seems to struggle with ipo, even if a function is
311 * never called. So I embed the (empty) implementation directly into the
314 #if defined(GPUOffloadingOMP)
315 #pragma omp declare target
317 static GPUCallableMethod void nonconservativeProduct(
318 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
319 [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
320 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
321 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
322 [[maybe_unused]] double t,
323 [[maybe_unused]] double dt,
324 [[maybe_unused]] int normal,
325 [[maybe_unused]] double* __restrict__ BTimesDeltaQ, // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
328 #if defined(GPUOffloadingOMP)
329 #pragma omp end declare target
333 {% if NCP_IMPLEMENTATION!="<none>" %}
334 virtual void nonconservativeProduct(
335 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
336 [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
337 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
338 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
339 [[maybe_unused]] double t,
340 [[maybe_unused]] double dt,
341 [[maybe_unused]] int normal,
342 [[maybe_unused]] double* __restrict__ BTimesDeltaQ // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
343 ) {% if NCP_IMPLEMENTATION=="<user-defined>" %}=0{% endif %};
346 {% if SOURCE_TERM_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
348 * Depending on the implementation, this variant might be slow as it
349 * lacks an inline define. Also, if you don't want to use ipo aggressively,
350 * it might be clever to put the implementation into the header.
354 * At the moment, SYCL seems to struggle with ipo, even if a function is
355 * never called. So I embed the (empty) implementation directly into the
358 #if defined(GPUOffloadingOMP)
359 #pragma omp declare target
361 static GPUCallableMethod void sourceTerm(
362 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
363 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeCentre,
364 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
365 [[maybe_unused]] double t,
366 [[maybe_unused]] double dt,
367 [[maybe_unused]] double* __restrict__ S, // S[{{NUMBER_OF_UNKNOWNS}}]
370 #if defined(GPUOffloadingOMP)
371 #pragma omp end declare target
375 {% if SOURCE_TERM_IMPLEMENTATION!="<none>" %}
376 virtual void sourceTerm(
377 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
378 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeCentre,
379 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
380 [[maybe_unused]] double t,
381 [[maybe_unused]] double dt,
382 [[maybe_unused]] double* __restrict__ S // S[{{NUMBER_OF_UNKNOWNS}}]
383 ) {% if SOURCE_TERM_IMPLEMENTATION=="<user-defined>" %}= 0{% else %} final {% endif %};
385""", undefined=jinja2.DebugUndefined)
388 d[
"FLUX_IMPLEMENTATION"] = flux_implementation
389 d[
"NCP_IMPLEMENTATION"] = ncp_implementation
390 d[
"EIGENVALUES_IMPLEMENTATION"] = eigenvalues_implementation
391 d[
"SOURCE_TERM_IMPLEMENTATION"] = source_term_implementation
392 d[
"STATELESS_PDE_TERMS"] = pde_terms_without_state
393 return Template.render(**d)
396 Template = jinja2.Template(
"""
397{% if EIGENVALUES_IMPLEMENTATION!="<user-defined>" and EIGENVALUES_IMPLEMENTATION!="<none>" %}
398double {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::maxEigenvalue(
399 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
400 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
401 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
402 [[maybe_unused]] double t,
403 [[maybe_unused]] double dt,
404 [[maybe_unused]] int normal
406 {{EIGENVALUES_IMPLEMENTATION}}
410{% if FLUX_IMPLEMENTATION!="<none>" and FLUX_IMPLEMENTATION!="<user-defined>" %}
411void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::flux(
412 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
413 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
414 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
415 [[maybe_unused]] double t,
416 [[maybe_unused]] double dt,
417 [[maybe_unused]] int normal,
418 [[maybe_unused]] double* __restrict__ F // F[{{NUMBER_OF_UNKNOWNS}}]
420 {{FLUX_IMPLEMENTATION}}
424{% if NCP_IMPLEMENTATION!="<none>" and NCP_IMPLEMENTATION!="<user-defined>" %}
425void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::nonconservativeProduct(
426 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
427 [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
428 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
429 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
430 [[maybe_unused]] double t,
431 [[maybe_unused]] double dt,
432 [[maybe_unused]] int normal,
433 [[maybe_unused]] double* __restrict__ BTimesDeltaQ // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
435 {{NCP_IMPLEMENTATION}}
439{% if SOURCE_TERM_IMPLEMENTATION!="<user-defined>" and SOURCE_TERM_IMPLEMENTATION!="<none>" %}
440//#if !defined(GPUOffloadingSYCL)
441void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::sourceTerm(
442 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
443 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeCentre,
444 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
445 [[maybe_unused]] double t,
446 [[maybe_unused]] double dt,
447 [[maybe_unused]] double* __restrict__ S // S[{{NUMBER_OF_UNKNOWNS}}]
449 {% if SOURCE_TERM_IMPLEMENTATION!="<empty>" %}
450 {{SOURCE_TERM_IMPLEMENTATION}}
452 std::fill_n(S,{{NUMBER_OF_UNKNOWNS}},0.0);
458{% if EIGENVALUES_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
459#if defined(GPUOffloadingOMP)
460#pragma omp declare target
462//#if !defined(GPUOffloadingSYCL)
463GPUCallableMethod double {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::maxEigenvalue(
464 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
465 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
466 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
467 [[maybe_unused]] double t,
468 [[maybe_unused]] double dt,
469 [[maybe_unused]] int normal,
472 {{EIGENVALUES_IMPLEMENTATION}};
475#if defined(GPUOffloadingOMP)
476#pragma omp end declare target
480{% if FLUX_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
481#if defined(GPUOffloadingOMP)
482#pragma omp declare target
484//#if !defined(GPUOffloadingSYCL)
485GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::flux(
486 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
487 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
488 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
489 [[maybe_unused]] double t,
490 [[maybe_unused]] double dt,
491 [[maybe_unused]] int normal,
492 [[maybe_unused]] double* __restrict__ F, // F[{{NUMBER_OF_UNKNOWNS}}]
495 {% if FLUX_IMPLEMENTATION=="<none>" %}
498 {{FLUX_IMPLEMENTATION}}
502#if defined(GPUOffloadingOMP)
503#pragma omp end declare target
507{% if NCP_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
508#if defined(GPUOffloadingOMP)
509#pragma omp declare target
511GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::nonconservativeProduct(
512 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
513 [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
514 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
515 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
516 [[maybe_unused]] double t,
517 [[maybe_unused]] double dt,
518 [[maybe_unused]] int normal,
519 [[maybe_unused]] double* __restrict__ BTimesDeltaQ, // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
520 [[maybe_unused]] Offloadable
522 {% if NCP_IMPLEMENTATION=="<none>" %}
525 {{NCP_IMPLEMENTATION}}
528#if defined(GPUOffloadingOMP)
529#pragma omp end declare target
533{% if SOURCE_TERM_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
534#if defined(GPUOffloadingOMP)
535#pragma omp declare target
537GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::sourceTerm(
538 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
539 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeCentre,
540 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
541 [[maybe_unused]] double t,
542 [[maybe_unused]] double dt,
543 [[maybe_unused]] double* __restrict__ S, // S[{{NUMBER_OF_UNKNOWNS}}]
544 [[maybe_unused]] Offloadable
546 {% if SOURCE_TERM_IMPLEMENTATION=="<none>" %}
549 {{SOURCE_TERM_IMPLEMENTATION}}
552#if defined(GPUOffloadingOMP)
553#pragma omp end declare target
556""", undefined=jinja2.DebugUndefined)
559 d[
"FLUX_IMPLEMENTATION"] = flux_implementation
560 d[
"NCP_IMPLEMENTATION"] = ncp_implementation
561 d[
"EIGENVALUES_IMPLEMENTATION"] = eigenvalues_implementation
562 d[
"SOURCE_TERM_IMPLEMENTATION"] = source_term_implementation
563 d[
"STATELESS_PDE_TERMS"] = pde_terms_without_state
564 return Template.render(**d)
566def create_solver_declarations(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state):
    """
    Generate the C++ method declarations for the user-facing solver class.

    A jinja2 template is rendered whose sections are guarded by the
    implementation flags: for every PDE term set to "<user-defined>"
    (SOURCE_TERM_IMPLEMENTATION, EIGENVALUES_IMPLEMENTATION,
    FLUX_IMPLEMENTATION, NCP_IMPLEMENTATION) a virtual member function
    declaration is emitted, and — if STATELESS_PDE_TERMS is set as well —
    a static GPUCallableMethod overload wrapped in OpenMP
    '#pragma omp declare target' sections. The static overloads carry a
    trailing Offloadable argument so the compiler can distinguish them
    from the virtual variants (the "TBB trick" described in the template
    comments).

    The parameters are the implementation strings/flags that are placed
    into the template context; the rendered C++ snippet is returned as a
    string.

    NOTE(review): the text of this file appears damaged (an old line
    numbering is fused into the code and individual lines are missing).
    In particular, the context dictionary 'd' is used below without a
    visible 'd = {}' initialisation — presumably lost in extraction;
    confirm against the original source.
    """
 567 Template = jinja2.Template(
"""
 569 {% if SOURCE_TERM_IMPLEMENTATION=="<user-defined>" %}
 570 virtual void sourceTerm(
 571 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
 572 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeCentre,
 573 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
 574 [[maybe_unused]] double t,
 575 [[maybe_unused]] double dt,
 576 [[maybe_unused]] double* __restrict__ S // S[{{NUMBER_OF_UNKNOWNS}}]
 580 {% if EIGENVALUES_IMPLEMENTATION=="<user-defined>" %}
 582 * Determine max eigenvalue over Jacobian in a given point with solution values
 583 * (states) Q. All parameters are in.
 585 * @return Max eigenvalue. Result has to be positive, so we are actually speaking
 586 * about the maximum absolute eigenvalue.
 588 virtual double maxEigenvalue(
 589 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
 590 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
 591 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
 592 [[maybe_unused]] double t,
 593 [[maybe_unused]] double dt,
 594 [[maybe_unused]] int normal
 598 {% if FLUX_IMPLEMENTATION=="<user-defined>" %}
 600 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
 601 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
 602 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
 603 [[maybe_unused]] double t,
 604 [[maybe_unused]] double dt,
 605 [[maybe_unused]] int normal,
 606 [[maybe_unused]] double* __restrict__ F // F[{{NUMBER_OF_UNKNOWNS}}]
 610 {% if NCP_IMPLEMENTATION=="<user-defined>" %}
 611 virtual void nonconservativeProduct(
 612 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
 613 [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
 614 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
 615 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
 616 [[maybe_unused]] double t,
 617 [[maybe_unused]] double dt,
 618 [[maybe_unused]] int normal,
 619 [[maybe_unused]] double* __restrict__ BTimesDeltaQ // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
 623 {% if STATELESS_PDE_TERMS and SOURCE_TERM_IMPLEMENTATION=="<user-defined>" %}
 625 * To obtain the best performance, I recommend to man inline command to
 626 * this signature and to copy the implementation into the header. So it would
 629 * static inline void sourceTerm( ... ) {
 633 * The GPU offloading requires static functions. As we cannot overload the
 634 * original (virtual) function with a static alternative, we do the
 635 * TBB trick and overload by adding an additional enum. It has no semantics
 636 * but helps the compiler to distinguish the different function variants.
 638 #if defined(GPUOffloadingOMP)
 639 #pragma omp declare target
 641 static GPUCallableMethod void sourceTerm(
 642 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
 643 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeCentre,
 644 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
 645 [[maybe_unused]] double t,
 646 [[maybe_unused]] double dt,
 647 [[maybe_unused]] double* __restrict__ S, // S[{{NUMBER_OF_UNKNOWNS}}]
 648 [[maybe_unused]] Offloadable
 650 #if defined(GPUOffloadingOMP)
 651 #pragma omp end declare target
 655 {% if EIGENVALUES_IMPLEMENTATION=="<user-defined>" and STATELESS_PDE_TERMS %}
 657 * To obtain the best performance, I recommend to man inline command to
 658 * this signature and to copy the implementation into the header. So it would
 661 * static inline double maxEigenvalue( ... ) {
 665 * The GPU offloading requires static functions. As we cannot overload the
 666 * original (virtual) function with a static alternative, we do the
 667 * TBB trick and overload by adding an additional enum. It has no semantics
 668 * but helps the compiler to distinguish the different function variants.
 670 #if defined(GPUOffloadingOMP)
 671 #pragma omp declare target
 673 static GPUCallableMethod double maxEigenvalue(
 674 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
 675 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
 676 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
 677 [[maybe_unused]] double t,
 678 [[maybe_unused]] double dt,
 679 [[maybe_unused]] int normal,
 680 [[maybe_unused]] Offloadable
 682 #if defined(GPUOffloadingOMP)
 683 #pragma omp end declare target
 687 {% if STATELESS_PDE_TERMS and FLUX_IMPLEMENTATION=="<user-defined>" %}
 689 * To obtain the best performance, I recommend to man inline command to
 690 * this signature and to copy the implementation into the header. So it would
 693 * static inline void flux( ... ) {
 697 * The GPU offloading requires static functions. As we cannot overload the
 698 * original (virtual) function with a static alternative, we do the
 699 * TBB trick and overload by adding an additional enum. It has no semantics
 700 * but helps the compiler to distinguish the different function variants.
 702 #if defined(GPUOffloadingOMP)
 703 #pragma omp declare target
 705 static GPUCallableMethod void flux(
 706 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
 707 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
 708 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
 709 [[maybe_unused]] double t,
 710 [[maybe_unused]] double dt,
 711 [[maybe_unused]] int normal,
 712 [[maybe_unused]] double* __restrict__ F, // F[{{NUMBER_OF_UNKNOWNS}}]
 713 [[maybe_unused]] Offloadable
 715 #if defined(GPUOffloadingOMP)
 716 #pragma omp end declare target
 720 {% if STATELESS_PDE_TERMS and NCP_IMPLEMENTATION=="<user-defined>" %}
 722 * To obtain the best performance, I recommend to man inline command to
 723 * this signature and to copy the implementation into the header. So it would
 726 * static inline void nonconservativeProduct( ... ) {
 730 * The GPU offloading requires static functions. As we cannot overload the
 731 * original (virtual) function with a static alternative, we do the
 732 * TBB trick and overload by adding an additional enum. It has no semantics
 733 * but helps the compiler to distinguish the different function variants.
 735 #if defined(GPUOffloadingOMP)
 736 #pragma omp declare target
 738 static GPUCallableMethod void nonconservativeProduct(
 739 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
 740 [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
 741 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
 742 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
 743 [[maybe_unused]] double t,
 744 [[maybe_unused]] double dt,
 745 [[maybe_unused]] int normal,
 746 [[maybe_unused]] double* __restrict__ BTimesDeltaQ, // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
 747 [[maybe_unused]] Offloadable
 749 #if defined(GPUOffloadingOMP)
 750 #pragma omp end declare target
 753""", undefined=jinja2.DebugUndefined)
    # Populate the jinja2 rendering context with the user-supplied
    # implementation strings/flags.
    # NOTE(review): 'd' has no visible initialisation in this damaged text —
    # a 'd = {}' line was presumably lost; confirm against the original.
 755 d[
"FLUX_IMPLEMENTATION"] = flux_implementation
 756 d[
"NCP_IMPLEMENTATION"] = ncp_implementation
 757 d[
"EIGENVALUES_IMPLEMENTATION"] = eigenvalues_implementation
 758 d[
"SOURCE_TERM_IMPLEMENTATION"] = source_term_implementation
 759 d[
"STATELESS_PDE_TERMS"] = pde_terms_without_state
    # Render the declaration snippet. jinja2.DebugUndefined leaves any
    # placeholder that is not in 'd' (e.g. {{NUMBER_OF_UNKNOWNS}}) visible
    # in the output instead of silently dropping it.
 760 return Template.render(**d)
762def create_solver_definitions(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state):
    """
    Generate the C++ definitions (implementation skeletons) for the
    user-facing solver class.

    For every PDE term flagged "<user-defined>" the template emits the
    skeleton of the corresponding member function
    ({{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::maxEigenvalue / flux /
    nonconservativeProduct / sourceTerm) including
    logTraceInWith4Arguments/logTraceOut calls and '@todo implement'
    markers for the user to fill in. When STATELESS_PDE_TERMS is set as
    well, GPUCallableMethod variants (distinguished by the trailing
    Offloadable tag argument) are emitted inside OpenMP
    '#pragma omp declare target' sections.

    Returns the rendered C++ snippet as a string.

    NOTE(review): this file's text appears damaged (an old line numbering
    is fused into the code and lines are missing) — e.g. 'd' is used
    below without a visible 'd = {}', and several template lines (closing
    braces, {% endif %} guards) are absent. Verify against the original
    source before relying on this text.
    """
 763 Template = jinja2.Template(
"""
764{% if EIGENVALUES_IMPLEMENTATION=="<user-defined>" %}
765double {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::maxEigenvalue(
766 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
767 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
768 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
769 [[maybe_unused]] double t,
770 [[maybe_unused]] double dt,
771 [[maybe_unused]] int normal
773 logTraceInWith4Arguments( "maxEigenvalue(...)", faceCentre, volumeH, t, normal );
775 logTraceOut( "maxEigenvalue(...)" );
779{% if FLUX_IMPLEMENTATION=="<user-defined>" %}
780void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::flux(
781 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
782 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
783 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
784 [[maybe_unused]] double t,
785 [[maybe_unused]] double dt,
786 [[maybe_unused]] int normal,
787 [[maybe_unused]] double* __restrict__ F // F[{{NUMBER_OF_UNKNOWNS}}]
789 logTraceInWith4Arguments( "flux(...)", faceCentre, volumeH, t, normal );
791 logTraceOut( "flux(...)" );
795{% if NCP_IMPLEMENTATION=="<user-defined>" %}
796void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::nonconservativeProduct(
797 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
798 [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
799 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
800 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
801 [[maybe_unused]] double t,
802 [[maybe_unused]] double dt,
803 [[maybe_unused]] int normal,
804 [[maybe_unused]] double* __restrict__ BTimesDeltaQ // BTimesDeltaQQ[{{NUMBER_OF_UNKNOWNS}}]
806 logTraceInWith4Arguments( "nonconservativeProduct(...)", faceCentre, volumeH, t, normal );
808 logTraceOut( "nonconservativeProduct(...)" );
812{% if SOURCE_TERM_IMPLEMENTATION=="<user-defined>" %}
813void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::sourceTerm(
814 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
815 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeX,
816 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
817 [[maybe_unused]] double t,
818 [[maybe_unused]] double dt,
819 [[maybe_unused]] double* __restrict__ S // S[{{NUMBER_OF_UNKNOWNS}}]
821 logTraceInWith4Arguments( "sourceTerm(...)", volumeX, volumeH, t, dt );
823 // @todo implement and ensure that all entries of S are properly set
824 for (int i=0; i<NumberOfUnknowns; i++) {
828 logTraceOut( "sourceTerm(...)" );
832{% if EIGENVALUES_IMPLEMENTATION=="<user-defined>" and STATELESS_PDE_TERMS %}
833#if defined(GPUOffloadingOMP)
834#pragma omp declare target
836GPUCallableMethod double {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::maxEigenvalue(
837 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
838 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
839 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
840 [[maybe_unused]] double t,
841 [[maybe_unused]] double dt,
842 [[maybe_unused]] int normal,
847#if defined(GPUOffloadingOMP)
848#pragma omp end declare target
852{% if FLUX_IMPLEMENTATION=="<user-defined>" and STATELESS_PDE_TERMS %}
853#if defined(GPUOffloadingOMP)
854#pragma omp declare target
856GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::flux(
857 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
858 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
859 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
860 [[maybe_unused]] double t,
861 [[maybe_unused]] double dt,
862 [[maybe_unused]] int normal,
863 [[maybe_unused]] double* __restrict__ F, // F[{{NUMBER_OF_UNKNOWNS}}]
868#if defined(GPUOffloadingOMP)
869#pragma omp end declare target
873{% if NCP_IMPLEMENTATION=="<user-defined>" and STATELESS_PDE_TERMS %}
874#if defined(GPUOffloadingOMP)
875#pragma omp declare target
877GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::nonconservativeProduct(
878 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
879 [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
880 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
881 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
882 [[maybe_unused]] double t,
883 [[maybe_unused]] double dt,
884 [[maybe_unused]] int normal,
885 [[maybe_unused]] double* __restrict__ BTimesDeltaQ, // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
886 [[maybe_unused]] Offloadable
890#if defined(GPUOffloadingOMP)
891#pragma omp end declare target
895{% if SOURCE_TERM_IMPLEMENTATION=="<user-defined>" and STATELESS_PDE_TERMS %}
896#if defined(GPUOffloadingOMP)
897#pragma omp declare target
899GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::sourceTerm(
900 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
901 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeCentre,
902 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
903 [[maybe_unused]] double t,
904 [[maybe_unused]] double dt,
905 [[maybe_unused]] double* __restrict__ S, // S[{{NUMBER_OF_UNKNOWNS}}]
906 [[maybe_unused]] Offloadable
908 // @todo implement but ensure that all entries of S are properly set
909 for (int i=0; i<NumberOfUnknowns; i++) {
913#if defined(GPUOffloadingOMP)
914#pragma omp end declare target
918""", undefined=jinja2.DebugUndefined)
    # Populate the jinja2 rendering context with the user-supplied
    # implementation strings/flags.
    # NOTE(review): 'd' has no visible initialisation in this damaged text —
    # a 'd = {}' line was presumably lost; confirm against the original.
 920 d[
"FLUX_IMPLEMENTATION"] = flux_implementation
 921 d[
"NCP_IMPLEMENTATION"] = ncp_implementation
 922 d[
"EIGENVALUES_IMPLEMENTATION"] = eigenvalues_implementation
 923 d[
"SOURCE_TERM_IMPLEMENTATION"] = source_term_implementation
 924 d[
"STATELESS_PDE_TERMS"] = pde_terms_without_state
    # Render the definition snippet. jinja2.DebugUndefined leaves any
    # placeholder not present in 'd' (e.g. {{CLASSNAME}}) visible in the
    # output instead of silently dropping it.
 925 return Template.render(**d)
# create_compute_Riemann_kernel_for_Rusanov(flux_implementation, ncp_implementation, source_implementation, compute_max_eigenvalue_of_next_time_step, solver_variant: SolverVariant, kernel_variant: KernelVariant)
# Returns only the unqualified function call, i.e. without any namespaces.