Peano — ExaHyPE2: generated documentation source listing of kernels.py.
1# This file is part of the ExaHyPE2 project. For conditions of distribution and
2# use, please see the copyright notice at www.peano-framework.org
3import jinja2
4
5from enum import Enum
6import os
7import exahype2
8import exahype2.kerneldsl as DSL
9
class SolverVariant(Enum):
    """How the generated solver kernel binds its PDE terms.

    Used further down in this file to pick the call template: the
    functor-based (virtual-function) form, the stateless template form,
    or the stateless form dispatched to an accelerator.
    """

    WithVirtualFunctions = 0
    Stateless = 1
    Multicore = 2
    Accelerator = 3
class KernelVariant(Enum):
    """Traversal scheme and data layout of the generated patch kernel.

    The tens digit encodes the traversal (patch-wise, batched,
    task-graph, volume-wise), the member suffix the memory layout of the
    unknowns (AoS, AoSoA, SoA).
    """

    PatchWiseAoS = 10
    PatchWiseAoSoA = 11
    PatchWiseSoA = 12
    BatchedAoS = 20
    BatchedAoSoA = 21
    BatchedSoA = 22
    TaskGraphAoS = 30
    TaskGraphAoSoA = 31
    TaskGraphSoA = 32
    VolumeWiseAoS = 40
    VolumeWiseAoSoA = 41
    VolumeWiseSoA = 42
29
# Emit the dimension-dispatch header: kernels/rusanov.h includes the 2d or
# 3d kernel header depending on the compile-time Dimensions macro.
# NOTE(review): the enclosing function header of this section was lost in the
# extraction of this listing; the statements are reproduced as a unit.
os.makedirs("kernels", exist_ok=True)  # idempotent; avoids the exists()/makedirs() race
with open("kernels/rusanov.h", "w") as file:  # with-block closes the handle (original never did)
    file.write("""#if Dimensions == 2
#include "rusanov2d.h"
#elif Dimensions == 3
#include "rusanov3d.h"
#endif
""")
41
44
# ---------------------------------------------------------------------------
# 2d Rusanov kernels.
#
# Parse the generic 2d Rusanov time step once per back-end variant and print
# all four artefacts (definition, timed definition, declaration, timed
# declaration) from the resulting tree, then write the generated header and
# implementation files.
# ---------------------------------------------------------------------------
template_parameters = [DSL.SyntaxTree.Argument("NumberOfVolumesPerAxisInPatch", DSL.SyntaxTree.TInteger()),
                       DSL.SyntaxTree.Argument("HaloSize", DSL.SyntaxTree.TInteger()),
                       DSL.SyntaxTree.Argument("NumberOfUnknowns", DSL.SyntaxTree.TInteger()),
                       DSL.SyntaxTree.Argument("NumberOfAuxiliaryVariables", DSL.SyntaxTree.TInteger()),
                       DSL.SyntaxTree.Argument("EvaluateFlux", DSL.SyntaxTree.TBoolean()),
                       DSL.SyntaxTree.Argument("EvaluateNonconservativeProduct", DSL.SyntaxTree.TBoolean()),
                       DSL.SyntaxTree.Argument("EvaluateSource", DSL.SyntaxTree.TBoolean()),
                       DSL.SyntaxTree.Argument("EvaluateMaximumEigenvalueAfterTimeStep", DSL.SyntaxTree.TBoolean())]

functor_arguments = [DSL.SyntaxTree.Argument("flux", DSL.SyntaxTree.TCustom("const FluxFunctor&")),
                     DSL.SyntaxTree.Argument("nonconservativeProduct", DSL.SyntaxTree.TCustom("const NonconservativeProductFunctor&")),
                     DSL.SyntaxTree.Argument("sourceTerm", DSL.SyntaxTree.TCustom("const SourceFunctor&")),
                     DSL.SyntaxTree.Argument("maxEigenvalue", DSL.SyntaxTree.TCustom("const MaxEigenvalueFunctor&"))]

# Functor-based variant: PDE terms are passed in as functors.
rusanov_tree = DSL.Parser().parse(exahype2.solvers.fv.rusanov.timeStepWithRusanov2d, template_parameters, functor_arguments, ["exahype2", "fv", "rusanov"])
rusanov_kernel = rusanov_tree.print_cpp()
rusanov_call_with_measurement = rusanov_tree.print_definition_with_timer()
rusanov_kernel_declaration = rusanov_tree.print_declaration()
rusanov_call_with_measurement_declaration = rusanov_tree.print_declaration_with_timer()

# Stateless variant: PDE terms are static calls on the solver type.
rusanov_stateless_tree = DSL.Parser().parse(exahype2.solvers.fv.rusanov.timeStepWithRusanov2d, template_parameters, functor_arguments, ["exahype2", "fv", "rusanov"], stateless=True)
rusanov_stateless_kernel = rusanov_stateless_tree.print_cpp()
rusanov_stateless_call_with_measurement = rusanov_stateless_tree.print_definition_with_timer()
rusanov_stateless_kernel_declaration = rusanov_stateless_tree.print_declaration()
rusanov_stateless_call_with_measurement_declaration = rusanov_stateless_tree.print_declaration_with_timer()

# OpenMP host variants.
rusanov_omp_tree = DSL.Parser().parse(exahype2.solvers.fv.rusanov.timeStepWithRusanov2d, template_parameters, functor_arguments, ["exahype2", "fv", "rusanov", "omp"])
rusanov_omp_kernel = rusanov_omp_tree.print_omp()
rusanov_omp_call_with_measurement = rusanov_omp_tree.print_definition_with_timer()
rusanov_omp_kernel_declaration = rusanov_omp_tree.print_declaration()
rusanov_omp_call_with_measurement_declaration = rusanov_omp_tree.print_declaration_with_timer()

rusanov_omp_stateless_tree = DSL.Parser().parse(exahype2.solvers.fv.rusanov.timeStepWithRusanov2d, template_parameters, functor_arguments, ["exahype2", "fv", "rusanov", "omp"], stateless=True)
rusanov_omp_stateless_kernel = rusanov_omp_stateless_tree.print_omp()
rusanov_omp_stateless_call_with_measurement = rusanov_omp_stateless_tree.print_definition_with_timer()
rusanov_omp_stateless_kernel_declaration = rusanov_omp_stateless_tree.print_declaration()
rusanov_omp_stateless_call_with_measurement_declaration = rusanov_omp_stateless_tree.print_declaration_with_timer()

# OpenMP target-offloading variant.
rusanov_omp_accelerator_tree = DSL.Parser().parse(exahype2.solvers.fv.rusanov.timeStepWithRusanov2d, template_parameters, functor_arguments, ["exahype2", "fv", "rusanov", "omp"], stateless=True, use_accelerator=True)
rusanov_omp_accelerator_kernel = rusanov_omp_accelerator_tree.print_omp()
rusanov_omp_accelerator_call_with_measurement = rusanov_omp_accelerator_tree.print_definition_with_timer()
rusanov_omp_accelerator_kernel_declaration = rusanov_omp_accelerator_tree.print_declaration()
rusanov_omp_accelerator_call_with_measurement_declaration = rusanov_omp_accelerator_tree.print_declaration_with_timer()

# SYCL offloading variant.
rusanov_sycl_accelerator_tree = DSL.Parser().parse(exahype2.solvers.fv.rusanov.timeStepWithRusanov2d, template_parameters, functor_arguments, ["exahype2", "fv", "rusanov", "sycl"], stateless=True, use_accelerator=True)
rusanov_sycl_accelerator_kernel = rusanov_sycl_accelerator_tree.print_sycl()
rusanov_sycl_accelerator_call_with_measurement = rusanov_sycl_accelerator_tree.print_definition_with_timer()
rusanov_sycl_accelerator_kernel_declaration = rusanov_sycl_accelerator_tree.print_declaration()
rusanov_sycl_accelerator_call_with_measurement_declaration = rusanov_sycl_accelerator_tree.print_declaration_with_timer()

os.makedirs("kernels", exist_ok=True)  # idempotent; replaces the exists() check

# Header: declarations only; back-end-specific ones are guarded by the
# corresponding compile-time macros.
with open("kernels/rusanov2d.h", "w") as file:
    file.write(f"""#pragma once
#include "exahype2/CellData.h"
#include "exahype2/VolumeIndex.h"
#include "exahype2/fv/PatchUtils.h"
#include "exahype2/fv/rusanov/Functors.h"
#include "peano4/utils/Loop.h"
#include "tarch/timing/Measurement.h"
#include "tarch/timing/Watch.h"
#include <fstream>

{rusanov_kernel_declaration}
{rusanov_call_with_measurement_declaration}

{rusanov_stateless_kernel_declaration}
{rusanov_stateless_call_with_measurement_declaration}

#if defined(SharedOMP)
{rusanov_omp_kernel_declaration}
{rusanov_omp_call_with_measurement_declaration}
{rusanov_omp_stateless_kernel_declaration}
{rusanov_omp_stateless_call_with_measurement_declaration}
#endif

#if defined(GPUOffloadingOMP)
{rusanov_omp_accelerator_kernel_declaration}
{rusanov_omp_accelerator_call_with_measurement_declaration}
#endif

#if defined(GPUOffloadingSYCL)
{rusanov_sycl_accelerator_kernel_declaration}
{rusanov_sycl_accelerator_call_with_measurement_declaration}
#endif

#include "rusanov2d.cpph"
""")

# Implementation file with the matching definitions, same guard structure.
with open("kernels/rusanov2d.cpph", "w") as file:
    file.write(rusanov_kernel)
    file.write(rusanov_call_with_measurement)
    file.write(rusanov_stateless_kernel)
    file.write(rusanov_stateless_call_with_measurement)

    file.write("#if defined(SharedOMP)\n")
    file.write(rusanov_omp_kernel)
    file.write(rusanov_omp_call_with_measurement)
    file.write(rusanov_omp_stateless_kernel)
    file.write(rusanov_omp_stateless_call_with_measurement)
    file.write("#endif\n")

    file.write("#if defined(GPUOffloadingOMP)\n")
    file.write(rusanov_omp_accelerator_kernel)
    file.write(rusanov_omp_accelerator_call_with_measurement)
    file.write("#endif\n")

    file.write("#if defined(GPUOffloadingSYCL)\n")
    file.write(rusanov_sycl_accelerator_kernel)
    file.write(rusanov_sycl_accelerator_call_with_measurement)
    file.write("#endif\n")
166
# ---------------------------------------------------------------------------
# 3d Rusanov kernels.
#
# Mirrors the 2d section above, additionally emitting an unguarded
# accelerator variant of the plain C++ kernel.
# ---------------------------------------------------------------------------
template_parameters = [DSL.SyntaxTree.Argument("NumberOfVolumesPerAxisInPatch", DSL.SyntaxTree.TInteger()),
                       DSL.SyntaxTree.Argument("HaloSize", DSL.SyntaxTree.TInteger()),
                       DSL.SyntaxTree.Argument("NumberOfUnknowns", DSL.SyntaxTree.TInteger()),
                       DSL.SyntaxTree.Argument("NumberOfAuxiliaryVariables", DSL.SyntaxTree.TInteger()),
                       DSL.SyntaxTree.Argument("EvaluateFlux", DSL.SyntaxTree.TBoolean()),
                       DSL.SyntaxTree.Argument("EvaluateNonconservativeProduct", DSL.SyntaxTree.TBoolean()),
                       DSL.SyntaxTree.Argument("EvaluateSource", DSL.SyntaxTree.TBoolean()),
                       DSL.SyntaxTree.Argument("EvaluateMaximumEigenvalueAfterTimeStep", DSL.SyntaxTree.TBoolean())]

functor_arguments = [DSL.SyntaxTree.Argument("flux", DSL.SyntaxTree.TCustom("const FluxFunctor&")),
                     DSL.SyntaxTree.Argument("nonconservativeProduct", DSL.SyntaxTree.TCustom("const NonconservativeProductFunctor&")),
                     DSL.SyntaxTree.Argument("sourceTerm", DSL.SyntaxTree.TCustom("const SourceFunctor&")),
                     DSL.SyntaxTree.Argument("maxEigenvalue", DSL.SyntaxTree.TCustom("const MaxEigenvalueFunctor&"))]

rusanov_tree = DSL.Parser().parse(exahype2.solvers.fv.rusanov.timeStepWithRusanov3d, template_parameters, functor_arguments, ["exahype2", "fv", "rusanov"])
rusanov_kernel = rusanov_tree.print_cpp()
rusanov_call_with_measurement = rusanov_tree.print_definition_with_timer()
rusanov_kernel_declaration = rusanov_tree.print_declaration()
rusanov_call_with_measurement_declaration = rusanov_tree.print_declaration_with_timer()

rusanov_stateless_tree = DSL.Parser().parse(exahype2.solvers.fv.rusanov.timeStepWithRusanov3d, template_parameters, functor_arguments, ["exahype2", "fv", "rusanov"], stateless=True)
rusanov_stateless_kernel = rusanov_stateless_tree.print_cpp()
rusanov_stateless_call_with_measurement = rusanov_stateless_tree.print_definition_with_timer()
rusanov_stateless_kernel_declaration = rusanov_stateless_tree.print_declaration()
rusanov_stateless_call_with_measurement_declaration = rusanov_stateless_tree.print_declaration_with_timer()

# Parse once and print all four artefacts from the same tree (the original
# re-ran the identical parse four times); keyword form matches the sibling
# calls above.
rusanov_accelerator_tree = DSL.Parser().parse(exahype2.solvers.fv.rusanov.timeStepWithRusanov3d, template_parameters, functor_arguments, ["exahype2", "fv", "rusanov"], stateless=True, use_accelerator=True)
rusanov_accelerator_kernel = rusanov_accelerator_tree.print_cpp()
rusanov_accelerator_call_with_measurement = rusanov_accelerator_tree.print_definition_with_timer()
rusanov_accelerator_kernel_declaration = rusanov_accelerator_tree.print_declaration()
rusanov_accelerator_call_with_measurement_declaration = rusanov_accelerator_tree.print_declaration_with_timer()

rusanov_omp_tree = DSL.Parser().parse(exahype2.solvers.fv.rusanov.timeStepWithRusanov3d, template_parameters, functor_arguments, ["exahype2", "fv", "rusanov", "omp"])
rusanov_omp_kernel = rusanov_omp_tree.print_omp()
rusanov_omp_call_with_measurement = rusanov_omp_tree.print_definition_with_timer()
rusanov_omp_kernel_declaration = rusanov_omp_tree.print_declaration()
rusanov_omp_call_with_measurement_declaration = rusanov_omp_tree.print_declaration_with_timer()

rusanov_omp_stateless_tree = DSL.Parser().parse(exahype2.solvers.fv.rusanov.timeStepWithRusanov3d, template_parameters, functor_arguments, ["exahype2", "fv", "rusanov", "omp"], stateless=True)
rusanov_omp_stateless_kernel = rusanov_omp_stateless_tree.print_omp()
rusanov_omp_stateless_call_with_measurement = rusanov_omp_stateless_tree.print_definition_with_timer()
rusanov_omp_stateless_kernel_declaration = rusanov_omp_stateless_tree.print_declaration()
rusanov_omp_stateless_call_with_measurement_declaration = rusanov_omp_stateless_tree.print_declaration_with_timer()

rusanov_omp_accelerator_tree = DSL.Parser().parse(exahype2.solvers.fv.rusanov.timeStepWithRusanov3d, template_parameters, functor_arguments, ["exahype2", "fv", "rusanov", "omp"], stateless=True, use_accelerator=True)
rusanov_omp_accelerator_kernel = rusanov_omp_accelerator_tree.print_omp()
rusanov_omp_accelerator_call_with_measurement = rusanov_omp_accelerator_tree.print_definition_with_timer()
rusanov_omp_accelerator_kernel_declaration = rusanov_omp_accelerator_tree.print_declaration()
rusanov_omp_accelerator_call_with_measurement_declaration = rusanov_omp_accelerator_tree.print_declaration_with_timer()

rusanov_sycl_accelerator_tree = DSL.Parser().parse(exahype2.solvers.fv.rusanov.timeStepWithRusanov3d, template_parameters, functor_arguments, ["exahype2", "fv", "rusanov", "sycl"], stateless=True, use_accelerator=True)
rusanov_sycl_accelerator_kernel = rusanov_sycl_accelerator_tree.print_sycl()
rusanov_sycl_accelerator_call_with_measurement = rusanov_sycl_accelerator_tree.print_definition_with_timer()
rusanov_sycl_accelerator_kernel_declaration = rusanov_sycl_accelerator_tree.print_declaration()
rusanov_sycl_accelerator_call_with_measurement_declaration = rusanov_sycl_accelerator_tree.print_declaration_with_timer()

os.makedirs("kernels", exist_ok=True)  # idempotent; replaces the exists() check

with open("kernels/rusanov3d.h", "w") as file:
    file.write(f"""#pragma once
#include "exahype2/CellData.h"
#include "exahype2/VolumeIndex.h"
#include "exahype2/fv/PatchUtils.h"
#include "exahype2/fv/rusanov/Functors.h"
#include "peano4/utils/Loop.h"
#include "tarch/timing/Measurement.h"
#include "tarch/timing/Watch.h"
#include <fstream>

{rusanov_kernel_declaration}
{rusanov_call_with_measurement_declaration}
{rusanov_stateless_kernel_declaration}
{rusanov_stateless_call_with_measurement_declaration}
{rusanov_accelerator_kernel_declaration}
{rusanov_accelerator_call_with_measurement_declaration}

#if defined(SharedOMP)
{rusanov_omp_kernel_declaration}
{rusanov_omp_call_with_measurement_declaration}
{rusanov_omp_stateless_kernel_declaration}
{rusanov_omp_stateless_call_with_measurement_declaration}
#endif

#if defined(GPUOffloadingOMP)
{rusanov_omp_accelerator_kernel_declaration}
{rusanov_omp_accelerator_call_with_measurement_declaration}
#endif

#if defined(GPUOffloadingSYCL)
{rusanov_sycl_accelerator_kernel_declaration}
{rusanov_sycl_accelerator_call_with_measurement_declaration}
#endif

#include "rusanov3d.cpph"
""")

with open("kernels/rusanov3d.cpph", "w") as file:
    file.write(rusanov_kernel)
    file.write(rusanov_call_with_measurement)
    file.write(rusanov_stateless_kernel)
    file.write(rusanov_stateless_call_with_measurement)
    file.write(rusanov_accelerator_kernel)
    file.write(rusanov_accelerator_call_with_measurement)

    file.write("#if defined(SharedOMP)\n")
    file.write(rusanov_omp_kernel)
    file.write(rusanov_omp_call_with_measurement)
    file.write(rusanov_omp_stateless_kernel)
    file.write(rusanov_omp_stateless_call_with_measurement)
    file.write("#endif\n")

    file.write("#if defined(GPUOffloadingOMP)\n")
    file.write(rusanov_omp_accelerator_kernel)
    file.write(rusanov_omp_accelerator_call_with_measurement)
    file.write("#endif\n")

    file.write("#if defined(GPUOffloadingSYCL)\n")
    file.write(rusanov_sycl_accelerator_kernel)
    file.write(rusanov_sycl_accelerator_call_with_measurement)
    file.write("#endif\n")
291
293 ncp_implementation,
294 source_implementation,
295 compute_max_eigenvalue_of_next_time_step,
296 solver_variant: SolverVariant,
297 kernel_variant: KernelVariant):
298 """
299 Return only the unqualified function call, i.e., without any namespaces.
300 So by setting the right namespace as prefix, you can direct it to particular
301 implementations.
302 """
303
304 template = "timeStepWithRusanov"
305
306 if solver_variant == SolverVariant.WithVirtualFunctions:
307 template += """<
308 {{NUMBER_OF_VOLUMES_PER_AXIS}},
309 {{HALO_SIZE}},
310 {{NUMBER_OF_UNKNOWNS}},
311 {{NUMBER_OF_AUXILIARY_VARIABLES}},
312 {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
313 {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
314 {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
315 {% if COMPUTE_MAX_EIGENVALUE==False %} false {% else %} true {% endif %}
316 >(patchData,
317 [&](
318 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
319 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
320 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
321 [[maybe_unused]] double t,
322 [[maybe_unused]] double dt,
323 [[maybe_unused]] int normal,
324 [[maybe_unused]] double* __restrict__ F // F[{{NUMBER_OF_UNKNOWNS}}]
325 )->void {
326 {% if FLUX_IMPLEMENTATION!="<none>" %}
327 repositories::{{SOLVER_INSTANCE}}.flux(Q, faceCentre, volumeH, t, dt, normal, F);
328 {% endif %}
329 },
330 [&](
331 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
332 [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
333 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
334 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
335 [[maybe_unused]] double t,
336 [[maybe_unused]] double dt,
337 [[maybe_unused]] int normal,
338 [[maybe_unused]] double* __restrict__ BTimesDeltaQ // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
339 )->void {
340 {% if NCP_IMPLEMENTATION!="<none>" %}
341 repositories::{{SOLVER_INSTANCE}}.nonconservativeProduct(Q, deltaQ, faceCentre, volumeH, t, dt, normal, BTimesDeltaQ);
342 {% endif %}
343 },
344 [&](
345 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
346 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeX,
347 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
348 [[maybe_unused]] double t,
349 [[maybe_unused]] double dt,
350 [[maybe_unused]] double* __restrict__ S // S[{{NUMBER_OF_UNKNOWNS}}]
351 )->void {
352 {% if SOURCE_IMPLEMENTATION!="<none>" %}
353 repositories::{{SOLVER_INSTANCE}}.sourceTerm(Q, volumeX, volumeH, t, dt, S);
354 {% endif %}
355 },
356 [&](
357 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
358 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
359 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
360 [[maybe_unused]] double t,
361 [[maybe_unused]] double dt,
362 [[maybe_unused]] int normal,
363 [[maybe_unused]] double* __restrict__ maxEigenvalue
364 )->void {
365 repositories::{{SOLVER_INSTANCE}}.maxEigenvalue(Q, faceCentre, volumeH, t, dt, normal, maxEigenvalue);
366 }
367);
368 """
369 elif solver_variant == SolverVariant.Stateless:
370 template += """Stateless<
371 {{SOLVER_NAME}},
372 {{NUMBER_OF_VOLUMES_PER_AXIS}},
373 {{HALO_SIZE}},
374 {{NUMBER_OF_UNKNOWNS}},
375 {{NUMBER_OF_AUXILIARY_VARIABLES}},
376 {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
377 {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
378 {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
379 {% if COMPUTE_MAX_EIGENVALUE==False %} false {% else %} true {% endif %}
380 >(patchData);
381 """
382 elif solver_variant == SolverVariant.Accelerator:
383 template += """Stateless<
384 {{SOLVER_NAME}},
385 {{NUMBER_OF_VOLUMES_PER_AXIS}},
386 {{HALO_SIZE}},
387 {{NUMBER_OF_UNKNOWNS}},
388 {{NUMBER_OF_AUXILIARY_VARIABLES}},
389 {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
390 {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
391 {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
392 {% if COMPUTE_MAX_EIGENVALUE==False %} false {% else %} true {% endif %}
393 >(targetDevice, gpuPatchData);
394"""
395 else:
396 assert False, "Not supported combination: {} x {}".format(solver_variant, kernel_variant)
397
398 result = jinja2.Template( template, undefined=jinja2.DebugUndefined)
399 d= {}
400 d["FLUX_IMPLEMENTATION"] = flux_implementation
401 d["NCP_IMPLEMENTATION"] = ncp_implementation
402 d["SOURCE_IMPLEMENTATION"] = source_implementation
403 d["COMPUTE_MAX_EIGENVALUE"] = compute_max_eigenvalue_of_next_time_step
404 return result.render(**d)
405
406
408 ncp_implementation,
409 eigenvalues_implementation,
410 source_term_implementation,
411 pde_terms_without_state):
412 Template = jinja2.Template( """
413 public:
414 {% if EIGENVALUES_IMPLEMENTATION=="<none>" %}
415 #error eigenvalue implementation cannot be none
416 {% endif %}
417
418 {% if EIGENVALUES_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
419 /**
420 * Depending on the implementation, this variant might be slow as it
421 * lacks an inline define. Also, if you don't want to use ipo aggressively,
422 * it might be clever to put the implementation into the header.
423 *
424 * ## SYCL
425 *
426 * At the moment, SYCL seems to struggle with ipo, even if a function is
427 * never called. So I embed the (empty) implementation directly into the
428 * header.
429 */
430 #if defined(GPUOffloadingOMP)
431 #pragma omp declare target
432 #endif
433 static GPUCallableMethod void maxEigenvalue(
434 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
435 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
436 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
437 [[maybe_unused]] double t,
438 [[maybe_unused]] double dt,
439 [[maybe_unused]] int normal,
440 [[maybe_unused]] double* __restrict__ maxEigenvalue,
441 Offloadable
442 )
443 //#if defined(GPUOffloadingSYCL)
444 //{}
445 //#else
446 ;
447 //#endif
448 #if defined(GPUOffloadingOMP)
449 #pragma omp end declare target
450 #endif
451 {% endif %}
452
453 /**
454 * Determine max eigenvalue over Jacobian in a given point with solution values
455 * (states) Q. All parameters are in.
456 *
457 * @return Max eigenvalue. Result has to be positive, so we are actually speaking
458 * about the maximum absolute eigenvalue.
459 */
460 virtual void maxEigenvalue(
461 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
462 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
463 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
464 [[maybe_unused]] double t,
465 [[maybe_unused]] double dt,
466 [[maybe_unused]] int normal,
467 [[maybe_unused]] double* __restrict__ maxEigenvalue
468 ) {% if EIGENVALUES_IMPLEMENTATION=="<user-defined>" %}= 0{% else %} final{% endif %};
469
470 {% if FLUX_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
471 /**
472 * Depending on the implementation, this variant might be slow as it
473 * lacks an inline define. Also, if you don't want to use ipo aggressively,
474 * it might be clever to put the implementation into the header.
475 *
476 * ## SYCL
477 *
478 * At the moment, SYCL seems to struggle with ipo, even if a function is
479 * never called. So I embed the (empty) implementation directly into the
480 * header.
481 */
482 #if defined(GPUOffloadingOMP)
483 #pragma omp declare target
484 #endif
485 static GPUCallableMethod void flux(
486 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
487 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
488 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
489 [[maybe_unused]] double t,
490 [[maybe_unused]] double dt,
491 [[maybe_unused]] int normal,
492 [[maybe_unused]] double* __restrict__ F, // F[{{NUMBER_OF_UNKNOWNS}}]
493 Offloadable
494 );
495 #if defined(GPUOffloadingOMP)
496 #pragma omp end declare target
497 #endif
498 {% endif %}
499
500 {% if FLUX_IMPLEMENTATION!="<none>" %}
501 virtual void flux(
502 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
503 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
504 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
505 [[maybe_unused]] double t,
506 [[maybe_unused]] double dt,
507 [[maybe_unused]] int normal,
508 [[maybe_unused]] double* __restrict__ F // F[{{NUMBER_OF_UNKNOWNS}}]
509 ) {% if FLUX_IMPLEMENTATION=="<user-defined>" %}=0{% else %} final {% endif %};
510 {% endif %}
511
512 {% if NCP_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
513 /**
514 * Depending on the implementation, this variant might be slow as it
515 * lacks an inline define. Also, if you don't want to use ipo aggressively,
516 * it might be clever to put the implementation into the header.
517 *
518 * ## SYCL
519 *
520 * At the moment, SYCL seems to struggle with ipo, even if a function is
521 * never called. So I embed the (empty) implementation directly into the
522 * header.
523 */
524 #if defined(GPUOffloadingOMP)
525 #pragma omp declare target
526 #endif
527 static GPUCallableMethod void nonconservativeProduct(
528 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
529 [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
530 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
531 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
532 [[maybe_unused]] double t,
533 [[maybe_unused]] double dt,
534 [[maybe_unused]] int normal,
535 [[maybe_unused]] double* __restrict__ BTimesDeltaQ, // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
536 Offloadable
537 );
538 #if defined(GPUOffloadingOMP)
539 #pragma omp end declare target
540 #endif
541 {% endif %}
542
543 {% if NCP_IMPLEMENTATION!="<none>" %}
544 virtual void nonconservativeProduct(
545 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
546 [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
547 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
548 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
549 [[maybe_unused]] double t,
550 [[maybe_unused]] double dt,
551 [[maybe_unused]] int normal,
552 [[maybe_unused]] double* __restrict__ BTimesDeltaQ // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
553 ) {% if NCP_IMPLEMENTATION=="<user-defined>" %}=0{% endif %};
554 {% endif %}
555
556 {% if SOURCE_TERM_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
557 /**
558 * Depending on the implementation, this variant might be slow as it
559 * lacks an inline define. Also, if you don't want to use ipo aggressively,
560 * it might be clever to put the implementation into the header.
561 *
562 * ## SYCL
563 *
564 * At the moment, SYCL seems to struggle with ipo, even if a function is
565 * never called. So I embed the (empty) implementation directly into the
566 * header.
567 */
568 #if defined(GPUOffloadingOMP)
569 #pragma omp declare target
570 #endif
571 static GPUCallableMethod void sourceTerm(
572 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
573 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeCentre,
574 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
575 [[maybe_unused]] double t,
576 [[maybe_unused]] double dt,
577 [[maybe_unused]] double* __restrict__ S, // S[{{NUMBER_OF_UNKNOWNS}}]
578 Offloadable
579 );
580 #if defined(GPUOffloadingOMP)
581 #pragma omp end declare target
582 #endif
583 {% endif %}
584
585 {% if SOURCE_TERM_IMPLEMENTATION!="<none>" %}
586 virtual void sourceTerm(
587 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
588 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeCentre,
589 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
590 [[maybe_unused]] double t,
591 [[maybe_unused]] double dt,
592 [[maybe_unused]] double* __restrict__ S // S[{{NUMBER_OF_UNKNOWNS}}]
593 ) {% if SOURCE_TERM_IMPLEMENTATION=="<user-defined>" %}= 0{% else %} final {% endif %};
594 {% endif %}
595""", undefined=jinja2.DebugUndefined)
596
597 d= {}
598 d[ "FLUX_IMPLEMENTATION"] = flux_implementation
599 d[ "NCP_IMPLEMENTATION"] = ncp_implementation
600 d[ "EIGENVALUES_IMPLEMENTATION"] = eigenvalues_implementation
601 d[ "SOURCE_TERM_IMPLEMENTATION"] = source_term_implementation
602 d[ "STATELESS_PDE_TERMS"] = pde_terms_without_state
603 return Template.render(**d)
604
605def create_abstract_solver_definitions(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state):
606 Template = jinja2.Template( """
607{% if EIGENVALUES_IMPLEMENTATION!="<user-defined>" and EIGENVALUES_IMPLEMENTATION!="<none>" %}
608void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::maxEigenvalue(
609 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
610 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
611 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
612 [[maybe_unused]] double t,
613 [[maybe_unused]] double dt,
614 [[maybe_unused]] int normal,
615 [[maybe_unused]] double* __restrict__ maxEigenvalue
616) {
617 {{EIGENVALUES_IMPLEMENTATION}}
618}
619{% endif %}
620
621{% if FLUX_IMPLEMENTATION!="<none>" and FLUX_IMPLEMENTATION!="<user-defined>" %}
622void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::flux(
623 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
624 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
625 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
626 [[maybe_unused]] double t,
627 [[maybe_unused]] double dt,
628 [[maybe_unused]] int normal,
629 [[maybe_unused]] double* __restrict__ F // F[{{NUMBER_OF_UNKNOWNS}}]
630) {
631 {{FLUX_IMPLEMENTATION}}
632}
633{% endif %}
634
635{% if NCP_IMPLEMENTATION!="<none>" and NCP_IMPLEMENTATION!="<user-defined>" %}
636void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::nonconservativeProduct(
637 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
638 [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
639 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
640 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
641 [[maybe_unused]] double t,
642 [[maybe_unused]] double dt,
643 [[maybe_unused]] int normal,
644 [[maybe_unused]] double* __restrict__ BTimesDeltaQ // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
645) {
646 {{NCP_IMPLEMENTATION}}
647}
648{% endif %}
649
650{% if SOURCE_TERM_IMPLEMENTATION!="<user-defined>" and SOURCE_TERM_IMPLEMENTATION!="<none>" %}
651//#if !defined(GPUOffloadingSYCL)
652void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::sourceTerm(
653 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
654 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeCentre,
655 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
656 [[maybe_unused]] double t,
657 [[maybe_unused]] double dt,
658 [[maybe_unused]] double* __restrict__ S // S[{{NUMBER_OF_UNKNOWNS}}]
659) {
660 {% if SOURCE_TERM_IMPLEMENTATION!="<empty>" %}
661 {{SOURCE_TERM_IMPLEMENTATION}}
662 {% else %}
663 std::fill_n(S,{{NUMBER_OF_UNKNOWNS}},0.0);
664 {% endif %}
665}
666//#endif
667{% endif %}
668
669{% if EIGENVALUES_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
670#if defined(GPUOffloadingOMP)
671#pragma omp declare target
672#endif
673//#if !defined(GPUOffloadingSYCL)
674GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::maxEigenvalue(
675 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
676 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
677 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
678 [[maybe_unused]] double t,
679 [[maybe_unused]] double dt,
680 [[maybe_unused]] int normal,
681 [[maybe_unused]] double* __restrict__ maxEigenvalue,
682 Offloadable
683) {
684 {{EIGENVALUES_IMPLEMENTATION}};
685}
686//#endif
687#if defined(GPUOffloadingOMP)
688#pragma omp end declare target
689#endif
690{% endif %}
691
692
693{% if FLUX_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
694#if defined(GPUOffloadingOMP)
695#pragma omp declare target
696#endif
697//#if !defined(GPUOffloadingSYCL)
698GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::flux(
699 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
700 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
701 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
702 [[maybe_unused]] double t,
703 [[maybe_unused]] double dt,
704 [[maybe_unused]] int normal,
705 [[maybe_unused]] double* __restrict__ F, // F[{{NUMBER_OF_UNKNOWNS}}]
706 Offloadable
707) {
708 {% if FLUX_IMPLEMENTATION=="<none>" %}
709 tarch::gpuAbort();
710 {% else %}
711 {{FLUX_IMPLEMENTATION}}
712 {% endif %}
713}
714//#endif
715#if defined(GPUOffloadingOMP)
716#pragma omp end declare target
717#endif
718{% endif %}
719
720{% if NCP_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
721#if defined(GPUOffloadingOMP)
722#pragma omp declare target
723#endif
724GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::nonconservativeProduct(
725 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
726 [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
727 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
728 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
729 [[maybe_unused]] double t,
730 [[maybe_unused]] double dt,
731 [[maybe_unused]] int normal,
732 [[maybe_unused]] double* __restrict__ BTimesDeltaQ, // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
733 [[maybe_unused]] Offloadable
734) {
735 {% if NCP_IMPLEMENTATION=="<none>" %}
736 tarch::gpuAbort();
737 {% else %}
738 {{NCP_IMPLEMENTATION}}
739 {% endif %}
740}
741#if defined(GPUOffloadingOMP)
742#pragma omp end declare target
743#endif
744{% endif %}
745
746{% if SOURCE_TERM_IMPLEMENTATION!="<user-defined>" and STATELESS_PDE_TERMS %}
747#if defined(GPUOffloadingOMP)
748#pragma omp declare target
749#endif
750GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::sourceTerm(
751 [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
752 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeCentre,
753 [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
754 [[maybe_unused]] double t,
755 [[maybe_unused]] double dt,
756 [[maybe_unused]] double* __restrict__ S, // S[{{NUMBER_OF_UNKNOWNS}}]
757 [[maybe_unused]] Offloadable
758) {
759 {% if SOURCE_TERM_IMPLEMENTATION=="<none>" %}
760 tarch::gpuAbort();
761 {% else %}
762 {{SOURCE_TERM_IMPLEMENTATION}}
763 {% endif %}
764}
765#if defined(GPUOffloadingOMP)
766#pragma omp end declare target
767#endif
768{% endif %}
769""", undefined=jinja2.DebugUndefined)
770
771 d= {}
772 d[ "FLUX_IMPLEMENTATION"] = flux_implementation
773 d[ "NCP_IMPLEMENTATION"] = ncp_implementation
774 d[ "EIGENVALUES_IMPLEMENTATION"] = eigenvalues_implementation
775 d[ "SOURCE_TERM_IMPLEMENTATION"] = source_term_implementation
776 d[ "STATELESS_PDE_TERMS"] = pde_terms_without_state
777 return Template.render(**d)
778
def create_solver_declarations(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state):
    """Render the C++ declarations of the user-defined PDE terms of a solver.

    Each ``*_implementation`` argument holds either literal C++ code or one of
    the markers ``"<user-defined>"``, ``"<none>"`` or ``"<empty>"``.  Only the
    terms marked ``"<user-defined>"`` produce a declaration here, as those are
    the ones the user has to implement in the concrete solver class.  If
    ``pde_terms_without_state`` is truthy, additional ``static`` GPU-offloadable
    overloads (distinguished via the trailing ``Offloadable`` dummy argument)
    are declared as well.

    Returns the rendered C++ fragment as a string.  Placeholders such as
    ``{{NUMBER_OF_UNKNOWNS}}`` that are not substituted here survive the
    rendering because of ``jinja2.DebugUndefined``, so a later rendering pass
    can fill them in.
    """
    Template = jinja2.Template( """
  public:
    {% if SOURCE_TERM_IMPLEMENTATION=="<user-defined>" %}
    virtual void sourceTerm(
      [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
      [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeCentre,
      [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
      [[maybe_unused]] double t,
      [[maybe_unused]] double dt,
      [[maybe_unused]] double* __restrict__ S // S[{{NUMBER_OF_UNKNOWNS}}]
    ) override;
    {% endif %}

    {% if EIGENVALUES_IMPLEMENTATION=="<user-defined>" %}
    /**
     * Determine max eigenvalue over Jacobian in a given point with solution values
     * (states) Q. All parameters are in.
     *
     * @return Max eigenvalue. Result has to be positive, so we are actually speaking
     *   about the maximum absolute eigenvalue.
     */
    virtual void maxEigenvalue(
      [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
      [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
      [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
      [[maybe_unused]] double t,
      [[maybe_unused]] double dt,
      [[maybe_unused]] int normal,
      [[maybe_unused]] double* __restrict__ maxEigenvalue
    ) override;
    {% endif %}

    {% if FLUX_IMPLEMENTATION=="<user-defined>" %}
    virtual void flux(
      [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
      [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
      [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
      [[maybe_unused]] double t,
      [[maybe_unused]] double dt,
      [[maybe_unused]] int normal,
      [[maybe_unused]] double* __restrict__ F // F[{{NUMBER_OF_UNKNOWNS}}]
    ) override;
    {% endif %}

    {% if NCP_IMPLEMENTATION=="<user-defined>" %}
    virtual void nonconservativeProduct(
      [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
      [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
      [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
      [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
      [[maybe_unused]] double t,
      [[maybe_unused]] double dt,
      [[maybe_unused]] int normal,
      [[maybe_unused]] double* __restrict__ BTimesDeltaQ // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
    ) override;
    {% endif %}

    {% if STATELESS_PDE_TERMS and SOURCE_TERM_IMPLEMENTATION=="<user-defined>" %}
    /**
     * To obtain the best performance, I recommend to add an inline keyword to
     * this signature and to copy the implementation into the header. So it would
     * read
     *
     * static inline void sourceTerm( ... ) {
     *   code here
     * }
     *
     * The GPU offloading requires static functions. As we cannot overload the
     * original (virtual) function with a static alternative, we do the
     * TBB trick and overload by adding an additional enum. It has no semantics
     * but helps the compiler to distinguish the different function variants.
     */
    #if defined(GPUOffloadingOMP)
    #pragma omp declare target
    #endif
    static GPUCallableMethod void sourceTerm(
      [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
      [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeCentre,
      [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
      [[maybe_unused]] double t,
      [[maybe_unused]] double dt,
      [[maybe_unused]] double* __restrict__ S, // S[{{NUMBER_OF_UNKNOWNS}}]
      [[maybe_unused]] Offloadable
    );
    #if defined(GPUOffloadingOMP)
    #pragma omp end declare target
    #endif
    {% endif %}

    {% if EIGENVALUES_IMPLEMENTATION=="<user-defined>" and STATELESS_PDE_TERMS %}
    #if defined(GPUOffloadingOMP)
    #pragma omp declare target
    #endif
    static GPUCallableMethod void maxEigenvalue(
      [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
      [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
      [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
      [[maybe_unused]] double t,
      [[maybe_unused]] double dt,
      [[maybe_unused]] int normal,
      [[maybe_unused]] double* __restrict__ maxEigenvalue,
      [[maybe_unused]] Offloadable
    );
    #if defined(GPUOffloadingOMP)
    #pragma omp end declare target
    #endif
    {% endif %}

    {% if STATELESS_PDE_TERMS and FLUX_IMPLEMENTATION=="<user-defined>" %}
    /**
     * To obtain the best performance, I recommend to add an inline keyword to
     * this signature and to copy the implementation into the header. So it would
     * read
     *
     * static inline void flux( ... ) {
     *   code here
     * }
     *
     * The GPU offloading requires static functions. As we cannot overload the
     * original (virtual) function with a static alternative, we do the
     * TBB trick and overload by adding an additional enum. It has no semantics
     * but helps the compiler to distinguish the different function variants.
     */
    #if defined(GPUOffloadingOMP)
    #pragma omp declare target
    #endif
    static GPUCallableMethod void flux(
      [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
      [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
      [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
      [[maybe_unused]] double t,
      [[maybe_unused]] double dt,
      [[maybe_unused]] int normal,
      [[maybe_unused]] double* __restrict__ F, // F[{{NUMBER_OF_UNKNOWNS}}]
      [[maybe_unused]] Offloadable
    );
    #if defined(GPUOffloadingOMP)
    #pragma omp end declare target
    #endif
    {% endif %}

    {% if STATELESS_PDE_TERMS and NCP_IMPLEMENTATION=="<user-defined>" %}
    /**
     * To obtain the best performance, I recommend to add an inline keyword to
     * this signature and to copy the implementation into the header. So it would
     * read
     *
     * static inline void nonconservativeProduct( ... ) {
     *   code here
     * }
     *
     * The GPU offloading requires static functions. As we cannot overload the
     * original (virtual) function with a static alternative, we do the
     * TBB trick and overload by adding an additional enum. It has no semantics
     * but helps the compiler to distinguish the different function variants.
     */
    #if defined(GPUOffloadingOMP)
    #pragma omp declare target
    #endif
    static GPUCallableMethod void nonconservativeProduct(
      [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
      [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
      [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
      [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
      [[maybe_unused]] double t,
      [[maybe_unused]] double dt,
      [[maybe_unused]] int normal,
      [[maybe_unused]] double* __restrict__ BTimesDeltaQ, // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
      [[maybe_unused]] Offloadable
    );
    #if defined(GPUOffloadingOMP)
    #pragma omp end declare target
    #endif
    {% endif %}
""", undefined=jinja2.DebugUndefined)
    # Only these five context entries are substituted in this pass; all other
    # placeholders are kept verbatim via DebugUndefined (see docstring).
    d = {
        "FLUX_IMPLEMENTATION":        flux_implementation,
        "NCP_IMPLEMENTATION":         ncp_implementation,
        "EIGENVALUES_IMPLEMENTATION": eigenvalues_implementation,
        "SOURCE_TERM_IMPLEMENTATION": source_term_implementation,
        "STATELESS_PDE_TERMS":        pde_terms_without_state,
    }
    return Template.render(**d)
962
def create_solver_definitions(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state):
    """Render C++ definition stubs for the user-defined PDE terms of a solver.

    Counterpart of create_solver_declarations(): for every term whose
    ``*_implementation`` argument equals ``"<user-defined>"`` this emits an
    empty function body (marked with ``@todo``) which the user is supposed to
    fill in.  If ``pde_terms_without_state`` is truthy, the static
    GPU-offloadable overloads (trailing ``Offloadable`` dummy argument) get
    stubs as well.

    Returns the rendered C++ fragment as a string.  Unresolved placeholders
    such as ``{{CLASSNAME}}`` survive the rendering because of
    ``jinja2.DebugUndefined`` and are filled in by a later rendering pass.
    """
    Template = jinja2.Template( """
{% if EIGENVALUES_IMPLEMENTATION=="<user-defined>" %}
void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::maxEigenvalue(
  [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
  [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
  [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
  [[maybe_unused]] double t,
  [[maybe_unused]] double dt,
  [[maybe_unused]] int normal,
  [[maybe_unused]] double* __restrict__ maxEigenvalue
) {
  logTraceInWith4Arguments( "maxEigenvalue(...)", faceCentre, volumeH, t, normal );
  // @todo implement
  logTraceOut( "maxEigenvalue(...)" );
}
{% endif %}

{% if FLUX_IMPLEMENTATION=="<user-defined>" %}
void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::flux(
  [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
  [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
  [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
  [[maybe_unused]] double t,
  [[maybe_unused]] double dt,
  [[maybe_unused]] int normal,
  [[maybe_unused]] double* __restrict__ F // F[{{NUMBER_OF_UNKNOWNS}}]
) {
  logTraceInWith4Arguments("flux(...)", faceCentre, volumeH, t, normal);
  // @todo Implement
  logTraceOut("flux(...)");
}
{% endif %}

{% if NCP_IMPLEMENTATION=="<user-defined>" %}
void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::nonconservativeProduct(
  [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
  [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
  [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
  [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
  [[maybe_unused]] double t,
  [[maybe_unused]] double dt,
  [[maybe_unused]] int normal,
  [[maybe_unused]] double* __restrict__ BTimesDeltaQ // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
) {
  logTraceInWith4Arguments("nonconservativeProduct(...)", faceCentre, volumeH, t, normal);
  // @todo Implement
  logTraceOut("nonconservativeProduct(...)");
}
{% endif %}

{% if SOURCE_TERM_IMPLEMENTATION=="<user-defined>" %}
void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::sourceTerm(
  [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
  [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeX,
  [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
  [[maybe_unused]] double t,
  [[maybe_unused]] double dt,
  [[maybe_unused]] double* __restrict__ S // S[{{NUMBER_OF_UNKNOWNS}}]
) {
  logTraceInWith4Arguments("sourceTerm(...)", volumeX, volumeH, t, dt);

  // @todo Implement and ensure that all entries of S are properly set
  for (int i = 0; i < NumberOfUnknowns; i++) {
    S[i] = 0.0;
  }

  logTraceOut("sourceTerm(...)");
}
{% endif %}


{% if EIGENVALUES_IMPLEMENTATION=="<user-defined>" and STATELESS_PDE_TERMS %}
#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::maxEigenvalue(
  [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
  [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
  [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
  [[maybe_unused]] double t,
  [[maybe_unused]] double dt,
  [[maybe_unused]] int normal,
  [[maybe_unused]] double* __restrict__ maxEigenvalue,
  Offloadable
) {
  // @todo implement
}
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif
{% endif %}

{% if FLUX_IMPLEMENTATION=="<user-defined>" and STATELESS_PDE_TERMS %}
#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::flux(
  [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
  [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
  [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
  [[maybe_unused]] double t,
  [[maybe_unused]] double dt,
  [[maybe_unused]] int normal,
  [[maybe_unused]] double* __restrict__ F, // F[{{NUMBER_OF_UNKNOWNS}}]
  Offloadable
) {
  // @todo Implement
}
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif
{% endif %}

{% if NCP_IMPLEMENTATION=="<user-defined>" and STATELESS_PDE_TERMS %}
#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::nonconservativeProduct(
  [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
  [[maybe_unused]] const double* __restrict__ deltaQ, // deltaQ[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
  [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& faceCentre,
  [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
  [[maybe_unused]] double t,
  [[maybe_unused]] double dt,
  [[maybe_unused]] int normal,
  [[maybe_unused]] double* __restrict__ BTimesDeltaQ, // BTimesDeltaQ[{{NUMBER_OF_UNKNOWNS}}]
  [[maybe_unused]] Offloadable
) {
  // @todo Implement
}
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif
{% endif %}

{% if SOURCE_TERM_IMPLEMENTATION=="<user-defined>" and STATELESS_PDE_TERMS %}
#if defined(GPUOffloadingOMP)
#pragma omp declare target
#endif
GPUCallableMethod void {{FULL_QUALIFIED_NAMESPACE}}::{{CLASSNAME}}::sourceTerm(
  [[maybe_unused]] const double* __restrict__ Q, // Q[{{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}]
  [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeCentre,
  [[maybe_unused]] const tarch::la::Vector<Dimensions, double>& volumeH,
  [[maybe_unused]] double t,
  [[maybe_unused]] double dt,
  [[maybe_unused]] double* __restrict__ S, // S[{{NUMBER_OF_UNKNOWNS}}]
  [[maybe_unused]] Offloadable
) {
  // @todo Implement but ensure that all entries of S are properly set
  for (int i = 0; i < NumberOfUnknowns; i++) {
    S[i] = 0.0;
  }
}
#if defined(GPUOffloadingOMP)
#pragma omp end declare target
#endif
{% endif %}

""", undefined=jinja2.DebugUndefined)
    # Only these five context entries are substituted in this pass; all other
    # placeholders are kept verbatim via DebugUndefined (see docstring).
    d = {
        "FLUX_IMPLEMENTATION":        flux_implementation,
        "NCP_IMPLEMENTATION":         ncp_implementation,
        "EIGENVALUES_IMPLEMENTATION": eigenvalues_implementation,
        "SOURCE_TERM_IMPLEMENTATION": source_term_implementation,
        "STATELESS_PDE_TERMS":        pde_terms_without_state,
    }
    return Template.render(**d)
create_solver_definitions(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state)
Definition kernels.py:963
create_compute_Riemann_kernel_for_Rusanov_dsl(flux_implementation, ncp_implementation, source_implementation, compute_max_eigenvalue_of_next_time_step, SolverVariant solver_variant, KernelVariant kernel_variant)
Return only the unqualified function call, i.e., without any namespaces.
Definition kernels.py:297
create_solver_declarations(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state)
Definition kernels.py:779
create_abstract_solver_definitions(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state)
Definition kernels.py:605
create_abstract_solver_declarations(flux_implementation, ncp_implementation, eigenvalues_implementation, source_term_implementation, pde_terms_without_state)
Definition kernels.py:411