Peano
Loading...
Searching...
No Matches
kernels.py
Go to the documentation of this file.
1# This file is part of the ExaHyPE2 project. For conditions of distribution and
2# use, please see the copyright notice at www.peano-framework.org
3from exahype2.solvers.PDETerms import PDETerms
4
5import jinja2
6
7from enum import Enum
8import os
9import exahype2
10import exahype2.kerneldsl as DSL
11
12from exahype2.solvers.rkfd.kernels import SolverVariant
13from exahype2.solvers.rkfd.kernels import KernelVariant
14
16 template_parameters = [DSL.SyntaxTree.Argument("NC", DSL.SyntaxTree.TInteger()),
17 DSL.SyntaxTree.Argument("H", DSL.SyntaxTree.TInteger()),
18 DSL.SyntaxTree.Argument("NumberOfUnknowns", DSL.SyntaxTree.TInteger()),
19 DSL.SyntaxTree.Argument("NumberOfAuxiliaryVariables", DSL.SyntaxTree.TInteger()),
20 DSL.SyntaxTree.Argument("KOsigma", DSL.SyntaxTree.TInteger()),
21 DSL.SyntaxTree.Argument("EvaluateFlux", DSL.SyntaxTree.TBoolean()),
22 DSL.SyntaxTree.Argument("EvaluateNonconservativeProduct", DSL.SyntaxTree.TBoolean()),
23 DSL.SyntaxTree.Argument("EvaluateSource", DSL.SyntaxTree.TBoolean()),
24 DSL.SyntaxTree.Argument("copyOldTimeStepAndScaleWithTimeStepSize", DSL.SyntaxTree.TBoolean()), #not use actually
25 DSL.SyntaxTree.Argument("SecondOrderFormulation", DSL.SyntaxTree.TBoolean()),
26 DSL.SyntaxTree.Argument("FirstOrderFormulation", DSL.SyntaxTree.TBoolean())]
27
28 functor_arguments = [DSL.SyntaxTree.Argument("flux", DSL.SyntaxTree.TCustom("const Flux&")),
29 DSL.SyntaxTree.Argument("nonconservativeProduct", DSL.SyntaxTree.TCustom("const NonconservativeProduct&")),
30 DSL.SyntaxTree.Argument("sourceTerm", DSL.SyntaxTree.TCustom("const Source&"))]
31
32 FD4InExaGRyPE_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4InExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"])
33 FD4InExaGRyPE_kernel = FD4InExaGRyPE_tree.print_cpp()
34 FD4InExaGRyPE_call_with_measurement = FD4InExaGRyPE_tree.print_definition_with_timer()
35 FD4InExaGRyPE_kernel_declaration = FD4InExaGRyPE_tree.print_declaration()
36 FD4InExaGRyPE_call_with_measurement_declaration = FD4InExaGRyPE_tree.print_declaration_with_timer()
37
38 FD4InExaGRyPE_stateless_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4InExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], stateless=True)
39 FD4InExaGRyPE_stateless_kernel = FD4InExaGRyPE_stateless_tree.print_cpp()
40 FD4InExaGRyPE_stateless_call_with_measurement = FD4InExaGRyPE_stateless_tree.print_definition_with_timer()
41 FD4InExaGRyPE_stateless_kernel_declaration = FD4InExaGRyPE_stateless_tree.print_declaration()
42 FD4InExaGRyPE_stateless_call_with_measurement_declaration = FD4InExaGRyPE_stateless_tree.print_declaration_with_timer()
43
44 #FD4InExaGRyPE_accelerator_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4InExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], True, True).print_cpp()
45 #FD4InExaGRyPE_accelerator_kernel = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4InExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], True, True).print_cpp()
46 #FD4InExaGRyPE_accelerator_call_with_measurement = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4InExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], True, True).print_definition_with_timer()
47 #FD4InExaGRyPE_accelerator_kernel_declaration = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4InExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], True, True).print_declaration()
48 #FD4InExaGRyPE_accelerator_call_with_measurement_declaration = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4InExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], True, True).print_declaration_with_timer()
49
50 FD4InExaGRyPE_omp_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4InExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4", "omp"])
51 FD4InExaGRyPE_omp_kernel = FD4InExaGRyPE_omp_tree.print_omp()
52 FD4InExaGRyPE_omp_call_with_measurement = FD4InExaGRyPE_omp_tree.print_definition_with_timer()
53 FD4InExaGRyPE_omp_kernel_declaration = FD4InExaGRyPE_omp_tree.print_declaration()
54 FD4InExaGRyPE_omp_call_with_measurement_declaration = FD4InExaGRyPE_omp_tree.print_declaration_with_timer()
55
56 FD4InExaGRyPE_omp_stateless_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4InExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4", "omp"], stateless=True)
57 FD4InExaGRyPE_omp_stateless_kernel = FD4InExaGRyPE_omp_stateless_tree.print_omp()
58 FD4InExaGRyPE_omp_stateless_call_with_measurement = FD4InExaGRyPE_omp_stateless_tree.print_definition_with_timer()
59 FD4InExaGRyPE_omp_stateless_kernel_declaration = FD4InExaGRyPE_omp_stateless_tree.print_declaration()
60 FD4InExaGRyPE_omp_stateless_call_with_measurement_declaration = FD4InExaGRyPE_omp_stateless_tree.print_declaration_with_timer()
61
62 FD4InExaGRyPE_omp_accelerator_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4InExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4", "omp"], stateless=True, use_accelerator=True)
63 FD4InExaGRyPE_omp_accelerator_kernel = FD4InExaGRyPE_omp_accelerator_tree.print_omp()
64 FD4InExaGRyPE_omp_accelerator_call_with_measurement = FD4InExaGRyPE_omp_accelerator_tree.print_definition_with_timer()
65 FD4InExaGRyPE_omp_accelerator_kernel_declaration = FD4InExaGRyPE_omp_accelerator_tree.print_declaration()
66 FD4InExaGRyPE_omp_accelerator_call_with_measurement_declaration = FD4InExaGRyPE_omp_accelerator_tree.print_declaration_with_timer()
67
68 FD4InExaGRyPE_sycl_accelerator_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4InExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4", "sycl"], stateless=True, use_accelerator=True)
69 FD4InExaGRyPE_sycl_accelerator_kernel = FD4InExaGRyPE_sycl_accelerator_tree.print_sycl()
70 FD4InExaGRyPE_sycl_accelerator_call_with_measurement = FD4InExaGRyPE_sycl_accelerator_tree.print_definition_with_timer()
71 FD4InExaGRyPE_sycl_accelerator_kernel_declaration = FD4InExaGRyPE_sycl_accelerator_tree.print_declaration()
72 FD4InExaGRyPE_sycl_accelerator_call_with_measurement_declaration = FD4InExaGRyPE_sycl_accelerator_tree.print_declaration_with_timer()
73
74 if not os.path.exists("kernels"):
75 os.makedirs("kernels")
76
77 file = open("kernels/FD4InExaGRyPE.h", "w")
78 file.write(f"""#pragma once
79#include "exahype2/CellData.h"
80#include "exahype2/VolumeIndex.h"
81#include "exahype2/fd/PatchUtils.h"
82#include "exahype2/fd/Functors.h"
83#include "peano4/utils/Loop.h"
84#include "tarch/timing/Measurement.h"
85#include "tarch/timing/Watch.h"
86
87{FD4InExaGRyPE_kernel_declaration}
88{FD4InExaGRyPE_call_with_measurement_declaration}
89{FD4InExaGRyPE_stateless_kernel_declaration}
90{FD4InExaGRyPE_stateless_call_with_measurement_declaration}
91
92#if defined(SharedOMP)
93{FD4InExaGRyPE_omp_kernel_declaration}
94{FD4InExaGRyPE_omp_call_with_measurement_declaration}
95{FD4InExaGRyPE_omp_stateless_kernel_declaration}
96{FD4InExaGRyPE_omp_stateless_call_with_measurement_declaration}
97#endif
98
99#if defined(GPUOffloadingOMP)
100{FD4InExaGRyPE_omp_accelerator_kernel_declaration}
101{FD4InExaGRyPE_omp_accelerator_call_with_measurement_declaration}
102#endif
103
104#if defined(GPUOffloadingSYCL)
105{FD4InExaGRyPE_sycl_accelerator_kernel_declaration}
106{FD4InExaGRyPE_sycl_accelerator_call_with_measurement_declaration}
107#endif
108
109#include "FD4InExaGRyPE.cpph"
110""")
111 file.close()
112
113 file = open("kernels/FD4InExaGRyPE.cpph", "w")
114 file.write(FD4InExaGRyPE_kernel)
115 file.write(FD4InExaGRyPE_call_with_measurement)
116 file.write(FD4InExaGRyPE_stateless_kernel)
117 file.write(FD4InExaGRyPE_stateless_call_with_measurement)
118
119 file.write("#if defined(SharedOMP)\n")
120 file.write(FD4InExaGRyPE_omp_kernel)
121 file.write(FD4InExaGRyPE_omp_call_with_measurement)
122 file.write(FD4InExaGRyPE_omp_stateless_kernel)
123 file.write(FD4InExaGRyPE_omp_stateless_call_with_measurement)
124 file.write("#endif\n")
125
126 file.write("#if defined(GPUOffloadingOMP)\n")
127 file.write(FD4InExaGRyPE_omp_accelerator_kernel)
128 file.write(FD4InExaGRyPE_omp_accelerator_call_with_measurement)
129 file.write("#endif\n")
130
131 file.write("#if defined(GPUOffloadingSYCL)\n")
132 file.write(FD4InExaGRyPE_sycl_accelerator_kernel)
133 file.write(FD4InExaGRyPE_sycl_accelerator_call_with_measurement)
134 file.write("#endif\n")
135 file.close()
136
138 template_parameters = [DSL.SyntaxTree.Argument("NC", DSL.SyntaxTree.TInteger()),
139 DSL.SyntaxTree.Argument("H", DSL.SyntaxTree.TInteger()),
140 DSL.SyntaxTree.Argument("NumberOfUnknowns", DSL.SyntaxTree.TInteger()),
141 DSL.SyntaxTree.Argument("NumberOfAuxiliaryVariables", DSL.SyntaxTree.TInteger()),
142 DSL.SyntaxTree.Argument("SecondOrderFormulation", DSL.SyntaxTree.TBoolean())]
143
144 functor_arguments = []
145
146 DeriCallInExaGRyPE_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.DerivativesCalInExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"])
147 DeriCallInExaGRyPE_kernel = DeriCallInExaGRyPE_tree.print_cpp()
148 DeriCallInExaGRyPE_call_with_measurement = DeriCallInExaGRyPE_tree.print_definition_with_timer()
149 DeriCallInExaGRyPE_kernel_declaration = DeriCallInExaGRyPE_tree.print_declaration()
150 DeriCallInExaGRyPE_call_with_measurement_declaration = DeriCallInExaGRyPE_tree.print_declaration_with_timer()
151
152 DeriCallInExaGRyPE_stateless_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.DerivativesCalInExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], stateless=True)
153 DeriCallInExaGRyPE_stateless_kernel = DeriCallInExaGRyPE_stateless_tree.print_cpp()
154 DeriCallInExaGRyPE_stateless_call_with_measurement = DeriCallInExaGRyPE_stateless_tree.print_definition_with_timer()
155 DeriCallInExaGRyPE_stateless_kernel_declaration = DeriCallInExaGRyPE_stateless_tree.print_declaration()
156 DeriCallInExaGRyPE_stateless_call_with_measurement_declaration = DeriCallInExaGRyPE_stateless_tree.print_declaration_with_timer()
157
158 #DeriCallInExaGRyPE_accelerator_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.DerivativesCalInExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], True, True).print_cpp()
159 #DeriCallInExaGRyPE_accelerator_kernel = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.DerivativesCalInExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], True, True).print_cpp()
160 #DeriCallInExaGRyPE_accelerator_call_with_measurement = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.DerivativesCalInExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], True, True).print_definition_with_timer()
161 #DeriCallInExaGRyPE_accelerator_kernel_declaration = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.DerivativesCalInExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], True, True).print_declaration()
162 #DeriCallInExaGRyPE_accelerator_call_with_measurement_declaration = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.DerivativesCalInExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], True, True).print_declaration_with_timer()
163
164 DeriCallInExaGRyPE_omp_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.DerivativesCalInExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4", "omp"])
165 DeriCallInExaGRyPE_omp_kernel = DeriCallInExaGRyPE_omp_tree.print_omp()
166 DeriCallInExaGRyPE_omp_call_with_measurement = DeriCallInExaGRyPE_omp_tree.print_definition_with_timer()
167 DeriCallInExaGRyPE_omp_kernel_declaration = DeriCallInExaGRyPE_omp_tree.print_declaration()
168 DeriCallInExaGRyPE_omp_call_with_measurement_declaration = DeriCallInExaGRyPE_omp_tree.print_declaration_with_timer()
169
170 DeriCallInExaGRyPE_omp_stateless_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.DerivativesCalInExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4", "omp"], stateless=True)
171 DeriCallInExaGRyPE_omp_stateless_kernel = DeriCallInExaGRyPE_omp_stateless_tree.print_omp()
172 DeriCallInExaGRyPE_omp_stateless_call_with_measurement = DeriCallInExaGRyPE_omp_stateless_tree.print_definition_with_timer()
173 DeriCallInExaGRyPE_omp_stateless_kernel_declaration = DeriCallInExaGRyPE_omp_stateless_tree.print_declaration()
174 DeriCallInExaGRyPE_omp_stateless_call_with_measurement_declaration = DeriCallInExaGRyPE_omp_stateless_tree.print_declaration_with_timer()
175
176 DeriCallInExaGRyPE_omp_accelerator_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.DerivativesCalInExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4", "omp"], stateless=True, use_accelerator=True)
177 DeriCallInExaGRyPE_omp_accelerator_kernel = DeriCallInExaGRyPE_omp_accelerator_tree.print_omp()
178 DeriCallInExaGRyPE_omp_accelerator_call_with_measurement = DeriCallInExaGRyPE_omp_accelerator_tree.print_definition_with_timer()
179 DeriCallInExaGRyPE_omp_accelerator_kernel_declaration = DeriCallInExaGRyPE_omp_accelerator_tree.print_declaration()
180 DeriCallInExaGRyPE_omp_accelerator_call_with_measurement_declaration = DeriCallInExaGRyPE_omp_accelerator_tree.print_declaration_with_timer()
181
182 DeriCallInExaGRyPE_sycl_accelerator_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.DerivativesCalInExaGRyPE, template_parameters, functor_arguments, ["exahype2", "fd", "fd4", "sycl"], stateless=True, use_accelerator=True)
183 DeriCallInExaGRyPE_sycl_accelerator_kernel = DeriCallInExaGRyPE_sycl_accelerator_tree.print_sycl()
184 DeriCallInExaGRyPE_sycl_accelerator_call_with_measurement = DeriCallInExaGRyPE_sycl_accelerator_tree.print_definition_with_timer()
185 DeriCallInExaGRyPE_sycl_accelerator_kernel_declaration = DeriCallInExaGRyPE_sycl_accelerator_tree.print_declaration()
186 DeriCallInExaGRyPE_sycl_accelerator_call_with_measurement_declaration = DeriCallInExaGRyPE_sycl_accelerator_tree.print_declaration_with_timer()
187
188 if not os.path.exists("kernels"):
189 os.makedirs("kernels")
190
191 file = open("kernels/DeriCallInExaGRyPE.h", "w")
192 file.write(f"""#pragma once
193#include "exahype2/CellData.h"
194#include "exahype2/VolumeIndex.h"
195#include "exahype2/fd/PatchUtils.h"
196#include "exahype2/fd/Functors.h"
197#include "peano4/utils/Loop.h"
198#include "tarch/timing/Measurement.h"
199#include "tarch/timing/Watch.h"
200
201{DeriCallInExaGRyPE_kernel_declaration}
202{DeriCallInExaGRyPE_call_with_measurement_declaration}
203{DeriCallInExaGRyPE_stateless_kernel_declaration}
204{DeriCallInExaGRyPE_stateless_call_with_measurement_declaration}
205
206#if defined(SharedOMP)
207{DeriCallInExaGRyPE_omp_kernel_declaration}
208{DeriCallInExaGRyPE_omp_call_with_measurement_declaration}
209{DeriCallInExaGRyPE_omp_stateless_kernel_declaration}
210{DeriCallInExaGRyPE_omp_stateless_call_with_measurement_declaration}
211#endif
212
213#if defined(GPUOffloadingOMP)
214{DeriCallInExaGRyPE_omp_accelerator_kernel_declaration}
215{DeriCallInExaGRyPE_omp_accelerator_call_with_measurement_declaration}
216#endif
217
218#if defined(GPUOffloadingSYCL)
219{DeriCallInExaGRyPE_sycl_accelerator_kernel_declaration}
220{DeriCallInExaGRyPE_sycl_accelerator_call_with_measurement_declaration}
221#endif
222
223#include "DeriCallInExaGRyPE.cpph"
224""")
225 file.close()
226
227 file = open("kernels/DeriCallInExaGRyPE.cpph", "w")
228 file.write(DeriCallInExaGRyPE_kernel)
229 file.write(DeriCallInExaGRyPE_call_with_measurement)
230 file.write(DeriCallInExaGRyPE_stateless_kernel)
231 file.write(DeriCallInExaGRyPE_stateless_call_with_measurement)
232
233 file.write("#if defined(SharedOMP)\n")
234 file.write(DeriCallInExaGRyPE_omp_kernel)
235 file.write(DeriCallInExaGRyPE_omp_call_with_measurement)
236 file.write(DeriCallInExaGRyPE_omp_stateless_kernel)
237 file.write(DeriCallInExaGRyPE_omp_stateless_call_with_measurement)
238 file.write("#endif\n")
239
240 file.write("#if defined(GPUOffloadingOMP)\n")
241 file.write(DeriCallInExaGRyPE_omp_accelerator_kernel)
242 file.write(DeriCallInExaGRyPE_omp_accelerator_call_with_measurement)
243 file.write("#endif\n")
244
245 file.write("#if defined(GPUOffloadingSYCL)\n")
246 file.write(DeriCallInExaGRyPE_sycl_accelerator_kernel)
247 file.write(DeriCallInExaGRyPE_sycl_accelerator_call_with_measurement)
248 file.write("#endif\n")
249 file.close()
250
252 template_parameters = [DSL.SyntaxTree.Argument("NC", DSL.SyntaxTree.TInteger()),
253 DSL.SyntaxTree.Argument("H", DSL.SyntaxTree.TInteger()),
254 DSL.SyntaxTree.Argument("NumberOfUnknowns", DSL.SyntaxTree.TInteger()),
255 DSL.SyntaxTree.Argument("NumberOfAuxiliaryVariables", DSL.SyntaxTree.TInteger()),
256 DSL.SyntaxTree.Argument("KOsigma", DSL.SyntaxTree.TInteger()),
257 DSL.SyntaxTree.Argument("EvaluateFlux", DSL.SyntaxTree.TBoolean()),
258 DSL.SyntaxTree.Argument("EvaluateNonconservativeProduct", DSL.SyntaxTree.TBoolean()),
259 DSL.SyntaxTree.Argument("EvaluateSource", DSL.SyntaxTree.TBoolean()),
260 DSL.SyntaxTree.Argument("copyOldTimeStepAndScaleWithTimeStepSize", DSL.SyntaxTree.TBoolean())] #not use actually
261
262 functor_arguments = [DSL.SyntaxTree.Argument("flux", DSL.SyntaxTree.TCustom("const Flux&")),
263 DSL.SyntaxTree.Argument("nonconservativeProduct", DSL.SyntaxTree.TCustom("const NonconservativeProduct&")),
264 DSL.SyntaxTree.Argument("sourceTerm", DSL.SyntaxTree.TCustom("const Source&"))]
265
266 FD4_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"])
267 FD4_kernel = FD4_tree.print_cpp()
268 FD4_call_with_measurement = FD4_tree.print_definition_with_timer()
269 FD4_kernel_declaration = FD4_tree.print_declaration()
270 FD4_call_with_measurement_declaration = FD4_tree.print_declaration_with_timer()
271
272 FD4_stateless_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], stateless=True)
273 FD4_stateless_kernel = FD4_stateless_tree.print_cpp()
274 FD4_stateless_call_with_measurement = FD4_stateless_tree.print_definition_with_timer()
275 FD4_stateless_kernel_declaration = FD4_stateless_tree.print_declaration()
276 FD4_stateless_call_with_measurement_declaration = FD4_stateless_tree.print_declaration_with_timer()
277
278 #FD4_accelerator_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], True, True).print_cpp()
279 #FD4_accelerator_kernel = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], True, True).print_cpp()
280 #FD4_accelerator_call_with_measurement = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], True, True).print_definition_with_timer()
281 #FD4_accelerator_kernel_declaration = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], True, True).print_declaration()
282 #FD4_accelerator_call_with_measurement_declaration = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4, template_parameters, functor_arguments, ["exahype2", "fd", "fd4"], True, True).print_declaration_with_timer()
283
284 FD4_omp_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4, template_parameters, functor_arguments, ["exahype2", "fd", "fd4", "omp"])
285 FD4_omp_kernel = FD4_omp_tree.print_omp()
286 FD4_omp_call_with_measurement = FD4_omp_tree.print_definition_with_timer()
287 FD4_omp_kernel_declaration = FD4_omp_tree.print_declaration()
288 FD4_omp_call_with_measurement_declaration = FD4_omp_tree.print_declaration_with_timer()
289
290 FD4_omp_stateless_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4, template_parameters, functor_arguments, ["exahype2", "fd", "fd4", "omp"], stateless=True)
291 FD4_omp_stateless_kernel = FD4_omp_stateless_tree.print_omp()
292 FD4_omp_stateless_call_with_measurement = FD4_omp_stateless_tree.print_definition_with_timer()
293 FD4_omp_stateless_kernel_declaration = FD4_omp_stateless_tree.print_declaration()
294 FD4_omp_stateless_call_with_measurement_declaration = FD4_omp_stateless_tree.print_declaration_with_timer()
295
296 FD4_omp_accelerator_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4, template_parameters, functor_arguments, ["exahype2", "fd", "fd4", "omp"], stateless=True, use_accelerator=True)
297 FD4_omp_accelerator_kernel = FD4_omp_accelerator_tree.print_omp()
298 FD4_omp_accelerator_call_with_measurement = FD4_omp_accelerator_tree.print_definition_with_timer()
299 FD4_omp_accelerator_kernel_declaration = FD4_omp_accelerator_tree.print_declaration()
300 FD4_omp_accelerator_call_with_measurement_declaration = FD4_omp_accelerator_tree.print_declaration_with_timer()
301
302 FD4_sycl_accelerator_tree = DSL.Parser().parse(exahype2.solvers.rkfd.fd4.timeStepWithFD4, template_parameters, functor_arguments, ["exahype2", "fd", "fd4", "sycl"], stateless=True, use_accelerator=True)
303 FD4_sycl_accelerator_kernel = FD4_sycl_accelerator_tree.print_sycl()
304 FD4_sycl_accelerator_call_with_measurement = FD4_sycl_accelerator_tree.print_definition_with_timer()
305 FD4_sycl_accelerator_kernel_declaration = FD4_sycl_accelerator_tree.print_declaration()
306 FD4_sycl_accelerator_call_with_measurement_declaration = FD4_sycl_accelerator_tree.print_declaration_with_timer()
307
308 if not os.path.exists("kernels"):
309 os.makedirs("kernels")
310
311 file = open("kernels/FD4.h", "w")
312 file.write(f"""#pragma once
313#include "exahype2/CellData.h"
314#include "exahype2/VolumeIndex.h"
315#include "exahype2/fd/PatchUtils.h"
316#include "exahype2/fd/Functors.h"
317#include "peano4/utils/Loop.h"
318#include "tarch/timing/Measurement.h"
319#include "tarch/timing/Watch.h"
320
321{FD4_kernel_declaration}
322{FD4_call_with_measurement_declaration}
323{FD4_stateless_kernel_declaration}
324{FD4_stateless_call_with_measurement_declaration}
325
326#if defined(SharedOMP)
327{FD4_omp_kernel_declaration}
328{FD4_omp_call_with_measurement_declaration}
329{FD4_omp_stateless_kernel_declaration}
330{FD4_omp_stateless_call_with_measurement_declaration}
331#endif
332
333#if defined(GPUOffloadingOMP)
334{FD4_omp_accelerator_kernel_declaration}
335{FD4_omp_accelerator_call_with_measurement_declaration}
336#endif
337
338#if defined(GPUOffloadingSYCL)
339{FD4_sycl_accelerator_kernel_declaration}
340{FD4_sycl_accelerator_call_with_measurement_declaration}
341#endif
342
343#include "FD4.cpph"
344""")
345 file.close()
346
347 file = open("kernels/FD4.cpph", "w")
348 file.write(FD4_kernel)
349 file.write(FD4_call_with_measurement)
350 file.write(FD4_stateless_kernel)
351 file.write(FD4_stateless_call_with_measurement)
352
353 file.write("#if defined(SharedOMP)\n")
354 file.write(FD4_omp_kernel)
355 file.write(FD4_omp_call_with_measurement)
356 file.write(FD4_omp_stateless_kernel)
357 file.write(FD4_omp_stateless_call_with_measurement)
358 file.write("#endif\n")
359
360 file.write("#if defined(GPUOffloadingOMP)\n")
361 file.write(FD4_omp_accelerator_kernel)
362 file.write(FD4_omp_accelerator_call_with_measurement)
363 file.write("#endif\n")
364
365 file.write("#if defined(GPUOffloadingSYCL)\n")
366 file.write(FD4_sycl_accelerator_kernel)
367 file.write(FD4_sycl_accelerator_call_with_measurement)
368 file.write("#endif\n")
369 file.close()
370
372 ncp_implementation,
373 source_implementation,
374 compute_max_eigenvalue_of_next_time_step,
375 solver_variant: SolverVariant,
376 kernel_variant: KernelVariant,
377 KOSigma,
378 SecondOrder):
379 """
380
381 I return only the unqualified function call, i.e. without any namespaces.
382 So by setting the right namespace as prefix, you can direct it to particular
383 implementations.
384
385 """
386
387 template = "timeStepWithFD4InExaGRyPE"
388
389 if solver_variant == SolverVariant.WithVirtualFunctions:
390 template += """<
391 {{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}},
392 {{HALO_SIZE}},
393 {{NUMBER_OF_UNKNOWNS}},
394 {{NUMBER_OF_AUXILIARY_VARIABLES}},
395 static_cast<int>({{KOSIGMA}}),
396 {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
397 {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
398 {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
399 false, // Runge-Kutta, so no need to copy old data over or to take dt into account
400 {% if SECONDORDER %} true {% else %} false {% endif %}, //if we have SO enabled, the quantities that get advections would be different
401 {% if SECONDORDER %} false {% else %} true {% endif %} //if we have SO enabled, the quantities that get advections would be different
402 >(patchData,
403 [&](
404 const double * __restrict__ Q,
405 const tarch::la::Vector<Dimensions,double>& faceCentre,
406 const tarch::la::Vector<Dimensions,double>& volumeH,
407 double t,
408 double dt,
409 int normal,
410 double * __restrict__ F
411 )->void {
412 {% if FLUX_IMPLEMENTATION!="<none>" %}
413 repositories::{{SOLVER_INSTANCE}}.flux( Q, faceCentre, volumeH, t, dt, normal, F );
414 {% endif %}
415 },
416 [&](
417 const double * __restrict__ Q,
418 const double * __restrict__ deltaQ,
419 const tarch::la::Vector<Dimensions,double>& faceCentre,
420 const tarch::la::Vector<Dimensions,double>& volumeH,
421 double t,
422 double dt,
423 int normal,
424 double * __restrict__ BTimesDeltaQ
425 )->void {
426 {% if NCP_IMPLEMENTATION!="<none>" %}
427 repositories::{{SOLVER_INSTANCE}}.nonconservativeProduct( Q, deltaQ, faceCentre, volumeH, t, dt, normal, BTimesDeltaQ );
428 {% endif %}
429 },
430 [&](
431 const double * __restrict__ Q,
432 const tarch::la::Vector<Dimensions,double>& volumeX,
433 const tarch::la::Vector<Dimensions,double>& volumeH,
434 double t,
435 double dt,
436 double * __restrict__ S
437 )->void {
438 {% if SOURCE_IMPLEMENTATION!="<none>" %}
439 repositories::{{SOLVER_INSTANCE}}.sourceTerm( Q, volumeX, volumeH, t, dt, S );
440 {% endif %}
441 }
442);
443 """
444 elif solver_variant == SolverVariant.Stateless:
445 template += """Stateless<
446 {{SOLVER_NAME}},
447 {{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}},
448 {{HALO_SIZE}},
449 {{NUMBER_OF_UNKNOWNS}},
450 {{NUMBER_OF_AUXILIARY_VARIABLES}},
451 static_cast<int>({{KOSIGMA}}),
452 {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
453 {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
454 {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
455 false, // Runge-Kutta, so no need to copy old data over or to take dt into account
456 {% if SECONDORDER %} true {% else %} false {% endif %}, //if we have SO enabled, the quantities that get advections would be different
457 {% if SECONDORDER %} false {% else %} true {% endif %} //if we have SO enabled, the quantities that get advections would be different
458 >(patchData);
459 """
460 elif solver_variant == SolverVariant.AcceleratorWithExplicitCopy:
461 template += """Stateless<
462 {{SOLVER_NAME}},
463 {{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}},
464 {{HALO_SIZE}},
465 {{NUMBER_OF_UNKNOWNS}},
466 {{NUMBER_OF_AUXILIARY_VARIABLES}},
467 static_cast<int>({{KOSIGMA}}),
468 {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
469 {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
470 {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
471 false, // Runge-Kutta, so no need to copy old data over or to take dt into account
472 {% if SECONDORDER %} true {% else %} false {% endif %}, //if we have SO enabled, the quantities that get advections would be different
473 {% if SECONDORDER %} false {% else %} true {% endif %} //if we have SO enabled, the quantities that get advections would be different
474 >(gpuPatchData.targetDevice, gpuPatchData);
475 """
476 else:
477 assert False, "not supported combination: {} x {}".format( solver_variant, kernel_variant )
478
479 result = jinja2.Template( template, undefined=jinja2.DebugUndefined)
480 d= {}
481 d[ "FLUX_IMPLEMENTATION" ] = flux_implementation
482 d[ "NCP_IMPLEMENTATION" ] = ncp_implementation
483 d[ "SOURCE_IMPLEMENTATION" ] = source_implementation
484 d[ "COMPUTE_MAX_EIGENVALUE" ] = compute_max_eigenvalue_of_next_time_step
485 d[ "KOSIGMA" ] = KOSigma
486 d[ "SECONDORDER" ] = SecondOrder
487 return result.render(**d)
488
def create_compute_kernel_for_FD4_DSL(flux_implementation,
                                      ncp_implementation,
                                      source_implementation,
                                      compute_max_eigenvalue_of_next_time_step,
                                      solver_variant: SolverVariant,
                                      kernel_variant: KernelVariant,
                                      KOSigma):
    """
    Assemble the C++ call to the timeStepWithFD4 kernel as a string.

    I return only the unqualified function call, i.e. without any namespaces.
    So by setting the right namespace as prefix, you can direct it to particular
    implementations.

    Parameters:
      flux_implementation, ncp_implementation, source_implementation:
        Compared against the marker string "<none>". If an argument equals
        "<none>", the matching boolean template argument is rendered as
        false; in the WithVirtualFunctions variant the body of the
        corresponding lambda is left empty, too.
      compute_max_eigenvalue_of_next_time_step:
        Handed to the renderer as COMPUTE_MAX_EIGENVALUE. None of the
        templates assembled in this function refers to it.
      solver_variant:
        Selects the call template. WithVirtualFunctions, Stateless and
        AcceleratorWithExplicitCopy are supported.
      kernel_variant:
        Only used in the error message for unsupported variants.
      KOSigma:
        Inserted as static_cast<int>(...) template argument.

    Returns:
      The rendered call. Placeholders that are not set here (for example
      NUMBER_OF_UNKNOWNS or SOLVER_INSTANCE) are rendered with
      jinja2.DebugUndefined - presumably so that a later rendering pass can
      still substitute them; confirm against the caller.

    Raises:
      AssertionError: if solver_variant is none of the supported variants.
    """

    template = "timeStepWithFD4"

    if solver_variant == SolverVariant.WithVirtualFunctions:
        # Functor flavour: the solver instance's flux, nonconservativeProduct
        # and sourceTerm are wrapped into lambdas and passed to the kernel.
        template += """<
  {{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}},
  {{HALO_SIZE}},
  {{NUMBER_OF_UNKNOWNS}},
  {{NUMBER_OF_AUXILIARY_VARIABLES}},
  static_cast<int>({{KOSIGMA}}),
  {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
  {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
  {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
  false // Runge-Kutta, so no need to copy old data over or to take dt into account
  >(patchData,
    [&](
      const double * __restrict__ Q,
      const tarch::la::Vector<Dimensions,double>& faceCentre,
      const tarch::la::Vector<Dimensions,double>& volumeH,
      double t,
      double dt,
      int normal,
      double * __restrict__ F
    )->void {
      {% if FLUX_IMPLEMENTATION!="<none>" %}
      repositories::{{SOLVER_INSTANCE}}.flux( Q, faceCentre, volumeH, t, dt, normal, F );
      {% endif %}
    },
    [&](
      const double * __restrict__ Q,
      const double * __restrict__ deltaQ,
      const tarch::la::Vector<Dimensions,double>& faceCentre,
      const tarch::la::Vector<Dimensions,double>& volumeH,
      double t,
      double dt,
      int normal,
      double * __restrict__ BTimesDeltaQ
    )->void {
      {% if NCP_IMPLEMENTATION!="<none>" %}
      repositories::{{SOLVER_INSTANCE}}.nonconservativeProduct( Q, deltaQ, faceCentre, volumeH, t, dt, normal, BTimesDeltaQ );
      {% endif %}
    },
    [&](
      const double * __restrict__ Q,
      const tarch::la::Vector<Dimensions,double>& volumeX,
      const tarch::la::Vector<Dimensions,double>& volumeH,
      double t,
      double dt,
      double * __restrict__ S
    )->void {
      {% if SOURCE_IMPLEMENTATION!="<none>" %}
      repositories::{{SOLVER_INSTANCE}}.sourceTerm( Q, volumeX, volumeH, t, dt, S );
      {% endif %}
    }
);
"""
    elif solver_variant == SolverVariant.Stateless:
        # Stateless flavour: the solver type is a template argument, so no
        # functors are handed over.
        template += """Stateless<
  {{SOLVER_NAME}},
  {{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}},
  {{HALO_SIZE}},
  {{NUMBER_OF_UNKNOWNS}},
  {{NUMBER_OF_AUXILIARY_VARIABLES}},
  static_cast<int>({{KOSIGMA}}),
  {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
  {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
  {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
  false // Runge-Kutta, so no need to copy old data over or to take dt into account
  >(patchData);
"""
    elif solver_variant == SolverVariant.AcceleratorWithExplicitCopy:
        # Same template arguments as the stateless flavour, but the call
        # additionally receives the target device and works on gpuPatchData.
        template += """Stateless<
  {{SOLVER_NAME}},
  {{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}},
  {{HALO_SIZE}},
  {{NUMBER_OF_UNKNOWNS}},
  {{NUMBER_OF_AUXILIARY_VARIABLES}},
  static_cast<int>({{KOSIGMA}}),
  {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
  {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
  {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
  false // Runge-Kutta, so no need to copy old data over or to take dt into account
  >(gpuPatchData.targetDevice, gpuPatchData);
"""
    else:
        # Raise explicitly instead of using "assert False": assert statements
        # are removed when Python runs with -O, and the function would then
        # silently return the bare kernel name. The exception type is kept so
        # existing callers observe the same error.
        raise AssertionError(
            "not supported combination: {} x {}".format(solver_variant, kernel_variant)
        )

    context = {
        "FLUX_IMPLEMENTATION": flux_implementation,
        "NCP_IMPLEMENTATION": ncp_implementation,
        "SOURCE_IMPLEMENTATION": source_implementation,
        "COMPUTE_MAX_EIGENVALUE": compute_max_eigenvalue_of_next_time_step,
        "KOSIGMA": KOSigma,
    }
    return jinja2.Template(template, undefined=jinja2.DebugUndefined).render(**context)
598
def create_derivative_calculation_kernel_for_FD4InExaGRyPE_DSL(solver_variant: SolverVariant,
                                                               kernel_variant: KernelVariant,
                                                               SecondOrder):
    """
    Build the (unqualified) C++ call of the derivative calculation kernel.

    The call always starts with ``DerivativesCalInExaGRyPE``. Depending on
    ``solver_variant`` a template argument list and the actual call arguments
    are appended. The text is then rendered with jinja2; only ``SECONDORDER``
    is substituted here.

    :param solver_variant: Selects which call signature is generated. Handled
        values are ``WithVirtualFunctions``, ``Stateless`` and
        ``AcceleratorWithExplicitCopy``.
    :param kernel_variant: Not used to pick a template; it only shows up in
        the error message for unsupported combinations.
    :param SecondOrder: If truthy, the last template argument is rendered as
        ``true``, otherwise as ``false``.
    :return: The rendered call as a string. All other ``{{...}}`` placeholders
        are not defined here; with ``jinja2.DebugUndefined`` they are expected
        to stay in the output as placeholders (presumably for a later
        rendering pass by the caller -- confirm).
    :raises AssertionError: If ``solver_variant`` is none of the handled
        values.
    """
    template = "DerivativesCalInExaGRyPE"

    if solver_variant == SolverVariant.WithVirtualFunctions:
        template += """<
{{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}},
{{HALO_SIZE}},
{{NUMBER_OF_UNKNOWNS}},
{{NUMBER_OF_AUXILIARY_VARIABLES}},
{% if SECONDORDER %} true {% else %} false {% endif %} //if we have SO enabled, the quantities that get advections would be different
>(patchData);
"""
    elif solver_variant == SolverVariant.Stateless:
        template += """Stateless<
{{SOLVER_NAME}},
{{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}},
{{HALO_SIZE}},
{{NUMBER_OF_UNKNOWNS}},
{{NUMBER_OF_AUXILIARY_VARIABLES}},
{% if SECONDORDER %} true {% else %} false {% endif %} //if we have SO enabled, the quantities that get advections would be different
>(patchData);
"""
    elif solver_variant == SolverVariant.AcceleratorWithExplicitCopy:
        # NOTE(review): the compute-kernel template in this file passes
        # gpuPatchData.targetDevice as first argument, whereas this one passes
        # a bare targetDevice -- confirm that name exists at the call site.
        template += """Stateless<
{{SOLVER_NAME}},
{{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}},
{{HALO_SIZE}},
{{NUMBER_OF_UNKNOWNS}},
{{NUMBER_OF_AUXILIARY_VARIABLES}},
{% if SECONDORDER %} true {% else %} false {% endif %} //if we have SO enabled, the quantities that get advections would be different
>(targetDevice, gpuPatchData);
"""
    else:
        # Raise explicitly instead of using "assert False": assert statements
        # are removed under "python -O", and we would then silently return
        # only the bare kernel name as if it were a valid call.
        raise AssertionError(
            "not supported combination: {} x {}".format( solver_variant, kernel_variant )
        )

    result = jinja2.Template( template, undefined=jinja2.DebugUndefined)
    d = {}
    d[ "SECONDORDER" ] = SecondOrder
    return result.render(**d)
641
642
643#def create_compute_kernel_for_FD4(flux_implementation,
644# ncp_implementation,
645# source_implementation,
646# compute_max_eigenvalue_of_next_time_step,
647# solver_variant,
648# kernel_variant,
649# KOSigma):
650# """
651#
652# I return only the unqualified function call, i.e. without any namespaces.
653# So by setting the right namespace as prefix, you can direct it to particular
654# implementations.
655#
656# """
657# KernelCalls = {
658# KernelVariant.PatchWiseAoSHeap: "timeStep_patchwise_heap",
659# KernelVariant.PatchWiseAoSoAHeap: "timeStep_patchwise_heap",
660# KernelVariant.PatchWiseSoAHeap: "timeStep_patchwise_heap",
661# KernelVariant.BatchedAoSHeap: "timeStep_batched_heap",
662# KernelVariant.BatchedAoSoAHeap: "timeStep_batched_heap",
663# KernelVariant.BatchedSoAHeap: "timeStep_batched_heap",
664# KernelVariant.TaskGraphAoSHeap: "timeStep_taskgraph_heap",
665# KernelVariant.TaskGraphAoSoAHeap: "timeStep_taskgraph_heap",
666# KernelVariant.TaskGraphSoAHeap: "timeStep_taskgraph_heap",
667# }
668#
669# EnumeratorTemplateTypes = {
670# KernelVariant.PatchWiseAoSHeap: "::exahype2::enumerator::AoSLexicographicEnumerator",
671# KernelVariant.PatchWiseAoSoAHeap: "::exahype2::enumerator::AoSoALexicographicEnumerator",
672# KernelVariant.PatchWiseSoAHeap: "::exahype2::enumerator::SoALexicographicEnumerator",
673# KernelVariant.BatchedAoSHeap: "::exahype2::enumerator::AoSLexicographicEnumerator",
674# KernelVariant.BatchedAoSoAHeap: "::exahype2::enumerator::AoSoALexicographicEnumerator",
675# KernelVariant.BatchedSoAHeap: "::exahype2::enumerator::SoALexicographicEnumerator",
676# KernelVariant.TaskGraphAoSHeap: "::exahype2::enumerator::AoSLexicographicEnumerator",
677# KernelVariant.TaskGraphAoSoAHeap: "::exahype2::enumerator::AoSoALexicographicEnumerator",
678# KernelVariant.TaskGraphSoAHeap: "::exahype2::enumerator::SoALexicographicEnumerator",
679# }
680#
681# template = KernelCalls[kernel_variant]
682#
683# if solver_variant == SolverVariant.WithVirtualFunctions:
684# template += """_functors(
685# patchData,
686# {{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}},
687# {{HALO_SIZE}},
688# {{NUMBER_OF_UNKNOWNS}},
689# {{NUMBER_OF_AUXILIARY_VARIABLES}},
690# {{KOSIGMA}},
691# {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
692# {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
693# {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
694# false, // Runge-Kutta, so no need to copy old data over or to take dt into account
695# ::exahype2::fd::fd4::DifferentialSourceTermVariant::CentralDifferencesWithLopsidedAdvection,
696# [&](
697# const double * __restrict__ Q,
698# const tarch::la::Vector<Dimensions,double>& faceCentre,
699# const tarch::la::Vector<Dimensions,double>& volumeH,
700# double t,
701# double dt,
702# int normal,
703# double * __restrict__ F
704# )->void {
705# {% if FLUX_IMPLEMENTATION!="<none>" %}
706# repositories::{{SOLVER_INSTANCE}}.flux( Q, faceCentre, volumeH, t, dt, normal, F );
707# {% endif %}
708# },
709# [&](
710# const double * __restrict__ Q,
711# const double * __restrict__ deltaQ,
712# const tarch::la::Vector<Dimensions,double>& faceCentre,
713# const tarch::la::Vector<Dimensions,double>& volumeH,
714# double t,
715# double dt,
716# int normal,
717# double * __restrict__ BTimesDeltaQ
718# )->void {
719# {% if NCP_IMPLEMENTATION!="<none>" %}
720# repositories::{{SOLVER_INSTANCE}}.nonconservativeProduct( Q, deltaQ, faceCentre, volumeH, t, dt, normal, BTimesDeltaQ );
721# {% endif %}
722# },
723# [&](
724# const double * __restrict__ Q,
725# const tarch::la::Vector<Dimensions,double>& volumeX,
726# const tarch::la::Vector<Dimensions,double>& volumeH,
727# double t,
728# double dt,
729# double * __restrict__ S
730# )->void {
731# {% if SOURCE_IMPLEMENTATION!="<none>" %}
732# repositories::{{SOLVER_INSTANCE}}.sourceTerm( Q, volumeX, volumeH, t, dt, S );
733# {% endif %}
734# }
735#);
736# """
737# elif solver_variant == SolverVariant.Stateless:
738# template += """_static_calls<{{SOLVER_NAME}},{{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}},{{HALO_SIZE}}, {{NUMBER_OF_UNKNOWNS}}, {{NUMBER_OF_AUXILIARY_VARIABLES}}, {{TEMP_DATA_ENUMERATOR}}>(
739# patchData,
740# {{KOSIGMA}},
741# {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
742# {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
743# {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
744# false, // Runge-Kutta, so no need to copy old data over or to take dt into account
745# ::exahype2::fd::fd4::DifferentialSourceTermVariant::CentralDifferencesWithLopsidedAdvection
746# );
747# """
748# elif solver_variant == SolverVariant.Multicore:
749# template += """_multicore_static_calls<{{SOLVER_NAME}},{{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}},{{HALO_SIZE}}, {{NUMBER_OF_UNKNOWNS}}, {{NUMBER_OF_AUXILIARY_VARIABLES}}, {{TEMP_DATA_ENUMERATOR}}>(
750# patchData,
751# {{KOSIGMA}},
752# {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
753# {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
754# {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
755# false, // Runge-Kutta, so no need to copy old data over or to take dt into account
756# ::exahype2::fd::fd4::DifferentialSourceTermVariant::CentralDifferencesWithLopsidedAdvection
757# );
758# """
759# elif solver_variant == SolverVariant.AcceleratorWithExplicitCopy:
760# template += """_static_calls<{{SOLVER_NAME}},{{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}},{{HALO_SIZE}}, {{NUMBER_OF_UNKNOWNS}}, {{NUMBER_OF_AUXILIARY_VARIABLES}}, {{TEMP_DATA_ENUMERATOR}}>(
761# targetDevice,
762# patchData,
763# {{KOSIGMA}},
764# {% if FLUX_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
765# {% if NCP_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
766# {% if SOURCE_IMPLEMENTATION=="<none>" %} false {% else %} true {% endif %},
767# false, // Runge-Kutta, so no need to copy old data over or to take dt into account
768# ::exahype2::fd::fd4::DifferentialSourceTermVariant::CentralDifferencesWithLopsidedAdvection
769# );
770# """
771# else:
772# assert False, "not supported combination: {} x {}".format( solver_variant, kernel_variant )
773#
774# result = jinja2.Template( template, undefined=jinja2.DebugUndefined)
775# d= {}
776# d[ "FLUX_IMPLEMENTATION" ] = flux_implementation
777# d[ "NCP_IMPLEMENTATION" ] = ncp_implementation
778# d[ "SOURCE_IMPLEMENTATION" ] = source_implementation
779# d[ "COMPUTE_MAX_EIGENVALUE" ] = compute_max_eigenvalue_of_next_time_step
780# d[ "KOSIGMA" ] = KOSigma
781# d[ "TEMP_DATA_ENUMERATOR" ] = EnumeratorTemplateTypes[kernel_variant]
782# return result.render(**d)
create_derivative_calculation_kernel_for_FD4InExaGRyPE_DSL(SolverVariant solver_variant, KernelVariant kernel_variant, SecondOrder)
Definition kernels.py:601
create_compute_kernel_for_FD4InExaGRyPE_DSL(flux_implementation, ncp_implementation, source_implementation, compute_max_eigenvalue_of_next_time_step, SolverVariant solver_variant, KernelVariant kernel_variant, KOSigma, SecondOrder)
I return only the unqualified function call, i.e.
Definition kernels.py:378
create_compute_kernel_for_FD4_DSL(flux_implementation, ncp_implementation, source_implementation, compute_max_eigenvalue_of_next_time_step, SolverVariant solver_variant, KernelVariant kernel_variant, KOSigma)
I return only the unqualified function call, i.e.
Definition kernels.py:495