#include "{{CLASSNAME}}.h"

#if GPUOffloadingSYCL>0
#include "tarch/multicore/sycl/SYCL.h"
#endif

#if defined(UseSmartMPI)
#include "communication/Tags.h"
#endif
int {{NAMESPACE | join("::")}}::{{CLASSNAME}}::getEnclaveTaskTypeId() {
  return _enclaveTaskTypeId;
}
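// Each enclave task class carries a unique integer type id. A minimal sketch
// of how the static _enclaveTaskTypeId is typically initialised (an assumption
// -- the defining line is not part of this excerpt, and the exact namespace of
// getTaskType may differ from the one shown):
//
//   int {{NAMESPACE | join("::")}}::{{CLASSNAME}}::_enclaveTaskTypeId =
//     peano4::parallel::getTaskType("{{NAMESPACE | join("::")}}::{{CLASSNAME}}");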
void {{NAMESPACE | join("::")}}::{{CLASSNAME}}::applyKernelToCell(
  const ::peano4::datamanagement::CellMarker& marker,
  // ...
  double* __restrict__ QIn,
  double* __restrict__ QOut
) {
  // ...
  {% if STATELESS_PDE_TERMS %}
  if (repositories::{{SOLVER_INSTANCE}}.cellCanUseStatelessPDETerms(
    // ...
  )) {
    ::exahype2::dg::{{KERNEL_NAMESPACE}}::{{VOLUMETRIC_COMPUTE_KERNEL_CALL_STATELESS}}
  }
  else
  {% endif %}
  {
    ::exahype2::dg::{{KERNEL_NAMESPACE}}::{{VOLUMETRIC_COMPUTE_KERNEL_CALL}}
  }
}
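// Reading guide (assumptions where lines are elided): the omitted parameters of
// applyKernelToCell() presumably carry the time stamp and time-step size, and
// the omitted arguments of cellCanUseStatelessPDETerms() the cell geometry and
// time data. If the solver confirms that the cell needs no per-cell solver
// state, the generator injects the stateless volumetric kernel; otherwise the
// generic volumetric kernel that works on the solver instance is used.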
{{NAMESPACE | join("::")}}::{{CLASSNAME}}::{{CLASSNAME}}(
  const ::peano4::datamanagement::CellMarker& marker,
  // ...
  const double* __restrict__ linearCombinationOfPreviousShots
  // ...
):
  // ...
    {{NUMBER_OF_DOFS_PER_CELL_2D}}*({{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}),
    {{NUMBER_OF_DOFS_PER_CELL_2D}}*{{NUMBER_OF_UNKNOWNS}},
  // ...
    {{NUMBER_OF_DOFS_PER_CELL_3D}}*({{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}),
    {{NUMBER_OF_DOFS_PER_CELL_3D}}*{{NUMBER_OF_UNKNOWNS}},
  // ...
  smartmpi::Task(_enclaveTaskTypeId),
  // ...
{
  setPriority({{ENCLAVE_TASK_PRIORITY}});
  std::copy_n(linearCombinationOfPreviousShots, _numberOfInputValues, _inputValues);
  // ...
}
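// The two size expressions above configure the task's buffers: the input
// buffer holds all degrees of freedom of the cell times (unknowns + auxiliary
// variables), the result buffer only the unknowns. The elided lines around
// them presumably switch between the 2D and 3D constants via a dimension
// guard and forward both sizes, together with the functor that calls
// applyKernelToCell(), to the ::exahype2::EnclaveTask base-class constructor.
// When SmartMPI is enabled, the smartmpi::Task base class is additionally
// initialised with the enclave task type id.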
{{NAMESPACE | join("::")}}::{{CLASSNAME}}::{{CLASSNAME}}(
  const ::peano4::datamanagement::CellMarker& marker,
  // ...
  std::shared_ptr< double[] > linearCombinationOfPreviousShots,
  double* __restrict__ output
  // ...
):
  // ...
    linearCombinationOfPreviousShots.get(),
  // ...
    {{NUMBER_OF_DOFS_PER_CELL_2D}}*({{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}),
    {{NUMBER_OF_DOFS_PER_CELL_2D}}*{{NUMBER_OF_UNKNOWNS}},
  // ...
    {{NUMBER_OF_DOFS_PER_CELL_3D}}*({{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}),
    {{NUMBER_OF_DOFS_PER_CELL_3D}}*{{NUMBER_OF_UNKNOWNS}},
  // ...
  smartmpi::Task(_enclaveTaskTypeId),
  // ...
{
  setPriority({{ENCLAVE_TASK_PRIORITY}});
  // ...
}
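// In contrast to the first constructor, this overload receives the linear
// combination of previous shots as a std::shared_ptr (handed on via .get())
// plus a caller-provided output array. Note that, unlike above, no std::copy_n
// of the input values appears in the visible part of the body.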
bool {{NAMESPACE | join("::")}}::{{CLASSNAME}}::isSmartMPITask() const {
  // ...
}
void {{NAMESPACE | join("::")}}::{{CLASSNAME}}::runLocally() {
  // ...
  if (_remoteTaskId != -1) {
    // ...
  }
  // ...
}
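// A hedged note on the visible check: a _remoteTaskId different from -1
// suggests the task originates from a remote rank, so its outcome has to be
// booked under the remote id rather than the local one; the corresponding
// branch bodies are elided in this excerpt.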
void {{NAMESPACE | join("::")}}::{{CLASSNAME}}::moveTask(int rank, int tag, MPI_Comm communicator) {
  // ...
  if ( tag != smartmpi::communication::MoveTaskToMyServerForEvaluationTag &&
       tag != smartmpi::communication::MoveTaskToComputesComputeRankTag ) {
    taskIdMessage.setValue(_remoteTaskId);
  } else {
    taskIdMessage.setValue(getTaskId());
  }
  // ...
  MPI_Isend( _inputValues, _numberOfInputValues, MPI_DOUBLE, rank, tag, communicator, &request );
  // ...
  logInfo(
    "moveTask(...)",
    "sent (" << _marker.toString() <<
    "," << tMessage.toString() <<
    "," << dtMessage.toString() <<
    "," << _numberOfInputValues <<
    "," << taskIdMessage.toString() <<
    ") to rank " << rank <<
    // ...
  );
}
smartmpi::Task* {{NAMESPACE | join("::")}}::{{CLASSNAME}}::receiveTask(int rank, int tag, MPI_Comm communicator) {
  // ...
  const int NumberOfInputValues =
    // ...
    {{NUMBER_OF_DOUBLE_VALUES_IN_PATCH_PLUS_HALO_2D}};
    // ...
    {{NUMBER_OF_DOUBLE_VALUES_IN_PATCH_PLUS_HALO_3D}};
  // ...
  logInfo(
    "receiveTask(...)",
    "received (" << markerMessage.toString() <<
    "," << tMessage.toString() <<
    "," << dtMessage.toString() <<
    "," << taskIdMessage.toString() <<
    ") from rank " << rank <<
    " via tag " << tag <<
    " and will now receive " << NumberOfInputValues << " doubles"
  );
  // ...
  MPI_Recv( inputValues, NumberOfInputValues, MPI_DOUBLE, rank, tag, communicator, /* ... */ );
  // ...
  {{CLASSNAME}}* result = new {{CLASSNAME}}(
    // ...
  );
  result->_remoteTaskId = taskIdMessage.getValue();
  // ...
}
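// Reading guide (assumption, as several lines are elided): the choice between
// the 2D and 3D patch-plus-halo constant is presumably made by a dimension
// preprocessor guard. The routine first receives marker, time stamp, time-step
// size and task id as individual messages, then the raw input doubles, builds
// a new task object from them, and stores the sender's task id in
// _remoteTaskId so that the outcome can later be routed back.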
void {{NAMESPACE | join("::")}}::{{CLASSNAME}}::runLocallyAndSendTaskOutputToRank(int rank, int tag, MPI_Comm communicator) {
  // ...
  logInfo(
    "runLocallyAndSendTaskOutputToRank(...)",
    "executed remote task on this rank. Will start to send result back"
  );
  // ...
  forwardTaskOutputToRank(rank, tag, communicator);
}
void {{NAMESPACE | join("::")}}::{{CLASSNAME}}::forwardTaskOutputToRank(int rank, int tag, MPI_Comm communicator) {
  // ...
  logInfo(
    "forwardTaskOutputToRank(...)",
    "will start to forward task output (which has already been computed)"
  );
  // ...
  MPI_Isend( _outputValues, _numberOfResultValues, MPI_DOUBLE, rank, tag, communicator, &request );
  // ...
  logInfo(
    "forwardTaskOutputToRank(...)",
    "sent (" << _marker.toString() <<
    "," << tMessage.toString() <<
    "," << dtMessage.toString() <<
    "," << _numberOfResultValues <<
    "," << taskIdMessage.toString() <<
    ") to rank " << rank <<
    // ...
  );
}
smartmpi::Task* {{NAMESPACE | join("::")}}::{{CLASSNAME}}::receiveOutcome(int rank, int tag, MPI_Comm communicator, const bool intentionToForward) {
  logInfo( "receiveOutcome(...)", "rank=" << rank << ", tag=" << tag );
  // ...
  const int NumberOfResultValues =
    // ...
    {{NUMBER_OF_DOUBLE_VALUES_IN_PATCH_2D}};
    // ...
    {{NUMBER_OF_DOUBLE_VALUES_IN_PATCH_3D}};
  // ...
  logInfo(
    "receiveOutcome(...)",
    "received (" << markerMessage.toString() <<
    "," << tMessage.toString() <<
    "," << dtMessage.toString() <<
    "," << taskIdMessage.toString() <<
    ") from rank " << rank <<
    " via tag " << tag <<
    " and will now receive " << NumberOfResultValues << " doubles"
  );
  // ...
  MPI_Recv( outputValues, NumberOfResultValues, MPI_DOUBLE, rank, tag, communicator, /* ... */ );
  // ...
  if (intentionToForward) {
    double* inputValues = nullptr;
    // ...
    {{CLASSNAME}}* result = new {{CLASSNAME}}(
      // ...
    );
    result->_remoteTaskId  = taskIdMessage.getValue();
    result->_outputValues  = outputValues;
    result->_maxEigenvalue = eValueMessage.getValue();
    // ...
  }
  // ...
  logInfo(
    "receiveOutcome(...)",
    "bookmark outcome of task " << taskIdMessage.getValue()
  );
  // ...
}
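// Behavioural sketch (hedged, as the branch structure is partly elided): if
// the outcome arrives with the intention to forward it to yet another rank, a
// shell task without input values is created that merely carries the received
// output and maximum eigenvalue. Otherwise the outcome is bookmarked locally,
// presumably via
//   ::exahype2::EnclaveBookkeeping::getInstance().finishedTask(
//     taskNumber, numberOfResultValues, data, maxEigenvalue );
// so that the waiting enclave cell can pick up its result.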
{% if STATELESS_PDE_TERMS %}

bool {{NAMESPACE | join("::")}}::{{CLASSNAME}}::canFuse() const {
  return repositories::{{SOLVER_INSTANCE}}.cellCanUseStatelessPDETerms(
    // ...
  );
}
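// Fusion into one batched device task is only offered when the solver reports
// that the cell can be handled by the stateless PDE terms, i.e. when the
// kernel does not need per-cell solver state on the device.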
void {{NAMESPACE | join("::")}}::{{CLASSNAME}}::fuse( const std::list<Task*>& otherTasks, int targetDevice ) {
  // ... (opening of the log statement is elided)
      << " asked to fuse " << (otherTasks.size() + 1)
      << " tasks into one large GPU task on device " << targetDevice);
  // ...
  for (auto& p: otherTasks) {
    cellData.QIn[currentTask]        = static_cast<{{NAMESPACE | join("::")}}::{{CLASSNAME}}*>(p)->_inputValues;
    cellData.cellCentre[currentTask] = static_cast<{{NAMESPACE | join("::")}}::{{CLASSNAME}}*>(p)->_marker.x();
    cellData.cellSize[currentTask]   = static_cast<{{NAMESPACE | join("::")}}::{{CLASSNAME}}*>(p)->_marker.h();
    cellData.t[currentTask]          = static_cast<{{NAMESPACE | join("::")}}::{{CLASSNAME}}*>(p)->_t;
    cellData.dt[currentTask]         = static_cast<{{NAMESPACE | join("::")}}::{{CLASSNAME}}*>(p)->_dt;
    cellData.id[currentTask]         = static_cast<{{NAMESPACE | join("::")}}::{{CLASSNAME}}*>(p)->_taskNumber;
    // ...
  }

  cellData.QIn[currentTask]        = _inputValues;
  cellData.cellCentre[currentTask] = _marker.x();
  cellData.cellSize[currentTask]   = _marker.h();
  cellData.t[currentTask]          = _t;
  cellData.dt[currentTask]         = _dt;
  cellData.id[currentTask]         = _taskNumber;
  // ...

  bool foundOffloadingBranch = false;
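// The fused task gathers, per participating task, the input pointer, cell
// centre, cell size, time stamp, time-step size and task number into one
// batched container (cellData, presumably an ::exahype2::CellData instance
// whose construction is elided above). currentTask indexes the slot, and the
// task on which fuse() is invoked adds itself as the last entry.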
  #if defined(GPUOffloadingOMP)
  if (targetDevice>=0) {
    foundOffloadingBranch = true;
    ::exahype2::dg::{{KERNEL_NAMESPACE}}::omp::{{FUSED_VOLUMETRIC_COMPUTE_KERNEL_CALL_STATELESS_GPU}}
    // ...
  }
  #endif

  #if defined(GPUOffloadingHIP)
  if (targetDevice>=0) {
    foundOffloadingBranch = true;
    ::exahype2::dg::{{KERNEL_NAMESPACE}}::hip::{{FUSED_VOLUMETRIC_COMPUTE_KERNEL_CALL_STATELESS_GPU}}
    // ...
  }
  #endif

  #if defined(GPUOffloadingSYCL)
  if (targetDevice>=0 or targetDevice==Host) {
    foundOffloadingBranch = true;
    ::exahype2::dg::{{KERNEL_NAMESPACE}}::sycl::{{FUSED_VOLUMETRIC_COMPUTE_KERNEL_CALL_STATELESS_GPU}}
    // ...
  }
  #endif

  if (not foundOffloadingBranch) {
    // ... (opening of the log statement is elided)
        << " cannot find offloading branch for device " << targetDevice
        << ". Process fused tasks on the CPU.");
    ::exahype2::dg::{{KERNEL_NAMESPACE}}::{{FUSED_VOLUMETRIC_COMPUTE_KERNEL_CALL_STATELESS_CPU}}
    // ...
  }
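// Dispatch pattern: each accelerator backend (OpenMP offloading, HIP, SYCL) is
// compiled in behind its own preprocessor guard and claims the batch by
// setting foundOffloadingBranch; the SYCL branch additionally accepts the host
// device. If no backend matches the requested target device, the batch falls
// back to the CPU variant of the fused volumetric kernel.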
  // ...
  for (int i=0; i<cellData.numberOfCells; i++) {
    // ...
  }
  // ...
}
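// The trailing loop presumably hands each cell's outcome back to the enclave
// bookkeeping, e.g. along the lines of
//   ::exahype2::EnclaveBookkeeping::getInstance().finishedTask(
//     cellData.id[i], _numberOfResultValues, cellData.QOut[i], cellData.maxEigenvalue[i] );
// the field names used here are assumptions, as the loop body is elided.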