#include "{{CLASSNAME}}.h"

// ... (further includes elided in this excerpt)

#if defined(UseSmartMPI)
#include "communication/Tags.h"
#endif
int {{NAMESPACE | join("::")}}::{{CLASSNAME}}::getEnclaveTaskTypeId() {
  return _enclaveTaskTypeId;
}
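// Note (not part of the original file): the enclave task type id is an integer
// handle under which tasks of this class are registered with the tasking
// backend. It is presumably obtained once via a getTaskType("<class name>")
// lookup; that initialisation is not part of this excerpt, so treat the
// details as an assumption.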
double {{NAMESPACE | join("::")}}::{{CLASSNAME}}::applyKernelToCell(
  const ::peano4::datamanagement::CellMarker& marker,
  // ... (further arguments elided in this excerpt)
  double* __restrict__ reconstructedPatch,
  double* __restrict__ targetPatch
) {
  // ...

  {% if STATELESS_PDE_TERMS %}
  if ( repositories::{{SOLVER_INSTANCE}}.patchCanUseStatelessPDETerms( /* ... */ ) ) {
    // Fused kernel for patches whose PDE terms need no solver state
    ::exahype2::fd::{{KERNEL_NAMESPACE}}::{{FUSED_COMPUTE_KERNEL_CALL_CPU}}
    // ... (call arguments elided in this excerpt)
  }
  else
  {% endif %}
  {
    // Generic compute kernel
    ::exahype2::fd::{{KERNEL_NAMESPACE}}::{{COMPUTE_KERNEL_CALL}}
    // ... (call arguments elided in this excerpt)
  }

  // ... (patch validation elided in this excerpt; the surviving arguments are
  //      {{NUMBER_OF_UNKNOWNS}}, {{NUMBER_OF_AUXILIARY_VARIABLES}},
  //      {{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}}, and a location string of the form
  //      std::string(__FILE__) + "(" + std::to_string(__LINE__) + "): " + marker.toString())

  {% if COMPUTE_MAX_EIGENVALUE %}
  return patchData.maxEigenvalue[0];
  {% endif %}
  // ...
}
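// Note (not part of the original file): the constructor below presumably
// forwards the cell marker plus the sizes of the input data (patch including a
// halo of width {{OVERLAP}}) and of the output data (patch without halo) to
// its base class, registers the object as a smartmpi::Task when SmartMPI is
// enabled, and assigns the configured enclave task priority.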
{{NAMESPACE | join("::")}}::{{CLASSNAME}}::{{CLASSNAME}}(
  const ::peano4::datamanagement::CellMarker& marker,
  // ... (further constructor arguments elided in this excerpt)
  double* __restrict__ reconstructedPatch,
  double* __restrict__ output
):
  // ... (base-class initialisation elided in this excerpt; the data sizes passed on are
  //      input = patch plus halo, output = patch only)
  #if Dimensions==2
  ({{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}}+2*{{OVERLAP}}) * ({{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}}+2*{{OVERLAP}}) * ({{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}),
  {{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}} * {{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}} * {{NUMBER_OF_UNKNOWNS}},
  #else
  ({{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}}+2*{{OVERLAP}}) * ({{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}}+2*{{OVERLAP}}) * ({{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}}+2*{{OVERLAP}}) * ({{NUMBER_OF_UNKNOWNS}}+{{NUMBER_OF_AUXILIARY_VARIABLES}}),
  {{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}} * {{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}} * {{NUMBER_OF_GRID_CELLS_PER_PATCH_PER_AXIS}} * {{NUMBER_OF_UNKNOWNS}},
  #endif
  // ... (the task functor, elided in this excerpt, eventually computes
  //      _maxEigenvalue = applyKernelToCell( ... ))
  // ...
  , smartmpi::Task(_enclaveTaskTypeId)
{
  setPriority( {{ENCLAVE_TASK_PRIORITY}} );
  // ...
}
bool {{NAMESPACE | join("::")}}::{{CLASSNAME}}::isSmartMPITask() const {
  // ...
}
void {{NAMESPACE | join("::")}}::{{CLASSNAME}}::runLocally() {
  // ...
  if (_remoteTaskId != -1) {
    // ...
  }
  // ...
}
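// Note (not part of the original file): moveTask() ships this task to another
// rank. Judging by the surviving lines, it first picks the task id to
// advertise (the already-known remote id or this task's own id, depending on
// the tag), exchanges the metadata shown in the log message (cell marker, t,
// dt, task id; elided here), and then sends the reconstructed input patch as
// one block of _numberOfInputValues doubles.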
void {{NAMESPACE | join("::")}}::{{CLASSNAME}}::moveTask(int rank, int tag, MPI_Comm communicator) {
  // ...
  if ( tag != smartmpi::communication::MoveTaskToMyServerForEvaluationTag &&
       tag != smartmpi::communication::MoveTaskToComputesComputeRankTag ) {
    taskIdMessage.setValue(_remoteTaskId);
  }
  else {
    taskIdMessage.setValue(getTaskId());
  }

  // ... (metadata exchange elided in this excerpt)
  MPI_Isend( _inputValues, _numberOfInputValues, MPI_DOUBLE, rank, tag, communicator, &request );

  // ...
  logInfo(
    "moveTask(...)",
    "sent (" << _marker.toString() <<
    "," << tMessage.toString() <<
    "," << dtMessage.toString() <<
    "," << _numberOfInputValues <<
    "," << taskIdMessage.toString() <<
    ") to rank " << rank
    // ... (remainder of the log message elided in this excerpt)
  );
}
smartmpi::Task* {{NAMESPACE | join("::")}}::{{CLASSNAME}}::receiveTask(int rank, int tag, MPI_Comm communicator) {
  // ...
  const int NumberOfInputValues =
    #if Dimensions==2
    {{NUMBER_OF_DOUBLE_VALUES_IN_PATCH_PLUS_HALO_2D}};
    #else
    {{NUMBER_OF_DOUBLE_VALUES_IN_PATCH_PLUS_HALO_3D}};
    #endif

  // ... (metadata receive elided in this excerpt)
  logInfo(
    "receiveTask(...)",
    "received (" << markerMessage.toString() <<
    "," << tMessage.toString() <<
    "," << dtMessage.toString() <<
    "," << taskIdMessage.toString() <<
    ") from rank " << rank <<
    " via tag " << tag <<
    " and will now receive " << NumberOfInputValues <<
    " doubles"
  );

  // ... (allocation of inputValues elided in this excerpt)
  MPI_Recv( inputValues, NumberOfInputValues, MPI_DOUBLE, rank, tag, communicator,
            /* status argument elided in this excerpt */ );

  // ...
  {{CLASSNAME}}* result = new {{CLASSNAME}}(
    // ...
  );
  result->_remoteTaskId = taskIdMessage.getValue();
  // ...
  return result;
}
void {{NAMESPACE | join("::")}}::{{CLASSNAME}}::runLocallyAndSendTaskOutputToRank(int rank, int tag, MPI_Comm communicator) {
  // ... (local execution elided in this excerpt)
  logInfo(
    "runLocallyAndSendTaskOutputToRank(...)",
    "executed remote task on this rank. Will start to send result back"
  );
  // ...
  forwardTaskOutputToRank(rank, tag, communicator);
}
void {{NAMESPACE | join("::")}}::{{CLASSNAME}}::forwardTaskOutputToRank(int rank, int tag, MPI_Comm communicator) {
  // ...
  logInfo(
    "forwardTaskOutputToRank(...)",
    "will start to forward task output (which has already been computed)"
  );

  // ... (metadata exchange elided in this excerpt)
  MPI_Isend( _outputValues, _numberOfResultValues, MPI_DOUBLE, rank, tag, communicator, &request );

  // ...
  logInfo(
    "forwardTaskOutputToRank(...)",
    "sent (" << _marker.toString() <<
    "," << tMessage.toString() <<
    "," << dtMessage.toString() <<
    "," << _numberOfResultValues <<
    "," << taskIdMessage.toString() <<
    ") to rank " << rank
    // ... (remainder of the log message elided in this excerpt)
  );
  // ...
}
smartmpi::Task* {{NAMESPACE | join("::")}}::{{CLASSNAME}}::receiveOutcome(int rank, int tag, MPI_Comm communicator, const bool intentionToForward) {
  logInfo("receiveOutcome(...)", "rank=" << rank << ", tag=" << tag);
  // ...
  const int NumberOfResultValues =
    #if Dimensions==2
    {{NUMBER_OF_DOUBLE_VALUES_IN_PATCH_2D}};
    #else
    {{NUMBER_OF_DOUBLE_VALUES_IN_PATCH_3D}};
    #endif

  // ... (metadata receive elided in this excerpt)
  logInfo(
    "receiveOutcome(...)",
    "received (" << markerMessage.toString() <<
    "," << tMessage.toString() <<
    "," << dtMessage.toString() <<
    "," << taskIdMessage.toString() <<
    ") from rank " << rank <<
    " via tag " << tag <<
    " and will now receive " << NumberOfResultValues <<
    " doubles"
  );

  // ... (allocation of outputValues elided in this excerpt)
  MPI_Recv( outputValues, NumberOfResultValues, MPI_DOUBLE, rank, tag, communicator,
            /* status argument elided in this excerpt */ );

  // ...
  if (intentionToForward) {
    double* inputValues = nullptr;   // no input values: the outcome is only forwarded, not re-run
    // ...
    {{CLASSNAME}}* result = new {{CLASSNAME}}(
      // ...
    );
    // ...
    result->_remoteTaskId  = taskIdMessage.getValue();
    result->_outputValues  = outputValues;
    result->_maxEigenvalue = eValueMessage.getValue();
    // ...
    return result;
  }

  // ...
  logInfo(
    "receiveOutcome(...)",
    "bookmark outcome of task " << taskIdMessage.getValue()
  );
  // ...
}
{% if STATELESS_PDE_TERMS %}
bool {{NAMESPACE | join("::")}}::{{CLASSNAME}}::canFuse() const {
  return repositories::{{SOLVER_INSTANCE}}.patchCanUseStatelessPDETerms(
    // ... (arguments elided in this excerpt)
  );
}
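// Note (not part of the original file): fuse() is the batched/GPU pathway. It
// gathers the input pointers, cell geometry, time-step data and task ids of
// all tasks to be fused into one patchData structure, dispatches that batch to
// an OpenMP, HIP or SYCL offloading kernel if one is compiled in and a device
// is requested, and otherwise falls back to the fused CPU kernel.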
void {{NAMESPACE | join("::")}}::{{CLASSNAME}}::fuse( const std::list<Task*>& otherTasks, int targetDevice ) {
  logDebug(
    "fuse(...)",
    "asked to fuse " << (otherTasks.size()+1) << " tasks into one large GPU task"
  );
  // ... (allocation of patchData for otherTasks.size()+1 patches elided in this excerpt)
  int currentTask = 0;
  for (auto& p: otherTasks) {
    patchData.QIn[currentTask]        = static_cast<{{NAMESPACE | join("::")}}::{{CLASSNAME}}*>(p)->_inputValues;
    patchData.cellCentre[currentTask] = static_cast<{{NAMESPACE | join("::")}}::{{CLASSNAME}}*>(p)->_marker.x();
    patchData.cellSize[currentTask]   = static_cast<{{NAMESPACE | join("::")}}::{{CLASSNAME}}*>(p)->_marker.h();
    patchData.t[currentTask]          = static_cast<{{NAMESPACE | join("::")}}::{{CLASSNAME}}*>(p)->_t;
    patchData.dt[currentTask]         = static_cast<{{NAMESPACE | join("::")}}::{{CLASSNAME}}*>(p)->_dt;
    patchData.id[currentTask]         = static_cast<{{NAMESPACE | join("::")}}::{{CLASSNAME}}*>(p)->_taskNumber;
    // ...
    currentTask++;
  }

  // Finally, register this task itself as the last entry of the batch
  patchData.QIn[currentTask]        = _inputValues;
  patchData.cellCentre[currentTask] = _marker.x();
  patchData.cellSize[currentTask]   = _marker.h();
  patchData.t[currentTask]          = _t;
  patchData.dt[currentTask]         = _dt;
  patchData.id[currentTask]         = _taskNumber;
  // ...
  bool foundOffloadingBranch = false;

  #if defined(GPUOffloadingOMP)
  if (targetDevice>=0) {
    foundOffloadingBranch = true;
    ::exahype2::fd::{{KERNEL_NAMESPACE}}::omp::{{FUSED_COMPUTE_KERNEL_CALL_GPU}}
    // ... (call arguments elided in this excerpt)
  }
  #endif

  #if defined(GPUOffloadingHIP)
  if (targetDevice>=0) {
    foundOffloadingBranch = true;
    ::exahype2::fd::{{KERNEL_NAMESPACE}}::hip::{{FUSED_COMPUTE_KERNEL_CALL_GPU}}
    // ... (call arguments elided in this excerpt)
  }
  #endif

  #if defined(GPUOffloadingSYCL)
  if (targetDevice>=0 or targetDevice==Host) {
    foundOffloadingBranch = true;
    ::exahype2::fd::{{KERNEL_NAMESPACE}}::sycl::{{FUSED_COMPUTE_KERNEL_CALL_GPU}}
    // ... (call arguments elided in this excerpt)
  }
  #endif

  if (not foundOffloadingBranch) {
    // Fall back to the fused CPU kernel if no offloading branch is compiled in or selected
    ::exahype2::fd::{{KERNEL_NAMESPACE}}::{{FUSED_COMPUTE_KERNEL_CALL_CPU}}
    // ... (call arguments elided in this excerpt)
  }

  // ... (per-task post-processing of the batched outcome elided in this excerpt)
  for (int i=0; i<patchData.numberOfCells; i++) {
    // ...
  }
  // ...
}
{% endif %}
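// Hedged sketch (not part of the original file): the loop above presumably
// hands each fused outcome back to the central bookkeeping so the
// corresponding enclave tasks can complete, roughly along the lines of
//
//   ::exahype2::EnclaveBookkeeping::getInstance().finishedTask(
//     patchData.id[i],            // task number of the i-th fused patch
//     _numberOfResultValues,      // doubles per result patch (assumed identical for all fused tasks)
//     patchData.QOut[i],          // result data of the i-th patch (field name QOut is an assumption)
//     patchData.maxEigenvalue[i]  // admissible eigenvalue of the i-th patch
//   );
//
// finishedTask(int taskNumber, int numberOfResultValues, double* data, double maxEigenvalue)
// is the bookkeeping signature; everything else here is a sketch, not the
// original implementation.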