22#ifdef CompilerHasUTSName
23#include <sys/utsname.h>
40 assertion2(numberOfTags>=1,fullQualifiedMessageName,numberOfTags);
77 MPI_Iprobe(fromRank, tag,
_communicator, &flag, MPI_STATUS_IGNORE);
90 MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG,
_communicator, &flag, &status);
92 logError(
"plotMessageQueues()",
"there are no messages from any sender in MPI queue");
96 "plotMessageQueues()",
97 "there is still a message in queue "
98 " from rank " << status.MPI_SOURCE <<
99 " with tag " << status.MPI_TAG
125 const std::string& className,
126 const std::string& methodName,
127 int communicationPartnerRank,
129 int numberOfExpectedMessages,
130 const std::string& comment
133 std::ostringstream out;
134 out <<
"operation " << className <<
"::" << methodName <<
" on node "
136 <<
" seconds for " << numberOfExpectedMessages
137 <<
" message(s) from node " << communicationPartnerRank <<
" with tag " << tag
138 <<
". Timeout. " << comment;
139 logError(
"triggerDeadlockTimeOut(...)", out.str() );
149 const std::string& className,
150 const std::string& methodName,
151 int communicationPartnerRank,
153 int numberOfExpectedMessages
157 "writeTimeOutWarning(...)",
158 "operation " << className <<
"::" << methodName <<
" on node "
160 <<
" seconds for " << numberOfExpectedMessages
161 <<
" message(s) from node " << communicationPartnerRank <<
" with tag " << tag
166 "writeTimeOutWarning(...)",
167 "application will terminate after " << std::to_string(
_deadlockTimeOut.count()) <<
" seconds because of a deadlock"
171 "writeTimeOutWarning(...)",
172 "deadlock detection switched off as deadlock time out is set to " <<
_deadlockTimeOut.count() <<
", i.e. you will continue to get warnings, but code will not shut down"
183 "writeTimeOutWarning(...)",
184 "increase time out warning threshold from " << std::to_string(
_timeOutWarning.count()) <<
" seconds to " << std::to_string(newTimeOutWarning.count()) <<
" seconds to avoid flood of warning messages"
220 _initIsCalled(false),
222 _numberOfProcessors(1),
225 _areTimeoutsEnabled(true) {}
232 const void *sendbuf,
void *recvbuf,
int count,
233 MPI_Datatype datatype,
235 std::function<
void()> waitor
240 MPI_Request* request =
new MPI_Request();
241 MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op,
getCommunicator(), request );
255 MPI_Test( request, &flag, MPI_STATUS_IGNORE );
266 const void *sendbuf,
void *recvbuf,
int count,
267 MPI_Datatype datatype,
269 std::function<
void()> waitor
274 MPI_Request* request =
new MPI_Request();
275 MPI_Ireduce(sendbuf, recvbuf, count, datatype, op, root,
getCommunicator(), request );
288 MPI_Test( request, &flag, MPI_STATUS_IGNORE );
304 MPI_Request* request =
new MPI_Request();
318 MPI_Test( request, &flag, MPI_STATUS_IGNORE );
349 int errorCode = MPI_Finalize();
378 std::ostringstream statusMessage;
379 statusMessage <<
"MPI status:";
381 #ifdef CompilerHasUTSName
382 utsname* utsdata =
new utsname();
385 statusMessage <<
" nodename=" << utsdata->nodename;
388 statusMessage <<
" nodename=undef";
391 statusMessage <<
", rank=" <<
_rank;
397 logInfo(
"logStatus()", statusMessage.str() );
410 logWarning(
"validateMaxTagIsSupported()",
"maximum tag value is " <<
_maxTags <<
" though we would need " <<
_tagCounter <<
" tags. Code will likely crash" );
// Intentionally empty function. Only its address is used: further down in this
// file it is cast to void* and handed to td_init().
416static void tdTextPointer() {}
427 int result = MPI_SUCCESS;
429 #if defined( SharedMemoryParallelisation )
432 std::cerr <<
"warning: MPI implementation does not support MPI_THREAD_MULTIPLE. Support multithreading level is "
434 <<
". Disable MultipleThreadsMayTriggerMPICalls in the compiler-specific settings or via -DnoMultipleThreadsMayTriggerMPICalls."<< std::endl;
437 result = MPI_Init( argc, argv );
441 td_init(
reinterpret_cast<void*
>(&tdTextPointer));
444 if (result!=MPI_SUCCESS) {
445 std::cerr <<
"init(int*,char***)\t initialisation failed: " +
MPIReturnValueToString(result) +
" (no logging available yet)" << std::endl;
450 if (result!=MPI_SUCCESS) {
451 std::cerr <<
"init(int*,char***)\t initialisation failed: " +
MPIReturnValueToString(result) +
" (no logging available yet)" << std::endl;
455 result = MPI_Comm_rank( MPI_COMM_WORLD, &
_rank );
456 if (result!=MPI_SUCCESS) {
457 std::cerr <<
"init(int*,char***)\t initialisation failed: " +
MPIReturnValueToString(result) +
" (no logging available yet)" << std::endl;
463 MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &rawMaxTag, &answerFlag);
468 std::cerr <<
"init(int*,char***)\t was not able to query what the maximum tag value is" << std::endl;
517 logInfo(
"setDeadlockTimeOut(int)",
"set deadlock timeout to " <<
_deadlockTimeOut.count() <<
" and hence disabled timeout checks" );
527 if (result!=MPI_SUCCESS) {
532 if (result!=MPI_SUCCESS) {
543 MPI_Abort(MPI_COMM_WORLD,errorCode);
#define assertion2(expr, param0, param1)
#define assertion3(expr, param0, param1, param2)
#define logError(methodName, logMacroMessageStream)
Wrapper macro around tarch::logging::Log to improve logging.
#define logTraceOut(methodName)
#define logWarning(methodName, logMacroMessageStream)
Wrapper macro around tarch::logging::Log to improve logging.
#define logTraceIn(methodName)
#define logInfo(methodName, logMacroMessageStream)
Wrapper macro around tarch::logging::Log to improve logging.
Represents a program instance within a cluster.
void setCommunicator(MPI_Comm communicator, bool recomputeRankAndWorld=true)
Set communicator to be used by Peano.
Rank()
The standard constructor assigns the attributes default values and checks whether the program is com...
std::chrono::system_clock::time_point _globalTimeOutDeadlock
int getProvidedThreadLevelSupport() const
Information on supported thread-level support.
void setDeadlockTimeOut(int valueInSeconds)
Set deadlock time out.
static int _tagCounter
Count the tags that have already been handed out.
static int getGlobalMasterRank()
Get the global master.
int getNumberOfRanks() const
bool isInitialised() const
void triggerDeadlockTimeOut(const std::string &className, const std::string &methodName, int communicationPartnerRank, int tag, int numberOfExpectedMessages=1, const std::string &comment="")
Triggers a time out and shuts down the cluster if a timeout is violated.
bool isGlobalMaster() const
Is this node the global master process, i.e.
void setDeadlockWarningTimeStamp()
Memorise global timeout.
bool exceededTimeOutWarningThreshold() const
static int _maxTags
Set by init() and actually stores the number of valid tags.
MPI_Comm _communicator
MPI Communicator this process belongs to.
void writeTimeOutWarning(const std::string &className, const std::string &methodName, int communicationPartnerRank, int tag, int numberOfExpectedMessages=1)
Writes a warning if relevant.
int _numberOfProcessors
Number of processors available.
std::chrono::system_clock::time_point _globalTimeOutWarning
void setDeadlockTimeOutTimeStamp()
void ensureThatMessageQueuesAreEmpty(int fromRank, int tag)
Ensure that there are no messages anymore from the specified rank.
int _rank
Rank (id) of this process.
void reduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, std::function< void()> waitor=[]() -> void {})
static Rank & getInstance()
This operation returns the singleton instance.
std::chrono::seconds _deadlockTimeOut
Time to timeout.
bool isMessageInQueue(int tag) const
In older DaStGen versions, I tried to find out whether a particular message type is in the MPI queue.
void setTimeOutWarning(int valueInSeconds)
Set time out warning.
bool _areTimeoutsEnabled
Global toggle to enable/disable timeouts.
int getRank() const
Return rank of this node.
bool init(int *argc, char ***argv)
This operation initializes the MPI environment and the program instance.
void allReduce(const void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, std::function< void()> waitor=[]() -> void {})
Wrapper around allreduce.
void suspendTimeouts(bool timeoutsDisabled)
static tarch::logging::Log _log
Logging device.
std::chrono::seconds _timeOutWarning
Timeout warning.
void barrier(std::function< void()> waitor=[]() -> void {})
Global MPI barrier.
bool _initIsCalled
Is set true if init() is called.
void logStatus() const
Logs the status of the process onto the log device.
static bool validateMaxTagIsSupported()
Just try to find out if a tag is actually supported.
int _providedThreadLevelSupport
static int reserveFreeTag(const std::string &fullQualifiedMessageName, int numberOfTags=1)
Return a free tag.
virtual ~Rank()
The standard destructor calls MPI_Finalize().
static void releaseTag(int tag)
bool exceededDeadlockThreshold() const
void shutdown()
Shuts down the application.
static void abort(int errorCode)
A proper abort in an MPI context has to use MPI_Abort.
MPI_Comm getCommunicator() const
static const int DEADLOCK_EXIT_CODE
std::string MPIReturnValueToString(int result)
static void initDatatype()
Wrapper around getDatatype() to trigger lazy evaluation if we use the lazy initialisation.
static void shutdownDatatype()
Free the underlying MPI datatype.
static void initDatatype()
Wrapper around getDatatype() to trigger lazy evaluation if we use the lazy initialisation.