Peano
Loading...
Searching...
No Matches
SpreadOutHierarchically.cpp
Go to the documentation of this file.
2
4
5
8
9
// Out-of-class definition of the strategy's static log device. The string handed to the
// constructor is the fully qualified class name.
10tarch::logging::Log toolbox::loadbalancing::strategies::SpreadOutHierarchically::_log( "toolbox::loadbalancing::strategies::SpreadOutHierarchically" );
11
12
// Maps an action value onto a human-readable string.
// NOTE(review): the signature line and the case labels in front of three of the
// return statements are not part of this listing - confirm against the original file.
14 switch ( action ) {
16 return "unspecified";
17 case Action::None:
18 return "none";
20 return "spread-equally-over-all-ranks";
22 return "spread-equally-over-all-threads";
23 }
// Reached only if none of the cases above returned.
24 return "<undef>";
25}
26
27
// Initialiser list: hands configuration and costMetrics on to AbstractLoadBalancing and
// starts with a wait counter of zero, i.e. no steps have to be skipped before a decision.
// NOTE(review): the constructor's signature line is not part of this listing.
29 AbstractLoadBalancing( configuration, costMetrics),
30 _stepsToWaitForNextLoadBalancingDecision(0) {
// A null configuration is treated as a programming error.
31 assertion( configuration!=nullptr );
34}
35
36
39
40
// Checks, depending on the current _state, whether the strategy should switch state.
// NOTE(review): the signature, the case labels and the statements that precede each
// logDebug() call (presumably the assignments to _state) are not part of this listing -
// confirm against the original file.
42 switch (_state) {
// First visible case: fires once more than one tree exists globally; the second
// operand of the "or" is not part of this listing.
44 if (
45 _statistics.getGlobalNumberOfTrees() > 1
46 or
48 ) {
50 logDebug( "updateState()", "switch to state " << ::toString(_state) );
51 }
52 break;
// Second visible case: fires if this rank already hosts more than one local tree ...
54 if (
55 peano4::parallel::SpacetreeSet::getInstance().getLocalSpacetrees().size() > 1
56 ) {
58 logDebug( "updateState()", "switch to state " << ::toString(_state) << " as more than one local subpartition is already hosted, i.e. we assume rank has already spread out locally" );
59 }
// ... or if the rank has at most one thread. Both checks are evaluated independently.
60 if (
61 tarch::multicore::Core::getInstance().getNumberOfThreads()<=1
62 ) {
64 logDebug( "updateState()", "switch to state " << ::toString(_state) << " as rank is degenerated with only one thread, i.e. rank-local distribution makes no sense" );
65 }
66 break;
// All remaining states are left untouched.
67 default:
68 break;
69 }
70}
71
72
// Decides which load balancing action is to be taken next. Returns Action::None whenever
// no split shall be triggered.
// NOTE(review): the signature, the name of the first log macro, the two case labels and
// the second argument of both std::max() calls are not part of this listing - confirm
// against the original file.

// While the wait counter is positive, no action is taken at all.
74 if (_stepsToWaitForNextLoadBalancingDecision>0) {
76 "getAction()",
77 "shall wait for another " << _stepsToWaitForNextLoadBalancingDecision << " step(s) before we rebalance"
78 );
79 return Action::None;
80 }
81
82 switch (_state) {
// First visible case (presumably the inter-rank distribution state - TODO confirm).
84 {
// Threshold: minimum tree size times the number of ranks, combined via std::max with a
// second value that is not part of this listing.
85 const int MinOriginalTreeSizeToTriggerMPISpreadOut = std::max(
86 _configuration->getMinTreeSize(_state) * tarch::mpi::Rank::getInstance().getNumberOfRanks(),
88 );
89
// Only the global master of a run with more than one rank proceeds, and only if
// hasSplitRecently() does not hold.
90 if (
91 tarch::mpi::Rank::getInstance().getNumberOfRanks()<=1
92 or
93 not tarch::mpi::Rank::getInstance().isGlobalMaster()
94 or
95 hasSplitRecently()
96 ) {
97 return Action::None;
98 }
// Too few local inner unrefined cells: postpone the decision.
99 else if ( _statistics.getLocalNumberOfInnerUnrefinedCells() < MinOriginalTreeSizeToTriggerMPISpreadOut ) {
100 logInfo(
101 "getAction()",
102 "have to postpone any decision, as local no of inner unrefined cells of " << _statistics.getLocalNumberOfInnerUnrefinedCells() << " is smaller than " << MinOriginalTreeSizeToTriggerMPISpreadOut << " (which would occupy all ranks)" );
103 return Action::None;
104 }
105 else {
106 return Action::SpreadEquallyOverAllRanks;
107 }
108 }
// Second visible case (presumably the intra-rank distribution state - TODO confirm).
110 {
// Number of threads to serve: the thread count, capped by the configured maximum number
// of local trees per rank.
111 const int ThreadsToKeepBusy = std::min( tarch::multicore::Core::getInstance().getNumberOfThreads(), _configuration->getMaxLocalTreesPerRank(_state) );
// Threshold: minimum tree size times ThreadsToKeepBusy, combined via std::max with a
// second value that is not part of this listing.
112 const int MinOriginalTreeSizeToTriggerThreadSpreadOut = std::max(
113 _configuration->getMinTreeSize(_state) * ThreadsToKeepBusy,
115 );
116
// Too few local inner unrefined cells: postpone the decision.
117 if ( _statistics.getLocalNumberOfInnerUnrefinedCells() < MinOriginalTreeSizeToTriggerThreadSpreadOut ) {
118 logInfo( "getAction()", "have to postpone any decision, as local no of inner unrefined cells of " << _statistics.getLocalNumberOfInnerUnrefinedCells() << " is smaller than " << MinOriginalTreeSizeToTriggerThreadSpreadOut << " (which would occupy all threads)" );
119 return Action::None;
120 }
121 else {
122 return Action::SpreadEquallyOverAllThreads;
123 }
124 }
125 default:
126 break;
127 }
128
// Any other state does not trigger an action.
129 return Action::None;
130}
131
132
// Determines how often the local tree shall be split. The returned value is always at
// least 1, as every assignment to numberOfSplits goes through std::max(1,...).
// NOTE(review): the signature line and the last stream operand of the first logInfo()
// call further down are not part of this listing - confirm against the original file.

// The routine expects exactly one local spacetree.
134 assertionEquals( peano4::parallel::SpacetreeSet::getInstance().getLocalSpacetrees().size(), 1 );
135
// The split count is bounded by the configured maximum of local trees, the weight of the
// heaviest local tree and the number of threads, each reduced by one.
136 int maxSizeOfLocalRank = getWeightOfHeaviestLocalSpacetree();
137 int maxLocalTrees = std::min( {_configuration->getMaxLocalTreesPerRank(_state)-1, maxSizeOfLocalRank-1, tarch::multicore::Core::getInstance().getNumberOfThreads()-1} );
138 int numberOfSplits = std::max(1,maxLocalTrees);
139
// The current memory usage (in MByte) serves as worst-case size of a single spacetree.
140 int worstCaseEstimateForSizeOfSpacetree = tarch::getMemoryUsage( tarch::MemoryUsageFormat::MByte );
// NOTE(review): integer division by the memory usage; a reported usage of 0 MByte would
// divide by zero - confirm that tarch::getMemoryUsage() cannot return 0 here.
141 int maxAdditionalSplitsDueToMemory = tarch::getFreeMemory( tarch::MemoryUsageFormat::MByte ) / worstCaseEstimateForSizeOfSpacetree;
142 int estimatedCellsPerTree = maxSizeOfLocalRank / (numberOfSplits+1);
143
// If the resulting trees would fall below the minimum tree size, reduce the split count
// such that around MinTreeSize cells remain per tree (but split at least once).
144 const int MinTreeSize = _configuration->getMinTreeSize(_state);
145 if ( estimatedCellsPerTree<MinTreeSize ) {
146 const int adoptedSplits = std::max(1, maxSizeOfLocalRank / MinTreeSize - 1 );
147 logInfo(
148 "getNumberOfSplitsOnLocalRank(...)",
149 "coded wanted to split " << numberOfSplits <<
150 " times, but this would yield around " << estimatedCellsPerTree <<
151 " cells per tree, whereas the number of cells per tree should be at least " << MinTreeSize <<
152 ". Split only " << adoptedSplits << " times"
153 );
154 numberOfSplits = adoptedSplits;
155 }
// Keep the split count if at most one local tree exists or the memory estimate permits it.
// NOTE(review): given the assertion at the top (exactly one local tree), the first operand
// always holds when assertions pass, so the memory-dependent branch below looks
// unreachable - confirm intent.
156 if (
157 peano4::parallel::SpacetreeSet::getInstance().getLocalSpacetrees().size()<=1
158 or
159 maxAdditionalSplitsDueToMemory>=numberOfSplits
160 ) {
161 logInfo(
162 "getNumberOfSplitsOnLocalRank(...)",
163 "assume enough memory is available, so split " << numberOfSplits <<
164 " times (current mem footprint=" << worstCaseEstimateForSizeOfSpacetree << " MByte, free memory=" <<
165 tarch::getFreeMemory( tarch::MemoryUsageFormat::MByte ) << " MByte, est. cells per tree=" << estimatedCellsPerTree <<
166 ", max-local-trees-per-rank=" << _configuration->getMaxLocalTreesPerRank(_state) << ", no-of-threads=" <<
168 );
169 }
// Otherwise, if the configuration asks for memory-dependent splits, cap the split count
// by what the memory estimate allows (but split at least once).
170 else if ( _configuration->makeSplitDependOnMemory(_state) ) {
171 int adoptedSplitCount = std::max(1,maxAdditionalSplitsDueToMemory);
172 logInfo(
173 "getNumberOfSplitsOnLocalRank(...)",
174 "not sure if additional trees fit on node. Optimal number of splits is " << numberOfSplits <<
175 ". With current mem footprint of " << worstCaseEstimateForSizeOfSpacetree << " MByte and free memory of " <<
176 tarch::getFreeMemory( tarch::MemoryUsageFormat::MByte ) << ", we manually reduce split count to " << adoptedSplitCount
177 );
178 numberOfSplits = adoptedSplitCount;
179 }
180
181 return numberOfSplits;
182}
183
184
// Queries getAction() and realises the returned action by triggering the corresponding
// splits. Actions other than the two spread-out variants do not trigger anything.
// NOTE(review): the signature line is not part of this listing.
186 auto action = getAction();
187
// Debug builds report every decision; other builds only report decisions that differ
// from Action::None. Both messages separate the text from the state string by a blank.
188 #if PeanoDebug>0
189 logInfo( "updateLoadBalancing()", "load balancing's action " << toString(action) << " with internal state " << AbstractLoadBalancing::toString() );
190 #else
191 if ( action!=Action::None ) {
192 logInfo( "updateLoadBalancing()", "load balancing's action " << toString(action) << " with internal state " << AbstractLoadBalancing::toString() );
193 }
194 #endif
195
196 switch ( action ) {
197 case Action::SpreadEquallyOverAllRanks:
198 {
// Cells per rank: global number of inner unrefined cells divided by the number of ranks,
// rounded, but at least one.
199 int cellsPerRank = std::max(
200 static_cast<int>(std::round(_statistics.getGlobalNumberOfInnerUnrefinedCells() / tarch::mpi::Rank::getInstance().getNumberOfRanks())),
201 1
202 );
203
// One split per target rank, starting at rank 1. Ranks whose number does not exceed the
// remainder of the division receive one additional cell.
204 for (int targetRank=1; targetRank<tarch::mpi::Rank::getInstance().getNumberOfRanks(); targetRank++ ) {
205 int thisRanksCells = cellsPerRank;
206 if (static_cast<int>(_statistics.getGlobalNumberOfInnerUnrefinedCells()) % tarch::mpi::Rank::getInstance().getNumberOfRanks() >= targetRank) {
207 thisRanksCells++;
208 }
209 triggerSplit(thisRanksCells, targetRank);
210 }
211 }
212 break;
213 case Action::SpreadEquallyOverAllThreads:
214 {
// Splits are only issued if a heaviest local tree exists and is not blacklisted.
215 int heaviestSpacetree = getIdOfHeaviestLocalSpacetree();
216 if (heaviestSpacetree!=NoHeaviestTreeAvailable and not _blacklist.isBlacklisted(heaviestSpacetree) ) {
217 int numberOfLocalUnrefinedCellsOfHeaviestSpacetree = getWeightOfHeaviestLocalSpacetree();
218 // This operation takes care of the max tree count and size
219 int numberOfSplits = getNumberOfSplitsOnLocalRank();
// Cells per split: the tree's weight spread over numberOfSplits+1 parts, but neither below
// one nor below the configured minimum tree size.
220 int cellsPerCore = std::max( {1, numberOfLocalUnrefinedCellsOfHeaviestSpacetree/(numberOfSplits+1),_configuration->getMinTreeSize(_state)} );
221
222 logInfo(
223 "updateLoadBalancing()",
224 "split " << cellsPerCore << " or " << (cellsPerCore+1) << " cells " << numberOfSplits <<
225 " times from tree " << heaviestSpacetree << " on local rank (hosts " << numberOfLocalUnrefinedCellsOfHeaviestSpacetree <<
226 " unrefined cells with " << tarch::multicore::Core::getInstance().getNumberOfThreads() << " threads per rank)" );
227
// The first (weight modulo (numberOfSplits+1)) splits take one additional cell. All splits
// target the local rank.
228 for (int i=0; i<numberOfSplits; i++) {
229 int thisCellsPerCore = cellsPerCore;
230 if (i<numberOfLocalUnrefinedCellsOfHeaviestSpacetree % (numberOfSplits+1)) {
231 thisCellsPerCore++;
232 }
233 triggerSplit(thisCellsPerCore, tarch::mpi::Rank::getInstance().getRank());
234 }
235 }
236 else {
237 logInfo( "updateLoadBalancing()", "local tree is not yet available for further splits (heaviest-spacetree=" << heaviestSpacetree << ")" );
238 }
239 }
240 break;
241 default:
242 break;
243 }
244}
245
246
// Wraps up a step: refreshes the global views, maintains the wait counter, realises the
// load balancing decision and updates the strategy's state.
// NOTE(review): the signature line and one statement in front of the closing brace are
// not part of this listing - confirm against the original file.
248 _statistics.updateGlobalView();
249 _costMetrics->updateGlobalView();
250 _blacklist.update();
251
// With consistent statistics the wait counter is decremented (never below zero).
// Otherwise it is raised to at least one, which postpones the next decision.
252 if ( _statistics.hasConsistentViewOfWorld() ) {
253 _stepsToWaitForNextLoadBalancingDecision = std::max( _stepsToWaitForNextLoadBalancingDecision-1, 0 );
254 }
255 else {
256 logInfo( "finishStep()", "statistics have no consistent view of world, so postpone load balancing decisions" );
257 _stepsToWaitForNextLoadBalancingDecision = std::max(_stepsToWaitForNextLoadBalancingDecision,1);
258 }
259
// The action is realised first, the state is updated afterwards.
260 updateLoadBalancing();
261 updateState();
262
// The statistics are told about the (possibly updated) state.
263 _statistics.notifyOfStateChange( _state );
264
266}
267
268
// Asks the spacetree set to split numberOfCells cells off the local tree and to deploy
// them to targetRank, then updates the strategy's bookkeeping.
// NOTE(review): the signature line is not part of this listing; numberOfCells and
// targetRank are presumably its parameters - confirm against the original file.

// The routine expects exactly one local spacetree and uses it as source of the split.
270 assertionEquals( peano4::parallel::SpacetreeSet::getInstance().getLocalSpacetrees().size(), 1 );
271
272 const int sourceTree = *( peano4::parallel::SpacetreeSet::getInstance().getLocalSpacetrees().begin() );
273 bool success = peano4::parallel::SpacetreeSet::getInstance().split(sourceTree,peano4::SplitInstruction{numberOfCells,_configuration->getMode(_state)},targetRank);
// NOTE(review): the message speaks of a "local rank", yet the value written is the id of
// the source tree - confirm the wording.
274 if (not success) {
275 logInfo( "triggerSplit()", "wanted to split local rank " << sourceTree << " but failed" );
276 }
277
// NOTE(review): the bookkeeping below runs even if the split has failed - confirm intent.
278 _blacklist.triggeredSplit( sourceTree );
279 _statistics.incLocalNumberOfSplits();
280
// Sets the wait counter to three steps.
281 _stepsToWaitForNextLoadBalancingDecision = 3;
282}
283
284
#define assertionEquals(lhs, rhs)
#define assertion(expr)
#define logDebug(methodName, logMacroMessageStream)
Definition Log.h:50
#define logInfo(methodName, logMacroMessageStream)
Wrapper macro around tarch::logging::Log to improve logging.
Definition Log.h:411
bool split(int treeId, const peano4::SplitInstruction &instruction, int targetRank)
Split a local tree.
static SpacetreeSet & getInstance()
std::set< int > getLocalSpacetrees() const
Log Device.
Definition Log.h:516
int getNumberOfRanks() const
Definition Rank.cpp:552
static Rank & getInstance()
This operation returns the singleton instance.
Definition Rank.cpp:539
static Core & getInstance()
Definition Core.cpp:56
int getNumberOfThreads() const
Returns the number of threads that is used.
Definition Core.cpp:67
virtual std::string toString() const
Generic string serialisation.
Abstract interface to tweak the behaviour of the recursive subdivision.
void notifyOfStateChange(State state)
SpreadOutHierarchically(Configuration *configuration=new DefaultConfiguration(), CostMetrics *costMetrics=new toolbox::loadbalancing::metrics::CellCount())
std::string toString(Filter filter)
Definition convert.cpp:170
int getMemoryUsage(MemoryUsageFormat format)
Method for getting the application's memory footprint.
Definition tarch.cpp:95
int getFreeMemory(MemoryUsageFormat format)
Definition tarch.cpp:83
int getWeightOfHeaviestLocalSpacetree()
This is a helper routine which is used, for example, by ExaHyPE's default main.
void dumpStatistics()
Dump the stats of the lb to the terminal (info device).
@ Stagnation
You usually don't get this state when we query the configuration, i.e.
@ InterRankDistribution
Code has not yet spread out over all ranks but would like to do so now.
@ IntraRankDistribution
Code has spread over all ranks, but it has not spread over all cores yet, i.e.
Instruction to split.
Definition grid.h:34