Peano 4
Loading...
Searching...
No Matches
SpreadOutHierarchically.cpp
Go to the documentation of this file.
2
4
5
8
9
10tarch::logging::Log toolbox::loadbalancing::strategies::SpreadOutHierarchically::_log( "toolbox::loadbalancing::strategies::SpreadOutHierarchically" );
11
12
14 switch ( action ) {
16 return "unspecified";
17 case Action::None:
18 return "none";
20 return "spread-equally-over-all-ranks";
22 return "spread-equally-over-all-threads";
23 }
24 return "<undef>";
25}
26
27
29 AbstractLoadBalancing( configuration, costMetrics),
30 _stepsToWaitForNextLoadBalancingDecision(0) {
31 assertion( configuration!=nullptr );
34}
35
36
39
40
42 switch (_state) {
44 if (
45 _statistics.getGlobalNumberOfTrees() > 1
46 or
48 ) {
50 logDebug( "updateState()", "switch to state " << ::toString(_state) );
51 }
52 break;
54 if (
55 peano4::parallel::SpacetreeSet::getInstance().getLocalSpacetrees().size() > 1
56 ) {
57 _state = State::Stagnation;
58 logDebug( "updateState()", "switch to state " << ::toString(_state) << " as more than one local subpartition is already hosted, i.e. we assume rank has already spread out locally" );
59 }
60 if (
61 tarch::multicore::Core::getInstance().getNumberOfThreads()<=1
62 ) {
63 _state = State::Stagnation;
64 logDebug( "updateState()", "switch to state " << ::toString(_state) << " as rank is degenerated with only one thread, i.e. rank-local distribution makes no sense" );
65 }
66 break;
67 default:
68 break;
69 }
70}
71
72
74 if (_stepsToWaitForNextLoadBalancingDecision>0) {
75 // @todo Debug
76 logInfo(
77 "getAction()",
78 "shall wait for another " << _stepsToWaitForNextLoadBalancingDecision << " step(s) before we rebalance"
79 );
80 return Action::None;
81 }
82
83 switch (_state) {
85 {
86 const int MinOriginalTreeSizeToTriggerMPISpreadOut = std::max(
87 _configuration->getMinTreeSize(_state) * tarch::mpi::Rank::getInstance().getNumberOfRanks(),
89 );
90
91 if (
92 tarch::mpi::Rank::getInstance().getNumberOfRanks()<=1
93 or
94 not tarch::mpi::Rank::getInstance().isGlobalMaster()
95 or
96 hasSplitRecently()
97 ) {
98 return Action::None;
99 }
100 else if ( _statistics.getLocalNumberOfInnerUnrefinedCells() < MinOriginalTreeSizeToTriggerMPISpreadOut ) {
101 logInfo(
102 "getAction()",
103 "have to postpone any decision, as local no of inner unrefined cells of " << _statistics.getLocalNumberOfInnerUnrefinedCells() << " is smaller than " << MinOriginalTreeSizeToTriggerMPISpreadOut << " (which would occupy all ranks)" );
104 return Action::None;
105 }
106 else {
107 return Action::SpreadEquallyOverAllRanks;
108 }
109 }
110 break;
112 {
113 const int ThreadsToKeepBusy = std::min( tarch::multicore::Core::getInstance().getNumberOfThreads(), _configuration->getMaxLocalTreesPerRank(_state) );
114 const int MinOriginalTreeSizeToTriggerThreadSpreadOut = std::max(
115 _configuration->getMinTreeSize(_state) * ThreadsToKeepBusy,
117 );
118
119 if ( _statistics.getLocalNumberOfInnerUnrefinedCells() < MinOriginalTreeSizeToTriggerThreadSpreadOut ) {
120 logInfo( "getAction()", "have to postpone any decision, as local no of inner unrefined cells of " << _statistics.getLocalNumberOfInnerUnrefinedCells() << " is smaller than " << MinOriginalTreeSizeToTriggerThreadSpreadOut << " (which would occupy all threads)" );
121 return Action::None;
122 }
123 else {
124 return Action::SpreadEquallyOverAllThreads;
125 }
126 }
127 break;
128 default:
129 break;
130 }
131
132 return Action::None;
133}
134
135
137 assertionEquals( peano4::parallel::SpacetreeSet::getInstance().getLocalSpacetrees().size(), 1 );
138
139 int maxSizeOfLocalRank = getWeightOfHeaviestLocalSpacetree();
140 int maxLocalTrees = std::min( {_configuration->getMaxLocalTreesPerRank(_state)-1, maxSizeOfLocalRank-1, tarch::multicore::Core::getInstance().getNumberOfThreads()-1} );
141 int numberOfSplits = std::max(1,maxLocalTrees);
142
143 int worstCaseEstimateForSizeOfSpacetree = tarch::getMemoryUsage( tarch::MemoryUsageFormat::MByte );
144 int maxAdditionalSplitsDueToMemory = tarch::getFreeMemory( tarch::MemoryUsageFormat::MByte ) / worstCaseEstimateForSizeOfSpacetree;
145 int estimatedCellsPerTree = maxSizeOfLocalRank / (numberOfSplits+1);
146
147 const int MinTreeSize = _configuration->getMinTreeSize(_state);
148 if ( estimatedCellsPerTree<MinTreeSize ) {
149 const int adoptedSplits = std::max(1, maxSizeOfLocalRank / MinTreeSize - 1 );
150 logInfo(
151 "getNumberOfSplitsOnLocalRank(...)",
152 "coded wanted to split " << numberOfSplits <<
153 " times, but this would yield around " << estimatedCellsPerTree <<
154 " cells per tree, whereas the number of cells per tree should be at least " << MinTreeSize <<
155 ". Split only " << adoptedSplits << " times"
156 );
157 numberOfSplits = adoptedSplits;
158 }
159 if (
160 peano4::parallel::SpacetreeSet::getInstance().getLocalSpacetrees().size()<=1
161 or
162 maxAdditionalSplitsDueToMemory>=numberOfSplits
163 ) {
164 logInfo(
165 "getNumberOfSplitsOnLocalRank(...)",
166 "assume enough memory is available, so split " << numberOfSplits <<
167 " times (current mem footprint=" << worstCaseEstimateForSizeOfSpacetree << " MByte, free memory=" <<
168 tarch::getFreeMemory( tarch::MemoryUsageFormat::MByte ) << " MByte, est. cells per tree=" << estimatedCellsPerTree <<
169 ", max-local-trees-per-rank=" << _configuration->getMaxLocalTreesPerRank(_state) << ", no-of-threads=" <<
171 );
172 }
173 else if ( _configuration->makeSplitDependOnMemory(_state) ) {
174 int adoptedSplitCount = std::max(1,maxAdditionalSplitsDueToMemory);
175 logInfo(
176 "getNumberOfSplitsOnLocalRank(...)",
177 "not sure if additional trees fit on node. Optimal number of splits is " << numberOfSplits <<
178 ". With current mem footprint of " << worstCaseEstimateForSizeOfSpacetree << " MByte and free memory of " <<
179 tarch::getFreeMemory( tarch::MemoryUsageFormat::MByte ) << ", we manually reduce split count to " << adoptedSplitCount
180 );
181 numberOfSplits = adoptedSplitCount;
182 }
183
184 return numberOfSplits;
185}
186
187
189 auto action = getAction();
190
191 #if PeanoDebug>0
192 logInfo( "updateLoadBalancing()", "load balancing's action " << toString(action) << " with internal state " << AbstractLoadBalancing::toString() );
193 #else
194 if ( action!=Action::None ) {
195 logInfo( "updateLoadBalancing()", "load balancing's action " << toString(action) << " with internal state" << AbstractLoadBalancing::toString() );
196 }
197 #endif
198
199 switch ( action ) {
200 case Action::SpreadEquallyOverAllRanks:
201 {
202 int cellsPerRank = std::max(
203 static_cast<int>(std::round(_statistics.getGlobalNumberOfInnerUnrefinedCells() / tarch::mpi::Rank::getInstance().getNumberOfRanks())),
204 1
205 );
206
207 for (int targetRank=1; targetRank<tarch::mpi::Rank::getInstance().getNumberOfRanks(); targetRank++ ) {
208 int thisRanksCells = cellsPerRank;
209 if (static_cast<int>(_statistics.getGlobalNumberOfInnerUnrefinedCells()) % tarch::mpi::Rank::getInstance().getNumberOfRanks() >= targetRank) {
210 thisRanksCells++;
211 }
212 triggerSplit(thisRanksCells, targetRank);
213 }
214 }
215 break;
216 case Action::SpreadEquallyOverAllThreads:
217 {
218 int heaviestSpacetree = getIdOfHeaviestLocalSpacetree();
219 if (heaviestSpacetree!=NoHeaviestTreeAvailable and not _blacklist.isBlacklisted(heaviestSpacetree) ) {
220 int numberOfLocalUnrefinedCellsOfHeaviestSpacetree = getWeightOfHeaviestLocalSpacetree();
221 // This operation takes care of the max tree count and size
222 int numberOfSplits = getNumberOfSplitsOnLocalRank();
223 int cellsPerCore = std::max( {1, numberOfLocalUnrefinedCellsOfHeaviestSpacetree/(numberOfSplits+1),_configuration->getMinTreeSize(_state)} );
224
225 logInfo(
226 "updateLoadBalancing()",
227 "split " << cellsPerCore << " or " << (cellsPerCore+1) << " cells " << numberOfSplits <<
228 " times from tree " << heaviestSpacetree << " on local rank (hosts " << numberOfLocalUnrefinedCellsOfHeaviestSpacetree <<
229 " unrefined cells with " << tarch::multicore::Core::getInstance().getNumberOfThreads() << " threads per rank)" );
230
231 for (int i=0; i<numberOfSplits; i++) {
232 int thisCellsPerCore = cellsPerCore;
233 if (i<numberOfLocalUnrefinedCellsOfHeaviestSpacetree % (numberOfSplits+1)) {
234 thisCellsPerCore++;
235 }
236 triggerSplit(thisCellsPerCore, tarch::mpi::Rank::getInstance().getRank());
237 }
238 }
239 else {
240 logInfo( "updateLoadBalancing()", "local tree is not yet available for further splits (heaviest-spacetree=" << heaviestSpacetree << ")" );
241 }
242 }
243 break;
244 default:
245 break;
246 }
247}
248
249
251 _statistics.updateGlobalView();
252 _costMetrics->updateGlobalView();
253 _blacklist.update();
254
255 if ( _statistics.hasConsistentViewOfWorld() ) {
256 _stepsToWaitForNextLoadBalancingDecision = std::max( _stepsToWaitForNextLoadBalancingDecision-1, 0 );
257 }
258 else {
259 logInfo( "finishStep()", "statistics have no consistent view of world, so postpone load balancing decisions" );
260 _stepsToWaitForNextLoadBalancingDecision = std::max(_stepsToWaitForNextLoadBalancingDecision,1);
261 }
262
263 updateLoadBalancing();
264 updateState();
265
266 _statistics.notifyOfStateChange( _state );
267
269}
270
271
273 assertionEquals( peano4::parallel::SpacetreeSet::getInstance().getLocalSpacetrees().size(), 1 );
274
275 const int sourceTree = *( peano4::parallel::SpacetreeSet::getInstance().getLocalSpacetrees().begin() );
277 if (not success) {
278 logInfo( "triggerSplit()", "wanted to split local rank " << sourceTree << " but failed" );
279 }
280
281 _blacklist.triggeredSplit( sourceTree );
282 _statistics.incLocalNumberOfSplits();
283
284 _stepsToWaitForNextLoadBalancingDecision = 3;
285}
286
287
#define assertionEquals(lhs, rhs)
#define assertion(expr)
#define logDebug(methodName, logMacroMessageStream)
Definition Log.h:50
#define logInfo(methodName, logMacroMessageStream)
Wrapper macro around tarch::logging::Log to improve logging.
Definition Log.h:411
bool split(int treeId, const peano4::SplitInstruction &instruction, int targetRank)
Split a local tree.
static SpacetreeSet & getInstance()
std::set< int > getLocalSpacetrees() const
Log Device.
Definition Log.h:516
int getNumberOfRanks() const
Definition Rank.cpp:551
static Rank & getInstance()
This operation returns the singleton instance.
Definition Rank.cpp:538
static Core & getInstance()
Definition Core.cpp:55
int getNumberOfThreads() const
Returns the number of threads that is used.
Definition Core.cpp:66
virtual std::string toString() const
Generic string serialisation.
Abstract interface to tweak the behaviour of the recursive subdivision.
void notifyOfStateChange(State state)
SpreadOutHierarchically(Configuration *configuration=new DefaultConfiguration(), CostMetrics *costMetrics=new toolbox::loadbalancing::metrics::CellCount())
std::string toString(Filter filter)
Definition convert.cpp:170
int getMemoryUsage(MemoryUsageFormat format)
Method for getting the application's memory footprint.
Definition tarch.cpp:95
int getFreeMemory(MemoryUsageFormat format)
Definition tarch.cpp:83
int getWeightOfHeaviestLocalSpacetree()
This is a helper routine which is used, for example, by ExaHyPE's default main.
void dumpStatistics()
Dump the stats of the lb to the terminal (info device).
@ Stagnation
You usually don't get this state when we query the configuration.
@ InterRankDistribution
Code has not yet spread out over all ranks but would like to do so now.
@ IntraRankDistribution
Code has spread over all ranks, but it has not spread over all cores yet.
Instruction to split.
Definition grid.h:34