Peano
Loading...
Searching...
No Matches
PerformanceData.py
Go to the documentation of this file.
1# This file is part of the ExaHyPE2 project. For conditions of distribution and
2# use, please see the copyright notice at www.peano-framework.org
3from fileinput import filename
4
5
6import re
7import traceback
8import glob
9
10
11class PerformanceData(object):
12 def __init__(self,file_name,solver_name="",verbose=False):
13 """
14 :: Attributes
15
16 self._number_of_time_steps: Int
17 Includes the empty time steps (warm-up phase) if you have
18 adaptive time stepping.
19
20 :: Arguments
21
22 file_name: String
23 This is a mandatory field, as it tells us which file to parse.
24
25 solver_name: String
26 In theory, you could live without this one if you have only one
27 solver. However, ExaHyPE2 supports multiple solvers within one
28 grid and therefore needs a dedicated solver name to know where
29 to look in the file. Leave it empty if there's only one solver.
30
31 """
32 self._file_name = file_name
33 self._solver_name = solver_name
34
35 self._threads = 1
36 self._ranks = 1
38
42
46
48
54 self._updates = []
55
57
58 """
59 The code tries to parse the machine format first. This format however is only
60 used by the command line logger. If you use the other loggers, they tend to
61 dump their data in human readable format; which is less accurate. I thought
62 about giving the user the option which format to use or to require her to use
63 the command line logger. But that's all too complicated: Why not try to parse
64 the most accurate format first. If that fails, we can still use the human
65 readable one. See parse_machine_time_format.
66 """
68
69 self.valid = False
70 self.parse(verbose)
71 pass
72
73
75 """
76
77 Hand in a string in nanoseconds and return double as s
78
79 """
80 result = float(data)
81 return result / 1000 / 1000 / 1000
82
83
85 """
86
87 Hand in a string in the format hh:mm:ss
88
89 """
90 match = re.findall( r"\d\d\:\d\d\:\d\d", data) # found, match.group() == "123"
91
92 hours = match[0].split(":")[0]
93 minutes = match[0].split(":")[1]
94 seconds = match[0].split(":")[2]
95 return float(hours)*60*60 + float(minutes)*60 + float(seconds)
96
97
98 def __str__(self, *args, **kwargs):
99 return "(ranks=" + str(self._ranks) + ",threads=" + str(self._threads) + \
100 ",#time-steps=" + str(self._number_of_time_steps) + \
101 ",grid-construction=" + str(self.total_construction_time) + "s/#" + str(self.total_construction_steps) + \
102 ",time-stepping=" + str(self.total_time_stepping_time) + "s/#" + str(self.total_time_stepping_steps) + \
103 ",plotting=" + str(self.total_plotting_time) + "s/#" + str(self.total_plotting_steps) + \
104 ",valid=" + str(self.valid) + \
105 ")"
106
107
109 result = 0
111 try:
112 result = self.__convert_machine_readable_timestamp_to_seconds(line.strip().split( " " )[0])
113 except:
114 print( "Warning: Have not found machine-readable log format. Use command line logger for this one. Will continue with human readable format.")
115 self._parse_machine_time_format = False
116
117 if not self._parse_machine_time_format:
118 result = self.__convert_human_readable_timestamp_to_seconds(line.strip().split( " " )[0])
119
120 return result
121
122
123 def parse(self,verbose):
124 file = open(self._file_name, "r", encoding="unicode_escape")
125 self.valid = True
126
127 print( "parse " + self._file_name )
128
129 try:
130 for line in file:
131 if "manually reset number of used cores to" in line:
132 new_thread_count = int( line.split( "manually reset number of used cores to" )[1] )
133 if new_thread_count!=self._threads:
134 print( "WARNING: Number of manually set cores ({}) does not match system settings of {}. Use manual value".format(new_thread_count,self._threads) )
135 self._threads = new_thread_count
136 if "build:" in line:
137 if "2d" in line:
138 self._d = 2
139 if "3d" in line:
140 self._d = 3
141
142 if "no threading" in line:
143 self._threads = 1
144 self._cores_per_node = 1
145 elif "tbb" in line or "omp" in line:
146 threads_parsed=line.split( "threads" )[0].split( "(")[-1]
147 if not threads_parsed.replace(" ","").isdigit():
148 print('Warning: failed to find a number of threads in:', threads_parsed)
149 print('Assuming threads=1')
150 self._threads = 1
151 else:
152 self._threads = int( threads_parsed )
153 self._cores_per_node = self._threads
154 if verbose:
155 print( "file has been written with {} cores/threads per node".format(self._threads) )
156
157
158 if "no mpi" in line:
159 self._ranks = 1
160 elif "mpi" in line:
161 self._ranks = int( line.split( "ranks)" )[0].split( "(")[-1] )
162 if verbose:
163 print( "file has been written with {} ranks".format(self._ranks) )
164
165 predicate = "rank:0" in line and "Abstract" + self._solver_name in line
166
167 if predicate and "Solver " + self._solver_name in line:
168 time_stamp = self.__extract_time_stamp_from_run_call(line)
169 if verbose:
170 print( "started new time step at " + str(time_stamp) + "s" )
171 self._number_of_time_steps += 1
172 self._time_step_time_stamp.append( time_stamp )
173 self._time_step_size_min.append( -1 )
174 self._time_step_size_max.append( -1 )
175 self._simulated_time_stamp_min.append( -1 )
176 self._simulated_time_stamp_max.append( -1 )
177 self._updates.append( -1 )
178
179 #
180 # Be careful with the order: the first one is more specific
181 #
182 if predicate and re.search( r"#updates\s*=", line)!=None and self._updates[-1]<0:
183 self._updates[-1] = float( line.split("=")[-1].split( "(")[0] )
184 if predicate and re.search( r"dt_{min,this-step}\s*=", line)!=None and self._time_step_size_min[-1]<0:
185 self._time_step_size_min[-1] = float( line.split("=")[-1] )
186 elif predicate and re.search( r"dt_{max,this-step}\s*=", line)!=None and self._time_step_size_max[-1]<0:
187 self._time_step_size_max[-1] = float( line.split("=")[-1] )
188 elif predicate and re.search( r"dt\s*=", line)!=None and self._time_step_size_max[-1]<0:
189 if "not yet known" in line:
190 self._time_step_size_max[-1] = 0.0
191 self._time_step_size_min[-1] = 0.0
192 else:
193 self._time_step_size_max[-1] = float( line.split("=")[-1] )
194 self._time_step_size_min[-1] = float( line.split("=")[-1] )
195 elif predicate and re.search( r"t_{max,global}\s*=", line)!=None and self._simulated_time_stamp_max[-1]<0:
196 self._simulated_time_stamp_max[-1] = float( line.split("=")[-1] )
197 elif predicate and re.search( r"t_{min,global}\s*=", line)!=None and self._simulated_time_stamp_min[-1]<0:
198 self._simulated_time_stamp_min[-1] = float( line.split("=")[-1] )
199 elif predicate and re.search( r"t\s*=", line)!=None and self._simulated_time_stamp_max[-1]<0:
200 self._simulated_time_stamp_max[-1] = float( line.split("=")[-1] )
201 self._simulated_time_stamp_min[-1] = float( line.split("=")[-1] )
202
203 if "step()" in line and "PlotSolution" in line and "rank:0" in line:
204 time_stamp = self.__extract_time_stamp_from_run_call(line)
205 if verbose:
206 print( "triggered plot at " + str(time_stamp) + "s" )
207 self.plotting_time_stamp.append( time_stamp )
208
209 if "terminated successfully" in line and "rank:0" in line:
210 time_stamp = self.__extract_time_stamp_from_run_call(line)
211 if verbose:
212 print( "terminated simulation at " + str(time_stamp) + "s" )
213 self._time_step_time_stamp.append( time_stamp )
214
215 if "initial grid construction:" in line:
216 self.total_construction_time = float( line.split("grid construction:")[1].split( "s" )[0] )
217 match = re.findall( r"measurements=\d+", line)
218 self.total_construction_steps = int( match[0].split( "=" )[1] )
219 print( "grid construction lasts " + str(self.total_construction_time) + " over " + str(self.total_construction_steps) + " steps")
220
221
222 if "time stepping:" in line and not "#measurements=0" in line:
223 self.total_time_stepping_time = float( line.split("time stepping:")[1].split( "s" )[0] )
224 match = re.findall( r"measurements=\d+", line)
225 if match:
226 self.total_time_stepping_steps = int( match[0].split( "=" )[1] )
227 print( "time stepping lasts " + str(self.total_time_stepping_time) + " over " + str(self.total_time_stepping_steps) + " steps" )
228 print( "assume time per time step of " + str(self.time_per_time_step()) )
229
230 if "plotting:" in line and not "#measurements=0" in line:
231 self.total_plotting_time = float( line.split("plotting:")[1].split( "s" )[0] )
232 match = re.findall( r"measurements=\d+", line)
233 self.total_plotting_steps = int( match[0].split( "=" )[1] )
234 print( "plotting lasts " + str(self.total_plotting_time) + " over " + str(self.total_plotting_steps) + " steps" )
235
236
237 except Exception as ex:
238 print( "parsing failed: " + str(ex))
239 print(traceback.format_exc())
240 self.valid = False
241
242 if self._number_of_time_steps<=0:
243 print( "** Warning: number of time steps could not be found **" )
245 print( "Setting number of time steps = "
246 +str(self.total_time_stepping_steps) + " based on the time stepping information")
247 else:
248 print( "file " + self._file_name + " is invalid as number of time steps equals zero" )
249 self.valid = False
250
251
252
254 """
255
256 Returns a whole array of times per time step, so you can plot the evoluation
257 of the cost per step over time.
258
259 """
260 result = []
261 for i in range(1,len(self._time_step_time_stamp)):
262 result.append( self._time_step_time_stamp[i]-self._time_step_time_stamp[i-1] )
263 return result
264
265
267 """
268
269 Returns a whole array of times per time step, so you can plot the evoluation
270 of the cost per step over time.
271
272 """
273 data = self.get_time_per_time_step()
274 if len(data)==0:
275 return 0.0
276 else:
277 result = 0;
278 for i in data:
279 result += i
280 return result/len(data)
281
282
283 def get_updates(self):
284 return [x for x in self._updates]
285
286
288 """
289
290 Returns a series of real time stamps (in seconds) snapshotted
291 per time step.
292
293
294 This is not a mere copy, as the last entry in the local set is the end
295 of the simulation. So we remove this one. At the same time, the very
296 first entry is the start of the simulation or first time step where
297 nothing happens yet (if we have to analyse the eigenvalue first).
298
299 """
300 shifted_data = [x-self._time_step_time_stamp[0] for x in self._time_step_time_stamp]
301 #shifted_data.pop()
302 #return shifted_data[1:]
303 return shifted_data
304
305
307 """
308
309 Returns a sequence of time stamps that are the simulated time
310 stamps (where has the simulation been at a certain point) per
311 step.
312
313 This is not a mere copy, as the last entry in the local set is the end
314 of the simulation
315
316 """
317 if self.uses_local_timestepping():
318 result = [x for x in self._simulated_time_stamp_min], [x for x in self._simulated_time_stamp_max]
319 if result[0][0]<0.0:
320 result = result[0][1:], result[1][1:]
321 return result
322 else:
323 return [x for x in self._simulated_time_stamp_min]
324
325
327 if self.uses_local_timestepping():
328 result = [x for x in self._time_step_size_min], [x for x in self._time_step_size_max]
329 if result[0][0]<=0.0:
330 result = result[0][1:], result[1][1:]
331 return result
332 else:
333 return [x for x in self._time_step_size_min]
334
335
337 result = False
338 if len(self._time_step_size_min)!=len(self._time_step_size_max):
339 raise Exception( "incompatible time step sizes" )
340 for lr in zip(self._time_step_size_min,self._time_step_size_max):
341 result |= (lr[0]!=lr[1])
342 return result
343
344
345 def timesteps(self):
346 """
347
348 Should maybe eliminate the time steps that are not really steps
349
350 """
351 if len(self._time_step_size_min)!=len(self._time_step_size_max):
352 raise Exception( "incompatible time step sizes" )
353 return len(self._time_step_size_min)
354
355
357 """
358
359 Time of last time step normalised (multiplied) with h^d
360
361 """
363 return raw_data
364
365
366 def remove_first_n_entries(self,count):
367 """
368
369 Remove the first count entries from the dataset. Usually, count is one and
370 anticipates that the solver requires one ``warm up'' sweep to determine h
371 and the eigenvalue, e.g.
372
373 """
374 #self._start_time_step_time_stamp_max = self._start_time_step_time_stamp_max[count:]
375 #self._start_time_step_time_stamp_min = self._start_time_step_time_stamp_min[count:]
378 self._time_step_size_max = self._time_step_size_max[count:]
379 self._time_step_size_min = self._time_step_size_min[count:]
381
382
383
384
def extract_grid_construction_times(performance_data_points):
    """
    Returns a tuple of arrays (x_data, y_data) to be plotted, sorted by
    ascending core count.

    x is threads + (ranks-1)*cores_per_node per measurement; y is the
    measurement's _start_time_stepping value (presumably the time spent
    before time stepping, i.e. grid construction — TODO confirm, the
    attribute is not set in this file's visible code).

    Replaces the previous O(n^2) insertion scan with a single stable sort.
    """
    pairs = []
    for point in performance_data_points:
        x_value = point._threads + (point._ranks-1) * point._cores_per_node
        pairs.append( (x_value, point._start_time_stepping) )
    pairs.sort(key=lambda pair: pair[0])

    x_data = [pair[0] for pair in pairs]
    y_data = [pair[1] for pair in pairs]
    return (x_data,y_data)
403
404
def extract_total_time_stepping_times(performance_data_points, max_cores_per_rank=0, verbose=False):
    """
    Returns a tuple of arrays (x_data, y_data) sorted by x, where y is the
    total time stepping time of every measurement that performed at least
    one time step.

    max_cores_per_rank: Integer
      >0: x combines ranks and threads (scaling over both); ==0: x is the
      rank count; <0: x is the thread count (single-node studies).
    """
    x_data = []
    y_data = []

    for point in performance_data_points:
        if verbose:
            print( "study " + str(point) + " with " + str(point.total_time_stepping_steps) + " time step(s)" )
        if point.total_time_stepping_steps>0:
            if max_cores_per_rank>0:
                x_value = point._ranks + 0.5*point._threads/max_cores_per_rank
            elif max_cores_per_rank==0:
                x_value = point._ranks
            else:
                x_value = point._threads
            if verbose:
                print( "experiment results from " + str(x_value) + " cores/ranks" )
            # bisect_left matches the previous linear scan exactly: insert
            # before the first existing entry that is >= x_value.
            insert_at_position = bisect.bisect_left(x_data, x_value)
            x_data.insert( insert_at_position, x_value )
            y_data.insert( insert_at_position, point.total_time_stepping_time )

    return (x_data,y_data)
430
431
def extract_times_per_step(performance_data_points, max_cores_per_rank=0, verbose=False):
    """
    Returns a tuple of arrays (x_data, y_data) to be plotted.

    This is a rather simple routine. Its big USP is that it accepts an unsorted list of
    measurements and returns properly sorted measurements.

    performance_data_points: [exahype2.postprocessing.PerformanceData]
      Set of measurements.

    max_cores_per_rank: Integer
      Should be set to -1 if you analyse single node data. Should be set to 0 if you have only
      one core count measurement per rank count. Should be set to something bigger than 0 if
      you scale over both the mpi rank count and the core count.
    """
    x_data = []
    y_data = []

    for point in performance_data_points:
        if verbose:
            print( "study " + str(point) + " with " + str(point.total_time_stepping_steps) + " time step(s)" )
        if point.total_time_stepping_steps>0:
            if max_cores_per_rank>0:
                x_value = point._ranks + 0.5*point._threads/max_cores_per_rank
            elif max_cores_per_rank==0:
                x_value = point._ranks
            else:
                x_value = point._threads
            if verbose:
                print( "experiment results from " + str(x_value) + " cores/ranks" )
            # bisect_left matches the previous linear scan exactly: insert
            # before the first existing entry that is >= x_value.
            insert_at_position = bisect.bisect_left(x_data, x_value)
            x_data.insert( insert_at_position, x_value )
            y_data.insert( insert_at_position, point.time_per_time_step() )

    return (x_data,y_data)
473
474
def load_file_sequence(file_prefix,file_postfix="", solver_name="",verbose=False):
    """
    Load all files that start with file_prefix and end with file_postfix and
    return them as a list of PerformanceData instances.

    Files that fail to parse (dataset.valid == False) are reported on stdout
    and skipped; they do not abort the sequence.
    """
    result = []
    filtered_files = glob.glob( file_prefix + "*" + file_postfix )
    for filtered_file in filtered_files:
        new_dataset = PerformanceData(filtered_file,solver_name,verbose)
        # The previous revision additionally tested result!=None, which is
        # always True since result is created as a list above.
        if new_dataset.valid:
            result.append( new_dataset )
        else:
            print( "ERROR: file " + filtered_file + " was not a valid Peano 4 output file" )
    return result
get_time_step_simulated_time_stamps(self)
Returns a sequence of time stamps that are the simulated time stamps (where has the simulation been a...
get_time_per_time_step(self)
Returns a whole array of times per time step, so you can plot the evolution of the cost per step over time.
__convert_human_readable_timestamp_to_seconds(self, data)
Hand in a string in the format hh:mm:ss.
__init__(self, file_name, solver_name="", verbose=False)
:: Attributes
get_average_time_per_time_step(self)
Returns a whole array of times per time step, so you can plot the evolution of the cost per step over time.
time_per_time_step(self)
Time of last time step normalised (multiplied) with h^d.
remove_first_n_entries(self, count)
Remove the first count entries from the dataset.
timesteps(self)
Should maybe eliminate the time steps that are not really steps.
__convert_machine_readable_timestamp_to_seconds(self, data)
Hand in a string in nanoseconds and return double as s.
get_time_step_real_time_stamps(self)
Returns a series of real time stamps (in seconds) snapshotted per time step.
extract_total_time_stepping_times(performance_data_points, max_cores_per_rank=0, verbose=False)
extract_times_per_step(performance_data_points, max_cores_per_rank=0, verbose=False)
Returns a tuple of arrays to be plotted.
extract_grid_construction_times(performance_data_points)
Returns a tuple of arrays to be plotted.
load_file_sequence(file_prefix, file_postfix="", solver_name="", verbose=False)
Load all files that start with file_prefix and merge them into one instance of Dataset.