Peano 4
DeviceInfo.cpp
Go to the documentation of this file.
#include "DeviceInfo.h"

#include <algorithm>
#include <cassert>
#include <iostream>
#include <sstream>
#include <stdexcept>

#if defined(_NVHPC_CUDA) or defined(GPUOffloadingCUDA)
#include "cuda/DeviceInfo.h"
#endif

#if defined(GPUOffloadingSYCL)
#include "sycl/DeviceInfo.h"
#endif

#if defined(GPUOffloadingOMP)
#include "omp/DeviceInfo.h"
#endif

#if defined(GPUOffloadingCPP)
#include "cpp/DeviceInfo.h"
#endif

// Possible TODO: differentiate WITH_SYCL_API from GPUOffloadingSYCL via an #elif branch.
namespace tarch::accelerator {
  std::vector<std::vector<std::string>> queryDeviceInformation() {
    // With the C++ backend we cannot query any device information, and the benchmarks
    // already handle the distribution, so we do not call the CUDA API there. Otherwise
    // we always prefer the CUDA API, as it gives us the maximum amount of information.
    std::vector<std::vector<std::string>> devInfo;
    // Backend dispatch (the sub-namespace names are assumed from the per-backend DeviceInfo.h headers).
#if defined(GPUOffloadingCPP)
    cpp::queryDeviceInformation(devInfo);
#elif defined(_NVHPC_CUDA) or defined(GPUOffloadingCUDA)
    cuda::queryDeviceInformation(devInfo);
#elif defined(GPUOffloadingSYCL)
    sycl::queryDeviceInformation(devInfo);
#elif defined(GPUOffloadingOMP)
    omp::queryDeviceInformation(devInfo);
#endif
    return devInfo;
  }
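
  // A hedged usage sketch (illustration only, not part of the original file): the
  // query/print/test helpers in this namespace are typically combined at startup as
  //
  //   auto matrix = tarch::accelerator::queryDeviceInformation();
  //   tarch::accelerator::printDeviceInformation();   // renders the matrix as a table
  //   tarch::accelerator::offloadCapabilityTest();    // throws if no device can launch kernels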

  void printDeviceInformation() {
    // See queryDeviceInformation for the backend preference order.
    auto propertyMatrix = queryDeviceInformation();
    // Backend dispatch (the sub-namespace names are assumed from the per-backend DeviceInfo.h headers).
#if defined(GPUOffloadingCPP)
    // We cannot obtain any device information with C++ GPU offloading right now.
    cpp::printDeviceInformation(propertyMatrix);
#elif defined(_NVHPC_CUDA) or defined(GPUOffloadingCUDA)
    cuda::printDeviceInformation(propertyMatrix);
#elif defined(GPUOffloadingSYCL)
    sycl::printDeviceInformation(propertyMatrix);
#elif defined(GPUOffloadingOMP)
    // An empty table: with OpenMP we only know how many GPUs we have.
    omp::printDeviceInformation(propertyMatrix);
#else
    // Nothing to print since we do not have an accelerator.
    // The following logInfo is not really needed:
    // logInfo("printDeviceInformation()", "No GPU Accelerator Backend is enabled. The code will run on CPUs.");
#endif
  }

  size_t getPropertyOffset(const char name[], const std::vector<std::string>& identifiers) {
    auto it = std::find(identifiers.begin(), identifiers.end(), std::string(name));
    if (it == identifiers.end()) {
      throw std::runtime_error("Property input name not a part of the identifiers list!");
    }
    return std::distance(identifiers.begin(), it);
  }
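
  // Hedged usage sketch (illustration only, not part of the original file): shows how
  // getPropertyOffset() maps a property name onto a row index of the property matrix.
  // The identifier list below is made up.
  namespace {
    [[maybe_unused]] void getPropertyOffsetExample() {
      const std::vector<std::string> identifiers = {"Name", "Memory", "Bandwidth"};
      assert(getPropertyOffset("Bandwidth", identifiers) == 2); // third identifier
      // getPropertyOffset("Clock", identifiers) would throw std::runtime_error.
    }
  }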

  std::string parseMatrix(
    const std::vector<std::vector<std::string>>& propertyMatrix,
    int numberOfDevices,
    const std::vector<std::string>& identifiers
  ) {
    // Although the code looks scary, it only does the following:
    // - transpose the matrix on the fly,
    // - pad each column so that every entry has the same width (padding = spaces in front),
    // - put the identifiers into the first row and the accelerator ids into the first column,
    // - insert separators between the columns.

    std::stringstream ss;

    // For every property (= every row of the matrix), find the longest string in its
    // column of the printed table.
    std::vector<unsigned int> columnMaxStringSizes(identifiers.size(), 0);
    for (unsigned int row = 0; row < identifiers.size(); row++) {
      std::vector<unsigned int> colSizes;
      for (int col = 0; col < numberOfDevices; col++) {
        colSizes.push_back(propertyMatrix[row][col].size());
      }

      // The property identifier is also a part of the column.
      colSizes.push_back(identifiers[row].size());

      columnMaxStringSizes[row] = *std::max_element(colSizes.begin(), colSizes.end());
    }

    unsigned int maxIdLength = std::max(std::to_string(numberOfDevices).size(), std::string("DeviceId").size());

    // Write the table row by row, prepending the padding computed above.
    std::string propIdPad(maxIdLength + 1 - std::string("DeviceId").size(), ' ');
    unsigned int lineLength = 0;

    ss << propIdPad << "DeviceId |";
    lineLength += propIdPad.size() + 10;
    for (unsigned int x = 0; x < identifiers.size(); x++) {
      std::string propPadding(columnMaxStringSizes[x] + 1 - identifiers[x].size(), ' ');
      ss << propPadding << identifiers[x] << " |";
      lineLength += propPadding.size() + identifiers[x].size() + 2;
    }
    // Overwrite the trailing separator with the line break.
    ss.seekp(-1, ss.cur);
    lineLength -= 1;
    ss << "\n";

    std::string dash(lineLength, '-');
    ss << dash << "\n";

    for (int x = 0; x < numberOfDevices; x++) {
#if defined(GPUOffloadingCPP)
      // The C++ backend cannot enumerate devices, so print a placeholder id.
      std::string idPad(maxIdLength, ' ');
      ss << idPad << "X"
         << " |";
#else
      std::string idPad(maxIdLength + 1 - std::to_string(x).size(), ' ');
      ss << idPad << x << " |";
#endif
      for (unsigned int y = 0; y < identifiers.size(); y++) {
        unsigned int paddingLength = columnMaxStringSizes[y] + 1 - propertyMatrix[y][x].size();
        std::string s(paddingLength, ' ');
        ss << s << propertyMatrix[y][x] << " |";
      }
      ss.seekp(-1, ss.cur);
      ss << "\n";
    }
    return ss.str();
  }
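
  // Hedged sketch (illustration only, not part of the original file): the property
  // matrix is stored as [property][device], so for two devices with two properties
  // parseMatrix() renders a table along the lines of
  //
  //    DeviceId |  Name | Memory
  //   ---------------------------
  //           0 | GPU-A |   16GB
  //           1 | GPU-B |   32GB
  namespace {
    [[maybe_unused]] void parseMatrixExample() {
      const std::vector<std::string> identifiers = {"Name", "Memory"};
      const std::vector<std::vector<std::string>> propertyMatrix = {
        {"GPU-A", "GPU-B"}, // one row per property, ...
        {"16GB", "32GB"}    // ... one column per device
      };
      std::cout << parseMatrix(propertyMatrix, 2, identifiers);
    }
  }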

  void offloadCapabilityTest() {
    // Try to run the kernels in the configured backend. Backend dispatch below
    // (the sub-namespace names are assumed from the per-backend DeviceInfo.h headers).
    std::vector<bool> testResults;
#if defined(GPUOffloadingCUDA)
    testResults = cuda::testKernelLaunch();
#elif defined(GPUOffloadingSYCL)
    testResults = sycl::testKernelLaunch();
#elif defined(GPUOffloadingOMP)
    testResults = omp::testKernelLaunch();
#elif defined(GPUOffloadingCPP)
    testResults = cpp::testKernelLaunch();
    if (testResults.size() == 0) {
      return;
    } else if (testResults.size() == 1 && !testResults[0]) {
      throw std::runtime_error(
        "None of the found devices could successfully launch GPU kernels. Check for wrong build configuration."
      );
    }
#endif

#if defined(GPUOffloadingCUDA) or defined(GPUOffloadingSYCL) or defined(GPUOffloadingOMP) or defined(GPUOffloadingCPP)
    unsigned int offloadCapableGPUCount = 0;
    std::string failMessage;

    // If some of the GPUs fail to launch but others work, the first loop records which
    // ones failed in the message. If all available GPUs fail, we throw a runtime error
    // stating that everything failed.
    for (unsigned int i = 0; i < testResults.size(); i++) {
      if (!testResults[i]) {
        // Using std::format would be nice here, but as of 10.06.2023 CUDA only supports
        // GCC up to version 12, which lacks std::format support.
        failMessage += "Device " + std::to_string(i) + " could not successfully launch a GPU kernel\n";
      } else {
        offloadCapableGPUCount += 1;
      }
    }

    if (testResults.size() != 0) {
      if (offloadCapableGPUCount < testResults.size()) {
        if (offloadCapableGPUCount == 0) {
          throw std::runtime_error("None of the found devices could successfully launch GPU kernels");
        } else {
          throw std::runtime_error(failMessage);
        }
      } else {
        logInfo("offloadCapabilityTest()", "Kernel launch test successful");
      }
    } else {
      throw std::runtime_error(
        "Peano was configured to run on accelerators, but no offload capable devices were found."
      );
    }
#else
    // No accelerator available, hence nothing to test.
    logInfo("offloadCapabilityTest()", "Peano was not configured to run on accelerators. The code will run on CPUs.");
#endif
  }
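
  // Hedged worked example of the failure semantics above (values made up): with
  // testResults = {true, false, true}, offloadCapableGPUCount ends up as 2 and the
  // thrown message reads "Device 1 could not successfully launch a GPU kernel";
  // with testResults = {false, false}, every device failed, so the generic
  // "None of the found devices could successfully launch GPU kernels" error is thrown.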

  void runBenchmarks(std::vector<std::vector<std::string>>& propertyMatrix) {
    std::vector<BenchmarkResultInNanoSeconds> results;
    // Backend dispatch (the sub-namespace names are assumed from the per-backend DeviceInfo.h headers).
#if defined(GPUOffloadingCUDA)
    results = cuda::runBenchmarks();
    parseBenchmarkResults(results, propertyMatrix);
#elif defined(GPUOffloadingSYCL)
    results = sycl::runBenchmarks();
    parseBenchmarkResults(results, propertyMatrix);
#elif defined(GPUOffloadingOMP)
    results = omp::runBenchmarks();
    parseBenchmarkResults(results, propertyMatrix);
#elif defined(GPUOffloadingCPP)
    results = cpp::runBenchmarks();
    parseBenchmarkResults(results, propertyMatrix);
#endif
  }

  void parseBenchmarkResults(
    std::vector<BenchmarkResultInNanoSeconds>& benchmarkResults,
    std::vector<std::vector<std::string>>& benchmarkResultsStringRepresentations
  ) {
    // Bandwidth = bytes moved / time. Bytes are converted to GB with a factor of 1e-9
    // and nanoseconds to seconds with a factor of 1e-9, so GB/s = bytes / nanoseconds.
    for (unsigned int i = 0; i < benchmarkResults.size(); i++) {
      // Convert the nanosecond measurements into something more readable and add units.
      std::stringstream ss;
      auto& benchmarkResult = benchmarkResults[i];
      constexpr double byteToGB = 1.0 / (1000.0 * 1000.0 * 1000.0);
      ss << (static_cast<double>(benchmarkResult.allocatedDataSize) * static_cast<double>(benchmarkResult.repeats) * byteToGB)
           * (1.0 / (static_cast<double>(benchmarkResult.GPUtoGPUCopy) * 1e-9))
         << " GB/s";
      // Note: getPropertyOffset() is declared with an identifiers list as second
      // argument; a matching overload or default argument is assumed here.
      benchmarkResultsStringRepresentations[tarch::accelerator::getPropertyOffset("Bandwidth")][i] = ss.str();
    }
  }
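
  // Hedged worked example for the conversion above (values made up):
  // allocatedDataSize = 268435456 bytes (256 MiB), repeats = 10,
  // GPUtoGPUCopy = 20000000 ns = 0.02 s. Then
  //   bytes moved = 268435456 * 10 * 1e-9 GB = 2.68435456 GB
  //   bandwidth   = 2.68435456 GB / 0.02 s  ~ 134.2 GB/s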
} // namespace tarch::accelerator
#define logInfo(methodName, logMacroMessageStream)
Wrapper macro around tarch::logging::Log to improve logging.
Definition Log.h:411
void printDeviceInformation(std::vector< std::vector< std::string > > &propertyMatrix)
Print device information based on the provided property matrix.
std::vector< tarch::accelerator::BenchmarkResultInNanoSeconds > runBenchmarks()
Run benchmarks with the C++ backend.
void queryDeviceInformation(std::vector< std::vector< std::string > > &propertyMatrix)
Queries device information for C++ GPU accelerator backend.
std::vector< bool > testKernelLaunch()
Test the kernel launch capability of the system.
std::vector< bool > testKernelLaunch()
Test kernel launch capability.
std::vector< tarch::accelerator::BenchmarkResultInNanoSeconds > runBenchmarks()
Run benchmarks with the CUDA backend by launching kernels that perform the streaming benchmarks.
void queryDeviceInformation(std::vector< std::vector< std::string > > &propertyMatrix)
Query device information using CUDA API.
void printDeviceInformation(std::vector< std::vector< std::string > > &propertyMatrix)
Print device information when there is access to the CUDA API.
void queryDeviceInformation(std::vector< std::vector< std::string > > &propertyMatrix)
Queries device information for OpenMP GPU accelerator backend.
std::vector< bool > testKernelLaunch()
Tests the kernel launch capability for the OpenMP GPU accelerator backend.
std::vector< tarch::accelerator::BenchmarkResultInNanoSeconds > runBenchmarks()
Runs benchmarks for the OpenMP GPU accelerator backend.
void printDeviceInformation(std::vector< std::vector< std::string > > &propertyMatrix)
Prints device information for OpenMP GPU accelerator backend.
std::vector< bool > testKernelLaunch()
Test the kernel launch with SYCL backend.
std::vector< tarch::accelerator::BenchmarkResultInNanoSeconds > runBenchmarks()
Returns empty benchmark results while the SYCL benchmarks are still being implemented.
void queryDeviceInformation(std::vector< std::vector< std::string > > &propertyMatrix)
Queries device information for SYCL GPU accelerator backend.
void printDeviceInformation(std::vector< std::vector< std::string > > &propertyMatrix)
Print device information.
void printDeviceInformation()
Prints device information based on the available accelerator backend.
std::vector< std::vector< std::string > > queryDeviceInformation()
Queries device information and runs benchmarks on the available accelerators.
std::string parseMatrix(const std::vector< std::vector< std::string > > &propertyMatrix, int numberOfDevices, const std::vector< std::string > &identifiers)
Parses a matrix of properties into a formatted string representation.
void parseBenchmarkResults(std::vector< BenchmarkResultInNanoSeconds > &benchmarkResults, std::vector< std::vector< std::string > > &benchmarkResultsStringRepresentations)
Parses benchmark results and updates the string representations in the property matrix.
void offloadCapabilityTest()
Performs offload capability tests for the available GPU accelerator backend.
size_t getPropertyOffset(const char name[], const std::vector< std::string > &identifiers)
Retrieves the offset of a property in a vector of identifiers.
void runBenchmarks(std::vector< std::vector< std::string > > &propertyMatrix)
Runs benchmarks for the available GPU accelerator backend and populates the property matrix.