ParallelSTLVector.cc
#include "ParallelSTLVector.h"

#include <petsc.h>

#include "chi_log.h"

#include <algorithm>
#include <sstream>
#include <stdexcept>
#include <cmath>
#include <numeric>

#define scint64_t static_cast<int64_t>
#define scint static_cast<int>
#define scshort static_cast<short>

namespace chi_math
{

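// Constructs a zero-initialized vector with the given local and global sizes.
// extents_ records the first global index owned by each process and values_
// holds the locally owned entries.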
ParallelSTLVector::ParallelSTLVector(const uint64_t local_size,
                                     const uint64_t global_size,
                                     const MPI_Comm communicator)
  : ParallelVector(local_size, global_size, communicator),
    extents_(DefineExtents(local_size, process_count_, communicator)),
    values_(local_size_, 0.0)
{
}

ParallelSTLVector::ParallelSTLVector(const ParallelSTLVector& other)
  : ParallelVector(other), extents_(other.extents_), values_(other.values_)
{
}

ParallelSTLVector::ParallelSTLVector(ParallelSTLVector&& other) noexcept
  : ParallelVector(std::move(other)),
    extents_(other.extents_),
    values_(std::move(other.values_))
{
}

std::unique_ptr<ParallelVector> ParallelSTLVector::MakeCopy() const
{
  return std::make_unique<ParallelSTLVector>(*this);
}

std::unique_ptr<ParallelVector> ParallelSTLVector::MakeClone() const
{
  auto new_vec = std::make_unique<ParallelSTLVector>(*this);

  new_vec->Set(0.0);

  return new_vec;
}

double* ParallelSTLVector::Data() { return values_.data(); }
const double* ParallelSTLVector::Data() const { return values_.data(); }

const std::vector<double>& ParallelSTLVector::LocalSTLData() const
{
  return values_;
}

std::vector<double>& ParallelSTLVector::LocalSTLData() { return values_; }

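// Returns a copy of the locally owned entries (any ghost storage used by
// derived ghosted vectors is excluded).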
std::vector<double> ParallelSTLVector::MakeLocalVector()
{
  return std::vector<double>(values_.begin(),
                             values_.begin() + scint(local_size_));
}

double ParallelSTLVector::operator[](const int64_t local_id) const
{
  ChiInvalidArgumentIf(local_id < 0 or local_id >= values_.size(),
                       "Invalid local index provided. " +
                         std::to_string(local_id) + " vs [0," +
                         std::to_string(values_.size()) + ")");

  return values_[local_id];
}

double& ParallelSTLVector::operator[](const int64_t local_id)
{
  ChiInvalidArgumentIf(local_id < 0 or local_id >= values_.size(),
                       "Invalid local index provided. " +
                         std::to_string(local_id) + " vs [0," +
                         std::to_string(values_.size()) + ")");

  return values_[local_id];
}

void ParallelSTLVector::Set(const double value)
{
  values_.assign(values_.size(), value);
}

void ParallelSTLVector::Set(const std::vector<double>& local_vector)
{
  ChiInvalidArgumentIf(local_vector.size() < local_size_,
                       "Incompatible local vector size.");

  // We cannot assign values_ = local_vector because this might
  // destroy the internals of a ghosted vector
  std::copy(local_vector.begin(),
            local_vector.begin() + scint64_t(local_size_),
            values_.begin());
}

void ParallelSTLVector::BlockSet(const std::vector<double>& y,
                                 int64_t local_offset,
                                 int64_t num_values)
{
  ChiInvalidArgumentIf(y.size() < num_values,
                       "y.size() < num_values " + std::to_string(y.size()) +
                         " < " + std::to_string(num_values));

  const int64_t local_end = local_offset + num_values;
  ChiInvalidArgumentIf(
    local_end > local_size_,
    "local_offset + num_values=" + std::to_string(local_end) +
      ", is out of range for destination vector with local size " +
      std::to_string(local_size_));

  std::copy(y.begin(), y.begin() + num_values, values_.begin() + local_offset);
}

void ParallelSTLVector::CopyLocalValues(const ParallelVector& y)
{
  ChiInvalidArgumentIf(y.LocalSize() != local_size_,
                       "y.LocalSize() != local_size_");
  ChiInvalidArgumentIf(y.GlobalSize() != global_size_,
                       "y.GlobalSize() != global_size_");

  // This prevents us from unnecessarily going through the operator[]
  // of y, which again applies bounds checking
  const double* y_data = y.Data();

  std::copy(y_data, y_data + local_size_, values_.begin());
}

void ParallelSTLVector::CopyLocalValues(Vec y)
{
  PetscInt n;
  VecGetLocalSize(y, &n);

  ChiInvalidArgumentIf(n < scint64_t(local_size_),
                       "Attempted update with a vector of insufficient size.");

  const double* x;
  VecGetArrayRead(y, &x);
  std::copy(x, x + n, values_.begin());
  VecRestoreArrayRead(y, &x);
}

void ParallelSTLVector::BlockCopyLocalValues(const ParallelVector& y,
                                             int64_t y_offset,
                                             int64_t local_offset,
                                             int64_t num_values)
{
  ChiInvalidArgumentIf(y_offset < 0, "y_offset < 0.");
  ChiInvalidArgumentIf(local_offset < 0, "local_offset < 0.");

  const int64_t y_end = y_offset + num_values;
  const int64_t local_end = local_offset + num_values;

  ChiInvalidArgumentIf(y_end > y.LocalSize(),
                       "y_offset + num_values=" + std::to_string(y_end) +
                         ", is out of range for vector y with local size " +
                         std::to_string(y.LocalSize()));

  ChiInvalidArgumentIf(
    local_end > local_size_,
    "local_offset + num_values=" + std::to_string(local_end) +
      ", is out of range for destination vector with local size " +
      std::to_string(local_size_));

  // This prevents us from unnecessarily going through the operator[]
  // of y, which again applies bounds checking
  const double* y_data = y.Data();

  std::copy(y_data + y_offset,
            y_data + y_offset + num_values,
            values_.begin() + local_offset);
}

void ParallelSTLVector::BlockCopyLocalValues(Vec y,
                                             int64_t y_offset,
                                             int64_t local_offset,
                                             int64_t num_values)
{
  ChiInvalidArgumentIf(y_offset < 0, "y_offset < 0.");
  ChiInvalidArgumentIf(local_offset < 0, "local_offset < 0.");

  const int64_t y_end = y_offset + num_values;
  const int64_t local_end = local_offset + num_values;

  int64_t y_local_size;
  VecGetLocalSize(y, &y_local_size);

  ChiInvalidArgumentIf(y_end > y_local_size,
                       "y_offset + num_values=" + std::to_string(y_end) +
                         ", is out of range for vector y with local size " +
                         std::to_string(y_local_size));

  ChiInvalidArgumentIf(
    local_end > local_size_,
    "local_offset + num_values=" + std::to_string(local_end) +
      ", is out of range for destination vector with local size " +
      std::to_string(local_size_));

  const double* y_data;
  VecGetArrayRead(y, &y_data);

  std::copy(y_data + y_offset,
            y_data + y_offset + num_values,
            values_.begin() + local_offset);

  VecRestoreArrayRead(y, &y_data);
}

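// Note: SetValue()/SetValues() only cache the requested operations. The
// cached operations are communicated and applied when Assemble() is called.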
void ParallelSTLVector::SetValue(const int64_t global_id,
                                 const double value,
                                 const VecOpType op_type)
{
  ChiInvalidArgumentIf(global_id < 0 or global_id >= global_size_,
                       "Invalid global index encountered. Global indices "
                       "must be in the range [0, this->GlobalSize()].");

  auto& op_cache = op_type == VecOpType::SET_VALUE ? set_cache_ : add_cache_;
  op_cache.emplace_back(global_id, value);
}

void ParallelSTLVector::SetValues(const std::vector<int64_t>& global_ids,
                                  const std::vector<double>& values,
                                  const VecOpType op_type)
{
  ChiInvalidArgumentIf(global_ids.size() != values.size(),
                       "Size mismatch between indices and values.");

  auto& op_cache = op_type == VecOpType::SET_VALUE ? set_cache_ : add_cache_;
  for (size_t i = 0; i < global_ids.size(); ++i)
  {
    const auto& global_id = global_ids[i];
    ChiInvalidArgumentIf(global_id < 0 or global_id >= global_size_,
                         "Invalid global index encountered. Global indices "
                         "must be in the range [0, this->GlobalSize()].");
    op_cache.emplace_back(global_id, values[i]);
  }
}

void ParallelSTLVector::operator+=(const ParallelVector& y)
{
  ChiInvalidArgumentIf(y.LocalSize() != local_size_,
                       "y.LocalSize() != local_size_");
  ChiInvalidArgumentIf(y.GlobalSize() != global_size_,
                       "y.GlobalSize() != global_size_");

  // This prevents us from unnecessarily going through the operator[]
  // of y, which again applies bounds checking
  const double* y_data = y.Data();

  for (int64_t i = 0; i < local_size_; ++i)
    values_[i] += y_data[i];
}

void ParallelSTLVector::PlusAY(const ParallelVector& y, const double a)
{
  ChiInvalidArgumentIf(y.LocalSize() != local_size_,
                       "y.LocalSize() != local_size_");
  ChiInvalidArgumentIf(y.GlobalSize() != global_size_,
                       "y.GlobalSize() != global_size_");

  // This prevents us from unnecessarily going through the operator[]
  // of y, which again applies bounds checking
  const double* y_data = y.Data();

  if (a == 1.0)
    for (int64_t i = 0; i < local_size_; ++i)
      values_[i] += y_data[i];
  else if (a == -1.0)
    for (int64_t i = 0; i < local_size_; ++i)
      values_[i] -= y_data[i];
  else
    for (int64_t i = 0; i < local_size_; ++i)
      values_[i] += a * y_data[i];
}

void ParallelSTLVector::AXPlusY(const double a, const ParallelVector& y)
{
  ChiInvalidArgumentIf(y.LocalSize() != local_size_,
                       "y.LocalSize() != local_size_");
  ChiInvalidArgumentIf(y.GlobalSize() != global_size_,
                       "y.GlobalSize() != global_size_");

  // This prevents us from unnecessarily going through the operator[]
  // of y, which again applies bounds checking
  const double* y_data = y.Data();

  for (int64_t i = 0; i < local_size_; ++i)
    values_[i] = a * values_[i] + y_data[i];
}

void ParallelSTLVector::Scale(const double a)
{
  for (size_t i = 0; i < local_size_; ++i)
    values_[i] *= a;
}

void ParallelSTLVector::Shift(const double a)
{
  for (size_t i = 0; i < local_size_; ++i)
    values_[i] += a;
}

/**Returns the specified norm of the vector.*/
double ParallelSTLVector::ComputeNorm(chi_math::NormType norm_type) const
{
  switch (norm_type)
  {
    case NormType::L1_NORM:
    {
      const double local_norm_val = std::accumulate(
        values_.begin(), values_.begin() + scint(local_size_), 0.0);

      double global_norm_val;
      MPI_Allreduce(&local_norm_val,  // sendbuf
                    &global_norm_val, // recvbuf
                    1,                // sendcount
                    MPI_DOUBLE,       // sendtype
                    MPI_SUM,          // operation
                    comm_);           // communicator

      return global_norm_val;
    }
    case NormType::L2_NORM:
    {
      double local_norm_val = 0.0;
      for (size_t i = 0; i < local_size_; ++i)
      {
        const double value = values_[i];
        local_norm_val += value * value;
      }
      double global_norm_val;
      MPI_Allreduce(&local_norm_val,  // sendbuf
                    &global_norm_val, // recvbuf
                    1,                // sendcount
                    MPI_DOUBLE,       // sendtype
                    MPI_SUM,          // operation
                    comm_);           // communicator

      return std::sqrt(global_norm_val);
    }
    case NormType::LINF_NORM:
    {
      const double local_norm_val = *std::max_element(
        values_.begin(), values_.begin() + scint(local_size_));

      double global_norm_val;
      MPI_Allreduce(&local_norm_val,  // sendbuf
                    &global_norm_val, // recvbuf
                    1,                // sendcount
                    MPI_DOUBLE,       // sendtype
                    MPI_MAX,          // operation
                    comm_);           // communicator

      return global_norm_val;
    }
    default:
      return 0.0;
  }
}

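// Assembles the cached SetValue/SetValues operations across all processes:
// the processes first agree on a single operation type (set or add), locally
// owned entries are applied directly, and operations destined for other
// processes are serialized into byte arrays, exchanged via MapAllToAll, and
// applied by their owners.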
void ParallelSTLVector::Assemble()
{
  // Define the local operation mode.
  // 0=Do Nothing, 1=Set, 2=Add, 3=INVALID (mixed set/add ops)
  const short local_mode = scshort(not set_cache_.empty()) +
                           scshort(not add_cache_.empty()) * short(2);
  ChiLogicalErrorIf(local_mode == 3, "Invalid operation mode.");

  // Now, determine the global operation mode
  short global_mode;
  MPI_Allreduce(&local_mode, &global_mode, 1, MPI_SHORT, MPI_MAX, comm_);

  // If the mode is to do nothing, exit
  if (global_mode == 0) return;

  // Next, ensure that all operation types are compatible
  ChiLogicalErrorIf(
    local_mode != 0 and local_mode != global_mode,
    "The operation on each process must be either 0 (do nothing), "
    "or the same across all processes.");

  // Now, store the global operation type and get the appropriate cache
  using OpType = VecOpType;
  const auto global_op_type = static_cast<OpType>(global_mode);
  auto& op_cache =
    global_op_type == OpType::SET_VALUE ? set_cache_ : add_cache_;

  // First, segregate the local and non-local operations
  std::vector<std::pair<int64_t, double>> local_cache;
  std::vector<std::pair<int64_t, double>> nonlocal_cache;
  for (const auto& op : op_cache)
  {
    const int op_pid = FindOwnerPID(op.first);
    if (op_pid == location_id_) local_cache.emplace_back(op);
    else
      nonlocal_cache.emplace_back(op);
  }

  // The local operations can be handled immediately
  for (const auto& [global_id, value] : local_cache)
  {
    const int64_t local_id = global_id - scint64_t(extents_[location_id_]);
    ChiLogicalErrorIf(local_id < 0 or local_id >= local_size_,
                      "Invalid mapping from global to local.");

    if (global_op_type == OpType::SET_VALUE) values_[local_id] = value;
    else
      values_[local_id] += value;
  }

  // Next, the data that needs to be sent to other processes is determined.
  // A mapping is built between the processes that must receive information
  // and the serialized operations destined for them. Each operation is
  // serialized by converting its int64_t-double pair to bytes.
  std::map<int, chi_data_types::ByteArray> pid_send_map;
  for (const auto& [global_id, value] : nonlocal_cache)
  {
    const int pid = FindOwnerPID(global_id);
    auto& byte_array = pid_send_map[pid];
    byte_array.Write(global_id);
    byte_array.Write(value);
  }

  // For use with MPI, the byte arrays from above are converted to
  // STL vectors of bytes.
  std::map<int, std::vector<std::byte>> pid_send_map_bytes;
  for (const auto& [pid, byte_array] : pid_send_map)
    pid_send_map_bytes[pid] = byte_array.Data();

  // With the outgoing information known, the information to be received
  // from other processes is needed. Each process sends to every other
  // process the data destined for it, so each process ends up with a map
  // of the data sent to it by its peers.
  std::map<int, std::vector<std::byte>> pid_recv_map_bytes =
    chi_mpi_utils::MapAllToAll(pid_send_map_bytes, MPI_BYTE);

  // The received information is now processed, unpacked, and the
  // necessary operations performed
  for (const auto& [pid, byte_vector] : pid_recv_map_bytes)
  {
    const auto packet_size = sizeof(std::pair<int64_t, double>);

    ChiLogicalErrorIf(
      byte_vector.size() % packet_size != 0,
      "Unrecognized received operations. Operations are serialized with "
      "an int64_t and double, but the received packet from process " +
        std::to_string(pid) + " on process " + std::to_string(location_id_) +
        " is not an integer multiple of the size of an int64_t and double.");

    const size_t num_ops = byte_vector.size() / packet_size;
    chi_data_types::ByteArray byte_array(byte_vector);
    for (size_t k = 0; k < num_ops; ++k)
    {
      const int64_t global_id = byte_array.Read<int64_t>();
      const double value = byte_array.Read<double>();

      // Check that the global ID is in fact valid for this process
      const int64_t local_id = global_id - scint64_t(extents_[location_id_]);

      ChiLogicalErrorIf(local_id < 0 or local_id >= local_size_,
                        "A non-local global ID was received by process " +
                          std::to_string(location_id_) + " from process " +
                          std::to_string(pid) + " during vector assembly.");

      // Contribute to the local vector
      if (global_op_type == OpType::SET_VALUE) values_[local_id] = value;
      else
        values_[local_id] += value;
    }
  }

  // Finally, clear the operation cache
  op_cache.clear();
}

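// Gathers every process's local size and returns the extents array: entry p
// holds the first global index owned by process p, and the final entry equals
// the global size.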
std::vector<uint64_t> ParallelSTLVector::DefineExtents(uint64_t local_size,
                                                       int comm_size,
                                                       MPI_Comm communicator)
{
  // Get the local vector sizes per processor
  std::vector<uint64_t> local_sizes(comm_size, 0);
  MPI_Allgather(&local_size,        // sendbuf
                1,                  // sendcount
                MPI_UINT64_T,       // sendtype
                local_sizes.data(), // recvbuf
                1,                  // recvcount
                MPI_UINT64_T,       // recvtype
                communicator);      // communicator

  // With the vector sizes per processor, the offsets for each processor
  // can be defined using a cumulative sum. This allows determining
  // whether a global index is locally owned or not.
  std::vector<uint64_t> extents(comm_size + 1, 0);
  for (size_t p = 1; p < comm_size; ++p)
    extents[p] = extents[p - 1] + local_sizes[p - 1];
  extents[comm_size] = extents[comm_size - 1] + local_sizes.back();

  return extents;
}

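// Returns the rank that owns the given global index by searching the extents
// array; returns -1 if the index falls within no extent.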
int ParallelSTLVector::FindOwnerPID(const uint64_t global_id) const
{
  ChiInvalidArgumentIf(global_id >= global_size_,
                       "Invalid global id specified.");

  for (int p = 0; p < process_count_; ++p)
    if (global_id >= extents_[p] and global_id < extents_[p + 1]) return p;
  return -1;
}

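// Prints all locally stored values as a single bracketed, space-separated
// string.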
std::string ParallelSTLVector::PrintStr() const
{
  std::stringstream ss;
  for (size_t i = 0; i < values_.size(); ++i)
    ss << (i == 0 ? "[" : "") << values_[i]
       << (i < values_.size() - 1 ? " " : "]");
  return ss.str();
}

} // namespace chi_math
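A minimal usage sketch of the class documented above (not part of the file). It assumes an initialized MPI environment, that the hypothetical variable num_ranks holds the communicator size, and that VecOpType and NormType resolve in the chi_math namespace as referenced in the listing:

// Hypothetical example: 10 locally owned entries per rank.
chi_math::ParallelSTLVector x(10, 10 * num_ranks, MPI_COMM_WORLD);
x.Set(1.0);                                         // fill owned entries
x.SetValue(0, 5.0, chi_math::VecOpType::SET_VALUE); // cached until Assemble()
x.Assemble();                                       // communicate and apply cached ops
const double l2_norm = x.ComputeNorm(chi_math::NormType::L2_NORM);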