|
|
Megadep Algorithm
|
|
|
=================
|
|
|
|
|
|
The megadep algorithm is a distributed algorithm for determining all the dependencies direct and indirect
|
|
|
of source files in a project.
|
|
|
|
|
|
Each process reads $`\left(\text{\#files} / \text{\#processes}\right)`$ files.
|
|
|
Each process also does $`\mathcal{O}\left(\text{\#files} / \text{\#processes}\right)`$
|
|
|
operations at each step.
|
|
|
Finally, 3 All-gather steps are done throughout the algorithm to pull the data together on each process.
|
|
|
|
|
|
~~~c++
|
|
|
int main() {
|
|
|
auto file_names = list_files();
|
|
|
// Sort the names so we can use a binary search lookup.
|
|
|
std::sort(file_names.begin(), file_names.end());
|
|
|
|
|
|
// Block decomposition of the range [0, file_names.size()) across the processes in
|
|
|
// MPI_COMM_WORLD:
|
|
|
// Process 1 gets [0, ..., block_size-1]
|
|
|
// Process 2 gets [block_size, ..., 2*block_size-1]
|
|
|
// ...
|
|
|
// Process n gets [(n-1)*block_size, ..., n*block_size]
|
|
|
auto decomposition = block_decomposition(MPI_COMM_WORLD, 0, file_names.size());
|
|
|
|
|
|
|
|
|
// Generate a map from a file index to the file indices it #includes
|
|
|
// Only generate the map for the "local" file indices.
|
|
|
std::map<File_index, std::vector<File_index>> file_to_includes;
|
|
|
|
|
|
for(auto file_index: decomposition) {
|
|
|
|
|
|
// Get list of included files for the current file.
|
|
|
// Find the equivalent file index for each and add it to the map.
|
|
|
auto include_names = read_includes(file_names[file_index]);
|
|
|
|
|
|
for(auto& include_name: include_names) {
|
|
|
auto include_it = std::lower_bound(file_names.begin(), file_names.end(), include_name);
|
|
|
auto include_name_index = std::distance(file_names.begin(), include_it);
|
|
|
|
|
|
file_to_includes[file_index].push_back(include_name_index);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// All-gather with every process so everyone has the full file_to_includes map.
|
|
|
// Each process will have a unique set of keys.
|
|
|
sync_distributed_maps(MPI_COMM_WORLD, file_to_includes);
|
|
|
|
|
|
|
|
|
// For each "local" file, find which files directly include it.
|
|
|
// Effectively, this partially inverts file_to_includes
|
|
|
std::map<File_index, std::vector<File_index>> file_to_includers;
|
|
|
|
|
|
for(auto file_index: decomposition) {
|
|
|
for(auto include: file_to_includes[file_index]) {
|
|
|
file_to_includers[include].push_back(file_index);
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// All-gather, but every process can have overlapping keys. All the key values must be merged.
|
|
|
// This completes the inversion.
|
|
|
sync_merge_distributed_maps(MPI_COMM_WORLD, file_to_includers);
|
|
|
|
|
|
|
|
|
// For each "local" file, recursively find all its includers, and add it to
|
|
|
// each includers list of indirect includes.
|
|
|
std::map<File_index, std::vector<File_index>> file_to_indirect_includes;
|
|
|
|
|
|
for(auto file_index: decomposition) {
|
|
|
|
|
|
// We keep a list of includers that have been processed so we don't
|
|
|
// add them twice, or repeat work. (assume all initialized to false)
|
|
|
std::map<File_index, bool> processed;
|
|
|
|
|
|
for(auto include: file_to_includers[file_index]) {
|
|
|
if(processed[include] == true) continue;
|
|
|
processed[include] = true;
|
|
|
|
|
|
file_to_indirect_includes[include].push_back(file_index);
|
|
|
|
|
|
for(auto sub_include: file_to_includers[include]) {
|
|
|
if(processed[sub_include] == true) continue;
|
|
|
processed[sub_include] = true;
|
|
|
|
|
|
file_to_indirect_includes[sub_include].push_back(file_index);
|
|
|
|
|
|
for(auto sub_sub_include: file_to_includers[sub_include]) {
|
|
|
// recurse
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
// All-gather, and again, every process can have overlapping keys. All the key values must be merged.
|
|
|
sync_merge_distributed_maps(MPI_COMM_WORLD, file_to_indirect_includes);
|
|
|
|
|
|
|
|
|
// file_to_indirect_includes now contains a map from a file to every file it includes directly
|
|
|
// or indirectly.
|
|
|
}
|
|
|
~~~
|
|
|
|
|
|
In each instance where `std::map` is used, a `std::vector` should do the same job, and more efficiently.
|
|
|
However, `std::map` is used here for illustrative purposes.
|
|
|
|
|
|
The definitions of `sync_distributed_maps` and `sync_merge_distributed_maps` have been omitted here.
|
|
|
They can be implemented using `MPI_Allgatherv`. |
|
|
\ No newline at end of file |