Megadep Algorithm
=================
The megadep algorithm is a distributed algorithm for determining all the dependencies, direct and indirect,
of the source files in a project.
Each process reads $`\left(\text{\#files} / \text{\#processes}\right)`$ files.
Each process also does $`\mathcal{O}\left(\text{\#files} / \text{\#processes}\right)`$
operations at each step.
Finally, three all-gather steps are performed over the course of the algorithm, one after each map-building stage, to pull the data together on every process.
~~~c++
#include <algorithm>
#include <map>
#include <vector>

#include <mpi.h>

// File_index, list_files, read_includes, block_decomposition,
// sync_distributed_maps and sync_merge_distributed_maps are assumed to be
// defined elsewhere (see the notes after this listing).
int main() {
    MPI_Init(nullptr, nullptr);

    auto file_names = list_files();
    // Sort the names so we can use a binary search lookup.
    std::sort(file_names.begin(), file_names.end());

    // Block decomposition of the range [0, file_names.size()) across the
    // processes in MPI_COMM_WORLD:
    // Process 1 gets [0, ..., block_size-1]
    // Process 2 gets [block_size, ..., 2*block_size-1]
    // ...
    // Process n gets [(n-1)*block_size, ..., n*block_size-1]
    auto decomposition = block_decomposition(MPI_COMM_WORLD, 0, file_names.size());
    // Generate a map from a file index to the file indices it #includes.
    // Only generate the map for the "local" file indices.
    std::map<File_index, std::vector<File_index>> file_to_includes;
    for(auto file_index: decomposition) {
        // Get the list of included files for the current file.
        // Find the equivalent file index for each and add it to the map.
        auto include_names = read_includes(file_names[file_index]);
        for(auto& include_name: include_names) {
            auto include_it = std::lower_bound(file_names.begin(), file_names.end(), include_name);
            // Skip includes that are not part of the project's file list
            // (e.g. system headers).
            if(include_it == file_names.end() || *include_it != include_name) continue;
            auto include_name_index = std::distance(file_names.begin(), include_it);
            file_to_includes[file_index].push_back(include_name_index);
        }
    }
    // All-gather with every process so everyone has the full file_to_includes map.
    // Each process will have a unique set of keys.
    sync_distributed_maps(MPI_COMM_WORLD, file_to_includes);

    // For each "local" file, find which files directly include it.
    // Effectively, this partially inverts file_to_includes.
    std::map<File_index, std::vector<File_index>> file_to_includers;
    for(auto file_index: decomposition) {
        for(auto include: file_to_includes[file_index]) {
            file_to_includers[include].push_back(file_index);
        }
    }

    // All-gather, but every process can have overlapping keys. All the key
    // values must be merged. This completes the inversion.
    sync_merge_distributed_maps(MPI_COMM_WORLD, file_to_includers);
    // For each "local" file, recursively find all its includers, and add it to
    // each includer's list of indirect includes.
    std::map<File_index, std::vector<File_index>> file_to_indirect_includes;
    for(auto file_index: decomposition) {
        // Keep a record of includers that have already been processed so we
        // don't add them twice, or repeat work.
        // (std::map value-initializes missing entries to false.)
        std::map<File_index, bool> processed;
        // Walk the includer graph with an explicit work list instead of
        // hand-unrolled recursion: visit every direct and indirect includer
        // of file_index exactly once.
        std::vector<File_index> to_visit = file_to_includers[file_index];
        while(!to_visit.empty()) {
            auto includer = to_visit.back();
            to_visit.pop_back();
            if(processed[includer]) continue;
            processed[includer] = true;
            file_to_indirect_includes[includer].push_back(file_index);
            // Queue the files that include this includer; they also include
            // file_index indirectly.
            for(auto sub_includer: file_to_includers[includer]) {
                to_visit.push_back(sub_includer);
            }
        }
    }
    // All-gather, and again, every process can have overlapping keys. All the
    // key values must be merged.
    sync_merge_distributed_maps(MPI_COMM_WORLD, file_to_indirect_includes);

    // file_to_indirect_includes now contains a map from a file to every file
    // it includes, directly or indirectly.
    MPI_Finalize();
}
~~~
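The `block_decomposition` helper used in the listing is not defined there. A minimal sketch, assuming it returns the calling rank's contiguous slice of `[0, file_names.size())` as a `std::vector<File_index>`, with any remainder given to the last rank, and assuming `File_index` is a 64-bit unsigned integer, could look like this:
~~~c++
#include <cstdint>
#include <vector>

#include <mpi.h>

// Assumption for this sketch: File_index is a 64-bit unsigned integer.
using File_index = std::uint64_t;

// Returns the file indices [begin, end) owned by the calling rank.
std::vector<File_index> block_decomposition(MPI_Comm comm, File_index first, File_index last) {
    int rank = 0;
    int size = 1;
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);

    const File_index block_size = (last - first) / size;
    const File_index begin = first + rank * block_size;
    // The last rank also takes the remainder so that every index is covered.
    const File_index end = (rank == size - 1) ? last : begin + block_size;

    std::vector<File_index> indices;
    for(File_index i = begin; i < end; ++i) {
        indices.push_back(i);
    }
    return indices;
}
~~~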
In each instance where `std::map` is used, a `std::vector` indexed directly by `File_index` would do the same job more efficiently, since the file indices are dense integers in `[0, file_names.size())`.
However, `std::map` is used here for illustrative purposes.
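As a sketch of that substitution, in the context of the listing above, the first map could be declared as a vector sized to the total number of files; entries owned by other ranks simply stay empty until the all-gather fills them in:
~~~c++
// Sketch: a vector of include lists replacing
// std::map<File_index, std::vector<File_index>>.
// file_to_includes[i] holds the include indices of file i.
std::vector<std::vector<File_index>> file_to_includes(file_names.size());

// Insertions and lookups become plain array indexing:
file_to_includes[file_index].push_back(include_name_index);
~~~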
The definitions of `sync_distributed_maps` and `sync_merge_distributed_maps` have been omitted here.
They can be implemented using `MPI_Allgatherv`.
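As a rough illustration, here is a minimal sketch of one way `sync_distributed_maps` could be written. It assumes `File_index` is a 64-bit unsigned integer, flattens the local map into `(key, value count, values...)` records, exchanges the record lengths with `MPI_Allgather`, and gathers all records onto every rank with `MPI_Allgatherv`. Because values for repeated keys are simply appended, the same approach also covers `sync_merge_distributed_maps`.
~~~c++
#include <cstdint>
#include <map>
#include <vector>

#include <mpi.h>

// Assumption for this sketch: File_index is a 64-bit unsigned integer.
using File_index = std::uint64_t;

void sync_distributed_maps(MPI_Comm comm,
                           std::map<File_index, std::vector<File_index>>& map) {
    // Flatten the local map into (key, value count, values...) records.
    std::vector<std::uint64_t> send_buffer;
    for(const auto& [key, values]: map) {
        send_buffer.push_back(key);
        send_buffer.push_back(values.size());
        send_buffer.insert(send_buffer.end(), values.begin(), values.end());
    }

    int size = 1;
    MPI_Comm_size(comm, &size);

    // Gather how many entries each rank will contribute.
    int send_count = static_cast<int>(send_buffer.size());
    std::vector<int> recv_counts(size);
    MPI_Allgather(&send_count, 1, MPI_INT, recv_counts.data(), 1, MPI_INT, comm);

    // Compute the displacement of each rank's data in the receive buffer.
    std::vector<int> displacements(size, 0);
    for(int i = 1; i < size; i++) {
        displacements[i] = displacements[i - 1] + recv_counts[i - 1];
    }

    // Gather everyone's flattened map onto every rank.
    std::vector<std::uint64_t> recv_buffer(displacements.back() + recv_counts.back());
    MPI_Allgatherv(send_buffer.data(), send_count, MPI_UINT64_T,
                   recv_buffer.data(), recv_counts.data(), displacements.data(),
                   MPI_UINT64_T, comm);

    // Rebuild the map from the gathered records. The local entries are part of
    // recv_buffer too, so the map is cleared first to avoid duplicating them.
    map.clear();
    for(std::size_t i = 0; i < recv_buffer.size();) {
        File_index key = recv_buffer[i++];
        std::uint64_t count = recv_buffer[i++];
        auto& values = map[key];
        for(std::uint64_t v = 0; v < count; v++) {
            values.push_back(recv_buffer[i++]);
        }
    }
}
~~~
Note that this sketch uses plain `int` counts and displacements, as `MPI_Allgatherv` requires, so a real implementation would need to handle totals that overflow an `int`.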