Commit 7b4e8135 authored by Ciarán Ó Rourke

Merge branch '2-mpi-non-blocking-exercises' into 'master'

Resolve "MPI: non-blocking exercises"

Closes #2

See merge request training/sohpc-training-2021!3
parents 8f80bc72 877d1b82
## Persistent Heat Equation
Further modify the heat equation code to use persistent communication.
***Hint:*** There are two buffers, so we need two sets of persistent
communications.
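For instance, a minimal sketch of the resulting iteration loop (assuming the `set_up_halo_exchange` and non-blocking `evolve` helpers from the solution code below; since each persistent request is bound to a fixed buffer, the request sets must be swapped together with the grids):

```c
#include <mpi.h>

/* sketch only: set_up_halo_exchange() and evolve() as in the solution code */
void iterate_persistent(double *u, double *u_prev, const int nx, const int ny,
                        const int nt, const double a, const double dt,
                        const double dx2, const double dy2,
                        const int rank, const int size) {
    MPI_Request req[4], req_prev[4];
    set_up_halo_exchange(u, nx, ny, rank, size, req);
    set_up_halo_exchange(u_prev, nx, ny, rank, size, req_prev);
    for (int i = 0; i < nt; i++) {
        MPI_Startall(4, req_prev);            /* restart comms bound to u_prev */
        evolve(u, u_prev, nx, ny, a, dt, dx2, dy2, req_prev);
        MPI_Waitall(2, &req_prev[2], MPI_STATUSES_IGNORE); /* finish sends */
        double *tu = u_prev; u_prev = u; u = tu;  /* swap grids ... */
        for (int r = 0; r < 4; r++) {             /* ... and request sets */
            MPI_Request tr = req_prev[r];
            req_prev[r] = req[r];
            req[r] = tr;
        }
    }
    /* persistent requests must be freed explicitly */
    for (int r = 0; r < 4; r++) {
        MPI_Request_free(&req[r]);
        MPI_Request_free(&req_prev[r]);
    }
}
```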
#!/bin/bash
#SBATCH --partition=CourseDevQ
#SBATCH --reservation=SOHPCCourse
#SBATCH --nodes=1
#SBATCH --time=00:10:00
#SBATCH --account=course
module load gcc
cd $SLURM_SUBMIT_DIR
# Run MPI code
mpirun -n 6 ./prog
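# note (assumption, not part of the original script): ./prog must be built
# beforehand, e.g. with the MPI compiler wrapper from the loaded toolchain;
# the source file names below are guesses based on the headers used:
#   mpicc -O2 -o prog main.c grid.c evolve.c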
# Heat Equation
## Configuration
Parameters can be edited in the [params.h](./params.h) file. `nx`, `ny`, and `nt`
are the size of the grid in the x and y dimensions and the number of timesteps,
respectively.
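For example, a finer grid evolved for more timesteps could be configured as
(illustrative values only, not from the original):

```c
const int nx = 30;
const int ny = 30;
const int nt = 20000;
```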
#!/bin/bash
#SBATCH --partition=CourseDevQ
#SBATCH --reservation=SOHPCCourse
#SBATCH --nodes=1
#SBATCH --time=00:10:00
#SBATCH --account=course
module load gcc
cd $SLURM_SUBMIT_DIR
# Run MPI code
mpirun -n 6 ./prog
void evolve(double *u, double *u_prev, const int nx, const int ny,
const double a, const double dt, const double dx2,
const double dy2) {
for (int i = 1; i < ny + 1; i++) {
for (int j = 0; j < nx; j++) {
const int self = i * nx + j;
const int left = i * nx + (j - 1 + nx) % nx; /* periodic wrap in x */
const int right = i * nx + (j + 1) % nx; /* periodic wrap in x */
const int up = (i - 1) * nx + j; /* halo row when i == 1 */
const int down = (i + 1) * nx + j; /* halo row when i == ny */
u[self] =
u_prev[self] +
dt * a *
((u_prev[left] - 2.0 * u_prev[self] + u_prev[right]) / dx2 +
(u_prev[up] - 2.0 * u_prev[self] + u_prev[down]) / dy2);
}
}
}
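/*
 * note: evolve implements the standard forward-time centred-space (FTCS)
 * update,
 *   u_new[i][j] = u[i][j] + a*dt*( (u[i][j-1] - 2*u[i][j] + u[i][j+1])/dx2
 *                                + (u[i-1][j] - 2*u[i][j] + u[i+1][j])/dy2 ),
 * with periodic wrapping in x; the i +/- 1 rows in y come from the halo
 * regions filled by the exchange
 */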
#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>
/* initialise randomly with a uniform distribution */
void randomise(double *u, const int nx, const int ny) {
for (int i = 0; i < nx * ny; i++) {
u[i] = ((double)rand() / (double)RAND_MAX) * (double)(10);
}
}
/*
* allocate the local grid; the caller computes the subdomain size
*/
double *create_grid(const int nx, const int ny, const int initialise) {
double *u = (double *)malloc(nx * ny * sizeof(double));
if (initialise) {
randomise(u, nx, ny);
}
return u;
}
void free_grid(double *u) { free(u); }
void print_local_grid(double *u, const int nx, const int ny) {
/* don't print halo regions */
for (int i = 1; i < ny + 1; i++) {
for (int j = 0; j < nx; j++) {
printf("%lf ", u[i * nx + j]);
}
printf("\n");
}
}
void print_grid(double *u, const int nx, const int ny, const int rank, const int size) {
int i;
for (i = 0; i < size - 1; i++) {
if (rank == i) {
print_local_grid(u, nx, ny);
fflush(stdout);
}
MPI_Barrier(MPI_COMM_WORLD);
}
/* last process prints an extra line */
if (rank == i) {
print_local_grid(u, nx, ny);
printf("\n");
fflush(stdout);
}
MPI_Barrier(MPI_COMM_WORLD);
}
void write_local_grid_to_file(FILE *file, double *u, const int nx, const int ny) {
for (int i = 1; i < ny + 1; i++) {
for (int j = 0; j < nx; j++) {
fprintf(file, "%lf ", u[i * nx + j]);
}
fprintf(file, "\n");
}
}
void write_grid_to_file(const char *filename, double *u, const int nx, const int ny, const int rank, const int size) {
FILE *file;
/* first process truncates file to size 0 on open */
if (rank == 0) {
file = fopen(filename, "w+");
write_local_grid_to_file(file, u, nx, ny);
fclose(file);
}
MPI_Barrier(MPI_COMM_WORLD);
int i;
for (i = 1; i < size; i++) {
if (rank == i) {
file = fopen(filename, "a");
write_local_grid_to_file(file, u, nx, ny);
fclose(file);
}
MPI_Barrier(MPI_COMM_WORLD);
}
}
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>
#include "params.h"
#include "grid.h"
#include "evolve.h"
void halo_exchange(double *u, const int nx, const int ny, const int rank, const int size) {
const int up_neighbour = (rank - 1 + size) % size;
const int down_neighbour = (rank + 1) % size;
/*
* checkerboard scheme to avoid deadlocks: even ranks send first, odd
* ranks receive first (a sendrecv-based alternative is sketched after
* this function)
* note: this ordering is no longer required with non-blocking comms
*/
if ((rank % 2) == 0) {
/* send top row up */
MPI_Ssend(&u[nx], nx, MPI_DOUBLE, up_neighbour, 0, MPI_COMM_WORLD);
/* send bottom row down */
MPI_Ssend(&u[ny*nx], nx, MPI_DOUBLE, down_neighbour, 1, MPI_COMM_WORLD);
/* receive up neighbour's bottom border region */
MPI_Recv(&u[0], nx, MPI_DOUBLE, up_neighbour, 2, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
/* receive down neighbour's top border region */
MPI_Recv(&u[(ny + 1)*nx], nx, MPI_DOUBLE, down_neighbour, 3, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
} else {
/* alternate order of communication for odd ranks */
MPI_Recv(&u[(ny + 1)*nx], nx, MPI_DOUBLE, down_neighbour, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Recv(&u[0], nx, MPI_DOUBLE, up_neighbour, 1, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
MPI_Ssend(&u[ny*nx], nx, MPI_DOUBLE, down_neighbour, 2, MPI_COMM_WORLD);
MPI_Ssend(&u[nx], nx, MPI_DOUBLE, up_neighbour, 3, MPI_COMM_WORLD);
}
}
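/*
 * Aside (not part of the original exercise): the even/odd ordering can
 * also be avoided with MPI_Sendrecv, which pairs each send with its
 * matching receive internally. A minimal sketch, using the same buffer
 * layout and neighbour arithmetic as halo_exchange above:
 */
void halo_exchange_sendrecv(double *u, const int nx, const int ny,
                            const int rank, const int size) {
    const int up_neighbour = (rank - 1 + size) % size;
    const int down_neighbour = (rank + 1) % size;
    /* send top row up; receive down neighbour's top row into bottom halo */
    MPI_Sendrecv(&u[nx], nx, MPI_DOUBLE, up_neighbour, 0,
                 &u[(ny + 1) * nx], nx, MPI_DOUBLE, down_neighbour, 0,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
    /* send bottom row down; receive up neighbour's bottom row into top halo */
    MPI_Sendrecv(&u[ny * nx], nx, MPI_DOUBLE, down_neighbour, 1,
                 &u[0], nx, MPI_DOUBLE, up_neighbour, 1,
                 MPI_COMM_WORLD, MPI_STATUS_IGNORE);
}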
void iterate(double *u, double *u_prev, const int nx, const int ny,
const int nt, const double a, const double dt, const double dx2,
const double dy2, const int rank, const int size) {
for (int i = 0; i < nt; i++) {
halo_exchange(u_prev, nx, ny, rank, size);
evolve(u, u_prev, nx, ny, a, dt, dx2, dy2);
double *temp = u_prev;
u_prev = u;
u = temp;
}
}
int main(int argc, char *argv[]) {
MPI_Init(&argc, &argv);
srand(time(NULL));
int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
/*
* calculate size of local domain
* distribute rows amongst processes as evenly as possible
*/
int temp_ny = ny / size;
if (rank < ny%size) {
temp_ny += 1;
}
const int ny_local = temp_ny;
const int nx_local = nx;
/* +2 for halo regions in y dimension */
double *init_grid = create_grid(nx_local, ny_local + 2, 1);
double *grid = create_grid(nx_local, ny_local + 2, 0);
iterate(grid, init_grid, nx_local, ny_local, nt, a, dt, dx2, dy2, rank, size);
write_grid_to_file("final_grid.txt", grid, nx_local, ny_local, rank, size);
free_grid(init_grid);
free_grid(grid);
MPI_Finalize();
return 0;
}
const int nx = 15;
const int ny = 15;
const int nt = 10000; /* number of time-steps to evolve the system */
const double a = 1.0; /* diffusion constant */
const double dx = 1.0 / (double)(nx);
const double dy = 1.0 / (double)(ny);
const double dx2 = dx * dx;
const double dy2 = dy * dy;
const double dt = dx2*dy2 / (2.0*a*(dx2 + dy2));
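/*
 * note: dt above is the largest stable time-step for this explicit (FTCS)
 * scheme, dt = dx2*dy2 / (2*a*(dx2 + dy2)); larger values make the update
 * numerically unstable
 */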
#!/bin/bash
#SBATCH --partition=CourseDevQ
#SBATCH --reservation=SOHPCCourse
#SBATCH --nodes=1
#SBATCH --time=00:10:00
#SBATCH --account=course
module load gcc
cd $SLURM_SUBMIT_DIR
# Run MPI code
mpirun -n 6 ./prog
# Heat Equation
## Configuration
Parameters can be edited in the [params.h](./params.h) file. `nx`, `ny`, and `nt`
are the size of the grid in the x and y dimensions and the number of timesteps,
respectively.
#!/bin/bash
#SBATCH --partition=CourseDevQ
#SBATCH --reservation=SOHPCCourse
#SBATCH --nodes=1
#SBATCH --time=00:10:00
#SBATCH --account=course
module load gcc
cd $SLURM_SUBMIT_DIR
# Run MPI code
mpirun -n 6 ./prog
#include <mpi.h>
void evolve(double *u, double *u_prev, const int nx, const int ny,
const double a, const double dt, const double dx2,
const double dy2, MPI_Request *req) {
/* update interior region */
for (int i = 2; i < ny; i++) {
for (int j = 0; j < nx; j++) {
const int self = i * nx + j;
const int left = i * nx + (j - 1 + nx) % nx; /* periodic wrap in x */
const int right = i * nx + (j + 1) % nx; /* periodic wrap in x */
const int up = (i - 1) * nx + j;
const int down = (i + 1) * nx + j;
u[self] =
u_prev[self] +
dt * a *
((u_prev[left] - 2.0 * u_prev[self] + u_prev[right]) / dx2 +
(u_prev[up] - 2.0 * u_prev[self] + u_prev[down]) / dy2);
}
}
/*
* only the receives need to be waited on here: we are updating u, and
* u_prev is the array being sent, so the sends can complete later
*
* MPI_Waitany lets us update whichever border row is ready first;
* a plain MPI_Waitall also works (see the sketch after this function)
*/
int index;
MPI_Waitany(2, req, &index, MPI_STATUS_IGNORE);
for (int boundaries = 0; boundaries < 2; boundaries++) {
/* get index of ready row first */
const int i = index * (ny - 1) + 1;
for (int j = 0; j < nx; j++) {
const int self = i * nx + j;
const int left = i * nx + (j - 1 + nx) % nx; /* periodic wrap in x */
const int right = i * nx + (j + 1) % nx; /* periodic wrap in x */
const int up = (i - 1) * nx + j; /* halo row when i == 1 */
const int down = (i + 1) * nx + j; /* halo row when i == ny */
u[self] =
u_prev[self] +
dt * a *
((u_prev[left] - 2.0 * u_prev[self] + u_prev[right]) / dx2 +
(u_prev[up] - 2.0 * u_prev[self] + u_prev[down]) / dy2);
}
/* switch index to other boundary and wait for it to be ready */
index += 1;
index %= 2;
MPI_Wait(&req[index], MPI_STATUS_IGNORE);
}
}
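/*
 * Aside (not part of the original solution): the simpler MPI_Waitall
 * variant mentioned in the comment above: wait for both halo receives
 * (req[0] and req[1]), then update the two border rows i = 1 and i = ny
 * in order.
 */
void evolve_borders_waitall(double *u, double *u_prev, const int nx,
                            const int ny, const double a, const double dt,
                            const double dx2, const double dy2,
                            MPI_Request *req) {
    MPI_Waitall(2, req, MPI_STATUSES_IGNORE);
    for (int b = 0; b < 2; b++) {
        const int i = b * (ny - 1) + 1; /* row 1, then row ny */
        for (int j = 0; j < nx; j++) {
            const int self = i * nx + j;
            const int left = i * nx + (j - 1 + nx) % nx;
            const int right = i * nx + (j + 1) % nx;
            const int up = (i - 1) * nx + j;
            const int down = (i + 1) * nx + j;
            u[self] = u_prev[self] +
                dt * a *
                    ((u_prev[left] - 2.0 * u_prev[self] + u_prev[right]) / dx2 +
                     (u_prev[up] - 2.0 * u_prev[self] + u_prev[down]) / dy2);
        }
    }
}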
#include <stdlib.h>
#include <stdio.h>
#include <mpi.h>
/* initialise randomly with a uniform distribution */
void randomise(double *u, const int nx, const int ny) {
for (int i = 0; i < nx * ny; i++) {
u[i] = ((double)rand() / (double)RAND_MAX) * (double)(10);
}
}
/*
* allocate the local grid; the caller computes the subdomain size
*/
double *create_grid(const int nx, const int ny, const int initialise) {
double *u = (double *)malloc(nx * ny * sizeof(double));
if (initialise) {
randomise(u, nx, ny);
}
return u;
}
void free_grid(double *u) { free(u); }
void print_local_grid(double *u, const int nx, const int ny) {
/* don't print halo regions */
for (int i = 1; i < ny + 1; i++) {
for (int j = 0; j < nx; j++) {
printf("%lf ", u[i * nx + j]);
}
printf("\n");
}
}
void print_grid(double *u, const int nx, const int ny, const int rank, const int size) {
int i;
for (i = 0; i < size - 1; i++) {
if (rank == i) {
print_local_grid(u, nx, ny);
fflush(stdout);
}
MPI_Barrier(MPI_COMM_WORLD);
}
/* last process prints an extra line */
if (rank == i) {
print_local_grid(u, nx, ny);
printf("\n");
fflush(stdout);
}
MPI_Barrier(MPI_COMM_WORLD);
}
void write_local_grid_to_file(FILE *file, double *u, const int nx, const int ny) {
for (int i = 1; i < ny + 1; i++) {
for (int j = 0; j < nx; j++) {
fprintf(file, "%lf ", u[i * nx + j]);
}
fprintf(file, "\n");
}
}
void write_grid_to_file(const char *filename, double *u, const int nx, const int ny, const int rank, const int size) {
FILE *file;
/* first process truncates file to size 0 on open */
if (rank == 0) {
file = fopen(filename, "w+");
write_local_grid_to_file(file, u, nx, ny);
fclose(file);
}
MPI_Barrier(MPI_COMM_WORLD);
int i;
for (i = 1; i < size; i++) {
if (rank == i) {
file = fopen(filename, "a");
write_local_grid_to_file(file, u, nx, ny);
fclose(file);
}
MPI_Barrier(MPI_COMM_WORLD);
}
}
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>
#include "params.h"
#include "grid.h"
#include "evolve.h"
void halo_exchange(double *u, const int nx, const int ny, const int rank, const int size, MPI_Request req[4]) {
const int up_neighbour = (rank - 1 + size) % size;
const int down_neighbour = (rank + 1) % size;
/* receive up neighbour's bottom border region */
MPI_Irecv(&u[0], nx, MPI_DOUBLE, up_neighbour, 1, MPI_COMM_WORLD, &req[0]);
/* receive down neighbour's top border region */
MPI_Irecv(&u[(ny + 1)*nx], nx, MPI_DOUBLE, down_neighbour, 0, MPI_COMM_WORLD, &req[1]);
/* send top row up */
MPI_Issend(&u[nx], nx, MPI_DOUBLE, up_neighbour, 0, MPI_COMM_WORLD, &req[2]);
/* send bottom row down */
MPI_Issend(&u[ny*nx], nx, MPI_DOUBLE, down_neighbour, 1, MPI_COMM_WORLD, &req[3]);
}
void iterate(double *u, double *u_prev, const int nx, const int ny,
const int nt, const double a, const double dt, const double dx2,
const double dy2, const int rank, const int size) {
MPI_Request req[4];
for (int i = 0; i < nt; i++) {
halo_exchange(u_prev, nx, ny, rank, size, req);
evolve(u, u_prev, nx, ny, a, dt, dx2, dy2, req);
double *temp = u_prev;
u_prev = u;
u = temp;
/* wait for sends to complete */
MPI_Waitall(2, &req[2], MPI_STATUSES_IGNORE);
}
}
int main(int argc, char *argv[]) {
MPI_Init(&argc, &argv);
srand(time(NULL));
int rank, size;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
/*
* calculate size of local domain
* distribute rows amongst processes as evenly as possible
*/
int temp_ny = ny / size;
if (rank < ny%size) {
temp_ny += 1;
}
const int ny_local = temp_ny;
const int nx_local = nx;
/* +2 for halo regions in y dimension */
double *init_grid = create_grid(nx_local, ny_local + 2, 1);
double *grid = create_grid(nx_local, ny_local + 2, 0);
iterate(grid, init_grid, nx_local, ny_local, nt, a, dt, dx2, dy2, rank, size);
write_grid_to_file("final_grid.txt", grid, nx_local, ny_local, rank, size);
free_grid(init_grid);
free_grid(grid);
MPI_Finalize();
return 0;
}
const int nx = 15;
const int ny = 15;
const int nt = 10000; /* number of time-steps to evolve the system */
const double a = 1.0; /* diffusion constant */
const double dx = 1.0 / (double)(nx);
const double dy = 1.0 / (double)(ny);
const double dx2 = dx * dx;
const double dy2 = dy * dy;
const double dt = dx2*dy2 / (2.0*a*(dx2 + dy2));
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <mpi.h>
#include "params.h"
#include "grid.h"
#include "evolve.h"
void set_up_halo_exchange(double *u, const int nx, const int ny, const int rank, const int size, MPI_Request *req) {
const int up_neighbour = (rank - 1 + size) % size;
const int down_neighbour = (rank + 1) % size;
/* receive up neighbour's bottom border region */
MPI_Recv_init(&u[0], nx, MPI_DOUBLE, up_neighbour, 1, MPI_COMM_WORLD, &req[0]);
/* receive down neighbour's top border region */
MPI_Recv_init(&u[(ny + 1)*nx], nx, MPI_DOUBLE, down_neighbour, 0, MPI_COMM_WORLD, &req[1]);
/* send top row up */
MPI_Send_init(&u[nx], nx, MPI_DOUBLE, up_neighbour, 0, MPI_COMM_WORLD, &req[2]);
/* send bottom row down */
MPI_Send_init(&u[ny*nx], nx, MPI_DOUBLE, down_neighbour, 1, MPI_COMM_WORLD, &req[3]);
}
void iterate(double *u, double *u_prev, const int nx, const int ny,
const int nt, const double a, const double dt, const double dx2,
const double dy2, const int rank, const int size) {
/*
* we need two sets of requests because the grids alternate each step:
* persistent comms always transfer to/from the same addresses
*/
MPI_Request *req = malloc(4*sizeof(MPI_Request));
MPI_Request *req_prev = malloc(4*sizeof(MPI_Request));
set_up_halo_exchange(u, nx, ny, rank, size, req);
set_up_halo_exchange(u_prev, nx, ny, rank, size, req_prev);
for (int i = 0; i < nt; i++) {
MPI_Startall(4, req_prev);
evolve(u, u_prev, nx, ny, a, dt, dx2, dy2, req_prev);
double *temp = u_prev;
u_prev = u;
u = temp;
/* wait for sends to complete */