#include #include "mpi.h" #include #include #include #include #include #include #include "mmlib.h" #define VERSION "mm.0.6" #define MAXNTIDS 100 #define MAXROW 100 #define ATAG 2 #define BTAG 3 #define VERBOSE 0 #define PERMS 0666 int parse_args(int, char*[], int*, int*, int *, int [], int); int main(int argc, char* argv[]){ int ntask, my_rank; int i, j, m, blksize, matrix_size; int row, col, up, down; int myrow[MAXROW]; MPI_Status status; /* A = P[0] B = P[1] C = P[2] */ char P[3][PATH_MAX]; /* A = F[0] B = F[1] C = F[2] */ int F[3]; /* A = M[0] B = M[1] C = M[2] TMP = M[3] */ float *M[4]; MPI_Comm grid_comm, row_comm, col_comm; int grid_rank; int wrap_around[2]; int coordinates[2]; int free_coords[2]; int dimensions[2]; int bcast_root; //real time for the time being int start_time = time(NULL); MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); if(parse_args(argc, argv, &m, &blksize, &ntask, F, my_rank) < 0){ MPI_Finalize(); exit(0); } MPI_Barrier(MPI_COMM_WORLD); for (i = 0; i < m; i++) myrow[i] = (my_rank/m)*m + i; for(i = 0; i < 4; i++) M[i] = (float*)calloc(sizeof(float), blksize*blksize); if (!(M[0] && M[1] && M[2] && M[3])) { fprintf(stderr, "%s: out of memory!\n", argv[0]); for(i = 0; i < 4; i++)free(M[i]); MPI_Finalize(); return -1; } // row = my_rank/m; // col = my_rank % m; // set up the grid wrap_around[0] = wrap_around[1] = 1; dimensions[0] = dimensions[1] = m; MPI_Cart_create(MPI_COMM_WORLD, 2, dimensions, wrap_around, 1, &grid_comm); MPI_Comm_rank(grid_comm, &grid_rank); MPI_Cart_coords(grid_comm, grid_rank, 2, coordinates); // get row n column row = coordinates[0]; col = coordinates[1]; // setup row communicator free_coords[0] = 0; free_coords[1] = 1; MPI_Cart_sub(grid_comm, free_coords, &row_comm); // setup column communicator free_coords[0] = 1; free_coords[1] = 0; MPI_Cart_sub(grid_comm, free_coords, &col_comm); for(i = 0; i < 3; i++){ sprintf(P[i], "%s.%d.%d", argv[i+1], row, col); if((F[i] = open(P[i], ( i < 2 ? O_RDONLY : O_WRONLY) )) < 0){ perror("File open failed"); fprintf(stderr, "Failed to open %s\n", P[i]); } } matrix_size = m * blksize; for(j = 0; j < 2; j++) get_block_row(F[j], blksize, 0, 0, 0, blksize*blksize, M[j]); down = (row + 1) % m; up = (row + m - 1) % m; for (i = 0; i < m; i++) { bcast_root = (row + i)%m; if (col == bcast_root) { MPI_Bcast(M[0], blksize*blksize, MPI_INT, bcast_root, row_comm); block_mult(M[2],M[0],M[1],blksize); } else { MPI_Bcast(M[3], blksize*blksize, MPI_INT, bcast_root, row_comm); block_mult(M[2],M[3],M[1],blksize); } MPI_Sendrecv_replace(M[1], blksize*blksize, MPI_INT, up, 0, down, 0, col_comm, &status); } // Make sure tasks are all done MPI_Barrier(MPI_COMM_WORLD); for (i = 0 ; i < blksize*blksize; i++) if (M[0][i] != M[2][i]) printf("Error a[%d] (%g) != c[%d] (%g) \n", i, M[0][i], i, M[2][i]); //more verbose code exists in verbose_code for(i = 0; i < 2; i++) close(F[i]); set_block_row(F[2], blksize, 0, 0, 0, blksize*blksize, M[2]); printf("%s %d %d task %d done successfully after %ld seconds.\n", VERSION, m, blksize, my_rank, time(NULL) - start_time); for(i = 0; i < 4; i++) free(M[i]); close(F[2]); MPI_Finalize(); return 0; } int parse_args(int argc, char *argv[], int *m, int *blksize, int *ntask, int F[], int rank){ int i; if ((argc != 6) || ((*m = atoi(argv[4])) <= 0) || ((*blksize = atoi(argv[5])) <= 0)){ fprintf(stderr, "Usage: %s matrixA matrixB matrixC m blk\n", argv[0]); return(-1); }; if(rank == 0) if(((F[0] = open(argv[1], O_RDONLY)) == -1) || ((F[1] = open(argv[2], O_RDONLY)) == -1) || ((F[2] = open(argv[3], O_WRONLY)) == -1)){ fprintf(stderr, "Usage: %s matrixA matrixB matrixC m blk\n", argv[0]); return(-1); } if(*m > MAXROW){ fprintf(stderr, "m = %d not valid.\n", *m); return(-1); }; *ntask = (*m)*(*m); if ((*ntask < 1) || (*ntask >= MAXNTIDS)) { fprintf(stderr, "ntask in parse_args = %d not valid.\n", *ntask); MPI_Finalize(); return(-1); }; for(i = 0; i < 3; i++)close(F[i]); return(0); }