// version 0.4 modifed by ian a. mason // may 26 @ u.n.e // // real live file manipulation // cleaned it up a bit // seems to work alright (touch wood) // here is the biggest // test so far: // b1 % mm.0.4 ~/A ~/B ~/C 4 1000 (all 16 nodes online) // mm.0.4 4 1000 task 262154 done successfully after 3682 seconds. // b1 % cmp ~/A ~/C // and this will be slowed down by NFS bottleneck to bourbaki. // On the new cluster (2002): // [comp309@o13 LINUX]$ ./mm.0.4 ~/A ~/B ~/C 4 1000 // mm.0.4 4 1000 task 3932204 done successfully after 683 seconds. // [comp309@o13 LINUX]$ #include #include #include #include #include #include #include #include "mmlib.h" #define VERSION "mm.0.4" //make this automatic in beta #define ENCODING PvmDataDefault //PvmDataRaw #define MAXNTIDS 1025 #define MAXROW 106 #define ATAG 2 #define BTAG 3 #define VERBOSE 1 #define PERMS 0666 #define VIEW_RESULT 0 int parse_args(int, char*[], int*, int*, int *, int []); int main(int argc, char* argv[]){ int ntask, info, mytid, mygid; int i, j, m, blksize, blksquare, matrix_size; int row, col, up, down; int child[MAXNTIDS-1]; int myrow[MAXROW]; int F[3]; /* A = F[0] B = F[1] C = F[2] */ int *M[4]; /* A = M[0] B = M[1] C = M[2] TMP = M[3] */ //real time for the time being int start_time = time(NULL); if(parse_args(argc, argv, &m, &blksize, &ntask, F) < 0) exit(0); blksquare = blksize * blksize; mytid = pvm_mytid(); if(VERBOSE) fprintf(stderr, "Task %d with:\n\tm = %d\n\tblksize = %d\n\tntask = %d\n", mytid, m, blksize, ntask); //tinkerville pvm_setopt(PvmRoute, PvmRouteDirect); if (mytid < 0){ pvm_perror(argv[0]); return -1; } mygid = pvm_joingroup("mmult"); if (mygid < 0){ pvm_perror(argv[0]); pvm_exit(); return -1; } if ((mygid == 0) && (ntask > 1)) { if(VERBOSE)fprintf(stderr, "Spawning %d tasks\n", ntask-1); info = pvm_spawn(VERSION, &argv[1], PvmTaskDefault, (char*)0, ntask-1, child); if(info != ntask - 1){ pvm_lvgroup("mmult"); pvm_exit(); return -1; } //what about the orphans? } // barrier if(VERBOSE)fprintf(stderr, "Waiting at barrier\n"); info = pvm_barrier("mmult", ntask); if (info < 0) pvm_perror(argv[0]); if(VERBOSE)fprintf(stderr, "Crossed barrier\n"); for (i = 0; i < m; i++) myrow[i] = pvm_gettid("mmult", (mygid/m)*m + i); if(VERBOSE) fprintf(stderr, "Opening, allocating and initializing matrices\n"); for(i = 0; i < 4; i++) M[i] = (int*)calloc(sizeof(int), blksize*blksize); if (!(M[0] && M[1] && M[2] && M[3])) { fprintf(stderr, "%s: out of memory!\n", argv[0]); for(i = 0; i < 4; i++)free(M[i]); pvm_lvgroup("mmult"); pvm_exit(); return -1; } row = mygid/m; col = mygid % m; matrix_size = m * blksize; for(j = 0; j < 2; j++) for(i = 0; i < blksize; i++) get_block_row(F[j], matrix_size, row, col, i, blksize, &(M[j][i*blksize])); if(VERBOSE) fprintf(stderr, "Finished opening, allocating and initializing matrices\n"); up = pvm_gettid("mmult", ((row)? (row-1): (m-1))*m+col); down = pvm_gettid("mmult", ((row == (m-1))? col: (row+1)*m+col)); for (i = 0; i < m; i++) { if(VERBOSE) fprintf(stderr, "Looping: iteration %d out of a total of %d\n", i + 1, m); if (col == (row + i)%m) { pvm_initsend(ENCODING); pvm_pkint(M[0], blksize*blksize, 1); pvm_mcast(myrow, m, (i+1)*ATAG); block_mult(M[2],M[0],M[1],blksize); } else { pvm_recv(pvm_gettid("mmult", row*m + (row +i)%m), (i+1)*ATAG); pvm_upkint(M[3], blksize*blksize, 1); block_mult(M[2],M[3],M[1],blksize); } pvm_initsend(ENCODING); pvm_pkint(M[1], blksize*blksize, 1); pvm_send(up, (i+1)*BTAG); pvm_recv(down, (i+1)*BTAG); pvm_upkint(M[1], blksize*blksize, 1); } if(VERBOSE) fprintf(stderr, "Finished computation\n"); if(VERBOSE)fprintf(stderr, "Waiting at barrier\n"); info = pvm_barrier("mmult",ntask); if (info < 0) pvm_perror(argv[0]); if(VERBOSE)fprintf(stderr, "Crossed barrier\n"); if(VIEW_RESULT && (mygid == 0)){ fprintf(stderr, "Clone %d computed:\n\n", mygid); for(i = 0; i < blksquare; i++){ fprintf(stderr, "%5d ", (M[2][i])); if((( i + 1) % blksize) == 0)fprintf(stderr, "\n"); } fprintf(stderr, "\n"); } for(i = 0; i < 2; i++) close(F[i]); if(VERBOSE)fprintf(stderr, "Writing C\n"); for(i = 0; i < blksize; i++) set_block_row(F[2], matrix_size, row, col, i, blksize, &M[2][i*blksize]); fprintf(stderr, "%s %d %d task %d done successfully after %ld seconds.\n", VERSION, m, blksize, mytid, time(NULL) - start_time); for(i = 0; i < 4; i++) free(M[i]); close(F[2]); pvm_lvgroup("mmult"); pvm_exit(); return 0; } int parse_args(int argc, char *argv[], int *m, int *blksize, int *ntask, int F[]){ if ((argc != 6) || ((F[0] = open(argv[1], O_RDONLY)) == -1) || ((F[1] = open(argv[2], O_RDONLY)) == -1) || ((F[2] = open(argv[3], O_WRONLY)) == -1) || ((*m = atoi(argv[4])) <= 0) || ((*blksize = atoi(argv[5])) <= 0)){ fprintf(stderr, "Usage: %s matrixA matrixB matrixC m blk\n", argv[0]); return(-1); }; if(*m > MAXROW){ fprintf(stderr, "m = %d not valid.\n", *m); return(-1); }; *ntask = (*m)*(*m); if ((*ntask < 1) || (*ntask >= MAXNTIDS)) { fprintf(stderr, "ntask in parse_args = %d not valid.\n", *ntask); pvm_exit(); return(-1); }; return(0); }