Skip to content

Commit eb5c919

Browse files
committed
Try issend/irecv to replace MPI_Alltoall
1 parent d39934d commit eb5c919

1 file changed

Lines changed: 31 additions & 4 deletions

File tree

src/drivers/pncio/pncio_lustre_wrcoll.c

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -345,10 +345,11 @@ void LUSTRE_Calc_others_req(PNCIO_File *fd,
345345
const PNCIO_Access *my_req,
346346
PNCIO_Access **others_req_ptr)
347347
{
348-
int i, myrank, nprocs, do_alltoallv;
348+
int i, myrank, nprocs, do_alltoallv, nreqs;
349349
MPI_Count *count_my_req_per_proc, *count_others_req_per_proc;
350350
PNCIO_Access *others_req;
351351
size_t npairs, alloc_sz, pair_sz;
352+
MPI_Request *requests;
352353

353354
/* first find out how much to send/recv and from/to whom */
354355

@@ -362,14 +363,41 @@ void LUSTRE_Calc_others_req(PNCIO_File *fd,
362363
* aggregator i's file domain) to set count_others_req_per_proc[j] (the
363364
* number of noncontiguous requests from process j that fall into this
364365
* aggregator's file domain).
366+
*
367+
* The MPI_Alltoall() below is actually an all-to-many, i.e., all ranks
368+
* send to aggregators only.
365369
*/
366370
count_my_req_per_proc = (MPI_Count *) NCI_Calloc(nprocs * 2, sizeof(MPI_Count));
367371
count_others_req_per_proc = count_my_req_per_proc + nprocs;
368372
for (i=0; i<fd->hints->cb_nodes; i++)
369373
count_my_req_per_proc[fd->hints->ranklist[i]] = my_req[i].count;
370374

375+
#if 1
376+
requests = NCI_Malloc(sizeof(MPI_Request) * (nprocs + fd->hints->cb_nodes));
377+
nreqs = 0;
378+
if (fd->is_agg) {
379+
for (i=0; i<nprocs; i++)
380+
MPI_Irecv(count_others_req_per_proc+i, 1, MPI_COUNT, i, 0, fd->comm, &requests[nreqs++]);
381+
}
382+
for (i=0; i<fd->hints->cb_nodes; i++) {
383+
int dest = fd->hints->ranklist[i];
384+
MPI_Issend(&my_req[i].count, 1, MPI_COUNT, dest, 0, fd->comm, &requests[nreqs++]);
385+
}
386+
if (nreqs) {
387+
#ifdef HAVE_MPI_STATUSES_IGNORE
388+
MPI_Waitall(nreqs, requests, MPI_STATUSES_IGNORE);
389+
#else
390+
MPI_Status *statuses = (MPI_Status *)
391+
NCI_Malloc(nreqs * sizeof(MPI_Status));
392+
MPI_Waitall(nreqs, requests, statuses);
393+
NCI_Free(statuses);
394+
#endif
395+
}
396+
NCI_Free(requests);
397+
#else
371398
MPI_Alltoall(count_my_req_per_proc, 1, MPI_COUNT,
372399
count_others_req_per_proc, 1, MPI_COUNT, fd->comm);
400+
#endif
373401

374402
/* calculate total number of offset-length pairs to be handled by this
375403
* aggregator, only aggregators will have non-zero number of pairs.
@@ -482,9 +510,8 @@ void LUSTRE_Calc_others_req(PNCIO_File *fd,
482510
NCI_Free(sendCounts);
483511
}
484512
else { /* instead of using alltoall, use MPI_Issend and MPI_Irecv */
485-
int nreqs;
486-
MPI_Request *requests = (MPI_Request *)
487-
NCI_Malloc((nprocs + fd->hints->cb_nodes) * sizeof(MPI_Request));
513+
requests = (MPI_Request *)
514+
NCI_Malloc(sizeof(MPI_Request) * (nprocs + fd->hints->cb_nodes));
488515

489516
nreqs = 0;
490517
for (i = 0; i < nprocs; i++) {

0 commit comments

Comments
 (0)