Skip to content

Commit faefcd7

Browse files
committed
add an option of MPI_Allreduce besides MPI_Allgather
However, MPI_Allreduce does not appear to be faster on Perlmutter at NERSC. Maybe it is on some machines.
1 parent eb5c919 commit faefcd7

1 file changed

Lines changed: 9 additions & 1 deletion

File tree

src/drivers/pncio/pncio_lustre_wrcoll.c

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -674,16 +674,24 @@ double curT = MPI_Wtime();
674674
* write requests are interleaved among all ranks.
675675
*/
676676
int is_interleaved, large_indv_req = 1;
677-
MPI_Offset striping_range, st_end[2], *st_end_all = NULL;
677+
MPI_Offset striping_range, *st_end_all = NULL;
678678

679679
/* Gather starting and ending file offsets of write requests from all
680680
* ranks into st_end_all[]. Even indices of st_end_all[] are starting
681681
* offsets, and odd indices are ending offsets.
682682
*/
683+
#if 0
684+
st_end_all = (MPI_Offset *) NCI_Calloc(nprocs * 2, sizeof(MPI_Offset));
685+
st_end_all[myrank*2] = start_offset;
686+
st_end_all[myrank*2+1] = end_offset;
687+
MPI_Allreduce(MPI_IN_PLACE, st_end_all, nprocs*2, MPI_OFFSET, MPI_MAX, fd->comm);
688+
#else
689+
MPI_Offset st_end[2];
683690
st_end[0] = start_offset;
684691
st_end[1] = end_offset;
685692
st_end_all = (MPI_Offset *) NCI_Malloc(nprocs * 2 * sizeof(MPI_Offset));
686693
MPI_Allgather(st_end, 2, MPI_OFFSET, st_end_all, 2, MPI_OFFSET, fd->comm);
694+
#endif
687695

688696
/* The loop below does the following.
689697
* 1. Calculate this rank's aggregate access region.

0 commit comments

Comments
 (0)