@@ -345,10 +345,11 @@ void LUSTRE_Calc_others_req(PNCIO_File *fd,
345345 const PNCIO_Access * my_req ,
346346 PNCIO_Access * * others_req_ptr )
347347{
348- int i , myrank , nprocs , do_alltoallv ;
348+ int i , myrank , nprocs , do_alltoallv , nreqs ;
349349 MPI_Count * count_my_req_per_proc , * count_others_req_per_proc ;
350350 PNCIO_Access * others_req ;
351351 size_t npairs , alloc_sz , pair_sz ;
352+ MPI_Request * requests ;
352353
353354 /* first find out how much to send/recv and from/to whom */
354355
@@ -362,14 +363,41 @@ void LUSTRE_Calc_others_req(PNCIO_File *fd,
362363 * aggregator i's file domain) to set count_others_req_per_proc[j] (the
363364 * number of noncontiguous requests from process j fall into this
364365 * aggregator's file domain).
366+ *
367+ * The below MPI_Alltoall() is actually an all-to-many, i.e., all ranks
368+ * send to aggregators only.
365369 */
366370 count_my_req_per_proc = (MPI_Count * ) NCI_Calloc (nprocs * 2 , sizeof (MPI_Count ));
367371 count_others_req_per_proc = count_my_req_per_proc + nprocs ;
368372 for (i = 0 ; i < fd -> hints -> cb_nodes ; i ++ )
369373 count_my_req_per_proc [fd -> hints -> ranklist [i ]] = my_req [i ].count ;
370374
375+ #if 1
376+ requests = NCI_Malloc (sizeof (MPI_Request ) * (nprocs + fd -> hints -> cb_nodes ));
377+ nreqs = 0 ;
378+ if (fd -> is_agg ) {
379+ for (i = 0 ; i < nprocs ; i ++ )
380+ MPI_Irecv (count_others_req_per_proc + i , 1 , MPI_COUNT , i , 0 , fd -> comm , & requests [nreqs ++ ]);
381+ }
382+ for (i = 0 ; i < fd -> hints -> cb_nodes ; i ++ ) {
383+ int dest = fd -> hints -> ranklist [i ];
384+ MPI_Issend (& my_req [i ].count , 1 , MPI_COUNT , dest , 0 , fd -> comm , & requests [nreqs ++ ]);
385+ }
386+ if (nreqs ) {
387+ #ifdef HAVE_MPI_STATUSES_IGNORE
388+ MPI_Waitall (nreqs , requests , MPI_STATUSES_IGNORE );
389+ #else
390+ MPI_Status * statuses = (MPI_Status * )
391+ NCI_Malloc (nreqs * sizeof (MPI_Status ));
392+ MPI_Waitall (nreqs , requests , statuses );
393+ NCI_Free (statuses );
394+ #endif
395+ }
396+ NCI_Free (requests );
397+ #else
371398 MPI_Alltoall (count_my_req_per_proc , 1 , MPI_COUNT ,
372399 count_others_req_per_proc , 1 , MPI_COUNT , fd -> comm );
400+ #endif
373401
374402 /* calculate total number of offset-length pairs to be handled by this
375403 * aggregator, only aggregators will have non-zero number of pairs.
@@ -482,9 +510,8 @@ void LUSTRE_Calc_others_req(PNCIO_File *fd,
482510 NCI_Free (sendCounts );
483511 }
484512 else { /* instead of using alltoall, use MPI_Issend and MPI_Irecv */
485- int nreqs ;
486- MPI_Request * requests = (MPI_Request * )
487- NCI_Malloc ((nprocs + fd -> hints -> cb_nodes ) * sizeof (MPI_Request ));
513+ requests = (MPI_Request * )
514+ NCI_Malloc (sizeof (MPI_Request ) * (nprocs + fd -> hints -> cb_nodes ));
488515
489516 nreqs = 0 ;
490517 for (i = 0 ; i < nprocs ; i ++ ) {
0 commit comments