Test for the completion of a Kernel Triggered MPI operation from a GPU kernel.


C/C++ Synopsis

int MPIX_Test(MPIX_Drequest *drequest,
              int *flag,
              MPI_Status *status)
int MPIX_Testall(int count,
                 MPIX_Drequest array_of_drequests[],
                 int *flag,
                 MPI_Status array_of_statuses[])
int MPIX_Testany(int count,
                 MPIX_Drequest array_of_drequests[],
                 int *index,
                 int *flag,
                 MPI_Status array_of_statuses[])


count                IN      List length (i.e., number of device requests and status objects).
drequest             IN      The device communication request handle.
array_of_drequests   IN      Array of device communication request handles.
flag                 INOUT   Flag indicating completion of one or more operations.
                             For MPIX_Testall it indicates completion of all operations,
                             while for MPIX_Testany it indicates completion of one operation.
index                OUT     Index of the device request handle corresponding to the operation that completed.
status               OUT     MPI status object. Currently must be MPI_STATUS_IGNORE.
array_of_statuses    OUT     Array of MPI status objects. Currently must be MPI_STATUSES_IGNORE.


Similar to MPI_Test, MPIX_Test tests for the completion of a persistent MPI operation. Unlike MPI_Test, however, MPIX_Test is invoked from a GPU thread executing within a GPU kernel.

Return Values



C/C++ Example

Example host and device code snippets showing basic usage of the kernel-triggered communication operations.

Host Code

MPI_Send_init(data, num_bytes, MPI_BYTE,
              1, 999, MPI_COMM_WORLD, &send_request);
MPI_Recv_init(data, num_bytes, MPI_BYTE,
              0, 999, MPI_COMM_WORLD, &recv_request);

MPIX_Get_drequest(send_request, &send_drequest);
MPIX_Get_drequest(recv_request, &recv_drequest);


double begin = MPI_Wtime();
startit<<<1, 1, 0, stream>>>(send_drequest, recv_drequest,
                             rank, data, num_bytes, num_iter);
hipEventRecord(event, stream);
while (hipErrorNotReady == hipEventQuery(event)) {}
double end = MPI_Wtime();

fprintf(stdout, "[%d] latency = %lf us\n",
        rank, 1e6 * (end - begin) / (double) (2 * num_iter));

Device Code

#include <mpi_kt.h>
__global__ void startit(MPIX_Drequest send_drequest,
                        MPIX_Drequest recv_drequest, int rank,
                        int *data, int num_bytes, int num_iter)
    int flag, i = 0;
    MPIX_Drequest dreq1, dreq2;
    if (rank == 0) dreq1 = send_drequest; dreq2 = recv_drequest;
    else           dreq1 = recv_drequest; dreq2 = send_drequest;
    for (i = 0; i < num_iter; ++i) {
        /* ping */
        flag = 0;
        while (!flag) {
            MPIX_Test(&dreq1, &flag, MPI_STATUS_IGNORE);
        /* pong */
        flag = 0;
        while (!flag) {
            MPIX_Test(&drequest2, &flag, MPI_STATUS_IGNORE);