MPIX_Start
Initiate a Kernel Triggered MPI operation from a GPU kernel.
Definitions
C/C++ Synopsis
int MPIX_Start(MPIX_Drequest *drequest)
Arguments
drequest     IN      The device communication request handle.
Description
Similar to MPI_Start, this operation initiates the execution of a persistent MPI operation. However, the MPIX_Start operation is invoked from a GPU thread executing within a GPU kernel.
Return Values
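None documented. Note that the synopsis declares an int return type; the meaning of the returned value is not specified on this page.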
Examples
C/C++ Example
Example host and device code snippets showing basic usage of the kernel-triggered communication operations.
Host Code
MPI_Send_init(data, num_bytes, MPI_BYTE,
              1, 999, MPI_COMM_WORLD, &send_request);
MPI_Recv_init(data, num_bytes, MPI_BYTE,
              0, 999, MPI_COMM_WORLD, &recv_request);
MPIX_Get_drequest(send_request, &send_drequest);
MPIX_Get_drequest(recv_request, &recv_drequest);
hipEventCreate(&event);
hipStreamCreate(&stream);
double begin = MPI_Wtime();
startit<<<1, 1, 0, stream>>>(send_drequest, recv_drequest,
                             rank, data, num_bytes, num_iter);
hipEventRecord(event, stream);
while (hipErrorNotReady == hipEventQuery(event)) {}
double end = MPI_Wtime();
fprintf(stdout, "[%d] latency = %lf us\n",
        rank, 1e6 * (end - begin) / (double) (2 * num_iter));
Device Code
#include <mpi_kt.h>
/*
 * Ping-pong kernel driving two persistent operations from the device.
 *
 * send_drequest  device request handle for the persistent send
 * recv_drequest  device request handle for the persistent receive
 * rank           rank of the calling process; rank 0 sends first and then
 *                receives, every other rank receives first and then sends
 * data           not referenced in the kernel body
 * num_bytes      not referenced in the kernel body
 * num_iter       number of ping-pong iterations to run
 */
__global__ void startit(MPIX_Drequest send_drequest,
                        MPIX_Drequest recv_drequest, int rank,
                        int *data, int num_bytes, int num_iter)
{
    int flag;
    MPIX_Drequest dreq1, dreq2;

    /* Braces are required: each branch assigns BOTH handles. */
    if (rank == 0) {
        dreq1 = send_drequest;
        dreq2 = recv_drequest;
    } else {
        dreq1 = recv_drequest;
        dreq2 = send_drequest;
    }

    for (int i = 0; i < num_iter; ++i) {
        /* ping: start the first operation and poll until it completes.
         * MPIX_Start takes a pointer to the handle, per its synopsis. */
        MPIX_Start(&dreq1);
        flag = 0;
        while (!flag) {
            MPIX_Test(&dreq1, &flag, MPI_STATUS_IGNORE);
        }

        /* pong: same sequence for the second operation. */
        MPIX_Start(&dreq2);
        flag = 0;
        while (!flag) {
            MPIX_Test(&dreq2, &flag, MPI_STATUS_IGNORE);
        }
    }
}