How to send columns of a matrix via Boost MPI using C++ STL vectors?_问答_开发者

I want to send multiple columns of a matrix stored as in STL vector form

    vector < vector < double > > A ( 10, vector <double> (10));

without copying the content to some buffer (because computation time is crucial here) with Boost MPI.

I found out, how this could be done with MPI. Here is the example Code how to send the 4th, 5th and 6th column of a 10 by 10 matrix from one process (rank==0) to another (rank==1). (Even though I do not know why I have to add the '2' in the third argument of MPI_Typ_vector. Does anyone know why?).

    int rank, size;
    MPI_Init (&argc, &argv);        /* starts MPI */
    MPI_Comm_rank (MPI_COMM_WORLD, &rank);  /* get current process id */
    MPI_Comm_size (MPI_COMM_WORLD, &size);  /* get number of processes */

    // fill matrices
    vector< vector <float> >A(10, vector <float> (10));
    vector< vector <float> >A_copy(10, vector <float> (10));
    for (int i=0; i!=10; i++)
    {
            for (int j=0; j!=10; j++)
            {
                    A[i][j]=j+ i*10;
                    A_copy[i][j]=0.0;
            }
    }

    int dest=1;
    int tag=1;
    // define new type = two columns
    MPI_Datatype    newtype;
    MPI_Type_vector(10,     /* # column elements */
    3,                      /* 3 column only */
    10+2,                   /* skip 10 elements */
    MPI_FLOAT,              /* elements are float */
    &newtype);              /* MPI derived datatype */
    MPI_Type_commit(&newtype);

    if (rank==0)
    {
            MPI_S开发者_Python百科end(&A[0][4], 1, newtype, dest, tag, MPI_COMM_WORLD);
    }
    if (rank==1)
            MPI_Status status;
            MPI_Recv(&A_copy[0][4], 1, newtype, 0, tag, MPI_COMM_WORLD, &status);
    }
    MPI_Finalize();

On the Boost webpage, they claim that MPI_Type_vector is "used automatically in Boost.MPI" (http://www.boost.org/doc/libs/1_47_0/doc/html/mpi/tutorial.html#mpi.c_mapping).

But I can not find an example how to do this in detail. In only know how to send the whole matrix or each element after another with Boost.

Thank you in advance,

Tobias

I solved the problem by writing my own class 'columns' and serialize it. Here is an example code:

#include<iostream>
#include<vector>
#include <boost/mpi/environment.hpp>
#include <boost/mpi/communicator.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/serialization/complex.hpp>

using namespace std;   
namespace mpi=boost::mpi;

class columns
{
public:
int Nr;
int Nc;
int J0;
int J1;
vector < vector <double> >* matrix;

columns(vector < vector <double> > & A, int j0, int j1)
{
    this->matrix=&A;
    this->Nr=A.size();
    this->Nc=A[0].size();
    this->J0=j0;
    this->J1=j1;
}
columns(vector < vector <double> > & A)
{
    this->matrix=&A;
    this->Nr=A.size();
    this->Nc=A[0].size();
}
columns(){};
};

namespace boost {
namespace serialization {

    template<class Archive>
    void serialize(Archive & ar, columns & g, const unsigned int version)
    {
        ar & g.Nr;
        ar & g.Nc;
        ar & g.J0;
        ar & g.J1;

        for (int i=0; i!=g.Nr; i++)
        {       
            for (int j=g.J0; j!=g.J1; j++)
            {       
                ar & (*g.matrix)[i][j];
            }
        }
    }
}
}


int main(int argc, char * argv[])
{
mpi::environment env(argc, argv);
mpi::communicator world;
int myid=world.rank();
int NN=world.size();

int Nl=3;
int Ng=5;

int myStart=myid*Ng/NN;
int myEnd=(myid+1)*Ng/NN;
int myN=myEnd-myStart;

if (myid==0)
{
    vector < vector <double> > input (Nl, vector <double> (Ng));
    for (int n=0; n!=Nl; n++)
    {
        for (int j=0; j!=Ng; j++)
        {
            input[n][j]=n+j;
        }
    }

    cout << "##### process " << myid << " ############" << endl;
    for (int n=0; n!=Nl; n++)
    {
        for (int j=0; j!=Ng; j++)
        {
            cout << input[n][j] << "\t";
        }
        cout << endl;
    }
    cout << "############################" << endl;

    // divide grid for parallization
    vector<int> starts(NN);
    vector<int> ends(NN);
    vector<int> Nwork(NN);
    for (int p=0; p!=NN; p++)
    {
        starts[p]=p*Ng/NN;
        ends[p]=(p+1)*Ng/NN;
        Nwork[p]=ends[p]-starts[p];
    }


    vector<columns> input_columns(NN);
    for (int p=1; p!=NN; p++)
    {
        input_columns[p]=columns(input, starts[p], ends[p]);
    }


    for (int p=1; p!=NN; p++)
    {
        world.send(p, 1, input_columns[p]);
    }
}

if (myid!=0)
{
    vector < vector <double> > input (Nl, vector <double> (Ng));
    for (int n=0; n!=Nl; n++)
    {
        for (int j=0; j!=Ng; j++)
        {
            input[n][j]=0.0;
        }
    }

    columns input_columns  = columns(input, myStart, myEnd);

    world.recv(0, 1, input_columns); 


    cout << "##### process " << myid << " ############" << endl;
    for (int n=0; n!=Nl; n++)
    {
        for (int j=0; j!=Ng; j++)
        {
            cout << input[n][j] << "\t";
        }
        cout << endl;
    }
    cout << "############################" << endl;
}
}

Explanation: The 'columns'-class contains a pointer to the matrix and two numbers indicating where the columns start and end.

class columns
{
    public:
    int Nr;              // number of rows in the matrix
    int Nc;              // number of columns in the matrix
    int J0;              // column start index
    int J1;              // column end index
    vector < vector <double> >* matrix;

    columns(vector < vector <double> > & A, int j0, int j1)
    {
            this->matrix=&A;
            this->Nr=A.size();
            this->Nc=A[0].size();
            this->J0=j0;
            this->J1=j1;
    }
    columns(vector < vector <double> > & A)
    {
            this->matrix=&A;
            this->Nr=A.size();
            this->Nc=A[0].size();
    }
    columns(){};
};

With the following code one tells boost-serialization how to serialize this 'columns'-class:

namespace boost {
    namespace serialization {

            template<class Archive>
            void serialize(Archive & ar, columns & g, const unsigned int version)
            {
                    ar & g.Nr;
                    ar & g.Nc;
                    ar & g.J0;
                    ar & g.J1;

                    for (int i=0; i!=g.Nr; i++)
                    {
                            for (int j=g.J0; j!=g.J1; j++)
                            {
                                    ar & (*g.matrix)[i][j];
                            }
                    }
            }
    }
}

Then one fill the matrix 'input'

vector < vector <double> > input (Nl, vector <double>(Ng));
            for (int n=0; n!=Nl; n++)
            {
                    for (int j=0; j!=Ng; j++)
                    {
                            input[n][j]=n+j;
                    }
            }

and initialize a columns-class object (which now contains a pointer to the matrix 'input'):

vector<columns> input_columns(NN)

and send it to another (sub)process by

world.send(p, 1, input_columns);

In the end it is received by

world.recv(0, 1, input_columns);

If you are going to do lots of column operations on A, maybe you should store A transpose rather than A. This will put the columns in contiguous memory locations. This means you could send a column using MPI_Send without doing any copy operations. Additionally, column operations will be faster.