non square matrices multiplication_问答_开发者

开发者 https://www.devze.com 2023-03-06 11:31 出处：网络

This code doesn\'t work except for squared matrices like a[4][4], b[4][4], result2[4][4] but it didn\'t work for a[4][10], b[10][10], result2[4][10]

相关专题：openmp

This code doesn't work except for squared matrices like a[4][4], b[4][4], result2[4][4] but it didn't work for a[4][10], b[10][10], result2[4][10]

//Init
for(r=0;r<numrowsa;r++){
    for(c=0;c<numcolsa;c++){
        a[r][c]=rand()%101;
    }
}

for(r=0;r<numrowsb;r++){
    for(c=0;c<numcolsb;c++){
        b[r][c]=rand()%101;
    }
}
for(r=0;r<numrowsr;r++){
    for(c=0;c<numcolsr;c++){
        result[r][c]=0;
    }
}
for(r=0;r<numrowsr2;r++){
    for(c=0;c<numcolr2;c++){
        result2[r][c]=0;
    }
}
//end init
t1=clock();

//trying parallel calculation
# pragma omp parallel num_threads(4) private(j)
{
    int thr = omp_get_thread_num();

    if(thr == 0)
        for(c=0;c<numcolsr;c++){
            for(j=0 ;j<numcolsa ; j++)
                result[0][c]+=a[0][j]*b[j][c];
        }
    else if (thr ==1 )
        for(c=0;c<numcolsr;c++){
            for(j=0 ; j<numcolsa ; j++)
                result[1][c]+=a[1][j]*b[j][c];
        }
    else if (thr ==2 )
        for(c=0;c<numcolsr;c++){
            for(j=0 ; j<numcol开发者_运维技巧sa ; j++)
                result[2][c]+=a[2][j]*b[j][c];
        }
    else if (thr ==3)
        for(c=0;c<numcolsr;c++){
            for(j=0 ; j<numcolsa ; j++)
                result[3][c]+=a[3][j]*b[j][c];
        }
}
t2=clock();
cout <<endl<< "Time"<<t2-t1<<"ms"<<endl;

t1 = clock();
//trying serial calculation 
for(r=0;r<numrowsr2;r++){
    for(c=0;c<numcolr2;c++){
        for(i=0 ; i<numcolsa ; i++)
            result2[r][c]+=a[r][i]*b[i][c];
    }
}
t2 = clock();

/*
for(r=0;r<numrows;r++){
    for(c=0;c<numcols;c++){
        cout <<result[r][c]<<" ";
        if(c == numcols-1)
            cout << endl;
    }
}
*/
cout <<endl<< "Time"<<t2-t1<<"ms"<<endl;

The error is

Runtime check failure Stack around variable 'b' was is corrupted

What's the problem?

ouch.

# pragma omp parallel
int t = omp_get_thread_num();
int nt = ...;
for(int i=t ; i<M ; i += nt) {
  for(int j=0 ; j<N ; ++j) {
    for(int k=0 ; k<K ; ++k) {
        .... 
    }
  }
}

You wrote:

if(thr == 0)
    for(c=0;c<numcolsr;c++){
        for(j=0 ;j<numcolsa ; j++)
            result[0][c]+=a[0][j]*b[j][c];
    }
 else if (thr ==1 )
    for(c=0;c<numcolsr;c++){
        for(j=0 ; j<numcolsa ; j++)
            result[1][c]+=a[1][j]*b[j][c];
    } 
 // and so on

I don't know the details of omp, but am I correct in thinking that c and j will be shared among the threads? Because all loops use the same c and the same j.

In that case, various race conditions could happen. E.g. thread 1 could execute c++ just before thread 0 would execute result[0][c]+=a[0][j]*b[j][c], with an out-of-bounds read/write as result.