Upgrade to Pro — share decks privately, control downloads, hide ads and more …

PMML

Sponsored · Your Podcast. Everywhere. Effortlessly. Share. Educate. Inspire. Entertain. You do you. We'll handle the rest.

 PMML

Parallel Matrix Manipulation Library

Avatar for Brian Gianforcaro

Brian Gianforcaro

September 27, 2011
Tweet

More Decks by Brian Gianforcaro

Other Decks in Programming

Transcript

  1. + PMML Feature Set n  Targeted towards SMP systems n 

    Implemented in Java n  Four different Matrix multiplication Implementations n  Parallel and sequential standard O(n3) algorithm n  Parallel and sequential Strassen O(n2.807) algorithm n  Sequential and parallel Matrix addition and subtraction n  Sequential Matrix-Scalar multiplication n  Sequential and parallel solver for linear systems and matrix inversion through Gaussian Elimination
  2. + PMML Types n  All the PMML algorithms operate on

    a few basic types: n  MatrixInt n  MatrixDouble n  VectorInt n  VectorDouble n  int n  double // Allocate a new matrix MatrixInt A = MatrixInt.random( 1000, 1000 ); MatrixInt B = new MatrixInt( 1000, 1000 ); // Allocate a new vector VectorDouble A = VectorDouble.random( 1000 ); VectorDouble B = new VectorInt( 1000 );
  3. + Algorithm: •  For i in every row in A

    •  For j in every column in B •  Output[i,j] = dot product of row I and column j PMML Syntax: MatrixInt A = … MatrixInt B = … MatrixInt C1 = MatrixOp.mult( A, B ); MatrixInt C2 = MatrixParallelOp.mult( A, B ); Parallel Matrix Multiplication
  4. 31 team.execute( new ParallelRegion() { 32 public void run() throws

    Exception { 33 execute( 0, output.rows()-1, new IntegerForLoop() { 34 public void run( int first, int last ) { 35 for ( int r = first; r <= last; r++ ) { 36 for (int c = 0; c < output.cols(); c++) { 37 for (int k = 0; k < a.cols(); k++) { 38 output.data[r][c] += (a.data[r][k] * b.data[k][c]); 39 } 40 } 41 } 42 } 43 }); 44 } 45 }); 257 public static MatrixInt mult( MatrixInt a, MatrixInt b ) { 258 259 if ( ! a.correctDim( b ) ) { 260 throw new RuntimeException("Multiplication of different sized matricies"); 261 } 262 263 MatrixInt output = new MatrixInt(a.rows(), b.cols()); 264 265 for (int r = 0; r < output.rows(); r++) { 266 for (int c = 0; c < output.cols(); c++) { 267 for (int k = 0; k < a.cols(); k++) { 268 output.data[r][c] += (a.data[r][k] * b.data[k][c]); 269 } 270 } 271 } 272 return output; 273 }
  5. + Strassen Matrix Multiplication One of the fastest known Matrix

    Multiplication Algorithms PMML Syntax: MatrixInt A = … MatrixInt B = … MatrixInt C1 = MatrixOp.strassenMult( A, B ); MatrixInt C2 = MatrixParallelStrOp.mult( A, B );
  6. 343 public static MatrixInt strassenMultRun( MatrixInt a, MatrixInt b) {

    344 int n = a.rows(); 345 // 128 seems to be the n where classic mult is more efficient (64 is still fine) 346 if ( n <= 128 ) { 347 return mult(a,b); 348 } 349 // Divide a and b each into 4 matrices of size (n-1) 350 MatrixInt[] subMatA = new MatrixInt[4]; 351 MatrixInt[] subMatB = new MatrixInt[4]; 352 353 for ( int i = 0; i < 2; i++ ) { // rows 354 for ( int j = 0; j < 2; j++ ) { // cols 355 subMatA[i*2+j] = new MatrixInt(n/2,n/2); 356 subMatB[i*2+j] = new MatrixInt(n/2,n/2); 357 // copy the data into the submatrix : 358 for( int l = 0; l < n/2; l++ ) { 359 for( int m = 0; m < n/2; m++ ) { 360 subMatA[i*2+j].data[l][m] = a.data[i*n/2 + l][j*n/2 + m]; 361 subMatB[i*2+j].data[l][m] = b.data[i*n/2 + l][j*n/2 + m]; 362 } 363 } 364 } 365 } 366 367 MatrixInt[] M = new MatrixInt[7]; 368 M[0] = strassenMultRun( sub(subMatA[1],subMatA[3] ),add( subMatB[2],subMatB[3]) ); 369 M[1] = strassenMultRun( add(subMatA[0],subMatA[3] ),add( subMatB[0], subMatB[3] )); 370 M[2] = strassenMultRun( sub(subMatA[0],subMatA[2] ),add( subMatB[0], subMatB[1] )); 371 M[3] = strassenMultRun( add(subMatA[0], subMatA[1]),subMatB[3] ) ; 372 M[4] = strassenMultRun( subMatA[0],sub(subMatB[1], subMatB[3] ) ); 373 M[5] = strassenMultRun( subMatA[3],sub(subMatB[2], subMatB[0] ) ); 374 M[6] = strassenMultRun( add(subMatA[2],subMatA[3] ),subMatB[0] ); 375 376 MatrixInt[] subMatC = new MatrixInt[4]; 377 subMatC[0] = add( M[0], add(M[5], sub(M[1],M[3]))); 378 subMatC[1] = add( M[3], M[4] ); 379 subMatC[2] = add( M[5], M[6] ); 380 subMatC[3] = add( sub(M[1],M[2]), sub(M[4],M[6] )); 381 382 int[][] cdata = new int[n][n]; 383 // Copy the data into the submatrix : 384 for( int l = 0; l < 2; l++ ) { 385 for( int m = 0; m < 2; m++ ) { 386 for( int i = 0; i < n/2; i++ ) { 387 for( int j = 0; j < n/2; j++ ) { 388 cdata[l*n/2+i][m*n/2+j] = subMatC[l*2+m].data[i][j]; 389 } 390 } 391 } 392 } 393 return new MatrixInt( cdata ); 394 }
  7. 187 final int STEPS = 7; 188 final MatrixInt[] M

    = new MatrixInt[STEPS]; 189 team.execute( new ParallelRegion() { 190 public void run() throws Exception { 191 execute( 0, STEPS-1, new IntegerForLoop() { 192 193 public IntegerSchedule schedule() { 194 return IntegerSchedule.dynamic(); 195 } 196 197 public void run( int first, int last ) throws Exception { 198 for ( int step = first; step <= last; step++ ) { 199 switch ( step ) { 200 case 0: 201 M[0] = multRunSeq( MatrixOp.sub( subMatA[1], subMatA[3] ), MatrixOp.add( subMatB[2],subMatB[3]) ); 202 break; 203 case 1: 204 M[1] = multRunSeq( MatrixOp.add( subMatA[0], subMatA[3] ), MatrixOp.add( subMatB[0], subMatB[3] )); 205 break; 206 case 2: 207 M[2] = multRunSeq( MatrixOp.sub( subMatA[0], subMatA[2] ), MatrixOp.add( subMatB[0], subMatB[1] )); 208 break; 209 case 3: 210 M[3] = multRunSeq( MatrixOp.add( subMatA[0], subMatA[1]), subMatB[3] ) ; 211 break; 212 case 4: 213 M[4] = multRunSeq( subMatA[0], MatrixOp.sub( subMatB[1], subMatB[3] ) ); 214 break; 215 case 5: 216 M[5] = multRunSeq( subMatA[3], MatrixOp.sub( subMatB[2], subMatB[0] ) ); 217 break; 218 case 6: 219 M[6] = multRunSeq( MatrixOp.add( subMatA[2],subMatA[3] ), subMatB[0] ); 220 break; 221 default: 222 break; 223 } 224 } 225 } 226 }); 227 } 228 });
  8. + 343 public static MatrixInt strassenMultRun( MatrixInt a, MatrixInt b)

    { 344 int n = a.rows(); 345 // 128 seems to be the n where classic mult is more efficient (64 is still fine) 346 if ( n <= 128 ) { 347 return mult(a,b); 348 } 349 // Divide a and b each into 4 matrices of size (n-1) 350 MatrixInt[] subMatA = new MatrixInt[4]; 351 MatrixInt[] subMatB = new MatrixInt[4]; 352 353 for ( int i = 0; i < 2; i++ ) { // rows 354 for ( int j = 0; j < 2; j++ ) { // cols 355 subMatA[i*2+j] = new MatrixInt(n/2,n/2); 356 subMatB[i*2+j] = new MatrixInt(n/2,n/2); 357 // copy the data into the submatrix : 358 for( int l = 0; l < n/2; l++ ) { 359 for( int m = 0; m < n/2; m++ ) { 360 subMatA[i*2+j].data[l][m] = a.data[i*n/2 + l][j*n/2 + m]; 361 subMatB[i*2+j].data[l][m] = b.data[i*n/2 + l][j*n/2 + m]; 362 } 363 } 364 } 365 } 366 367 MatrixInt[] M = new MatrixInt[7]; 368 M[0] = strassenMultRun( sub(subMatA[1],subMatA[3] ),add( subMatB[2],subMatB[3]) ); 369 M[1] = strassenMultRun( add(subMatA[0],subMatA[3] ),add( subMatB[0], subMatB[3] )); 370 M[2] = strassenMultRun( sub(subMatA[0],subMatA[2] ),add( subMatB[0], subMatB[1] )); 371 M[3] = strassenMultRun( add(subMatA[0], subMatA[1]),subMatB[3] ) ; 372 M[4] = strassenMultRun( subMatA[0],sub(subMatB[1], subMatB[3] ) ); 373 M[5] = strassenMultRun( subMatA[3],sub(subMatB[2], subMatB[0] ) ); 374 M[6] = strassenMultRun( add(subMatA[2],subMatA[3] ),subMatB[0] ); 375 376 MatrixInt[] subMatC = new MatrixInt[4]; 377 subMatC[0] = add( M[0], add(M[5], sub(M[1],M[3]))); 378 subMatC[1] = add( M[3], M[4] ); 379 subMatC[2] = add( M[5], M[6] ); 380 subMatC[3] = add( sub(M[1],M[2]), sub(M[4],M[6] )); 381 382 int[][] cdata = new int[n][n]; 383 // Copy the data into the submatrix : 384 for( int l = 0; l < 2; l++ ) { 385 for( int m = 0; m < 2; m++ ) { 386 for( int i = 0; i < n/2; i++ ) { 387 for( int j = 0; j < n/2; j++ ) { 388 cdata[l*n/2+i][m*n/2+j] = subMatC[l*2+m].data[i][j]; 389 } 390 } 391 } 392 } 393 return new MatrixInt( cdata ); 394 }
  9. 230 final int SUB_STEPS = 3; 231 final MatrixInt[] subMatC

    = new MatrixInt[4]; 232 team.execute( new ParallelRegion() { 233 public void run() throws Exception { 234 execute( 0, SUB_STEPS, new IntegerForLoop() { 235 public void run( int first, int last ) throws Exception{ 236 for ( int step = first; step <= last; step++ ) { 237 switch ( step ) { 238 case 0: 239 subMatC[0] = MatrixOp.add( M[0], MatrixOp.add(M[5], MatrixOp.sub(M[1],M[3]))); 240 break; 241 case 1: 242 subMatC[1] = MatrixOp.add( M[3], M[4] ); 243 break; 244 case 2: 245 subMatC[2] = MatrixOp.add( M[5], M[6] ); 246 break; 247 case 3: 248 subMatC[3] = MatrixOp.add( MatrixOp.sub(M[1],M[2]), MatrixOp.sub(M[4],M[6] )); 249 break; 250 default: 251 break; 252 } 253 } 254 } 255 }); 256 } 257 }); 258
  10. + Parallel Matrix – Vector Multiplication Algorithm: Just a special

    case of Matrix – Matrix Multiplication PMML Syntax: MatrixInt A = … VectorInt B = … MatrixInt C1 = MatrixOp.mult( A, B ); MatrixInt C2 = MatrixParallelOp.mult( A, B );
  11. + Parallel Matrix – Vector Multiplication 407 public static MatrixInt

    mult( MatrixInt a, VectorInt b ) { 408 409 if ( ! (a.cols() == b.length()) ) { 410 throw new RuntimeException("Multiplication of different sized matrix & vector"); 411 } 412 413 MatrixInt output = new MatrixInt(a.rows(), 1); 414 415 for (int r = 0; r < a.rows(); r++) { 416 int sum = 0; 417 for (int c = 0 ; c < a.cols(); c++) { 418 sum += (a.data[r][c] * b.data[c]); 419 } 420 output.data[r][0] = sum; 421 } 422 return output; 423 } 36 team.execute( new ParallelRegion() { 137 public void run() throws Exception { 138 execute( 0, a.rows()-1, new IntegerForLoop() { 139 public void run( int first, int last ) { 140 for ( int r = first; r <= last; r++ ) { 141 int sum = 0; 142 for (int c = 0 ; c < a.cols(); c++) { 143 sum += (a.data[r][c] * b.data[c]); 144 } 145 output.data[r][0] = sum; 146 } 147 } 148 }); 149 } 150 });
  12. + Solution to Linear Systems of Equations PMML Syntax: MatrixInt

    A = … VectorInt b = … VectorInt x1 = MatrixInverse.solve( A, b ); VectorInt x2 = MatrixInverseSmp.mult( A, b );
  13. + 20 public static VectorDouble solve(MatrixDouble A, VectorDouble b) throws

    Exception { 21 // Determine the number of rows of A 22 int N = A.rows(); 23 24 // Iterate through all of the rows 25 for(int pivot = 0; pivot < N; pivot++) { 26 27 // Initialize the maximum to the first row out of the remaining· 28 // N-pivot rows 29 int max = pivot; 30 // Iterate through the remaining rows 31 for(int i = pivot + 1; i < N; i++) { 32 // If the absolute value of this row is greater than the last max 33 if(Math.abs(A.data[i][pivot]) > Math.abs(A.data[max][pivot])) { 34 // update the new maximum 35 max = i; 36 } 37 } 38 // Holder for the pivot row 39 double[] holder = A.data[pivot];· 40 // Swap the old max row with the pivot row 41 A.data[pivot] = A.data[max];· 42 // Swap the pivot row into the old max row 43 A.data[max] = holder; 44 ..snip.. 52 // Perform elementary row operations 53 for (int i = pivot + 1; i < N; i++) { 54 double alpha = A.data[i][pivot] / A.data[pivot][pivot]; 55 for (int j = pivot; j < N; j++) { 56 A.data[i][j] = A.data[i][j] - alpha * A.data[pivot][j]; 57 } 58 } 59 } 60 61 // solution vector 62 VectorDouble x = new VectorDouble(b.length()); 63 // set all x_i to 0 64 x.zero(); 65 66 // perform backsubstitution 67 for (int i = N - 1; i >= 0; i--) { 68 // initialize sum to 0 69 double sum = 0.0;
  14. + 59 // Execute parallel row operations 60 team.execute( new

    ParallelRegion() { 61 public void run() throws Exception { 62 execute( p + 1, N-1, new IntegerForLoop() { 63 public void run(int first, int last) { 64 for(int i = first; i <= last; i++) { 65 double alpha = A.data[i][p] / A.data[p][p]; 66 b.data[i] -= alpha * b.data[p]; 67 for (int j = p; j < N; j++) { 68 A.data[i][j] = A.data[i][j] - alpha * A.data[p][j]; 69 } 70 } 71 } 72 }); 73 } 74 }); 75 } 76 77 // solution vector 78 VectorDouble x = new VectorDouble(b.length()); 79 // set all x_i to 0 80 x.zero(); 81 82 // perform backsubstitution 83 for (int i = N - 1; i >= 0; i--) { 84 // initialize sum to 0 85 double sum = 0.0; 86 for (int j = i + 1; j < N; j++) { 87 sum += A.data[i][j] * x.data[j]; 88 } 89 x.data[i] = (b.data[i] - sum) / A.data[i][i]; 90 } 91 return x; 92 }