PMML

+ Parallel Matrix Manipulation Library (PMML) Nicholas Dexter Brian Gianforcaro
Jerome Marhic

+ PMML Feature Set n  Targeted towards SMP systems n 
Implemented in Java n  Four different Matrix multiplication Implementations n  Parallel and sequential standard O(n3) algorithm n  Parallel and sequential Strassen O(n2.807) algorithm n  Sequential and parallel Matrix addition and subtraction n  Sequential Matrix-Scalar multiplication n  Sequential and parallel solver for linear systems and matrix inversion through Gaussian Elimination

+ PMML Types n  All the PMML algorithms operate on
a few basic types: n  MatrixInt n  MatrixDouble n  VectorInt n  VectorDouble n  int n  double // Allocate a new matrix MatrixInt A = MatrixInt.random( 1000, 1000 ); MatrixInt B = new MatrixInt( 1000, 1000 ); // Allocate a new vector VectorDouble A = VectorDouble.random( 1000 ); VectorDouble B = new VectorInt( 1000 );

+ Algorithm: •  For i in every row in A
•  For j in every column in B •  Output[i,j] = dot product of row I and column j PMML Syntax: MatrixInt A = … MatrixInt B = … MatrixInt C1 = MatrixOp.mult( A, B ); MatrixInt C2 = MatrixParallelOp.mult( A, B ); Parallel Matrix Multiplication

31 team.execute( new ParallelRegion() { 32 public void run() throws
Exception { 33 execute( 0, output.rows()-1, new IntegerForLoop() { 34 public void run( int first, int last ) { 35 for ( int r = first; r <= last; r++ ) { 36 for (int c = 0; c < output.cols(); c++) { 37 for (int k = 0; k < a.cols(); k++) { 38 output.data[r][c] += (a.data[r][k] * b.data[k][c]); 39 } 40 } 41 } 42 } 43 }); 44 } 45 }); 257 public static MatrixInt mult( MatrixInt a, MatrixInt b ) { 258 259 if ( ! a.correctDim( b ) ) { 260 throw new RuntimeException("Multiplication of different sized matricies"); 261 } 262 263 MatrixInt output = new MatrixInt(a.rows(), b.cols()); 264 265 for (int r = 0; r < output.rows(); r++) { 266 for (int c = 0; c < output.cols(); c++) { 267 for (int k = 0; k < a.cols(); k++) { 268 output.data[r][c] += (a.data[r][k] * b.data[k][c]); 269 } 270 } 271 } 272 return output; 273 }

+ Parallel Matrix Multiplication

+ Strassen Matrix Multiplication One of the fastest known Matrix
Multiplication Algorithms PMML Syntax: MatrixInt A = … MatrixInt B = … MatrixInt C1 = MatrixOp.strassenMult( A, B ); MatrixInt C2 = MatrixParallelStrOp.mult( A, B );

343 public static MatrixInt strassenMultRun( MatrixInt a, MatrixInt b) {
344 int n = a.rows(); 345 // 128 seems to be the n where classic mult is more efficient (64 is still fine) 346 if ( n <= 128 ) { 347 return mult(a,b); 348 } 349 // Divide a and b each into 4 matrices of size (n-1) 350 MatrixInt[] subMatA = new MatrixInt[4]; 351 MatrixInt[] subMatB = new MatrixInt[4]; 352 353 for ( int i = 0; i < 2; i++ ) { // rows 354 for ( int j = 0; j < 2; j++ ) { // cols 355 subMatA[i*2+j] = new MatrixInt(n/2,n/2); 356 subMatB[i*2+j] = new MatrixInt(n/2,n/2); 357 // copy the data into the submatrix : 358 for( int l = 0; l < n/2; l++ ) { 359 for( int m = 0; m < n/2; m++ ) { 360 subMatA[i*2+j].data[l][m] = a.data[i*n/2 + l][j*n/2 + m]; 361 subMatB[i*2+j].data[l][m] = b.data[i*n/2 + l][j*n/2 + m]; 362 } 363 } 364 } 365 } 366 367 MatrixInt[] M = new MatrixInt[7]; 368 M[0] = strassenMultRun( sub(subMatA[1],subMatA[3] ),add( subMatB[2],subMatB[3]) ); 369 M[1] = strassenMultRun( add(subMatA[0],subMatA[3] ),add( subMatB[0], subMatB[3] )); 370 M[2] = strassenMultRun( sub(subMatA[0],subMatA[2] ),add( subMatB[0], subMatB[1] )); 371 M[3] = strassenMultRun( add(subMatA[0], subMatA[1]),subMatB[3] ) ; 372 M[4] = strassenMultRun( subMatA[0],sub(subMatB[1], subMatB[3] ) ); 373 M[5] = strassenMultRun( subMatA[3],sub(subMatB[2], subMatB[0] ) ); 374 M[6] = strassenMultRun( add(subMatA[2],subMatA[3] ),subMatB[0] ); 375 376 MatrixInt[] subMatC = new MatrixInt[4]; 377 subMatC[0] = add( M[0], add(M[5], sub(M[1],M[3]))); 378 subMatC[1] = add( M[3], M[4] ); 379 subMatC[2] = add( M[5], M[6] ); 380 subMatC[3] = add( sub(M[1],M[2]), sub(M[4],M[6] )); 381 382 int[][] cdata = new int[n][n]; 383 // Copy the data into the submatrix : 384 for( int l = 0; l < 2; l++ ) { 385 for( int m = 0; m < 2; m++ ) { 386 for( int i = 0; i < n/2; i++ ) { 387 for( int j = 0; j < n/2; j++ ) { 388 cdata[l*n/2+i][m*n/2+j] = subMatC[l*2+m].data[i][j]; 389 } 390 } 391 } 392 } 393 return new MatrixInt( cdata ); 394 }

187 final int STEPS = 7; 188 final MatrixInt[] M
= new MatrixInt[STEPS]; 189 team.execute( new ParallelRegion() { 190 public void run() throws Exception { 191 execute( 0, STEPS-1, new IntegerForLoop() { 192 193 public IntegerSchedule schedule() { 194 return IntegerSchedule.dynamic(); 195 } 196 197 public void run( int first, int last ) throws Exception { 198 for ( int step = first; step <= last; step++ ) { 199 switch ( step ) { 200 case 0: 201 M[0] = multRunSeq( MatrixOp.sub( subMatA[1], subMatA[3] ), MatrixOp.add( subMatB[2],subMatB[3]) ); 202 break; 203 case 1: 204 M[1] = multRunSeq( MatrixOp.add( subMatA[0], subMatA[3] ), MatrixOp.add( subMatB[0], subMatB[3] )); 205 break; 206 case 2: 207 M[2] = multRunSeq( MatrixOp.sub( subMatA[0], subMatA[2] ), MatrixOp.add( subMatB[0], subMatB[1] )); 208 break; 209 case 3: 210 M[3] = multRunSeq( MatrixOp.add( subMatA[0], subMatA[1]), subMatB[3] ) ; 211 break; 212 case 4: 213 M[4] = multRunSeq( subMatA[0], MatrixOp.sub( subMatB[1], subMatB[3] ) ); 214 break; 215 case 5: 216 M[5] = multRunSeq( subMatA[3], MatrixOp.sub( subMatB[2], subMatB[0] ) ); 217 break; 218 case 6: 219 M[6] = multRunSeq( MatrixOp.add( subMatA[2],subMatA[3] ), subMatB[0] ); 220 break; 221 default: 222 break; 223 } 224 } 225 } 226 }); 227 } 228 });

+ 343 public static MatrixInt strassenMultRun( MatrixInt a, MatrixInt b)
{ 344 int n = a.rows(); 345 // 128 seems to be the n where classic mult is more efficient (64 is still fine) 346 if ( n <= 128 ) { 347 return mult(a,b); 348 } 349 // Divide a and b each into 4 matrices of size (n-1) 350 MatrixInt[] subMatA = new MatrixInt[4]; 351 MatrixInt[] subMatB = new MatrixInt[4]; 352 353 for ( int i = 0; i < 2; i++ ) { // rows 354 for ( int j = 0; j < 2; j++ ) { // cols 355 subMatA[i*2+j] = new MatrixInt(n/2,n/2); 356 subMatB[i*2+j] = new MatrixInt(n/2,n/2); 357 // copy the data into the submatrix : 358 for( int l = 0; l < n/2; l++ ) { 359 for( int m = 0; m < n/2; m++ ) { 360 subMatA[i*2+j].data[l][m] = a.data[i*n/2 + l][j*n/2 + m]; 361 subMatB[i*2+j].data[l][m] = b.data[i*n/2 + l][j*n/2 + m]; 362 } 363 } 364 } 365 } 366 367 MatrixInt[] M = new MatrixInt[7]; 368 M[0] = strassenMultRun( sub(subMatA[1],subMatA[3] ),add( subMatB[2],subMatB[3]) ); 369 M[1] = strassenMultRun( add(subMatA[0],subMatA[3] ),add( subMatB[0], subMatB[3] )); 370 M[2] = strassenMultRun( sub(subMatA[0],subMatA[2] ),add( subMatB[0], subMatB[1] )); 371 M[3] = strassenMultRun( add(subMatA[0], subMatA[1]),subMatB[3] ) ; 372 M[4] = strassenMultRun( subMatA[0],sub(subMatB[1], subMatB[3] ) ); 373 M[5] = strassenMultRun( subMatA[3],sub(subMatB[2], subMatB[0] ) ); 374 M[6] = strassenMultRun( add(subMatA[2],subMatA[3] ),subMatB[0] ); 375 376 MatrixInt[] subMatC = new MatrixInt[4]; 377 subMatC[0] = add( M[0], add(M[5], sub(M[1],M[3]))); 378 subMatC[1] = add( M[3], M[4] ); 379 subMatC[2] = add( M[5], M[6] ); 380 subMatC[3] = add( sub(M[1],M[2]), sub(M[4],M[6] )); 381 382 int[][] cdata = new int[n][n]; 383 // Copy the data into the submatrix : 384 for( int l = 0; l < 2; l++ ) { 385 for( int m = 0; m < 2; m++ ) { 386 for( int i = 0; i < n/2; i++ ) { 387 for( int j = 0; j < n/2; j++ ) { 388 cdata[l*n/2+i][m*n/2+j] = subMatC[l*2+m].data[i][j]; 389 } 390 } 391 } 392 } 393 return new MatrixInt( cdata ); 394 }

230 final int SUB_STEPS = 3; 231 final MatrixInt[] subMatC
= new MatrixInt[4]; 232 team.execute( new ParallelRegion() { 233 public void run() throws Exception { 234 execute( 0, SUB_STEPS, new IntegerForLoop() { 235 public void run( int first, int last ) throws Exception{ 236 for ( int step = first; step <= last; step++ ) { 237 switch ( step ) { 238 case 0: 239 subMatC[0] = MatrixOp.add( M[0], MatrixOp.add(M[5], MatrixOp.sub(M[1],M[3]))); 240 break; 241 case 1: 242 subMatC[1] = MatrixOp.add( M[3], M[4] ); 243 break; 244 case 2: 245 subMatC[2] = MatrixOp.add( M[5], M[6] ); 246 break; 247 case 3: 248 subMatC[3] = MatrixOp.add( MatrixOp.sub(M[1],M[2]), MatrixOp.sub(M[4],M[6] )); 249 break; 250 default: 251 break; 252 } 253 } 254 } 255 }); 256 } 257 }); 258

+ Parallel Strassen Matrix Multiplication

+ Parallel Matrix – Vector Multiplication Algorithm: Just a special
case of Matrix – Matrix Multiplication PMML Syntax: MatrixInt A = … VectorInt B = … MatrixInt C1 = MatrixOp.mult( A, B ); MatrixInt C2 = MatrixParallelOp.mult( A, B );

+ Parallel Matrix – Vector Multiplication 407 public static MatrixInt
mult( MatrixInt a, VectorInt b ) { 408 409 if ( ! (a.cols() == b.length()) ) { 410 throw new RuntimeException("Multiplication of different sized matrix & vector"); 411 } 412 413 MatrixInt output = new MatrixInt(a.rows(), 1); 414 415 for (int r = 0; r < a.rows(); r++) { 416 int sum = 0; 417 for (int c = 0 ; c < a.cols(); c++) { 418 sum += (a.data[r][c] * b.data[c]); 419 } 420 output.data[r][0] = sum; 421 } 422 return output; 423 } 36 team.execute( new ParallelRegion() { 137 public void run() throws Exception { 138 execute( 0, a.rows()-1, new IntegerForLoop() { 139 public void run( int first, int last ) { 140 for ( int r = first; r <= last; r++ ) { 141 int sum = 0; 142 for (int c = 0 ; c < a.cols(); c++) { 143 sum += (a.data[r][c] * b.data[c]); 144 } 145 output.data[r][0] = sum; 146 } 147 } 148 }); 149 } 150 });

+ Parallel Matrix – Vector Multiplication

+ Solution to Linear Systems of Equations PMML Syntax: MatrixInt
A = … VectorInt b = … VectorInt x1 = MatrixInverse.solve( A, b ); VectorInt x2 = MatrixInverseSmp.mult( A, b );

+ 20 public static VectorDouble solve(MatrixDouble A, VectorDouble b) throws
Exception { 21 // Determine the number of rows of A 22 int N = A.rows(); 23 24 // Iterate through all of the rows 25 for(int pivot = 0; pivot < N; pivot++) { 26 27 // Initialize the maximum to the first row out of the remaining· 28 // N-pivot rows 29 int max = pivot; 30 // Iterate through the remaining rows 31 for(int i = pivot + 1; i < N; i++) { 32 // If the absolute value of this row is greater than the last max 33 if(Math.abs(A.data[i][pivot]) > Math.abs(A.data[max][pivot])) { 34 // update the new maximum 35 max = i; 36 } 37 } 38 // Holder for the pivot row 39 double[] holder = A.data[pivot];· 40 // Swap the old max row with the pivot row 41 A.data[pivot] = A.data[max];· 42 // Swap the pivot row into the old max row 43 A.data[max] = holder; 44 ..snip.. 52 // Perform elementary row operations 53 for (int i = pivot + 1; i < N; i++) { 54 double alpha = A.data[i][pivot] / A.data[pivot][pivot]; 55 for (int j = pivot; j < N; j++) { 56 A.data[i][j] = A.data[i][j] - alpha * A.data[pivot][j]; 57 } 58 } 59 } 60 61 // solution vector 62 VectorDouble x = new VectorDouble(b.length()); 63 // set all x_i to 0 64 x.zero(); 65 66 // perform backsubstitution 67 for (int i = N - 1; i >= 0; i--) { 68 // initialize sum to 0 69 double sum = 0.0;

+ 59 // Execute parallel row operations 60 team.execute( new
ParallelRegion() { 61 public void run() throws Exception { 62 execute( p + 1, N-1, new IntegerForLoop() { 63 public void run(int first, int last) { 64 for(int i = first; i <= last; i++) { 65 double alpha = A.data[i][p] / A.data[p][p]; 66 b.data[i] -= alpha * b.data[p]; 67 for (int j = p; j < N; j++) { 68 A.data[i][j] = A.data[i][j] - alpha * A.data[p][j]; 69 } 70 } 71 } 72 }); 73 } 74 }); 75 } 76 77 // solution vector 78 VectorDouble x = new VectorDouble(b.length()); 79 // set all x_i to 0 80 x.zero(); 81 82 // perform backsubstitution 83 for (int i = N - 1; i >= 0; i--) { 84 // initialize sum to 0 85 double sum = 0.0; 86 for (int j = i + 1; j < N; j++) { 87 sum += A.data[i][j] * x.data[j]; 88 } 89 x.data[i] = (b.data[i] - sum) / A.data[i][i]; 90 } 91 return x; 92 }

+ Parallel Matrix Inversion

+ Questions?

PMML

PMML

Brian Gianforcaro

More Decks by Brian Gianforcaro

Other Decks in Programming

Featured

Transcript

+ Parallel Matrix Manipulation Library (PMML) Nicholas Dexter Brian Gianforcaro

+ PMML Feature Set n  Targeted towards SMP systems n

+ PMML Types n  All the PMML algorithms operate on

+ Algorithm: •  For i in every row in A

31 team.execute( new ParallelRegion() { 32 public void run() throws

+ Parallel Matrix Multiplication

+ Strassen Matrix Multiplication One of the fastest known Matrix

343 public static MatrixInt strassenMultRun( MatrixInt a, MatrixInt b) {

187 final int STEPS = 7; 188 final MatrixInt[] M

+ 343 public static MatrixInt strassenMultRun( MatrixInt a, MatrixInt b)

230 final int SUB_STEPS = 3; 231 final MatrixInt[] subMatC

+ Parallel Strassen Matrix Multiplication

+ Parallel Matrix – Vector Multiplication Algorithm: Just a special

+ Parallel Matrix – Vector Multiplication 407 public static MatrixInt

+ Parallel Matrix – Vector Multiplication

+ Solution to Linear Systems of Equations PMML Syntax: MatrixInt

+ 20 public static VectorDouble solve(MatrixDouble A, VectorDouble b) throws

+ 59 // Execute parallel row operations 60 team.execute( new

+ Parallel Matrix Inversion

+ Questions?