Slide 84
Slide 84 text
What if we did this manually?
Create monolithic kernel for a
single step of Runge-Kutta method.
Launch the kernel in a loop.
This would be 10x faster! But,
We lost odeint’s generality.
1 double3 lorenz system(double r, double sigma, double b, double3 s) {
2 return (double3)( sigma ∗ (s.y − s.x),
3 r ∗ s.x − s.y − s.x ∗ s.z,
4 s.x ∗ s.y − b ∗ s.z);
5 }
6 kernel void lorenz ensemble(
7 ulong n, double dt, double sigma, double b,
8 const global double ∗R,
9 global double ∗X,
10 global double ∗Y,
11 global double ∗Z
12 )
13 {
14 for(size t i = get global id(0); i < n; i += get global size(0)) {
15 double r = R[i];
16 double3 s = (double3)(X[i], Y[i ], Z[i ]);
17 double3 k1, k2, k3, k4;
18
19 k1 = dt ∗ lorenz system(r, sigma, b, s);
20 k2 = dt ∗ lorenz system(r, sigma, b, s + 0.5 ∗ k1);
21 k3 = dt ∗ lorenz system(r, sigma, b, s + 0.5 ∗ k2);
22 k4 = dt ∗ lorenz system(r, sigma, b, s + k3);
23
24 s += (k1 + 2 ∗ k2 + 2 ∗ k3 + k4) / 6;
25
26 X[i] = s.x; Y[i] = s.y; Z[i ] = s.z;
27 }
28 }