2 * O(n log n) work algorithm (Hillis-Steele)
7 * Hillis/Steele, prefix sum version
10 void algorithm (numtype x[], unsigned long size, unsigned int ops[]) {
15 for(k=2; k <= size; k <<=1){
16 #pragma omp parallel for shared(x, size, ops, k) private(i)
17 for(i = (k-1); i < size; i+=k){
19 printf ("x[%2li] = x[%2li] + x[%2li]; // {i:%li, k:%li}\n", i, i-k, i, i, k);
21 x[i] = x[i-(k/2)] + x[i];
28 * Hillis/Steele, partial prefix sum version
30 void algorithm (numtype x[], unsigned long size, unsigned int ops[]) {
35 y = malloc(size * sizeof(numtype));
38 for (k=1; k<size; k<<=1) {
39 #pragma omp parallel for shared (x, y, ops, k) private (i)
40 for (i=k; i<size; i++) {
42 printf ("y[%li] = x[%li] + x[%li]; // %li + %li", i, i-k, i, x[i-k], x[i]);
46 memcpy (x, y, size * sizeof(numtype));
53 printf ("Has not been freed!!!\n");