Sanchayan Maity
0f4b39775c
During the last commit of splash2 benchmark it seems before committing when we ran "make clean", it effectively undid what the patch at below link did http://www.capsl.udel.edu/splash/Download.html Fix this since without this it is not possible to build the arcane splash2 benchmark.
451 lines
13 KiB
C
451 lines
13 KiB
C
/*************************************************************************/
|
|
/* */
|
|
/* Copyright (c) 1994 Stanford University */
|
|
/* */
|
|
/* All rights reserved. */
|
|
/* */
|
|
/* Permission is given to use, copy, and modify this software for any */
|
|
/* non-commercial purpose as long as this copyright notice is not */
|
|
/* removed. All other uses, including redistribution in whole or in */
|
|
/* part, are forbidden without prior written permission. */
|
|
/* */
|
|
/* This software is provided with absolutely no warranty and no */
|
|
/* support. */
|
|
/* */
|
|
/*************************************************************************/
|
|
|
|
/* shared memory implementation of the multigrid method
|
|
implementation uses red-black gauss-seidel relaxation
|
|
iterations, w cycles, and the method of half-injection for
|
|
residual computation */
|
|
|
|
#include <stdio.h>
|
|
#include <math.h>
|
|
#include <time.h>
|
|
#include <stdlib.h>
|
|
#include "decs.h"
|
|
|
|
/* perform multigrid (w cycles) */
|
|
void multig(long my_id)
|
|
{
|
|
long iter;
|
|
double wu;
|
|
double errp;
|
|
long m;
|
|
long minlevel;
|
|
long flag1;
|
|
long flag2;
|
|
long k;
|
|
long my_num;
|
|
double wmax;
|
|
double local_err;
|
|
double red_local_err;
|
|
double black_local_err;
|
|
double g_error;
|
|
|
|
flag1 = 0;
|
|
flag2 = 0;
|
|
iter = 0;
|
|
m = numlev-1;
|
|
wmax = maxwork;
|
|
minlevel = minlev;
|
|
my_num = my_id;
|
|
wu = 0.0;
|
|
|
|
k = m;
|
|
g_error = 1.0e30;
|
|
while ((!flag1) && (!flag2)) {
|
|
errp = g_error;
|
|
iter++;
|
|
if (my_num == MASTER) {
|
|
multi->err_multi = 0.0;
|
|
}
|
|
|
|
/* barrier to make sure all procs have finished intadd or rescal */
|
|
/* before proceeding with relaxation */
|
|
|
|
#if defined(MULTIPLE_BARRIERS)
|
|
BARRIER(bars->error_barrier,nprocs)
|
|
#else
|
|
BARRIER(bars->barrier,nprocs)
|
|
#endif
|
|
relax(k,&red_local_err,RED_ITER,my_num);
|
|
|
|
/* barrier to make sure all red computations have been performed */
|
|
|
|
#if defined(MULTIPLE_BARRIERS)
|
|
BARRIER(bars->error_barrier,nprocs)
|
|
#else
|
|
BARRIER(bars->barrier,nprocs)
|
|
#endif
|
|
relax(k,&black_local_err,BLACK_ITER,my_num);
|
|
|
|
/* compute max local error from red_local_err and black_local_err */
|
|
|
|
if (red_local_err > black_local_err) {
|
|
local_err = red_local_err;
|
|
} else {
|
|
local_err = black_local_err;
|
|
}
|
|
|
|
/* update the global error if necessary */
|
|
|
|
LOCK(locks->error_lock)
|
|
if (local_err > multi->err_multi) {
|
|
multi->err_multi = local_err;
|
|
}
|
|
UNLOCK(locks->error_lock)
|
|
|
|
/* a single relaxation sweep at the finest level is one unit of */
|
|
/* work */
|
|
|
|
wu+=pow((double)4.0,(double)k-m);
|
|
|
|
/* barrier to make sure all processors have checked local error */
|
|
|
|
#if defined(MULTIPLE_BARRIERS)
|
|
BARRIER(bars->error_barrier,nprocs)
|
|
#else
|
|
BARRIER(bars->barrier,nprocs)
|
|
#endif
|
|
g_error = multi->err_multi;
|
|
|
|
/* barrier to make sure master does not cycle back to top of loop */
|
|
/* and reset global->err before we read it and decide what to do */
|
|
|
|
#if defined(MULTIPLE_BARRIERS)
|
|
BARRIER(bars->error_barrier,nprocs)
|
|
#else
|
|
BARRIER(bars->barrier,nprocs)
|
|
#endif
|
|
if (g_error >= lev_tol[k]) {
|
|
if (wu > wmax) {
|
|
/* max work exceeded */
|
|
flag1 = 1;
|
|
fprintf(stderr,"ERROR: Maximum work limit %0.5f exceeded\n",wmax);
|
|
exit(-1);
|
|
} else {
|
|
/* if we have not converged */
|
|
if ((k != 1) && (g_error/errp >= 0.6) && (k > minlevel)) {
|
|
/* if need to go to coarser grid */
|
|
rescal(k,my_num);
|
|
/* transfer residual to rhs of coarser grid */
|
|
lev_tol[k-1] = 0.3 * g_error;
|
|
k = k-1;
|
|
putz(k,my_num);
|
|
/* make initial guess on coarser grid zero */
|
|
g_error = 1.0e30;
|
|
}
|
|
}
|
|
} else {
|
|
/* if we have converged at this level */
|
|
if (k == m) {
|
|
/* if finest grid, we are done */
|
|
flag2 = 1;
|
|
} else {
|
|
/* else go to next finest grid */
|
|
intadd(k,my_num);
|
|
k++;
|
|
g_error = 1.0e30;
|
|
}
|
|
}
|
|
}
|
|
if (do_output) {
|
|
if (my_num == MASTER) {
|
|
printf("iter %ld, level %ld, residual norm %12.8e, work = %7.3f\n", iter,k,multi->err_multi,wu);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* perform red or black iteration (not both) */
|
|
void relax(long k, double *err, long color, long my_num)
|
|
{
|
|
long i;
|
|
long j;
|
|
long iend;
|
|
long jend;
|
|
long oddistart;
|
|
long oddjstart;
|
|
long evenistart;
|
|
long evenjstart;
|
|
long oddiendst;
|
|
long eveniendst;
|
|
long oddjendst;
|
|
long evenjendst;
|
|
double a;
|
|
double h;
|
|
double factor;
|
|
double maxerr;
|
|
double newerr;
|
|
double oldval;
|
|
double newval;
|
|
|
|
i = 0;
|
|
j = 0;
|
|
|
|
*err = 0.0;
|
|
h = lev_res[k];
|
|
|
|
/* points whose sum of row and col index is even do a red iteration, */
|
|
/* others do a black */
|
|
|
|
evenistart = gp[my_num].eist[k];
|
|
evenjstart = gp[my_num].ejst[k];
|
|
oddistart = gp[my_num].oist[k];
|
|
oddjstart = gp[my_num].ojst[k];
|
|
eveniendst = gp[my_num].eiest[k];
|
|
evenjendst = gp[my_num].ejest[k];
|
|
oddiendst = gp[my_num].oiest[k];
|
|
oddjendst = gp[my_num].ojest[k];
|
|
|
|
iend = gp[my_num].rel_start_y[k] + gp[my_num].rel_num_y[k];
|
|
jend = gp[my_num].rel_start_x[k] + gp[my_num].rel_num_x[k];
|
|
|
|
factor = 4.0 - eig2 * h * h ;
|
|
maxerr = 0.0;
|
|
if (color == RED_ITER) {
|
|
for (i=evenistart;i<iend;i+=2) {
|
|
for (j=evenjstart;j<jend;j+=2) {
|
|
a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
|
|
multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
|
|
multi->rhs_multi[k][i][j] ;
|
|
oldval = multi->q_multi[k][i][j];
|
|
newval = a / factor;
|
|
newerr = oldval - newval;
|
|
multi->q_multi[k][i][j] = newval;
|
|
if (fabs(newerr) > maxerr) {
|
|
maxerr = fabs(newerr);
|
|
}
|
|
}
|
|
}
|
|
for (i=oddistart;i<iend;i+=2) {
|
|
for (j=oddjstart;j<jend;j+=2) {
|
|
a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
|
|
multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
|
|
multi->rhs_multi[k][i][j] ;
|
|
oldval = multi->q_multi[k][i][j];
|
|
newval = a / factor;
|
|
newerr = oldval - newval;
|
|
multi->q_multi[k][i][j] = newval;
|
|
if (fabs(newerr) > maxerr) {
|
|
maxerr = fabs(newerr);
|
|
}
|
|
}
|
|
}
|
|
} else if (color == BLACK_ITER) {
|
|
for (i=evenistart;i<iend;i+=2) {
|
|
for (j=oddjstart;j<jend;j+=2) {
|
|
a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
|
|
multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
|
|
multi->rhs_multi[k][i][j] ;
|
|
oldval = multi->q_multi[k][i][j];
|
|
newval = a / factor;
|
|
newerr = oldval - newval;
|
|
multi->q_multi[k][i][j] = newval;
|
|
if (fabs(newerr) > maxerr) {
|
|
maxerr = fabs(newerr);
|
|
}
|
|
}
|
|
}
|
|
for (i=oddistart;i<iend;i+=2) {
|
|
for (j=evenjstart;j<jend;j+=2) {
|
|
a = multi->q_multi[k][i][j+1] + multi->q_multi[k][i][j-1] +
|
|
multi->q_multi[k][i-1][j] + multi->q_multi[k][i+1][j] -
|
|
multi->rhs_multi[k][i][j] ;
|
|
oldval = multi->q_multi[k][i][j];
|
|
newval = a / factor;
|
|
newerr = oldval - newval;
|
|
multi->q_multi[k][i][j] = newval;
|
|
if (fabs(newerr) > maxerr) {
|
|
maxerr = fabs(newerr);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
*err = maxerr;
|
|
}
|
|
|
|
/* perform half-injection to next coarsest level */
|
|
void rescal(long kf, long my_num)
|
|
{
|
|
long ic;
|
|
long if17;
|
|
long jf;
|
|
long jc;
|
|
long krc;
|
|
long istart;
|
|
long iend;
|
|
long jstart;
|
|
long jend;
|
|
double hf;
|
|
double hc;
|
|
double s;
|
|
double s1;
|
|
double s2;
|
|
double s3;
|
|
double s4;
|
|
double factor;
|
|
double int1;
|
|
double int2;
|
|
double i_int_factor;
|
|
double j_int_factor;
|
|
double int_val;
|
|
|
|
krc = kf - 1;
|
|
hc = lev_res[krc];
|
|
hf = lev_res[kf];
|
|
|
|
istart = gp[my_num].rlist[krc];
|
|
jstart = gp[my_num].rljst[krc];
|
|
iend = gp[my_num].rlien[krc];
|
|
jend = gp[my_num].rljen[krc];
|
|
iend = gp[my_num].rel_start_y[krc] + gp[my_num].rel_num_y[krc] - 1;
|
|
jend = gp[my_num].rel_start_x[krc] + gp[my_num].rel_num_x[krc] - 1;
|
|
|
|
factor = 4.0 - eig2 * hf * hf;
|
|
|
|
if17=2*(istart-1);
|
|
for(ic=istart;ic<=iend;ic++) {
|
|
if17+=2;
|
|
i_int_factor = ic * i_int_coeff[krc] * 0.5;
|
|
jf = 2 * (jstart - 1);
|
|
for(jc=jstart;jc<=jend;jc++) {
|
|
jf+=2;
|
|
j_int_factor = jc*j_int_coeff[krc] * 0.5;
|
|
/* method of half-injection uses 2.0 instead of 4.0 */
|
|
s = multi->q_multi[kf][if17][jf+1] + multi->q_multi[kf][if17][jf-1] +
|
|
multi->q_multi[kf][if17-1][jf] + multi->q_multi[kf][if17+1][jf];
|
|
s1 = 2.0 * (multi->rhs_multi[kf][if17][jf] - s +
|
|
factor * multi->q_multi[kf][if17][jf]);
|
|
if ((if17 == 2) || (jf ==2)) {
|
|
s2 = 0;
|
|
} else {
|
|
s = multi->q_multi[kf][if17][jf-1] + multi->q_multi[kf][if17][jf-3] +
|
|
multi->q_multi[kf][if17-1][jf-2] + multi->q_multi[kf][if17+1][jf-2];
|
|
s2 = 2.0 * (multi->rhs_multi[kf][if17][jf-2] - s +
|
|
factor * multi->q_multi[kf][if17][jf-2]);
|
|
}
|
|
if ((if17 == 2) || (jf ==2)) {
|
|
s3 = 0;
|
|
} else {
|
|
s = multi->q_multi[kf][if17-2][jf+1] + multi->q_multi[kf][if17-2][jf-1] +
|
|
multi->q_multi[kf][if17-3][jf] + multi->q_multi[kf][if17-1][jf];
|
|
s3 = 2.0 * (multi->rhs_multi[kf][if17-2][jf] - s +
|
|
factor * multi->q_multi[kf][if17-2][jf]);
|
|
}
|
|
if ((if17 == 2) || (jf ==2)) {
|
|
s4 = 0;
|
|
} else {
|
|
s = multi->q_multi[kf][if17-2][jf-1] + multi->q_multi[kf][if17-2][jf-3] +
|
|
multi->q_multi[kf][if17-3][jf-2] + multi->q_multi[kf][if17-1][jf-2];
|
|
s4 = 2.0 * (multi->rhs_multi[kf][if17-2][jf-2] - s +
|
|
factor * multi->q_multi[kf][if17-2][jf-2]);
|
|
}
|
|
int1 = j_int_factor*s4 + (1.0-j_int_factor)*s3;
|
|
int2 = j_int_factor*s2 + (1.0-j_int_factor)*s1;
|
|
int_val = i_int_factor*int1+(1.0-i_int_factor)*int2;
|
|
multi->rhs_multi[krc][ic][jc] = i_int_factor*int1+(1.0-i_int_factor)*int2;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* perform interpolation and addition to next finest grid */
|
|
void intadd(long kc, long my_num)
|
|
{
|
|
long ic;
|
|
long if17;
|
|
long jf;
|
|
long jc;
|
|
long kf;
|
|
long istart;
|
|
long jstart;
|
|
long iend;
|
|
long jend;
|
|
double hc;
|
|
double hf;
|
|
long ifine1;
|
|
long ifine2;
|
|
long jfine1;
|
|
long jfine2;
|
|
double int1;
|
|
double int2;
|
|
double i_int_factor1;
|
|
double j_int_factor1;
|
|
double i_int_factor2;
|
|
double j_int_factor2;
|
|
|
|
kf = kc + 1;
|
|
hc = lev_res[kc];
|
|
hf = lev_res[kf];
|
|
|
|
istart = gp[my_num].iist[kc];
|
|
jstart = gp[my_num].ijst[kc];
|
|
iend = gp[my_num].iien[kc];
|
|
jend = gp[my_num].ijen[kc];
|
|
|
|
istart = gp[my_num].rel_start_y[kc];
|
|
jstart = gp[my_num].rel_start_x[kc];
|
|
iend = gp[my_num].rel_start_y[kc] + gp[my_num].rel_num_y[kc] - 1;
|
|
jend = gp[my_num].rel_start_x[kc] + gp[my_num].rel_num_x[kc] - 1;
|
|
if17 = 2*(istart-1);
|
|
for(ic=istart;ic<=iend;ic++) {
|
|
|
|
if17+=2;
|
|
ifine1 = if17-1;
|
|
ifine2 = if17;
|
|
i_int_factor1= ((imx[kc]-2)-(ic-1)) * (i_int_coeff[kf]);
|
|
i_int_factor2= ic * i_int_coeff[kf];
|
|
|
|
jf = 2*(jstart-1);
|
|
|
|
for(jc=jstart;jc<=jend;jc++) {
|
|
jf+=2;
|
|
jfine1 = jf-1;
|
|
jfine2 = jf;
|
|
j_int_factor1= ((jmx[kc]-2)-(jc-1)) * (j_int_coeff[kf]);
|
|
j_int_factor2= jc * j_int_coeff[kf];
|
|
|
|
int1 = j_int_factor1*multi->q_multi[kc][ic][jc-1] +
|
|
(1.0-j_int_factor1)*multi->q_multi[kc][ic][jc];
|
|
int2 = j_int_factor1*multi->q_multi[kc][ic-1][jc-1] +
|
|
(1.0-j_int_factor1)*multi->q_multi[kc][ic-1][jc];
|
|
multi->q_multi[kf][if17-1][jf-1] += i_int_factor1*int2 +
|
|
(1.0-i_int_factor1)*int1;
|
|
int2 = j_int_factor1*multi->q_multi[kc][ic+1][jc-1] +
|
|
(1.0-j_int_factor1)*multi->q_multi[kc][ic+1][jc];
|
|
multi->q_multi[kf][if17][jf-1] += i_int_factor2*int2 +
|
|
(1.0-i_int_factor2)*int1;
|
|
int1 = j_int_factor2*multi->q_multi[kc][ic][jc+1] +
|
|
(1.0-j_int_factor2)*multi->q_multi[kc][ic][jc];
|
|
int2 = j_int_factor2*multi->q_multi[kc][ic-1][jc+1] +
|
|
(1.0-j_int_factor2)*multi->q_multi[kc][ic-1][jc];
|
|
multi->q_multi[kf][if17-1][jf] += i_int_factor1*int2 +
|
|
(1.0-i_int_factor1)*int1;
|
|
int2 = j_int_factor2*multi->q_multi[kc][ic+1][jc+1] +
|
|
(1.0-j_int_factor2)*multi->q_multi[kc][ic+1][jc];
|
|
multi->q_multi[kf][if17][jf] += i_int_factor2*int2 +
|
|
(1.0-i_int_factor2)*int1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* initialize a grid to zero in parallel */
|
|
void putz(long k, long my_num)
|
|
{
|
|
long i;
|
|
long j;
|
|
long istart;
|
|
long jstart;
|
|
long iend;
|
|
long jend;
|
|
|
|
istart = gp[my_num].pist[k];
|
|
jstart = gp[my_num].pjst[k];
|
|
iend = gp[my_num].pien[k];
|
|
jend = gp[my_num].pjen[k];
|
|
for (i=istart;i<=iend;i++) {
|
|
for (j=jstart;j<=jend;j++) {
|
|
multi->q_multi[k][i][j] = 0.0;
|
|
}
|
|
}
|
|
}
|